Mirror of https://github.com/onyx-dot-app/onyx.git (synced 2026-02-21 01:35:46 +00:00)

Compare commits: 1 commit, reduce_bac... -> bugfix/ves... (SHA 1ac3ec7575)
@@ -44,9 +44,6 @@ env:
  SHAREPOINT_CLIENT_SECRET: ${{ secrets.SHAREPOINT_CLIENT_SECRET }}
  SHAREPOINT_CLIENT_DIRECTORY_ID: ${{ secrets.SHAREPOINT_CLIENT_DIRECTORY_ID }}
  SHAREPOINT_SITE: ${{ secrets.SHAREPOINT_SITE }}
  # Gitbook
  GITBOOK_SPACE_ID: ${{ secrets.GITBOOK_SPACE_ID }}
  GITBOOK_API_KEY: ${{ secrets.GITBOOK_API_KEY }}

jobs:
  connectors-check:

@@ -35,9 +35,7 @@ RUN apt-get update && \
        libuuid1=2.38.1-5+deb12u1 \
        libxmlsec1-dev \
        pkg-config \
        gcc \
        nano \
        vim && \
        gcc && \
        rm -rf /var/lib/apt/lists/* && \
        apt-get clean

@@ -85,7 +85,7 @@ if __name__ == "__main__":
    graph = basic_graph_builder()
    compiled_graph = graph.compile()
    input = BasicInput(unused=True)
    input = BasicInput(_unused=True)
    primary_llm, fast_llm = get_default_llms()
    with get_session_context_manager() as db_session:
        config, _ = get_test_config(

@@ -17,7 +17,7 @@ from onyx.agents.agent_search.orchestration.states import ToolChoiceUpdate
class BasicInput(BaseModel):
    # Langgraph needs a nonempty input, but we pass in all static
    # data through a RunnableConfig.
    unused: bool = True
    _unused: bool = True


## Graph Output State

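Worth noting on the `unused` -> `_unused` rename in this hunk: in Pydantic v2, an annotated attribute whose name starts with an underscore is treated as a private attribute rather than a model field. The minimal standalone sketch below (not onyx code, and assuming Pydantic v2 semantics) shows what that implies for constructor kwargs and serialization:

```python
# Minimal sketch (assumes Pydantic v2): underscore-prefixed names become
# private attributes, not fields, so they are ignored by validation and
# excluded from serialization.
from pydantic import BaseModel


class BasicInput(BaseModel):
    _unused: bool = True  # private attr, NOT a model field


inp = BasicInput(_unused=False)  # kwarg is silently ignored
print(inp.model_dump())          # {} -- no fields serialized
print(inp._unused)               # True (the default private value)
```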
@@ -9,6 +9,7 @@ class CoreState(BaseModel):
    This is the core state that is shared across all subgraphs.
    """

    base_question: str = ""
    log_messages: Annotated[list[str], add] = []

@@ -17,4 +18,4 @@ class SubgraphCoreState(BaseModel):
    This is the core state that is shared across all subgraphs.
    """

    log_messages: Annotated[list[str], add] = []
    log_messages: Annotated[list[str], add]
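For context on the `Annotated[list[str], add]` pattern these state classes use: LangGraph reads the second argument of `Annotated` as a reducer, so updates coming from different nodes are combined with `operator.add` (list concatenation) instead of overwriting each other. A minimal sketch, assuming LangGraph's reducer semantics (the merge call itself runs without LangGraph installed):

```python
# Minimal sketch of the reducer annotation used by these state models.
# LangGraph treats the metadata in Annotated[...] as a reducer function;
# operator.add on lists means "append updates" rather than "replace".
from operator import add
from typing import Annotated

from pydantic import BaseModel


class CoreState(BaseModel):
    log_messages: Annotated[list[str], add] = []


# When two nodes each emit {"log_messages": [...]}, the framework merges
# them with the reducer: add(existing, update).
merged = add(["node_1 done"], ["node_2 done"])
print(merged)  # ['node_1 done', 'node_2 done']
```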
@@ -1,8 +1,8 @@
from datetime import datetime
from typing import cast

from langchain_core.messages import BaseMessage
from langchain_core.messages import HumanMessage
from langchain_core.messages import merge_message_runs
from langchain_core.runnables.config import RunnableConfig

from onyx.agents.agent_search.deep_search.initial.generate_individual_sub_answer.states import (
@@ -12,43 +12,14 @@ from onyx.agents.agent_search.deep_search.initial.generate_individual_sub_answer
    SubQuestionAnswerCheckUpdate,
)
from onyx.agents.agent_search.models import GraphConfig
from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
    binary_string_test,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
    AGENT_LLM_RATELIMIT_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
    AGENT_LLM_TIMEOUT_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
    AGENT_POSITIVE_VALUE_STR,
)
from onyx.agents.agent_search.shared_graph_utils.constants import AgentLLMErrorType
from onyx.agents.agent_search.shared_graph_utils.models import AgentErrorLog
from onyx.agents.agent_search.shared_graph_utils.models import LLMNodeErrorStrings
from onyx.agents.agent_search.shared_graph_utils.utils import (
    get_langgraph_node_log_string,
)
from onyx.agents.agent_search.shared_graph_utils.utils import parse_question_id
from onyx.configs.agent_configs import AGENT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_CHECK
from onyx.llm.chat_llm import LLMRateLimitError
from onyx.llm.chat_llm import LLMTimeoutError
from onyx.prompts.agent_search import SUB_ANSWER_CHECK_PROMPT
from onyx.prompts.agent_search import UNKNOWN_ANSWER
from onyx.utils.logger import setup_logger
from onyx.utils.timing import log_function_time

logger = setup_logger()

_llm_node_error_strings = LLMNodeErrorStrings(
    timeout="LLM Timeout Error. The sub-answer will be treated as 'relevant'",
    rate_limit="LLM Rate Limit Error. The sub-answer will be treated as 'relevant'",
    general_error="General LLM Error. The sub-answer will be treated as 'relevant'",
)


@log_function_time(print_only=True)
def check_sub_answer(
    state: AnswerQuestionState, config: RunnableConfig
) -> SubQuestionAnswerCheckUpdate:
@@ -82,40 +53,14 @@ def check_sub_answer(
    graph_config = cast(GraphConfig, config["metadata"]["config"])
    fast_llm = graph_config.tooling.fast_llm
    agent_error: AgentErrorLog | None = None
    response: BaseMessage | None = None
    try:
        response = fast_llm.invoke(
        response = list(
            fast_llm.stream(
                prompt=msg,
                timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_CHECK,
            )
        )

        quality_str: str = cast(str, response.content)
        answer_quality = binary_string_test(
            text=quality_str, positive_value=AGENT_POSITIVE_VALUE_STR
        )
        log_result = f"Answer quality: {quality_str}"

    except LLMTimeoutError:
        agent_error = AgentErrorLog(
            error_type=AgentLLMErrorType.TIMEOUT,
            error_message=AGENT_LLM_TIMEOUT_MESSAGE,
            error_result=_llm_node_error_strings.timeout,
        )
        answer_quality = True
        log_result = agent_error.error_result
        logger.error("LLM Timeout Error - check sub answer")

    except LLMRateLimitError:
        agent_error = AgentErrorLog(
            error_type=AgentLLMErrorType.RATE_LIMIT,
            error_message=AGENT_LLM_RATELIMIT_MESSAGE,
            error_result=_llm_node_error_strings.rate_limit,
        )

        answer_quality = True
        log_result = agent_error.error_result
        logger.error("LLM Rate Limit Error - check sub answer")
    quality_str: str = merge_message_runs(response, chunk_separator="")[0].content
    answer_quality = "yes" in quality_str.lower()

    return SubQuestionAnswerCheckUpdate(
        answer_quality=answer_quality,
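A note on the `merge_message_runs` call that reappears in the reverted version: it concatenates consecutive message chunks of the same type, which is how the streamed reply is reassembled into one string. A minimal standalone sketch, assuming langchain-core is installed:

```python
# Minimal sketch (assumes langchain-core): streamed chunks of the same
# message type are merged into a single message with no separator.
from langchain_core.messages import AIMessage, merge_message_runs

chunks = [AIMessage(content="ye"), AIMessage(content="s")]
merged = merge_message_runs(chunks, chunk_separator="")[0]
print(merged.content)  # "yes"
```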
@@ -124,7 +69,7 @@ def check_sub_answer(
                graph_component="initial - generate individual sub answer",
                node_name="check sub answer",
                node_start_time=node_start_time,
                result=log_result,
                result=f"Answer quality: {quality_str}",
            )
        ],
    )
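The net effect of the except-blocks removed from `check_sub_answer` above is a fail-open policy: on LLM timeout or rate limit, the check keeps the sub-answer instead of failing the graph. A self-contained sketch of that policy (the exception names mirror onyx.llm.chat_llm; the stub LLM is illustrative only):

```python
# Fail-open sketch: on an LLM timeout or rate limit the quality check
# returns True ("keep the sub-answer") rather than raising.
class LLMTimeoutError(Exception): ...
class LLMRateLimitError(Exception): ...


def check_quality(invoke, prompt: str) -> bool:
    try:
        return "yes" in invoke(prompt).lower()
    except (LLMTimeoutError, LLMRateLimitError):
        return True  # treat the sub-answer as relevant on LLM failure


def flaky_llm(prompt: str) -> str:
    raise LLMTimeoutError("simulated timeout")


assert check_quality(flaky_llm, "Is this sub-answer relevant?") is True
```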
@@ -16,23 +16,6 @@ from onyx.agents.agent_search.models import GraphConfig
from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
    build_sub_question_answer_prompt,
)
from onyx.agents.agent_search.shared_graph_utils.calculations import (
    dedup_sort_inference_section_list,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
    AGENT_LLM_RATELIMIT_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
    AGENT_LLM_TIMEOUT_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
    AgentLLMErrorType,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
    LLM_ANSWER_ERROR_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.models import AgentErrorLog
from onyx.agents.agent_search.shared_graph_utils.models import LLMNodeErrorStrings
from onyx.agents.agent_search.shared_graph_utils.utils import get_answer_citation_ids
from onyx.agents.agent_search.shared_graph_utils.utils import (
    get_langgraph_node_log_string,
@@ -47,23 +30,12 @@ from onyx.chat.models import StreamStopInfo
from onyx.chat.models import StreamStopReason
from onyx.chat.models import StreamType
from onyx.configs.agent_configs import AGENT_MAX_ANSWER_CONTEXT_DOCS
from onyx.configs.agent_configs import AGENT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_GENERATION
from onyx.llm.chat_llm import LLMRateLimitError
from onyx.llm.chat_llm import LLMTimeoutError
from onyx.prompts.agent_search import NO_RECOVERED_DOCS
from onyx.utils.logger import setup_logger
from onyx.utils.timing import log_function_time

logger = setup_logger()

_llm_node_error_strings = LLMNodeErrorStrings(
    timeout="LLM Timeout Error. A sub-answer could not be constructed and the sub-question will be ignored.",
    rate_limit="LLM Rate Limit Error. A sub-answer could not be constructed and the sub-question will be ignored.",
    general_error="General LLM Error. A sub-answer could not be constructed and the sub-question will be ignored.",
)


@log_function_time(print_only=True)
def generate_sub_answer(
    state: AnswerQuestionState,
    config: RunnableConfig,
@@ -79,17 +51,12 @@ def generate_sub_answer(
    state.verified_reranked_documents
    level, question_num = parse_question_id(state.question_id)
    context_docs = state.context_documents[:AGENT_MAX_ANSWER_CONTEXT_DOCS]

    context_docs = dedup_sort_inference_section_list(context_docs)

    persona_contextualized_prompt = get_persona_agent_prompt_expressions(
        graph_config.inputs.search_request.persona
    ).contextualized_prompt

    if len(context_docs) == 0:
        answer_str = NO_RECOVERED_DOCS
        cited_documents: list = []
        log_results = "No documents retrieved"
        write_custom_event(
            "sub_answers",
            AgentAnswerPiece(
@@ -112,67 +79,41 @@ def generate_sub_answer(
    response: list[str | list[str | dict[str, Any]]] = []
    dispatch_timings: list[float] = []

    agent_error: AgentErrorLog | None = None

    try:
        for message in fast_llm.stream(
            prompt=msg,
            timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_GENERATION,
        ):
            # TODO: in principle, the answer here COULD contain images, but we don't support that yet
            content = message.content
            if not isinstance(content, str):
                raise ValueError(
                    f"Expected content to be a string, but got {type(content)}"
                )
            start_stream_token = datetime.now()
            write_custom_event(
                "sub_answers",
                AgentAnswerPiece(
                    answer_piece=content,
                    level=level,
                    level_question_num=question_num,
                    answer_type="agent_sub_answer",
                ),
                writer,
    for message in fast_llm.stream(
        prompt=msg,
    ):
        # TODO: in principle, the answer here COULD contain images, but we don't support that yet
        content = message.content
        if not isinstance(content, str):
            raise ValueError(
                f"Expected content to be a string, but got {type(content)}"
            )
        end_stream_token = datetime.now()
        dispatch_timings.append(
            (end_stream_token - start_stream_token).microseconds
        )
        response.append(content)

    except LLMTimeoutError:
        agent_error = AgentErrorLog(
            error_type=AgentLLMErrorType.TIMEOUT,
            error_message=AGENT_LLM_TIMEOUT_MESSAGE,
            error_result=_llm_node_error_strings.timeout,
        start_stream_token = datetime.now()
        write_custom_event(
            "sub_answers",
            AgentAnswerPiece(
                answer_piece=content,
                level=level,
                level_question_num=question_num,
                answer_type="agent_sub_answer",
            ),
            writer,
        )
        logger.error("LLM Timeout Error - generate sub answer")
    except LLMRateLimitError:
        agent_error = AgentErrorLog(
            error_type=AgentLLMErrorType.RATE_LIMIT,
            error_message=AGENT_LLM_RATELIMIT_MESSAGE,
            error_result=_llm_node_error_strings.rate_limit,
        end_stream_token = datetime.now()
        dispatch_timings.append(
            (end_stream_token - start_stream_token).microseconds
        )
        logger.error("LLM Rate Limit Error - generate sub answer")
        response.append(content)

    if agent_error:
        answer_str = LLM_ANSWER_ERROR_MESSAGE
        cited_documents = []
        log_results = (
            agent_error.error_result
            or "Sub-answer generation failed due to LLM error"
        )
    answer_str = merge_message_runs(response, chunk_separator="")[0].content
    logger.debug(
        f"Average dispatch time: {sum(dispatch_timings) / len(dispatch_timings)}"
    )

    else:
        answer_str = merge_message_runs(response, chunk_separator="")[0].content
        answer_citation_ids = get_answer_citation_ids(answer_str)
        cited_documents = [
            context_docs[id] for id in answer_citation_ids if id < len(context_docs)
        ]
        log_results = None
    answer_citation_ids = get_answer_citation_ids(answer_str)
    cited_documents = [
        context_docs[id] for id in answer_citation_ids if id < len(context_docs)
    ]

    stop_event = StreamStopInfo(
        stop_reason=StreamStopReason.FINISHED,
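Both sides of this hunk map citation markers in the generated answer back to context documents. `get_answer_citation_ids` is an onyx helper whose body is not shown here, so the regex-based reimplementation below is an assumption about its behavior, inferred from how it is called:

```python
# Assumed behavior of get_answer_citation_ids: pull the numeric ids out
# of [n]-style citation markers, deduplicated and sorted.
import re


def get_answer_citation_ids(answer_str: str) -> list[int]:
    return sorted({int(m) for m in re.findall(r"\[(\d+)\]", answer_str)})


context_docs = ["doc_a", "doc_b", "doc_c", "doc_d"]
answer = "See [1] and [3] for details."
ids = get_answer_citation_ids(answer)  # [1, 3]
# guard against out-of-range ids, just as the node does
cited = [context_docs[i] for i in ids if i < len(context_docs)]
print(cited)  # ['doc_b', 'doc_d']
```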
@@ -190,7 +131,7 @@ def generate_sub_answer(
                graph_component="initial - generate individual sub answer",
                node_name="generate sub answer",
                node_start_time=node_start_time,
                result=log_results or "",
                result="",
            )
        ],
    )
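Both versions of `generate_sub_answer` share the same streaming skeleton: forward each token to the event writer as soon as it arrives, and record how long dispatch takes separately from the time spent waiting on the LLM. A simplified, self-contained sketch of that skeleton (the names here are illustrative, not onyx APIs):

```python
# Simplified stream-and-dispatch skeleton: emit() stands in for
# write_custom_event; llm_stream yields string tokens.
from datetime import datetime
from typing import Callable, Iterable


def stream_answer(llm_stream: Iterable[str], emit: Callable[[str], None]) -> str:
    response: list[str] = []
    dispatch_timings: list[float] = []
    for content in llm_stream:
        start_stream_token = datetime.now()
        emit(content)  # push the token to the stream consumer immediately
        end_stream_token = datetime.now()
        dispatch_timings.append((end_stream_token - start_stream_token).microseconds)
        response.append(content)
    print(f"Average dispatch time: {sum(dispatch_timings) / len(dispatch_timings)}")
    return "".join(response)


print(stream_answer(["Hello", ", ", "world"], emit=lambda tok: None))
```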
@@ -42,8 +42,10 @@ class SubQuestionRetrievalIngestionUpdate(LoggerUpdate, BaseModel):
class SubQuestionAnsweringInput(SubgraphCoreState):
    question: str
    question_id: str
    question: str = ""
    question_id: str = (
        ""  # 0_0 is original question, everything else is <level>_<question_num>.
    )
    # level 0 is original question and first decomposition, level 1 is follow up, etc
    # question_num is a unique number per original question per level.
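The `<level>_<question_num>` convention documented in these comments is what `make_question_id` and `parse_question_id` (seen elsewhere in this diff) encode and decode. A sketch of the convention follows; the helper names exist in onyx, but these bodies are assumptions inferred from the comments:

```python
# Assumed implementations of the question-id helpers, matching the
# "<level>_<question_num>" convention described in the comments above.
def make_question_id(level: int, question_num: int) -> str:
    return f"{level}_{question_num}"


def parse_question_id(question_id: str) -> tuple[int, int]:
    level, question_num = question_id.split("_")
    return int(level), int(question_num)


assert make_question_id(0, 0) == "0_0"      # the original question
assert parse_question_id("1_3") == (1, 3)   # refinement level 1, question 3
```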
@@ -26,31 +26,14 @@ from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
    trim_prompt_piece,
)
from onyx.agents.agent_search.shared_graph_utils.calculations import (
    get_answer_generation_documents,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
    AGENT_LLM_RATELIMIT_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
    AGENT_LLM_TIMEOUT_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
    AgentLLMErrorType,
)
from onyx.agents.agent_search.shared_graph_utils.models import AgentErrorLog
from onyx.agents.agent_search.shared_graph_utils.models import InitialAgentResultStats
from onyx.agents.agent_search.shared_graph_utils.models import LLMNodeErrorStrings
from onyx.agents.agent_search.shared_graph_utils.operators import (
    dedup_inference_section_list,
    dedup_inference_sections,
)
from onyx.agents.agent_search.shared_graph_utils.utils import (
    dispatch_main_answer_stop_info,
)
from onyx.agents.agent_search.shared_graph_utils.utils import format_docs
from onyx.agents.agent_search.shared_graph_utils.utils import (
    get_deduplicated_structured_subquestion_documents,
)
from onyx.agents.agent_search.shared_graph_utils.utils import (
    get_langgraph_node_log_string,
)
@@ -59,16 +42,12 @@ from onyx.agents.agent_search.shared_graph_utils.utils import remove_document_ci
from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
from onyx.chat.models import AgentAnswerPiece
from onyx.chat.models import ExtendedToolResponse
from onyx.chat.models import StreamingError
from onyx.configs.agent_configs import AGENT_MAX_ANSWER_CONTEXT_DOCS
from onyx.configs.agent_configs import AGENT_MAX_STREAMED_DOCS_FOR_INITIAL_ANSWER
from onyx.configs.agent_configs import AGENT_MIN_ORIG_QUESTION_DOCS
from onyx.configs.agent_configs import (
    AGENT_TIMEOUT_OVERRIDE_LLM_INITIAL_ANSWER_GENERATION,
from onyx.context.search.models import InferenceSection
from onyx.prompts.agent_search import (
    INITIAL_ANSWER_PROMPT_W_SUB_QUESTIONS,
)
from onyx.llm.chat_llm import LLMRateLimitError
from onyx.llm.chat_llm import LLMTimeoutError
from onyx.prompts.agent_search import INITIAL_ANSWER_PROMPT_W_SUB_QUESTIONS
from onyx.prompts.agent_search import (
    INITIAL_ANSWER_PROMPT_WO_SUB_QUESTIONS,
)
@@ -77,16 +56,8 @@ from onyx.prompts.agent_search import (
)
from onyx.prompts.agent_search import UNKNOWN_ANSWER
from onyx.tools.tool_implementations.search.search_tool import yield_search_responses
from onyx.utils.timing import log_function_time

_llm_node_error_strings = LLMNodeErrorStrings(
    timeout="LLM Timeout Error. The initial answer could not be generated.",
    rate_limit="LLM Rate Limit Error. The initial answer could not be generated.",
    general_error="General LLM Error. The initial answer could not be generated.",
)


@log_function_time(print_only=True)
def generate_initial_answer(
    state: SubQuestionRetrievalState,
    config: RunnableConfig,
@@ -102,19 +73,15 @@ def generate_initial_answer(
    question = graph_config.inputs.search_request.query
    prompt_enrichment_components = get_prompt_enrichment_components(graph_config)

    # get all documents cited in sub-questions
    structured_subquestion_docs = get_deduplicated_structured_subquestion_documents(
        state.sub_question_results
    )

    sub_questions_cited_documents = state.cited_documents
    orig_question_retrieval_documents = state.orig_question_retrieved_documents

    consolidated_context_docs = structured_subquestion_docs.cited_documents
    consolidated_context_docs: list[InferenceSection] = sub_questions_cited_documents
    counter = 0
    for original_doc_number, original_doc in enumerate(
        orig_question_retrieval_documents
    ):
        if original_doc_number not in structured_subquestion_docs.cited_documents:
        if original_doc_number not in sub_questions_cited_documents:
            if (
                counter <= AGENT_MIN_ORIG_QUESTION_DOCS
                or len(consolidated_context_docs) < AGENT_MAX_ANSWER_CONTEXT_DOCS
@@ -123,18 +90,15 @@ def generate_initial_answer(
                counter += 1

    # sort docs by their scores - though the scores refer to different questions
    relevant_docs = dedup_inference_section_list(consolidated_context_docs)
    relevant_docs = dedup_inference_sections(
        consolidated_context_docs, consolidated_context_docs
    )

    sub_questions: list[str] = []

    # Create the list of documents to stream out. Start with the
    # ones that wil be in the context (or, if len == 0, use docs
    # that were retrieved for the original question)
    answer_generation_documents = get_answer_generation_documents(
        relevant_docs=relevant_docs,
        context_documents=structured_subquestion_docs.context_documents,
        original_question_docs=orig_question_retrieval_documents,
        max_docs=AGENT_MAX_STREAMED_DOCS_FOR_INITIAL_ANSWER,
    streamed_documents = (
        relevant_docs
        if len(relevant_docs) > 0
        else state.orig_question_retrieved_documents[:15]
    )

    # Use the query info from the base document retrieval
@@ -144,13 +108,11 @@ def generate_initial_answer(
        graph_config.tooling.search_tool
    ), "search_tool must be provided for agentic search"

    relevance_list = relevance_from_docs(
        answer_generation_documents.streaming_documents
    )
    relevance_list = relevance_from_docs(relevant_docs)
    for tool_response in yield_search_responses(
        query=question,
        reranked_sections=answer_generation_documents.streaming_documents,
        final_context_sections=answer_generation_documents.context_documents,
        reranked_sections=streamed_documents,
        final_context_sections=streamed_documents,
        search_query_info=query_info,
        get_section_relevance=lambda: relevance_list,
        search_tool=graph_config.tooling.search_tool,
@@ -166,7 +128,7 @@ def generate_initial_answer(
        writer,
    )

    if len(answer_generation_documents.context_documents) == 0:
    if len(relevant_docs) == 0:
        write_custom_event(
            "initial_agent_answer",
            AgentAnswerPiece(
@@ -232,7 +194,7 @@ def generate_initial_answer(
    model = graph_config.tooling.fast_llm

    doc_context = format_docs(answer_generation_documents.context_documents)
    doc_context = format_docs(relevant_docs)
    doc_context = trim_prompt_piece(
        config=model.config,
        prompt_piece=doc_context,
@@ -262,82 +224,30 @@ def generate_initial_answer(
    streamed_tokens: list[str | list[str | dict[str, Any]]] = [""]
    dispatch_timings: list[float] = []

    agent_error: AgentErrorLog | None = None

    try:
        for message in model.stream(
            msg,
            timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_INITIAL_ANSWER_GENERATION,
        ):
            # TODO: in principle, the answer here COULD contain images, but we don't support that yet
            content = message.content
            if not isinstance(content, str):
                raise ValueError(
                    f"Expected content to be a string, but got {type(content)}"
                )
            start_stream_token = datetime.now()

            write_custom_event(
                "initial_agent_answer",
                AgentAnswerPiece(
                    answer_piece=content,
                    level=0,
                    level_question_num=0,
                    answer_type="agent_level_answer",
                ),
                writer,
    for message in model.stream(msg):
        # TODO: in principle, the answer here COULD contain images, but we don't support that yet
        content = message.content
        if not isinstance(content, str):
            raise ValueError(
                f"Expected content to be a string, but got {type(content)}"
            )
        end_stream_token = datetime.now()
        dispatch_timings.append(
            (end_stream_token - start_stream_token).microseconds
        )
        streamed_tokens.append(content)
        start_stream_token = datetime.now()

    except LLMTimeoutError:
        agent_error = AgentErrorLog(
            error_type=AgentLLMErrorType.TIMEOUT,
            error_message=AGENT_LLM_TIMEOUT_MESSAGE,
            error_result=_llm_node_error_strings.timeout,
        )
        logger.error("LLM Timeout Error - generate initial answer")

    except LLMRateLimitError:
        agent_error = AgentErrorLog(
            error_type=AgentLLMErrorType.RATE_LIMIT,
            error_message=AGENT_LLM_RATELIMIT_MESSAGE,
            error_result=_llm_node_error_strings.rate_limit,
        )
        logger.error("LLM Rate Limit Error - generate initial answer")

    if agent_error:
        write_custom_event(
            "initial_agent_answer",
            StreamingError(
                error=AGENT_LLM_TIMEOUT_MESSAGE,
            AgentAnswerPiece(
                answer_piece=content,
                level=0,
                level_question_num=0,
                answer_type="agent_level_answer",
            ),
            writer,
        )
        return InitialAnswerUpdate(
            initial_answer=None,
            answer_error=AgentErrorLog(
                error_message=agent_error.error_message or "An LLM error occurred",
                error_type=agent_error.error_type,
                error_result=agent_error.error_result,
            ),
            initial_agent_stats=None,
            generated_sub_questions=sub_questions,
            agent_base_end_time=None,
            agent_base_metrics=None,
            log_messages=[
                get_langgraph_node_log_string(
                    graph_component="initial - generate initial answer",
                    node_name="generate initial answer",
                    node_start_time=node_start_time,
                    result=agent_error.error_result or "An LLM error occurred",
                )
            ],
        end_stream_token = datetime.now()
        dispatch_timings.append(
            (end_stream_token - start_stream_token).microseconds
        )
        streamed_tokens.append(content)

    logger.debug(
        f"Average dispatch time for initial answer: {sum(dispatch_timings) / len(dispatch_timings)}"
@@ -10,10 +10,8 @@ from onyx.agents.agent_search.deep_search.main.states import (
from onyx.agents.agent_search.shared_graph_utils.utils import (
    get_langgraph_node_log_string,
)
from onyx.utils.timing import log_function_time


@log_function_time(print_only=True)
def validate_initial_answer(
    state: SubQuestionRetrievalState,
) -> InitialAnswerQualityUpdate:
@@ -27,7 +25,7 @@ def validate_initial_answer(
        f"--------{node_start_time}--------Checking for base answer validity - for not set True/False manually"
    )

    verdict = True  # not actually required as already streamed out. Refinement will do similar
    verdict = True

    return InitialAnswerQualityUpdate(
        initial_answer_quality_eval=verdict,
@@ -23,8 +23,6 @@ from onyx.agents.agent_search.models import GraphConfig
from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
    build_history_prompt,
)
from onyx.agents.agent_search.shared_graph_utils.models import BaseMessage_Content
from onyx.agents.agent_search.shared_graph_utils.models import LLMNodeErrorStrings
from onyx.agents.agent_search.shared_graph_utils.utils import dispatch_separated
from onyx.agents.agent_search.shared_graph_utils.utils import (
    get_langgraph_node_log_string,
@@ -35,30 +33,17 @@ from onyx.chat.models import StreamStopReason
from onyx.chat.models import StreamType
from onyx.chat.models import SubQuestionPiece
from onyx.configs.agent_configs import AGENT_NUM_DOCS_FOR_DECOMPOSITION
from onyx.configs.agent_configs import (
    AGENT_TIMEOUT_OVERRIDE_LLM_SUBQUESTION_GENERATION,
)
from onyx.llm.chat_llm import LLMRateLimitError
from onyx.llm.chat_llm import LLMTimeoutError
from onyx.prompts.agent_search import (
    INITIAL_DECOMPOSITION_PROMPT_QUESTIONS_AFTER_SEARCH_ASSUMING_REFINEMENT,
    INITIAL_DECOMPOSITION_PROMPT_QUESTIONS_AFTER_SEARCH,
)
from onyx.prompts.agent_search import (
    INITIAL_QUESTION_DECOMPOSITION_PROMPT_ASSUMING_REFINEMENT,
    INITIAL_QUESTION_DECOMPOSITION_PROMPT,
)
from onyx.utils.logger import setup_logger
from onyx.utils.timing import log_function_time

logger = setup_logger()

_llm_node_error_strings = LLMNodeErrorStrings(
    timeout="LLM Timeout Error. Sub-questions could not be generated.",
    rate_limit="LLM Rate Limit Error. Sub-questions could not be generated.",
    general_error="General LLM Error. Sub-questions could not be generated.",
)


@log_function_time(print_only=True)
def decompose_orig_question(
    state: SubQuestionRetrievalState,
    config: RunnableConfig,
@@ -100,15 +85,15 @@ def decompose_orig_question(
        ]
    )

    decomposition_prompt = INITIAL_DECOMPOSITION_PROMPT_QUESTIONS_AFTER_SEARCH_ASSUMING_REFINEMENT.format(
        question=question, sample_doc_str=sample_doc_str, history=history
    decomposition_prompt = (
        INITIAL_DECOMPOSITION_PROMPT_QUESTIONS_AFTER_SEARCH.format(
            question=question, sample_doc_str=sample_doc_str, history=history
        )
    )

    else:
        decomposition_prompt = (
            INITIAL_QUESTION_DECOMPOSITION_PROMPT_ASSUMING_REFINEMENT.format(
                question=question, history=history
            )
        decomposition_prompt = INITIAL_QUESTION_DECOMPOSITION_PROMPT.format(
            question=question, history=history
        )

    # Start decomposition
@@ -127,42 +112,32 @@ def decompose_orig_question(
    )

    # dispatches custom events for subquestion tokens, adding in subquestion ids.
    streamed_tokens = dispatch_separated(
        model.stream(msg),
        dispatch_subquestion(0, writer),
        sep_callback=dispatch_subquestion_sep(0, writer),
    )

    streamed_tokens: list[BaseMessage_Content] = []
    stop_event = StreamStopInfo(
        stop_reason=StreamStopReason.FINISHED,
        stream_type=StreamType.SUB_QUESTIONS,
        level=0,
    )
    write_custom_event("stream_finished", stop_event, writer)

    try:
        streamed_tokens = dispatch_separated(
            model.stream(
                msg,
                timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_SUBQUESTION_GENERATION,
            ),
            dispatch_subquestion(0, writer),
            sep_callback=dispatch_subquestion_sep(0, writer),
        )
    deomposition_response = merge_content(*streamed_tokens)

        decomposition_response = merge_content(*streamed_tokens)
    # this call should only return strings. Commenting out for efficiency
    # assert [type(tok) == str for tok in streamed_tokens]

        list_of_subqs = cast(str, decomposition_response).split("\n")
    # use no-op cast() instead of str() which runs code
    # list_of_subquestions = clean_and_parse_list_string(cast(str, response))
    list_of_subqs = cast(str, deomposition_response).split("\n")

        initial_sub_questions = [sq.strip() for sq in list_of_subqs if sq.strip() != ""]
        log_result = f"decomposed original question into {len(initial_sub_questions)} subquestions"

        stop_event = StreamStopInfo(
            stop_reason=StreamStopReason.FINISHED,
            stream_type=StreamType.SUB_QUESTIONS,
            level=0,
        )
        write_custom_event("stream_finished", stop_event, writer)

    except LLMTimeoutError as e:
        logger.error("LLM Timeout Error - decompose orig question")
        raise e  # fail loudly on this critical step
    except LLMRateLimitError as e:
        logger.error("LLM Rate Limit Error - decompose orig question")
        raise e
    decomp_list: list[str] = [sq.strip() for sq in list_of_subqs if sq.strip() != ""]

    return InitialQuestionDecompositionUpdate(
        initial_sub_questions=initial_sub_questions,
        initial_sub_questions=decomp_list,
        agent_start_time=agent_start_time,
        agent_refined_start_time=None,
        agent_refined_end_time=None,
@@ -176,7 +151,7 @@ def decompose_orig_question(
                graph_component="initial - generate sub answers",
                node_name="decompose original question",
                node_start_time=node_start_time,
                result=log_result,
                result=f"decomposed original question into {len(decomp_list)} subquestions",
            )
        ],
    )
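`dispatch_separated` is the onyx utility both versions use to stream sub-question tokens while tagging them with sub-question indices. The simplified reimplementation below is an assumption inferred from its call sites (a token iterator, a per-token callback, and a separator callback fired at each newline between sub-questions); the real helper operates on message chunks rather than plain strings:

```python
# Assumed shape of dispatch_separated: stream tokens, call `dispatch`
# for each one, and call `sep_callback` whenever a separator (a newline
# between sub-questions) is crossed.
from collections.abc import Callable, Iterator


def dispatch_separated(
    tokens: Iterator[str],
    dispatch: Callable[[str, int], None],
    sep: str = "\n",
    sep_callback: Callable[[int], None] | None = None,
) -> list[str]:
    streamed: list[str] = []
    sub_question_num = 0
    for token in tokens:
        if sep in token:
            if sep_callback is not None:
                sep_callback(sub_question_num)
            sub_question_num += 1
        dispatch(token, sub_question_num)
        streamed.append(token)
    return streamed


tokens = iter(["What is X?", "\n", "What is Y?"])
out = dispatch_separated(tokens, lambda tok, num: print(num, repr(tok)))
```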
@@ -26,8 +26,8 @@ from onyx.agents.agent_search.deep_search.main.nodes.decide_refinement_need impo
from onyx.agents.agent_search.deep_search.main.nodes.extract_entities_terms import (
    extract_entities_terms,
)
from onyx.agents.agent_search.deep_search.main.nodes.generate_validate_refined_answer import (
    generate_validate_refined_answer,
from onyx.agents.agent_search.deep_search.main.nodes.generate_refined_answer import (
    generate_refined_answer,
)
from onyx.agents.agent_search.deep_search.main.nodes.ingest_refined_sub_answers import (
    ingest_refined_sub_answers,
@@ -126,8 +126,8 @@ def main_graph_builder(test_mode: bool = False) -> StateGraph:
    # Node to generate the refined answer
    graph.add_node(
        node="generate_validate_refined_answer",
        action=generate_validate_refined_answer,
        node="generate_refined_answer",
        action=generate_refined_answer,
    )

    # Early node to extract the entities and terms from the initial answer,
@@ -215,11 +215,11 @@ def main_graph_builder(test_mode: bool = False) -> StateGraph:
    graph.add_edge(
        start_key="ingest_refined_sub_answers",
        end_key="generate_validate_refined_answer",
        end_key="generate_refined_answer",
    )

    graph.add_edge(
        start_key="generate_validate_refined_answer",
        start_key="generate_refined_answer",
        end_key="compare_answers",
    )
    graph.add_edge(
@@ -252,7 +252,9 @@ if __name__ == "__main__":
        db_session, primary_llm, fast_llm, search_request
    )

    inputs = MainInput(log_messages=[])
    inputs = MainInput(
        base_question=graph_config.inputs.search_request.query, log_messages=[]
    )

    for thing in compiled_graph.stream(
        input=inputs,
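For readers unfamiliar with the entrypoint pattern in this hunk, here is a runnable toy that mirrors it: a Pydantic state model fed into a compiled LangGraph and consumed via `stream`. The two-field state and the single node are stand-ins, not onyx code (assumes `langgraph` and `pydantic` are installed):

```python
# Toy version of the entrypoint above: build a graph over a Pydantic
# state, compile it, and iterate over streamed node updates.
from pydantic import BaseModel
from langgraph.graph import StateGraph, START, END


class MainInput(BaseModel):
    base_question: str = ""
    log_messages: list[str] = []


def answer_node(state: MainInput) -> dict:
    return {"log_messages": [f"answered: {state.base_question}"]}


graph = StateGraph(MainInput)
graph.add_node("answer", answer_node)
graph.add_edge(START, "answer")
graph.add_edge("answer", END)
compiled_graph = graph.compile()

for thing in compiled_graph.stream(input=MainInput(base_question="why?")):
    print(thing)  # {'answer': {'log_messages': ['answered: why?']}}
```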
@@ -1,7 +1,6 @@
from datetime import datetime
from typing import cast

from langchain_core.messages import BaseMessage
from langchain_core.messages import HumanMessage
from langchain_core.runnables import RunnableConfig
from langgraph.types import StreamWriter
@@ -11,51 +10,16 @@ from onyx.agents.agent_search.deep_search.main.states import (
)
from onyx.agents.agent_search.deep_search.main.states import MainState
from onyx.agents.agent_search.models import GraphConfig
from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
    binary_string_test,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
    AGENT_LLM_RATELIMIT_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
    AGENT_LLM_TIMEOUT_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
    AGENT_POSITIVE_VALUE_STR,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
    AgentLLMErrorType,
)
from onyx.agents.agent_search.shared_graph_utils.models import AgentErrorLog
from onyx.agents.agent_search.shared_graph_utils.models import LLMNodeErrorStrings
from onyx.agents.agent_search.shared_graph_utils.utils import (
    get_langgraph_node_log_string,
)
from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
from onyx.chat.models import RefinedAnswerImprovement
from onyx.configs.agent_configs import AGENT_TIMEOUT_OVERRIDE_LLM_COMPARE_ANSWERS
from onyx.llm.chat_llm import LLMRateLimitError
from onyx.llm.chat_llm import LLMTimeoutError
from onyx.prompts.agent_search import (
    INITIAL_REFINED_ANSWER_COMPARISON_PROMPT,
)
from onyx.utils.logger import setup_logger
from onyx.utils.timing import log_function_time

logger = setup_logger()

_llm_node_error_strings = LLMNodeErrorStrings(
    timeout="The LLM timed out, and the answers could not be compared.",
    rate_limit="The LLM encountered a rate limit, and the answers could not be compared.",
    general_error="The LLM encountered an error, and the answers could not be compared.",
)

_ANSWER_QUALITY_NOT_SUFFICIENT_MESSAGE = (
    "Answer quality is not sufficient, so stay with the initial answer."
)


@log_function_time(print_only=True)
def compare_answers(
    state: MainState, config: RunnableConfig, writer: StreamWriter = lambda _: None
) -> InitialRefinedAnswerComparisonUpdate:
@@ -70,75 +34,21 @@ def compare_answers(
    initial_answer = state.initial_answer
    refined_answer = state.refined_answer

    # if answer quality is not sufficient, then stay with the initial answer
    if not state.refined_answer_quality:
        write_custom_event(
            "refined_answer_improvement",
            RefinedAnswerImprovement(
                refined_answer_improvement=False,
            ),
            writer,
        )

        return InitialRefinedAnswerComparisonUpdate(
            refined_answer_improvement_eval=False,
            log_messages=[
                get_langgraph_node_log_string(
                    graph_component="main",
                    node_name="compare answers",
                    node_start_time=node_start_time,
                    result=_ANSWER_QUALITY_NOT_SUFFICIENT_MESSAGE,
                )
            ],
        )

    compare_answers_prompt = INITIAL_REFINED_ANSWER_COMPARISON_PROMPT.format(
        question=question, initial_answer=initial_answer, refined_answer=refined_answer
    )

    msg = [HumanMessage(content=compare_answers_prompt)]

    agent_error: AgentErrorLog | None = None
    # Get the rewritten queries in a defined format
    model = graph_config.tooling.fast_llm
    resp: BaseMessage | None = None
    refined_answer_improvement: bool | None = None

    # no need to stream this
    try:
        resp = model.invoke(
            msg, timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_COMPARE_ANSWERS
        )
    resp = model.invoke(msg)

    except LLMTimeoutError:
        agent_error = AgentErrorLog(
            error_type=AgentLLMErrorType.TIMEOUT,
            error_message=AGENT_LLM_TIMEOUT_MESSAGE,
            error_result=_llm_node_error_strings.timeout,
        )
        logger.error("LLM Timeout Error - compare answers")
        # continue as True in this support step
    except LLMRateLimitError:
        agent_error = AgentErrorLog(
            error_type=AgentLLMErrorType.RATE_LIMIT,
            error_message=AGENT_LLM_RATELIMIT_MESSAGE,
            error_result=_llm_node_error_strings.rate_limit,
        )
        logger.error("LLM Rate Limit Error - compare answers")
        # continue as True in this support step

    if agent_error or resp is None:
        refined_answer_improvement = True
        if agent_error:
            log_result = agent_error.error_result
        else:
            log_result = "An answer could not be generated."

    else:
        refined_answer_improvement = binary_string_test(
            text=cast(str, resp.content),
            positive_value=AGENT_POSITIVE_VALUE_STR,
        )
        log_result = f"Answer comparison: {refined_answer_improvement}"
    refined_answer_improvement = (
        isinstance(resp.content, str) and "yes" in resp.content.lower()
    )

    write_custom_event(
        "refined_answer_improvement",
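The two sides of this hunk implement the same yes/no check. `binary_string_test` is an onyx helper whose body is not shown in the diff, so the reimplementation below is an assumption based on how it is called, and the value of `AGENT_POSITIVE_VALUE_STR` is likewise assumed to be "yes":

```python
# Assumed behavior of binary_string_test: a case-insensitive substring
# check for the positive marker in the LLM's reply.
AGENT_POSITIVE_VALUE_STR = "yes"  # assumed value of the onyx constant


def binary_string_test(text: str, positive_value: str = AGENT_POSITIVE_VALUE_STR) -> bool:
    return positive_value in text.lower()


assert binary_string_test("Yes, the refined answer is better.")
assert not binary_string_test("No improvement over the initial answer.")
```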
@@ -155,7 +65,7 @@ def compare_answers(
                graph_component="main",
                node_name="compare answers",
                node_start_time=node_start_time,
                result=log_result,
                result=f"Answer comparison: {refined_answer_improvement}",
            )
        ],
    )
@@ -21,18 +21,6 @@ from onyx.agents.agent_search.models import GraphConfig
from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
    build_history_prompt,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
    AGENT_LLM_RATELIMIT_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
    AGENT_LLM_TIMEOUT_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
    AgentLLMErrorType,
)
from onyx.agents.agent_search.shared_graph_utils.models import AgentErrorLog
from onyx.agents.agent_search.shared_graph_utils.models import BaseMessage_Content
from onyx.agents.agent_search.shared_graph_utils.models import LLMNodeErrorStrings
from onyx.agents.agent_search.shared_graph_utils.utils import dispatch_separated
from onyx.agents.agent_search.shared_graph_utils.utils import (
    format_entity_term_extraction,
@@ -42,31 +30,12 @@ from onyx.agents.agent_search.shared_graph_utils.utils import (
)
from onyx.agents.agent_search.shared_graph_utils.utils import make_question_id
from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
from onyx.chat.models import StreamingError
from onyx.configs.agent_configs import (
    AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_SUBQUESTION_GENERATION,
)
from onyx.llm.chat_llm import LLMRateLimitError
from onyx.llm.chat_llm import LLMTimeoutError
from onyx.prompts.agent_search import (
    REFINEMENT_QUESTION_DECOMPOSITION_PROMPT_W_INITIAL_SUBQUESTION_ANSWERS,
    REFINEMENT_QUESTION_DECOMPOSITION_PROMPT,
)
from onyx.tools.models import ToolCallKickoff
from onyx.utils.logger import setup_logger
from onyx.utils.timing import log_function_time

logger = setup_logger()

_ANSWERED_SUBQUESTIONS_DIVIDER = "\n\n---\n\n"

_llm_node_error_strings = LLMNodeErrorStrings(
    timeout="The LLM timed out. The sub-questions could not be generated.",
    rate_limit="The LLM encountered a rate limit. The sub-questions could not be generated.",
    general_error="The LLM encountered an error. The sub-questions could not be generated.",
)


@log_function_time(print_only=True)
def create_refined_sub_questions(
    state: MainState, config: RunnableConfig, writer: StreamWriter = lambda _: None
) -> RefinedQuestionDecompositionUpdate:
@@ -103,10 +72,8 @@ def create_refined_sub_questions(
    initial_question_answers = state.sub_question_results

    addressed_subquestions_with_answers = [
        f"Subquestion: {x.question}\nSubanswer:\n{x.answer}"
        for x in initial_question_answers
        if x.verified_high_quality and x.answer
    addressed_question_list = [
        x.question for x in initial_question_answers if x.verified_high_quality
    ]

    failed_question_list = [
@@ -115,14 +82,12 @@ def create_refined_sub_questions(
    msg = [
        HumanMessage(
            content=REFINEMENT_QUESTION_DECOMPOSITION_PROMPT_W_INITIAL_SUBQUESTION_ANSWERS.format(
            content=REFINEMENT_QUESTION_DECOMPOSITION_PROMPT.format(
                question=question,
                history=history,
                entity_term_extraction_str=entity_term_extraction_str,
                base_answer=base_answer,
                answered_subquestions_with_answers=_ANSWERED_SUBQUESTIONS_DIVIDER.join(
                    addressed_subquestions_with_answers
                ),
                answered_sub_questions="\n - ".join(addressed_question_list),
                failed_sub_questions="\n - ".join(failed_question_list),
            ),
        )
@@ -131,65 +96,29 @@ def create_refined_sub_questions(
    # Grader
    model = graph_config.tooling.fast_llm

    agent_error: AgentErrorLog | None = None
    streamed_tokens: list[BaseMessage_Content] = []
    try:
        streamed_tokens = dispatch_separated(
            model.stream(
                msg,
                timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_SUBQUESTION_GENERATION,
            ),
            dispatch_subquestion(1, writer),
            sep_callback=dispatch_subquestion_sep(1, writer),
        )
    except LLMTimeoutError:
        agent_error = AgentErrorLog(
            error_type=AgentLLMErrorType.TIMEOUT,
            error_message=AGENT_LLM_TIMEOUT_MESSAGE,
            error_result=_llm_node_error_strings.timeout,
        )
        logger.error("LLM Timeout Error - create refined sub questions")

    except LLMRateLimitError:
        agent_error = AgentErrorLog(
            error_type=AgentLLMErrorType.RATE_LIMIT,
            error_message=AGENT_LLM_RATELIMIT_MESSAGE,
            error_result=_llm_node_error_strings.rate_limit,
        )
        logger.error("LLM Rate Limit Error - create refined sub questions")

    if agent_error:
        refined_sub_question_dict: dict[int, RefinementSubQuestion] = {}
        log_result = agent_error.error_result
        write_custom_event(
            "refined_sub_question_creation_error",
            StreamingError(
                error="Your LLM was not able to create refined sub questions in time and timed out. Please try again.",
            ),
            writer,
        )
    streamed_tokens = dispatch_separated(
        model.stream(msg),
        dispatch_subquestion(1, writer),
        sep_callback=dispatch_subquestion_sep(1, writer),
    )
    response = merge_content(*streamed_tokens)

    if isinstance(response, str):
        parsed_response = [q for q in response.split("\n") if q.strip() != ""]
    else:
        response = merge_content(*streamed_tokens)
        raise ValueError("LLM response is not a string")

    if isinstance(response, str):
        parsed_response = [q for q in response.split("\n") if q.strip() != ""]
    else:
        raise ValueError("LLM response is not a string")
    refined_sub_question_dict = {}
    for sub_question_num, sub_question in enumerate(parsed_response):
        refined_sub_question = RefinementSubQuestion(
            sub_question=sub_question,
            sub_question_id=make_question_id(1, sub_question_num + 1),
            verified=False,
            answered=False,
            answer="",
        )

        refined_sub_question_dict = {}
        for sub_question_num, sub_question in enumerate(parsed_response):
            refined_sub_question = RefinementSubQuestion(
                sub_question=sub_question,
                sub_question_id=make_question_id(1, sub_question_num + 1),
                verified=False,
                answered=False,
                answer="",
            )

            refined_sub_question_dict[sub_question_num + 1] = refined_sub_question

        log_result = f"Created {len(refined_sub_question_dict)} refined sub questions"
        refined_sub_question_dict[sub_question_num + 1] = refined_sub_question

    return RefinedQuestionDecompositionUpdate(
        refined_sub_questions=refined_sub_question_dict,
@@ -199,7 +128,7 @@ def create_refined_sub_questions(
                graph_component="main",
                node_name="create refined sub questions",
                node_start_time=node_start_time,
                result=log_result,
                result=f"Created {len(refined_sub_question_dict)} refined sub questions",
            )
        ],
    )
@@ -11,10 +11,8 @@ from onyx.agents.agent_search.models import GraphConfig
from onyx.agents.agent_search.shared_graph_utils.utils import (
    get_langgraph_node_log_string,
)
from onyx.utils.timing import log_function_time


@log_function_time(print_only=True)
def decide_refinement_need(
    state: MainState, config: RunnableConfig
) -> RequireRefinemenEvalUpdate:
@@ -28,19 +26,6 @@ def decide_refinement_need(
    decision = True  # TODO: just for current testing purposes

    if state.answer_error:
        return RequireRefinemenEvalUpdate(
            require_refined_answer_eval=False,
            log_messages=[
                get_langgraph_node_log_string(
                    graph_component="main",
                    node_name="decide refinement need",
                    node_start_time=node_start_time,
                    result="Timeout Error",
                )
            ],
        )

    log_messages = [
        get_langgraph_node_log_string(
            graph_component="main",
@@ -21,16 +21,11 @@ from onyx.agents.agent_search.shared_graph_utils.utils import format_docs
from onyx.agents.agent_search.shared_graph_utils.utils import (
    get_langgraph_node_log_string,
)
from onyx.configs.agent_configs import (
    AGENT_TIMEOUT_OVERRIDE_LLM_ENTITY_TERM_EXTRACTION,
)
from onyx.configs.constants import NUM_EXPLORATORY_DOCS
from onyx.prompts.agent_search import ENTITY_TERM_EXTRACTION_PROMPT
from onyx.prompts.agent_search import ENTITY_TERM_EXTRACTION_PROMPT_JSON_EXAMPLE
from onyx.utils.timing import log_function_time


@log_function_time(print_only=True)
def extract_entities_terms(
    state: MainState, config: RunnableConfig
) -> EntityTermExtractionUpdate:
@@ -86,7 +81,6 @@ def extract_entities_terms(
    # Grader
    llm_response = fast_llm.invoke(
        prompt=msg,
        timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_ENTITY_TERM_EXTRACTION,
    )

    cleaned_response = (
@@ -11,49 +11,27 @@ from onyx.agents.agent_search.deep_search.main.models import (
    AgentRefinedMetrics,
)
from onyx.agents.agent_search.deep_search.main.operations import get_query_info
from onyx.agents.agent_search.deep_search.main.operations import logger
from onyx.agents.agent_search.deep_search.main.states import MainState
from onyx.agents.agent_search.deep_search.main.states import (
    RefinedAnswerUpdate,
)
from onyx.agents.agent_search.models import GraphConfig
from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
    binary_string_test_after_answer_separator,
)
from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
    get_prompt_enrichment_components,
)
from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
    trim_prompt_piece,
)
from onyx.agents.agent_search.shared_graph_utils.calculations import (
    get_answer_generation_documents,
)
from onyx.agents.agent_search.shared_graph_utils.constants import AGENT_ANSWER_SEPARATOR
from onyx.agents.agent_search.shared_graph_utils.constants import (
    AGENT_LLM_RATELIMIT_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
    AGENT_LLM_TIMEOUT_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
    AGENT_POSITIVE_VALUE_STR,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
    AgentLLMErrorType,
)
from onyx.agents.agent_search.shared_graph_utils.models import AgentErrorLog
from onyx.agents.agent_search.shared_graph_utils.models import LLMNodeErrorStrings
from onyx.agents.agent_search.shared_graph_utils.models import InferenceSection
from onyx.agents.agent_search.shared_graph_utils.models import RefinedAgentStats
from onyx.agents.agent_search.shared_graph_utils.operators import (
    dedup_inference_section_list,
    dedup_inference_sections,
)
from onyx.agents.agent_search.shared_graph_utils.utils import (
    dispatch_main_answer_stop_info,
)
from onyx.agents.agent_search.shared_graph_utils.utils import format_docs
from onyx.agents.agent_search.shared_graph_utils.utils import (
    get_deduplicated_structured_subquestion_documents,
)
from onyx.agents.agent_search.shared_graph_utils.utils import (
    get_langgraph_node_log_string,
)
@@ -65,50 +43,26 @@ from onyx.agents.agent_search.shared_graph_utils.utils import (
from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
from onyx.chat.models import AgentAnswerPiece
from onyx.chat.models import ExtendedToolResponse
from onyx.chat.models import StreamingError
from onyx.configs.agent_configs import AGENT_MAX_ANSWER_CONTEXT_DOCS
from onyx.configs.agent_configs import AGENT_MAX_STREAMED_DOCS_FOR_REFINED_ANSWER
from onyx.configs.agent_configs import AGENT_MIN_ORIG_QUESTION_DOCS
from onyx.configs.agent_configs import (
    AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_GENERATION,
)
from onyx.configs.agent_configs import (
    AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_VALIDATION,
)
from onyx.llm.chat_llm import LLMRateLimitError
from onyx.llm.chat_llm import LLMTimeoutError
from onyx.prompts.agent_search import (
    REFINED_ANSWER_PROMPT_W_SUB_QUESTIONS,
)
from onyx.prompts.agent_search import (
    REFINED_ANSWER_PROMPT_WO_SUB_QUESTIONS,
)
from onyx.prompts.agent_search import (
    REFINED_ANSWER_VALIDATION_PROMPT,
)
from onyx.prompts.agent_search import (
    SUB_QUESTION_ANSWER_TEMPLATE_REFINED,
)
from onyx.prompts.agent_search import UNKNOWN_ANSWER
from onyx.tools.tool_implementations.search.search_tool import yield_search_responses
from onyx.utils.logger import setup_logger
from onyx.utils.timing import log_function_time

logger = setup_logger()

_llm_node_error_strings = LLMNodeErrorStrings(
    timeout="The LLM timed out. The refined answer could not be generated.",
    rate_limit="The LLM encountered a rate limit. The refined answer could not be generated.",
    general_error="The LLM encountered an error. The refined answer could not be generated.",
)


@log_function_time(print_only=True)
def generate_validate_refined_answer(
def generate_refined_answer(
    state: MainState, config: RunnableConfig, writer: StreamWriter = lambda _: None
) -> RefinedAnswerUpdate:
    """
    LangGraph node to generate the refined answer and validate it.
    LangGraph node to generate the refined answer.
    """

    node_start_time = datetime.now()
@@ -122,24 +76,19 @@ def generate_validate_refined_answer(
    )

    verified_reranked_documents = state.verified_reranked_documents

    # get all documents cited in sub-questions
    structured_subquestion_docs = get_deduplicated_structured_subquestion_documents(
        state.sub_question_results
    )

    sub_questions_cited_documents = state.cited_documents
    original_question_verified_documents = (
        state.orig_question_verified_reranked_documents
    )
    original_question_retrieved_documents = state.orig_question_retrieved_documents

    consolidated_context_docs = structured_subquestion_docs.cited_documents
    consolidated_context_docs: list[InferenceSection] = sub_questions_cited_documents

    counter = 0
    for original_doc_number, original_doc in enumerate(
        original_question_verified_documents
    ):
        if original_doc_number not in structured_subquestion_docs.cited_documents:
        if original_doc_number not in sub_questions_cited_documents:
            if (
                counter <= AGENT_MIN_ORIG_QUESTION_DOCS
                or len(consolidated_context_docs)
|
                counter += 1

    # sort docs by their scores - though the scores refer to different questions
    relevant_docs = dedup_inference_section_list(consolidated_context_docs)
    relevant_docs = dedup_inference_sections(
        consolidated_context_docs, consolidated_context_docs
    )

    # Create the list of documents to stream out. Start with the
    # ones that wil be in the context (or, if len == 0, use docs
    # that were retrieved for the original question)
    answer_generation_documents = get_answer_generation_documents(
        relevant_docs=relevant_docs,
        context_documents=structured_subquestion_docs.context_documents,
        original_question_docs=original_question_retrieved_documents,
        max_docs=AGENT_MAX_STREAMED_DOCS_FOR_REFINED_ANSWER,
    streaming_docs = (
        relevant_docs
        if len(relevant_docs) > 0
        else original_question_retrieved_documents[:15]
    )

    query_info = get_query_info(state.orig_question_sub_query_retrieval_results)
@@ -167,13 +114,11 @@ def generate_validate_refined_answer(
        graph_config.tooling.search_tool
    ), "search_tool must be provided for agentic search"
    # stream refined answer docs, or original question docs if no relevant docs are found
    relevance_list = relevance_from_docs(
        answer_generation_documents.streaming_documents
    )
    relevance_list = relevance_from_docs(relevant_docs)
    for tool_response in yield_search_responses(
        query=question,
        reranked_sections=answer_generation_documents.streaming_documents,
        final_context_sections=answer_generation_documents.context_documents,
        reranked_sections=streaming_docs,
        final_context_sections=streaming_docs,
        search_query_info=query_info,
        get_section_relevance=lambda: relevance_list,
        search_tool=graph_config.tooling.search_tool,
|
    )

    model = graph_config.tooling.fast_llm

    relevant_docs_str = format_docs(answer_generation_documents.context_documents)
    relevant_docs_str = format_docs(relevant_docs)
    relevant_docs_str = trim_prompt_piece(
        model.config,
        relevant_docs_str,
|
||||
|
||||
streamed_tokens: list[str | list[str | dict[str, Any]]] = [""]
|
||||
dispatch_timings: list[float] = []
|
||||
agent_error: AgentErrorLog | None = None
|
||||
|
||||
try:
|
||||
for message in model.stream(
|
||||
msg, timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_GENERATION
|
||||
):
|
||||
# TODO: in principle, the answer here COULD contain images, but we don't support that yet
|
||||
content = message.content
|
||||
if not isinstance(content, str):
|
||||
raise ValueError(
|
||||
f"Expected content to be a string, but got {type(content)}"
|
||||
)
|
||||
|
||||
start_stream_token = datetime.now()
|
||||
write_custom_event(
|
||||
"refined_agent_answer",
|
||||
AgentAnswerPiece(
|
||||
answer_piece=content,
|
||||
level=1,
|
||||
level_question_num=0,
|
||||
answer_type="agent_level_answer",
|
||||
),
|
||||
writer,
|
||||
for message in model.stream(msg):
|
||||
# TODO: in principle, the answer here COULD contain images, but we don't support that yet
|
||||
content = message.content
|
||||
if not isinstance(content, str):
|
||||
raise ValueError(
|
||||
f"Expected content to be a string, but got {type(content)}"
|
||||
)
|
||||
end_stream_token = datetime.now()
|
||||
dispatch_timings.append(
|
||||
(end_stream_token - start_stream_token).microseconds
|
||||
)
|
||||
streamed_tokens.append(content)
|
||||
|
||||
except LLMTimeoutError:
|
||||
agent_error = AgentErrorLog(
|
||||
error_type=AgentLLMErrorType.TIMEOUT,
|
||||
error_message=AGENT_LLM_TIMEOUT_MESSAGE,
|
||||
error_result=_llm_node_error_strings.timeout,
|
||||
)
|
||||
logger.error("LLM Timeout Error - generate refined answer")
|
||||
|
||||
except LLMRateLimitError:
|
||||
agent_error = AgentErrorLog(
|
||||
error_type=AgentLLMErrorType.RATE_LIMIT,
|
||||
error_message=AGENT_LLM_RATELIMIT_MESSAGE,
|
||||
error_result=_llm_node_error_strings.rate_limit,
|
||||
)
|
||||
logger.error("LLM Rate Limit Error - generate refined answer")
|
||||
|
||||
if agent_error:
|
||||
start_stream_token = datetime.now()
|
||||
write_custom_event(
|
||||
"initial_agent_answer",
|
||||
StreamingError(
|
||||
error=AGENT_LLM_TIMEOUT_MESSAGE,
|
||||
"refined_agent_answer",
|
||||
AgentAnswerPiece(
|
||||
answer_piece=content,
|
||||
level=1,
|
||||
level_question_num=0,
|
||||
answer_type="agent_level_answer",
|
||||
),
|
||||
writer,
|
||||
)
|
||||
|
||||
return RefinedAnswerUpdate(
|
||||
refined_answer=None,
|
||||
refined_answer_quality=False, # TODO: replace this with the actual check value
|
||||
refined_agent_stats=None,
|
||||
agent_refined_end_time=None,
|
||||
agent_refined_metrics=AgentRefinedMetrics(
|
||||
refined_doc_boost_factor=0.0,
|
||||
refined_question_boost_factor=0.0,
|
||||
duration_s=None,
|
||||
),
|
||||
log_messages=[
|
||||
get_langgraph_node_log_string(
|
||||
graph_component="main",
|
||||
node_name="generate refined answer",
|
||||
node_start_time=node_start_time,
|
||||
result=agent_error.error_result or "An LLM error occurred",
|
||||
)
|
||||
],
|
||||
)
|
||||
end_stream_token = datetime.now()
|
||||
dispatch_timings.append((end_stream_token - start_stream_token).microseconds)
|
||||
streamed_tokens.append(content)
|
||||
|
||||
logger.debug(
|
||||
f"Average dispatch time for refined answer: {sum(dispatch_timings) / len(dispatch_timings)}"
|
||||
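The old code (right-hand side of the hunk above) streamed with a bare `model.stream(msg)`; the new code adds a timeout override and wraps the loop in timeout/rate-limit handlers while timing each token dispatch. A minimal sketch of the new pattern, with `model` and the event writer standing in for the real Onyx objects:

    from datetime import datetime

    def stream_answer_tokens(model, msg, timeout_s: int) -> tuple[list[str], list[float]]:
        # Sketch only: stream tokens under a timeout, timing each dispatch.
        # Timeout / rate-limit exceptions are left to the caller's handlers,
        # mirroring the try/except blocks in the diff above.
        tokens: list[str] = []
        dispatch_timings: list[float] = []
        for message in model.stream(msg, timeout_override=timeout_s):
            content = message.content
            if not isinstance(content, str):
                raise ValueError(f"Expected content to be a string, but got {type(content)}")
            start = datetime.now()
            # ... write_custom_event(...) would dispatch the token here ...
            dispatch_timings.append((datetime.now() - start).microseconds)
            tokens.append(content)
        return tokens, dispatch_timings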
@@ -368,43 +261,54 @@ def generate_validate_refined_answer(
response = merge_content(*streamed_tokens)
answer = cast(str, response)

# run a validation step for the refined answer only

msg = [
HumanMessage(
content=REFINED_ANSWER_VALIDATION_PROMPT.format(
question=question,
history=prompt_enrichment_components.history,
answered_sub_questions=sub_question_answer_str,
relevant_docs=relevant_docs_str,
proposed_answer=answer,
persona_specification=persona_contextualized_prompt,
)
)
]

try:
validation_response = model.invoke(
msg, timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_VALIDATION
)
refined_answer_quality = binary_string_test_after_answer_separator(
text=cast(str, validation_response.content),
positive_value=AGENT_POSITIVE_VALUE_STR,
separator=AGENT_ANSWER_SEPARATOR,
)
except LLMTimeoutError:
refined_answer_quality = True
logger.error("LLM Timeout Error - validate refined answer")

except LLMRateLimitError:
refined_answer_quality = True
logger.error("LLM Rate Limit Error - validate refined answer")

refined_agent_stats = RefinedAgentStats(
revision_doc_efficiency=refined_doc_effectiveness,
revision_question_efficiency=revision_question_efficiency,
)

logger.debug(f"\n\n---INITIAL ANSWER ---\n\n Answer:\n Agent: {initial_answer}")
logger.debug("-" * 10)
logger.debug(f"\n\n---REVISED AGENT ANSWER ---\n\n Answer:\n Agent: {answer}")

logger.debug("-" * 100)

if state.initial_agent_stats:
initial_doc_boost_factor = state.initial_agent_stats.agent_effectiveness.get(
"utilized_chunk_ratio", "--"
)
initial_support_boost_factor = (
state.initial_agent_stats.agent_effectiveness.get("support_ratio", "--")
)
num_initial_verified_docs = state.initial_agent_stats.original_question.get(
"num_verified_documents", "--"
)
initial_verified_docs_avg_score = (
state.initial_agent_stats.original_question.get("verified_avg_score", "--")
)
initial_sub_questions_verified_docs = (
state.initial_agent_stats.sub_questions.get("num_verified_documents", "--")
)

logger.debug("INITIAL AGENT STATS")
logger.debug(f"Document Boost Factor: {initial_doc_boost_factor}")
logger.debug(f"Support Boost Factor: {initial_support_boost_factor}")
logger.debug(f"Originally Verified Docs: {num_initial_verified_docs}")
logger.debug(
f"Originally Verified Docs Avg Score: {initial_verified_docs_avg_score}"
)
logger.debug(
f"Sub-Questions Verified Docs: {initial_sub_questions_verified_docs}"
)
if refined_agent_stats:
logger.debug("-" * 10)
logger.debug("REFINED AGENT STATS")
logger.debug(
f"Revision Doc Factor: {refined_agent_stats.revision_doc_efficiency}"
)
logger.debug(
f"Revision Question Factor: {refined_agent_stats.revision_question_efficiency}"
)

agent_refined_end_time = datetime.now()
if state.agent_refined_start_time:
agent_refined_duration = (
@@ -421,7 +325,7 @@ def generate_validate_refined_answer(

return RefinedAnswerUpdate(
refined_answer=answer,
refined_answer_quality=refined_answer_quality,
refined_answer_quality=True,  # TODO: replace this with the actual check value
refined_agent_stats=refined_agent_stats,
agent_refined_end_time=agent_refined_end_time,
agent_refined_metrics=agent_refined_metrics,
@@ -17,7 +17,6 @@ from onyx.agents.agent_search.orchestration.states import ToolCallUpdate
from onyx.agents.agent_search.orchestration.states import ToolChoiceInput
from onyx.agents.agent_search.orchestration.states import ToolChoiceUpdate
from onyx.agents.agent_search.shared_graph_utils.models import AgentChunkRetrievalStats
from onyx.agents.agent_search.shared_graph_utils.models import AgentErrorLog
from onyx.agents.agent_search.shared_graph_utils.models import (
EntityRelationshipTermExtraction,
)
@@ -77,7 +76,6 @@ class InitialAnswerUpdate(LoggerUpdate):
"""

initial_answer: str | None = None
answer_error: AgentErrorLog | None = None
initial_agent_stats: InitialAgentResultStats | None = None
generated_sub_questions: list[str] = []
agent_base_end_time: datetime | None = None
@@ -90,7 +88,6 @@ class RefinedAnswerUpdate(RefinedAgentEndStats, LoggerUpdate):
"""

refined_answer: str | None = None
answer_error: AgentErrorLog | None = None
refined_agent_stats: RefinedAgentStats | None = None
refined_answer_quality: bool = False

@@ -16,44 +16,16 @@ from onyx.agents.agent_search.deep_search.shared.expanded_retrieval.states impor
QueryExpansionUpdate,
)
from onyx.agents.agent_search.models import GraphConfig
from onyx.agents.agent_search.shared_graph_utils.constants import (
AGENT_LLM_RATELIMIT_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AGENT_LLM_TIMEOUT_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AgentLLMErrorType,
)
from onyx.agents.agent_search.shared_graph_utils.models import AgentErrorLog
from onyx.agents.agent_search.shared_graph_utils.models import BaseMessage_Content
from onyx.agents.agent_search.shared_graph_utils.models import LLMNodeErrorStrings
from onyx.agents.agent_search.shared_graph_utils.utils import dispatch_separated
from onyx.agents.agent_search.shared_graph_utils.utils import (
get_langgraph_node_log_string,
)
from onyx.agents.agent_search.shared_graph_utils.utils import parse_question_id
from onyx.configs.agent_configs import (
AGENT_TIMEOUT_OVERRIDE_LLM_QUERY_REWRITING_GENERATION,
)
from onyx.llm.chat_llm import LLMRateLimitError
from onyx.llm.chat_llm import LLMTimeoutError
from onyx.prompts.agent_search import (
QUERY_REWRITING_PROMPT,
)
from onyx.utils.logger import setup_logger
from onyx.utils.timing import log_function_time

logger = setup_logger()

_llm_node_error_strings = LLMNodeErrorStrings(
timeout="Query rewriting failed due to LLM timeout - the original question will be used.",
rate_limit="Query rewriting failed due to LLM rate limit - the original question will be used.",
general_error="Query rewriting failed due to LLM error - the original question will be used.",
)

@log_function_time(print_only=True)
def expand_queries(
state: ExpandedRetrievalInput,
config: RunnableConfig,
@@ -82,43 +54,13 @@ def expand_queries(
)
]

agent_error: AgentErrorLog | None = None
llm_response_list: list[BaseMessage_Content] = []
llm_response = ""
rewritten_queries = []
llm_response_list = dispatch_separated(
llm.stream(prompt=msg), dispatch_subquery(level, question_num, writer)
)

try:
llm_response_list = dispatch_separated(
llm.stream(
prompt=msg,
timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_QUERY_REWRITING_GENERATION,
),
dispatch_subquery(level, question_num, writer),
)
llm_response = merge_message_runs(llm_response_list, chunk_separator="")[
0
].content
rewritten_queries = llm_response.split("\n")
log_result = f"Number of expanded queries: {len(rewritten_queries)}"
llm_response = merge_message_runs(llm_response_list, chunk_separator="")[0].content

except LLMTimeoutError:
agent_error = AgentErrorLog(
error_type=AgentLLMErrorType.TIMEOUT,
error_message=AGENT_LLM_TIMEOUT_MESSAGE,
error_result=_llm_node_error_strings.timeout,
)
logger.error("LLM Timeout Error - expand queries")
log_result = agent_error.error_result

except LLMRateLimitError:
agent_error = AgentErrorLog(
error_type=AgentLLMErrorType.RATE_LIMIT,
error_message=AGENT_LLM_RATELIMIT_MESSAGE,
error_result=_llm_node_error_strings.rate_limit,
)
logger.error("LLM Rate Limit Error - expand queries")
log_result = agent_error.error_result
# use subquestion as query if query generation fails
rewritten_queries = llm_response.split("\n")

return QueryExpansionUpdate(
expanded_queries=rewritten_queries,
@@ -127,7 +69,7 @@ def expand_queries(
graph_component="shared - expanded retrieval",
node_name="expand queries",
node_start_time=node_start_time,
result=log_result,
result=f"Number of expanded queries: {len(rewritten_queries)}",
)
],
)

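Per the hunk, `expand_queries` now pre-initializes `llm_response` and `rewritten_queries`, so a timeout or rate limit during streaming degrades to splitting whatever text was received rather than raising. A rough sketch of that control flow (names are stand-ins, not the real signatures):

    def expand_queries_safely(stream_queries, question: str) -> list[str]:
        # Sketch: attempt LLM query rewriting; on failure, fall back so that
        # retrieval can still proceed with the original sub-question.
        llm_response = ""
        try:
            llm_response = stream_queries()  # may raise timeout / rate-limit errors
        except Exception:
            pass  # the real node logs the error and records it in log_messages
        rewritten = [q for q in llm_response.split("\n") if q.strip()]
        return rewritten or [question]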
@@ -26,10 +26,8 @@ from onyx.context.search.postprocessing.postprocessing import rerank_sections
from onyx.context.search.postprocessing.postprocessing import should_rerank
from onyx.db.engine import get_session_context_manager
from onyx.db.search_settings import get_current_search_settings
from onyx.utils.timing import log_function_time

@log_function_time(print_only=True)
def rerank_documents(
state: ExpandedRetrievalState, config: RunnableConfig
) -> DocRerankingUpdate:

@@ -28,10 +28,8 @@ from onyx.tools.tool_implementations.search.search_tool import (
SEARCH_RESPONSE_SUMMARY_ID,
)
from onyx.tools.tool_implementations.search.search_tool import SearchResponseSummary
from onyx.utils.timing import log_function_time

@log_function_time(print_only=True)
def retrieve_documents(
state: RetrievalInput, config: RunnableConfig
) -> DocRetrievalUpdate:

@@ -1,7 +1,5 @@
from datetime import datetime
from typing import cast

from langchain_core.messages import BaseMessage
from langchain_core.messages import HumanMessage
from langchain_core.runnables.config import RunnableConfig

@@ -12,38 +10,14 @@ from onyx.agents.agent_search.deep_search.shared.expanded_retrieval.states impor
DocVerificationUpdate,
)
from onyx.agents.agent_search.models import GraphConfig
from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
binary_string_test,
)
from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
trim_prompt_piece,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AGENT_POSITIVE_VALUE_STR,
)
from onyx.agents.agent_search.shared_graph_utils.models import LLMNodeErrorStrings
from onyx.agents.agent_search.shared_graph_utils.utils import (
get_langgraph_node_log_string,
)
from onyx.configs.agent_configs import AGENT_TIMEOUT_OVERRIDE_LLM_DOCUMENT_VERIFICATION
from onyx.llm.chat_llm import LLMRateLimitError
from onyx.llm.chat_llm import LLMTimeoutError
from onyx.prompts.agent_search import (
DOCUMENT_VERIFICATION_PROMPT,
)
from onyx.utils.logger import setup_logger
from onyx.utils.timing import log_function_time

logger = setup_logger()

_llm_node_error_strings = LLMNodeErrorStrings(
timeout="The LLM timed out. The document could not be verified. The document will be treated as 'relevant'",
rate_limit="The LLM encountered a rate limit. The document could not be verified. The document will be treated as 'relevant'",
general_error="The LLM encountered an error. The document could not be verified. The document will be treated as 'relevant'",
)

@log_function_time(print_only=True)
def verify_documents(
state: DocVerificationInput, config: RunnableConfig
) -> DocVerificationUpdate:
@@ -52,14 +26,12 @@ def verify_documents(

Args:
state (DocVerificationInput): The current state
config (RunnableConfig): Configuration containing AgentSearchConfig
config (RunnableConfig): Configuration containing ProSearchConfig

Updates:
verified_documents: list[InferenceSection]
"""

node_start_time = datetime.now()

question = state.question
retrieved_document_to_verify = state.retrieved_document_to_verify
document_content = retrieved_document_to_verify.combined_content
@@ -79,40 +51,12 @@ def verify_documents(
)
]

response: BaseMessage | None = None
response = fast_llm.invoke(msg)

verified_documents = [
retrieved_document_to_verify
]  # default is to treat document as relevant

try:
response = fast_llm.invoke(
msg, timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_DOCUMENT_VERIFICATION
)

assert isinstance(response.content, str)
if not binary_string_test(
text=response.content, positive_value=AGENT_POSITIVE_VALUE_STR
):
verified_documents = []

except LLMTimeoutError:
# In this case, we decide to continue and don't raise an error, as
# little harm in letting some docs through that are less relevant.
logger.error("LLM Timeout Error - verify documents")

except LLMRateLimitError:
# In this case, we decide to continue and don't raise an error, as
# little harm in letting some docs through that are less relevant.
logger.error("LLM Rate Limit Error - verify documents")
verified_documents = []
if isinstance(response.content, str) and "yes" in response.content.lower():
verified_documents.append(retrieved_document_to_verify)

return DocVerificationUpdate(
verified_documents=verified_documents,
log_messages=[
get_langgraph_node_log_string(
graph_component="shared - expanded retrieval",
node_name="verify documents",
node_start_time=node_start_time,
)
],
)

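The rewritten `verify_documents` inverts the old default: the document starts out verified, and only an explicit negative answer from the fast LLM removes it; timeouts and rate limits keep the permissive default. A condensed sketch of the decision, with placeholder names:

    def verify_one_document(fast_llm, msg, doc, timeout_s: int) -> list:
        verified = [doc]  # default: treat the document as relevant
        try:
            response = fast_llm.invoke(msg, timeout_override=timeout_s)
            if "yes" not in str(response.content).lower():
                verified = []  # the LLM explicitly rejected the document
        except Exception:
            # Timeout / rate limit: little harm in letting the doc through
            pass
        return verified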
@@ -21,13 +21,9 @@ from onyx.context.search.models import InferenceSection

class ExpandedRetrievalInput(SubgraphCoreState):
# exception from 'no default value' for LangGraph input states
# Here, sub_question_id default None implies usage for the
# original question. This is sometimes needed for nested sub-graphs

question: str = ""
base_search: bool = False
sub_question_id: str | None = None
question: str
base_search: bool

## Update/Return States
@@ -38,7 +34,7 @@ class QueryExpansionUpdate(LoggerUpdate, BaseModel):
log_messages: list[str] = []

class DocVerificationUpdate(LoggerUpdate, BaseModel):
class DocVerificationUpdate(BaseModel):
verified_documents: Annotated[list[InferenceSection], dedup_inference_sections] = []

@@ -92,4 +88,4 @@ class DocVerificationInput(ExpandedRetrievalInput):

class RetrievalInput(ExpandedRetrievalInput):
query_to_retrieve: str
query_to_retrieve: str = ""

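The comment added in this hunk documents the convention: LangGraph input states are the one place where default values are allowed, because nested sub-graphs may construct them without a concrete sub-question (`sub_question_id=None` meaning "the original question"). A toy model illustrating the idea, not the real class hierarchy:

    from pydantic import BaseModel

    class ToyExpandedRetrievalInput(BaseModel):
        # Defaults let a parent graph build the input state without a
        # sub-question; None signals "use the original question".
        question: str = ""
        base_search: bool = False
        sub_question_id: str | None = None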
@@ -12,7 +12,7 @@ from onyx.agents.agent_search.deep_search.main.graph_builder import (
main_graph_builder as main_graph_builder_a,
)
from onyx.agents.agent_search.deep_search.main.states import (
MainInput as MainInput,
MainInput as MainInput_a,
)
from onyx.agents.agent_search.models import GraphConfig
from onyx.agents.agent_search.shared_graph_utils.utils import get_test_config
@@ -21,7 +21,6 @@ from onyx.chat.models import AnswerPacket
from onyx.chat.models import AnswerStream
from onyx.chat.models import ExtendedToolResponse
from onyx.chat.models import RefinedAnswerImprovement
from onyx.chat.models import StreamingError
from onyx.chat.models import StreamStopInfo
from onyx.chat.models import SubQueryPiece
from onyx.chat.models import SubQuestionPiece
@@ -34,7 +33,6 @@ from onyx.llm.factory import get_default_llms
from onyx.tools.tool_runner import ToolCallKickoff
from onyx.utils.logger import setup_logger

logger = setup_logger()

_COMPILED_GRAPH: CompiledStateGraph | None = None
@@ -74,15 +72,13 @@ def _parse_agent_event(
return cast(AnswerPacket, event["data"])
elif event["name"] == "refined_answer_improvement":
return cast(RefinedAnswerImprovement, event["data"])
elif event["name"] == "refined_sub_question_creation_error":
return cast(StreamingError, event["data"])
return None

def manage_sync_streaming(
compiled_graph: CompiledStateGraph,
config: GraphConfig,
graph_input: BasicInput | MainInput,
graph_input: BasicInput | MainInput_a,
) -> Iterable[StreamEvent]:
message_id = config.persistence.message_id if config.persistence else None
for event in compiled_graph.stream(
@@ -96,7 +92,7 @@ def manage_sync_streaming(
def run_graph(
compiled_graph: CompiledStateGraph,
config: GraphConfig,
input: BasicInput | MainInput,
input: BasicInput | MainInput_a,
) -> AnswerStream:
config.behavior.perform_initial_search_decomposition = (
INITIAL_SEARCH_DECOMPOSITION_ENABLED
@@ -127,7 +123,9 @@ def run_main_graph(
) -> AnswerStream:
compiled_graph = load_compiled_graph()

input = MainInput(log_messages=[])
input = MainInput_a(
base_question=config.inputs.search_request.query, log_messages=[]
)

# Agent search is not a Tool per se, but this is helpful for the frontend
yield ToolCallKickoff(
@@ -142,7 +140,7 @@ def run_basic_graph(
) -> AnswerStream:
graph = basic_graph_builder()
compiled_graph = graph.compile()
input = BasicInput(unused=True)
input = BasicInput()
return run_graph(compiled_graph, config, input)

@@ -174,7 +172,9 @@ if __name__ == "__main__":
# search_request.persona = get_persona_by_id(1, None, db_session)
# config.perform_initial_search_path_decision = False
config.behavior.perform_initial_search_decomposition = True
input = MainInput(log_messages=[])
input = MainInput_a(
base_question=config.inputs.search_request.query, log_messages=[]
)

tool_responses: list = []
for output in run_graph(compiled_graph, config, input):

@@ -7,7 +7,6 @@ from onyx.agents.agent_search.models import GraphConfig
from onyx.agents.agent_search.shared_graph_utils.models import (
AgentPromptEnrichmentComponents,
)
from onyx.agents.agent_search.shared_graph_utils.utils import format_docs
from onyx.agents.agent_search.shared_graph_utils.utils import (
get_persona_agent_prompt_expressions,
)
@@ -41,7 +40,13 @@ def build_sub_question_answer_prompt(

date_str = build_date_time_string()

docs_str = format_docs(docs)
# TODO: This should include document metadata and title
docs_format_list = [
f"Document Number: [D{doc_num + 1}]\nContent: {doc.combined_content}\n\n"
for doc_num, doc in enumerate(docs)
]

docs_str = "\n\n".join(docs_format_list)

docs_str = trim_prompt_piece(
config,
@@ -145,38 +150,3 @@ def get_prompt_enrichment_components(
history=history,
date_str=date_str,
)

def binary_string_test(text: str, positive_value: str = "yes") -> bool:
"""
Tests if a string contains a positive value (case-insensitive).

Args:
text: The string to test
positive_value: The value to look for (defaults to "yes")

Returns:
True if the positive value is found in the text
"""
return positive_value.lower() in text.lower()

def binary_string_test_after_answer_separator(
text: str, positive_value: str = "yes", separator: str = "Answer:"
) -> bool:
"""
Tests if a string contains a positive value (case-insensitive).

Args:
text: The string to test
positive_value: The value to look for (defaults to "yes")

Returns:
True if the positive value is found in the text
"""

if separator not in text:
return False
relevant_text = text.split(f"{separator}")[-1]

return binary_string_test(relevant_text, positive_value)

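Illustrative calls for the two helpers added at the end of this hunk; the second only inspects the text after the final `Answer:` separator, so earlier reasoning text cannot leak into the verdict:

    assert binary_string_test("Yes, that works") is True
    assert binary_string_test("Absolutely not") is False

    # Only the text after the last separator is tested:
    assert binary_string_test_after_answer_separator(
        "Reasoning: arguably no.\nAnswer: yes"
    ) is True
    assert binary_string_test_after_answer_separator("no separator present") is False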
@@ -1,11 +1,7 @@
import numpy as np

from onyx.agents.agent_search.shared_graph_utils.models import AnswerGenerationDocuments
from onyx.agents.agent_search.shared_graph_utils.models import RetrievalFitScoreMetrics
from onyx.agents.agent_search.shared_graph_utils.models import RetrievalFitStats
from onyx.agents.agent_search.shared_graph_utils.operators import (
dedup_inference_section_list,
)
from onyx.chat.models import SectionRelevancePiece
from onyx.context.search.models import InferenceSection
from onyx.utils.logger import setup_logger
@@ -100,106 +96,3 @@ def get_fit_scores(
)

return fit_eval

def get_answer_generation_documents(
relevant_docs: list[InferenceSection],
context_documents: list[InferenceSection],
original_question_docs: list[InferenceSection],
max_docs: int,
) -> AnswerGenerationDocuments:
"""
Create a deduplicated list of documents to stream, prioritizing relevant docs.

Args:
relevant_docs: Primary documents to include
context_documents: Additional context documents to append
original_question_docs: Original question documents to append
max_docs: Maximum number of documents to return

Returns:
List of deduplicated documents, limited to max_docs
"""
# get relevant_doc ids
relevant_doc_ids = [doc.center_chunk.document_id for doc in relevant_docs]

# Start with relevant docs or fallback to original question docs
streaming_documents = relevant_docs.copy()

# Use a set for O(1) lookups of document IDs
seen_doc_ids = {doc.center_chunk.document_id for doc in streaming_documents}

# Combine additional documents to check in one iteration
additional_docs = context_documents + original_question_docs
for doc_idx, doc in enumerate(additional_docs):
doc_id = doc.center_chunk.document_id
if doc_id not in seen_doc_ids:
streaming_documents.append(doc)
seen_doc_ids.add(doc_id)

streaming_documents = dedup_inference_section_list(streaming_documents)

relevant_streaming_docs = [
doc
for doc in streaming_documents
if doc.center_chunk.document_id in relevant_doc_ids
]
relevant_streaming_docs = dedup_sort_inference_section_list(relevant_streaming_docs)

additional_streaming_docs = [
doc
for doc in streaming_documents
if doc.center_chunk.document_id not in relevant_doc_ids
]
additional_streaming_docs = dedup_sort_inference_section_list(
additional_streaming_docs
)

for doc in additional_streaming_docs:
if doc.center_chunk.score:
doc.center_chunk.score += -2.0
else:
doc.center_chunk.score = -2.0

sorted_streaming_documents = relevant_streaming_docs + additional_streaming_docs

return AnswerGenerationDocuments(
streaming_documents=sorted_streaming_documents[:max_docs],
context_documents=relevant_streaming_docs[:max_docs],
)

def dedup_sort_inference_section_list(
sections: list[InferenceSection],
) -> list[InferenceSection]:
"""Deduplicates InferenceSections by document_id and sorts by score.

Args:
sections: List of InferenceSections to deduplicate and sort

Returns:
Deduplicated list of InferenceSections sorted by score in descending order
"""
# dedupe/merge with existing framework
sections = dedup_inference_section_list(sections)

# Use dict to deduplicate by document_id, keeping highest scored version
unique_sections: dict[str, InferenceSection] = {}
for section in sections:
doc_id = section.center_chunk.document_id
if doc_id not in unique_sections:
unique_sections[doc_id] = section
continue

# Keep version with higher score
existing_score = unique_sections[doc_id].center_chunk.score or 0
new_score = section.center_chunk.score or 0
if new_score > existing_score:
unique_sections[doc_id] = section

# Sort by score in descending order, handling None scores
sorted_sections = sorted(
unique_sections.values(), key=lambda x: x.center_chunk.score or 0, reverse=True
)

return sorted_sections

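A small usage sketch of `dedup_sort_inference_section_list`; `make_section` is a hypothetical helper, since constructing a real `InferenceSection` takes more fields than shown here:

    a_low = make_section(document_id="doc-a", score=0.4)   # hypothetical helper
    a_high = make_section(document_id="doc-a", score=0.9)
    b = make_section(document_id="doc-b", score=0.7)

    result = dedup_sort_inference_section_list([a_low, a_high, b])
    # Keeps the higher-scored copy of doc-a, sorted descending by score:
    # [doc-a (0.9), doc-b (0.7)]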
@@ -1,19 +0,0 @@
|
||||
from enum import Enum
|
||||
|
||||
AGENT_LLM_TIMEOUT_MESSAGE = "The agent timed out. Please try again."
|
||||
AGENT_LLM_ERROR_MESSAGE = "The agent encountered an error. Please try again."
|
||||
AGENT_LLM_RATELIMIT_MESSAGE = (
|
||||
"The agent encountered a rate limit error. Please try again."
|
||||
)
|
||||
LLM_ANSWER_ERROR_MESSAGE = "The question was not answered due to an LLM error."
|
||||
|
||||
AGENT_POSITIVE_VALUE_STR = "yes"
|
||||
AGENT_NEGATIVE_VALUE_STR = "no"
|
||||
|
||||
AGENT_ANSWER_SEPARATOR = "Answer:"
|
||||
|
||||
|
||||
class AgentLLMErrorType(str, Enum):
|
||||
TIMEOUT = "timeout"
|
||||
RATE_LIMIT = "rate_limit"
|
||||
GENERAL_ERROR = "general_error"
|
||||
@@ -1,5 +1,3 @@
from typing import Any

from pydantic import BaseModel

from onyx.agents.agent_search.deep_search.main.models import (
@@ -58,12 +56,6 @@ class InitialAgentResultStats(BaseModel):
agent_effectiveness: dict[str, float | int | None]

class AgentErrorLog(BaseModel):
error_message: str
error_type: str
error_result: str

class RefinedAgentStats(BaseModel):
revision_doc_efficiency: float | None
revision_question_efficiency: float | None
@@ -118,11 +110,6 @@ class SubQuestionAnswerResults(BaseModel):
sub_question_retrieval_stats: AgentChunkRetrievalStats

class StructuredSubquestionDocuments(BaseModel):
cited_documents: list[InferenceSection]
context_documents: list[InferenceSection]

class CombinedAgentMetrics(BaseModel):
timings: AgentTimings
base_metrics: AgentBaseMetrics | None
@@ -139,17 +126,3 @@ class AgentPromptEnrichmentComponents(BaseModel):
persona_prompts: PersonaPromptExpressions
history: str
date_str: str

class LLMNodeErrorStrings(BaseModel):
timeout: str = "LLM Timeout Error"
rate_limit: str = "LLM Rate Limit Error"
general_error: str = "General LLM Error"

class AnswerGenerationDocuments(BaseModel):
streaming_documents: list[InferenceSection]
context_documents: list[InferenceSection]

BaseMessage_Content = str | list[str | dict[str, Any]]

@@ -12,13 +12,6 @@ def dedup_inference_sections(
return deduped

def dedup_inference_section_list(
list: list[InferenceSection],
) -> list[InferenceSection]:
deduped = _merge_sections(list)
return deduped

def dedup_question_answer_results(
question_answer_results_1: list[SubQuestionAnswerResults],
question_answer_results_2: list[SubQuestionAnswerResults],

@@ -20,18 +20,10 @@ from onyx.agents.agent_search.models import GraphInputs
from onyx.agents.agent_search.models import GraphPersistence
from onyx.agents.agent_search.models import GraphSearchConfig
from onyx.agents.agent_search.models import GraphTooling
from onyx.agents.agent_search.shared_graph_utils.models import BaseMessage_Content
from onyx.agents.agent_search.shared_graph_utils.models import (
EntityRelationshipTermExtraction,
)
from onyx.agents.agent_search.shared_graph_utils.models import PersonaPromptExpressions
from onyx.agents.agent_search.shared_graph_utils.models import (
StructuredSubquestionDocuments,
)
from onyx.agents.agent_search.shared_graph_utils.models import SubQuestionAnswerResults
from onyx.agents.agent_search.shared_graph_utils.operators import (
dedup_inference_section_list,
)
from onyx.chat.models import AnswerPacket
from onyx.chat.models import AnswerStyleConfig
from onyx.chat.models import CitationConfig
@@ -42,9 +34,6 @@ from onyx.chat.models import StreamStopInfo
from onyx.chat.models import StreamStopReason
from onyx.chat.models import StreamType
from onyx.chat.prompt_builder.answer_prompt_builder import AnswerPromptBuilder
from onyx.configs.agent_configs import (
AGENT_TIMEOUT_OVERRIDE_LLM_HISTORY_SUMMARY_GENERATION,
)
from onyx.configs.chat_configs import CHAT_TARGET_CHUNK_PERCENTAGE
from onyx.configs.chat_configs import MAX_CHUNKS_FED_TO_CHAT
from onyx.configs.constants import DEFAULT_PERSONA_ID
@@ -57,8 +46,6 @@ from onyx.context.search.models import SearchRequest
from onyx.db.engine import get_session_context_manager
from onyx.db.persona import get_persona_by_id
from onyx.db.persona import Persona
from onyx.llm.chat_llm import LLMRateLimitError
from onyx.llm.chat_llm import LLMTimeoutError
from onyx.llm.interfaces import LLM
from onyx.prompts.agent_search import (
ASSISTANT_SYSTEM_PROMPT_DEFAULT,
@@ -79,9 +66,8 @@ from onyx.tools.tool_implementations.search.search_tool import (
from onyx.tools.tool_implementations.search.search_tool import SearchResponseSummary
from onyx.tools.tool_implementations.search.search_tool import SearchTool
from onyx.tools.utils import explicit_tool_calling_supported
from onyx.utils.logger import setup_logger

logger = setup_logger()
BaseMessage_Content = str | list[str | dict[str, Any]]

# Post-processing
@@ -394,24 +380,8 @@ def summarize_history(
)
)

try:
history_response = llm.invoke(
history_context_prompt,
timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_HISTORY_SUMMARY_GENERATION,
)
except LLMTimeoutError:
logger.error("LLM Timeout Error - summarize history")
return (
history  # this is what is done at this point anyway, so we default to this
)
except LLMRateLimitError:
logger.error("LLM Rate Limit Error - summarize history")
return (
history  # this is what is done at this point anyway, so we default to this
)

history_response = llm.invoke(history_context_prompt)
assert isinstance(history_response.content, str)

return history_response.content

@@ -477,27 +447,3 @@ def remove_document_citations(text: str) -> str:
# \d+ - one or more digits
# \] - literal ] character
return re.sub(r"\[(?:D|Q)?\d+\]", "", text)

def get_deduplicated_structured_subquestion_documents(
sub_question_results: list[SubQuestionAnswerResults],
) -> StructuredSubquestionDocuments:
"""
Extract and deduplicate all cited documents from sub-question results.

Args:
sub_question_results: List of sub-question results containing cited documents

Returns:
Deduplicated list of cited documents
"""
cited_docs = [
doc for result in sub_question_results for doc in result.cited_documents
]
context_docs = [
doc for result in sub_question_results for doc in result.context_documents
]
return StructuredSubquestionDocuments(
cited_documents=dedup_inference_section_list(cited_docs),
context_documents=dedup_inference_section_list(context_docs),
)

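`summarize_history` follows the same degrade-gracefully pattern as the other nodes in this change set: if summarization times out or hits a rate limit, the unsummarized history is returned as-is. The shape, reduced to a sketch:

    def summarize_or_passthrough(llm, prompt, history: str, timeout_s: int) -> str:
        try:
            response = llm.invoke(prompt, timeout_override=timeout_s)
        except Exception:
            # Timeout / rate limit: fall back to the raw history, which is
            # what the caller received before summarization existed anyway.
            return history
        assert isinstance(response.content, str)
        return response.content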
@@ -94,7 +94,6 @@ from onyx.db.models import User
from onyx.db.users import get_user_by_email
from onyx.redis.redis_pool import get_async_redis_connection
from onyx.redis.redis_pool import get_redis_client
from onyx.server.utils import BasicAuthenticationError
from onyx.utils.logger import setup_logger
from onyx.utils.telemetry import create_milestone_and_report
from onyx.utils.telemetry import optional_telemetry
@@ -108,6 +107,11 @@ from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
logger = setup_logger()

class BasicAuthenticationError(HTTPException):
def __init__(self, detail: str):
super().__init__(status_code=status.HTTP_403_FORBIDDEN, detail=detail)

def is_user_admin(user: User | None) -> bool:
if AUTH_TYPE == AuthType.DISABLED:
return True

@@ -190,9 +190,9 @@ def _build_connector_start_latency_metric(
desired_start_time = cc_pair.connector.time_created
else:
if not cc_pair.connector.refresh_freq:
task_logger.debug(
"Connector has no refresh_freq and this is a non-initial index attempt. "
"Assuming user manually triggered indexing, so we'll skip start latency metric."
task_logger.error(
"Found non-initial index attempt for connector "
"without refresh_freq. This should never happen."
)
return None

@@ -78,10 +78,6 @@ logger = setup_logger()
def check_for_vespa_sync_task(self: Task, *, tenant_id: str | None) -> bool | None:
"""Runs periodically to check if any document needs syncing.
Generates sets of tasks for Celery if syncing is needed."""

# Useful for debugging timing issues with reacquisitions. TODO: remove once more generalized logging is in place
task_logger.info("check_for_vespa_sync_task started")

time_start = time.monotonic()

r = get_redis_client(tenant_id=tenant_id)
@@ -496,21 +492,13 @@ def monitor_document_set_taskset(
task_logger.info(
f"Successfully synced document set: document_set={document_set_id}"
)

try:
update_sync_record_status(
db_session=db_session,
entity_id=document_set_id,
sync_type=SyncType.DOCUMENT_SET,
sync_status=SyncStatus.SUCCESS,
num_docs_synced=initial_count,
)
except Exception:
task_logger.exception(
"update_sync_record_status exceptioned. "
f"document_set_id={document_set_id} "
"Resetting document set regardless."
)
update_sync_record_status(
db_session=db_session,
entity_id=document_set_id,
sync_type=SyncType.DOCUMENT_SET,
sync_status=SyncStatus.SUCCESS,
num_docs_synced=initial_count,
)

rds.reset()

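The `monitor_document_set_taskset` change makes the Redis reset unconditional: a failure in the sync-record bookkeeping is logged but no longer prevents `rds.reset()`. The same guarantee can also be stated with try/finally; a sketch with placeholder callables:

    def finish_document_set_sync(update_sync_record, rds) -> None:
        try:
            update_sync_record()  # best-effort status bookkeeping
        except Exception:
            pass  # the real code logs via task_logger.exception(...)
        finally:
            rds.reset()  # must always run to release the taskset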
@@ -8,44 +8,14 @@ AGENT_DEFAULT_RERANKING_HITS = 10
AGENT_DEFAULT_SUB_QUESTION_MAX_CONTEXT_HITS = 8
AGENT_DEFAULT_NUM_DOCS_FOR_INITIAL_DECOMPOSITION = 3
AGENT_DEFAULT_NUM_DOCS_FOR_REFINED_DECOMPOSITION = 5

AGENT_DEFAULT_MAX_STREAMED_DOCS_FOR_INITIAL_ANSWER = 25
AGENT_DEFAULT_MAX_STREAMED_DOCS_FOR_REFINED_ANSWER = 35

AGENT_DEFAULT_EXPLORATORY_SEARCH_RESULTS = 5
AGENT_DEFAULT_MIN_ORIG_QUESTION_DOCS = 3
AGENT_DEFAULT_MAX_ANSWER_CONTEXT_DOCS = 10
AGENT_DEFAULT_MAX_STATIC_HISTORY_WORD_LENGTH = 2000

INITIAL_SEARCH_DECOMPOSITION_ENABLED = True
ALLOW_REFINEMENT = True

AGENT_DEFAULT_RETRIEVAL_HITS = 15
AGENT_DEFAULT_RERANKING_HITS = 10
AGENT_DEFAULT_SUB_QUESTION_MAX_CONTEXT_HITS = 8
AGENT_DEFAULT_NUM_DOCS_FOR_INITIAL_DECOMPOSITION = 3
AGENT_DEFAULT_NUM_DOCS_FOR_REFINED_DECOMPOSITION = 5
AGENT_DEFAULT_EXPLORATORY_SEARCH_RESULTS = 5
AGENT_DEFAULT_MIN_ORIG_QUESTION_DOCS = 3
AGENT_DEFAULT_MAX_ANSWER_CONTEXT_DOCS = 10
AGENT_DEFAULT_MAX_STATIC_HISTORY_WORD_LENGTH = 2000

AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_GENERAL_GENERATION = 30  # in seconds

AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_HISTORY_SUMMARY_GENERATION = 10  # in seconds
AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_ENTITY_TERM_EXTRACTION = 25  # in seconds
AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_QUERY_REWRITING_GENERATION = 4  # in seconds
AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_DOCUMENT_VERIFICATION = 1  # in seconds
AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_SUBQUESTION_GENERATION = 3  # in seconds
AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_GENERATION = 12  # in seconds
AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_CHECK = 8  # in seconds
AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_INITIAL_ANSWER_GENERATION = 25  # in seconds

AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_REFINED_SUBQUESTION_GENERATION = 6  # in seconds
AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_GENERATION = 25  # in seconds
AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_VALIDATION = 8  # in seconds
AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_COMPARE_ANSWERS = 8  # in seconds
#####
# Agent Configs
#####

AGENT_RETRIEVAL_STATS = (
@@ -107,151 +77,4 @@ AGENT_MAX_STATIC_HISTORY_WORD_LENGTH = int(
or AGENT_DEFAULT_MAX_STATIC_HISTORY_WORD_LENGTH
)  # 2000

AGENT_MAX_STREAMED_DOCS_FOR_INITIAL_ANSWER = int(
os.environ.get("AGENT_MAX_STREAMED_DOCS_FOR_INITIAL_ANSWER")
or AGENT_DEFAULT_MAX_STREAMED_DOCS_FOR_INITIAL_ANSWER
)  # 25

AGENT_MAX_STREAMED_DOCS_FOR_REFINED_ANSWER = int(
os.environ.get("AGENT_MAX_STREAMED_DOCS_FOR_REFINED_ANSWER")
or AGENT_DEFAULT_MAX_STREAMED_DOCS_FOR_REFINED_ANSWER
)  # 35

AGENT_RETRIEVAL_STATS = (
not os.environ.get("AGENT_RETRIEVAL_STATS") == "False"
) or True  # default True

AGENT_MAX_QUERY_RETRIEVAL_RESULTS = int(
os.environ.get("AGENT_MAX_QUERY_RETRIEVAL_RESULTS") or AGENT_DEFAULT_RETRIEVAL_HITS
)  # 15

AGENT_MAX_QUERY_RETRIEVAL_RESULTS = int(
os.environ.get("AGENT_MAX_QUERY_RETRIEVAL_RESULTS") or AGENT_DEFAULT_RETRIEVAL_HITS
)  # 15

# Reranking agent configs
# Reranking stats - no influence on flow outside of stats collection
AGENT_RERANKING_STATS = (
not os.environ.get("AGENT_RERANKING_STATS") == "True"
) or False  # default False

AGENT_MAX_QUERY_RETRIEVAL_RESULTS = int(
os.environ.get("AGENT_MAX_QUERY_RETRIEVAL_RESULTS") or AGENT_DEFAULT_RETRIEVAL_HITS
)  # 15

AGENT_RERANKING_MAX_QUERY_RETRIEVAL_RESULTS = int(
os.environ.get("AGENT_RERANKING_MAX_QUERY_RETRIEVAL_RESULTS")
or AGENT_DEFAULT_RERANKING_HITS
)  # 10

AGENT_NUM_DOCS_FOR_DECOMPOSITION = int(
os.environ.get("AGENT_NUM_DOCS_FOR_DECOMPOSITION")
or AGENT_DEFAULT_NUM_DOCS_FOR_INITIAL_DECOMPOSITION
)  # 3

AGENT_NUM_DOCS_FOR_REFINED_DECOMPOSITION = int(
os.environ.get("AGENT_NUM_DOCS_FOR_REFINED_DECOMPOSITION")
or AGENT_DEFAULT_NUM_DOCS_FOR_REFINED_DECOMPOSITION
)  # 5

AGENT_EXPLORATORY_SEARCH_RESULTS = int(
os.environ.get("AGENT_EXPLORATORY_SEARCH_RESULTS")
or AGENT_DEFAULT_EXPLORATORY_SEARCH_RESULTS
)  # 5

AGENT_MIN_ORIG_QUESTION_DOCS = int(
os.environ.get("AGENT_MIN_ORIG_QUESTION_DOCS")
or AGENT_DEFAULT_MIN_ORIG_QUESTION_DOCS
)  # 3

AGENT_MAX_ANSWER_CONTEXT_DOCS = int(
os.environ.get("AGENT_MAX_ANSWER_CONTEXT_DOCS")
or AGENT_DEFAULT_SUB_QUESTION_MAX_CONTEXT_HITS
)  # 8

AGENT_MAX_STATIC_HISTORY_WORD_LENGTH = int(
os.environ.get("AGENT_MAX_STATIC_HISTORY_WORD_LENGTH")
or AGENT_DEFAULT_MAX_STATIC_HISTORY_WORD_LENGTH
)  # 2000

AGENT_TIMEOUT_OVERRIDE_LLM_ENTITY_TERM_EXTRACTION = int(
os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_ENTITY_TERM_EXTRACTION")
or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_ENTITY_TERM_EXTRACTION
)  # 25

AGENT_TIMEOUT_OVERRIDE_LLM_DOCUMENT_VERIFICATION = int(
os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_DOCUMENT_VERIFICATION")
or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_DOCUMENT_VERIFICATION
)  # 3

AGENT_TIMEOUT_OVERRIDE_LLM_GENERAL_GENERATION = int(
os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_GENERAL_GENERATION")
or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_GENERAL_GENERATION
)  # 30

AGENT_TIMEOUT_OVERRIDE_LLM_SUBQUESTION_GENERATION = int(
os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_SUBQUESTION_GENERATION")
or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_SUBQUESTION_GENERATION
)  # 8

AGENT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_GENERATION = int(
os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_GENERATION")
or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_GENERATION
)  # 12

AGENT_TIMEOUT_OVERRIDE_LLM_INITIAL_ANSWER_GENERATION = int(
os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_INITIAL_ANSWER_GENERATION")
or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_INITIAL_ANSWER_GENERATION
)  # 25

AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_GENERATION = int(
os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_GENERATION")
or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_GENERATION
)  # 25

AGENT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_CHECK = int(
os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_CHECK")
or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_CHECK
)  # 8

AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_SUBQUESTION_GENERATION = int(
os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_SUBQUESTION_GENERATION")
or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_REFINED_SUBQUESTION_GENERATION
)  # 6

AGENT_TIMEOUT_OVERRIDE_LLM_QUERY_REWRITING_GENERATION = int(
os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_QUERY_REWRITING_GENERATION")
or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_QUERY_REWRITING_GENERATION
)  # 1

AGENT_TIMEOUT_OVERRIDE_LLM_HISTORY_SUMMARY_GENERATION = int(
os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_HISTORY_SUMMARY_GENERATION")
or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_HISTORY_SUMMARY_GENERATION
)  # 4

AGENT_TIMEOUT_OVERRIDE_LLM_COMPARE_ANSWERS = int(
os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_COMPARE_ANSWERS")
or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_COMPARE_ANSWERS
)  # 8

AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_VALIDATION = int(
os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_VALIDATION")
or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_VALIDATION
)  # 8

GRAPH_VERSION_NAME: str = "a"

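Nearly every setting above follows one pattern: `int(os.environ.get(NAME) or DEFAULT)`, so an unset or empty variable falls back to the code default. The boolean flags are the odd ones out; as written, `(not os.environ.get("AGENT_RETRIEVAL_STATS") == "False") or True` appears to evaluate to `True` regardless of the environment. A sketch of both idioms with hypothetical variable names:

    import os

    # Integer with default: unset or empty env var falls back to the default.
    AGENT_FOO_TIMEOUT = int(os.environ.get("AGENT_FOO_TIMEOUT") or 30)

    # Boolean with an explicit default, avoiding the always-true `or` form:
    AGENT_FOO_STATS = os.environ.get("AGENT_FOO_STATS", "true").lower() == "true"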
@@ -125,7 +125,6 @@ class DocumentSource(str, Enum):
GMAIL = "gmail"
REQUESTTRACKER = "requesttracker"
GITHUB = "github"
GITBOOK = "gitbook"
GITLAB = "gitlab"
GURU = "guru"
BOOKSTACK = "bookstack"

@@ -20,7 +20,6 @@ from onyx.connectors.egnyte.connector import EgnyteConnector
from onyx.connectors.file.connector import LocalFileConnector
from onyx.connectors.fireflies.connector import FirefliesConnector
from onyx.connectors.freshdesk.connector import FreshdeskConnector
from onyx.connectors.gitbook.connector import GitbookConnector
from onyx.connectors.github.connector import GithubConnector
from onyx.connectors.gitlab.connector import GitlabConnector
from onyx.connectors.gmail.connector import GmailConnector
@@ -72,7 +71,6 @@ def identify_connector_class(
DocumentSource.GITHUB: GithubConnector,
DocumentSource.GMAIL: GmailConnector,
DocumentSource.GITLAB: GitlabConnector,
DocumentSource.GITBOOK: GitbookConnector,
DocumentSource.GOOGLE_DRIVE: GoogleDriveConnector,
DocumentSource.BOOKSTACK: BookstackConnector,
DocumentSource.CONFLUENCE: ConfluenceConnector,

@@ -1,279 +0,0 @@
from datetime import datetime
from datetime import timezone
from typing import Any
from urllib.parse import urljoin

import requests

from onyx.configs.app_configs import INDEX_BATCH_SIZE
from onyx.configs.constants import DocumentSource
from onyx.connectors.interfaces import GenerateDocumentsOutput
from onyx.connectors.interfaces import LoadConnector
from onyx.connectors.interfaces import PollConnector
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.models import ConnectorMissingCredentialError
from onyx.connectors.models import Document
from onyx.connectors.models import Section
from onyx.utils.logger import setup_logger

logger = setup_logger()

GITBOOK_API_BASE = "https://api.gitbook.com/v1/"

class GitbookApiClient:
def __init__(self, access_token: str) -> None:
self.access_token = access_token

def get(self, endpoint: str, params: dict[str, Any] | None = None) -> Any:
headers = {
"Authorization": f"Bearer {self.access_token}",
"Content-Type": "application/json",
}

url = urljoin(GITBOOK_API_BASE, endpoint.lstrip("/"))
response = requests.get(url, headers=headers, params=params)
response.raise_for_status()
return response.json()

def get_page_content(self, space_id: str, page_id: str) -> dict[str, Any]:
return self.get(f"/spaces/{space_id}/content/page/{page_id}")

def _extract_text_from_document(document: dict[str, Any]) -> str:
"""Extract text content from GitBook document structure by parsing the document nodes
into markdown format."""

def parse_leaf(leaf: dict[str, Any]) -> str:
text = leaf.get("text", "")
leaf.get("marks", [])
return text

def parse_text_node(node: dict[str, Any]) -> str:
text = ""
for leaf in node.get("leaves", []):
text += parse_leaf(leaf)
return text

def parse_block_node(node: dict[str, Any]) -> str:
block_type = node.get("type", "")
result = ""

if block_type == "heading-1":
text = "".join(parse_text_node(n) for n in node.get("nodes", []))
result = f"# {text}\n\n"

elif block_type == "heading-2":
text = "".join(parse_text_node(n) for n in node.get("nodes", []))
result = f"## {text}\n\n"

elif block_type == "heading-3":
text = "".join(parse_text_node(n) for n in node.get("nodes", []))
result = f"### {text}\n\n"

elif block_type == "heading-4":
text = "".join(parse_text_node(n) for n in node.get("nodes", []))
result = f"#### {text}\n\n"

elif block_type == "heading-5":
text = "".join(parse_text_node(n) for n in node.get("nodes", []))
result = f"##### {text}\n\n"

elif block_type == "heading-6":
text = "".join(parse_text_node(n) for n in node.get("nodes", []))
result = f"###### {text}\n\n"

elif block_type == "list-unordered":
for list_item in node.get("nodes", []):
paragraph = list_item.get("nodes", [])[0]
text = "".join(parse_text_node(n) for n in paragraph.get("nodes", []))
result += f"* {text}\n"
result += "\n"

elif block_type == "paragraph":
text = "".join(parse_text_node(n) for n in node.get("nodes", []))
result = f"{text}\n\n"

elif block_type == "list-tasks":
for task_item in node.get("nodes", []):
checked = task_item.get("data", {}).get("checked", False)
paragraph = task_item.get("nodes", [])[0]
text = "".join(parse_text_node(n) for n in paragraph.get("nodes", []))
checkbox = "[x]" if checked else "[ ]"
result += f"- {checkbox} {text}\n"
result += "\n"

elif block_type == "code":
for code_line in node.get("nodes", []):
if code_line.get("type") == "code-line":
text = "".join(
parse_text_node(n) for n in code_line.get("nodes", [])
)
result += f"{text}\n"
result += "\n"

elif block_type == "blockquote":
for quote_node in node.get("nodes", []):
if quote_node.get("type") == "paragraph":
text = "".join(
parse_text_node(n) for n in quote_node.get("nodes", [])
)
result += f"> {text}\n"
result += "\n"

elif block_type == "table":
records = node.get("data", {}).get("records", {})
definition = node.get("data", {}).get("definition", {})
view = node.get("data", {}).get("view", {})

columns = view.get("columns", [])

header_cells = []
for col_id in columns:
col_def = definition.get(col_id, {})
header_cells.append(col_def.get("title", ""))

result = "| " + " | ".join(header_cells) + " |\n"
result += "|" + "---|" * len(header_cells) + "\n"

sorted_records = sorted(
records.items(), key=lambda x: x[1].get("orderIndex", "")
)

for record_id, record_data in sorted_records:
values = record_data.get("values", {})
row_cells = []
for col_id in columns:
fragment_id = values.get(col_id, "")
fragment_text = ""
for fragment in node.get("fragments", []):
if fragment.get("fragment") == fragment_id:
for frag_node in fragment.get("nodes", []):
if frag_node.get("type") == "paragraph":
fragment_text = "".join(
parse_text_node(n)
for n in frag_node.get("nodes", [])
)
break
row_cells.append(fragment_text)
result += "| " + " | ".join(row_cells) + " |\n"

result += "\n"
return result

if not document or "document" not in document:
return ""

markdown = ""
nodes = document["document"].get("nodes", [])

for node in nodes:
markdown += parse_block_node(node)

return markdown

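`_extract_text_from_document` walks GitBook's block tree and emits markdown per block type; for headings and paragraphs the traversal reduces to joining leaf texts. A toy input in the node shape assumed above, and its output:

    page_content = {
        "document": {
            "nodes": [
                {"type": "heading-1", "nodes": [{"leaves": [{"text": "Title"}]}]},
                {
                    "type": "paragraph",
                    "nodes": [{"leaves": [{"text": "Hello "}, {"text": "world"}]}],
                },
            ]
        }
    }
    print(_extract_text_from_document(page_content))
    # -> "# Title\n\nHello world\n\n"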
def _convert_page_to_document(
|
||||
client: GitbookApiClient, space_id: str, page: dict[str, Any]
|
||||
) -> Document:
|
||||
page_id = page["id"]
|
||||
page_content = client.get_page_content(space_id, page_id)
|
||||
|
||||
return Document(
|
||||
id=f"gitbook-{space_id}-{page_id}",
|
||||
sections=[
|
||||
Section(
|
||||
link=page.get("urls", {}).get("app", ""),
|
||||
text=_extract_text_from_document(page_content),
|
||||
)
|
||||
],
|
||||
source=DocumentSource.GITBOOK,
|
||||
semantic_identifier=page.get("title", ""),
|
||||
doc_updated_at=datetime.fromisoformat(page["updatedAt"]).replace(
|
||||
tzinfo=timezone.utc
|
||||
),
|
||||
metadata={
|
||||
"path": page.get("path", ""),
|
||||
"type": page.get("type", ""),
|
||||
"kind": page.get("kind", ""),
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
class GitbookConnector(LoadConnector, PollConnector):
|
||||
def __init__(
|
||||
self,
|
||||
space_id: str,
|
||||
batch_size: int = INDEX_BATCH_SIZE,
|
||||
) -> None:
|
||||
self.space_id = space_id
|
||||
self.batch_size = batch_size
|
||||
self.access_token: str | None = None
|
||||
self.client: GitbookApiClient | None = None
|
||||
|
||||
def load_credentials(self, credentials: dict[str, Any]) -> None:
|
||||
access_token = credentials.get("gitbook_api_key")
|
||||
if not access_token:
|
||||
raise ConnectorMissingCredentialError("GitBook access token")
|
||||
self.access_token = access_token
|
||||
self.client = GitbookApiClient(access_token)
|
||||
|
||||
def _fetch_all_pages(
|
||||
self,
|
||||
start: datetime | None = None,
|
||||
end: datetime | None = None,
|
||||
) -> GenerateDocumentsOutput:
|
||||
if not self.client:
|
||||
raise ConnectorMissingCredentialError("GitBook")
|
||||
|
||||
try:
|
||||
content = self.client.get(f"/spaces/{self.space_id}/content")
|
||||
pages = content.get("pages", [])
|
||||
|
||||
current_batch: list[Document] = []
|
||||
for page in pages:
|
||||
updated_at = datetime.fromisoformat(page["updatedAt"])
|
||||
|
||||
if start and updated_at < start:
|
||||
if current_batch:
|
||||
yield current_batch
|
||||
return
|
||||
if end and updated_at > end:
|
||||
continue
|
||||
|
||||
current_batch.append(
|
||||
_convert_page_to_document(self.client, self.space_id, page)
|
||||
)
|
||||
|
||||
if len(current_batch) >= self.batch_size:
|
||||
yield current_batch
|
||||
current_batch = []
|
||||
|
||||
if current_batch:
|
||||
yield current_batch
|
||||
|
||||
except requests.RequestException as e:
|
||||
logger.error(f"Error fetching GitBook content: {str(e)}")
|
||||
raise
|
||||
|
||||
def load_from_state(self) -> GenerateDocumentsOutput:
|
||||
return self._fetch_all_pages()
|
||||
|
||||
def poll_source(
|
||||
self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
|
||||
) -> GenerateDocumentsOutput:
|
||||
start_datetime = datetime.fromtimestamp(start, tz=timezone.utc)
|
||||
end_datetime = datetime.fromtimestamp(end, tz=timezone.utc)
|
||||
return self._fetch_all_pages(start_datetime, end_datetime)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import os
|
||||
|
||||
connector = GitbookConnector(
|
||||
space_id=os.environ["GITBOOK_SPACE_ID"],
|
||||
)
|
||||
connector.load_credentials({"gitbook_api_key": os.environ["GITBOOK_API_KEY"]})
|
||||
document_batches = connector.load_from_state()
|
||||
print(next(document_batches))
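
# A hedged usage sketch for the PollConnector path: poll_source takes Unix-epoch
# seconds and converts them to UTC datetimes before delegating to
# _fetch_all_pages. The one-day window below is an arbitrary example and reuses
# the credentials from the block above.
#
#     import time
#
#     now = time.time()
#     for batch in connector.poll_source(start=now - 86400, end=now):
#         print(f"fetched {len(batch)} documents")
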
@@ -302,7 +302,7 @@ class GoogleDriveConnector(LoadConnector, PollConnector, SlimConnector):
            if e.status_code == 401:
                # fail gracefully, let the other impersonations continue
                # one user without access shouldn't block the entire connector
                logger.warning(
                logger.exception(
                    f"User '{user_email}' does not have access to the drive APIs."
                )
                return

@@ -350,14 +350,13 @@ def delete_chat_session(
    user_id: UUID | None,
    chat_session_id: UUID,
    db_session: Session,
    include_deleted: bool = False,
    hard_delete: bool = HARD_DELETE_CHATS,
) -> None:
    chat_session = get_chat_session_by_id(
        chat_session_id=chat_session_id, user_id=user_id, db_session=db_session
    )

    if chat_session.deleted and not include_deleted:
    if chat_session.deleted:
        raise ValueError("Cannot delete an already deleted chat session")

    if hard_delete:
@@ -381,15 +380,7 @@ def delete_chat_sessions_older_than(days_old: int, db_session: Session) -> None:
    ).fetchall()

    for user_id, session_id in old_sessions:
        try:
            delete_chat_session(
                user_id, session_id, db_session, include_deleted=True, hard_delete=True
            )
        except Exception:
            logger.exception(
                "delete_chat_session exceptioned. "
                f"user_id={user_id} session_id={session_id}"
            )
        delete_chat_session(user_id, session_id, db_session, hard_delete=True)


def get_chat_message(
@@ -902,18 +893,14 @@ def translate_db_sub_questions_to_server_objects(
                question=sub_question.sub_question,
                answer=sub_question.sub_answer,
                sub_queries=sub_queries,
                context_docs=get_retrieval_docs_from_search_docs(
                    verified_docs, sort_by_score=False
                ),
                context_docs=get_retrieval_docs_from_search_docs(verified_docs),
            )
        )
    return sub_questions


def get_retrieval_docs_from_search_docs(
    search_docs: list[SearchDoc],
    remove_doc_content: bool = False,
    sort_by_score: bool = True,
    search_docs: list[SearchDoc], remove_doc_content: bool = False
) -> RetrievalDocs:
    top_documents = [
        translate_db_search_doc_to_server_search_doc(
@@ -921,8 +908,7 @@ def get_retrieval_docs_from_search_docs(
        )
        for db_doc in search_docs
    ]
    if sort_by_score:
        top_documents = sorted(top_documents, key=lambda doc: doc.score, reverse=True)  # type: ignore
    top_documents = sorted(top_documents, key=lambda doc: doc.score, reverse=True)  # type: ignore
    return RetrievalDocs(top_documents=top_documents)
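
# The hunk above adds a sort_by_score flag so callers can keep the stored
# (e.g., verified/reranked) document order. A sketch of the two call shapes,
# assuming search_docs/verified_docs are SearchDoc rows from the DB:
#
#     ranked = get_retrieval_docs_from_search_docs(search_docs)  # score-sorted
#     as_stored = get_retrieval_docs_from_search_docs(verified_docs, sort_by_score=False)
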
@@ -1032,7 +1018,7 @@ def log_agent_sub_question_results(
        sub_question = sub_question_answer_result.question
        sub_answer = sub_question_answer_result.answer
        sub_document_results = _create_citation_format_list(
            sub_question_answer_result.context_documents
            sub_question_answer_result.verified_reranked_documents
        )

        sub_question_object = AgentSubQuestion(

@@ -52,18 +52,6 @@ litellm.telemetry = False
_LLM_PROMPT_LONG_TERM_LOG_CATEGORY = "llm_prompt"


class LLMTimeoutError(Exception):
    """
    Exception raised when an LLM call times out.
    """


class LLMRateLimitError(Exception):
    """
    Exception raised when an LLM call is rate limited.
    """


def _base_msg_to_role(msg: BaseMessage) -> str:
    if isinstance(msg, HumanMessage) or isinstance(msg, HumanMessageChunk):
        return "user"
@@ -401,7 +389,6 @@ class DefaultMultiLLM(LLM):
        tool_choice: ToolChoiceOptions | None,
        stream: bool,
        structured_response_format: dict | None = None,
        timeout_override: int | None = None,
    ) -> litellm.ModelResponse | litellm.CustomStreamWrapper:
        # litellm doesn't accept LangChain BaseMessage objects, so we need to convert them
        # to a dict representation
@@ -432,7 +419,7 @@ class DefaultMultiLLM(LLM):
            stream=stream,
            # model params
            temperature=0,
            timeout=timeout_override or self._timeout,
            timeout=self._timeout,
            # For now, we don't support parallel tool calls
            # NOTE: we can't pass this in if tools are not specified
            # or else OpenAI throws an error
@@ -451,12 +438,6 @@ class DefaultMultiLLM(LLM):
        except Exception as e:
            self._record_error(processed_prompt, e)
            # for break pointing
            if isinstance(e, litellm.Timeout):
                raise LLMTimeoutError(e)

            elif isinstance(e, litellm.RateLimitError):
                raise LLMRateLimitError(e)

            raise e
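
# With litellm.Timeout and litellm.RateLimitError re-raised as the dedicated
# types above, callers can branch without importing litellm. A hedged sketch
# (the retry policy is illustrative, not from the source):
#
#     import time
#
#     def invoke_with_retry(llm: LLM, prompt: str, attempts: int = 3) -> BaseMessage:
#         for attempt in range(attempts):
#             try:
#                 return llm.invoke(prompt)
#             except LLMRateLimitError:
#                 time.sleep(2**attempt)  # arbitrary exponential backoff
#             except LLMTimeoutError:
#                 raise  # a longer wait is unlikely to help within the same budget
#         raise LLMRateLimitError("rate limited on every attempt")
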
    @property
@@ -477,7 +458,6 @@ class DefaultMultiLLM(LLM):
        tools: list[dict] | None = None,
        tool_choice: ToolChoiceOptions | None = None,
        structured_response_format: dict | None = None,
        timeout_override: int | None = None,
    ) -> BaseMessage:
        if LOG_DANSWER_MODEL_INTERACTIONS:
            self.log_model_configs()
@@ -485,12 +465,7 @@ class DefaultMultiLLM(LLM):
        response = cast(
            litellm.ModelResponse,
            self._completion(
                prompt=prompt,
                tools=tools,
                tool_choice=tool_choice,
                stream=False,
                structured_response_format=structured_response_format,
                timeout_override=timeout_override,
                prompt, tools, tool_choice, False, structured_response_format
            ),
        )
        choice = response.choices[0]
@@ -508,31 +483,19 @@ class DefaultMultiLLM(LLM):
        tools: list[dict] | None = None,
        tool_choice: ToolChoiceOptions | None = None,
        structured_response_format: dict | None = None,
        timeout_override: int | None = None,
    ) -> Iterator[BaseMessage]:
        if LOG_DANSWER_MODEL_INTERACTIONS:
            self.log_model_configs()

        if DISABLE_LITELLM_STREAMING:
            yield self.invoke(
                prompt,
                tools,
                tool_choice,
                structured_response_format,
                timeout_override,
            )
            yield self.invoke(prompt, tools, tool_choice, structured_response_format)
            return

        output = None
        response = cast(
            litellm.CustomStreamWrapper,
            self._completion(
                prompt=prompt,
                tools=tools,
                tool_choice=tool_choice,
                stream=True,
                structured_response_format=structured_response_format,
                timeout_override=timeout_override,
                prompt, tools, tool_choice, True, structured_response_format
            ),
        )
        try:

@@ -81,7 +81,6 @@ class CustomModelServer(LLM):
        tools: list[dict] | None = None,
        tool_choice: ToolChoiceOptions | None = None,
        structured_response_format: dict | None = None,
        timeout_override: int | None = None,
    ) -> BaseMessage:
        return self._execute(prompt)

@@ -91,6 +90,5 @@ class CustomModelServer(LLM):
        tools: list[dict] | None = None,
        tool_choice: ToolChoiceOptions | None = None,
        structured_response_format: dict | None = None,
        timeout_override: int | None = None,
    ) -> Iterator[BaseMessage]:
        yield self._execute(prompt)

@@ -90,13 +90,12 @@ class LLM(abc.ABC):
        tools: list[dict] | None = None,
        tool_choice: ToolChoiceOptions | None = None,
        structured_response_format: dict | None = None,
        timeout_override: int | None = None,
    ) -> BaseMessage:
        self._precall(prompt)
        # TODO add a postcall to log model outputs independent of concrete class
        # implementation
        return self._invoke_implementation(
            prompt, tools, tool_choice, structured_response_format, timeout_override
            prompt, tools, tool_choice, structured_response_format
        )

    @abc.abstractmethod
@@ -106,7 +105,6 @@ class LLM(abc.ABC):
        tools: list[dict] | None = None,
        tool_choice: ToolChoiceOptions | None = None,
        structured_response_format: dict | None = None,
        timeout_override: int | None = None,
    ) -> BaseMessage:
        raise NotImplementedError

@@ -116,13 +114,12 @@ class LLM(abc.ABC):
        tools: list[dict] | None = None,
        tool_choice: ToolChoiceOptions | None = None,
        structured_response_format: dict | None = None,
        timeout_override: int | None = None,
    ) -> Iterator[BaseMessage]:
        self._precall(prompt)
        # TODO add a postcall to log model outputs independent of concrete class
        # implementation
        messages = self._stream_implementation(
            prompt, tools, tool_choice, structured_response_format, timeout_override
            prompt, tools, tool_choice, structured_response_format
        )

        tokens = []
@@ -141,6 +138,5 @@ class LLM(abc.ABC):
        tools: list[dict] | None = None,
        tool_choice: ToolChoiceOptions | None = None,
        structured_response_format: dict | None = None,
        timeout_override: int | None = None,
    ) -> Iterator[BaseMessage]:
        raise NotImplementedError
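
# Taken together, these hunks thread an optional per-call timeout from the
# public API down to litellm. A condensed sketch of the flow (names as in the
# hunks above):
#
#     llm.invoke(prompt, timeout_override=30)
#       -> self._invoke_implementation(..., timeout_override=30)
#         -> self._completion(..., timeout_override=30)
#           -> litellm.completion(..., timeout=timeout_override or self._timeout)
#
# Passing timeout_override=None (the default) preserves the old behavior of
# always using self._timeout.
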
@@ -238,20 +238,14 @@ async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]:
    await close_auth_limiter()


def log_http_error(request: Request, exc: Exception) -> JSONResponse:
def log_http_error(_: Request, exc: Exception) -> JSONResponse:
    status_code = getattr(exc, "status_code", 500)

    if isinstance(exc, BasicAuthenticationError):
        # For BasicAuthenticationError, just log a brief message without stack trace
        # (almost always spammy)
        logger.debug(f"Authentication failed: {str(exc)}")

    elif status_code == 404 and request.url.path == "/metrics":
        # Log 404 errors for the /metrics endpoint with debug level
        logger.debug(f"404 error for /metrics endpoint: {str(exc)}")
        # For BasicAuthenticationError, just log a brief message without stack trace (almost always spam)
        logger.warning(f"Authentication failed: {str(exc)}")

    elif status_code >= 400:
        print("FORMATTING ERROR")
        error_msg = f"{str(exc)}\n"
        error_msg += "".join(traceback.format_tb(exc.__traceback__))
        logger.error(error_msg)
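
# log_http_error maps an exception to a logged JSONResponse. A hedged sketch of
# how such a handler is typically registered with FastAPI (the real wiring
# lives outside this hunk, so the decorator below is illustrative only):
#
#     @app.exception_handler(Exception)
#     async def handle_any_error(request: Request, exc: Exception) -> JSONResponse:
#         return log_http_error(request, exc)
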
@@ -1,7 +1,3 @@
from onyx.agents.agent_search.shared_graph_utils.constants import (
    AGENT_ANSWER_SEPARATOR,
)

# Standards
SEPARATOR_LINE = "-------"
SEPARATOR_LINE_LONG = "---------------"
@@ -9,6 +5,8 @@ UNKNOWN_ANSWER = "I do not have enough information to answer this question."
NO_RECOVERED_DOCS = "No relevant information recovered"
YES = "yes"
NO = "no"


# Framing/Support/Template Prompts
HISTORY_FRAMING_PROMPT = f"""
For more context, here is the history of the conversation so far that preceded this question:
@@ -18,43 +16,6 @@ For more context, here is the history of the conversation so far that preceded t
""".strip()


COMMON_RAG_RULES = f"""
IMPORTANT RULES:
- If you cannot reliably answer the question solely using the provided information, say that you cannot reliably answer. \
You may give some additional facts you learned, but do not try to invent an answer.

- If the information is empty or irrelevant, just say "{UNKNOWN_ANSWER}".

- If the information is relevant but not fully conclusive, provide an answer to the extent you can but also specify that \
the information is not conclusive and why.

- When constructing/considering categories, focus less on the question and more on the context actually provided! \
Example: if the question is about the products of company A, and the content provided lists a number of products, \
do NOT automatically ASSUME that those belong to company A! So you cannot list those as products of company A, despite \
the fact that the question is about company A's products. What you should say instead is maybe something like \
"Here are a number of products, but I cannot say whether some or all of them belong to company A: \
<proceed with listing the products>". It is ABSOLUTELY ESSENTIAL that the answer constructed reflects \
actual knowledge. For that matter, also consider the title of the document and other information that may be \
provided. If that does not make it clear that - in the example above - the products belong to company A, \
then do not list them as products of company A, just maybe as "A list of products that may not necessarily \
belong to company A". THIS IS IMPORTANT!

- Related, if the context provides a list of items with associated data or other information that seems \
to align with the categories in the question, but does not specify whether the items or the information is \
specific to the exact requested category, then present the information with a disclaimer. Use a title such as \
"I am not sure whether these items (or the information provided) is specific to [relevant category] or whether \
these are all [specific group], but I found this information may be helpful:" \
followed by the list of items and associated data/or information discovered.

- Do not group together items amongst one headline where not all items belong to the category of the headline! \
(Example: "Products used by Company A" where some products listed are not built by Company A, but other companies,
or it is not clear that the products are built by Company A). Only state what you know for sure!

- Do NOT perform any calculations in the answer! Just report on facts.

- If appropriate, organizing your answer in bullet points is often useful.
""".strip()

ASSISTANT_SYSTEM_PROMPT_DEFAULT = "You are an assistant for question-answering tasks."

ASSISTANT_SYSTEM_PROMPT_PERSONA = f"""
@@ -168,44 +129,20 @@ History summary:
# Sub-question
# Intentionally left a copy in case we want to modify this one differently
INITIAL_QUESTION_DECOMPOSITION_PROMPT = f"""
Please create a list of no more than 3 sub-questions whose answers would help to inform the answer \
to the initial question.

The purpose for these sub-questions could be:
1) decomposition to isolate individual entities (i.e., 'compare sales of company A and company B' -> \
Decompose the initial user question into no more than 3 appropriate sub-questions that help to answer the \
original question. The purpose for this decomposition may be to:
1) isolate individual entities (i.e., 'compare sales of company A and company B' -> \
['what are sales for company A', 'what are sales for company B'])

2) clarification and/or disambiguation of ambiguous terms (i.e., 'what is our success with company A' -> \
2) clarify or disambiguate ambiguous terms (i.e., 'what is our success with company A' -> \
['what are our sales with company A','what is our market share with company A', \
'is company A a reference customer for us', etc.])
3) if a term or a metric is essentially clear, but it could relate to various components of an entity and you \
are generally familiar with the entity, then you can decompose the question into sub-questions that are more \
specific to components (i.e., 'what do we do to improve scalability of product X', 'what do we to to improve \
scalability of product X', 'what do we do to improve stability of product X', ...])
4) research an area that could really help to answer the question.

3) if a term or a metric is essentially clear, but it could relate to various aspects of an entity and you \
are generally familiar with the entity, then you can create sub-questions that are more \
specific (i.e., 'what do we do to improve product X' -> 'what do we do to improve scalability of product X', \
'what do we do to improve performance of product X', 'what do we do to improve stability of product X', ...)

4) research individual questions and areas that should really help to ultimately answer the question.

Important:

- Each sub-question should lend itself to be answered by a RAG system. Correspondingly, phrase the question \
in a way that is amenable to that. An example set of sub-questions based on an initial question could look like this:
'what can I do to improve the performance of workflow X' -> \
'what are the settings affecting performance for workflow X', 'are there complaints and bugs related to \
workflow X performance', 'what are performance benchmarks for workflow X', ...

- Consequently, again, don't just decompose, but make sure that the sub-questions have the proper form. I.e., no \
'I', etc.

- Do not(!) create sub-questions that are clarifying question to the person who asked the question, \
like making suggestions or asking the user for more information! This is not useful for the actual \
question-answering process! You need to take the information from the user as it is given to you! \
For example, should the question be of the type 'why does product X perform poorly for customer A', DO NOT create a \
sub-question of the type 'what are the settings that customer A uses for product X?'! A valid sub-question \
could rather be 'which settings for product X have been shown to lead to poor performance for customers?'


And here is the initial question to create sub-questions for, so that you have the full context:
Here is the initial question to decompose:
{SEPARATOR_LINE}
{{question}}
{SEPARATOR_LINE}
@@ -213,79 +150,7 @@ And here is the initial question to create sub-questions for, so that you have t
{{history}}

Do NOT include any text in your answer outside of the list of sub-questions!
Please formulate your answer as a newline-separated list of questions like so (and please ONLY ANSWER WITH THIS LIST! Do not \
add any explanations or other text!):

<sub-question>
<sub-question>
<sub-question>
...

Answer:
""".strip()
# INITIAL PHASE - AWARE OF REFINEMENT
# Sub-question
# Suggest augmenting question generation as well, that a future refinement phase could use
# to generate new questions
# Intentionally left a copy in case we want to modify this one differently
INITIAL_QUESTION_DECOMPOSITION_PROMPT_ASSUMING_REFINEMENT = f"""
Please create a list of no more than 3 sub-questions whose answers would help to inform the answer \
to the initial question.

The purpose for these sub-questions could be:
1) decomposition to isolate individual entities (i.e., 'compare sales of company A and company B' -> \
['what are sales for company A', 'what are sales for company B'])

2) clarification and/or disambiguation of ambiguous terms (i.e., 'what is our success with company A' -> \
['what are our sales with company A','what is our market share with company A', \
'is company A a reference customer for us', etc.])

3) if a term or a metric is essentially clear, but it could relate to various aspects of an entity and you \
are generally familiar with the entity, then you can create sub-questions that are more \
specific (i.e., 'what do we do to improve product X' -> 'what do we do to improve scalability of product X', \
'what do we do to improve performance of product X', 'what do we do to improve stability of product X', ...)

4) research individual questions and areas that should really help to ultimately answer the question.

5) if meaningful, find relevant facts that may inform another set of sub-questions generated after the set you \
create now is answered. Example: 'which products have we implemented at company A, and is this different to \
its competitors?' could potentially create sub-questions 'what products have we implemented at company A', \
and 'who are the competitors of company A'. The additional round of sub-question generation which sees the \
answers for this initial round of sub-question creation could then use the answer to the second sub-question \
(which could be 'company B and C are competitors of company A') to then ask 'which products have we implemented \
at company B', 'which products have we implemented at company C'...

Important:

- Each sub-question should lend itself to be answered by a RAG system. Correspondingly, phrase the question \
in a way that is amenable to that. An example set of sub-questions based on an initial question could look like this:
'what can I do to improve the performance of workflow X' -> \
'what are the settings affecting performance for workflow X', 'are there complaints and bugs related to \
workflow X performance', 'what are performance benchmarks for workflow X', ...

- Consequently, again, don't just decompose, but make sure that the sub-questions have the proper form. I.e., no \
'I', etc.

- Do not(!) create sub-questions that are clarifying question to the person who asked the question, \
like making suggestions or asking the user for more information! This is not useful for the actual \
question-answering process! You need to take the information from the user as it is given to you! \
For example, should the question be of the type 'why does product X perform poorly for customer A', DO NOT create a \
sub-question of the type 'what are the settings that customer A uses for product X?'! A valid sub-question \
could rather be 'which settings for product X have been shown to lead to poor performance for customers?'


And here is the initial question to create sub-questions for:
{SEPARATOR_LINE}
{{question}}
{SEPARATOR_LINE}

{{history}}

Do NOT include any text in your answer outside of the list of sub-questions!
Please formulate your answer as a newline-separated list of questions like so (and please ONLY ANSWER WITH THIS LIST! Do not \
add any explanations or other text!):

Please formulate your answer as a newline-separated list of questions like so:
<sub-question>
<sub-question>
<sub-question>
@@ -297,47 +162,23 @@ Answer:

# TODO: combine shared pieces with INITIAL_QUESTION_DECOMPOSITION_PROMPT
INITIAL_DECOMPOSITION_PROMPT_QUESTIONS_AFTER_SEARCH = f"""
Please create a list of no more than 3 sub-questions whose answers would help to inform the answer \
to the initial question.

The purpose for these sub-questions could be:
1) decomposition to isolate individual entities (i.e., 'compare sales of company A and company B' -> \
Decompose the initial user question into no more than 3 appropriate sub-questions that help to answer the \
original question. The purpose for this decomposition may be to:
1) isolate individual entities (i.e., 'compare sales of company A and company B' -> \
['what are sales for company A', 'what are sales for company B'])

2) clarification and/or disambiguation of ambiguous terms (i.e., 'what is our success with company A' -> \
2) clarify or disambiguate ambiguous terms (i.e., 'what is our success with company A' -> \
['what are our sales with company A','what is our market share with company A', \
'is company A a reference customer for us', etc.])

3) if a term or a metric is essentially clear, but it could relate to various aspects of an entity and you \
are generally familiar with the entity, then you can create sub-questions that are more \
specific (i.e., 'what do we do to improve product X' -> 'what do we do to improve scalability of product X', \
'what do we do to improve performance of product X', 'what do we do to improve stability of product X', ...)

4) research individual questions and areas that should really help to ultimately answer the question.

Important:

- Each sub-question should lend itself to be answered by a RAG system. Correspondingly, phrase the question \
in a way that is amenable to that. An example set of sub-questions based on an initial question could look like this:
'what can I do to improve the performance of workflow X' -> \
'what are the settings affecting performance for workflow X', 'are there complaints and bugs related to \
workflow X performance', 'what are performance benchmarks for workflow X', ...

- Consequently, again, don't just decompose, but make sure that the sub-questions have the proper form. I.e., no \
'I', etc.

- Do not(!) create sub-questions that are clarifying question to the person who asked the question, \
like making suggestions or asking the user for more information! This is not useful for the actual \
question-answering process! You need to take the information from the user as it is given to you! \
For example, should the question be of the type 'why does product X perform poorly for customer A', DO NOT create a \
sub-question of the type 'what are the settings that customer A uses for product X?'! A valid sub-question \
could rather be 'which settings for product X have been shown to lead to poor performance for customers?'

3) if a term or a metric is essentially clear, but it could relate to various components of an entity and you \
are generally familiar with the entity, then you can decompose the question into sub-questions that are more \
specific to components (i.e., 'what do we do to improve scalability of product X', 'what do we to to improve \
scalability of product X', 'what do we do to improve stability of product X', ...])
4) research an area that could really help to answer the question.

To give you some context, you will see below also some documents that may relate to the question. Please only \
use this information to learn what the question is approximately asking about, but do not focus on the details \
to construct the sub-questions! Also, some of the entities, relationships and terms that are in the dataset may \
not be in these few documents, so DO NOT focus too much on the documents when constructing the sub-questions! \
not be in these few documents, so DO NOT focussed too much on the documents when constructing the sub-questions! \
Decomposition and disambiguations are most important!

Here are the sample docs to give you some context:
@@ -345,7 +186,7 @@ Here are the sample docs to give you some context:
{{sample_doc_str}}
{SEPARATOR_LINE}

And here is the initial question to create sub-questions for, so that you have the full context:
And here is the initial question to decompose:
{SEPARATOR_LINE}
{{question}}
{SEPARATOR_LINE}
@@ -353,9 +194,7 @@ And here is the initial question to create sub-questions for, so that you have t
{{history}}

Do NOT include any text in your answer outside of the list of sub-questions!\
Please formulate your answer as a newline-separated list of questions like so (and please ONLY ANSWER WITH THIS LIST! Do not \
add any explanations or other text!):

Please formulate your answer as a newline-separated list of questions like so:
<sub-question>
<sub-question>
<sub-question>
@@ -364,84 +203,6 @@ add any explanations or other text!):

Answer:
""".strip()

INITIAL_DECOMPOSITION_PROMPT_QUESTIONS_AFTER_SEARCH_ASSUMING_REFINEMENT = f"""
Please create a list of no more than 3 sub-questions whose answers would help to inform the answer \
to the initial question.

The purpose for these sub-questions could be:
1) decomposition to isolate individual entities (i.e., 'compare sales of company A and company B' -> \
['what are sales for company A', 'what are sales for company B'])

2) clarification and/or disambiguation of ambiguous terms (i.e., 'what is our success with company A' -> \
['what are our sales with company A','what is our market share with company A', \
'is company A a reference customer for us', etc.])

3) if a term or a metric is essentially clear, but it could relate to various aspects of an entity and you \
are generally familiar with the entity, then you can create sub-questions that are more \
specific (i.e., 'what do we do to improve product X' -> 'what do we do to improve scalability of product X', \
'what do we do to improve performance of product X', 'what do we do to improve stability of product X', ...)

4) research individual questions and areas that should really help to ultimately answer the question.

5) if applicable and useful, consider using sub-questions to gather relevant information that can inform a \
subsequent set of sub-questions. The answers to your initial sub-questions will be available when generating \
the next set.
For example, if you start with the question, "Which products have we implemented at Company A, and how does \
this compare to its competitors?" you might first create sub-questions like "What products have we implemented \
at Company A?" and "Who are the competitors of Company A?"
The answer to the second sub-question, such as "Company B and C are competitors of Company A," can then be used \
to generate more specific sub-questions in the next round, like "Which products have we implemented at Company B?" \
and "Which products have we implemented at Company C?"

You'll be the judge!

Important:

- Each sub-question should lend itself to be answered by a RAG system. Correspondingly, phrase the question \
in a way that is amenable to that. An example set of sub-questions based on an initial question could look like this:
'what can I do to improve the performance of workflow X' -> \
'what are the settings affecting performance for workflow X', 'are there complaints and bugs related to \
workflow X performance', 'what are performance benchmarks for workflow X', ...

- Consequently, again, don't just decompose, but make sure that the sub-questions have the proper form. I.e., no \
'I', etc.

- Do not(!) create sub-questions that are clarifying question to the person who asked the question, \
like making suggestions or asking the user for more information! This is not useful for the actual \
question-answering process! You need to take the information from the user as it is given to you! \
For example, should the question be of the type 'why does product X perform poorly for customer A', DO NOT create a \
sub-question of the type 'what are the settings that customer A uses for product X?'! A valid sub-question \
could rather be 'which settings for product X have been shown to lead to poor performance for customers?'

To give you some context, you will see below also some documents that may relate to the question. Please only \
use this information to learn what the question is approximately asking about, but do not focus on the details \
to construct the sub-questions! Also, some of the entities, relationships and terms that are in the dataset may \
not be in these few documents, so DO NOT focus too much on the documents when constructing the sub-questions! \
Decomposition and disambiguations are most important!

Here are the sample docs to give you some context:
{SEPARATOR_LINE}
{{sample_doc_str}}
{SEPARATOR_LINE}

And here is the initial question to create sub-questions for, so that you have the full context:
{SEPARATOR_LINE}
{{question}}
{SEPARATOR_LINE}

{{history}}

Do NOT include any text in your answer outside of the list of sub-questions!\
Please formulate your answer as a newline-separated list of questions like so (and please ONLY ANSWER WITH THIS LIST! Do not \
add any explanations or other text!):

<sub-question>
<sub-question>
<sub-question>
...

Answer:
""".strip()

# Retrieval
QUERY_REWRITING_PROMPT = f"""
@@ -496,35 +257,23 @@ Answer:
""".strip()


# Sub-Question Answer Generation
# Sub-Question Anser Generation
SUB_QUESTION_RAG_PROMPT = f"""
Use the context provided below - and only the provided context - to answer the given question. \
(Note that the answer is in service of answering a broader question, given below as 'motivation').

Make sure that you keep all relevant information, specifically as it concerns the ultimate goal. \
Again, only use the provided context and do not use your internal knowledge! If you cannot answer the \
question based on the context, say "{UNKNOWN_ANSWER}". It is a matter of life and death that you do NOT \
use your internal knowledge, just the provided information!

Make sure that you keep all relevant information, specifically as it concerns to the ultimate goal. \
(But keep other details as well.)

{COMMON_RAG_RULES}

- Make sure that you only state what you actually can positively learn from the provided context! Particularly \
don't make assumptions! Example: if i) a question you should answer is asking for products of companies that \
are competitors of company A, and ii) the context mentions products of companies A, B, C, D, E, etc., do NOT assume \
that B, C, D, E, etc. are competitors of A! All you know is that these are products of a number of companies, and you \
would have to rely on another question - that you do not have access to - to learn which companies are competitors of A.
Correspondingly, you should not say that these are the products of competitors of A, but rather something like \
"Here are some products of various companies".

It is critical that you provide inline citations in the format [D1], [D2], [D3], etc! Please use format [D1][D2] and NOT \
[D1, D2] format if you cite two or more documents together! \
It is critical that you provide inline citations in the format [D1], [D2], [D3], etc! \
It is important that the citation is close to the information it supports. \
Proper citations are very important to the user!

Here is the document context for you to consider:
{SEPARATOR_LINE}
{{context}}
{SEPARATOR_LINE}

For your general information, here is the ultimate motivation for the question you need to answer:
For your general information, here is the ultimate motivation:
{SEPARATOR_LINE}
{{original_question}}
{SEPARATOR_LINE}
@@ -534,8 +283,12 @@ And here is the actual question I want you to answer based on the context above
{{question}}
{SEPARATOR_LINE}

Please keep your answer brief and concise, and focus on facts and data. (Again, only state what you see in the documents \
for sure and communicate if/in what way this may or may not relate to the question you need to answer!)
Here is the context:
{SEPARATOR_LINE}
{{context}}
{SEPARATOR_LINE}

Please keep your answer brief and concise, and focus on facts and data.

Answer:
""".strip()
@@ -568,18 +321,22 @@ Use the information provided below - and only the provided information - to answ

The information provided below consists of:
1) a number of answered sub-questions - these are very important to help you organize your thoughts and your answer
2) a number of documents that are deemed relevant for the question.
2) a number of documents that deemed relevant for the question.

{{history}}

It is critical that you provide proper inline citations to documents in the format [D1], [D2], [D3], etc.! \
It is critical that you provide prover inline citations to documents in the format [D1], [D2], [D3], etc.! \
It is important that the citation is close to the information it supports. If you have multiple citations that support \
a fact, please cite for example as [D1][D3], or [D2][D4], etc. \
Feel free to also cite sub-questions in addition to documents, but make sure that you have documents cited with the \
sub-question citation. If you want to cite both a document and a sub-question, please use [D1][Q3], or [D2][D7][Q4], etc. \
Again, please NEVER cite sub-questions without a document citation! Proper citations are very important for the user!

{COMMON_RAG_RULES}
IMPORTANT RULES:
- If you cannot reliably answer the question solely using the provided information, say that you cannot reliably answer. \
You may give some additional facts you learned, but do not try to invent an answer.
- If the information is empty or irrelevant, just say "{UNKNOWN_ANSWER}".
- If the information is relevant but not fully conclusive, specify that the information is not conclusive and say why.

Again, you should be sure that the answer is supported by the information provided!

@@ -604,9 +361,7 @@ And here is the question I want you to answer based on the information above:
{{question}}
{SEPARATOR_LINE}

Please keep your answer brief and concise, and focus on facts and data. (Again, only state what you see in the documents for \
sure and communicate if/in what way this may or may not relate to the question you need to answer! Use the answered \
sub-questions as well, but be cautious and reconsider the docments again for validation.)
Please keep your answer brief and concise, and focus on facts and data.

Answer:
""".strip()
@@ -621,7 +376,11 @@ The information provided below consists of a number of documents that were deeme

{{history}}

{COMMON_RAG_RULES}
IMPORTANT RULES:
- If you cannot reliably answer the question solely using the provided information, say that you cannot reliably answer. \
You may give some additional facts you learned, but do not try to invent an answer.
- If the information is irrelevant, just say "{UNKNOWN_ANSWER}".
- If the information is relevant but not fully conclusive, specify that the information is not conclusive and say why.

Again, you should be sure that the answer is supported by the information provided!

@@ -640,8 +399,7 @@ And here is the question I want you to answer based on the context above:
{{question}}
{SEPARATOR_LINE}

Please keep your answer brief and concise, and focus on facts and data. (Again, only state what you see in the documents \
for sure and communicate if/in what way this may or may not relate to the question you need to answer!)
Please keep your answer brief and concise, and focus on facts and data.

Answer:
""".strip()
@@ -681,12 +439,6 @@ independently without the original question available
- For each sub-question, please also provide a search term that can be used to retrieve relevant documents from a document store.
- Consider specifically the sub-questions that were suggested but not answered. This is a sign that they are not answerable \
with the available context, and you should not ask similar questions.
- Do not(!) create sub-questions that are clarifying question to the person who asked the question, \
like making suggestions or asking the user for more information! This is not useful for the actual \
question-answering process! You need to take the information from the user as it is given to you! \
For example, should the question be of the type 'why does product X perform poorly for customer A', DO NOT create a \
sub-question of the type 'what are the settings that customer A uses for product X?'! A valid sub-question \
could rather be 'which settings for product X have been shown to lead to poor performance for customers?'

Here is the initial question:
{SEPARATOR_LINE}
@@ -722,111 +474,7 @@ objects/relationships/terms you can ask about! Do not ask about entities, terms
Again, please find questions that are NOT overlapping too much with the already answered sub-questions or those that \
already were suggested and failed. In other words - what can we try in addition to what has been tried so far?

Generate the list of questions separated by one new line like this (and please ONLY ANSWER WITH THIS LIST! Do not \
add any explanations or other text!):

<sub-question 1>
<sub-question 2>
<sub-question 3>
...""".strip()
|
||||
|
||||
REFINEMENT_QUESTION_DECOMPOSITION_PROMPT_W_INITIAL_SUBQUESTION_ANSWERS = f"""
|
||||
An initial user question needs to be answered. An initial answer has been provided but it wasn't quite good enough. \
|
||||
Also, some sub-questions had been answered and this information has been used to provide the initial answer. \
|
||||
Some other subquestions may have been suggested based on little knowledge, but they were not directly answerable. \
|
||||
Also, some entities, relationships and terms are given to you so that you have an idea of how the available data looks like.
|
||||
|
||||
Your role is to generate 2-4 new sub-questions that would help to answer the initial question, considering:
|
||||
|
||||
1) The initial question
|
||||
2) The initial answer that was found to be unsatisfactory
|
||||
3) The sub-questions that were answered AND their answers
|
||||
4) The sub-questions that were suggested but not answered (and that you should not repeat!)
|
||||
5) The entities, relationships and terms that were extracted from the context
|
||||
|
||||
The individual questions should be answerable by a good RAG system. So a good idea would be to use the sub-questions to \
|
||||
resolve ambiguities and/or to separate the question for different entities that may be involved in the original question, \
|
||||
but in a way that does not duplicate questions that were already tried.
|
||||
|
||||
Additional Guidelines:
|
||||
|
||||
- The new sub-questions should be specific to the question and provide richer context for the question, resolve ambiguities, \
|
||||
or address shortcoming of the initial answer
|
||||
|
||||
- Each new sub-question - when answered - should be relevant for the answer to the original question
|
||||
|
||||
- The new sub-questions should be free from comparisons, ambiguities,judgements, aggregations, or any other complications that \
|
||||
may require extra context
|
||||
|
||||
- The new sub-questions MUST have the full context of the original question so that it can be executed by a RAG system \
|
||||
independently without the original question available
|
||||
Example:
|
||||
- initial question: "What is the capital of France?"
|
||||
- bad sub-question: "What is the name of the river there?"
|
||||
- good sub-question: "What is the name of the river that flows through Paris?"
|
||||
|
||||
- For each new sub-question, please also provide a search term that can be used to retrieve relevant documents \
|
||||
from a document store.
|
||||
|
||||
- Consider specifically the sub-questions that were suggested but not answered. This is a sign that they are not answerable \
|
||||
with the available context, and you should not ask similar questions.
|
||||
|
||||
- Pay attention to the answers of previous sub-question to make your sub-questions more specific! \
|
||||
Often the initial sub-questions were set up to give you critical information that you should use to generate new sub-questions.\
|
||||
For example, if the answer to a an earlier sub-question is \
|
||||
'Company B and C are competitors of Company A', you should not ask now a new sub-question involving the term 'competitors', \
|
||||
as you already have the information to create a more precise question - you should instead explicitly reference \
|
||||
'Company B' and 'Company C' in your new sub-questions, as these are the competitors based on the previously answered question.
|
||||
|
||||
- Be precise(!) and don't make inferences you cannot be sure about! For example, in the previous example \
|
||||
where Company B and Company C were identified as competitors of Company A, and then you also get information on \
|
||||
companies D and E, do not make the inference that these are also competitors of Company A! Stick to the information you have!
|
||||
(Also, don't assume that companies B and C arethe only competitors of A, unless stated!)
|
||||
|
||||
- Do not(!) create sub-questions that are clarifying question *to the person who asked the question*, \
|
||||
like making suggestions or asking the user for more information! This is not useful for the actual \
|
||||
question-answering process! You need to take the information from the user as it is given to you! \
|
||||
For example, should the question be of the type 'why does product X perform poorly for customer A', DO NOT create a \
|
||||
sub-question of the type 'what are the settings that customer A uses for product X?'! A valid sub-question \
|
||||
could rather be 'which settings for product X have been shown to lead to poor performance for customers?'
|
||||
|
||||
Here is the initial question:
|
||||
{SEPARATOR_LINE}
|
||||
{{question}}
|
||||
{SEPARATOR_LINE}
|
||||
{{history}}
|
||||
|
||||
Here is the initial sub-optimal answer:
|
||||
{SEPARATOR_LINE}
|
||||
{{base_answer}}
|
||||
{SEPARATOR_LINE}
|
||||
|
||||
Here are the sub-questions that were answered:
|
||||
{SEPARATOR_LINE}
|
||||
{{answered_subquestions_with_answers}}
|
||||
{SEPARATOR_LINE}
|
||||
|
||||
Here are the sub-questions that were suggested but not answered:
|
||||
{SEPARATOR_LINE}
|
||||
{{failed_sub_questions}}
|
||||
{SEPARATOR_LINE}
|
||||
|
||||
And here are the entities, relationships and terms extracted from the context:
|
||||
{SEPARATOR_LINE}
|
||||
{{entity_term_extraction_str}}
|
||||
{SEPARATOR_LINE}
|
||||
|
||||
Please generate the list of good, fully contextualized sub-questions that would help to address the main question. \
|
||||
Specifically pay attention also to the entities, relationships and terms extracted, as these indicate what type of \
|
||||
objects/relationships/terms you can ask about! Do not ask about entities, terms or relationships that are not mentioned \
|
||||
in the 'entities, relationships and terms' section.
|
||||
|
||||
Again, please find questions that are NOT overlapping too much with the already answered sub-questions or those that \
|
||||
already were suggested and failed. In other words - what can we try in addition to what has been tried so far?
|
||||
|
||||
Generate the list of questions separated by one new line like this (and please ONLY ANSWER WITH THIS LIST! Do not \
|
||||
add any explanations or other text!):
|
||||
|
||||
Generate the list of questions separated by one new line like this:
|
||||
<sub-question 1>
|
||||
<sub-question 2>
|
||||
<sub-question 3>
|
||||
@@ -841,7 +489,7 @@ Your task is to improve on a given answer to a question, as the initial answer w
|
||||
Use the information provided below - and only the provided information - to write your new and improved answer.
|
||||
|
||||
The information provided below consists of:
|
||||
1) an initial answer that was given but likely found to be lacking in some way.
|
||||
1) an initial answer that was given but found to be lacking in some way.
|
||||
2) a number of answered sub-questions - these are very important(!) and definitely should help you to answer the main \
|
||||
question. Note that the sub-questions have a type, 'initial' and 'refined'. The 'initial' ones were available for the \
|
||||
creation of the initial answer, but the 'refined' were not, they are new. So please use the 'refined' sub-questions in \
|
||||
@@ -851,7 +499,6 @@ particular to update/extend/correct/enrich the initial answer and to add more de
|
||||
the relevant document for a fact!
|
||||
|
||||
It is critical that you provide proper inline citations to documents in the format [D1], [D2], [D3], etc! \
|
||||
Please use format [D1][D2] and NOT [D1, D2] format if you cite two or more documents together! \
|
||||
It is important that the citation is close to the information it supports. \
|
||||
DO NOT just list all of the citations at the very end. \
|
||||
Feel free to also cite sub-questions in addition to documents, \
|
||||
@@ -862,7 +509,14 @@ Proper citations are very important for the user!
|
||||
|
||||
{{history}}
|
||||
|
||||
{COMMON_RAG_RULES}
|
||||
IMPORTANT RULES:
|
||||
- If you cannot reliably answer the question solely using the provided information, say that you cannot reliably answer. \
|
||||
You may give some additional facts you learned, but do not try to invent an answer.
|
||||
- If the information is empty or irrelevant, just say "{UNKNOWN_ANSWER}".
|
||||
- If the information is relevant but not fully conclusive, provide an answer to the extent you can but also specify that \
|
||||
the information is not conclusive and why.
|
||||
- Ignore any existing citations within the answered sub-questions, like [D1]... and [Q2]! The citations you will need to \
|
||||
use will need to refer to the documents (and sub-questions) that you are explicitly presented with below!
|
||||
|
||||
Again, you should be sure that the answer is supported by the information provided!
|
||||
|
||||
@@ -891,9 +545,7 @@ Lastly, here is the main question I want you to answer based on the information
|
||||
{{question}}
|
||||
{SEPARATOR_LINE}
|
||||
|
||||
Please keep your answer brief and concise, and focus on facts and data. (Again, only state what you see in the documents for \
|
||||
sure and communicate if/in what way this may or may not relate to the question you need to answer! Use the answered \
|
||||
sub-questions as well, but be cautious and reconsider the docments again for validation.)
|
||||
Please keep your answer brief and concise, and focus on facts and data.
|
||||
|
||||
Answer:
|
||||
""".strip()
|
||||
@@ -909,13 +561,18 @@ The information provided below consists of:
|
||||
2) a number of documents that were also deemed relevant for the question.
|
||||
|
||||
It is critical that you provide proper inline citations to documents in the format [D1], [D2], [D3], etc! \
|
||||
Please use format [D1][D2] and NOT [D1, D2] format if you cite two or more documents together! \
|
||||
It is important that the citation is close to the information it supports. \
|
||||
DO NOT just list all of the citations at the very end of your response. Citations are very important for the user!
|
||||
|
||||
{{history}}
|
||||
|
||||
{COMMON_RAG_RULES}
|
||||
IMPORTANT RULES:
|
||||
- If you cannot reliably answer the question solely using the provided information, say that you cannot reliably answer. \
|
||||
You may give some additional facts you learned, but do not try to invent an answer.
|
||||
- If the information is empty or irrelevant, just say "{UNKNOWN_ANSWER}".
|
||||
- If the information is relevant but not fully conclusive, provide an answer to the extent you can but also specify that \
|
||||
the information is not conclusive and why.
|
||||
|
||||
Again, you should be sure that the answer is supported by the information provided!
|
||||
|
||||
Try to keep your answer concise. But also highlight uncertainties you may have should there be substantial ones, \
|
||||
@@ -940,103 +597,11 @@ Lastly, here is the question I want you to answer based on the information above
|
||||
{{question}}
|
||||
{SEPARATOR_LINE}
|
||||
|
||||
Please keep your answer brief and concise, and focus on facts and data. (Again, only state what you see in the documents for \
|
||||
sure and communicate if/in what way this may or may not relate to the question you need to answer!)
|
||||
Please keep your answer brief and concise, and focus on facts and data.
|
||||
|
||||
Answer:
|
||||
""".strip()

REFINED_ANSWER_VALIDATION_PROMPT = f"""
{{persona_specification}}

Your task is to verify whether a given answer is truthful and accurate, and supported by the facts that you \
will be provided with.

The information provided below consists of:

1) a question that needed to be answered

2) a proposed answer to the question, whose accuracy you should assess

3) potentially, a brief summary of the history of the conversation thus far, as it may give more context \
to the question. Note that the statements in the history are NOT considered as facts, but ONLY serve to \
give context to the question.

4) a number of answered sub-questions - you can take the answers as facts for these purposes.

5) a number of relevant documents that should support the answer and that you should use as fact, \
i.e., if a statement in the document backs up a statement in the answer, then that statement in the answer \
should be considered as true.


IMPORTANT RULES AND CONSIDERATIONS:

- Please consider the statements made in the proposed answer and assess whether they are truthful and accurate, based \
on the provided sub-answers and the documents. (Again, the history is NOT considered as facts!)

- Look in particular for:
* material statements that are not supported by the sub-answers or the documents
* assignments and groupings that are not supported, like claiming company A is a competitor of company B when this is \
not explicitly supported by documents or sub-answers, and guesses or interpretations unless explicitly asked for

- Look also at the citations in the proposed answer and assess whether they are appropriate given the statements \
in the proposed answer that cite the documents.

- Are items grouped together under one headline where not all items belong to the category of the headline? \
(Example: "Products used by Company A" where some products listed are not used by Company A)

- Does the proposed answer address the question in full?

- Is the answer specific to the question? Example: if the question asks for the prices for products by Company A, \
but the answer lists the prices for products by Company A and Company B, or products it cannot be sure are by \
Company A, then this is not quite specific enough to the question and the answer should be rejected.

- Similarly, if the question asks for properties of a certain class but the proposed answer lists or includes entities \
that are not of that class without very explicitly saying so, then the answer should be considered inaccurate.

- If there are any calculations in the proposed answer that are not supported by the documents, they need to be tested. \
If any calculation is wrong, the proposed answer should be considered as not trustworthy.


Here is the information:
{SEPARATOR_LINE_LONG}

QUESTION:
{SEPARATOR_LINE}
{{question}}
{SEPARATOR_LINE}

PROPOSED ANSWER:
{SEPARATOR_LINE}
{{proposed_answer}}
{SEPARATOR_LINE}

Here is the additional contextual information:
{SEPARATOR_LINE_LONG}

{{history}}

Sub-questions and their answers (to be considered as facts):
{SEPARATOR_LINE}
{{answered_sub_questions}}
{SEPARATOR_LINE}

And here are the relevant documents that support the sub-question answers, and that are relevant for the actual question:
{SEPARATOR_LINE}
{{relevant_docs}}
{SEPARATOR_LINE}


Please think through this step by step. Format your response just as a string in the following format:

Analysis: <think through your reasoning as outlined in the 'IMPORTANT RULES AND CONSIDERATIONS' section above, \
but keep it short. Come to a conclusion whether the proposed answer can be trusted>
Comments: <state your condensed comments you would give to a user reading the proposed answer, regarding the accuracy and \
specificity.>
{AGENT_ANSWER_SEPARATOR} <answer here only with yes or no, whether the proposed answer can be trusted. Base this on your \
analysis, but only say 'yes' (trustworthy) or 'no' (not trustworthy)>
""".strip()


INITIAL_REFINED_ANSWER_COMPARISON_PROMPT = f"""
For the given question, please compare the initial answer and the refined answer and determine if the refined answer is \

@@ -1,7 +1,6 @@
from onyx.configs.constants import KV_SETTINGS_KEY
from onyx.configs.constants import OnyxRedisLocks
from onyx.key_value_store.factory import get_kv_store
from onyx.key_value_store.interface import KvKeyNotFoundError
from onyx.redis.redis_pool import get_redis_client
from onyx.server.settings.models import Settings
from onyx.utils.logger import setup_logger
@@ -18,9 +17,6 @@ def load_settings() -> Settings:
        settings = (
            Settings.model_validate(stored_settings) if stored_settings else Settings()
        )
    except KvKeyNotFoundError:
        logger.error(f"No settings found in KV store for key: {KV_SETTINGS_KEY}")
        settings = Settings()
    except Exception as e:
        logger.error(f"Error loading settings from KV store: {str(e)}")
        settings = Settings()

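Read as a whole, the hunk above implies a load_settings that falls back to a default Settings object whenever the KV-store lookup fails. A sketch of the resulting function follows; the try line and the KV-store read sit outside the shown context lines, so they are reconstructed here (the .load() call in particular is an assumption) and may not match the file exactly:

# Approximate reconstruction of the function after this change; the lines
# outside the hunk's context (the try and the KV read) are assumptions.
def load_settings() -> Settings:
    try:
        stored_settings = get_kv_store().load(KV_SETTINGS_KEY)  # assumed call
        settings = (
            Settings.model_validate(stored_settings) if stored_settings else Settings()
        )
    except KvKeyNotFoundError:
        logger.error(f"No settings found in KV store for key: {KV_SETTINGS_KEY}")
        settings = Settings()
    except Exception as e:
        logger.error(f"Error loading settings from KV store: {str(e)}")
        settings = Settings()
    return settings
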
@@ -34,8 +34,8 @@ langchain-core==0.3.24
langchain-openai==0.2.9
langchain-text-splitters==0.3.2
langchainhub==0.1.21
langgraph==0.2.72
langgraph-checkpoint==2.0.13
langgraph==0.2.59
langgraph-checkpoint==2.0.5
langgraph-sdk==0.1.44
litellm==1.60.2
lxml==5.3.0

@@ -1,81 +0,0 @@
import os
import time

import pytest

from onyx.configs.constants import DocumentSource
from onyx.connectors.gitbook.connector import GitbookConnector


@pytest.fixture
def gitbook_connector() -> GitbookConnector:
    connector = GitbookConnector(
        space_id=os.environ["GITBOOK_SPACE_ID"],
    )
    connector.load_credentials(
        {
            "gitbook_api_key": os.environ["GITBOOK_API_KEY"],
        }
    )
    return connector


def test_gitbook_connector_basic(gitbook_connector: GitbookConnector) -> None:
    doc_batch_generator = gitbook_connector.load_from_state()

    # Get first batch of documents
    doc_batch = next(doc_batch_generator)
    assert len(doc_batch) > 0

    # Verify first document structure
    doc = doc_batch[0]

    # Basic document properties
    assert doc.id.startswith("gitbook-")
    assert doc.semantic_identifier == "Acme Corp Internal Handbook"
    assert doc.source == DocumentSource.GITBOOK

    # Metadata checks
    assert "path" in doc.metadata
    assert "type" in doc.metadata
    assert "kind" in doc.metadata

    # Section checks
    assert len(doc.sections) == 1
    section = doc.sections[0]

    # Content specific checks
    content = section.text

    # Check for specific content elements
    assert "* Fruit Shopping List:" in content
    assert "> test quote it doesn't mean anything" in content

    # Check headings
    assert "# Heading 1" in content
    assert "## Heading 2" in content
    assert "### Heading 3" in content

    # Check task list
    assert "- [ ] Uncompleted Task" in content
    assert "- [x] Completed Task" in content

    # Check table content
    assert "| ethereum | 10 | 3000 |" in content
    assert "| bitcoin | 2 | 98000 |" in content

    # Check paragraph content
    assert "New York City comprises 5 boroughs" in content
    assert "Empire State Building" in content

    # Check code block (just verify presence of some unique code elements)
    assert "function fizzBuzz(n)" in content
    assert 'res.push("FizzBuzz")' in content

    assert section.link  # Should have a URL

    # Time-based polling test
    current_time = time.time()
    poll_docs = gitbook_connector.poll_source(0, current_time)
    poll_batch = next(poll_docs)
    assert len(poll_batch) > 0
@@ -1,39 +1,18 @@
import csv
import datetime
import json
import os
from collections import defaultdict
from datetime import datetime
from datetime import timedelta
from typing import Any

import yaml

from onyx.agents.agent_search.deep_search.main.graph_builder import (
    main_graph_builder,
)
from onyx.agents.agent_search.deep_search.main.graph_builder import (
    main_graph_builder as main_graph_builder_a,
)
from onyx.agents.agent_search.deep_search.main.states import (
    MainInput as MainInput_a,
)
from onyx.agents.agent_search.run_graph import run_basic_graph
from onyx.agents.agent_search.run_graph import run_main_graph
from onyx.agents.agent_search.deep_search.main.states import MainInput
from onyx.agents.agent_search.shared_graph_utils.utils import get_test_config
from onyx.chat.models import AgentAnswerPiece
from onyx.chat.models import OnyxAnswerPiece
from onyx.chat.models import RefinedAnswerImprovement
from onyx.chat.models import StreamStopInfo
from onyx.chat.models import StreamType
from onyx.chat.models import SubQuestionPiece
from onyx.context.search.models import SearchRequest
from onyx.db.engine import get_session_context_manager
from onyx.llm.factory import get_default_llms
from onyx.tools.force import ForceUseTool
from onyx.tools.tool_implementations.search.search_tool import SearchTool
from onyx.utils.logger import setup_logger

logger = setup_logger()


cwd = os.getcwd()
@@ -56,183 +35,95 @@ input_file_object = open(
)
output_file = f"{OUTPUT_DIR}/agent_test_output.csv"

csv_output_data: list[list[str]] = []

test_data = json.load(input_file_object)
example_data = test_data["examples"]
example_ids = test_data["example_ids"]

failed_example_ids: list[int] = []

with get_session_context_manager() as db_session:
    output_data: dict[str, Any] = {}

    primary_llm, fast_llm = get_default_llms()
    output_data = []

    for example in example_data:
        query_start_time: datetime = datetime.now()
        example_id: int = int(example.get("id"))
        example_question: str = example.get("question")
        if not example_question or not example_id:
            continue
        example_id = example["id"]
        if len(example_ids) > 0 and example_id not in example_ids:
            continue

        logger.info(f"{query_start_time} -- Processing example {example_id}")
        example_question = example["question"]
        target_sub_questions = example.get("target_sub_questions", [])
        num_target_sub_questions = len(target_sub_questions)
        search_request = SearchRequest(query=example_question)

        try:
            example_question = example["question"]
            target_sub_questions = example.get("target_sub_questions", [])
            num_target_sub_questions = len(target_sub_questions)
            search_request = SearchRequest(query=example_question)
            config, search_tool = get_test_config(
                db_session=db_session,
                primary_llm=primary_llm,
                fast_llm=fast_llm,
                search_request=search_request,
            )

            initial_answer_duration: timedelta | None = None
            refined_answer_duration: timedelta | None = None
            base_answer_duration: timedelta | None = None
            inputs = MainInput()

            logger.debug("\n\nTEST QUERY START\n\n")
            start_time = datetime.datetime.now()

            graph = main_graph_builder_a()
            compiled_graph = graph.compile()
            query_end_time = datetime.now()
            question_result = compiled_graph.invoke(
                input=inputs, config={"metadata": {"config": config}}
            )
            end_time = datetime.datetime.now()

            search_request = SearchRequest(
                # query="what can you do with gitlab?",
                # query="What are the guiding principles behind the development of cockroachDB",
                # query="What are the temperatures in Munich, Hawaii, and New York?",
                # query="When was Washington born?",
                # query="What is Onyx?",
                # query="What is the difference between astronomy and astrology?",
                query=example_question,
            duration = end_time - start_time
            if num_target_sub_questions > 0:
                chunk_expansion_ratio = (
                    question_result["initial_agent_stats"]
                    .get("agent_effectiveness", {})
                    .get("utilized_chunk_ratio", None)
                )
                support_effectiveness_ratio = (
                    question_result["initial_agent_stats"]
                    .get("agent_effectiveness", {})
                    .get("support_ratio", None)
                )
            else:
                chunk_expansion_ratio = None
                support_effectiveness_ratio = None

            answer_tokens: dict[str, list[str]] = defaultdict(list)
            generated_sub_questions = question_result.get("generated_sub_questions", [])
            num_generated_sub_questions = len(generated_sub_questions)
            base_answer = question_result["initial_base_answer"].split("==")[-1]
            agent_answer = question_result["initial_answer"].split("==")[-1]

            with get_session_context_manager() as db_session:
                config = get_test_config(
                    db_session, primary_llm, fast_llm, search_request
                )
                assert (
                    config.persistence is not None
                ), "set a chat session id to run this test"
            output_point = {
                "example_id": example_id,
                "question": example_question,
                "duration": duration,
                "target_sub_questions": target_sub_questions,
                "generated_sub_questions": generated_sub_questions,
                "num_target_sub_questions": num_target_sub_questions,
                "num_generated_sub_questions": num_generated_sub_questions,
                "chunk_expansion_ratio": chunk_expansion_ratio,
                "support_effectiveness_ratio": support_effectiveness_ratio,
                "base_answer": base_answer,
                "agent_answer": agent_answer,
            }

            # search_request.persona = get_persona_by_id(1, None, db_session)
            # config.perform_initial_search_path_decision = False
            config.behavior.perform_initial_search_decomposition = True
            input = MainInput_a()

            # Base Flow
            base_flow_start_time: datetime = datetime.now()
            for output in run_basic_graph(config):
                if isinstance(output, OnyxAnswerPiece):
                    answer_tokens["base_answer"].append(output.answer_piece or "")

            output_data["base_answer"] = "".join(answer_tokens["base_answer"])
            output_data["base_answer_duration"] = (
                datetime.now() - base_flow_start_time
            )

            # Agent Flow
            agent_flow_start_time: datetime = datetime.now()
            config = get_test_config(
                db_session,
                primary_llm,
                fast_llm,
                search_request,
                use_agentic_search=True,
            )

            config.tooling.force_use_tool = ForceUseTool(
                force_use=True, tool_name=SearchTool._NAME
            )

            tool_responses: list = []

            sub_question_dict_tokens: dict[int, dict[int, str]] = defaultdict(
                lambda: defaultdict(str)
            )

            for output in run_main_graph(config):
                if isinstance(output, AgentAnswerPiece):
                    if output.level == 0 and output.level_question_num == 0:
                        answer_tokens["initial"].append(output.answer_piece)
                    elif output.level == 1 and output.level_question_num == 0:
                        answer_tokens["refined"].append(output.answer_piece)
                elif isinstance(output, SubQuestionPiece):
                    if (
                        output.level is not None
                        and output.level_question_num is not None
                    ):
                        sub_question_dict_tokens[output.level][
                            output.level_question_num
                        ] += output.sub_question
                elif isinstance(output, StreamStopInfo):
                    if (
                        output.stream_type == StreamType.MAIN_ANSWER
                        and output.level == 0
                    ):
                        initial_answer_duration = (
                            datetime.now() - agent_flow_start_time
                        )
                elif isinstance(output, RefinedAnswerImprovement):
                    output_data["refined_answer_improves_on_initial_answer"] = str(
                        output.refined_answer_improvement
                    )

            refined_answer_duration = datetime.now() - agent_flow_start_time

            output_data["example_id"] = example_id
            output_data["question"] = example_question
            output_data["initial_answer"] = "".join(answer_tokens["initial"])
            output_data["refined_answer"] = "".join(answer_tokens["refined"])
            output_data["initial_answer_duration"] = initial_answer_duration or ""
            output_data["refined_answer_duration"] = refined_answer_duration

            output_data["initial_sub_questions"] = "\n---\n".join(
                [x for x in sub_question_dict_tokens[0].values()]
            )
            output_data["refined_sub_questions"] = "\n---\n".join(
                [x for x in sub_question_dict_tokens[1].values()]
            )

            csv_output_data.append(
                [
                    str(example_id),
                    example_question,
                    output_data["base_answer"],
                    output_data["base_answer_duration"],
                    output_data["initial_sub_questions"],
                    output_data["initial_answer"],
                    output_data["initial_answer_duration"],
                    output_data["refined_sub_questions"],
                    output_data["refined_answer"],
                    output_data["refined_answer_duration"],
                    output_data["refined_answer_improves_on_initial_answer"],
                ]
            )
        except Exception as e:
            logger.error(f"Error processing example {example_id}: {e}")
            failed_example_ids.append(example_id)
            continue
        output_data.append(output_point)


with open(output_file, "w", newline="") as csvfile:
    writer = csv.writer(csvfile, delimiter="\t")
    writer.writerow(
        [
            "example_id",
            "question",
            "base_answer",
            "base_answer_duration",
            "initial_sub_questions",
            "initial_answer",
            "initial_answer_duration",
            "refined_sub_questions",
            "refined_answer",
            "refined_answer_duration",
            "refined_answer_improves_on_initial_answer",
        ]
    )
    writer.writerows(csv_output_data)
    fieldnames = [
        "example_id",
        "question",
        "duration",
        "target_sub_questions",
        "generated_sub_questions",
        "num_target_sub_questions",
        "num_generated_sub_questions",
        "chunk_expansion_ratio",
        "support_effectiveness_ratio",
        "base_answer",
        "agent_answer",
    ]

    writer = csv.DictWriter(csvfile, fieldnames=fieldnames, delimiter="\t")
    writer.writeheader()
    writer.writerows(output_data)

print("DONE")

Binary file not shown.
Before Width: | Height: | Size: 3.2 KiB
Binary file not shown.
Before Width: | Height: | Size: 3.1 KiB
@@ -1122,7 +1122,7 @@ export function ChatPage({
        "Continue Generating (pick up exactly where you left off)",
    });
  };
  const [uncaughtError, setUncaughtError] = useState<string | null>(null);
  const [finishedStreaming, setFinishedStreaming] = useState(false);

  const onSubmit = async ({
    messageIdToResend,
@@ -1445,6 +1445,7 @@ export function ChatPage({
          Object.hasOwn(packet, "level_question_num")
        ) {
          if ((packet as StreamStopInfo).stream_type == "main_answer") {
            setFinishedStreaming(true);
            updateChatState("streaming", frozenSessionId);
          }
          if (
@@ -1562,23 +1563,8 @@ export function ChatPage({
            }
          );
        } else if (Object.hasOwn(packet, "error")) {
          if (
            sub_questions.length > 0 &&
            sub_questions
              .filter((q) => q.level === 0)
              .every((q) => q.is_stopped === true)
          ) {
            setUncaughtError((packet as StreamingError).error);
            updateChatState("input");
            setAgenticGenerating(false);
            setAlternativeGeneratingAssistant(null);
            setSubmittedMessage("");
            return;
            // throw new Error((packet as StreamingError).error);
          } else {
            error = (packet as StreamingError).error;
            stackTrace = (packet as StreamingError).stack_trace;
          }
          error = (packet as StreamingError).error;
          stackTrace = (packet as StreamingError).stack_trace;
        } else if (Object.hasOwn(packet, "message_id")) {
          finalMessage = packet as BackendMessage;
        } else if (Object.hasOwn(packet, "stop_reason")) {
@@ -2068,7 +2054,6 @@ export function ChatPage({
      }

      const data = await response.json();

      router.push(data.redirect_url);
    } catch (error) {
      console.error("Error seeding chat from Slack:", error);
@@ -2664,7 +2649,6 @@ export function ChatPage({
    {message.sub_questions &&
    message.sub_questions.length > 0 ? (
      <AgenticMessage
        error={uncaughtError}
        isStreamingQuestions={
          message.isStreamingQuestions ?? false
        }

@@ -81,7 +81,6 @@ export const AgenticMessage = ({
  agenticDocs,
  secondLevelSubquestions,
  toggleDocDisplay,
  error,
}: {
  isStreamingQuestions: boolean;
  isGenerating: boolean;
@@ -114,7 +113,6 @@ export const AgenticMessage = ({
  regenerate?: (modelOverRide: LlmOverride) => Promise<void>;
  setPresentingDocument?: (document: OnyxDocument) => void;
  toggleDocDisplay?: (agentic: boolean) => void;
  error?: string | null;
}) => {
  const [noShowingMessage, setNoShowingMessage] = useState(isComplete);

@@ -493,28 +491,11 @@ export const AgenticMessage = ({
              ) : (
                content
              )}
              {error && (
                <p className="mt-2 text-red-700 text-sm my-auto">
                  {error}
                </p>
              )}
            </div>
          </div>
        </>
      ) : isComplete ? (
        error && (
          <p className="mt-2 mx-4 text-red-700 text-sm my-auto">
            {error}
          </p>
        )
      ) : (
        <>
          {error && (
            <p className="mt-2 mx-4 text-red-700 text-sm my-auto">
              {error}
            </p>
          )}
        </>
      ) : isComplete ? null : (
        <></>
      )}
      {handleFeedback &&
        (isActive ? (

@@ -95,7 +95,7 @@ export const MemoizedAnchor = memo(
    }
    return (
      <MemoizedLink updatePresentingDocument={updatePresentingDocument}>
        {children}
      </MemoizedLink>
    );
  }

@@ -186,7 +186,6 @@ export const AIMessage = ({
  setPresentingDocument,
  index,
  documentSidebarVisible,
  removePadding,
}: {
  index?: number;
  shared?: boolean;
@@ -215,7 +214,6 @@ export const AIMessage = ({
  overriddenModel?: string;
  regenerate?: (modelOverRide: LlmOverride) => Promise<void>;
  setPresentingDocument: (document: OnyxDocument) => void;
  removePadding?: boolean;
}) => {
  const toolCallGenerating = toolCall && !toolCall.tool_result;

@@ -404,9 +402,7 @@ export const AIMessage = ({
      <div
        id={isComplete ? "onyx-ai-message" : undefined}
        ref={trackedElementRef}
        className={`py-5 ml-4 lg:px-5 relative flex

        ${removePadding && "!pl-24 -mt-12"}`}
        className={`py-5 text-text ml-4 lg:px-5 relative flex `}
      >
        <div
          className={`mx-auto ${
@@ -415,13 +411,11 @@ export const AIMessage = ({
          >
            <div className={`lg:mr-12 ${!shared && "mobile:ml-0 md:ml-8"}`}>
              <div className="flex items-start">
                {!removePadding && (
                  <AssistantIcon
                    className="mobile:hidden"
                    size={24}
                    assistant={alternativeAssistant || currentPersona}
                  />
                )}
                <AssistantIcon
                  className="mobile:hidden"
                  size={24}
                  assistant={alternativeAssistant || currentPersona}
                />

                <div className="w-full">
                  <div className="max-w-message-max break-words">
@@ -602,8 +596,7 @@ export const AIMessage = ({
                    )}
                  </div>

                  {!removePadding &&
                    handleFeedback &&
                  {handleFeedback &&
                    (isActive ? (
                      <div
                        className={`

@@ -194,7 +194,7 @@ export default function CreateCredential({
          for information on setting up this connector.
        </p>
      )}
      <CardSection className="w-full items-start dark:bg-neutral-900 mt-4 flex flex-col gap-y-6">
      <CardSection className="w-full items-start bg-blue-200 !border-0 mt-4 flex flex-col gap-y-6">
        <TextFormField
          name="name"
          placeholder="(Optional) credential name.."
@@ -215,7 +215,7 @@ export default function CreateCredential({
          />
        ))}
        {!swapConnector && (
          <div className="mt-4 flex w-full flex-col sm:flex-row justify-between items-end">
          <div className="mt-4 flex flex-col sm:flex-row justify-between items-end">
            <div className="w-full sm:w-3/4 mb-4 sm:mb-0">
              {isPaidEnterpriseFeaturesEnabled && (
                <div className="flex flex-col items-start">

@@ -76,8 +76,7 @@ import r2Icon from "../../../public/r2.png";
import salesforceIcon from "../../../public/Salesforce.png";
import freshdeskIcon from "../../../public/Freshdesk.png";
import firefliesIcon from "../../../public/Fireflies.png";
import gitbookDarkIcon from "../../../public/GitBookDark.png";
import gitbookLightIcon from "../../../public/GitBookLight.png";

import sharepointIcon from "../../../public/Sharepoint.png";
import teamsIcon from "../../../public/Teams.png";
import mediawikiIcon from "../../../public/MediaWiki.svg";
@@ -2890,20 +2889,6 @@ export const AirtableIcon = ({
  return <LogoIcon size={size} className={className} src={airtableIcon} />;
};

export const GitbookIcon = ({
  size = 16,
  className = defaultTailwindCSS,
}: IconProps) => (
  <div className="flex items-center justify-center">
    <div className="dark:hidden">
      <LogoIcon size={size} className={className} src={gitbookDarkIcon} />
    </div>
    <div className="hidden dark:block">
      <LogoIcon size={size} className={className} src={gitbookLightIcon} />
    </div>
  </div>
);

export const PinnedIcon = ({
  size = 16,
  className = defaultTailwindCSS,

@@ -2,6 +2,7 @@

import { useContext } from "react";
import { SettingsContext } from "../settings/SettingsProvider";
import { useTheme } from "next-themes";
import { OnyxIcon, OnyxLogoTypeIcon } from "../icons/icons";

export function Logo({

@@ -227,24 +227,6 @@ export const connectorConfigs: Record<
    ],
    advanced_values: [],
  },
  gitbook: {
    description: "Configure GitBook connector",
    values: [
      {
        type: "text",
        query: "Enter the space ID:",
        label: "Space ID",
        name: "space_id",
        optional: false,
        description:
          "The ID of the GitBook space to index. This can be found in the URL " +
          "of a page in the space. For example, if your URL looks like " +
          "`https://app.gitbook.com/o/ccLx08XZ5wZ54LwdP9QU/s/8JkzVx8QCIGRrmxhGHU8/`, " +
          "then your space ID is `8JkzVx8QCIGRrmxhGHU8`.",
      },
    ],
    advanced_values: [],
  },
  google_drive: {
    description: "Configure Google Drive connector",
    values: [

@@ -30,10 +30,6 @@ export interface GithubCredentialJson {
  github_access_token: string;
}

export interface GitbookCredentialJson {
  gitbook_api_key: string;
}

export interface GitlabCredentialJson {
  gitlab_url: string;
  gitlab_access_token: string;
@@ -348,9 +344,6 @@ export const credentialTemplates: Record<ValidSources, any> = {
  // NOTE: These are Special Cases
  google_drive: { google_tokens: "" } as GoogleDriveCredentialJson,
  gmail: { google_tokens: "" } as GmailCredentialJson,
  gitbook: {
    gitbook_api_key: "",
  } as GitbookCredentialJson,
};

export const credentialDisplayNames: Record<string, string> = {
@@ -481,10 +474,6 @@ export const credentialDisplayNames: Record<string, string> = {

  // Fireflies
  fireflies_api_key: "Fireflies API Key",

  // GitBook
  gitbook_space_id: "GitBook Space ID",
  gitbook_api_key: "GitBook API Key",
};

export function getDisplayNameForCredentialKey(key: string): string {

@@ -43,7 +43,6 @@ import {
  AirtableIcon,
  GlobeIcon2,
  FileIcon2,
  GitbookIcon,
} from "@/components/icons/icons";
import { ValidSources } from "./types";
import {
@@ -322,12 +321,6 @@ export const SOURCE_METADATA_MAP: SourceMap = {
    category: SourceCategory.Other,
    docs: "https://docs.onyx.app/connectors/airtable",
  },
  gitbook: {
    icon: GitbookIcon,
    displayName: "GitBook",
    category: SourceCategory.Wiki,
    docs: "https://docs.onyx.app/connectors/gitbook",
  },
  // currently used for the Internet Search tool docs, which is why
  // a globe is used
  not_applicable: {

@@ -350,7 +350,6 @@ export enum ValidSources {
  Fireflies = "fireflies",
  Egnyte = "egnyte",
  Airtable = "airtable",
  Gitbook = "gitbook",
}

export const validAutoSyncSources = [