mirror of
https://github.com/onyx-dot-app/onyx.git
synced 2026-02-17 07:45:47 +00:00
Compare commits
21 Commits
v0.16.3
...
initial_im
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
0d848fa9dd | ||
|
|
1dbb6f3d69 | ||
|
|
91cf9a5472 | ||
|
|
ebb0e56a30 | ||
|
|
091cb136c4 | ||
|
|
56052c5b4b | ||
|
|
617726207b | ||
|
|
1be58e74b3 | ||
|
|
a693c991d7 | ||
|
|
ef9942b751 | ||
|
|
993acec5e9 | ||
|
|
b01a1b509a | ||
|
|
4f994124ef | ||
|
|
14863bd457 | ||
|
|
aa1c4c635a | ||
|
|
13f6e8a6b4 | ||
|
|
66f47d294c | ||
|
|
0a685bda7d | ||
|
|
23dc8b5dad | ||
|
|
cd5f2293ad | ||
|
|
4b28686721 |
@@ -0,0 +1,36 @@
|
||||
"""Combine Search and Chat
|
||||
|
||||
Revision ID: 9f696734098f
|
||||
Revises: a8c2065484e6
|
||||
Create Date: 2024-11-27 15:32:19.694972
|
||||
|
||||
"""
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "9f696734098f"
|
||||
down_revision = "a8c2065484e6"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
op.alter_column("chat_session", "description", nullable=True)
|
||||
op.drop_column("chat_session", "one_shot")
|
||||
op.drop_column("slack_channel_config", "response_type")
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
op.execute("UPDATE chat_session SET description = '' WHERE description IS NULL")
|
||||
op.alter_column("chat_session", "description", nullable=False)
|
||||
op.add_column(
|
||||
"chat_session",
|
||||
sa.Column("one_shot", sa.Boolean(), nullable=False, server_default=sa.false()),
|
||||
)
|
||||
op.add_column(
|
||||
"slack_channel_config",
|
||||
sa.Column(
|
||||
"response_type", sa.String(), nullable=False, server_default="citations"
|
||||
),
|
||||
)
|
||||
@@ -18,6 +18,11 @@ class ExternalAccess:
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class DocExternalAccess:
|
||||
"""
|
||||
This is just a class to wrap the external access and the document ID
|
||||
together. It's used for syncing document permissions to Redis.
|
||||
"""
|
||||
|
||||
external_access: ExternalAccess
|
||||
# The document ID
|
||||
doc_id: str
|
||||
|
||||
42
backend/danswer/agent_search/core_qa_graph/edges.py
Normal file
42
backend/danswer/agent_search/core_qa_graph/edges.py
Normal file
@@ -0,0 +1,42 @@
|
||||
from collections.abc import Hashable
|
||||
from typing import Union
|
||||
|
||||
from langgraph.types import Send
|
||||
|
||||
from danswer.agent_search.core_qa_graph.states import BaseQAState
|
||||
from danswer.agent_search.primary_graph.states import RetrieverState
|
||||
from danswer.agent_search.primary_graph.states import VerifierState
|
||||
|
||||
|
||||
def sub_continue_to_verifier(state: BaseQAState) -> Union[Hashable, list[Hashable]]:
|
||||
# Routes each de-douped retrieved doc to the verifier step - in parallel
|
||||
# Notice the 'Send()' API that takes care of the parallelization
|
||||
|
||||
return [
|
||||
Send(
|
||||
"sub_verifier",
|
||||
VerifierState(
|
||||
document=doc,
|
||||
#question=state["original_question"],
|
||||
question=state["sub_question_str"],
|
||||
graph_start_time=state["graph_start_time"],
|
||||
),
|
||||
)
|
||||
for doc in state["sub_question_deduped_retrieval_docs"]
|
||||
]
|
||||
|
||||
|
||||
def sub_continue_to_retrieval(state: BaseQAState) -> Union[Hashable, list[Hashable]]:
|
||||
# Routes re-written queries to the (parallel) retrieval steps
|
||||
# Notice the 'Send()' API that takes care of the parallelization
|
||||
rewritten_queries = state["sub_question_search_queries"].rewritten_queries + [state["sub_question_str"]]
|
||||
return [
|
||||
Send(
|
||||
"sub_custom_retrieve",
|
||||
RetrieverState(
|
||||
rewritten_query=query,
|
||||
graph_start_time=state["graph_start_time"],
|
||||
),
|
||||
)
|
||||
for query in rewritten_queries
|
||||
]
|
||||
132
backend/danswer/agent_search/core_qa_graph/graph_builder.py
Normal file
132
backend/danswer/agent_search/core_qa_graph/graph_builder.py
Normal file
@@ -0,0 +1,132 @@
|
||||
from langgraph.graph import END
|
||||
from langgraph.graph import START
|
||||
from langgraph.graph import StateGraph
|
||||
|
||||
from danswer.agent_search.core_qa_graph.edges import sub_continue_to_retrieval
|
||||
from danswer.agent_search.core_qa_graph.edges import sub_continue_to_verifier
|
||||
from danswer.agent_search.core_qa_graph.nodes.combine_retrieved_docs import (
|
||||
sub_combine_retrieved_docs,
|
||||
)
|
||||
from danswer.agent_search.core_qa_graph.nodes.custom_retrieve import (
|
||||
sub_custom_retrieve,
|
||||
)
|
||||
from danswer.agent_search.core_qa_graph.nodes.dummy import sub_dummy
|
||||
from danswer.agent_search.core_qa_graph.nodes.final_format import (
|
||||
sub_final_format,
|
||||
)
|
||||
from danswer.agent_search.core_qa_graph.nodes.generate import sub_generate
|
||||
from danswer.agent_search.core_qa_graph.nodes.qa_check import sub_qa_check
|
||||
from danswer.agent_search.core_qa_graph.nodes.rewrite import sub_rewrite
|
||||
from danswer.agent_search.core_qa_graph.nodes.verifier import sub_verifier
|
||||
from danswer.agent_search.core_qa_graph.states import BaseQAOutputState
|
||||
from danswer.agent_search.core_qa_graph.states import BaseQAState
|
||||
from danswer.agent_search.core_qa_graph.states import CoreQAInputState
|
||||
|
||||
|
||||
def build_core_qa_graph() -> StateGraph:
|
||||
sub_answers_initial = StateGraph(
|
||||
state_schema=BaseQAState,
|
||||
output=BaseQAOutputState,
|
||||
)
|
||||
|
||||
### Add nodes ###
|
||||
sub_answers_initial.add_node(node="sub_dummy", action=sub_dummy)
|
||||
sub_answers_initial.add_node(node="sub_rewrite", action=sub_rewrite)
|
||||
sub_answers_initial.add_node(
|
||||
node="sub_custom_retrieve",
|
||||
action=sub_custom_retrieve,
|
||||
)
|
||||
sub_answers_initial.add_node(
|
||||
node="sub_combine_retrieved_docs",
|
||||
action=sub_combine_retrieved_docs,
|
||||
)
|
||||
sub_answers_initial.add_node(
|
||||
node="sub_verifier",
|
||||
action=sub_verifier,
|
||||
)
|
||||
sub_answers_initial.add_node(
|
||||
node="sub_generate",
|
||||
action=sub_generate,
|
||||
)
|
||||
sub_answers_initial.add_node(
|
||||
node="sub_qa_check",
|
||||
action=sub_qa_check,
|
||||
)
|
||||
sub_answers_initial.add_node(
|
||||
node="sub_final_format",
|
||||
action=sub_final_format,
|
||||
)
|
||||
|
||||
### Add edges ###
|
||||
sub_answers_initial.add_edge(START, "sub_dummy")
|
||||
sub_answers_initial.add_edge("sub_dummy", "sub_rewrite")
|
||||
|
||||
sub_answers_initial.add_conditional_edges(
|
||||
source="sub_rewrite",
|
||||
path=sub_continue_to_retrieval,
|
||||
)
|
||||
|
||||
sub_answers_initial.add_edge(
|
||||
start_key="sub_custom_retrieve",
|
||||
end_key="sub_combine_retrieved_docs",
|
||||
)
|
||||
|
||||
sub_answers_initial.add_conditional_edges(
|
||||
source="sub_combine_retrieved_docs",
|
||||
path=sub_continue_to_verifier,
|
||||
path_map=["sub_verifier"],
|
||||
)
|
||||
|
||||
sub_answers_initial.add_edge(
|
||||
start_key="sub_verifier",
|
||||
end_key="sub_generate",
|
||||
)
|
||||
|
||||
sub_answers_initial.add_edge(
|
||||
start_key="sub_generate",
|
||||
end_key="sub_qa_check",
|
||||
)
|
||||
|
||||
sub_answers_initial.add_edge(
|
||||
start_key="sub_qa_check",
|
||||
end_key="sub_final_format",
|
||||
)
|
||||
|
||||
sub_answers_initial.add_edge(
|
||||
start_key="sub_final_format",
|
||||
end_key=END,
|
||||
)
|
||||
# sub_answers_graph = sub_answers_initial.compile()
|
||||
return sub_answers_initial
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# q = "Whose music is kind of hard to easily enjoy?"
|
||||
# q = "What is voice leading?"
|
||||
# q = "What are the types of motions in music?"
|
||||
# q = "What are key elements of music theory?"
|
||||
# q = "How can I best understand music theory using voice leading?"
|
||||
q = "What makes good music?"
|
||||
# q = "types of motions in music"
|
||||
# q = "What is the relationship between music and physics?"
|
||||
# q = "Can you compare various grunge styles?"
|
||||
# q = "Why is quantum gravity so hard?"
|
||||
|
||||
inputs = CoreQAInputState(
|
||||
original_question=q,
|
||||
sub_question_str=q,
|
||||
)
|
||||
sub_answers_graph = build_core_qa_graph()
|
||||
compiled_sub_answers = sub_answers_graph.compile()
|
||||
output = compiled_sub_answers.invoke(inputs)
|
||||
print("\nOUTPUT:")
|
||||
print(output.keys())
|
||||
for key, value in output.items():
|
||||
if key in [
|
||||
"sub_question_answer",
|
||||
"sub_question_str",
|
||||
"sub_qas",
|
||||
"initial_sub_qas",
|
||||
"sub_question_answer",
|
||||
]:
|
||||
print(f"{key}: {value}")
|
||||
@@ -0,0 +1,36 @@
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
|
||||
from danswer.agent_search.core_qa_graph.states import BaseQAState
|
||||
from danswer.agent_search.shared_graph_utils.utils import generate_log_message
|
||||
from danswer.context.search.models import InferenceSection
|
||||
|
||||
|
||||
def sub_combine_retrieved_docs(state: BaseQAState) -> dict[str, Any]:
|
||||
"""
|
||||
Dedupe the retrieved docs.
|
||||
"""
|
||||
node_start_time = datetime.now()
|
||||
|
||||
sub_question_base_retrieval_docs = state["sub_question_base_retrieval_docs"]
|
||||
|
||||
print(f"Number of docs from steps: {len(sub_question_base_retrieval_docs)}")
|
||||
dedupe_docs: list[InferenceSection] = []
|
||||
for base_retrieval_doc in sub_question_base_retrieval_docs:
|
||||
if not any(
|
||||
base_retrieval_doc.center_chunk.chunk_id == doc.center_chunk.chunk_id
|
||||
for doc in dedupe_docs
|
||||
):
|
||||
dedupe_docs.append(base_retrieval_doc)
|
||||
|
||||
print(f"Number of deduped docs: {len(dedupe_docs)}")
|
||||
|
||||
|
||||
return {
|
||||
"sub_question_deduped_retrieval_docs": dedupe_docs,
|
||||
"log_messages": generate_log_message(
|
||||
message="sub - combine_retrieved_docs (dedupe)",
|
||||
node_start_time=node_start_time,
|
||||
graph_start_time=state["graph_start_time"],
|
||||
),
|
||||
}
|
||||
@@ -0,0 +1,66 @@
|
||||
import datetime
|
||||
from typing import Any
|
||||
|
||||
from danswer.agent_search.primary_graph.states import RetrieverState
|
||||
from danswer.agent_search.shared_graph_utils.utils import generate_log_message
|
||||
from danswer.context.search.models import InferenceSection
|
||||
from danswer.context.search.models import SearchRequest
|
||||
from danswer.context.search.pipeline import SearchPipeline
|
||||
from danswer.db.engine import get_session_context_manager
|
||||
from danswer.llm.factory import get_default_llms
|
||||
|
||||
|
||||
def sub_custom_retrieve(state: RetrieverState) -> dict[str, Any]:
|
||||
"""
|
||||
Retrieve documents
|
||||
|
||||
Args:
|
||||
state (dict): The current graph state
|
||||
|
||||
Returns:
|
||||
state (dict): New key added to state, documents, that contains retrieved documents
|
||||
"""
|
||||
print("---RETRIEVE SUB---")
|
||||
|
||||
node_start_time = datetime.datetime.now()
|
||||
|
||||
rewritten_query = state["rewritten_query"]
|
||||
|
||||
# Retrieval
|
||||
# TODO: add the actual retrieval, probably from search_tool.run()
|
||||
documents: list[InferenceSection] = []
|
||||
llm, fast_llm = get_default_llms()
|
||||
with get_session_context_manager() as db_session:
|
||||
documents = SearchPipeline(
|
||||
search_request=SearchRequest(
|
||||
query=rewritten_query,
|
||||
),
|
||||
user=None,
|
||||
llm=llm,
|
||||
fast_llm=fast_llm,
|
||||
db_session=db_session,
|
||||
)
|
||||
|
||||
reranked_docs = documents.reranked_sections
|
||||
|
||||
# initial metric to measure fit TODO: implement metric properly
|
||||
|
||||
top_1_score = reranked_docs[0].center_chunk.score
|
||||
top_5_score = sum([doc.center_chunk.score for doc in reranked_docs[:5]]) / 5
|
||||
top_10_score = sum([doc.center_chunk.score for doc in reranked_docs[:10]]) / 10
|
||||
|
||||
fit_score = 1/3 * (top_1_score + top_5_score + top_10_score)
|
||||
|
||||
chunk_ids = {'query': rewritten_query,
|
||||
'chunk_ids': [doc.center_chunk.chunk_id for doc in reranked_docs]}
|
||||
|
||||
|
||||
return {
|
||||
"sub_question_base_retrieval_docs": reranked_docs,
|
||||
"sub_chunk_ids": [chunk_ids],
|
||||
"log_messages": generate_log_message(
|
||||
message=f"sub - custom_retrieve, fit_score: {fit_score}",
|
||||
node_start_time=node_start_time,
|
||||
graph_start_time=state["graph_start_time"],
|
||||
),
|
||||
}
|
||||
24
backend/danswer/agent_search/core_qa_graph/nodes/dummy.py
Normal file
24
backend/danswer/agent_search/core_qa_graph/nodes/dummy.py
Normal file
@@ -0,0 +1,24 @@
|
||||
import datetime
|
||||
from typing import Any
|
||||
|
||||
from danswer.agent_search.core_qa_graph.states import BaseQAState
|
||||
from danswer.agent_search.shared_graph_utils.utils import generate_log_message
|
||||
|
||||
|
||||
def sub_dummy(state: BaseQAState) -> dict[str, Any]:
|
||||
"""
|
||||
Dummy step
|
||||
"""
|
||||
|
||||
print("---Sub Dummy---")
|
||||
|
||||
node_start_time = datetime.datetime.now()
|
||||
|
||||
return {
|
||||
"graph_start_time": node_start_time,
|
||||
"log_messages": generate_log_message(
|
||||
message="sub - dummy",
|
||||
node_start_time=node_start_time,
|
||||
graph_start_time=node_start_time,
|
||||
),
|
||||
}
|
||||
@@ -0,0 +1,22 @@
|
||||
from typing import Any
|
||||
|
||||
from danswer.agent_search.core_qa_graph.states import BaseQAState
|
||||
|
||||
|
||||
def sub_final_format(state: BaseQAState) -> dict[str, Any]:
|
||||
"""
|
||||
Create the final output for the QA subgraph
|
||||
"""
|
||||
|
||||
print("---BASE FINAL FORMAT---")
|
||||
|
||||
return {
|
||||
"sub_qas": [
|
||||
{
|
||||
"sub_question": state["sub_question_str"],
|
||||
"sub_answer": state["sub_question_answer"],
|
||||
"sub_answer_check": state["sub_question_answer_check"],
|
||||
}
|
||||
],
|
||||
"log_messages": state["log_messages"],
|
||||
}
|
||||
91
backend/danswer/agent_search/core_qa_graph/nodes/generate.py
Normal file
91
backend/danswer/agent_search/core_qa_graph/nodes/generate.py
Normal file
@@ -0,0 +1,91 @@
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
|
||||
from langchain_core.messages import HumanMessage
|
||||
from langchain_core.messages import merge_message_runs
|
||||
|
||||
from danswer.agent_search.core_qa_graph.states import BaseQAState
|
||||
from danswer.agent_search.shared_graph_utils.prompts import BASE_RAG_PROMPT
|
||||
from danswer.agent_search.shared_graph_utils.utils import format_docs
|
||||
from danswer.agent_search.shared_graph_utils.utils import generate_log_message
|
||||
from danswer.llm.factory import get_default_llms
|
||||
|
||||
|
||||
def sub_generate(state: BaseQAState) -> dict[str, Any]:
|
||||
"""
|
||||
Generate answer
|
||||
|
||||
Args:
|
||||
state (messages): The current state
|
||||
|
||||
Returns:
|
||||
dict: The updated state with re-phrased question
|
||||
"""
|
||||
print("---GENERATE---")
|
||||
|
||||
# Create sub-query results
|
||||
|
||||
verified_chunks = [chunk.center_chunk.chunk_id for chunk in state["sub_question_verified_retrieval_docs"]]
|
||||
result_dict = {}
|
||||
|
||||
chunk_id_dicts = state["sub_chunk_ids"]
|
||||
expanded_chunks = []
|
||||
original_chunks = []
|
||||
|
||||
for chunk_id_dict in chunk_id_dicts:
|
||||
sub_question = chunk_id_dict['query']
|
||||
verified_sq_chunks = [chunk_id for chunk_id in chunk_id_dict['chunk_ids'] if chunk_id in verified_chunks]
|
||||
|
||||
if sub_question != state["original_question"]:
|
||||
expanded_chunks += verified_sq_chunks
|
||||
else:
|
||||
result_dict['ORIGINAL'] = len(verified_sq_chunks)
|
||||
original_chunks += verified_sq_chunks
|
||||
result_dict[sub_question[:30]] = len(verified_sq_chunks)
|
||||
|
||||
expansion_chunks = set(expanded_chunks)
|
||||
num_expansion_chunks = sum([1 for chunk_id in expansion_chunks if chunk_id in verified_chunks])
|
||||
num_original_relevant_chunks = len(original_chunks)
|
||||
num_missed_relevant_chunks = sum([1 for chunk_id in original_chunks if chunk_id not in expansion_chunks])
|
||||
num_gained_relevant_chunks = sum([1 for chunk_id in expansion_chunks if chunk_id not in original_chunks])
|
||||
result_dict['expansion_chunks'] = num_expansion_chunks
|
||||
|
||||
|
||||
|
||||
print(result_dict)
|
||||
|
||||
node_start_time = datetime.now()
|
||||
|
||||
question = state["sub_question_str"]
|
||||
docs = state["sub_question_verified_retrieval_docs"]
|
||||
|
||||
print(f"Number of verified retrieval docs: {len(docs)}")
|
||||
|
||||
# Only take the top 10 docs.
|
||||
# TODO: Make this dynamic or use config param?
|
||||
top_10_docs = docs[-10:]
|
||||
|
||||
msg = [
|
||||
HumanMessage(
|
||||
content=BASE_RAG_PROMPT.format(question=question, context=format_docs(top_10_docs))
|
||||
)
|
||||
]
|
||||
|
||||
# Grader
|
||||
_, fast_llm = get_default_llms()
|
||||
response = list(
|
||||
fast_llm.stream(
|
||||
prompt=msg,
|
||||
# structured_response_format=None,
|
||||
)
|
||||
)
|
||||
|
||||
answer_str = merge_message_runs(response, chunk_separator="")[0].content
|
||||
return {
|
||||
"sub_question_answer": answer_str,
|
||||
"log_messages": generate_log_message(
|
||||
message="base - generate",
|
||||
node_start_time=node_start_time,
|
||||
graph_start_time=state["graph_start_time"],
|
||||
),
|
||||
}
|
||||
51
backend/danswer/agent_search/core_qa_graph/nodes/qa_check.py
Normal file
51
backend/danswer/agent_search/core_qa_graph/nodes/qa_check.py
Normal file
@@ -0,0 +1,51 @@
|
||||
import datetime
|
||||
from typing import Any
|
||||
|
||||
from langchain_core.messages import HumanMessage
|
||||
from langchain_core.messages import merge_message_runs
|
||||
|
||||
from danswer.agent_search.core_qa_graph.states import BaseQAState
|
||||
from danswer.agent_search.shared_graph_utils.prompts import BASE_CHECK_PROMPT
|
||||
from danswer.agent_search.shared_graph_utils.utils import generate_log_message
|
||||
from danswer.llm.factory import get_default_llms
|
||||
|
||||
|
||||
def sub_qa_check(state: BaseQAState) -> dict[str, Any]:
|
||||
"""
|
||||
Check if the sub-question answer is satisfactory.
|
||||
|
||||
Args:
|
||||
state: The current SubQAState containing the sub-question and its answer
|
||||
|
||||
Returns:
|
||||
dict containing the check result and log message
|
||||
"""
|
||||
node_start_time = datetime.datetime.now()
|
||||
|
||||
msg = [
|
||||
HumanMessage(
|
||||
content=BASE_CHECK_PROMPT.format(
|
||||
question=state["sub_question_str"],
|
||||
base_answer=state["sub_question_answer"],
|
||||
)
|
||||
)
|
||||
]
|
||||
|
||||
_, fast_llm = get_default_llms()
|
||||
response = list(
|
||||
fast_llm.stream(
|
||||
prompt=msg,
|
||||
# structured_response_format=None,
|
||||
)
|
||||
)
|
||||
|
||||
response_str = merge_message_runs(response, chunk_separator="")[0].content
|
||||
|
||||
return {
|
||||
"sub_question_answer_check": response_str,
|
||||
"base_answer_messages": generate_log_message(
|
||||
message="sub - qa_check",
|
||||
node_start_time=node_start_time,
|
||||
graph_start_time=state["graph_start_time"],
|
||||
),
|
||||
}
|
||||
74
backend/danswer/agent_search/core_qa_graph/nodes/rewrite.py
Normal file
74
backend/danswer/agent_search/core_qa_graph/nodes/rewrite.py
Normal file
@@ -0,0 +1,74 @@
|
||||
import datetime
|
||||
from typing import Any
|
||||
|
||||
from langchain_core.messages import HumanMessage
|
||||
from langchain_core.messages import merge_message_runs
|
||||
|
||||
from danswer.agent_search.core_qa_graph.states import BaseQAState
|
||||
from danswer.agent_search.shared_graph_utils.models import RewrittenQueries
|
||||
from danswer.agent_search.shared_graph_utils.prompts import (
|
||||
REWRITE_PROMPT_MULTI_ORIGINAL,
|
||||
)
|
||||
from danswer.agent_search.shared_graph_utils.utils import generate_log_message
|
||||
from danswer.llm.factory import get_default_llms
|
||||
|
||||
|
||||
def sub_rewrite(state: BaseQAState) -> dict[str, Any]:
|
||||
"""
|
||||
Transform the initial question into more suitable search queries.
|
||||
|
||||
Args:
|
||||
state (messages): The current state
|
||||
|
||||
Returns:
|
||||
dict: The updated state with re-phrased question
|
||||
"""
|
||||
|
||||
print("---SUB TRANSFORM QUERY---")
|
||||
|
||||
node_start_time = datetime.datetime.now()
|
||||
|
||||
# messages = state["base_answer_messages"]
|
||||
question = state["sub_question_str"]
|
||||
|
||||
msg = [
|
||||
HumanMessage(
|
||||
content=REWRITE_PROMPT_MULTI_ORIGINAL.format(question=question),
|
||||
)
|
||||
]
|
||||
|
||||
"""
|
||||
msg = [
|
||||
HumanMessage(
|
||||
content=REWRITE_PROMPT_MULTI.format(question=question),
|
||||
)
|
||||
]
|
||||
"""
|
||||
|
||||
_, fast_llm = get_default_llms()
|
||||
llm_response_list = list(
|
||||
fast_llm.stream(
|
||||
prompt=msg,
|
||||
# structured_response_format={"type": "json_object", "schema": RewrittenQueries.model_json_schema()},
|
||||
# structured_response_format=RewrittenQueries.model_json_schema(),
|
||||
)
|
||||
)
|
||||
llm_response = merge_message_runs(llm_response_list, chunk_separator="")[0].content
|
||||
|
||||
print(f"llm_response: {llm_response}")
|
||||
|
||||
rewritten_queries = llm_response.split("--")
|
||||
# rewritten_queries = [llm_response.split("\n")[0]]
|
||||
|
||||
print(f"rewritten_queries: {rewritten_queries}")
|
||||
|
||||
rewritten_queries = RewrittenQueries(rewritten_queries=rewritten_queries)
|
||||
|
||||
return {
|
||||
"sub_question_search_queries": rewritten_queries,
|
||||
"log_messages": generate_log_message(
|
||||
message="sub - rewrite",
|
||||
node_start_time=node_start_time,
|
||||
graph_start_time=state["graph_start_time"],
|
||||
),
|
||||
}
|
||||
64
backend/danswer/agent_search/core_qa_graph/nodes/verifier.py
Normal file
64
backend/danswer/agent_search/core_qa_graph/nodes/verifier.py
Normal file
@@ -0,0 +1,64 @@
|
||||
import datetime
|
||||
from typing import Any
|
||||
|
||||
from langchain_core.messages import HumanMessage
|
||||
from langchain_core.messages import merge_message_runs
|
||||
|
||||
from danswer.agent_search.primary_graph.states import VerifierState
|
||||
from danswer.agent_search.shared_graph_utils.models import BinaryDecision
|
||||
from danswer.agent_search.shared_graph_utils.prompts import VERIFIER_PROMPT
|
||||
from danswer.agent_search.shared_graph_utils.utils import generate_log_message
|
||||
from danswer.llm.factory import get_default_llms
|
||||
|
||||
|
||||
def sub_verifier(state: VerifierState) -> dict[str, Any]:
|
||||
"""
|
||||
Check whether the document is relevant for the original user question
|
||||
|
||||
Args:
|
||||
state (VerifierState): The current state
|
||||
|
||||
Returns:
|
||||
dict: ict: The updated state with the final decision
|
||||
"""
|
||||
|
||||
# print("---VERIFY QUTPUT---")
|
||||
node_start_time = datetime.datetime.now()
|
||||
|
||||
question = state["question"]
|
||||
document_content = state["document"].combined_content
|
||||
|
||||
msg = [
|
||||
HumanMessage(
|
||||
content=VERIFIER_PROMPT.format(
|
||||
question=question, document_content=document_content
|
||||
)
|
||||
)
|
||||
]
|
||||
|
||||
# Grader
|
||||
llm, fast_llm = get_default_llms()
|
||||
response = list(
|
||||
llm.stream(
|
||||
prompt=msg,
|
||||
# structured_response_format=BinaryDecision.model_json_schema(),
|
||||
)
|
||||
)
|
||||
|
||||
response_string = merge_message_runs(response, chunk_separator="")[0].content
|
||||
# Convert string response to proper dictionary format
|
||||
decision_dict = {"decision": response_string.lower()}
|
||||
formatted_response = BinaryDecision.model_validate(decision_dict)
|
||||
|
||||
print(f"Verification end time: {datetime.datetime.now()}")
|
||||
|
||||
return {
|
||||
"sub_question_verified_retrieval_docs": [state["document"]]
|
||||
if formatted_response.decision == "yes"
|
||||
else [],
|
||||
"log_messages": generate_log_message(
|
||||
message=f"sub - verifier: {formatted_response.decision}",
|
||||
node_start_time=node_start_time,
|
||||
graph_start_time=state["graph_start_time"],
|
||||
),
|
||||
}
|
||||
90
backend/danswer/agent_search/core_qa_graph/states.py
Normal file
90
backend/danswer/agent_search/core_qa_graph/states.py
Normal file
@@ -0,0 +1,90 @@
|
||||
import operator
|
||||
from collections.abc import Sequence
|
||||
from datetime import datetime
|
||||
from typing import Annotated
|
||||
from typing import TypedDict
|
||||
|
||||
from langchain_core.messages import BaseMessage
|
||||
from langgraph.graph.message import add_messages
|
||||
|
||||
from danswer.agent_search.shared_graph_utils.models import RewrittenQueries
|
||||
from danswer.context.search.models import InferenceSection
|
||||
from danswer.llm.interfaces import LLM
|
||||
|
||||
|
||||
class SubQuestionRetrieverState(TypedDict):
|
||||
# The state for the parallel Retrievers. They each need to see only one query
|
||||
sub_question_rewritten_query: str
|
||||
|
||||
|
||||
class SubQuestionVerifierState(TypedDict):
|
||||
# The state for the parallel verification step. Each node execution need to see only one question/doc pair
|
||||
sub_question_document: InferenceSection
|
||||
sub_question: str
|
||||
|
||||
|
||||
class CoreQAInputState(TypedDict):
|
||||
sub_question_str: str
|
||||
original_question: str
|
||||
|
||||
|
||||
class BaseQAState(TypedDict):
|
||||
# The 'core SubQuestion' state.
|
||||
original_question: str
|
||||
graph_start_time: datetime
|
||||
# start time for parallel initial sub-questionn thread
|
||||
sub_query_start_time: datetime
|
||||
sub_question_rewritten_queries: list[str]
|
||||
sub_question_str: str
|
||||
sub_question_search_queries: RewrittenQueries
|
||||
sub_question_nr: int
|
||||
sub_chunk_ids: Annotated[Sequence[dict], operator.add]
|
||||
sub_question_base_retrieval_docs: Annotated[
|
||||
Sequence[InferenceSection], operator.add
|
||||
]
|
||||
sub_question_deduped_retrieval_docs: Annotated[
|
||||
Sequence[InferenceSection], operator.add
|
||||
]
|
||||
sub_question_verified_retrieval_docs: Annotated[
|
||||
Sequence[InferenceSection], operator.add
|
||||
]
|
||||
sub_question_reranked_retrieval_docs: Annotated[
|
||||
Sequence[InferenceSection], operator.add
|
||||
]
|
||||
sub_question_top_chunks: Annotated[Sequence[dict], operator.add]
|
||||
sub_question_answer: str
|
||||
sub_question_answer_check: str
|
||||
log_messages: Annotated[Sequence[BaseMessage], add_messages]
|
||||
sub_qas: Annotated[Sequence[dict], operator.add]
|
||||
# Answers sent back to core
|
||||
initial_sub_qas: Annotated[Sequence[dict], operator.add]
|
||||
primary_llm: LLM
|
||||
fast_llm: LLM
|
||||
|
||||
|
||||
class BaseQAOutputState(TypedDict):
|
||||
# The 'SubQuestion' output state. Removes all the intermediate states
|
||||
sub_question_rewritten_queries: list[str]
|
||||
sub_question_str: str
|
||||
sub_question_search_queries: list[str]
|
||||
sub_question_nr: int
|
||||
# Answers sent back to core
|
||||
sub_qas: Annotated[Sequence[dict], operator.add]
|
||||
# Answers sent back to core
|
||||
initial_sub_qas: Annotated[Sequence[dict], operator.add]
|
||||
sub_question_base_retrieval_docs: Annotated[
|
||||
Sequence[InferenceSection], operator.add
|
||||
]
|
||||
sub_question_deduped_retrieval_docs: Annotated[
|
||||
Sequence[InferenceSection], operator.add
|
||||
]
|
||||
sub_question_verified_retrieval_docs: Annotated[
|
||||
Sequence[InferenceSection], operator.add
|
||||
]
|
||||
sub_question_reranked_retrieval_docs: Annotated[
|
||||
Sequence[InferenceSection], operator.add
|
||||
]
|
||||
sub_question_top_chunks: Annotated[Sequence[dict], operator.add]
|
||||
sub_question_answer: str
|
||||
sub_question_answer_check: str
|
||||
log_messages: Annotated[Sequence[BaseMessage], add_messages]
|
||||
46
backend/danswer/agent_search/deep_qa_graph/edges.py
Normal file
46
backend/danswer/agent_search/deep_qa_graph/edges.py
Normal file
@@ -0,0 +1,46 @@
|
||||
from collections.abc import Hashable
|
||||
from typing import Union
|
||||
|
||||
from langgraph.types import Send
|
||||
|
||||
from danswer.agent_search.deep_qa_graph.states import ResearchQAState
|
||||
from danswer.agent_search.primary_graph.states import RetrieverState
|
||||
from danswer.agent_search.primary_graph.states import VerifierState
|
||||
|
||||
|
||||
def sub_continue_to_verifier(state: ResearchQAState) -> Union[Hashable, list[Hashable]]:
|
||||
# Routes each de-douped retrieved doc to the verifier step - in parallel
|
||||
# Notice the 'Send()' API that takes care of the parallelization
|
||||
|
||||
return [
|
||||
Send(
|
||||
"sub_verifier",
|
||||
VerifierState(
|
||||
document=doc,
|
||||
question=state["sub_question"],
|
||||
primary_llm=state["primary_llm"],
|
||||
fast_llm=state["fast_llm"],
|
||||
graph_start_time=state["graph_start_time"],
|
||||
),
|
||||
)
|
||||
for doc in state["sub_question_base_retrieval_docs"]
|
||||
]
|
||||
|
||||
|
||||
def sub_continue_to_retrieval(
|
||||
state: ResearchQAState,
|
||||
) -> Union[Hashable, list[Hashable]]:
|
||||
# Routes re-written queries to the (parallel) retrieval steps
|
||||
# Notice the 'Send()' API that takes care of the parallelization
|
||||
return [
|
||||
Send(
|
||||
"sub_custom_retrieve",
|
||||
RetrieverState(
|
||||
rewritten_query=query,
|
||||
primary_llm=state["primary_llm"],
|
||||
fast_llm=state["fast_llm"],
|
||||
graph_start_time=state["graph_start_time"],
|
||||
),
|
||||
)
|
||||
for query in state["sub_question_rewritten_queries"]
|
||||
]
|
||||
93
backend/danswer/agent_search/deep_qa_graph/graph_builder.py
Normal file
93
backend/danswer/agent_search/deep_qa_graph/graph_builder.py
Normal file
@@ -0,0 +1,93 @@
|
||||
from langgraph.graph import END
|
||||
from langgraph.graph import START
|
||||
from langgraph.graph import StateGraph
|
||||
|
||||
from danswer.agent_search.deep_qa_graph.edges import sub_continue_to_retrieval
|
||||
from danswer.agent_search.deep_qa_graph.edges import sub_continue_to_verifier
|
||||
from danswer.agent_search.deep_qa_graph.nodes.combine_retrieved_docs import (
|
||||
sub_combine_retrieved_docs,
|
||||
)
|
||||
from danswer.agent_search.deep_qa_graph.nodes.custom_retrieve import sub_custom_retrieve
|
||||
from danswer.agent_search.deep_qa_graph.nodes.dummy import sub_dummy
|
||||
from danswer.agent_search.deep_qa_graph.nodes.final_format import sub_final_format
|
||||
from danswer.agent_search.deep_qa_graph.nodes.generate import sub_generate
|
||||
from danswer.agent_search.deep_qa_graph.nodes.qa_check import sub_qa_check
|
||||
from danswer.agent_search.deep_qa_graph.nodes.verifier import sub_verifier
|
||||
from danswer.agent_search.deep_qa_graph.states import ResearchQAOutputState
|
||||
from danswer.agent_search.deep_qa_graph.states import ResearchQAState
|
||||
|
||||
|
||||
def build_deep_qa_graph() -> StateGraph:
|
||||
# Define the nodes we will cycle between
|
||||
sub_answers = StateGraph(state_schema=ResearchQAState, output=ResearchQAOutputState)
|
||||
|
||||
### Add Nodes ###
|
||||
|
||||
# Dummy node for initial processing
|
||||
sub_answers.add_node(node="sub_dummy", action=sub_dummy)
|
||||
|
||||
# The retrieval step
|
||||
sub_answers.add_node(node="sub_custom_retrieve", action=sub_custom_retrieve)
|
||||
|
||||
# The dedupe step
|
||||
sub_answers.add_node(
|
||||
node="sub_combine_retrieved_docs", action=sub_combine_retrieved_docs
|
||||
)
|
||||
|
||||
# Verifying retrieved information
|
||||
sub_answers.add_node(node="sub_verifier", action=sub_verifier)
|
||||
|
||||
# Generating the response
|
||||
sub_answers.add_node(node="sub_generate", action=sub_generate)
|
||||
|
||||
# Checking the quality of the answer
|
||||
sub_answers.add_node(node="sub_qa_check", action=sub_qa_check)
|
||||
|
||||
# Final formatting of the response
|
||||
sub_answers.add_node(node="sub_final_format", action=sub_final_format)
|
||||
|
||||
### Add Edges ###
|
||||
|
||||
# Generate multiple sub-questions
|
||||
sub_answers.add_edge(start_key=START, end_key="sub_rewrite")
|
||||
|
||||
# For each sub-question, perform a retrieval in parallel
|
||||
sub_answers.add_conditional_edges(
|
||||
source="sub_rewrite",
|
||||
path=sub_continue_to_retrieval,
|
||||
path_map=["sub_custom_retrieve"],
|
||||
)
|
||||
|
||||
# Combine the retrieved docs for each sub-question from the parallel retrievals
|
||||
sub_answers.add_edge(
|
||||
start_key="sub_custom_retrieve", end_key="sub_combine_retrieved_docs"
|
||||
)
|
||||
|
||||
# Go over all of the combined retrieved docs and verify them against the original question
|
||||
sub_answers.add_conditional_edges(
|
||||
source="sub_combine_retrieved_docs",
|
||||
path=sub_continue_to_verifier,
|
||||
path_map=["sub_verifier"],
|
||||
)
|
||||
|
||||
# Generate an answer for each verified retrieved doc
|
||||
sub_answers.add_edge(start_key="sub_verifier", end_key="sub_generate")
|
||||
|
||||
# Check the quality of the answer
|
||||
sub_answers.add_edge(start_key="sub_generate", end_key="sub_qa_check")
|
||||
|
||||
sub_answers.add_edge(start_key="sub_qa_check", end_key="sub_final_format")
|
||||
|
||||
sub_answers.add_edge(start_key="sub_final_format", end_key=END)
|
||||
|
||||
return sub_answers
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# TODO: add the actual question
|
||||
inputs = {"sub_question": "Whose music is kind of hard to easily enjoy?"}
|
||||
sub_answers_graph = build_deep_qa_graph()
|
||||
compiled_sub_answers = sub_answers_graph.compile()
|
||||
output = compiled_sub_answers.invoke(inputs)
|
||||
print("\nOUTPUT:")
|
||||
print(output)
|
||||
@@ -0,0 +1,31 @@
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
|
||||
from danswer.agent_search.deep_qa_graph.states import ResearchQAState
|
||||
from danswer.agent_search.shared_graph_utils.utils import generate_log_message
|
||||
|
||||
|
||||
def sub_combine_retrieved_docs(state: ResearchQAState) -> dict[str, Any]:
|
||||
"""
|
||||
Dedupe the retrieved docs.
|
||||
"""
|
||||
node_start_time = datetime.now()
|
||||
|
||||
sub_question_base_retrieval_docs = state["sub_question_base_retrieval_docs"]
|
||||
|
||||
print(f"Number of docs from steps: {len(sub_question_base_retrieval_docs)}")
|
||||
dedupe_docs = []
|
||||
for base_retrieval_doc in sub_question_base_retrieval_docs:
|
||||
if base_retrieval_doc not in dedupe_docs:
|
||||
dedupe_docs.append(base_retrieval_doc)
|
||||
|
||||
print(f"Number of deduped docs: {len(dedupe_docs)}")
|
||||
|
||||
return {
|
||||
"sub_question_deduped_retrieval_docs": dedupe_docs,
|
||||
"log_messages": generate_log_message(
|
||||
message="sub - combine_retrieved_docs (dedupe)",
|
||||
node_start_time=node_start_time,
|
||||
graph_start_time=state["graph_start_time"],
|
||||
),
|
||||
}
|
||||
@@ -0,0 +1,33 @@
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
|
||||
from danswer.agent_search.primary_graph.states import RetrieverState
|
||||
from danswer.agent_search.shared_graph_utils.utils import generate_log_message
|
||||
from danswer.context.search.models import InferenceSection
|
||||
|
||||
|
||||
def sub_custom_retrieve(state: RetrieverState) -> dict[str, Any]:
|
||||
"""
|
||||
Retrieve documents
|
||||
|
||||
Args:
|
||||
state (dict): The current graph state
|
||||
|
||||
Returns:
|
||||
state (dict): New key added to state, documents, that contains retrieved documents
|
||||
"""
|
||||
print("---RETRIEVE SUB---")
|
||||
node_start_time = datetime.now()
|
||||
|
||||
# Retrieval
|
||||
# TODO: add the actual retrieval, probably from search_tool.run()
|
||||
documents: list[InferenceSection] = []
|
||||
|
||||
return {
|
||||
"sub_question_base_retrieval_docs": documents,
|
||||
"log_messages": generate_log_message(
|
||||
message="sub - custom_retrieve",
|
||||
node_start_time=node_start_time,
|
||||
graph_start_time=state["graph_start_time"],
|
||||
),
|
||||
}
|
||||
21
backend/danswer/agent_search/deep_qa_graph/nodes/dummy.py
Normal file
21
backend/danswer/agent_search/deep_qa_graph/nodes/dummy.py
Normal file
@@ -0,0 +1,21 @@
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
|
||||
from danswer.agent_search.core_qa_graph.states import BaseQAState
|
||||
from danswer.agent_search.shared_graph_utils.utils import generate_log_message
|
||||
|
||||
|
||||
def sub_dummy(state: BaseQAState) -> dict[str, Any]:
|
||||
"""
|
||||
Dummy step
|
||||
"""
|
||||
|
||||
print("---Sub Dummy---")
|
||||
|
||||
return {
|
||||
"log_messages": generate_log_message(
|
||||
message="sub - dummy",
|
||||
node_start_time=datetime.now(),
|
||||
graph_start_time=state["graph_start_time"],
|
||||
),
|
||||
}
|
||||
@@ -0,0 +1,31 @@
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
|
||||
from danswer.agent_search.deep_qa_graph.states import ResearchQAState
|
||||
from danswer.agent_search.shared_graph_utils.utils import generate_log_message
|
||||
|
||||
|
||||
def sub_final_format(state: ResearchQAState) -> dict[str, Any]:
|
||||
"""
|
||||
Create the final output for the QA subgraph
|
||||
"""
|
||||
|
||||
print("---SUB FINAL FORMAT---")
|
||||
node_start_time = datetime.now()
|
||||
|
||||
return {
|
||||
# TODO: Type this
|
||||
"sub_qas": [
|
||||
{
|
||||
"sub_question": state["sub_question"],
|
||||
"sub_answer": state["sub_question_answer"],
|
||||
"sub_question_nr": state["sub_question_nr"],
|
||||
"sub_answer_check": state["sub_question_answer_check"],
|
||||
}
|
||||
],
|
||||
"log_messages": generate_log_message(
|
||||
message="sub - final format",
|
||||
node_start_time=node_start_time,
|
||||
graph_start_time=state["graph_start_time"],
|
||||
),
|
||||
}
|
||||
56
backend/danswer/agent_search/deep_qa_graph/nodes/generate.py
Normal file
56
backend/danswer/agent_search/deep_qa_graph/nodes/generate.py
Normal file
@@ -0,0 +1,56 @@
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
|
||||
from langchain_core.messages import HumanMessage
|
||||
from langchain_core.messages import merge_message_runs
|
||||
|
||||
from danswer.agent_search.deep_qa_graph.states import ResearchQAState
|
||||
from danswer.agent_search.shared_graph_utils.prompts import BASE_RAG_PROMPT
|
||||
from danswer.agent_search.shared_graph_utils.utils import format_docs
|
||||
from danswer.agent_search.shared_graph_utils.utils import generate_log_message
|
||||
|
||||
|
||||
def sub_generate(state: ResearchQAState) -> dict[str, Any]:
|
||||
"""
|
||||
Generate answer
|
||||
|
||||
Args:
|
||||
state (messages): The current state
|
||||
|
||||
Returns:
|
||||
dict: The updated state with re-phrased question
|
||||
"""
|
||||
print("---SUB GENERATE---")
|
||||
node_start_time = datetime.now()
|
||||
|
||||
question = state["sub_question"]
|
||||
docs = state["sub_question_verified_retrieval_docs"]
|
||||
|
||||
print(f"Number of verified retrieval docs for sub-question: {len(docs)}")
|
||||
|
||||
msg = [
|
||||
HumanMessage(
|
||||
content=BASE_RAG_PROMPT.format(question=question, context=format_docs(docs))
|
||||
)
|
||||
]
|
||||
|
||||
# Grader
|
||||
if len(docs) > 0:
|
||||
model = state["fast_llm"]
|
||||
response = list(
|
||||
model.stream(
|
||||
prompt=msg,
|
||||
)
|
||||
)
|
||||
response_str = merge_message_runs(response, chunk_separator="")[0].content
|
||||
else:
|
||||
response_str = ""
|
||||
|
||||
return {
|
||||
"sub_question_answer": response_str,
|
||||
"log_messages": generate_log_message(
|
||||
message="sub - generate",
|
||||
node_start_time=node_start_time,
|
||||
graph_start_time=state["graph_start_time"],
|
||||
),
|
||||
}
|
||||
57
backend/danswer/agent_search/deep_qa_graph/nodes/qa_check.py
Normal file
57
backend/danswer/agent_search/deep_qa_graph/nodes/qa_check.py
Normal file
@@ -0,0 +1,57 @@
|
||||
import json
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
|
||||
from langchain_core.messages import HumanMessage
|
||||
|
||||
from danswer.agent_search.deep_qa_graph.prompts import SUB_CHECK_PROMPT
|
||||
from danswer.agent_search.deep_qa_graph.states import ResearchQAState
|
||||
from danswer.agent_search.shared_graph_utils.models import BinaryDecision
|
||||
from danswer.agent_search.shared_graph_utils.utils import generate_log_message
|
||||
|
||||
|
||||
def sub_qa_check(state: ResearchQAState) -> dict[str, Any]:
|
||||
"""
|
||||
Check whether the final output satisfies the original user question
|
||||
|
||||
Args:
|
||||
state (messages): The current state
|
||||
|
||||
Returns:
|
||||
dict: The updated state with the final decision
|
||||
"""
|
||||
|
||||
print("---CHECK SUB QUTPUT---")
|
||||
node_start_time = datetime.now()
|
||||
|
||||
sub_answer = state["sub_question_answer"]
|
||||
sub_question = state["sub_question"]
|
||||
|
||||
msg = [
|
||||
HumanMessage(
|
||||
content=SUB_CHECK_PROMPT.format(
|
||||
sub_question=sub_question, sub_answer=sub_answer
|
||||
)
|
||||
)
|
||||
]
|
||||
|
||||
# Grader
|
||||
model = state["fast_llm"]
|
||||
response = list(
|
||||
model.stream(
|
||||
prompt=msg,
|
||||
structured_response_format=BinaryDecision.model_json_schema(),
|
||||
)
|
||||
)
|
||||
|
||||
raw_response = json.loads(response[0].pretty_repr())
|
||||
formatted_response = BinaryDecision.model_validate(raw_response)
|
||||
|
||||
return {
|
||||
"sub_question_answer_check": formatted_response.decision,
|
||||
"log_messages": generate_log_message(
|
||||
message=f"sub - qa check: {formatted_response.decision}",
|
||||
node_start_time=node_start_time,
|
||||
graph_start_time=state["graph_start_time"],
|
||||
),
|
||||
}
|
||||
64
backend/danswer/agent_search/deep_qa_graph/nodes/rewrite.py
Normal file
64
backend/danswer/agent_search/deep_qa_graph/nodes/rewrite.py
Normal file
@@ -0,0 +1,64 @@
|
||||
import json
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
|
||||
from langchain_core.messages import HumanMessage
|
||||
|
||||
from danswer.agent_search.deep_qa_graph.states import ResearchQAState
|
||||
from danswer.agent_search.shared_graph_utils.models import RewrittenQueries
|
||||
from danswer.agent_search.shared_graph_utils.prompts import REWRITE_PROMPT_MULTI
|
||||
from danswer.agent_search.shared_graph_utils.utils import generate_log_message
|
||||
from danswer.llm.interfaces import LLM
|
||||
|
||||
|
||||
def sub_rewrite(state: ResearchQAState) -> dict[str, Any]:
|
||||
"""
|
||||
Transform the initial question into more suitable search queries.
|
||||
|
||||
Args:
|
||||
state (messages): The current state
|
||||
|
||||
Returns:
|
||||
dict: The updated state with re-phrased question
|
||||
"""
|
||||
|
||||
print("---SUB TRANSFORM QUERY---")
|
||||
node_start_time = datetime.now()
|
||||
|
||||
question = state["sub_question"]
|
||||
|
||||
msg = [
|
||||
HumanMessage(
|
||||
content=REWRITE_PROMPT_MULTI.format(question=question),
|
||||
)
|
||||
]
|
||||
fast_llm: LLM = state["fast_llm"]
|
||||
llm_response = list(
|
||||
fast_llm.stream(
|
||||
prompt=msg,
|
||||
structured_response_format=RewrittenQueries.model_json_schema(),
|
||||
)
|
||||
)
|
||||
|
||||
# Get the rewritten queries in a defined format
|
||||
rewritten_queries: RewrittenQueries = json.loads(llm_response[0].pretty_repr())
|
||||
|
||||
print(f"rewritten_queries: {rewritten_queries}")
|
||||
|
||||
rewritten_queries = RewrittenQueries(
|
||||
rewritten_queries=[
|
||||
"music hard to listen to",
|
||||
"Music that is not fun or pleasant",
|
||||
]
|
||||
)
|
||||
|
||||
print(f"hardcoded rewritten_queries: {rewritten_queries}")
|
||||
|
||||
return {
|
||||
"sub_question_rewritten_queries": rewritten_queries,
|
||||
"log_messages": generate_log_message(
|
||||
message="sub - rewrite",
|
||||
node_start_time=node_start_time,
|
||||
graph_start_time=state["graph_start_time"],
|
||||
),
|
||||
}
|
||||
59
backend/danswer/agent_search/deep_qa_graph/nodes/verifier.py
Normal file
59
backend/danswer/agent_search/deep_qa_graph/nodes/verifier.py
Normal file
@@ -0,0 +1,59 @@
|
||||
import json
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
|
||||
from langchain_core.messages import HumanMessage
|
||||
|
||||
from danswer.agent_search.primary_graph.states import VerifierState
|
||||
from danswer.agent_search.shared_graph_utils.models import BinaryDecision
|
||||
from danswer.agent_search.shared_graph_utils.prompts import VERIFIER_PROMPT
|
||||
from danswer.agent_search.shared_graph_utils.utils import generate_log_message
|
||||
|
||||
|
||||
def sub_verifier(state: VerifierState) -> dict[str, Any]:
|
||||
"""
|
||||
Check whether the document is relevant for the original user question
|
||||
|
||||
Args:
|
||||
state (VerifierState): The current state
|
||||
|
||||
Returns:
|
||||
dict: ict: The updated state with the final decision
|
||||
"""
|
||||
|
||||
print("---SUB VERIFY QUTPUT---")
|
||||
node_start_time = datetime.now()
|
||||
|
||||
question = state["question"]
|
||||
document_content = state["document"].combined_content
|
||||
|
||||
msg = [
|
||||
HumanMessage(
|
||||
content=VERIFIER_PROMPT.format(
|
||||
question=question, document_content=document_content
|
||||
)
|
||||
)
|
||||
]
|
||||
|
||||
# Grader
|
||||
model = state["fast_llm"]
|
||||
response = list(
|
||||
model.stream(
|
||||
prompt=msg,
|
||||
structured_response_format=BinaryDecision.model_json_schema(),
|
||||
)
|
||||
)
|
||||
|
||||
raw_response = json.loads(response[0].pretty_repr())
|
||||
formatted_response = BinaryDecision.model_validate(raw_response)
|
||||
|
||||
return {
|
||||
"deduped_retrieval_docs": [state["document"]]
|
||||
if formatted_response.decision == "yes"
|
||||
else [],
|
||||
"log_messages": generate_log_message(
|
||||
message=f"core - verifier: {formatted_response.decision}",
|
||||
node_start_time=node_start_time,
|
||||
graph_start_time=state["graph_start_time"],
|
||||
),
|
||||
}
|
||||
13
backend/danswer/agent_search/deep_qa_graph/prompts.py
Normal file
13
backend/danswer/agent_search/deep_qa_graph/prompts.py
Normal file
@@ -0,0 +1,13 @@
|
||||
SUB_CHECK_PROMPT = """ \n
|
||||
Please check whether the suggested answer seems to address the original question.
|
||||
|
||||
Please only answer with 'yes' or 'no' \n
|
||||
Here is the initial question:
|
||||
\n ------- \n
|
||||
{question}
|
||||
\n ------- \n
|
||||
Here is the proposed answer:
|
||||
\n ------- \n
|
||||
{base_answer}
|
||||
\n ------- \n
|
||||
Please answer with yes or no:"""
|
||||
64
backend/danswer/agent_search/deep_qa_graph/states.py
Normal file
64
backend/danswer/agent_search/deep_qa_graph/states.py
Normal file
@@ -0,0 +1,64 @@
|
||||
import operator
|
||||
from collections.abc import Sequence
|
||||
from datetime import datetime
|
||||
from typing import Annotated
|
||||
from typing import TypedDict
|
||||
|
||||
from langchain_core.messages import BaseMessage
|
||||
from langgraph.graph.message import add_messages
|
||||
|
||||
from danswer.context.search.models import InferenceSection
|
||||
from danswer.llm.interfaces import LLM
|
||||
|
||||
|
||||
class ResearchQAState(TypedDict):
|
||||
# The 'core SubQuestion' state.
|
||||
original_question: str
|
||||
graph_start_time: datetime
|
||||
sub_question_rewritten_queries: list[str]
|
||||
sub_question: str
|
||||
sub_question_nr: int
|
||||
sub_question_base_retrieval_docs: Annotated[
|
||||
Sequence[InferenceSection], operator.add
|
||||
]
|
||||
sub_question_deduped_retrieval_docs: Annotated[
|
||||
Sequence[InferenceSection], operator.add
|
||||
]
|
||||
sub_question_verified_retrieval_docs: Annotated[
|
||||
Sequence[InferenceSection], operator.add
|
||||
]
|
||||
sub_question_reranked_retrieval_docs: Annotated[
|
||||
Sequence[InferenceSection], operator.add
|
||||
]
|
||||
sub_question_top_chunks: Annotated[Sequence[dict], operator.add]
|
||||
sub_question_answer: str
|
||||
sub_question_answer_check: str
|
||||
log_messages: Annotated[Sequence[BaseMessage], add_messages]
|
||||
sub_qas: Annotated[Sequence[dict], operator.add]
|
||||
primary_llm: LLM
|
||||
fast_llm: LLM
|
||||
|
||||
|
||||
class ResearchQAOutputState(TypedDict):
|
||||
# The 'SubQuestion' output state. Removes all the intermediate states
|
||||
sub_question_rewritten_queries: list[str]
|
||||
sub_question: str
|
||||
sub_question_nr: int
|
||||
# Answers sent back to core
|
||||
sub_qas: Annotated[Sequence[dict], operator.add]
|
||||
sub_question_base_retrieval_docs: Annotated[
|
||||
Sequence[InferenceSection], operator.add
|
||||
]
|
||||
sub_question_deduped_retrieval_docs: Annotated[
|
||||
Sequence[InferenceSection], operator.add
|
||||
]
|
||||
sub_question_verified_retrieval_docs: Annotated[
|
||||
Sequence[InferenceSection], operator.add
|
||||
]
|
||||
sub_question_reranked_retrieval_docs: Annotated[
|
||||
Sequence[InferenceSection], operator.add
|
||||
]
|
||||
sub_question_top_chunks: Annotated[Sequence[dict], operator.add]
|
||||
sub_question_answer: str
|
||||
sub_question_answer_check: str
|
||||
log_messages: Annotated[Sequence[BaseMessage], add_messages]
|
||||
75
backend/danswer/agent_search/primary_graph/edges.py
Normal file
75
backend/danswer/agent_search/primary_graph/edges.py
Normal file
@@ -0,0 +1,75 @@
|
||||
from collections.abc import Hashable
|
||||
from typing import Union
|
||||
|
||||
from langchain_core.messages import HumanMessage
|
||||
from langgraph.types import Send
|
||||
|
||||
from danswer.agent_search.core_qa_graph.states import BaseQAState
|
||||
from danswer.agent_search.deep_qa_graph.states import ResearchQAState
|
||||
from danswer.agent_search.primary_graph.states import QAState
|
||||
from danswer.agent_search.shared_graph_utils.prompts import BASE_CHECK_PROMPT
|
||||
|
||||
|
||||
def continue_to_initial_sub_questions(
|
||||
state: QAState,
|
||||
) -> Union[Hashable, list[Hashable]]:
|
||||
# Routes re-written queries to the (parallel) retrieval steps
|
||||
# Notice the 'Send()' API that takes care of the parallelization
|
||||
return [
|
||||
Send(
|
||||
"sub_answers_graph_initial",
|
||||
BaseQAState(
|
||||
sub_question_str=initial_sub_question["sub_question_str"],
|
||||
sub_question_search_queries=initial_sub_question[
|
||||
"sub_question_search_queries"
|
||||
],
|
||||
sub_question_nr=initial_sub_question["sub_question_nr"],
|
||||
primary_llm=state["primary_llm"],
|
||||
fast_llm=state["fast_llm"],
|
||||
graph_start_time=state["graph_start_time"],
|
||||
),
|
||||
)
|
||||
for initial_sub_question in state["initial_sub_questions"]
|
||||
]
|
||||
|
||||
|
||||
def continue_to_answer_sub_questions(state: QAState) -> Union[Hashable, list[Hashable]]:
|
||||
# Routes re-written queries to the (parallel) retrieval steps
|
||||
# Notice the 'Send()' API that takes care of the parallelization
|
||||
return [
|
||||
Send(
|
||||
"sub_answers_graph",
|
||||
ResearchQAState(
|
||||
sub_question=sub_question["sub_question_str"],
|
||||
sub_question_nr=sub_question["sub_question_nr"],
|
||||
graph_start_time=state["graph_start_time"],
|
||||
primary_llm=state["primary_llm"],
|
||||
fast_llm=state["fast_llm"],
|
||||
),
|
||||
)
|
||||
for sub_question in state["sub_questions"]
|
||||
]
|
||||
|
||||
|
||||
def continue_to_deep_answer(state: QAState) -> Union[Hashable, list[Hashable]]:
|
||||
print("---GO TO DEEP ANSWER OR END---")
|
||||
|
||||
base_answer = state["base_answer"]
|
||||
|
||||
question = state["original_question"]
|
||||
|
||||
BASE_CHECK_MESSAGE = [
|
||||
HumanMessage(
|
||||
content=BASE_CHECK_PROMPT.format(question=question, base_answer=base_answer)
|
||||
)
|
||||
]
|
||||
|
||||
model = state["fast_llm"]
|
||||
response = model.invoke(BASE_CHECK_MESSAGE)
|
||||
|
||||
print(f"CAN WE CONTINUE W/O GENERATING A DEEP ANSWER? - {response.pretty_repr()}")
|
||||
|
||||
if response.pretty_repr() == "no":
|
||||
return "decompose"
|
||||
else:
|
||||
return "end"
|
||||
171
backend/danswer/agent_search/primary_graph/graph_builder.py
Normal file
171
backend/danswer/agent_search/primary_graph/graph_builder.py
Normal file
@@ -0,0 +1,171 @@
|
||||
from langgraph.graph import END
|
||||
from langgraph.graph import START
|
||||
from langgraph.graph import StateGraph
|
||||
|
||||
from danswer.agent_search.core_qa_graph.graph_builder import build_core_qa_graph
|
||||
from danswer.agent_search.deep_qa_graph.graph_builder import build_deep_qa_graph
|
||||
from danswer.agent_search.primary_graph.edges import continue_to_answer_sub_questions
|
||||
from danswer.agent_search.primary_graph.edges import continue_to_deep_answer
|
||||
from danswer.agent_search.primary_graph.edges import continue_to_initial_sub_questions
|
||||
from danswer.agent_search.primary_graph.nodes.base_wait import base_wait
|
||||
from danswer.agent_search.primary_graph.nodes.combine_retrieved_docs import (
|
||||
combine_retrieved_docs,
|
||||
)
|
||||
from danswer.agent_search.primary_graph.nodes.custom_retrieve import custom_retrieve
|
||||
from danswer.agent_search.primary_graph.nodes.decompose import decompose
|
||||
from danswer.agent_search.primary_graph.nodes.deep_answer_generation import (
|
||||
deep_answer_generation,
|
||||
)
|
||||
from danswer.agent_search.primary_graph.nodes.dummy_start import dummy_start
|
||||
from danswer.agent_search.primary_graph.nodes.entity_term_extraction import (
|
||||
entity_term_extraction,
|
||||
)
|
||||
from danswer.agent_search.primary_graph.nodes.final_stuff import final_stuff
|
||||
from danswer.agent_search.primary_graph.nodes.generate_initial import generate_initial
|
||||
from danswer.agent_search.primary_graph.nodes.main_decomp_base import main_decomp_base
|
||||
from danswer.agent_search.primary_graph.nodes.rewrite import rewrite
|
||||
from danswer.agent_search.primary_graph.nodes.sub_qa_level_aggregator import (
|
||||
sub_qa_level_aggregator,
|
||||
)
|
||||
from danswer.agent_search.primary_graph.nodes.sub_qa_manager import sub_qa_manager
|
||||
from danswer.agent_search.primary_graph.nodes.verifier import verifier
|
||||
from danswer.agent_search.primary_graph.states import QAState
|
||||
|
||||
|
||||
def build_core_graph() -> StateGraph:
|
||||
# Define the nodes we will cycle between
|
||||
core_answer_graph = StateGraph(state_schema=QAState)
|
||||
|
||||
### Add Nodes ###
|
||||
core_answer_graph.add_node(node="dummy_start",
|
||||
action=dummy_start)
|
||||
|
||||
# Re-writing the question
|
||||
core_answer_graph.add_node(node="rewrite",
|
||||
action=rewrite)
|
||||
|
||||
# The retrieval step
|
||||
core_answer_graph.add_node(node="custom_retrieve",
|
||||
action=custom_retrieve)
|
||||
|
||||
# Combine and dedupe retrieved docs.
|
||||
core_answer_graph.add_node(
|
||||
node="combine_retrieved_docs",
|
||||
action=combine_retrieved_docs
|
||||
)
|
||||
|
||||
# Extract entities, terms and relationships
|
||||
core_answer_graph.add_node(
|
||||
node="entity_term_extraction",
|
||||
action=entity_term_extraction
|
||||
)
|
||||
|
||||
# Verifying that a retrieved doc is relevant
|
||||
core_answer_graph.add_node(node="verifier",
|
||||
action=verifier)
|
||||
|
||||
# Initial question decomposition
|
||||
core_answer_graph.add_node(node="main_decomp_base",
|
||||
action=main_decomp_base)
|
||||
|
||||
# Build the base QA sub-graph and compile it
|
||||
compiled_core_qa_graph = build_core_qa_graph().compile()
|
||||
# Add the compiled base QA sub-graph as a node to the core graph
|
||||
core_answer_graph.add_node(
|
||||
node="sub_answers_graph_initial",
|
||||
action=compiled_core_qa_graph
|
||||
)
|
||||
|
||||
# Checking whether the initial answer is in the ballpark
|
||||
core_answer_graph.add_node(node="base_wait",
|
||||
action=base_wait)
|
||||
|
||||
# Decompose the question into sub-questions
|
||||
core_answer_graph.add_node(node="decompose",
|
||||
action=decompose)
|
||||
|
||||
# Manage the sub-questions
|
||||
core_answer_graph.add_node(node="sub_qa_manager",
|
||||
action=sub_qa_manager)
|
||||
|
||||
# Build the research QA sub-graph and compile it
|
||||
compiled_deep_qa_graph = build_deep_qa_graph().compile()
|
||||
# Add the compiled research QA sub-graph as a node to the core graph
|
||||
core_answer_graph.add_node(node="sub_answers_graph",
|
||||
action=compiled_deep_qa_graph)
|
||||
|
||||
# Aggregate the sub-questions
|
||||
core_answer_graph.add_node(
|
||||
node="sub_qa_level_aggregator",
|
||||
action=sub_qa_level_aggregator
|
||||
)
|
||||
|
||||
# aggregate sub questions and answers
|
||||
core_answer_graph.add_node(
|
||||
node="deep_answer_generation",
|
||||
action=deep_answer_generation
|
||||
)
|
||||
|
||||
# A final clean-up step
|
||||
core_answer_graph.add_node(node="final_stuff",
|
||||
action=final_stuff)
|
||||
|
||||
# Generating a response after we know the documents are relevant
|
||||
core_answer_graph.add_node(node="generate_initial",
|
||||
action=generate_initial)
|
||||
|
||||
### Add Edges ###
|
||||
|
||||
# start the initial sub-question decomposition
|
||||
core_answer_graph.add_edge(start_key=START,
|
||||
end_key="main_decomp_base")
|
||||
|
||||
core_answer_graph.add_conditional_edges(
|
||||
source="main_decomp_base",
|
||||
path=continue_to_initial_sub_questions,
|
||||
)
|
||||
|
||||
# use the retrieved information to generate the answer
|
||||
core_answer_graph.add_edge(
|
||||
start_key=["verifier", "sub_answers_graph_initial"],
|
||||
end_key="generate_initial"
|
||||
)
|
||||
core_answer_graph.add_edge(start_key="generate_initial",
|
||||
end_key="base_wait")
|
||||
|
||||
core_answer_graph.add_conditional_edges(
|
||||
source="base_wait",
|
||||
path=continue_to_deep_answer,
|
||||
path_map={"decompose": "entity_term_extraction", "end": "final_stuff"},
|
||||
)
|
||||
|
||||
core_answer_graph.add_edge(start_key="entity_term_extraction", end_key="decompose")
|
||||
|
||||
core_answer_graph.add_edge(start_key="decompose",
|
||||
end_key="sub_qa_manager")
|
||||
core_answer_graph.add_conditional_edges(
|
||||
source="sub_qa_manager",
|
||||
path=continue_to_answer_sub_questions,
|
||||
)
|
||||
|
||||
core_answer_graph.add_edge(
|
||||
start_key="sub_answers_graph",
|
||||
end_key="sub_qa_level_aggregator"
|
||||
)
|
||||
|
||||
core_answer_graph.add_edge(
|
||||
start_key="sub_qa_level_aggregator",
|
||||
end_key="deep_answer_generation"
|
||||
)
|
||||
|
||||
core_answer_graph.add_edge(
|
||||
start_key="deep_answer_generation",
|
||||
end_key="final_stuff"
|
||||
)
|
||||
|
||||
core_answer_graph.add_edge(start_key="final_stuff",
|
||||
end_key=END)
|
||||
|
||||
core_answer_graph.compile()
|
||||
|
||||
return core_answer_graph
|
||||
@@ -0,0 +1,27 @@
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
|
||||
from danswer.agent_search.primary_graph.states import QAState
|
||||
from danswer.agent_search.shared_graph_utils.utils import generate_log_message
|
||||
|
||||
|
||||
def base_wait(state: QAState) -> dict[str, Any]:
|
||||
"""
|
||||
Ensures that all required steps are completed before proceeding to the next step
|
||||
|
||||
Args:
|
||||
state (messages): The current state
|
||||
|
||||
Returns:
|
||||
dict: {} (no operation, just logging)
|
||||
"""
|
||||
|
||||
print("---Base Wait ---")
|
||||
node_start_time = datetime.now()
|
||||
return {
|
||||
"log_messages": generate_log_message(
|
||||
message="core - base_wait",
|
||||
node_start_time=node_start_time,
|
||||
graph_start_time=state["graph_start_time"],
|
||||
),
|
||||
}
|
||||
@@ -0,0 +1,36 @@
|
||||
from collections.abc import Sequence
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
|
||||
from danswer.agent_search.primary_graph.states import QAState
|
||||
from danswer.agent_search.shared_graph_utils.utils import generate_log_message
|
||||
from danswer.context.search.models import InferenceSection
|
||||
|
||||
|
||||
def combine_retrieved_docs(state: QAState) -> dict[str, Any]:
|
||||
"""
|
||||
Dedupe the retrieved docs.
|
||||
"""
|
||||
node_start_time = datetime.now()
|
||||
|
||||
base_retrieval_docs: Sequence[InferenceSection] = state["base_retrieval_docs"]
|
||||
|
||||
print(f"Number of docs from steps: {len(base_retrieval_docs)}")
|
||||
dedupe_docs: list[InferenceSection] = []
|
||||
for base_retrieval_doc in base_retrieval_docs:
|
||||
if not any(
|
||||
base_retrieval_doc.center_chunk.document_id == doc.center_chunk.document_id
|
||||
for doc in dedupe_docs
|
||||
):
|
||||
dedupe_docs.append(base_retrieval_doc)
|
||||
|
||||
print(f"Number of deduped docs: {len(dedupe_docs)}")
|
||||
|
||||
return {
|
||||
"deduped_retrieval_docs": dedupe_docs,
|
||||
"log_messages": generate_log_message(
|
||||
message="core - combine_retrieved_docs (dedupe)",
|
||||
node_start_time=node_start_time,
|
||||
graph_start_time=state["graph_start_time"],
|
||||
),
|
||||
}
|
||||
@@ -0,0 +1,52 @@
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
|
||||
from danswer.agent_search.primary_graph.states import RetrieverState
|
||||
from danswer.agent_search.shared_graph_utils.utils import generate_log_message
|
||||
from danswer.context.search.models import InferenceSection
|
||||
from danswer.context.search.models import SearchRequest
|
||||
from danswer.context.search.pipeline import SearchPipeline
|
||||
from danswer.db.engine import get_session_context_manager
|
||||
from danswer.llm.factory import get_default_llms
|
||||
|
||||
|
||||
def custom_retrieve(state: RetrieverState) -> dict[str, Any]:
|
||||
"""
|
||||
Retrieve documents
|
||||
|
||||
Args:
|
||||
retriever_state (dict): The current graph state
|
||||
|
||||
Returns:
|
||||
state (dict): New key added to state, documents, that contains retrieved documents
|
||||
"""
|
||||
print("---RETRIEVE---")
|
||||
|
||||
node_start_time = datetime.now()
|
||||
|
||||
query = state["rewritten_query"]
|
||||
|
||||
# Retrieval
|
||||
# TODO: add the actual retrieval, probably from search_tool.run()
|
||||
llm, fast_llm = get_default_llms()
|
||||
with get_session_context_manager() as db_session:
|
||||
top_sections = SearchPipeline(
|
||||
search_request=SearchRequest(
|
||||
query=query,
|
||||
),
|
||||
user=None,
|
||||
llm=llm,
|
||||
fast_llm=fast_llm,
|
||||
db_session=db_session,
|
||||
).reranked_sections
|
||||
print(len(top_sections))
|
||||
documents: list[InferenceSection] = []
|
||||
|
||||
return {
|
||||
"base_retrieval_docs": documents,
|
||||
"log_messages": generate_log_message(
|
||||
message="core - custom_retrieve",
|
||||
node_start_time=node_start_time,
|
||||
graph_start_time=state["graph_start_time"],
|
||||
),
|
||||
}
|
||||
@@ -0,0 +1,78 @@
|
||||
import json
|
||||
import re
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
|
||||
from langchain_core.messages import HumanMessage
|
||||
|
||||
from danswer.agent_search.primary_graph.states import QAState
|
||||
from danswer.agent_search.shared_graph_utils.prompts import DEEP_DECOMPOSE_PROMPT
|
||||
from danswer.agent_search.shared_graph_utils.utils import format_entity_term_extraction
|
||||
from danswer.agent_search.shared_graph_utils.utils import generate_log_message
|
||||
|
||||
|
||||
def decompose(state: QAState) -> dict[str, Any]:
|
||||
""" """
|
||||
|
||||
node_start_time = datetime.now()
|
||||
|
||||
question = state["original_question"]
|
||||
base_answer = state["base_answer"]
|
||||
|
||||
# get the entity term extraction dict and properly format it
|
||||
entity_term_extraction_dict = state["retrieved_entities_relationships"][
|
||||
"retrieved_entities_relationships"
|
||||
]
|
||||
|
||||
entity_term_extraction_str = format_entity_term_extraction(
|
||||
entity_term_extraction_dict
|
||||
)
|
||||
|
||||
initial_question_answers = state["initial_sub_qas"]
|
||||
|
||||
addressed_question_list = [
|
||||
x["sub_question"]
|
||||
for x in initial_question_answers
|
||||
if x["sub_answer_check"] == "yes"
|
||||
]
|
||||
failed_question_list = [
|
||||
x["sub_question"]
|
||||
for x in initial_question_answers
|
||||
if x["sub_answer_check"] == "no"
|
||||
]
|
||||
|
||||
msg = [
|
||||
HumanMessage(
|
||||
content=DEEP_DECOMPOSE_PROMPT.format(
|
||||
question=question,
|
||||
entity_term_extraction_str=entity_term_extraction_str,
|
||||
base_answer=base_answer,
|
||||
answered_sub_questions="\n - ".join(addressed_question_list),
|
||||
failed_sub_questions="\n - ".join(failed_question_list),
|
||||
),
|
||||
)
|
||||
]
|
||||
|
||||
# Grader
|
||||
model = state["fast_llm"]
|
||||
response = model.invoke(msg)
|
||||
|
||||
cleaned_response = re.sub(r"```json\n|\n```", "", response.pretty_repr())
|
||||
parsed_response = json.loads(cleaned_response)
|
||||
|
||||
sub_questions_dict = {}
|
||||
for sub_question_nr, sub_question_dict in enumerate(
|
||||
parsed_response["sub_questions"]
|
||||
):
|
||||
sub_question_dict["answered"] = False
|
||||
sub_question_dict["verified"] = False
|
||||
sub_questions_dict[sub_question_nr] = sub_question_dict
|
||||
|
||||
return {
|
||||
"decomposed_sub_questions_dict": sub_questions_dict,
|
||||
"log_messages": generate_log_message(
|
||||
message="deep - decompose",
|
||||
node_start_time=node_start_time,
|
||||
graph_start_time=state["graph_start_time"],
|
||||
),
|
||||
}
|
||||
@@ -0,0 +1,61 @@
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
|
||||
from langchain_core.messages import HumanMessage
|
||||
|
||||
from danswer.agent_search.primary_graph.states import QAState
|
||||
from danswer.agent_search.shared_graph_utils.prompts import COMBINED_CONTEXT
|
||||
from danswer.agent_search.shared_graph_utils.prompts import MODIFIED_RAG_PROMPT
|
||||
from danswer.agent_search.shared_graph_utils.utils import format_docs
|
||||
from danswer.agent_search.shared_graph_utils.utils import generate_log_message
|
||||
from danswer.agent_search.shared_graph_utils.utils import normalize_whitespace
|
||||
|
||||
|
||||
# aggregate sub questions and answers
|
||||
def deep_answer_generation(state: QAState) -> dict[str, Any]:
|
||||
"""
|
||||
Generate answer
|
||||
|
||||
Args:
|
||||
state (messages): The current state
|
||||
|
||||
Returns:
|
||||
dict: The updated state with re-phrased question
|
||||
"""
|
||||
print("---DEEP GENERATE---")
|
||||
|
||||
node_start_time = datetime.now()
|
||||
|
||||
question = state["original_question"]
|
||||
docs = state["deduped_retrieval_docs"]
|
||||
|
||||
deep_answer_context = state["core_answer_dynamic_context"]
|
||||
|
||||
print(f"Number of verified retrieval docs - deep: {len(docs)}")
|
||||
|
||||
combined_context = normalize_whitespace(
|
||||
COMBINED_CONTEXT.format(
|
||||
deep_answer_context=deep_answer_context, formated_docs=format_docs(docs)
|
||||
)
|
||||
)
|
||||
|
||||
msg = [
|
||||
HumanMessage(
|
||||
content=MODIFIED_RAG_PROMPT.format(
|
||||
question=question, combined_context=combined_context
|
||||
)
|
||||
)
|
||||
]
|
||||
|
||||
# Grader
|
||||
model = state["fast_llm"]
|
||||
response = model.invoke(msg)
|
||||
|
||||
return {
|
||||
"deep_answer": response.content,
|
||||
"log_messages": generate_log_message(
|
||||
message="deep - deep answer generation",
|
||||
node_start_time=node_start_time,
|
||||
graph_start_time=state["graph_start_time"],
|
||||
),
|
||||
}
|
||||
@@ -0,0 +1,11 @@
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
|
||||
from danswer.agent_search.primary_graph.states import QAState
|
||||
|
||||
|
||||
def dummy_start(state: QAState) -> dict[str, Any]:
|
||||
"""
|
||||
Dummy node to set the start time
|
||||
"""
|
||||
return {"start_time": datetime.now()}
|
||||
@@ -0,0 +1,51 @@
|
||||
import json
|
||||
import re
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
|
||||
from langchain_core.messages import HumanMessage
|
||||
from langchain_core.messages import merge_message_runs
|
||||
|
||||
from danswer.agent_search.primary_graph.prompts import ENTITY_TERM_PROMPT
|
||||
from danswer.agent_search.primary_graph.states import QAState
|
||||
from danswer.agent_search.shared_graph_utils.utils import format_docs
|
||||
from danswer.agent_search.shared_graph_utils.utils import generate_log_message
|
||||
from danswer.llm.factory import get_default_llms
|
||||
|
||||
|
||||
def entity_term_extraction(state: QAState) -> dict[str, Any]:
|
||||
"""Extract entities and terms from the question and context"""
|
||||
node_start_time = datetime.now()
|
||||
|
||||
question = state["original_question"]
|
||||
docs = state["deduped_retrieval_docs"]
|
||||
|
||||
doc_context = format_docs(docs)
|
||||
|
||||
msg = [
|
||||
HumanMessage(
|
||||
content=ENTITY_TERM_PROMPT.format(question=question, context=doc_context),
|
||||
)
|
||||
]
|
||||
_, fast_llm = get_default_llms()
|
||||
# Grader
|
||||
llm_response_list = list(
|
||||
fast_llm.stream(
|
||||
prompt=msg,
|
||||
# structured_response_format={"type": "json_object", "schema": RewrittenQueries.model_json_schema()},
|
||||
# structured_response_format=RewrittenQueries.model_json_schema(),
|
||||
)
|
||||
)
|
||||
llm_response = merge_message_runs(llm_response_list, chunk_separator="")[0].content
|
||||
|
||||
cleaned_response = re.sub(r"```json\n|\n```", "", llm_response)
|
||||
parsed_response = json.loads(cleaned_response)
|
||||
|
||||
return {
|
||||
"retrieved_entities_relationships": parsed_response,
|
||||
"log_messages": generate_log_message(
|
||||
message="deep - entity term extraction",
|
||||
node_start_time=node_start_time,
|
||||
graph_start_time=state["graph_start_time"],
|
||||
),
|
||||
}
|
||||
@@ -0,0 +1,85 @@
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
|
||||
from danswer.agent_search.primary_graph.states import QAState
|
||||
from danswer.agent_search.shared_graph_utils.utils import generate_log_message
|
||||
|
||||
|
||||
def final_stuff(state: QAState) -> dict[str, Any]:
|
||||
"""
|
||||
Invokes the agent model to generate a response based on the current state. Given
|
||||
the question, it will decide to retrieve using the retriever tool, or simply end.
|
||||
|
||||
Args:
|
||||
state (messages): The current state
|
||||
|
||||
Returns:
|
||||
dict: The updated state with the agent response appended to messages
|
||||
"""
|
||||
print("---FINAL---")
|
||||
node_start_time = datetime.now()
|
||||
|
||||
messages = state["log_messages"]
|
||||
time_ordered_messages = [x.pretty_repr() for x in messages]
|
||||
time_ordered_messages.sort()
|
||||
|
||||
print("Message Log:")
|
||||
print("\n".join(time_ordered_messages))
|
||||
|
||||
initial_sub_qas = state["initial_sub_qas"]
|
||||
initial_sub_qa_list = []
|
||||
for initial_sub_qa in initial_sub_qas:
|
||||
if initial_sub_qa["sub_answer_check"] == "yes":
|
||||
initial_sub_qa_list.append(
|
||||
f' Question:\n {initial_sub_qa["sub_question"]}\n --\n Answer:\n {initial_sub_qa["sub_answer"]}\n -----'
|
||||
)
|
||||
|
||||
initial_sub_qa_context = "\n".join(initial_sub_qa_list)
|
||||
|
||||
log_message = generate_log_message(
|
||||
message="all - final_stuff",
|
||||
node_start_time=node_start_time,
|
||||
graph_start_time=state["graph_start_time"],
|
||||
)
|
||||
|
||||
print(log_message)
|
||||
print("--------------------------------")
|
||||
|
||||
base_answer = state["base_answer"]
|
||||
|
||||
print(f"Final Base Answer:\n{base_answer}")
|
||||
print("--------------------------------")
|
||||
print(f"Initial Answered Sub Questions:\n{initial_sub_qa_context}")
|
||||
print("--------------------------------")
|
||||
|
||||
if not state.get("deep_answer"):
|
||||
print("No Deep Answer was required")
|
||||
return {
|
||||
"log_messages": log_message,
|
||||
}
|
||||
|
||||
deep_answer = state["deep_answer"]
|
||||
sub_qas = state["sub_qas"]
|
||||
sub_qa_list = []
|
||||
for sub_qa in sub_qas:
|
||||
if sub_qa["sub_answer_check"] == "yes":
|
||||
sub_qa_list.append(
|
||||
f' Question:\n {sub_qa["sub_question"]}\n --\n Answer:\n {sub_qa["sub_answer"]}\n -----'
|
||||
)
|
||||
|
||||
sub_qa_context = "\n".join(sub_qa_list)
|
||||
|
||||
print(f"Final Base Answer:\n{base_answer}")
|
||||
print("--------------------------------")
|
||||
print(f"Final Deep Answer:\n{deep_answer}")
|
||||
print("--------------------------------")
|
||||
print("Sub Questions and Answers:")
|
||||
print(sub_qa_context)
|
||||
|
||||
return {
|
||||
"log_messages": generate_log_message(
|
||||
message="all - final_stuff",
|
||||
node_start_time=node_start_time,
|
||||
graph_start_time=state["graph_start_time"],
|
||||
),
|
||||
}
|
||||
52
backend/danswer/agent_search/primary_graph/nodes/generate.py
Normal file
52
backend/danswer/agent_search/primary_graph/nodes/generate.py
Normal file
@@ -0,0 +1,52 @@
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
|
||||
from langchain_core.messages import HumanMessage
|
||||
|
||||
from danswer.agent_search.primary_graph.states import QAState
|
||||
from danswer.agent_search.shared_graph_utils.prompts import BASE_RAG_PROMPT
|
||||
from danswer.agent_search.shared_graph_utils.utils import format_docs
|
||||
from danswer.agent_search.shared_graph_utils.utils import generate_log_message
|
||||
|
||||
|
||||
def generate(state: QAState) -> dict[str, Any]:
|
||||
"""
|
||||
Generate answer
|
||||
|
||||
Args:
|
||||
state (messages): The current state
|
||||
|
||||
Returns:
|
||||
dict: The updated state with re-phrased question
|
||||
"""
|
||||
print("---GENERATE---")
|
||||
node_start_time = datetime.now()
|
||||
|
||||
question = state["original_question"]
|
||||
docs = state["deduped_retrieval_docs"]
|
||||
|
||||
print(f"Number of verified retrieval docs: {len(docs)}")
|
||||
|
||||
msg = [
|
||||
HumanMessage(
|
||||
content=BASE_RAG_PROMPT.format(question=question, context=format_docs(docs))
|
||||
)
|
||||
]
|
||||
|
||||
# Grader
|
||||
llm = state["fast_llm"]
|
||||
response = list(
|
||||
llm.stream(
|
||||
prompt=msg,
|
||||
structured_response_format=None,
|
||||
)
|
||||
)
|
||||
|
||||
return {
|
||||
"base_answer": response[0].pretty_repr(),
|
||||
"log_messages": generate_log_message(
|
||||
message="core - generate",
|
||||
node_start_time=node_start_time,
|
||||
graph_start_time=state["graph_start_time"],
|
||||
),
|
||||
}
|
||||
@@ -0,0 +1,72 @@
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
|
||||
from langchain_core.messages import HumanMessage
|
||||
|
||||
from danswer.agent_search.primary_graph.prompts import INITIAL_RAG_PROMPT
|
||||
from danswer.agent_search.primary_graph.states import QAState
|
||||
from danswer.agent_search.shared_graph_utils.utils import format_docs
|
||||
from danswer.agent_search.shared_graph_utils.utils import generate_log_message
|
||||
|
||||
|
||||
def generate_initial(state: QAState) -> dict[str, Any]:
|
||||
"""
|
||||
Generate answer
|
||||
|
||||
Args:
|
||||
state (messages): The current state
|
||||
|
||||
Returns:
|
||||
dict: The updated state with re-phrased question
|
||||
"""
|
||||
print("---GENERATE INITIAL---")
|
||||
node_start_time = datetime.now()
|
||||
|
||||
question = state["original_question"]
|
||||
docs = state["deduped_retrieval_docs"]
|
||||
print(f"Number of verified retrieval docs - base: {len(docs)}")
|
||||
|
||||
sub_question_answers = state["initial_sub_qas"]
|
||||
|
||||
sub_question_answers_list = []
|
||||
|
||||
_SUB_QUESTION_ANSWER_TEMPLATE = """
|
||||
Sub-Question:\n - {sub_question}\n --\nAnswer:\n - {sub_answer}\n\n
|
||||
"""
|
||||
for sub_question_answer_dict in sub_question_answers:
|
||||
if (
|
||||
sub_question_answer_dict["sub_answer_check"] == "yes"
|
||||
and len(sub_question_answer_dict["sub_answer"]) > 0
|
||||
and sub_question_answer_dict["sub_answer"] != "I don't know"
|
||||
):
|
||||
sub_question_answers_list.append(
|
||||
_SUB_QUESTION_ANSWER_TEMPLATE.format(
|
||||
sub_question=sub_question_answer_dict["sub_question"],
|
||||
sub_answer=sub_question_answer_dict["sub_answer"],
|
||||
)
|
||||
)
|
||||
|
||||
sub_question_answer_str = "\n\n------\n\n".join(sub_question_answers_list)
|
||||
|
||||
msg = [
|
||||
HumanMessage(
|
||||
content=INITIAL_RAG_PROMPT.format(
|
||||
question=question,
|
||||
context=format_docs(docs),
|
||||
answered_sub_questions=sub_question_answer_str,
|
||||
)
|
||||
)
|
||||
]
|
||||
|
||||
# Grader
|
||||
model = state["fast_llm"]
|
||||
response = model.invoke(msg)
|
||||
|
||||
return {
|
||||
"base_answer": response.pretty_repr(),
|
||||
"log_messages": generate_log_message(
|
||||
message="core - generate initial",
|
||||
node_start_time=node_start_time,
|
||||
graph_start_time=state["graph_start_time"],
|
||||
),
|
||||
}
|
||||
@@ -0,0 +1,64 @@
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
|
||||
from langchain_core.messages import HumanMessage
|
||||
|
||||
from danswer.agent_search.primary_graph.prompts import INITIAL_DECOMPOSITION_PROMPT
|
||||
from danswer.agent_search.primary_graph.states import QAState
|
||||
from danswer.agent_search.shared_graph_utils.utils import clean_and_parse_list_string
|
||||
from danswer.agent_search.shared_graph_utils.utils import generate_log_message
|
||||
|
||||
|
||||
def main_decomp_base(state: QAState) -> dict[str, Any]:
|
||||
"""
|
||||
Perform an initial question decomposition, incl. one search term
|
||||
|
||||
Args:
|
||||
state (messages): The current state
|
||||
|
||||
Returns:
|
||||
dict: The updated state with initial decomposition
|
||||
"""
|
||||
|
||||
print("---INITIAL DECOMP---")
|
||||
node_start_time = datetime.now()
|
||||
|
||||
question = state["original_question"]
|
||||
|
||||
msg = [
|
||||
HumanMessage(
|
||||
content=INITIAL_DECOMPOSITION_PROMPT.format(question=question),
|
||||
)
|
||||
]
|
||||
|
||||
# Get the rewritten queries in a defined format
|
||||
model = state["fast_llm"]
|
||||
response = model.invoke(msg)
|
||||
|
||||
content = response.pretty_repr()
|
||||
list_of_subquestions = clean_and_parse_list_string(content)
|
||||
|
||||
decomp_list = []
|
||||
|
||||
for sub_question_nr, sub_question in enumerate(list_of_subquestions):
|
||||
sub_question_str = sub_question["sub_question"].strip()
|
||||
# temporarily
|
||||
sub_question_search_queries = [sub_question["search_term"]]
|
||||
|
||||
decomp_list.append(
|
||||
{
|
||||
"sub_question_str": sub_question_str,
|
||||
"sub_question_search_queries": sub_question_search_queries,
|
||||
"sub_question_nr": sub_question_nr,
|
||||
}
|
||||
)
|
||||
|
||||
return {
|
||||
"initial_sub_questions": decomp_list,
|
||||
"sub_query_start_time": node_start_time,
|
||||
"log_messages": generate_log_message(
|
||||
message="core - initial decomp",
|
||||
node_start_time=node_start_time,
|
||||
graph_start_time=state["graph_start_time"],
|
||||
),
|
||||
}
|
||||
55
backend/danswer/agent_search/primary_graph/nodes/rewrite.py
Normal file
55
backend/danswer/agent_search/primary_graph/nodes/rewrite.py
Normal file
@@ -0,0 +1,55 @@
|
||||
import json
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
|
||||
from langchain_core.messages import HumanMessage
|
||||
|
||||
from danswer.agent_search.primary_graph.states import QAState
|
||||
from danswer.agent_search.shared_graph_utils.models import RewrittenQueries
|
||||
from danswer.agent_search.shared_graph_utils.prompts import REWRITE_PROMPT_MULTI
|
||||
from danswer.agent_search.shared_graph_utils.utils import generate_log_message
|
||||
|
||||
|
||||
def rewrite(state: QAState) -> dict[str, Any]:
|
||||
"""
|
||||
Transform the initial question into more suitable search queries.
|
||||
|
||||
Args:
|
||||
qa_state (messages): The current state
|
||||
|
||||
Returns:
|
||||
dict: The updated state with re-phrased question
|
||||
"""
|
||||
print("---STARTING GRAPH---")
|
||||
graph_start_time = datetime.now()
|
||||
|
||||
print("---TRANSFORM QUERY---")
|
||||
node_start_time = datetime.now()
|
||||
|
||||
question = state["original_question"]
|
||||
|
||||
msg = [
|
||||
HumanMessage(
|
||||
content=REWRITE_PROMPT_MULTI.format(question=question),
|
||||
)
|
||||
]
|
||||
|
||||
# Get the rewritten queries in a defined format
|
||||
fast_llm = state["fast_llm"]
|
||||
llm_response = list(
|
||||
fast_llm.stream(
|
||||
prompt=msg,
|
||||
structured_response_format=RewrittenQueries.model_json_schema(),
|
||||
)
|
||||
)
|
||||
|
||||
formatted_response: RewrittenQueries = json.loads(llm_response[0].pretty_repr())
|
||||
|
||||
return {
|
||||
"rewritten_queries": formatted_response.rewritten_queries,
|
||||
"log_messages": generate_log_message(
|
||||
message="core - rewrite",
|
||||
node_start_time=node_start_time,
|
||||
graph_start_time=graph_start_time,
|
||||
),
|
||||
}
|
||||
@@ -0,0 +1,39 @@
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
|
||||
from danswer.agent_search.primary_graph.states import QAState
|
||||
from danswer.agent_search.shared_graph_utils.utils import generate_log_message
|
||||
|
||||
|
||||
# aggregate sub questions and answers
|
||||
def sub_qa_level_aggregator(state: QAState) -> dict[str, Any]:
|
||||
sub_qas = state["sub_qas"]
|
||||
|
||||
node_start_time = datetime.now()
|
||||
|
||||
dynamic_context_list = [
|
||||
"Below you will find useful information to answer the original question:"
|
||||
]
|
||||
checked_sub_qas = []
|
||||
|
||||
for core_answer_sub_qa in sub_qas:
|
||||
question = core_answer_sub_qa["sub_question"]
|
||||
answer = core_answer_sub_qa["sub_answer"]
|
||||
verified = core_answer_sub_qa["sub_answer_check"]
|
||||
|
||||
if verified == "yes":
|
||||
dynamic_context_list.append(
|
||||
f"Question:\n{question}\n\nAnswer:\n{answer}\n\n---\n\n"
|
||||
)
|
||||
checked_sub_qas.append({"sub_question": question, "sub_answer": answer})
|
||||
dynamic_context = "\n".join(dynamic_context_list)
|
||||
|
||||
return {
|
||||
"core_answer_dynamic_context": dynamic_context,
|
||||
"checked_sub_qas": checked_sub_qas,
|
||||
"log_messages": generate_log_message(
|
||||
message="deep - sub qa level aggregator",
|
||||
node_start_time=node_start_time,
|
||||
graph_start_time=state["graph_start_time"],
|
||||
),
|
||||
}
|
||||
@@ -0,0 +1,28 @@
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
|
||||
from danswer.agent_search.primary_graph.states import QAState
|
||||
from danswer.agent_search.shared_graph_utils.utils import generate_log_message
|
||||
|
||||
|
||||
def sub_qa_manager(state: QAState) -> dict[str, Any]:
|
||||
""" """
|
||||
|
||||
node_start_time = datetime.now()
|
||||
|
||||
sub_questions_dict = state["decomposed_sub_questions_dict"]
|
||||
|
||||
sub_questions = {}
|
||||
|
||||
for sub_question_nr, sub_question_dict in sub_questions_dict.items():
|
||||
sub_questions[sub_question_nr] = sub_question_dict["sub_question"]
|
||||
|
||||
return {
|
||||
"sub_questions": sub_questions,
|
||||
"num_new_question_iterations": 0,
|
||||
"log_messages": generate_log_message(
|
||||
message="deep - sub qa manager",
|
||||
node_start_time=node_start_time,
|
||||
graph_start_time=state["graph_start_time"],
|
||||
),
|
||||
}
|
||||
59
backend/danswer/agent_search/primary_graph/nodes/verifier.py
Normal file
59
backend/danswer/agent_search/primary_graph/nodes/verifier.py
Normal file
@@ -0,0 +1,59 @@
|
||||
import json
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
|
||||
from langchain_core.messages import HumanMessage
|
||||
|
||||
from danswer.agent_search.primary_graph.states import VerifierState
|
||||
from danswer.agent_search.shared_graph_utils.models import BinaryDecision
|
||||
from danswer.agent_search.shared_graph_utils.prompts import VERIFIER_PROMPT
|
||||
from danswer.agent_search.shared_graph_utils.utils import generate_log_message
|
||||
|
||||
|
||||
def verifier(state: VerifierState) -> dict[str, Any]:
|
||||
"""
|
||||
Check whether the document is relevant for the original user question
|
||||
|
||||
Args:
|
||||
state (VerifierState): The current state
|
||||
|
||||
Returns:
|
||||
dict: ict: The updated state with the final decision
|
||||
"""
|
||||
|
||||
print("---VERIFY QUTPUT---")
|
||||
node_start_time = datetime.now()
|
||||
|
||||
question = state["question"]
|
||||
document_content = state["document"].combined_content
|
||||
|
||||
msg = [
|
||||
HumanMessage(
|
||||
content=VERIFIER_PROMPT.format(
|
||||
question=question, document_content=document_content
|
||||
)
|
||||
)
|
||||
]
|
||||
|
||||
# Grader
|
||||
llm = state["fast_llm"]
|
||||
response = list(
|
||||
llm.stream(
|
||||
prompt=msg,
|
||||
structured_response_format=BinaryDecision.model_json_schema(),
|
||||
)
|
||||
)
|
||||
|
||||
raw_response = json.loads(response[0].pretty_repr())
|
||||
formatted_response = BinaryDecision.model_validate(raw_response)
|
||||
|
||||
return {
|
||||
"deduped_retrieval_docs": [state["document"]]
|
||||
if formatted_response.decision == "yes"
|
||||
else [],
|
||||
"log_messages": generate_log_message(
|
||||
message=f"core - verifier: {formatted_response.decision}",
|
||||
node_start_time=node_start_time,
|
||||
graph_start_time=state["graph_start_time"],
|
||||
),
|
||||
}
|
||||
86
backend/danswer/agent_search/primary_graph/prompts.py
Normal file
86
backend/danswer/agent_search/primary_graph/prompts.py
Normal file
@@ -0,0 +1,86 @@
|
||||
INITIAL_DECOMPOSITION_PROMPT = """ \n
|
||||
Please decompose an initial user question into not more than 4 appropriate sub-questions that help to
|
||||
answer the original question. The purpose for this decomposition is to isolate individulal entities
|
||||
(i.e., 'compare sales of company A and company B' -> 'what are sales for company A' + 'what are sales
|
||||
for company B'), split ambiguous terms (i.e., 'what is our success with company A' -> 'what are our
|
||||
sales with company A' + 'what is our market share with company A' + 'is company A a reference customer
|
||||
for us'), etc. Each sub-question should be realistically be answerable by a good RAG system. \n
|
||||
|
||||
For each sub-question, please also create one search term that can be used to retrieve relevant
|
||||
documents from a document store.
|
||||
|
||||
Here is the initial question:
|
||||
\n ------- \n
|
||||
{question}
|
||||
\n ------- \n
|
||||
|
||||
Please formulate your answer as a list of json objects with the following format:
|
||||
|
||||
[{{"sub_question": <sub-question>, "search_term": <search term>}}, ...]
|
||||
|
||||
Answer:
|
||||
"""
|
||||
|
||||
INITIAL_RAG_PROMPT = """ \n
|
||||
You are an assistant for question-answering tasks. Use the information provided below - and only the
|
||||
provided information - to answer the provided question.
|
||||
|
||||
The information provided below consists of:
|
||||
1) a number of answered sub-questions - these are very important(!) and definitely should be
|
||||
considered to answer the question.
|
||||
2) a number of documents that were also deemed relevant for the question.
|
||||
|
||||
If you don't know the answer or if the provided information is empty or insufficient, just say
|
||||
"I don't know". Do not use your internal knowledge!
|
||||
|
||||
Again, only use the provided informationand do not use your internal knowledge! It is a matter of life
|
||||
and death that you do NOT use your internal knowledge, just the provided information!
|
||||
|
||||
Try to keep your answer concise.
|
||||
|
||||
And here is the question and the provided information:
|
||||
\n
|
||||
\nQuestion:\n {question}
|
||||
|
||||
\nAnswered Sub-questions:\n {answered_sub_questions}
|
||||
|
||||
\nContext:\n {context} \n\n
|
||||
\n\n
|
||||
|
||||
Answer:"""
|
||||
|
||||
ENTITY_TERM_PROMPT = """ \n
|
||||
Based on the original question and the context retieved from a dataset, please generate a list of
|
||||
entities (e.g. companies, organizations, industries, products, locations, etc.), terms and concepts
|
||||
(e.g. sales, revenue, etc.) that are relevant for the question, plus their relations to each other.
|
||||
|
||||
\n\n
|
||||
Here is the original question:
|
||||
\n ------- \n
|
||||
{question}
|
||||
\n ------- \n
|
||||
And here is the context retrieved:
|
||||
\n ------- \n
|
||||
{context}
|
||||
\n ------- \n
|
||||
|
||||
Please format your answer as a json object in the following format:
|
||||
|
||||
{{"retrieved_entities_relationships": {{
|
||||
"entities": [{{
|
||||
"entity_name": <assign a name for the entity>,
|
||||
"entity_type": <specify a short type name for the entity, such as 'company', 'location',...>
|
||||
}}],
|
||||
"relationships": [{{
|
||||
"name": <assign a name for the relationship>,
|
||||
"type": <specify a short type name for the relationship, such as 'sales_to', 'is_location_of',...>,
|
||||
"entities": [<related entity name 1>, <related entity name 2>]
|
||||
}}],
|
||||
"terms": [{{
|
||||
"term_name": <assign a name for the term>,
|
||||
"term_type": <specify a short type name for the term, such as 'revenue', 'market_share',...>,
|
||||
"similar_to": <list terms that are similar to this term>
|
||||
}}]
|
||||
}}
|
||||
}}
|
||||
"""
|
||||
73
backend/danswer/agent_search/primary_graph/states.py
Normal file
73
backend/danswer/agent_search/primary_graph/states.py
Normal file
@@ -0,0 +1,73 @@
|
||||
import operator
|
||||
from collections.abc import Sequence
|
||||
from datetime import datetime
|
||||
from typing import Annotated
|
||||
from typing import TypedDict
|
||||
|
||||
from langchain_core.messages import BaseMessage
|
||||
from langgraph.graph.message import add_messages
|
||||
|
||||
from danswer.agent_search.shared_graph_utils.models import RewrittenQueries
|
||||
from danswer.context.search.models import InferenceSection
|
||||
|
||||
|
||||
class QAState(TypedDict):
|
||||
# The 'main' state of the answer graph
|
||||
original_question: str
|
||||
graph_start_time: datetime
|
||||
# start time for parallel initial sub-questionn thread
|
||||
sub_query_start_time: datetime
|
||||
log_messages: Annotated[Sequence[BaseMessage], add_messages]
|
||||
rewritten_queries: RewrittenQueries
|
||||
sub_questions: list[dict]
|
||||
initial_sub_questions: list[dict]
|
||||
ranked_subquestion_ids: list[int]
|
||||
decomposed_sub_questions_dict: dict
|
||||
rejected_sub_questions: Annotated[list[str], operator.add]
|
||||
rejected_sub_questions_handled: bool
|
||||
sub_qas: Annotated[Sequence[dict], operator.add]
|
||||
initial_sub_qas: Annotated[Sequence[dict], operator.add]
|
||||
checked_sub_qas: Annotated[Sequence[dict], operator.add]
|
||||
base_retrieval_docs: Annotated[Sequence[InferenceSection], operator.add]
|
||||
deduped_retrieval_docs: Annotated[Sequence[InferenceSection], operator.add]
|
||||
reranked_retrieval_docs: Annotated[Sequence[InferenceSection], operator.add]
|
||||
retrieved_entities_relationships: dict
|
||||
questions_context: list[dict]
|
||||
qa_level: int
|
||||
top_chunks: list[InferenceSection]
|
||||
sub_question_top_chunks: Annotated[Sequence[dict], operator.add]
|
||||
num_new_question_iterations: int
|
||||
core_answer_dynamic_context: str
|
||||
dynamic_context: str
|
||||
initial_base_answer: str
|
||||
base_answer: str
|
||||
deep_answer: str
|
||||
|
||||
|
||||
class QAOuputState(TypedDict):
|
||||
# The 'main' output state of the answer graph. Removes all the intermediate states
|
||||
original_question: str
|
||||
log_messages: Annotated[Sequence[BaseMessage], add_messages]
|
||||
sub_questions: list[dict]
|
||||
sub_qas: Annotated[Sequence[dict], operator.add]
|
||||
initial_sub_qas: Annotated[Sequence[dict], operator.add]
|
||||
checked_sub_qas: Annotated[Sequence[dict], operator.add]
|
||||
reranked_retrieval_docs: Annotated[Sequence[InferenceSection], operator.add]
|
||||
retrieved_entities_relationships: dict
|
||||
top_chunks: list[InferenceSection]
|
||||
sub_question_top_chunks: Annotated[Sequence[dict], operator.add]
|
||||
base_answer: str
|
||||
deep_answer: str
|
||||
|
||||
|
||||
class RetrieverState(TypedDict):
|
||||
# The state for the parallel Retrievers. They each need to see only one query
|
||||
rewritten_query: str
|
||||
graph_start_time: datetime
|
||||
|
||||
|
||||
class VerifierState(TypedDict):
|
||||
# The state for the parallel verification step. Each node execution need to see only one question/doc pair
|
||||
document: InferenceSection
|
||||
question: str
|
||||
graph_start_time: datetime
|
||||
22
backend/danswer/agent_search/run_graph.py
Normal file
22
backend/danswer/agent_search/run_graph.py
Normal file
@@ -0,0 +1,22 @@
|
||||
from danswer.agent_search.primary_graph.graph_builder import build_core_graph
|
||||
from danswer.llm.answering.answer import AnswerStream
|
||||
from danswer.llm.interfaces import LLM
|
||||
from danswer.tools.tool import Tool
|
||||
|
||||
|
||||
def run_graph(
|
||||
query: str,
|
||||
llm: LLM,
|
||||
tools: list[Tool],
|
||||
) -> AnswerStream:
|
||||
graph = build_core_graph()
|
||||
|
||||
inputs = {
|
||||
"original_question": query,
|
||||
"messages": [],
|
||||
"tools": tools,
|
||||
"llm": llm,
|
||||
}
|
||||
compiled_graph = graph.compile()
|
||||
output = compiled_graph.invoke(input=inputs)
|
||||
yield from output
|
||||
16
backend/danswer/agent_search/shared_graph_utils/models.py
Normal file
16
backend/danswer/agent_search/shared_graph_utils/models.py
Normal file
@@ -0,0 +1,16 @@
|
||||
from typing import Literal
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
# Pydantic models for structured outputs
|
||||
class RewrittenQueries(BaseModel):
|
||||
rewritten_queries: list[str]
|
||||
|
||||
|
||||
class BinaryDecision(BaseModel):
|
||||
decision: Literal["yes", "no"]
|
||||
|
||||
|
||||
class SubQuestions(BaseModel):
|
||||
sub_questions: list[str]
|
||||
342
backend/danswer/agent_search/shared_graph_utils/prompts.py
Normal file
342
backend/danswer/agent_search/shared_graph_utils/prompts.py
Normal file
@@ -0,0 +1,342 @@
|
||||
REWRITE_PROMPT_MULTI_ORIGINAL = """ \n
|
||||
Please convert an initial user question into a 2-3 more appropriate short and pointed search queries for retrievel from a
|
||||
document store. Particularly, try to think about resolving ambiguities and make the search queries more specific,
|
||||
enabling the system to search more broadly.
|
||||
Also, try to make the search queries not redundant, i.e. not too similar! \n\n
|
||||
Here is the initial question:
|
||||
\n ------- \n
|
||||
{question}
|
||||
\n ------- \n
|
||||
|
||||
Formulate the queries separated by '--' (Do not say 'Query 1: ...', just write the querytext): """
|
||||
|
||||
|
||||
REWRITE_PROMPT_MULTI = """ \n
|
||||
Please create a list of 2-3 sample documents that could answer an original question. Each document
|
||||
should be about as long as the original question. \n
|
||||
Here is the initial question:
|
||||
\n ------- \n
|
||||
{question}
|
||||
\n ------- \n
|
||||
|
||||
Formulate the sample documents separated by '--' (Do not say 'Document 1: ...', just write the text): """
|
||||
|
||||
BASE_RAG_PROMPT = """ \n
|
||||
You are an assistant for question-answering tasks. Use the context provided below - and only the
|
||||
provided context - to answer the question. If you don't know the answer or if the provided context is
|
||||
empty, just say "I don't know". Do not use your internal knowledge!
|
||||
|
||||
Again, only use the provided context and do not use your internal knowledge! If you cannot answer the
|
||||
question based on the context, say "I don't know". It is a matter of life and death that you do NOT
|
||||
use your internal knowledge, just the provided information!
|
||||
|
||||
Use three sentences maximum and keep the answer concise.
|
||||
answer concise.\nQuestion:\n {question} \nContext:\n {context} \n\n
|
||||
\n\n
|
||||
Answer:"""
|
||||
|
||||
BASE_CHECK_PROMPT = """ \n
|
||||
Please check whether 1) the suggested answer seems to fully address the original question AND 2)the
|
||||
original question requests a simple, factual answer, and there are no ambiguities, judgements,
|
||||
aggregations, or any other complications that may require extra context. (I.e., if the question is
|
||||
somewhat addressed, but the answer would benefit from more context, then answer with 'no'.)
|
||||
|
||||
Please only answer with 'yes' or 'no' \n
|
||||
Here is the initial question:
|
||||
\n ------- \n
|
||||
{question}
|
||||
\n ------- \n
|
||||
Here is the proposed answer:
|
||||
\n ------- \n
|
||||
{base_answer}
|
||||
\n ------- \n
|
||||
Please answer with yes or no:"""
|
||||
|
||||
VERIFIER_PROMPT = """ \n
|
||||
Please check whether the document seems to be relevant for the answer of the original question. Please
|
||||
only answer with 'yes' or 'no' \n
|
||||
Here is the initial question:
|
||||
\n ------- \n
|
||||
{question}
|
||||
\n ------- \n
|
||||
Here is the document text:
|
||||
\n ------- \n
|
||||
{document_content}
|
||||
\n ------- \n
|
||||
Please answer with yes or no:"""
|
||||
|
||||
INITIAL_DECOMPOSITION_PROMPT_BASIC = """ \n
|
||||
Please decompose an initial user question into not more than 4 appropriate sub-questions that help to
|
||||
answer the original question. The purpose for this decomposition is to isolate individulal entities
|
||||
(i.e., 'compare sales of company A and company B' -> 'what are sales for company A' + 'what are sales
|
||||
for company B'), split ambiguous terms (i.e., 'what is our success with company A' -> 'what are our
|
||||
sales with company A' + 'what is our market share with company A' + 'is company A a reference customer
|
||||
for us'), etc. Each sub-question should be realistically be answerable by a good RAG system. \n
|
||||
|
||||
Here is the initial question:
|
||||
\n ------- \n
|
||||
{question}
|
||||
\n ------- \n
|
||||
|
||||
Please formulate your answer as a list of subquestions:
|
||||
|
||||
Answer:
|
||||
"""
|
||||
|
||||
REWRITE_PROMPT_SINGLE = """ \n
|
||||
Please convert an initial user question into a more appropriate search query for retrievel from a
|
||||
document store. \n
|
||||
Here is the initial question:
|
||||
\n ------- \n
|
||||
{question}
|
||||
\n ------- \n
|
||||
|
||||
Formulate the query: """
|
||||
|
||||
MODIFIED_RAG_PROMPT = """You are an assistant for question-answering tasks. Use the context provided below
|
||||
- and only this context - to answer the question. If you don't know the answer, just say "I don't know".
|
||||
Use three sentences maximum and keep the answer concise.
|
||||
Pay also particular attention to the sub-questions and their answers, at least it may enrich the answer.
|
||||
Again, only use the provided context and do not use your internal knowledge! If you cannot answer the
|
||||
question based on the context, say "I don't know". It is a matter of life and death that you do NOT
|
||||
use your internal knowledge, just the provided information!
|
||||
|
||||
\nQuestion: {question}
|
||||
\nContext: {combined_context} \n
|
||||
|
||||
Answer:"""
|
||||
|
||||
ORIG_DEEP_DECOMPOSE_PROMPT = """ \n
|
||||
An initial user question needs to be answered. An initial answer has been provided but it wasn't quite
|
||||
good enough. Also, some sub-questions had been answered and this information has been used to provide
|
||||
the initial answer. Some other subquestions may have been suggested based on little knowledge, but they
|
||||
were not directly answerable. Also, some entities, relationships and terms are givenm to you so that
|
||||
you have an idea of how the avaiolable data looks like.
|
||||
|
||||
Your role is to generate 3-5 new sub-questions that would help to answer the initial question,
|
||||
considering:
|
||||
|
||||
1) The initial question
|
||||
2) The initial answer that was found to be unsatisfactory
|
||||
3) The sub-questions that were answered
|
||||
4) The sub-questions that were suggested but not answered
|
||||
5) The entities, relationships and terms that were extracted from the context
|
||||
|
||||
The individual questions should be answerable by a good RAG system.
|
||||
So a good idea would be to use the sub-questions to resolve ambiguities and/or to separate the
|
||||
question for different entities that may be involved in the original question, but in a way that does
|
||||
not duplicate questions that were already tried.
|
||||
|
||||
Additional Guidelines:
|
||||
- The sub-questions should be specific to the question and provide richer context for the question,
|
||||
resolve ambiguities, or address shortcoming of the initial answer
|
||||
- Each sub-question - when answered - should be relevant for the answer to the original question
|
||||
- The sub-questions should be free from comparisions, ambiguities,judgements, aggregations, or any
|
||||
other complications that may require extra context.
|
||||
- The sub-questions MUST have the full context of the original question so that it can be executed by
|
||||
a RAG system independently without the original question available
|
||||
(Example:
|
||||
- initial question: "What is the capital of France?"
|
||||
- bad sub-question: "What is the name of the river there?"
|
||||
- good sub-question: "What is the name of the river that flows through Paris?"
|
||||
- For each sub-question, please provide a short explanation for why it is a good sub-question. So
|
||||
generate a list of dictionaries with the following format:
|
||||
[{{"sub_question": <sub-question>, "explanation": <explanation>, "search_term": <rewrite the
|
||||
sub-question using as a search phrase for the document store>}}, ...]
|
||||
|
||||
\n\n
|
||||
Here is the initial question:
|
||||
\n ------- \n
|
||||
{question}
|
||||
\n ------- \n
|
||||
|
||||
Here is the initial sub-optimal answer:
|
||||
\n ------- \n
|
||||
{base_answer}
|
||||
\n ------- \n
|
||||
|
||||
Here are the sub-questions that were answered:
|
||||
\n ------- \n
|
||||
{answered_sub_questions}
|
||||
\n ------- \n
|
||||
|
||||
Here are the sub-questions that were suggested but not answered:
|
||||
\n ------- \n
|
||||
{failed_sub_questions}
|
||||
\n ------- \n
|
||||
|
||||
And here are the entities, relationships and terms extracted from the context:
|
||||
\n ------- \n
|
||||
{entity_term_extraction_str}
|
||||
\n ------- \n
|
||||
|
||||
Please generate the list of good, fully contextualized sub-questions that would help to address the
|
||||
main question. Again, please find questions that are NOT overlapping too much with the already answered
|
||||
sub-questions or those that already were suggested and failed.
|
||||
In other words - what can we try in addition to what has been tried so far?
|
||||
|
||||
Please think through it step by step and then generate the list of json dictionaries with the following
|
||||
format:
|
||||
|
||||
{{"sub_questions": [{{"sub_question": <sub-question>,
|
||||
"explanation": <explanation>,
|
||||
"search_term": <rewrite the sub-question using as a search phrase for the document store>}},
|
||||
...]}} """
|
||||
|
||||
DEEP_DECOMPOSE_PROMPT = """ \n
|
||||
An initial user question needs to be answered. An initial answer has been provided but it wasn't quite
|
||||
good enough. Also, some sub-questions had been answered and this information has been used to provide
|
||||
the initial answer. Some other subquestions may have been suggested based on little knowledge, but they
|
||||
were not directly answerable. Also, some entities, relationships and terms are givenm to you so that
|
||||
you have an idea of how the avaiolable data looks like.
|
||||
|
||||
Your role is to generate 4-6 new sub-questions that would help to answer the initial question,
|
||||
considering:
|
||||
|
||||
1) The initial question
|
||||
2) The initial answer that was found to be unsatisfactory
|
||||
3) The sub-questions that were answered
|
||||
4) The sub-questions that were suggested but not answered
|
||||
5) The entities, relationships and terms that were extracted from the context
|
||||
|
||||
The individual questions should be answerable by a good RAG system.
|
||||
So a good idea would be to use the sub-questions to resolve ambiguities and/or to separate the
|
||||
question for different entities that may be involved in the original question, but in a way that does
|
||||
not duplicate questions that were already tried.
|
||||
|
||||
Additional Guidelines:
|
||||
- The sub-questions should be specific to the question and provide richer context for the question,
|
||||
resolve ambiguities, or address shortcoming of the initial answer
|
||||
- Each sub-question - when answered - should be relevant for the answer to the original question
|
||||
- The sub-questions should be free from comparisions, ambiguities,judgements, aggregations, or any
|
||||
other complications that may require extra context.
|
||||
- The sub-questions MUST have the full context of the original question so that it can be executed by
|
||||
a RAG system independently without the original question available
|
||||
(Example:
|
||||
- initial question: "What is the capital of France?"
|
||||
- bad sub-question: "What is the name of the river there?"
|
||||
- good sub-question: "What is the name of the river that flows through Paris?"
|
||||
- For each sub-question, please also provide a search term that can be used to retrieve relevant
|
||||
documents from a document store.
|
||||
\n\n
|
||||
Here is the initial question:
|
||||
\n ------- \n
|
||||
{question}
|
||||
\n ------- \n
|
||||
|
||||
Here is the initial sub-optimal answer:
|
||||
\n ------- \n
|
||||
{base_answer}
|
||||
\n ------- \n
|
||||
|
||||
Here are the sub-questions that were answered:
|
||||
\n ------- \n
|
||||
{answered_sub_questions}
|
||||
\n ------- \n
|
||||
|
||||
Here are the sub-questions that were suggested but not answered:
|
||||
\n ------- \n
|
||||
{failed_sub_questions}
|
||||
\n ------- \n
|
||||
|
||||
And here are the entities, relationships and terms extracted from the context:
|
||||
\n ------- \n
|
||||
{entity_term_extraction_str}
|
||||
\n ------- \n
|
||||
|
||||
Please generate the list of good, fully contextualized sub-questions that would help to address the
|
||||
main question. Again, please find questions that are NOT overlapping too much with the already answered
|
||||
sub-questions or those that already were suggested and failed.
|
||||
In other words - what can we try in addition to what has been tried so far?
|
||||
|
||||
Generate the list of json dictionaries with the following format:
|
||||
|
||||
{{"sub_questions": [{{"sub_question": <sub-question>,
|
||||
"search_term": <rewrite the sub-question using as a search phrase for the document store>}},
|
||||
...]}} """
|
||||
|
||||
DECOMPOSE_PROMPT = """ \n
|
||||
For an initial user question, please generate at 5-10 individual sub-questions whose answers would help
|
||||
\n to answer the initial question. The individual questions should be answerable by a good RAG system.
|
||||
So a good idea would be to \n use the sub-questions to resolve ambiguities and/or to separate the
|
||||
question for different entities that may be involved in the original question.
|
||||
|
||||
In order to arrive at meaningful sub-questions, please also consider the context retrieved from the
|
||||
document store, expressed as entities, relationships and terms. You can also think about the types
|
||||
mentioned in brackets
|
||||
|
||||
Guidelines:
|
||||
- The sub-questions should be specific to the question and provide richer context for the question,
|
||||
and or resolve ambiguities
|
||||
- Each sub-question - when answered - should be relevant for the answer to the original question
|
||||
- The sub-questions should be free from comparisions, ambiguities,judgements, aggregations, or any
|
||||
other complications that may require extra context.
|
||||
- The sub-questions MUST have the full context of the original question so that it can be executed by
|
||||
a RAG system independently without the original question available
|
||||
(Example:
|
||||
- initial question: "What is the capital of France?"
|
||||
- bad sub-question: "What is the name of the river there?"
|
||||
- good sub-question: "What is the name of the river that flows through Paris?"
|
||||
- For each sub-question, please provide a short explanation for why it is a good sub-question. So
|
||||
generate a list of dictionaries with the following format:
|
||||
[{{"sub_question": <sub-question>, "explanation": <explanation>, "search_term": <rewrite the
|
||||
sub-question using as a search phrase for the document store>}}, ...]
|
||||
|
||||
\n\n
|
||||
Here is the initial question:
|
||||
\n ------- \n
|
||||
{question}
|
||||
\n ------- \n
|
||||
|
||||
And here are the entities, relationships and terms extracted from the context:
|
||||
\n ------- \n
|
||||
{entity_term_extraction_str}
|
||||
\n ------- \n
|
||||
|
||||
Please generate the list of good, fully contextualized sub-questions that would help to address the
|
||||
main question. Don't be too specific unless the original question is specific.
|
||||
Please think through it step by step and then generate the list of json dictionaries with the following
|
||||
format:
|
||||
{{"sub_questions": [{{"sub_question": <sub-question>,
|
||||
"explanation": <explanation>,
|
||||
"search_term": <rewrite the sub-question using as a search phrase for the document store>}},
|
||||
...]}} """
|
||||
|
||||
#### Consolidations
|
||||
COMBINED_CONTEXT = """-------
|
||||
Below you will find useful information to answer the original question. First, you see a number of
|
||||
sub-questions with their answers. This information should be considered to be more focussed and
|
||||
somewhat more specific to the original question as it tries to contextualized facts.
|
||||
After that will see the documents that were considered to be relevant to answer the original question.
|
||||
|
||||
Here are the sub-questions and their answers:
|
||||
\n\n {deep_answer_context} \n\n
|
||||
\n\n Here are the documents that were considered to be relevant to answer the original question:
|
||||
\n\n {formated_docs} \n\n
|
||||
----------------
|
||||
"""
|
||||
|
||||
SUB_QUESTION_EXPLANATION_RANKER_PROMPT = """-------
|
||||
Below you will find a question that we ultimately want to answer (the original question) and a list of
|
||||
motivations in arbitrary order for generated sub-questions that are supposed to help us answering the
|
||||
original question. The motivations are formatted as <motivation number>: <motivation explanation>.
|
||||
(Again, the numbering is arbitrary and does not necessarily mean that 1 is the most relevant
|
||||
motivation and 2 is less relevant.)
|
||||
|
||||
Please rank the motivations in order of relevance for answering the original question. Also, try to
|
||||
ensure that the top questions do not duplicate too much, i.e. that they are not too similar.
|
||||
Ultimately, create a list with the motivation numbers where the number of the most relevant
|
||||
motivations comes first.
|
||||
|
||||
Here is the original question:
|
||||
\n\n {original_question} \n\n
|
||||
\n\n Here is the list of sub-question motivations:
|
||||
\n\n {sub_question_explanations} \n\n
|
||||
----------------
|
||||
|
||||
Please think step by step and then generate the ranked list of motivations.
|
||||
|
||||
Please format your answer as a json object in the following format:
|
||||
{{"reasonning": <explain your reasoning for the ranking>,
|
||||
"ranked_motivations": <ranked list of motivation numbers>}}
|
||||
"""
|
||||
91
backend/danswer/agent_search/shared_graph_utils/utils.py
Normal file
91
backend/danswer/agent_search/shared_graph_utils/utils.py
Normal file
@@ -0,0 +1,91 @@
|
||||
import ast
|
||||
import json
|
||||
import re
|
||||
from collections.abc import Sequence
|
||||
from datetime import datetime
|
||||
from datetime import timedelta
|
||||
from typing import Any
|
||||
|
||||
from danswer.context.search.models import InferenceSection
|
||||
|
||||
|
||||
def normalize_whitespace(text: str) -> str:
|
||||
"""Normalize whitespace in text to single spaces and strip leading/trailing whitespace."""
|
||||
import re
|
||||
|
||||
return re.sub(r"\s+", " ", text.strip())
|
||||
|
||||
|
||||
# Post-processing
|
||||
def format_docs(docs: Sequence[InferenceSection]) -> str:
|
||||
return "\n\n".join(doc.combined_content for doc in docs)
|
||||
|
||||
|
||||
def clean_and_parse_list_string(json_string: str) -> list[dict]:
|
||||
# Remove markdown code block markers and any newline prefixes
|
||||
cleaned_string = re.sub(r"```json\n|\n```", "", json_string)
|
||||
cleaned_string = cleaned_string.replace("\\n", " ").replace("\n", " ")
|
||||
cleaned_string = " ".join(cleaned_string.split())
|
||||
# Parse the cleaned string into a Python dictionary
|
||||
return ast.literal_eval(cleaned_string)
|
||||
|
||||
|
||||
def clean_and_parse_json_string(json_string: str) -> dict[str, Any]:
|
||||
# Remove markdown code block markers and any newline prefixes
|
||||
cleaned_string = re.sub(r"```json\n|\n```", "", json_string)
|
||||
cleaned_string = cleaned_string.replace("\\n", " ").replace("\n", " ")
|
||||
cleaned_string = " ".join(cleaned_string.split())
|
||||
# Parse the cleaned string into a Python dictionary
|
||||
return json.loads(cleaned_string)
|
||||
|
||||
|
||||
def format_entity_term_extraction(entity_term_extraction_dict: dict[str, Any]) -> str:
|
||||
entities = entity_term_extraction_dict["entities"]
|
||||
terms = entity_term_extraction_dict["terms"]
|
||||
relationships = entity_term_extraction_dict["relationships"]
|
||||
|
||||
entity_strs = ["\nEntities:\n"]
|
||||
for entity in entities:
|
||||
entity_str = f"{entity['entity_name']} ({entity['entity_type']})"
|
||||
entity_strs.append(entity_str)
|
||||
|
||||
entity_str = "\n - ".join(entity_strs)
|
||||
|
||||
relationship_strs = ["\n\nRelationships:\n"]
|
||||
for relationship in relationships:
|
||||
relationship_str = f"{relationship['name']} ({relationship['type']}): {relationship['entities']}"
|
||||
relationship_strs.append(relationship_str)
|
||||
|
||||
relationship_str = "\n - ".join(relationship_strs)
|
||||
|
||||
term_strs = ["\n\nTerms:\n"]
|
||||
for term in terms:
|
||||
term_str = f"{term['term_name']} ({term['term_type']}): similar to {term['similar_to']}"
|
||||
term_strs.append(term_str)
|
||||
|
||||
term_str = "\n - ".join(term_strs)
|
||||
|
||||
return "\n".join(entity_strs + relationship_strs + term_strs)
|
||||
|
||||
|
||||
def _format_time_delta(time: timedelta) -> str:
|
||||
seconds_from_start = f"{((time).seconds):03d}"
|
||||
microseconds_from_start = f"{((time).microseconds):06d}"
|
||||
return f"{seconds_from_start}.{microseconds_from_start}"
|
||||
|
||||
|
||||
def generate_log_message(
|
||||
message: str,
|
||||
node_start_time: datetime,
|
||||
graph_start_time: datetime | None = None,
|
||||
) -> str:
|
||||
current_time = datetime.now()
|
||||
|
||||
if graph_start_time is not None:
|
||||
graph_time_str = _format_time_delta(current_time - graph_start_time)
|
||||
else:
|
||||
graph_time_str = "N/A"
|
||||
|
||||
node_time_str = _format_time_delta(current_time - node_start_time)
|
||||
|
||||
return f"{graph_time_str} ({node_time_str} s): {message}"
|
||||
@@ -87,6 +87,7 @@ from danswer.db.models import AccessToken
|
||||
from danswer.db.models import OAuthAccount
|
||||
from danswer.db.models import User
|
||||
from danswer.db.users import get_user_by_email
|
||||
from danswer.server.utils import BasicAuthenticationError
|
||||
from danswer.utils.logger import setup_logger
|
||||
from danswer.utils.telemetry import optional_telemetry
|
||||
from danswer.utils.telemetry import RecordType
|
||||
@@ -99,11 +100,6 @@ from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
class BasicAuthenticationError(HTTPException):
|
||||
def __init__(self, detail: str):
|
||||
super().__init__(status_code=status.HTTP_403_FORBIDDEN, detail=detail)
|
||||
|
||||
|
||||
def is_user_admin(user: User | None) -> bool:
|
||||
if AUTH_TYPE == AuthType.DISABLED:
|
||||
return True
|
||||
|
||||
@@ -39,7 +39,6 @@ from danswer.redis.redis_usergroup import RedisUserGroup
|
||||
from danswer.utils.logger import setup_logger
|
||||
from shared_configs.configs import MULTI_TENANT
|
||||
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
celery_app = Celery(__name__)
|
||||
@@ -117,9 +116,13 @@ def on_worker_init(sender: Any, **kwargs: Any) -> None:
|
||||
# it is planned to use this lock to enforce singleton behavior on the primary
|
||||
# worker, since the primary worker does redis cleanup on startup, but this isn't
|
||||
# implemented yet.
|
||||
|
||||
# set thread_local=False since we don't control what thread the periodic task might
|
||||
# reacquire the lock with
|
||||
lock: RedisLock = r.lock(
|
||||
DanswerRedisLocks.PRIMARY_WORKER,
|
||||
timeout=CELERY_PRIMARY_WORKER_LOCK_TIMEOUT,
|
||||
thread_local=False,
|
||||
)
|
||||
|
||||
logger.info("Primary worker lock: Acquire starting.")
|
||||
|
||||
@@ -22,6 +22,7 @@ from danswer.configs.constants import DanswerCeleryTask
|
||||
from danswer.configs.constants import DanswerRedisLocks
|
||||
from danswer.configs.constants import DocumentSource
|
||||
from danswer.db.connector_credential_pair import get_connector_credential_pair_from_id
|
||||
from danswer.db.document import upsert_document_by_connector_credential_pair
|
||||
from danswer.db.engine import get_session_with_tenant
|
||||
from danswer.db.enums import AccessType
|
||||
from danswer.db.enums import ConnectorCredentialPairStatus
|
||||
@@ -262,7 +263,12 @@ def connector_permission_sync_generator_task(
|
||||
f"RedisConnector.permissions.generate_tasks starting. cc_pair={cc_pair_id}"
|
||||
)
|
||||
tasks_generated = redis_connector.permissions.generate_tasks(
|
||||
self.app, lock, document_external_accesses, source_type
|
||||
celery_app=self.app,
|
||||
lock=lock,
|
||||
new_permissions=document_external_accesses,
|
||||
source_string=source_type,
|
||||
connector_id=cc_pair.connector.id,
|
||||
credential_id=cc_pair.credential.id,
|
||||
)
|
||||
if tasks_generated is None:
|
||||
return None
|
||||
@@ -298,6 +304,8 @@ def update_external_document_permissions_task(
|
||||
tenant_id: str | None,
|
||||
serialized_doc_external_access: dict,
|
||||
source_string: str,
|
||||
connector_id: int,
|
||||
credential_id: int,
|
||||
) -> bool:
|
||||
document_external_access = DocExternalAccess.from_dict(
|
||||
serialized_doc_external_access
|
||||
@@ -306,18 +314,28 @@ def update_external_document_permissions_task(
|
||||
external_access = document_external_access.external_access
|
||||
try:
|
||||
with get_session_with_tenant(tenant_id) as db_session:
|
||||
# Then we build the update requests to update vespa
|
||||
# Add the users to the DB if they don't exist
|
||||
batch_add_ext_perm_user_if_not_exists(
|
||||
db_session=db_session,
|
||||
emails=list(external_access.external_user_emails),
|
||||
)
|
||||
upsert_document_external_perms(
|
||||
# Then we upsert the document's external permissions in postgres
|
||||
created_new_doc = upsert_document_external_perms(
|
||||
db_session=db_session,
|
||||
doc_id=doc_id,
|
||||
external_access=external_access,
|
||||
source_type=DocumentSource(source_string),
|
||||
)
|
||||
|
||||
if created_new_doc:
|
||||
# If a new document was created, we associate it with the cc_pair
|
||||
upsert_document_by_connector_credential_pair(
|
||||
db_session=db_session,
|
||||
connector_id=connector_id,
|
||||
credential_id=credential_id,
|
||||
document_ids=[doc_id],
|
||||
)
|
||||
|
||||
logger.debug(
|
||||
f"Successfully synced postgres document permissions for {doc_id}"
|
||||
)
|
||||
|
||||
@@ -32,10 +32,14 @@ from danswer.redis.redis_connector_ext_group_sync import (
|
||||
from danswer.redis.redis_pool import get_redis_client
|
||||
from danswer.utils.logger import setup_logger
|
||||
from ee.danswer.db.connector_credential_pair import get_all_auto_sync_cc_pairs
|
||||
from ee.danswer.db.connector_credential_pair import get_cc_pairs_by_source
|
||||
from ee.danswer.db.external_perm import ExternalUserGroup
|
||||
from ee.danswer.db.external_perm import replace_user__ext_group_for_cc_pair
|
||||
from ee.danswer.external_permissions.sync_params import EXTERNAL_GROUP_SYNC_PERIODS
|
||||
from ee.danswer.external_permissions.sync_params import GROUP_PERMISSIONS_FUNC_MAP
|
||||
from ee.danswer.external_permissions.sync_params import (
|
||||
GROUP_PERMISSIONS_IS_CC_PAIR_AGNOSTIC,
|
||||
)
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
@@ -107,6 +111,22 @@ def check_for_external_group_sync(self: Task, *, tenant_id: str | None) -> None:
|
||||
with get_session_with_tenant(tenant_id) as db_session:
|
||||
cc_pairs = get_all_auto_sync_cc_pairs(db_session)
|
||||
|
||||
# We only want to sync one cc_pair per source type in
|
||||
# GROUP_PERMISSIONS_IS_CC_PAIR_AGNOSTIC
|
||||
for source in GROUP_PERMISSIONS_IS_CC_PAIR_AGNOSTIC:
|
||||
# These are ordered by cc_pair id so the first one is the one we want
|
||||
cc_pairs_to_dedupe = get_cc_pairs_by_source(
|
||||
db_session, source, only_sync=True
|
||||
)
|
||||
# We only want to sync one cc_pair per source type
|
||||
# in GROUP_PERMISSIONS_IS_CC_PAIR_AGNOSTIC so we dedupe here
|
||||
for cc_pair_to_remove in cc_pairs_to_dedupe[1:]:
|
||||
cc_pairs = [
|
||||
cc_pair
|
||||
for cc_pair in cc_pairs
|
||||
if cc_pair.id != cc_pair_to_remove.id
|
||||
]
|
||||
|
||||
for cc_pair in cc_pairs:
|
||||
if _is_external_group_sync_due(cc_pair):
|
||||
cc_pair_ids_to_sync.append(cc_pair.id)
|
||||
|
||||
@@ -789,9 +789,12 @@ def connector_indexing_task(
|
||||
)
|
||||
break
|
||||
|
||||
# set thread_local=False since we don't control what thread the indexing/pruning
|
||||
# might run our callback with
|
||||
lock: RedisLock = r.lock(
|
||||
redis_connector_index.generator_lock_key,
|
||||
timeout=CELERY_INDEXING_LOCK_TIMEOUT,
|
||||
thread_local=False,
|
||||
)
|
||||
|
||||
acquired = lock.acquire(blocking=False)
|
||||
|
||||
@@ -8,6 +8,7 @@ from celery import shared_task
|
||||
from celery import Task
|
||||
from celery.exceptions import SoftTimeLimitExceeded
|
||||
from redis import Redis
|
||||
from redis.lock import Lock as RedisLock
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from danswer.background.celery.apps.app_base import task_logger
|
||||
@@ -239,9 +240,12 @@ def connector_pruning_generator_task(
|
||||
|
||||
r = get_redis_client(tenant_id=tenant_id)
|
||||
|
||||
lock = r.lock(
|
||||
# set thread_local=False since we don't control what thread the indexing/pruning
|
||||
# might run our callback with
|
||||
lock: RedisLock = r.lock(
|
||||
DanswerRedisLocks.PRUNING_LOCK_PREFIX + f"_{redis_connector.id}",
|
||||
timeout=CELERY_PRUNING_LOCK_TIMEOUT,
|
||||
thread_local=False,
|
||||
)
|
||||
|
||||
acquired = lock.acquire(blocking=False)
|
||||
|
||||
@@ -2,20 +2,79 @@ import re
|
||||
from typing import cast
|
||||
from uuid import UUID
|
||||
|
||||
from fastapi import HTTPException
|
||||
from fastapi.datastructures import Headers
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from danswer.auth.users import is_user_admin
|
||||
from danswer.chat.models import CitationInfo
|
||||
from danswer.chat.models import LlmDoc
|
||||
from danswer.chat.models import PersonaOverrideConfig
|
||||
from danswer.chat.models import ThreadMessage
|
||||
from danswer.configs.constants import DEFAULT_PERSONA_ID
|
||||
from danswer.configs.constants import MessageType
|
||||
from danswer.context.search.models import InferenceSection
|
||||
from danswer.context.search.models import RerankingDetails
|
||||
from danswer.context.search.models import RetrievalDetails
|
||||
from danswer.db.chat import create_chat_session
|
||||
from danswer.db.chat import get_chat_messages_by_session
|
||||
from danswer.db.llm import fetch_existing_doc_sets
|
||||
from danswer.db.llm import fetch_existing_tools
|
||||
from danswer.db.models import ChatMessage
|
||||
from danswer.db.models import Persona
|
||||
from danswer.db.models import Prompt
|
||||
from danswer.db.models import Tool
|
||||
from danswer.db.models import User
|
||||
from danswer.db.persona import get_prompts_by_ids
|
||||
from danswer.llm.answering.models import PreviousMessage
|
||||
from danswer.natural_language_processing.utils import BaseTokenizer
|
||||
from danswer.server.query_and_chat.models import CreateChatMessageRequest
|
||||
from danswer.tools.tool_implementations.custom.custom_tool import (
|
||||
build_custom_tools_from_openapi_schema_and_headers,
|
||||
)
|
||||
from danswer.utils.logger import setup_logger
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
def prepare_chat_message_request(
|
||||
message_text: str,
|
||||
user: User | None,
|
||||
persona_id: int | None,
|
||||
# Does the question need to have a persona override
|
||||
persona_override_config: PersonaOverrideConfig | None,
|
||||
prompt: Prompt | None,
|
||||
message_ts_to_respond_to: str | None,
|
||||
retrieval_details: RetrievalDetails | None,
|
||||
rerank_settings: RerankingDetails | None,
|
||||
db_session: Session,
|
||||
) -> CreateChatMessageRequest:
|
||||
# Typically used for one shot flows like SlackBot or non-chat API endpoint use cases
|
||||
new_chat_session = create_chat_session(
|
||||
db_session=db_session,
|
||||
description=None,
|
||||
user_id=user.id if user else None,
|
||||
# If using an override, this id will be ignored later on
|
||||
persona_id=persona_id or DEFAULT_PERSONA_ID,
|
||||
danswerbot_flow=True,
|
||||
slack_thread_id=message_ts_to_respond_to,
|
||||
)
|
||||
|
||||
return CreateChatMessageRequest(
|
||||
chat_session_id=new_chat_session.id,
|
||||
parent_message_id=None, # It's a standalone chat session each time
|
||||
message=message_text,
|
||||
file_descriptors=[], # Currently SlackBot/answer api do not support files in the context
|
||||
prompt_id=prompt.id if prompt else None,
|
||||
# Can always override the persona for the single query, if it's a normal persona
|
||||
# then it will be treated the same
|
||||
persona_override_config=persona_override_config,
|
||||
search_doc_ids=None,
|
||||
retrieval_options=retrieval_details,
|
||||
rerank_settings=rerank_settings,
|
||||
)
|
||||
|
||||
|
||||
def llm_doc_from_inference_section(inference_section: InferenceSection) -> LlmDoc:
|
||||
return LlmDoc(
|
||||
document_id=inference_section.center_chunk.document_id,
|
||||
@@ -35,6 +94,45 @@ def llm_doc_from_inference_section(inference_section: InferenceSection) -> LlmDo
|
||||
)
|
||||
|
||||
|
||||
def combine_message_thread(
|
||||
messages: list[ThreadMessage],
|
||||
max_tokens: int | None,
|
||||
llm_tokenizer: BaseTokenizer,
|
||||
) -> str:
|
||||
"""Used to create a single combined message context from threads"""
|
||||
if not messages:
|
||||
return ""
|
||||
|
||||
message_strs: list[str] = []
|
||||
total_token_count = 0
|
||||
|
||||
for message in reversed(messages):
|
||||
if message.role == MessageType.USER:
|
||||
role_str = message.role.value.upper()
|
||||
if message.sender:
|
||||
role_str += " " + message.sender
|
||||
else:
|
||||
# Since other messages might have the user identifying information
|
||||
# better to use Unknown for symmetry
|
||||
role_str += " Unknown"
|
||||
else:
|
||||
role_str = message.role.value.upper()
|
||||
|
||||
msg_str = f"{role_str}:\n{message.message}"
|
||||
message_token_count = len(llm_tokenizer.encode(msg_str))
|
||||
|
||||
if (
|
||||
max_tokens is not None
|
||||
and total_token_count + message_token_count > max_tokens
|
||||
):
|
||||
break
|
||||
|
||||
message_strs.insert(0, msg_str)
|
||||
total_token_count += message_token_count
|
||||
|
||||
return "\n\n".join(message_strs)
|
||||
|
||||
|
||||
def create_chat_chain(
|
||||
chat_session_id: UUID,
|
||||
db_session: Session,
|
||||
@@ -197,3 +295,71 @@ def extract_headers(
|
||||
if lowercase_key in headers:
|
||||
extracted_headers[lowercase_key] = headers[lowercase_key]
|
||||
return extracted_headers
|
||||
|
||||
|
||||
def create_temporary_persona(
|
||||
persona_config: PersonaOverrideConfig, db_session: Session, user: User | None = None
|
||||
) -> Persona:
|
||||
if not is_user_admin(user):
|
||||
raise HTTPException(
|
||||
status_code=403,
|
||||
detail="User is not authorized to create a persona in one shot queries",
|
||||
)
|
||||
|
||||
"""Create a temporary Persona object from the provided configuration."""
|
||||
persona = Persona(
|
||||
name=persona_config.name,
|
||||
description=persona_config.description,
|
||||
num_chunks=persona_config.num_chunks,
|
||||
llm_relevance_filter=persona_config.llm_relevance_filter,
|
||||
llm_filter_extraction=persona_config.llm_filter_extraction,
|
||||
recency_bias=persona_config.recency_bias,
|
||||
llm_model_provider_override=persona_config.llm_model_provider_override,
|
||||
llm_model_version_override=persona_config.llm_model_version_override,
|
||||
)
|
||||
|
||||
if persona_config.prompts:
|
||||
persona.prompts = [
|
||||
Prompt(
|
||||
name=p.name,
|
||||
description=p.description,
|
||||
system_prompt=p.system_prompt,
|
||||
task_prompt=p.task_prompt,
|
||||
include_citations=p.include_citations,
|
||||
datetime_aware=p.datetime_aware,
|
||||
)
|
||||
for p in persona_config.prompts
|
||||
]
|
||||
elif persona_config.prompt_ids:
|
||||
persona.prompts = get_prompts_by_ids(
|
||||
db_session=db_session, prompt_ids=persona_config.prompt_ids
|
||||
)
|
||||
|
||||
persona.tools = []
|
||||
if persona_config.custom_tools_openapi:
|
||||
for schema in persona_config.custom_tools_openapi:
|
||||
tools = cast(
|
||||
list[Tool],
|
||||
build_custom_tools_from_openapi_schema_and_headers(schema),
|
||||
)
|
||||
persona.tools.extend(tools)
|
||||
|
||||
if persona_config.tools:
|
||||
tool_ids = [tool.id for tool in persona_config.tools]
|
||||
persona.tools.extend(
|
||||
fetch_existing_tools(db_session=db_session, tool_ids=tool_ids)
|
||||
)
|
||||
|
||||
if persona_config.tool_ids:
|
||||
persona.tools.extend(
|
||||
fetch_existing_tools(
|
||||
db_session=db_session, tool_ids=persona_config.tool_ids
|
||||
)
|
||||
)
|
||||
|
||||
fetched_docs = fetch_existing_doc_sets(
|
||||
db_session=db_session, doc_ids=persona_config.document_set_ids
|
||||
)
|
||||
persona.document_sets = fetched_docs
|
||||
|
||||
return persona
|
||||
|
||||
@@ -4,12 +4,14 @@ from enum import Enum
|
||||
from typing import Any
|
||||
|
||||
from pydantic import BaseModel
|
||||
from pydantic import Field
|
||||
|
||||
from danswer.configs.constants import DocumentSource
|
||||
from danswer.configs.constants import MessageType
|
||||
from danswer.context.search.enums import QueryFlow
|
||||
from danswer.context.search.enums import RecencyBiasSetting
|
||||
from danswer.context.search.enums import SearchType
|
||||
from danswer.context.search.models import RetrievalDocs
|
||||
from danswer.context.search.models import SearchResponse
|
||||
from danswer.tools.tool_implementations.custom.base_tool_types import ToolResultType
|
||||
|
||||
|
||||
@@ -118,20 +120,6 @@ class StreamingError(BaseModel):
|
||||
stack_trace: str | None = None
|
||||
|
||||
|
||||
class DanswerQuote(BaseModel):
|
||||
# This is during inference so everything is a string by this point
|
||||
quote: str
|
||||
document_id: str
|
||||
link: str | None
|
||||
source_type: str
|
||||
semantic_identifier: str
|
||||
blurb: str
|
||||
|
||||
|
||||
class DanswerQuotes(BaseModel):
|
||||
quotes: list[DanswerQuote]
|
||||
|
||||
|
||||
class DanswerContext(BaseModel):
|
||||
content: str
|
||||
document_id: str
|
||||
@@ -147,14 +135,20 @@ class DanswerAnswer(BaseModel):
|
||||
answer: str | None
|
||||
|
||||
|
||||
class QAResponse(SearchResponse, DanswerAnswer):
|
||||
quotes: list[DanswerQuote] | None
|
||||
contexts: list[DanswerContexts] | None
|
||||
predicted_flow: QueryFlow
|
||||
predicted_search: SearchType
|
||||
eval_res_valid: bool | None = None
|
||||
class ThreadMessage(BaseModel):
|
||||
message: str
|
||||
sender: str | None = None
|
||||
role: MessageType = MessageType.USER
|
||||
|
||||
|
||||
class ChatDanswerBotResponse(BaseModel):
|
||||
answer: str | None = None
|
||||
citations: list[CitationInfo] | None = None
|
||||
docs: QADocsResponse | None = None
|
||||
llm_selected_doc_indices: list[int] | None = None
|
||||
error_msg: str | None = None
|
||||
chat_message_id: int | None = None
|
||||
answer_valid: bool = True # Reflexion result, default True if Reflexion not run
|
||||
|
||||
|
||||
class FileChatDisplay(BaseModel):
|
||||
@@ -166,9 +160,41 @@ class CustomToolResponse(BaseModel):
|
||||
tool_name: str
|
||||
|
||||
|
||||
class ToolConfig(BaseModel):
|
||||
id: int
|
||||
|
||||
|
||||
class PromptOverrideConfig(BaseModel):
|
||||
name: str
|
||||
description: str = ""
|
||||
system_prompt: str
|
||||
task_prompt: str = ""
|
||||
include_citations: bool = True
|
||||
datetime_aware: bool = True
|
||||
|
||||
|
||||
class PersonaOverrideConfig(BaseModel):
|
||||
name: str
|
||||
description: str
|
||||
search_type: SearchType = SearchType.SEMANTIC
|
||||
num_chunks: float | None = None
|
||||
llm_relevance_filter: bool = False
|
||||
llm_filter_extraction: bool = False
|
||||
recency_bias: RecencyBiasSetting = RecencyBiasSetting.AUTO
|
||||
llm_model_provider_override: str | None = None
|
||||
llm_model_version_override: str | None = None
|
||||
|
||||
prompts: list[PromptOverrideConfig] = Field(default_factory=list)
|
||||
prompt_ids: list[int] = Field(default_factory=list)
|
||||
|
||||
document_set_ids: list[int] = Field(default_factory=list)
|
||||
tools: list[ToolConfig] = Field(default_factory=list)
|
||||
tool_ids: list[int] = Field(default_factory=list)
|
||||
custom_tools_openapi: list[dict[str, Any]] = Field(default_factory=list)
|
||||
|
||||
|
||||
AnswerQuestionPossibleReturn = (
|
||||
DanswerAnswerPiece
|
||||
| DanswerQuotes
|
||||
| CitationInfo
|
||||
| DanswerContexts
|
||||
| FileChatDisplay
|
||||
|
||||
@@ -7,10 +7,13 @@ from typing import cast
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from danswer.chat.chat_utils import create_chat_chain
|
||||
from danswer.chat.chat_utils import create_temporary_persona
|
||||
from danswer.chat.models import AllCitations
|
||||
from danswer.chat.models import ChatDanswerBotResponse
|
||||
from danswer.chat.models import CitationInfo
|
||||
from danswer.chat.models import CustomToolResponse
|
||||
from danswer.chat.models import DanswerAnswerPiece
|
||||
from danswer.chat.models import DanswerContexts
|
||||
from danswer.chat.models import FileChatDisplay
|
||||
from danswer.chat.models import FinalUsedContextDocsResponse
|
||||
from danswer.chat.models import LLMRelevanceFilterResponse
|
||||
@@ -102,6 +105,7 @@ from danswer.tools.tool_implementations.internet_search.internet_search_tool imp
|
||||
from danswer.tools.tool_implementations.search.search_tool import (
|
||||
FINAL_CONTEXT_DOCUMENTS_ID,
|
||||
)
|
||||
from danswer.tools.tool_implementations.search.search_tool import SEARCH_DOC_CONTENT_ID
|
||||
from danswer.tools.tool_implementations.search.search_tool import (
|
||||
SEARCH_RESPONSE_SUMMARY_ID,
|
||||
)
|
||||
@@ -113,8 +117,10 @@ from danswer.tools.tool_implementations.search.search_tool import (
|
||||
from danswer.tools.tool_runner import ToolCallFinalResult
|
||||
from danswer.utils.logger import setup_logger
|
||||
from danswer.utils.long_term_log import LongTermLogger
|
||||
from danswer.utils.timing import log_function_time
|
||||
from danswer.utils.timing import log_generator_function_time
|
||||
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
@@ -256,6 +262,7 @@ def _get_force_search_settings(
|
||||
ChatPacket = (
|
||||
StreamingError
|
||||
| QADocsResponse
|
||||
| DanswerContexts
|
||||
| LLMRelevanceFilterResponse
|
||||
| FinalUsedContextDocsResponse
|
||||
| ChatMessageDetail
|
||||
@@ -286,6 +293,8 @@ def stream_chat_message_objects(
|
||||
custom_tool_additional_headers: dict[str, str] | None = None,
|
||||
is_connected: Callable[[], bool] | None = None,
|
||||
enforce_chat_session_id_for_search_docs: bool = True,
|
||||
bypass_acl: bool = False,
|
||||
include_contexts: bool = False,
|
||||
) -> ChatPacketStream:
|
||||
"""Streams in order:
|
||||
1. [conditional] Retrieved documents if a search needs to be run
|
||||
@@ -322,17 +331,31 @@ def stream_chat_message_objects(
|
||||
metadata={"user_id": str(user_id), "chat_session_id": str(chat_session_id)}
|
||||
)
|
||||
|
||||
# use alternate persona if alternative assistant id is passed in
|
||||
if alternate_assistant_id is not None:
|
||||
# Allows users to specify a temporary persona (assistant) in the chat session
|
||||
# this takes highest priority since it's user specified
|
||||
persona = get_persona_by_id(
|
||||
alternate_assistant_id,
|
||||
user=user,
|
||||
db_session=db_session,
|
||||
is_for_edit=False,
|
||||
)
|
||||
elif new_msg_req.persona_override_config:
|
||||
# Certain endpoints allow users to specify arbitrary persona settings
|
||||
# this should never conflict with the alternate_assistant_id
|
||||
persona = persona = create_temporary_persona(
|
||||
db_session=db_session,
|
||||
persona_config=new_msg_req.persona_override_config,
|
||||
user=user,
|
||||
)
|
||||
else:
|
||||
persona = chat_session.persona
|
||||
|
||||
if not persona:
|
||||
raise RuntimeError("No persona specified or found for chat session")
|
||||
|
||||
# If a prompt override is specified via the API, use that with highest priority
|
||||
# but for saving it, we are just mapping it to an existing prompt
|
||||
prompt_id = new_msg_req.prompt_id
|
||||
if prompt_id is None and persona.prompts:
|
||||
prompt_id = sorted(persona.prompts, key=lambda x: x.id)[-1].id
|
||||
@@ -555,19 +578,34 @@ def stream_chat_message_objects(
|
||||
reserved_message_id=reserved_message_id,
|
||||
)
|
||||
|
||||
if not final_msg.prompt:
|
||||
raise RuntimeError("No Prompt found")
|
||||
|
||||
prompt_config = (
|
||||
PromptConfig.from_model(
|
||||
final_msg.prompt,
|
||||
prompt_override=(
|
||||
new_msg_req.prompt_override or chat_session.prompt_override
|
||||
),
|
||||
prompt_override = new_msg_req.prompt_override or chat_session.prompt_override
|
||||
if new_msg_req.persona_override_config:
|
||||
prompt_config = PromptConfig(
|
||||
system_prompt=new_msg_req.persona_override_config.prompts[
|
||||
0
|
||||
].system_prompt,
|
||||
task_prompt=new_msg_req.persona_override_config.prompts[0].task_prompt,
|
||||
datetime_aware=new_msg_req.persona_override_config.prompts[
|
||||
0
|
||||
].datetime_aware,
|
||||
include_citations=new_msg_req.persona_override_config.prompts[
|
||||
0
|
||||
].include_citations,
|
||||
)
|
||||
if not persona
|
||||
else PromptConfig.from_model(persona.prompts[0])
|
||||
)
|
||||
elif prompt_override:
|
||||
if not final_msg.prompt:
|
||||
raise ValueError(
|
||||
"Prompt override cannot be applied, no base prompt found."
|
||||
)
|
||||
prompt_config = PromptConfig.from_model(
|
||||
final_msg.prompt,
|
||||
prompt_override=prompt_override,
|
||||
)
|
||||
elif final_msg.prompt:
|
||||
prompt_config = PromptConfig.from_model(final_msg.prompt)
|
||||
else:
|
||||
prompt_config = PromptConfig.from_model(persona.prompts[0])
|
||||
|
||||
answer_style_config = AnswerStyleConfig(
|
||||
citation_config=CitationConfig(
|
||||
all_docs_useful=selected_db_search_docs is not None
|
||||
@@ -587,11 +625,13 @@ def stream_chat_message_objects(
|
||||
answer_style_config=answer_style_config,
|
||||
document_pruning_config=document_pruning_config,
|
||||
retrieval_options=retrieval_options or RetrievalDetails(),
|
||||
rerank_settings=new_msg_req.rerank_settings,
|
||||
selected_sections=selected_sections,
|
||||
chunks_above=new_msg_req.chunks_above,
|
||||
chunks_below=new_msg_req.chunks_below,
|
||||
full_doc=new_msg_req.full_doc,
|
||||
latest_query_files=latest_query_files,
|
||||
bypass_acl=bypass_acl,
|
||||
),
|
||||
internet_search_tool_config=InternetSearchToolConfig(
|
||||
answer_style_config=answer_style_config,
|
||||
@@ -737,6 +777,8 @@ def stream_chat_message_objects(
|
||||
response=custom_tool_response.tool_result,
|
||||
tool_name=custom_tool_response.tool_name,
|
||||
)
|
||||
elif packet.id == SEARCH_DOC_CONTENT_ID and include_contexts:
|
||||
yield cast(DanswerContexts, packet.response)
|
||||
|
||||
elif isinstance(packet, StreamStopInfo):
|
||||
pass
|
||||
@@ -845,3 +887,30 @@ def stream_chat_message(
|
||||
)
|
||||
for obj in objects:
|
||||
yield get_json_line(obj.model_dump())
|
||||
|
||||
|
||||
@log_function_time()
|
||||
def gather_stream_for_slack(
|
||||
packets: ChatPacketStream,
|
||||
) -> ChatDanswerBotResponse:
|
||||
response = ChatDanswerBotResponse()
|
||||
|
||||
answer = ""
|
||||
for packet in packets:
|
||||
if isinstance(packet, DanswerAnswerPiece) and packet.answer_piece:
|
||||
answer += packet.answer_piece
|
||||
elif isinstance(packet, QADocsResponse):
|
||||
response.docs = packet
|
||||
elif isinstance(packet, StreamingError):
|
||||
response.error_msg = packet.error
|
||||
elif isinstance(packet, ChatMessageDetail):
|
||||
response.chat_message_id = packet.message_id
|
||||
elif isinstance(packet, LLMRelevanceFilterResponse):
|
||||
response.llm_selected_doc_indices = packet.llm_selected_doc_indices
|
||||
elif isinstance(packet, AllCitations):
|
||||
response.citations = packet.citations
|
||||
|
||||
if answer:
|
||||
response.answer = answer
|
||||
|
||||
return response
|
||||
|
||||
@@ -522,3 +522,6 @@ API_KEY_HASH_ROUNDS = (
|
||||
|
||||
POD_NAME = os.environ.get("POD_NAME")
|
||||
POD_NAMESPACE = os.environ.get("POD_NAMESPACE")
|
||||
|
||||
|
||||
DEV_MODE = os.environ.get("DEV_MODE", "").lower() == "true"
|
||||
|
||||
@@ -31,6 +31,8 @@ DISABLED_GEN_AI_MSG = (
|
||||
"You can still use Danswer as a search engine."
|
||||
)
|
||||
|
||||
DEFAULT_PERSONA_ID = 0
|
||||
|
||||
# Postgres connection constants for application_name
|
||||
POSTGRES_WEB_APP_NAME = "web"
|
||||
POSTGRES_INDEXER_APP_NAME = "indexer"
|
||||
|
||||
@@ -4,11 +4,8 @@ import os
|
||||
# Danswer Slack Bot Configs
|
||||
#####
|
||||
DANSWER_BOT_NUM_RETRIES = int(os.environ.get("DANSWER_BOT_NUM_RETRIES", "5"))
|
||||
DANSWER_BOT_ANSWER_GENERATION_TIMEOUT = int(
|
||||
os.environ.get("DANSWER_BOT_ANSWER_GENERATION_TIMEOUT", "90")
|
||||
)
|
||||
# How much of the available input context can be used for thread context
|
||||
DANSWER_BOT_TARGET_CHUNK_PERCENTAGE = 512 * 2 / 3072
|
||||
MAX_THREAD_CONTEXT_PERCENTAGE = 512 * 2 / 3072
|
||||
# Number of docs to display in "Reference Documents"
|
||||
DANSWER_BOT_NUM_DOCS_TO_DISPLAY = int(
|
||||
os.environ.get("DANSWER_BOT_NUM_DOCS_TO_DISPLAY", "5")
|
||||
@@ -47,17 +44,6 @@ DANSWER_BOT_DISPLAY_ERROR_MSGS = os.environ.get(
|
||||
DANSWER_BOT_RESPOND_EVERY_CHANNEL = (
|
||||
os.environ.get("DANSWER_BOT_RESPOND_EVERY_CHANNEL", "").lower() == "true"
|
||||
)
|
||||
# Add a second LLM call post Answer to verify if the Answer is valid
|
||||
# Throws out answers that don't directly or fully answer the user query
|
||||
# This is the default for all DanswerBot channels unless the channel is configured individually
|
||||
# Set/unset by "Hide Non Answers"
|
||||
ENABLE_DANSWERBOT_REFLEXION = (
|
||||
os.environ.get("ENABLE_DANSWERBOT_REFLEXION", "").lower() == "true"
|
||||
)
|
||||
# Currently not support chain of thought, probably will add back later
|
||||
DANSWER_BOT_DISABLE_COT = True
|
||||
# if set, will default DanswerBot to use quotes and reference documents
|
||||
DANSWER_BOT_USE_QUOTES = os.environ.get("DANSWER_BOT_USE_QUOTES", "").lower() == "true"
|
||||
|
||||
# Maximum Questions Per Minute, Default Uncapped
|
||||
DANSWER_BOT_MAX_QPM = int(os.environ.get("DANSWER_BOT_MAX_QPM") or 0) or None
|
||||
|
||||
@@ -32,7 +32,11 @@ def get_user_email_from_username__server(
|
||||
response = confluence_client.get_mobile_parameters(user_name)
|
||||
email = response.get("email")
|
||||
except Exception:
|
||||
email = None
|
||||
# For now, we'll just return a string that indicates failure
|
||||
# We may want to revert to returning None in the future
|
||||
# email = None
|
||||
email = f"FAILED TO GET CONFLUENCE EMAIL FOR {user_name}"
|
||||
logger.warning(f"failed to get confluence email for {user_name}")
|
||||
_USER_EMAIL_CACHE[user_name] = email
|
||||
return _USER_EMAIL_CACHE[user_name]
|
||||
|
||||
|
||||
@@ -16,7 +16,7 @@ from slack_sdk.models.blocks import SectionBlock
|
||||
from slack_sdk.models.blocks.basic_components import MarkdownTextObject
|
||||
from slack_sdk.models.blocks.block_elements import ImageElement
|
||||
|
||||
from danswer.chat.models import DanswerQuote
|
||||
from danswer.chat.models import ChatDanswerBotResponse
|
||||
from danswer.configs.app_configs import DISABLE_GENERATIVE_AI
|
||||
from danswer.configs.app_configs import WEB_DOMAIN
|
||||
from danswer.configs.constants import DocumentSource
|
||||
@@ -40,10 +40,7 @@ from danswer.danswerbot.slack.utils import translate_vespa_highlight_to_slack
|
||||
from danswer.db.chat import get_chat_session_by_message_id
|
||||
from danswer.db.engine import get_session_with_tenant
|
||||
from danswer.db.models import ChannelConfig
|
||||
from danswer.db.models import Persona
|
||||
from danswer.one_shot_answer.models import OneShotQAResponse
|
||||
from danswer.utils.text_processing import decode_escapes
|
||||
from danswer.utils.text_processing import replace_whitespaces_w_space
|
||||
|
||||
_MAX_BLURB_LEN = 45
|
||||
|
||||
@@ -327,7 +324,7 @@ def _build_sources_blocks(
|
||||
|
||||
|
||||
def _priority_ordered_documents_blocks(
|
||||
answer: OneShotQAResponse,
|
||||
answer: ChatDanswerBotResponse,
|
||||
) -> list[Block]:
|
||||
docs_response = answer.docs if answer.docs else None
|
||||
top_docs = docs_response.top_documents if docs_response else []
|
||||
@@ -350,7 +347,7 @@ def _priority_ordered_documents_blocks(
|
||||
|
||||
|
||||
def _build_citations_blocks(
|
||||
answer: OneShotQAResponse,
|
||||
answer: ChatDanswerBotResponse,
|
||||
) -> list[Block]:
|
||||
docs_response = answer.docs if answer.docs else None
|
||||
top_docs = docs_response.top_documents if docs_response else []
|
||||
@@ -369,51 +366,8 @@ def _build_citations_blocks(
|
||||
return citations_block
|
||||
|
||||
|
||||
def _build_quotes_block(
|
||||
quotes: list[DanswerQuote],
|
||||
) -> list[Block]:
|
||||
quote_lines: list[str] = []
|
||||
doc_to_quotes: dict[str, list[str]] = {}
|
||||
doc_to_link: dict[str, str] = {}
|
||||
doc_to_sem_id: dict[str, str] = {}
|
||||
for q in quotes:
|
||||
quote = q.quote
|
||||
doc_id = q.document_id
|
||||
doc_link = q.link
|
||||
doc_name = q.semantic_identifier
|
||||
if doc_link and doc_name and doc_id and quote:
|
||||
if doc_id not in doc_to_quotes:
|
||||
doc_to_quotes[doc_id] = [quote]
|
||||
doc_to_link[doc_id] = doc_link
|
||||
doc_to_sem_id[doc_id] = (
|
||||
doc_name
|
||||
if q.source_type != DocumentSource.SLACK.value
|
||||
else "#" + doc_name
|
||||
)
|
||||
else:
|
||||
doc_to_quotes[doc_id].append(quote)
|
||||
|
||||
for doc_id, quote_strs in doc_to_quotes.items():
|
||||
quotes_str_clean = [
|
||||
replace_whitespaces_w_space(q_str).strip() for q_str in quote_strs
|
||||
]
|
||||
longest_quotes = sorted(quotes_str_clean, key=len, reverse=True)[:5]
|
||||
single_quote_str = "\n".join([f"```{q_str}```" for q_str in longest_quotes])
|
||||
link = doc_to_link[doc_id]
|
||||
sem_id = doc_to_sem_id[doc_id]
|
||||
quote_lines.append(
|
||||
f"<{link}|{sem_id}>:\n{remove_slack_text_interactions(single_quote_str)}"
|
||||
)
|
||||
|
||||
if not doc_to_quotes:
|
||||
return []
|
||||
|
||||
return [SectionBlock(text="*Relevant Snippets*\n" + "\n".join(quote_lines))]
|
||||
|
||||
|
||||
def _build_qa_response_blocks(
|
||||
answer: OneShotQAResponse,
|
||||
skip_quotes: bool = False,
|
||||
answer: ChatDanswerBotResponse,
|
||||
process_message_for_citations: bool = False,
|
||||
) -> list[Block]:
|
||||
retrieval_info = answer.docs
|
||||
@@ -422,13 +376,10 @@ def _build_qa_response_blocks(
|
||||
raise RuntimeError("Failed to retrieve docs, cannot answer question.")
|
||||
|
||||
formatted_answer = format_slack_message(answer.answer) if answer.answer else None
|
||||
quotes = answer.quotes.quotes if answer.quotes else None
|
||||
|
||||
if DISABLE_GENERATIVE_AI:
|
||||
return []
|
||||
|
||||
quotes_blocks: list[Block] = []
|
||||
|
||||
filter_block: Block | None = None
|
||||
if (
|
||||
retrieval_info.applied_time_cutoff
|
||||
@@ -471,16 +422,6 @@ def _build_qa_response_blocks(
|
||||
answer_blocks = [
|
||||
SectionBlock(text=text) for text in _split_text(answer_processed)
|
||||
]
|
||||
if quotes:
|
||||
quotes_blocks = _build_quotes_block(quotes)
|
||||
|
||||
# if no quotes OR `_build_quotes_block()` did not give back any blocks
|
||||
if not quotes_blocks:
|
||||
quotes_blocks = [
|
||||
SectionBlock(
|
||||
text="*Warning*: no sources were quoted for this answer, so it may be unreliable 😔"
|
||||
)
|
||||
]
|
||||
|
||||
response_blocks: list[Block] = []
|
||||
|
||||
@@ -489,9 +430,6 @@ def _build_qa_response_blocks(
|
||||
|
||||
response_blocks.extend(answer_blocks)
|
||||
|
||||
if not skip_quotes:
|
||||
response_blocks.extend(quotes_blocks)
|
||||
|
||||
return response_blocks
|
||||
|
||||
|
||||
@@ -567,10 +505,9 @@ def build_follow_up_resolved_blocks(
|
||||
|
||||
|
||||
def build_slack_response_blocks(
|
||||
answer: ChatDanswerBotResponse,
|
||||
tenant_id: str | None,
|
||||
message_info: SlackMessageInfo,
|
||||
answer: OneShotQAResponse,
|
||||
persona: Persona | None,
|
||||
channel_conf: ChannelConfig | None,
|
||||
use_citations: bool,
|
||||
feedback_reminder_id: str | None,
|
||||
@@ -587,7 +524,6 @@ def build_slack_response_blocks(
|
||||
|
||||
answer_blocks = _build_qa_response_blocks(
|
||||
answer=answer,
|
||||
skip_quotes=persona is not None or use_citations,
|
||||
process_message_for_citations=use_citations,
|
||||
)
|
||||
|
||||
@@ -617,8 +553,7 @@ def build_slack_response_blocks(
|
||||
|
||||
citations_blocks = []
|
||||
document_blocks = []
|
||||
if use_citations:
|
||||
# if citations are enabled, only show cited documents
|
||||
if use_citations and answer.citations:
|
||||
citations_blocks = _build_citations_blocks(answer)
|
||||
else:
|
||||
document_blocks = _priority_ordered_documents_blocks(answer)
|
||||
@@ -637,4 +572,5 @@ def build_slack_response_blocks(
|
||||
+ web_follow_up_block
|
||||
+ follow_up_block
|
||||
)
|
||||
|
||||
return all_blocks
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
import functools
|
||||
from collections.abc import Callable
|
||||
from typing import Any
|
||||
from typing import cast
|
||||
from typing import Optional
|
||||
from typing import TypeVar
|
||||
|
||||
@@ -9,46 +8,36 @@ from retry import retry
|
||||
from slack_sdk import WebClient
|
||||
from slack_sdk.models.blocks import SectionBlock
|
||||
|
||||
from danswer.chat.chat_utils import prepare_chat_message_request
|
||||
from danswer.chat.models import ChatDanswerBotResponse
|
||||
from danswer.chat.process_message import gather_stream_for_slack
|
||||
from danswer.chat.process_message import stream_chat_message_objects
|
||||
from danswer.configs.app_configs import DISABLE_GENERATIVE_AI
|
||||
from danswer.configs.danswerbot_configs import DANSWER_BOT_ANSWER_GENERATION_TIMEOUT
|
||||
from danswer.configs.danswerbot_configs import DANSWER_BOT_DISABLE_COT
|
||||
from danswer.configs.constants import DEFAULT_PERSONA_ID
|
||||
from danswer.configs.danswerbot_configs import DANSWER_BOT_DISABLE_DOCS_ONLY_ANSWER
|
||||
from danswer.configs.danswerbot_configs import DANSWER_BOT_DISPLAY_ERROR_MSGS
|
||||
from danswer.configs.danswerbot_configs import DANSWER_BOT_NUM_RETRIES
|
||||
from danswer.configs.danswerbot_configs import DANSWER_BOT_TARGET_CHUNK_PERCENTAGE
|
||||
from danswer.configs.danswerbot_configs import DANSWER_BOT_USE_QUOTES
|
||||
from danswer.configs.danswerbot_configs import DANSWER_FOLLOWUP_EMOJI
|
||||
from danswer.configs.danswerbot_configs import DANSWER_REACT_EMOJI
|
||||
from danswer.configs.danswerbot_configs import ENABLE_DANSWERBOT_REFLEXION
|
||||
from danswer.configs.danswerbot_configs import MAX_THREAD_CONTEXT_PERCENTAGE
|
||||
from danswer.context.search.enums import OptionalSearchSetting
|
||||
from danswer.context.search.models import BaseFilters
|
||||
from danswer.context.search.models import RerankingDetails
|
||||
from danswer.context.search.models import RetrievalDetails
|
||||
from danswer.danswerbot.slack.blocks import build_slack_response_blocks
|
||||
from danswer.danswerbot.slack.handlers.utils import send_team_member_message
|
||||
from danswer.danswerbot.slack.handlers.utils import slackify_message_thread
|
||||
from danswer.danswerbot.slack.models import SlackMessageInfo
|
||||
from danswer.danswerbot.slack.utils import respond_in_thread
|
||||
from danswer.danswerbot.slack.utils import SlackRateLimiter
|
||||
from danswer.danswerbot.slack.utils import update_emote_react
|
||||
from danswer.db.engine import get_session_with_tenant
|
||||
from danswer.db.models import Persona
|
||||
from danswer.db.models import SlackBotResponseType
|
||||
from danswer.db.models import SlackChannelConfig
|
||||
from danswer.db.persona import fetch_persona_by_id
|
||||
from danswer.db.search_settings import get_current_search_settings
|
||||
from danswer.db.models import User
|
||||
from danswer.db.persona import get_persona_by_id
|
||||
from danswer.db.users import get_user_by_email
|
||||
from danswer.llm.answering.prompts.citations_prompt import (
|
||||
compute_max_document_tokens_for_persona,
|
||||
)
|
||||
from danswer.llm.factory import get_llms_for_persona
|
||||
from danswer.llm.utils import check_number_of_tokens
|
||||
from danswer.llm.utils import get_max_input_tokens
|
||||
from danswer.one_shot_answer.answer_question import get_search_answer
|
||||
from danswer.one_shot_answer.models import DirectQARequest
|
||||
from danswer.one_shot_answer.models import OneShotQAResponse
|
||||
from danswer.server.query_and_chat.models import CreateChatMessageRequest
|
||||
from danswer.utils.logger import DanswerLoggingAdapter
|
||||
|
||||
|
||||
srl = SlackRateLimiter()
|
||||
|
||||
RT = TypeVar("RT") # return type
|
||||
@@ -83,16 +72,14 @@ def handle_regular_answer(
|
||||
feedback_reminder_id: str | None,
|
||||
tenant_id: str | None,
|
||||
num_retries: int = DANSWER_BOT_NUM_RETRIES,
|
||||
answer_generation_timeout: int = DANSWER_BOT_ANSWER_GENERATION_TIMEOUT,
|
||||
thread_context_percent: float = DANSWER_BOT_TARGET_CHUNK_PERCENTAGE,
|
||||
thread_context_percent: float = MAX_THREAD_CONTEXT_PERCENTAGE,
|
||||
should_respond_with_error_msgs: bool = DANSWER_BOT_DISPLAY_ERROR_MSGS,
|
||||
disable_docs_only_answer: bool = DANSWER_BOT_DISABLE_DOCS_ONLY_ANSWER,
|
||||
disable_cot: bool = DANSWER_BOT_DISABLE_COT,
|
||||
reflexion: bool = ENABLE_DANSWERBOT_REFLEXION,
|
||||
) -> bool:
|
||||
channel_conf = slack_channel_config.channel_config if slack_channel_config else None
|
||||
|
||||
messages = message_info.thread_messages
|
||||
|
||||
message_ts_to_respond_to = message_info.msg_to_respond
|
||||
is_bot_msg = message_info.is_bot_msg
|
||||
user = None
|
||||
@@ -102,9 +89,18 @@ def handle_regular_answer(
|
||||
user = get_user_by_email(message_info.email, db_session)
|
||||
|
||||
document_set_names: list[str] | None = None
|
||||
persona = slack_channel_config.persona if slack_channel_config else None
|
||||
prompt = None
|
||||
if persona:
|
||||
# If no persona is specified, use the default search based persona
|
||||
# This way slack flow always has a persona
|
||||
persona = slack_channel_config.persona if slack_channel_config else None
|
||||
if not persona:
|
||||
with get_session_with_tenant(tenant_id) as db_session:
|
||||
persona = get_persona_by_id(DEFAULT_PERSONA_ID, user, db_session)
|
||||
document_set_names = [
|
||||
document_set.name for document_set in persona.document_sets
|
||||
]
|
||||
prompt = persona.prompts[0] if persona.prompts else None
|
||||
else:
|
||||
document_set_names = [
|
||||
document_set.name for document_set in persona.document_sets
|
||||
]
|
||||
@@ -112,6 +108,26 @@ def handle_regular_answer(
|
||||
|
||||
should_respond_even_with_no_docs = persona.num_chunks == 0 if persona else False
|
||||
|
||||
# TODO: Add in support for Slack to truncate messages based on max LLM context
|
||||
# llm, _ = get_llms_for_persona(persona)
|
||||
|
||||
# llm_tokenizer = get_tokenizer(
|
||||
# model_name=llm.config.model_name,
|
||||
# provider_type=llm.config.model_provider,
|
||||
# )
|
||||
|
||||
# # In cases of threads, split the available tokens between docs and thread context
|
||||
# input_tokens = get_max_input_tokens(
|
||||
# model_name=llm.config.model_name,
|
||||
# model_provider=llm.config.model_provider,
|
||||
# )
|
||||
# max_history_tokens = int(input_tokens * thread_context_percent)
|
||||
# combined_message = combine_message_thread(
|
||||
# messages, max_tokens=max_history_tokens, llm_tokenizer=llm_tokenizer
|
||||
# )
|
||||
|
||||
combined_message = slackify_message_thread(messages)
|
||||
|
||||
bypass_acl = False
|
||||
if (
|
||||
slack_channel_config
|
||||
@@ -122,13 +138,6 @@ def handle_regular_answer(
|
||||
# with non-public document sets
|
||||
bypass_acl = True
|
||||
|
||||
# figure out if we want to use citations or quotes
|
||||
use_citations = (
|
||||
not DANSWER_BOT_USE_QUOTES
|
||||
if slack_channel_config is None
|
||||
else slack_channel_config.response_type == SlackBotResponseType.CITATIONS
|
||||
)
|
||||
|
||||
if not message_ts_to_respond_to and not is_bot_msg:
|
||||
# if the message is not "/danswer" command, then it should have a message ts to respond to
|
||||
raise RuntimeError(
|
||||
@@ -141,75 +150,23 @@ def handle_regular_answer(
|
||||
backoff=2,
|
||||
)
|
||||
@rate_limits(client=client, channel=channel, thread_ts=message_ts_to_respond_to)
|
||||
def _get_answer(new_message_request: DirectQARequest) -> OneShotQAResponse | None:
|
||||
max_document_tokens: int | None = None
|
||||
max_history_tokens: int | None = None
|
||||
|
||||
def _get_slack_answer(
|
||||
new_message_request: CreateChatMessageRequest, danswer_user: User | None
|
||||
) -> ChatDanswerBotResponse:
|
||||
with get_session_with_tenant(tenant_id) as db_session:
|
||||
if len(new_message_request.messages) > 1:
|
||||
if new_message_request.persona_config:
|
||||
raise RuntimeError("Slack bot does not support persona config")
|
||||
elif new_message_request.persona_id is not None:
|
||||
persona = cast(
|
||||
Persona,
|
||||
fetch_persona_by_id(
|
||||
db_session,
|
||||
new_message_request.persona_id,
|
||||
user=None,
|
||||
get_editable=False,
|
||||
),
|
||||
)
|
||||
else:
|
||||
raise RuntimeError(
|
||||
"No persona id provided, this should never happen."
|
||||
)
|
||||
|
||||
llm, _ = get_llms_for_persona(persona)
|
||||
|
||||
# In cases of threads, split the available tokens between docs and thread context
|
||||
input_tokens = get_max_input_tokens(
|
||||
model_name=llm.config.model_name,
|
||||
model_provider=llm.config.model_provider,
|
||||
)
|
||||
max_history_tokens = int(input_tokens * thread_context_percent)
|
||||
|
||||
remaining_tokens = input_tokens - max_history_tokens
|
||||
|
||||
query_text = new_message_request.messages[0].message
|
||||
if persona:
|
||||
max_document_tokens = compute_max_document_tokens_for_persona(
|
||||
persona=persona,
|
||||
actual_user_input=query_text,
|
||||
max_llm_token_override=remaining_tokens,
|
||||
)
|
||||
else:
|
||||
max_document_tokens = (
|
||||
remaining_tokens
|
||||
- 512 # Needs to be more than any of the QA prompts
|
||||
- check_number_of_tokens(query_text)
|
||||
)
|
||||
|
||||
if DISABLE_GENERATIVE_AI:
|
||||
return None
|
||||
|
||||
# This also handles creating the query event in postgres
|
||||
answer = get_search_answer(
|
||||
query_req=new_message_request,
|
||||
user=user,
|
||||
max_document_tokens=max_document_tokens,
|
||||
max_history_tokens=max_history_tokens,
|
||||
packets = stream_chat_message_objects(
|
||||
new_msg_req=new_message_request,
|
||||
user=danswer_user,
|
||||
db_session=db_session,
|
||||
answer_generation_timeout=answer_generation_timeout,
|
||||
enable_reflexion=reflexion,
|
||||
bypass_acl=bypass_acl,
|
||||
use_citations=use_citations,
|
||||
danswerbot_flow=True,
|
||||
)
|
||||
|
||||
if not answer.error_msg:
|
||||
return answer
|
||||
else:
|
||||
raise RuntimeError(answer.error_msg)
|
||||
answer = gather_stream_for_slack(packets)
|
||||
|
||||
if answer.error_msg:
|
||||
raise RuntimeError(answer.error_msg)
|
||||
|
||||
return answer
|
||||
|
||||
try:
|
||||
# By leaving time_cutoff and favor_recent as None, and setting enable_auto_detect_filters
|
||||
@@ -239,26 +196,24 @@ def handle_regular_answer(
|
||||
enable_auto_detect_filters=auto_detect_filters,
|
||||
)
|
||||
|
||||
# Always apply reranking settings if it exists, this is the non-streaming flow
|
||||
with get_session_with_tenant(tenant_id) as db_session:
|
||||
saved_search_settings = get_current_search_settings(db_session)
|
||||
|
||||
# This includes throwing out answer via reflexion
|
||||
answer = _get_answer(
|
||||
DirectQARequest(
|
||||
messages=messages,
|
||||
multilingual_query_expansion=saved_search_settings.multilingual_expansion
|
||||
if saved_search_settings
|
||||
else None,
|
||||
prompt_id=prompt.id if prompt else None,
|
||||
persona_id=persona.id if persona is not None else 0,
|
||||
retrieval_options=retrieval_details,
|
||||
chain_of_thought=not disable_cot,
|
||||
rerank_settings=RerankingDetails.from_db_model(saved_search_settings)
|
||||
if saved_search_settings
|
||||
else None,
|
||||
answer_request = prepare_chat_message_request(
|
||||
message_text=combined_message,
|
||||
user=user,
|
||||
persona_id=persona.id,
|
||||
# This is not used in the Slack flow, only in the answer API
|
||||
persona_override_config=None,
|
||||
prompt=prompt,
|
||||
message_ts_to_respond_to=message_ts_to_respond_to,
|
||||
retrieval_details=retrieval_details,
|
||||
rerank_settings=None, # Rerank customization supported in Slack flow
|
||||
db_session=db_session,
|
||||
)
|
||||
|
||||
answer = _get_slack_answer(
|
||||
new_message_request=answer_request, danswer_user=user
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.exception(
|
||||
f"Unable to process message - did not successfully answer "
|
||||
@@ -359,7 +314,7 @@ def handle_regular_answer(
|
||||
top_docs = retrieval_info.top_documents
|
||||
if not top_docs and not should_respond_even_with_no_docs:
|
||||
logger.error(
|
||||
f"Unable to answer question: '{answer.rephrase}' - no documents found"
|
||||
f"Unable to answer question: '{combined_message}' - no documents found"
|
||||
)
|
||||
# Optionally, respond in thread with the error message
|
||||
# Used primarily for debugging purposes
|
||||
@@ -380,18 +335,18 @@ def handle_regular_answer(
|
||||
)
|
||||
return True
|
||||
|
||||
only_respond_with_citations_or_quotes = (
|
||||
only_respond_if_citations = (
|
||||
channel_conf
|
||||
and "well_answered_postfilter" in channel_conf.get("answer_filters", [])
|
||||
)
|
||||
has_citations_or_quotes = bool(answer.citations or answer.quotes)
|
||||
|
||||
if (
|
||||
only_respond_with_citations_or_quotes
|
||||
and not has_citations_or_quotes
|
||||
only_respond_if_citations
|
||||
and not answer.citations
|
||||
and not message_info.bypass_filters
|
||||
):
|
||||
logger.error(
|
||||
f"Unable to find citations or quotes to answer: '{answer.rephrase}' - not answering!"
|
||||
f"Unable to find citations to answer: '{answer.answer}' - not answering!"
|
||||
)
|
||||
# Optionally, respond in thread with the error message
|
||||
# Used primarily for debugging purposes
|
||||
@@ -409,9 +364,8 @@ def handle_regular_answer(
|
||||
tenant_id=tenant_id,
|
||||
message_info=message_info,
|
||||
answer=answer,
|
||||
persona=persona,
|
||||
channel_conf=channel_conf,
|
||||
use_citations=use_citations,
|
||||
use_citations=True, # No longer supporting quotes
|
||||
feedback_reminder_id=feedback_reminder_id,
|
||||
)
|
||||
|
||||
|
||||
@@ -1,8 +1,33 @@
|
||||
from slack_sdk import WebClient
|
||||
|
||||
from danswer.chat.models import ThreadMessage
|
||||
from danswer.configs.constants import MessageType
|
||||
from danswer.danswerbot.slack.utils import respond_in_thread
|
||||
|
||||
|
||||
def slackify_message_thread(messages: list[ThreadMessage]) -> str:
|
||||
# Note: this does not handle extremely long threads, every message will be included
|
||||
# with weaker LLMs, this could cause issues with exceeeding the token limit
|
||||
if not messages:
|
||||
return ""
|
||||
|
||||
message_strs: list[str] = []
|
||||
for message in messages:
|
||||
if message.role == MessageType.USER:
|
||||
message_text = (
|
||||
f"{message.sender or 'Unknown User'} said in Slack:\n{message.message}"
|
||||
)
|
||||
elif message.role == MessageType.ASSISTANT:
|
||||
message_text = f"AI said in Slack:\n{message.message}"
|
||||
else:
|
||||
message_text = (
|
||||
f"{message.role.value.upper()} said in Slack:\n{message.message}"
|
||||
)
|
||||
message_strs.append(message_text)
|
||||
|
||||
return "\n\n".join(message_strs)
|
||||
|
||||
|
||||
def send_team_member_message(
|
||||
client: WebClient,
|
||||
channel: str,
|
||||
|
||||
@@ -19,6 +19,8 @@ from slack_sdk.socket_mode.request import SocketModeRequest
|
||||
from slack_sdk.socket_mode.response import SocketModeResponse
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from danswer.chat.models import ThreadMessage
|
||||
from danswer.configs.app_configs import DEV_MODE
|
||||
from danswer.configs.app_configs import POD_NAME
|
||||
from danswer.configs.app_configs import POD_NAMESPACE
|
||||
from danswer.configs.constants import DanswerRedisLocks
|
||||
@@ -74,7 +76,6 @@ from danswer.db.slack_bot import fetch_slack_bots
|
||||
from danswer.key_value_store.interface import KvKeyNotFoundError
|
||||
from danswer.natural_language_processing.search_nlp_models import EmbeddingModel
|
||||
from danswer.natural_language_processing.search_nlp_models import warm_up_bi_encoder
|
||||
from danswer.one_shot_answer.models import ThreadMessage
|
||||
from danswer.redis.redis_pool import get_redis_client
|
||||
from danswer.server.manage.models import SlackBotTokens
|
||||
from danswer.utils.logger import setup_logger
|
||||
@@ -250,7 +251,7 @@ class SlackbotHandler:
|
||||
nx=True,
|
||||
ex=TENANT_LOCK_EXPIRATION,
|
||||
)
|
||||
if not acquired:
|
||||
if not acquired and not DEV_MODE:
|
||||
logger.debug(f"Another pod holds the lock for tenant {tenant_id}")
|
||||
continue
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
from pydantic import BaseModel
|
||||
|
||||
from danswer.one_shot_answer.models import ThreadMessage
|
||||
from danswer.chat.models import ThreadMessage
|
||||
|
||||
|
||||
class SlackMessageInfo(BaseModel):
|
||||
|
||||
@@ -30,13 +30,13 @@ from danswer.configs.danswerbot_configs import (
|
||||
from danswer.connectors.slack.utils import make_slack_api_rate_limited
|
||||
from danswer.connectors.slack.utils import SlackTextCleaner
|
||||
from danswer.danswerbot.slack.constants import FeedbackVisibility
|
||||
from danswer.danswerbot.slack.models import ThreadMessage
|
||||
from danswer.db.engine import get_session_with_tenant
|
||||
from danswer.db.users import get_user_by_email
|
||||
from danswer.llm.exceptions import GenAIDisabledException
|
||||
from danswer.llm.factory import get_default_llms
|
||||
from danswer.llm.utils import dict_based_prompt_to_langchain_prompt
|
||||
from danswer.llm.utils import message_to_string
|
||||
from danswer.one_shot_answer.models import ThreadMessage
|
||||
from danswer.prompts.miscellaneous_prompts import SLACK_LANGUAGE_REPHRASE_PROMPT
|
||||
from danswer.utils.logger import setup_logger
|
||||
from danswer.utils.telemetry import optional_telemetry
|
||||
|
||||
@@ -145,16 +145,10 @@ def get_chat_sessions_by_user(
|
||||
user_id: UUID | None,
|
||||
deleted: bool | None,
|
||||
db_session: Session,
|
||||
only_one_shot: bool = False,
|
||||
limit: int = 50,
|
||||
) -> list[ChatSession]:
|
||||
stmt = select(ChatSession).where(ChatSession.user_id == user_id)
|
||||
|
||||
if only_one_shot:
|
||||
stmt = stmt.where(ChatSession.one_shot.is_(True))
|
||||
else:
|
||||
stmt = stmt.where(ChatSession.one_shot.is_(False))
|
||||
|
||||
stmt = stmt.order_by(desc(ChatSession.time_created))
|
||||
|
||||
if deleted is not None:
|
||||
@@ -226,12 +220,11 @@ def delete_messages_and_files_from_chat_session(
|
||||
|
||||
def create_chat_session(
|
||||
db_session: Session,
|
||||
description: str,
|
||||
description: str | None,
|
||||
user_id: UUID | None,
|
||||
persona_id: int | None, # Can be none if temporary persona is used
|
||||
llm_override: LLMOverride | None = None,
|
||||
prompt_override: PromptOverride | None = None,
|
||||
one_shot: bool = False,
|
||||
danswerbot_flow: bool = False,
|
||||
slack_thread_id: str | None = None,
|
||||
) -> ChatSession:
|
||||
@@ -241,7 +234,6 @@ def create_chat_session(
|
||||
description=description,
|
||||
llm_override=llm_override,
|
||||
prompt_override=prompt_override,
|
||||
one_shot=one_shot,
|
||||
danswerbot_flow=danswerbot_flow,
|
||||
slack_thread_id=slack_thread_id,
|
||||
)
|
||||
@@ -287,8 +279,6 @@ def duplicate_chat_session_for_user_from_slack(
|
||||
description="",
|
||||
llm_override=chat_session.llm_override,
|
||||
prompt_override=chat_session.prompt_override,
|
||||
# Chat sessions from Slack should put people in the chat UI, not the search
|
||||
one_shot=False,
|
||||
# Chat is in UI now so this is false
|
||||
danswerbot_flow=False,
|
||||
# Maybe we want this in the future to track if it was created from Slack
|
||||
|
||||
@@ -37,6 +37,7 @@ from danswer.configs.app_configs import POSTGRES_PORT
|
||||
from danswer.configs.app_configs import POSTGRES_USER
|
||||
from danswer.configs.app_configs import USER_AUTH_SECRET
|
||||
from danswer.configs.constants import POSTGRES_UNKNOWN_APP_NAME
|
||||
from danswer.server.utils import BasicAuthenticationError
|
||||
from danswer.utils.logger import setup_logger
|
||||
from shared_configs.configs import MULTI_TENANT
|
||||
from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA
|
||||
@@ -426,7 +427,9 @@ def get_session() -> Generator[Session, None, None]:
|
||||
"""Generate a database session with the appropriate tenant schema set."""
|
||||
tenant_id = CURRENT_TENANT_ID_CONTEXTVAR.get()
|
||||
if tenant_id == POSTGRES_DEFAULT_SCHEMA and MULTI_TENANT:
|
||||
raise HTTPException(status_code=401, detail="User must authenticate")
|
||||
raise BasicAuthenticationError(
|
||||
detail="User must authenticate",
|
||||
)
|
||||
|
||||
engine = get_sqlalchemy_engine()
|
||||
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
import datetime
|
||||
import json
|
||||
from enum import Enum as PyEnum
|
||||
from typing import Any
|
||||
from typing import Literal
|
||||
from typing import NotRequired
|
||||
@@ -964,9 +963,8 @@ class ChatSession(Base):
|
||||
persona_id: Mapped[int | None] = mapped_column(
|
||||
ForeignKey("persona.id"), nullable=True
|
||||
)
|
||||
description: Mapped[str] = mapped_column(Text)
|
||||
# One-shot direct answering, currently the two types of chats are not mixed
|
||||
one_shot: Mapped[bool] = mapped_column(Boolean, default=False)
|
||||
description: Mapped[str | None] = mapped_column(Text, nullable=True)
|
||||
# This chat created by DanswerBot
|
||||
danswerbot_flow: Mapped[bool] = mapped_column(Boolean, default=False)
|
||||
# Only ever set to True if system is set to not hard-delete chats
|
||||
deleted: Mapped[bool] = mapped_column(Boolean, default=False)
|
||||
@@ -1488,11 +1486,6 @@ class ChannelConfig(TypedDict):
|
||||
show_continue_in_web_ui: NotRequired[bool] # defaults to False
|
||||
|
||||
|
||||
class SlackBotResponseType(str, PyEnum):
|
||||
QUOTES = "quotes"
|
||||
CITATIONS = "citations"
|
||||
|
||||
|
||||
class SlackChannelConfig(Base):
|
||||
__tablename__ = "slack_channel_config"
|
||||
|
||||
@@ -1505,9 +1498,6 @@ class SlackChannelConfig(Base):
|
||||
channel_config: Mapped[ChannelConfig] = mapped_column(
|
||||
postgresql.JSONB(), nullable=False
|
||||
)
|
||||
response_type: Mapped[SlackBotResponseType] = mapped_column(
|
||||
Enum(SlackBotResponseType, native_enum=False), nullable=False
|
||||
)
|
||||
|
||||
enable_auto_filters: Mapped[bool] = mapped_column(
|
||||
Boolean, nullable=False, default=False
|
||||
|
||||
@@ -415,9 +415,6 @@ def upsert_prompt(
|
||||
return prompt
|
||||
|
||||
|
||||
# NOTE: This operation cannot update persona configuration options that
|
||||
# are core to the persona, such as its display priority and
|
||||
# whether or not the assistant is a built-in / default assistant
|
||||
def upsert_persona(
|
||||
user: User | None,
|
||||
name: str,
|
||||
@@ -449,6 +446,12 @@ def upsert_persona(
|
||||
chunks_above: int = CONTEXT_CHUNKS_ABOVE,
|
||||
chunks_below: int = CONTEXT_CHUNKS_BELOW,
|
||||
) -> Persona:
|
||||
"""
|
||||
NOTE: This operation cannot update persona configuration options that
|
||||
are core to the persona, such as its display priority and
|
||||
whether or not the assistant is a built-in / default assistant
|
||||
"""
|
||||
|
||||
if persona_id is not None:
|
||||
persona = db_session.query(Persona).filter_by(id=persona_id).first()
|
||||
else:
|
||||
@@ -486,6 +489,8 @@ def upsert_persona(
|
||||
validate_persona_tools(tools)
|
||||
|
||||
if persona:
|
||||
# Built-in personas can only be updated through YAML configuration.
|
||||
# This ensures that core system personas are not modified unintentionally.
|
||||
if persona.builtin_persona and not builtin_persona:
|
||||
raise ValueError("Cannot update builtin persona with non-builtin.")
|
||||
|
||||
@@ -494,6 +499,9 @@ def upsert_persona(
|
||||
db_session=db_session, persona_id=persona.id, user=user, get_editable=True
|
||||
)
|
||||
|
||||
# The following update excludes `default`, `built-in`, and display priority.
|
||||
# Display priority is handled separately in the `display-priority` endpoint.
|
||||
# `default` and `built-in` properties can only be set when creating a persona.
|
||||
persona.name = name
|
||||
persona.description = description
|
||||
persona.num_chunks = num_chunks
|
||||
|
||||
@@ -10,7 +10,6 @@ from danswer.db.constants import SLACK_BOT_PERSONA_PREFIX
|
||||
from danswer.db.models import ChannelConfig
|
||||
from danswer.db.models import Persona
|
||||
from danswer.db.models import Persona__DocumentSet
|
||||
from danswer.db.models import SlackBotResponseType
|
||||
from danswer.db.models import SlackChannelConfig
|
||||
from danswer.db.models import User
|
||||
from danswer.db.persona import get_default_prompt
|
||||
@@ -83,7 +82,6 @@ def insert_slack_channel_config(
|
||||
slack_bot_id: int,
|
||||
persona_id: int | None,
|
||||
channel_config: ChannelConfig,
|
||||
response_type: SlackBotResponseType,
|
||||
standard_answer_category_ids: list[int],
|
||||
enable_auto_filters: bool,
|
||||
) -> SlackChannelConfig:
|
||||
@@ -115,7 +113,6 @@ def insert_slack_channel_config(
|
||||
slack_bot_id=slack_bot_id,
|
||||
persona_id=persona_id,
|
||||
channel_config=channel_config,
|
||||
response_type=response_type,
|
||||
standard_answer_categories=existing_standard_answer_categories,
|
||||
enable_auto_filters=enable_auto_filters,
|
||||
)
|
||||
@@ -130,7 +127,6 @@ def update_slack_channel_config(
|
||||
slack_channel_config_id: int,
|
||||
persona_id: int | None,
|
||||
channel_config: ChannelConfig,
|
||||
response_type: SlackBotResponseType,
|
||||
standard_answer_category_ids: list[int],
|
||||
enable_auto_filters: bool,
|
||||
) -> SlackChannelConfig:
|
||||
@@ -170,7 +166,6 @@ def update_slack_channel_config(
|
||||
# will encounter `violates foreign key constraint` errors
|
||||
slack_channel_config.persona_id = persona_id
|
||||
slack_channel_config.channel_config = channel_config
|
||||
slack_channel_config.response_type = response_type
|
||||
slack_channel_config.standard_answer_categories = list(
|
||||
existing_standard_answer_categories
|
||||
)
|
||||
|
||||
@@ -18,18 +18,12 @@ from danswer.llm.answering.models import PromptConfig
|
||||
from danswer.llm.answering.prompts.build import AnswerPromptBuilder
|
||||
from danswer.llm.answering.prompts.build import default_build_system_message
|
||||
from danswer.llm.answering.prompts.build import default_build_user_message
|
||||
from danswer.llm.answering.stream_processing.answer_response_handler import (
|
||||
AnswerResponseHandler,
|
||||
)
|
||||
from danswer.llm.answering.stream_processing.answer_response_handler import (
|
||||
CitationResponseHandler,
|
||||
)
|
||||
from danswer.llm.answering.stream_processing.answer_response_handler import (
|
||||
DummyAnswerResponseHandler,
|
||||
)
|
||||
from danswer.llm.answering.stream_processing.answer_response_handler import (
|
||||
QuotesResponseHandler,
|
||||
)
|
||||
from danswer.llm.answering.stream_processing.utils import map_document_id_order
|
||||
from danswer.llm.answering.tool.tool_response_handler import ToolResponseHandler
|
||||
from danswer.llm.interfaces import LLM
|
||||
@@ -214,18 +208,23 @@ class Answer:
|
||||
|
||||
search_result = SearchTool.get_search_result(current_llm_call) or []
|
||||
|
||||
answer_handler: AnswerResponseHandler
|
||||
if self.answer_style_config.citation_config:
|
||||
answer_handler = CitationResponseHandler(
|
||||
context_docs=search_result,
|
||||
doc_id_to_rank_map=map_document_id_order(search_result),
|
||||
)
|
||||
elif self.answer_style_config.quotes_config:
|
||||
answer_handler = QuotesResponseHandler(
|
||||
context_docs=search_result,
|
||||
)
|
||||
else:
|
||||
raise ValueError("No answer style config provided")
|
||||
# Quotes are no longer supported
|
||||
# answer_handler: AnswerResponseHandler
|
||||
# if self.answer_style_config.citation_config:
|
||||
# answer_handler = CitationResponseHandler(
|
||||
# context_docs=search_result,
|
||||
# doc_id_to_rank_map=map_document_id_order(search_result),
|
||||
# )
|
||||
# elif self.answer_style_config.quotes_config:
|
||||
# answer_handler = QuotesResponseHandler(
|
||||
# context_docs=search_result,
|
||||
# )
|
||||
# else:
|
||||
# raise ValueError("No answer style config provided")
|
||||
answer_handler = CitationResponseHandler(
|
||||
context_docs=search_result,
|
||||
doc_id_to_rank_map=map_document_id_order(search_result),
|
||||
)
|
||||
|
||||
response_handler_manager = LLMResponseHandlerManager(
|
||||
tool_call_handler, answer_handler, self.is_cancelled
|
||||
|
||||
@@ -8,7 +8,6 @@ from pydantic.v1 import BaseModel as BaseModel__v1
|
||||
|
||||
from danswer.chat.models import CitationInfo
|
||||
from danswer.chat.models import DanswerAnswerPiece
|
||||
from danswer.chat.models import DanswerQuotes
|
||||
from danswer.chat.models import StreamStopInfo
|
||||
from danswer.chat.models import StreamStopReason
|
||||
from danswer.file_store.models import InMemoryChatFile
|
||||
@@ -30,7 +29,6 @@ if TYPE_CHECKING:
|
||||
ResponsePart = (
|
||||
DanswerAnswerPiece
|
||||
| CitationInfo
|
||||
| DanswerQuotes
|
||||
| ToolCallKickoff
|
||||
| ToolResponse
|
||||
| ToolCallFinalResult
|
||||
|
||||
@@ -9,9 +9,6 @@ from danswer.llm.answering.llm_response_handler import ResponsePart
|
||||
from danswer.llm.answering.stream_processing.citation_processing import (
|
||||
CitationProcessor,
|
||||
)
|
||||
from danswer.llm.answering.stream_processing.quotes_processing import (
|
||||
QuotesProcessor,
|
||||
)
|
||||
from danswer.llm.answering.stream_processing.utils import DocumentIdOrderMapping
|
||||
from danswer.utils.logger import setup_logger
|
||||
|
||||
@@ -70,28 +67,29 @@ class CitationResponseHandler(AnswerResponseHandler):
|
||||
yield from self.citation_processor.process_token(content)
|
||||
|
||||
|
||||
class QuotesResponseHandler(AnswerResponseHandler):
|
||||
def __init__(
|
||||
self,
|
||||
context_docs: list[LlmDoc],
|
||||
is_json_prompt: bool = True,
|
||||
):
|
||||
self.quotes_processor = QuotesProcessor(
|
||||
context_docs=context_docs,
|
||||
is_json_prompt=is_json_prompt,
|
||||
)
|
||||
# No longer in use, remove later
|
||||
# class QuotesResponseHandler(AnswerResponseHandler):
|
||||
# def __init__(
|
||||
# self,
|
||||
# context_docs: list[LlmDoc],
|
||||
# is_json_prompt: bool = True,
|
||||
# ):
|
||||
# self.quotes_processor = QuotesProcessor(
|
||||
# context_docs=context_docs,
|
||||
# is_json_prompt=is_json_prompt,
|
||||
# )
|
||||
|
||||
def handle_response_part(
|
||||
self,
|
||||
response_item: BaseMessage | None,
|
||||
previous_response_items: list[BaseMessage],
|
||||
) -> Generator[ResponsePart, None, None]:
|
||||
if response_item is None:
|
||||
yield from self.quotes_processor.process_token(None)
|
||||
return
|
||||
# def handle_response_part(
|
||||
# self,
|
||||
# response_item: BaseMessage | None,
|
||||
# previous_response_items: list[BaseMessage],
|
||||
# ) -> Generator[ResponsePart, None, None]:
|
||||
# if response_item is None:
|
||||
# yield from self.quotes_processor.process_token(None)
|
||||
# return
|
||||
|
||||
content = (
|
||||
response_item.content if isinstance(response_item.content, str) else ""
|
||||
)
|
||||
# content = (
|
||||
# response_item.content if isinstance(response_item.content, str) else ""
|
||||
# )
|
||||
|
||||
yield from self.quotes_processor.process_token(content)
|
||||
# yield from self.quotes_processor.process_token(content)
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
# THIS IS NO LONGER IN USE
|
||||
import math
|
||||
import re
|
||||
from collections.abc import Generator
|
||||
@@ -5,11 +6,10 @@ from json import JSONDecodeError
|
||||
from typing import Optional
|
||||
|
||||
import regex
|
||||
from pydantic import BaseModel
|
||||
|
||||
from danswer.chat.models import DanswerAnswer
|
||||
from danswer.chat.models import DanswerAnswerPiece
|
||||
from danswer.chat.models import DanswerQuote
|
||||
from danswer.chat.models import DanswerQuotes
|
||||
from danswer.chat.models import LlmDoc
|
||||
from danswer.configs.chat_configs import QUOTE_ALLOWED_ERROR_PERCENT
|
||||
from danswer.context.search.models import InferenceChunk
|
||||
@@ -26,6 +26,20 @@ logger = setup_logger()
|
||||
answer_pattern = re.compile(r'{\s*"answer"\s*:\s*"', re.IGNORECASE)
|
||||
|
||||
|
||||
class DanswerQuote(BaseModel):
|
||||
# This is during inference so everything is a string by this point
|
||||
quote: str
|
||||
document_id: str
|
||||
link: str | None
|
||||
source_type: str
|
||||
semantic_identifier: str
|
||||
blurb: str
|
||||
|
||||
|
||||
class DanswerQuotes(BaseModel):
|
||||
quotes: list[DanswerQuote]
|
||||
|
||||
|
||||
def _extract_answer_quotes_freeform(
|
||||
answer_raw: str,
|
||||
) -> tuple[Optional[str], Optional[list[str]]]:
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
from typing import Any
|
||||
|
||||
from danswer.chat.models import PersonaOverrideConfig
|
||||
from danswer.configs.app_configs import DISABLE_GENERATIVE_AI
|
||||
from danswer.configs.chat_configs import QA_TIMEOUT
|
||||
from danswer.configs.model_configs import GEN_AI_MODEL_FALLBACK_MAX_TOKENS
|
||||
@@ -13,8 +14,11 @@ from danswer.llm.exceptions import GenAIDisabledException
|
||||
from danswer.llm.interfaces import LLM
|
||||
from danswer.llm.override_models import LLMOverride
|
||||
from danswer.utils.headers import build_llm_extra_headers
|
||||
from danswer.utils.logger import setup_logger
|
||||
from danswer.utils.long_term_log import LongTermLogger
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
def _build_extra_model_kwargs(provider: str) -> dict[str, Any]:
|
||||
"""Ollama requires us to specify the max context window.
|
||||
@@ -32,11 +36,15 @@ def get_main_llm_from_tuple(
|
||||
|
||||
|
||||
def get_llms_for_persona(
|
||||
persona: Persona,
|
||||
persona: Persona | PersonaOverrideConfig | None,
|
||||
llm_override: LLMOverride | None = None,
|
||||
additional_headers: dict[str, str] | None = None,
|
||||
long_term_logger: LongTermLogger | None = None,
|
||||
) -> tuple[LLM, LLM]:
|
||||
if persona is None:
|
||||
logger.warning("No persona provided, using default LLMs")
|
||||
return get_default_llms()
|
||||
|
||||
model_provider_override = llm_override.model_provider if llm_override else None
|
||||
model_version_override = llm_override.model_version if llm_override else None
|
||||
temperature_override = llm_override.temperature if llm_override else None
|
||||
@@ -71,6 +79,7 @@ def get_llms_for_persona(
|
||||
api_base=llm_provider.api_base,
|
||||
api_version=llm_provider.api_version,
|
||||
custom_config=llm_provider.custom_config,
|
||||
temperature=temperature_override,
|
||||
additional_headers=additional_headers,
|
||||
long_term_logger=long_term_logger,
|
||||
)
|
||||
@@ -128,11 +137,13 @@ def get_llm(
|
||||
api_base: str | None = None,
|
||||
api_version: str | None = None,
|
||||
custom_config: dict[str, str] | None = None,
|
||||
temperature: float = GEN_AI_TEMPERATURE,
|
||||
temperature: float | None = None,
|
||||
timeout: int = QA_TIMEOUT,
|
||||
additional_headers: dict[str, str] | None = None,
|
||||
long_term_logger: LongTermLogger | None = None,
|
||||
) -> LLM:
|
||||
if temperature is None:
|
||||
temperature = GEN_AI_TEMPERATURE
|
||||
return DefaultMultiLLM(
|
||||
model_provider=provider,
|
||||
model_name=model,
|
||||
|
||||
@@ -25,7 +25,6 @@ from danswer.auth.schemas import UserCreate
|
||||
from danswer.auth.schemas import UserRead
|
||||
from danswer.auth.schemas import UserUpdate
|
||||
from danswer.auth.users import auth_backend
|
||||
from danswer.auth.users import BasicAuthenticationError
|
||||
from danswer.auth.users import create_danswer_oauth_router
|
||||
from danswer.auth.users import fastapi_users
|
||||
from danswer.configs.app_configs import APP_API_PREFIX
|
||||
@@ -92,6 +91,7 @@ from danswer.server.settings.api import basic_router as settings_router
|
||||
from danswer.server.token_rate_limits.api import (
|
||||
router as token_rate_limit_settings_router,
|
||||
)
|
||||
from danswer.server.utils import BasicAuthenticationError
|
||||
from danswer.setup import setup_danswer
|
||||
from danswer.setup import setup_multitenant_danswer
|
||||
from danswer.utils.logger import setup_logger
|
||||
@@ -206,7 +206,7 @@ def log_http_error(_: Request, exc: Exception) -> JSONResponse:
|
||||
|
||||
if isinstance(exc, BasicAuthenticationError):
|
||||
# For BasicAuthenticationError, just log a brief message without stack trace (almost always spam)
|
||||
logger.error(f"Authentication failed: {str(exc)}")
|
||||
logger.warning(f"Authentication failed: {str(exc)}")
|
||||
|
||||
elif status_code >= 400:
|
||||
error_msg = f"{str(exc)}\n"
|
||||
|
||||
@@ -1,456 +0,0 @@
|
||||
from collections.abc import Callable
|
||||
from collections.abc import Iterator
|
||||
from typing import cast
|
||||
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from danswer.chat.chat_utils import reorganize_citations
|
||||
from danswer.chat.models import CitationInfo
|
||||
from danswer.chat.models import DanswerAnswerPiece
|
||||
from danswer.chat.models import DanswerContexts
|
||||
from danswer.chat.models import DanswerQuotes
|
||||
from danswer.chat.models import DocumentRelevance
|
||||
from danswer.chat.models import LLMRelevanceFilterResponse
|
||||
from danswer.chat.models import QADocsResponse
|
||||
from danswer.chat.models import RelevanceAnalysis
|
||||
from danswer.chat.models import StreamingError
|
||||
from danswer.configs.chat_configs import DISABLE_LLM_DOC_RELEVANCE
|
||||
from danswer.configs.chat_configs import MAX_CHUNKS_FED_TO_CHAT
|
||||
from danswer.configs.chat_configs import QA_TIMEOUT
|
||||
from danswer.configs.constants import MessageType
|
||||
from danswer.context.search.enums import LLMEvaluationType
|
||||
from danswer.context.search.models import RerankMetricsContainer
|
||||
from danswer.context.search.models import RetrievalMetricsContainer
|
||||
from danswer.context.search.utils import chunks_or_sections_to_search_docs
|
||||
from danswer.context.search.utils import dedupe_documents
|
||||
from danswer.db.chat import create_chat_session
|
||||
from danswer.db.chat import create_db_search_doc
|
||||
from danswer.db.chat import create_new_chat_message
|
||||
from danswer.db.chat import get_or_create_root_message
|
||||
from danswer.db.chat import translate_db_message_to_chat_message_detail
|
||||
from danswer.db.chat import translate_db_search_doc_to_server_search_doc
|
||||
from danswer.db.chat import update_search_docs_table_with_relevance
|
||||
from danswer.db.engine import get_session_context_manager
|
||||
from danswer.db.models import Persona
|
||||
from danswer.db.models import User
|
||||
from danswer.db.persona import get_prompt_by_id
|
||||
from danswer.llm.answering.answer import Answer
|
||||
from danswer.llm.answering.models import AnswerStyleConfig
|
||||
from danswer.llm.answering.models import CitationConfig
|
||||
from danswer.llm.answering.models import DocumentPruningConfig
|
||||
from danswer.llm.answering.models import PromptConfig
|
||||
from danswer.llm.answering.models import QuotesConfig
|
||||
from danswer.llm.factory import get_llms_for_persona
|
||||
from danswer.llm.factory import get_main_llm_from_tuple
|
||||
from danswer.natural_language_processing.utils import get_tokenizer
|
||||
from danswer.one_shot_answer.models import DirectQARequest
|
||||
from danswer.one_shot_answer.models import OneShotQAResponse
|
||||
from danswer.one_shot_answer.models import QueryRephrase
|
||||
from danswer.one_shot_answer.qa_utils import combine_message_thread
|
||||
from danswer.one_shot_answer.qa_utils import slackify_message_thread
|
||||
from danswer.secondary_llm_flows.answer_validation import get_answer_validity
|
||||
from danswer.secondary_llm_flows.query_expansion import thread_based_query_rephrase
|
||||
from danswer.server.query_and_chat.models import ChatMessageDetail
|
||||
from danswer.server.utils import get_json_line
|
||||
from danswer.tools.force import ForceUseTool
|
||||
from danswer.tools.models import ToolResponse
|
||||
from danswer.tools.tool_implementations.search.search_tool import SEARCH_DOC_CONTENT_ID
|
||||
from danswer.tools.tool_implementations.search.search_tool import (
|
||||
SEARCH_RESPONSE_SUMMARY_ID,
|
||||
)
|
||||
from danswer.tools.tool_implementations.search.search_tool import SearchResponseSummary
|
||||
from danswer.tools.tool_implementations.search.search_tool import SearchTool
|
||||
from danswer.tools.tool_implementations.search.search_tool import (
|
||||
SECTION_RELEVANCE_LIST_ID,
|
||||
)
|
||||
from danswer.tools.tool_runner import ToolCallKickoff
|
||||
from danswer.utils.logger import setup_logger
|
||||
from danswer.utils.long_term_log import LongTermLogger
|
||||
from danswer.utils.timing import log_generator_function_time
|
||||
from danswer.utils.variable_functionality import fetch_ee_implementation_or_noop
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
AnswerObjectIterator = Iterator[
|
||||
QueryRephrase
|
||||
| QADocsResponse
|
||||
| LLMRelevanceFilterResponse
|
||||
| DanswerAnswerPiece
|
||||
| DanswerQuotes
|
||||
| DanswerContexts
|
||||
| StreamingError
|
||||
| ChatMessageDetail
|
||||
| CitationInfo
|
||||
| ToolCallKickoff
|
||||
| DocumentRelevance
|
||||
]
|
||||
|
||||
|
||||
def stream_answer_objects(
|
||||
query_req: DirectQARequest,
|
||||
user: User | None,
|
||||
# These need to be passed in because in Web UI one shot flow,
|
||||
# we can have much more document as there is no history.
|
||||
# For Slack flow, we need to save more tokens for the thread context
|
||||
max_document_tokens: int | None,
|
||||
max_history_tokens: int | None,
|
||||
db_session: Session,
|
||||
# Needed to translate persona num_chunks to tokens to the LLM
|
||||
default_num_chunks: float = MAX_CHUNKS_FED_TO_CHAT,
|
||||
timeout: int = QA_TIMEOUT,
|
||||
bypass_acl: bool = False,
|
||||
use_citations: bool = False,
|
||||
danswerbot_flow: bool = False,
|
||||
retrieval_metrics_callback: (
|
||||
Callable[[RetrievalMetricsContainer], None] | None
|
||||
) = None,
|
||||
rerank_metrics_callback: Callable[[RerankMetricsContainer], None] | None = None,
|
||||
) -> AnswerObjectIterator:
|
||||
"""Streams in order:
|
||||
1. [always] Retrieved documents, stops flow if nothing is found
|
||||
2. [conditional] LLM selected chunk indices if LLM chunk filtering is turned on
|
||||
3. [always] A set of streamed DanswerAnswerPiece and DanswerQuotes at the end
|
||||
or an error anywhere along the line if something fails
|
||||
4. [always] Details on the final AI response message that is created
|
||||
"""
|
||||
user_id = user.id if user is not None else None
|
||||
query_msg = query_req.messages[-1]
|
||||
history = query_req.messages[:-1]
|
||||
|
||||
chat_session = create_chat_session(
|
||||
db_session=db_session,
|
||||
description="", # One shot queries don't need naming as it's never displayed
|
||||
user_id=user_id,
|
||||
persona_id=query_req.persona_id,
|
||||
one_shot=True,
|
||||
danswerbot_flow=danswerbot_flow,
|
||||
)
|
||||
|
||||
# permanent "log" store, used primarily for debugging
|
||||
long_term_logger = LongTermLogger(
|
||||
metadata={"user_id": str(user_id), "chat_session_id": str(chat_session.id)}
|
||||
)
|
||||
|
||||
temporary_persona: Persona | None = None
|
||||
|
||||
if query_req.persona_config is not None:
|
||||
temporary_persona = fetch_ee_implementation_or_noop(
|
||||
"danswer.server.query_and_chat.utils", "create_temporary_persona", None
|
||||
)(db_session=db_session, persona_config=query_req.persona_config, user=user)
|
||||
|
||||
persona = temporary_persona if temporary_persona else chat_session.persona
|
||||
|
||||
try:
|
||||
llm, fast_llm = get_llms_for_persona(
|
||||
persona=persona, long_term_logger=long_term_logger
|
||||
)
|
||||
except ValueError as e:
|
||||
logger.error(
|
||||
f"Failed to initialize LLMs for persona '{persona.name}': {str(e)}"
|
||||
)
|
||||
if "No LLM provider" in str(e):
|
||||
raise ValueError(
|
||||
"Please configure a Generative AI model to use this feature."
|
||||
) from e
|
||||
raise ValueError(
|
||||
"Failed to initialize the AI model. Please check your configuration and try again."
|
||||
) from e
|
||||
|
||||
llm_tokenizer = get_tokenizer(
|
||||
model_name=llm.config.model_name,
|
||||
provider_type=llm.config.model_provider,
|
||||
)
|
||||
|
||||
# Create a chat session which will just store the root message, the query, and the AI response
|
||||
root_message = get_or_create_root_message(
|
||||
chat_session_id=chat_session.id, db_session=db_session
|
||||
)
|
||||
|
||||
history_str = combine_message_thread(
|
||||
messages=history,
|
||||
max_tokens=max_history_tokens,
|
||||
llm_tokenizer=llm_tokenizer,
|
||||
)
|
||||
|
||||
rephrased_query = query_req.query_override or thread_based_query_rephrase(
|
||||
user_query=query_msg.message,
|
||||
history_str=history_str,
|
||||
)
|
||||
|
||||
# Given back ahead of the documents for latency reasons
|
||||
# In chat flow it's given back along with the documents
|
||||
yield QueryRephrase(rephrased_query=rephrased_query)
|
||||
|
||||
prompt = None
|
||||
if query_req.prompt_id is not None:
|
||||
# NOTE: let the user access any prompt as long as the Persona is shared
|
||||
# with them
|
||||
prompt = get_prompt_by_id(
|
||||
prompt_id=query_req.prompt_id, user=None, db_session=db_session
|
||||
)
|
||||
if prompt is None:
|
||||
if not persona.prompts:
|
||||
raise RuntimeError(
|
||||
"Persona does not have any prompts - this should never happen"
|
||||
)
|
||||
prompt = persona.prompts[0]
|
||||
|
||||
user_message_str = query_msg.message
|
||||
# For this endpoint, we only save one user message to the chat session
|
||||
# However, for slackbot, we want to include the history of the entire thread
|
||||
if danswerbot_flow:
|
||||
# Right now, we only support bringing over citations and search docs
|
||||
# from the last message in the thread, not the entire thread
|
||||
# in the future, we may want to retrieve the entire thread
|
||||
user_message_str = slackify_message_thread(query_req.messages)
|
||||
|
||||
# Create the first User query message
|
||||
new_user_message = create_new_chat_message(
|
||||
chat_session_id=chat_session.id,
|
||||
parent_message=root_message,
|
||||
prompt_id=query_req.prompt_id,
|
||||
message=user_message_str,
|
||||
token_count=len(llm_tokenizer.encode(user_message_str)),
|
||||
message_type=MessageType.USER,
|
||||
db_session=db_session,
|
||||
commit=True,
|
||||
)
|
||||
|
||||
prompt_config = PromptConfig.from_model(prompt)
|
||||
document_pruning_config = DocumentPruningConfig(
|
||||
max_chunks=int(
|
||||
persona.num_chunks if persona.num_chunks is not None else default_num_chunks
|
||||
),
|
||||
max_tokens=max_document_tokens,
|
||||
)
|
||||
|
||||
answer_config = AnswerStyleConfig(
|
||||
citation_config=CitationConfig() if use_citations else None,
|
||||
quotes_config=QuotesConfig() if not use_citations else None,
|
||||
document_pruning_config=document_pruning_config,
|
||||
)
|
||||
|
||||
search_tool = SearchTool(
|
||||
db_session=db_session,
|
||||
user=user,
|
||||
evaluation_type=(
|
||||
LLMEvaluationType.SKIP
|
||||
if DISABLE_LLM_DOC_RELEVANCE
|
||||
else query_req.evaluation_type
|
||||
),
|
||||
persona=persona,
|
||||
retrieval_options=query_req.retrieval_options,
|
||||
prompt_config=prompt_config,
|
||||
llm=llm,
|
||||
fast_llm=fast_llm,
|
||||
pruning_config=document_pruning_config,
|
||||
answer_style_config=answer_config,
|
||||
bypass_acl=bypass_acl,
|
||||
chunks_above=query_req.chunks_above,
|
||||
chunks_below=query_req.chunks_below,
|
||||
full_doc=query_req.full_doc,
|
||||
)
|
||||
|
||||
answer = Answer(
|
||||
question=query_msg.message,
|
||||
answer_style_config=answer_config,
|
||||
prompt_config=PromptConfig.from_model(prompt),
|
||||
llm=get_main_llm_from_tuple(
|
||||
get_llms_for_persona(persona=persona, long_term_logger=long_term_logger)
|
||||
),
|
||||
single_message_history=history_str,
|
||||
tools=[search_tool] if search_tool else [],
|
||||
force_use_tool=(
|
||||
ForceUseTool(
|
||||
tool_name=search_tool.name,
|
||||
args={"query": rephrased_query},
|
||||
force_use=True,
|
||||
)
|
||||
),
|
||||
# for now, don't use tool calling for this flow, as we haven't
|
||||
# tested quotes with tool calling too much yet
|
||||
skip_explicit_tool_calling=True,
|
||||
return_contexts=query_req.return_contexts,
|
||||
skip_gen_ai_answer_generation=query_req.skip_gen_ai_answer_generation,
|
||||
)
|
||||
# won't be any FileChatDisplay responses since that tool is never passed in
|
||||
for packet in cast(AnswerObjectIterator, answer.processed_streamed_output):
|
||||
# for one-shot flow, don't currently do anything with these
|
||||
if isinstance(packet, ToolResponse):
|
||||
# (likely fine that it comes after the initial creation of the search docs)
|
||||
if packet.id == SEARCH_RESPONSE_SUMMARY_ID:
|
||||
search_response_summary = cast(SearchResponseSummary, packet.response)
|
||||
|
||||
top_docs = chunks_or_sections_to_search_docs(
|
||||
search_response_summary.top_sections
|
||||
)
|
||||
|
||||
# Deduping happens at the last step to avoid harming quality by dropping content early on
|
||||
deduped_docs = top_docs
|
||||
if query_req.retrieval_options.dedupe_docs:
|
||||
deduped_docs, dropped_inds = dedupe_documents(top_docs)
|
||||
|
||||
reference_db_search_docs = [
|
||||
create_db_search_doc(server_search_doc=doc, db_session=db_session)
|
||||
for doc in deduped_docs
|
||||
]
|
||||
|
||||
response_docs = [
|
||||
translate_db_search_doc_to_server_search_doc(db_search_doc)
|
||||
for db_search_doc in reference_db_search_docs
|
||||
]
|
||||
|
||||
initial_response = QADocsResponse(
|
||||
rephrased_query=rephrased_query,
|
||||
top_documents=response_docs,
|
||||
predicted_flow=search_response_summary.predicted_flow,
|
||||
predicted_search=search_response_summary.predicted_search,
|
||||
applied_source_filters=search_response_summary.final_filters.source_type,
|
||||
applied_time_cutoff=search_response_summary.final_filters.time_cutoff,
|
||||
recency_bias_multiplier=search_response_summary.recency_bias_multiplier,
|
||||
)
|
||||
|
||||
yield initial_response
|
||||
|
||||
elif packet.id == SEARCH_DOC_CONTENT_ID:
|
||||
yield packet.response
|
||||
|
||||
elif packet.id == SECTION_RELEVANCE_LIST_ID:
|
||||
document_based_response = {}
|
||||
|
||||
if packet.response is not None:
|
||||
for evaluation in packet.response:
|
||||
document_based_response[
|
||||
evaluation.document_id
|
||||
] = RelevanceAnalysis(
|
||||
relevant=evaluation.relevant, content=evaluation.content
|
||||
)
|
||||
|
||||
evaluation_response = DocumentRelevance(
|
||||
relevance_summaries=document_based_response
|
||||
)
|
||||
if reference_db_search_docs is not None:
|
||||
update_search_docs_table_with_relevance(
|
||||
db_session=db_session,
|
||||
reference_db_search_docs=reference_db_search_docs,
|
||||
relevance_summary=evaluation_response,
|
||||
)
|
||||
yield evaluation_response
|
||||
|
||||
else:
|
||||
yield packet
|
||||
|
||||
# Saving Gen AI answer and responding with message info
|
||||
gen_ai_response_message = create_new_chat_message(
|
||||
chat_session_id=chat_session.id,
|
||||
parent_message=new_user_message,
|
||||
prompt_id=query_req.prompt_id,
|
||||
message=answer.llm_answer,
|
||||
token_count=len(llm_tokenizer.encode(answer.llm_answer)),
|
||||
message_type=MessageType.ASSISTANT,
|
||||
error=None,
|
||||
reference_docs=reference_db_search_docs,
|
||||
db_session=db_session,
|
||||
commit=True,
|
||||
)
|
||||
|
||||
msg_detail_response = translate_db_message_to_chat_message_detail(
|
||||
gen_ai_response_message
|
||||
)
|
||||
yield msg_detail_response
|
||||
|
||||
|
||||
@log_generator_function_time()
|
||||
def stream_search_answer(
|
||||
query_req: DirectQARequest,
|
||||
user: User | None,
|
||||
max_document_tokens: int | None,
|
||||
max_history_tokens: int | None,
|
||||
) -> Iterator[str]:
|
||||
with get_session_context_manager() as session:
|
||||
objects = stream_answer_objects(
|
||||
query_req=query_req,
|
||||
user=user,
|
||||
max_document_tokens=max_document_tokens,
|
||||
max_history_tokens=max_history_tokens,
|
||||
db_session=session,
|
||||
)
|
||||
for obj in objects:
|
||||
yield get_json_line(obj.model_dump())
|
||||
|
||||
|
||||
def get_search_answer(
|
||||
query_req: DirectQARequest,
|
||||
user: User | None,
|
||||
max_document_tokens: int | None,
|
||||
max_history_tokens: int | None,
|
||||
db_session: Session,
|
||||
answer_generation_timeout: int = QA_TIMEOUT,
|
||||
enable_reflexion: bool = False,
|
||||
bypass_acl: bool = False,
|
||||
use_citations: bool = False,
|
||||
danswerbot_flow: bool = False,
|
||||
retrieval_metrics_callback: (
|
||||
Callable[[RetrievalMetricsContainer], None] | None
|
||||
) = None,
|
||||
rerank_metrics_callback: Callable[[RerankMetricsContainer], None] | None = None,
|
||||
) -> OneShotQAResponse:
|
||||
"""Collects the streamed one shot answer responses into a single object"""
|
||||
qa_response = OneShotQAResponse()
|
||||
|
||||
results = stream_answer_objects(
|
||||
query_req=query_req,
|
||||
user=user,
|
||||
max_document_tokens=max_document_tokens,
|
||||
max_history_tokens=max_history_tokens,
|
||||
db_session=db_session,
|
||||
bypass_acl=bypass_acl,
|
||||
use_citations=use_citations,
|
||||
danswerbot_flow=danswerbot_flow,
|
||||
timeout=answer_generation_timeout,
|
||||
retrieval_metrics_callback=retrieval_metrics_callback,
|
||||
rerank_metrics_callback=rerank_metrics_callback,
|
||||
)
|
||||
|
||||
answer = ""
|
||||
for packet in results:
|
||||
if isinstance(packet, QueryRephrase):
|
||||
qa_response.rephrase = packet.rephrased_query
|
||||
if isinstance(packet, DanswerAnswerPiece) and packet.answer_piece:
|
||||
answer += packet.answer_piece
|
||||
elif isinstance(packet, QADocsResponse):
|
||||
qa_response.docs = packet
|
||||
elif isinstance(packet, LLMRelevanceFilterResponse):
|
||||
qa_response.llm_selected_doc_indices = packet.llm_selected_doc_indices
|
||||
elif isinstance(packet, DanswerQuotes):
|
||||
qa_response.quotes = packet
|
||||
elif isinstance(packet, CitationInfo):
|
||||
if qa_response.citations:
|
||||
qa_response.citations.append(packet)
|
||||
else:
|
||||
qa_response.citations = [packet]
|
||||
elif isinstance(packet, DanswerContexts):
|
||||
qa_response.contexts = packet
|
||||
elif isinstance(packet, StreamingError):
|
||||
qa_response.error_msg = packet.error
|
||||
elif isinstance(packet, ChatMessageDetail):
|
||||
qa_response.chat_message_id = packet.message_id
|
||||
|
||||
if answer:
|
||||
qa_response.answer = answer
|
||||
|
||||
if enable_reflexion:
|
||||
# Because follow up messages are explicitly tagged, we don't need to verify the answer
|
||||
if len(query_req.messages) == 1:
|
||||
first_query = query_req.messages[0].message
|
||||
qa_response.answer_valid = get_answer_validity(first_query, answer)
|
||||
else:
|
||||
qa_response.answer_valid = True
|
||||
|
||||
if use_citations and qa_response.answer and qa_response.citations:
|
||||
# Reorganize citation nums to be in the same order as the answer
|
||||
qa_response.answer, qa_response.citations = reorganize_citations(
|
||||
qa_response.answer, qa_response.citations
|
||||
)
|
||||
|
||||
return qa_response
|
||||
@@ -1,114 +0,0 @@
|
||||
from typing import Any
|
||||
|
||||
from pydantic import BaseModel
|
||||
from pydantic import Field
|
||||
from pydantic import model_validator
|
||||
|
||||
from danswer.chat.models import CitationInfo
|
||||
from danswer.chat.models import DanswerContexts
|
||||
from danswer.chat.models import DanswerQuotes
|
||||
from danswer.chat.models import QADocsResponse
|
||||
from danswer.configs.constants import MessageType
|
||||
from danswer.context.search.enums import LLMEvaluationType
|
||||
from danswer.context.search.enums import RecencyBiasSetting
|
||||
from danswer.context.search.enums import SearchType
|
||||
from danswer.context.search.models import ChunkContext
|
||||
from danswer.context.search.models import RerankingDetails
|
||||
from danswer.context.search.models import RetrievalDetails
|
||||
|
||||
|
||||
class QueryRephrase(BaseModel):
|
||||
rephrased_query: str
|
||||
|
||||
|
||||
class ThreadMessage(BaseModel):
|
||||
message: str
|
||||
sender: str | None = None
|
||||
role: MessageType = MessageType.USER
|
||||
|
||||
|
||||
class PromptConfig(BaseModel):
|
||||
name: str
|
||||
description: str = ""
|
||||
system_prompt: str
|
||||
task_prompt: str = ""
|
||||
include_citations: bool = True
|
||||
datetime_aware: bool = True
|
||||
|
||||
|
||||
class ToolConfig(BaseModel):
|
||||
id: int
|
||||
|
||||
|
||||
class PersonaConfig(BaseModel):
|
||||
name: str
|
||||
description: str
|
||||
search_type: SearchType = SearchType.SEMANTIC
|
||||
num_chunks: float | None = None
|
||||
llm_relevance_filter: bool = False
|
||||
llm_filter_extraction: bool = False
|
||||
recency_bias: RecencyBiasSetting = RecencyBiasSetting.AUTO
|
||||
llm_model_provider_override: str | None = None
|
||||
llm_model_version_override: str | None = None
|
||||
|
||||
prompts: list[PromptConfig] = Field(default_factory=list)
|
||||
prompt_ids: list[int] = Field(default_factory=list)
|
||||
|
||||
document_set_ids: list[int] = Field(default_factory=list)
|
||||
tools: list[ToolConfig] = Field(default_factory=list)
|
||||
tool_ids: list[int] = Field(default_factory=list)
|
||||
custom_tools_openapi: list[dict[str, Any]] = Field(default_factory=list)
|
||||
|
||||
|
||||
class DirectQARequest(ChunkContext):
|
||||
persona_config: PersonaConfig | None = None
|
||||
persona_id: int | None = None
|
||||
|
||||
messages: list[ThreadMessage]
|
||||
prompt_id: int | None = None
|
||||
multilingual_query_expansion: list[str] | None = None
|
||||
retrieval_options: RetrievalDetails = Field(default_factory=RetrievalDetails)
|
||||
rerank_settings: RerankingDetails | None = None
|
||||
evaluation_type: LLMEvaluationType = LLMEvaluationType.UNSPECIFIED
|
||||
|
||||
chain_of_thought: bool = False
|
||||
return_contexts: bool = False
|
||||
|
||||
# allows the caller to specify the exact search query they want to use
|
||||
# can be used if the message sent to the LLM / query should not be the same
|
||||
# will also disable Thread-based Rewording if specified
|
||||
query_override: str | None = None
|
||||
|
||||
# If True, skips generative an AI response to the search query
|
||||
skip_gen_ai_answer_generation: bool = False
|
||||
|
||||
@model_validator(mode="after")
|
||||
def check_persona_fields(self) -> "DirectQARequest":
|
||||
if (self.persona_config is None) == (self.persona_id is None):
|
||||
raise ValueError("Exactly one of persona_config or persona_id must be set")
|
||||
return self
|
||||
|
||||
@model_validator(mode="after")
|
||||
def check_chain_of_thought_and_prompt_id(self) -> "DirectQARequest":
|
||||
if self.chain_of_thought and self.prompt_id is not None:
|
||||
raise ValueError(
|
||||
"If chain_of_thought is True, prompt_id must be None"
|
||||
"The chain of thought prompt is only for question "
|
||||
"answering and does not accept customizing."
|
||||
)
|
||||
|
||||
return self
|
||||
|
||||
|
||||
class OneShotQAResponse(BaseModel):
|
||||
# This is built piece by piece, any of these can be None as the flow could break
|
||||
answer: str | None = None
|
||||
rephrase: str | None = None
|
||||
quotes: DanswerQuotes | None = None
|
||||
citations: list[CitationInfo] | None = None
|
||||
docs: QADocsResponse | None = None
|
||||
llm_selected_doc_indices: list[int] | None = None
|
||||
error_msg: str | None = None
|
||||
answer_valid: bool = True # Reflexion result, default True if Reflexion not run
|
||||
chat_message_id: int | None = None
|
||||
contexts: DanswerContexts | None = None
|
||||
@@ -1,81 +0,0 @@
|
||||
from collections.abc import Generator
|
||||
|
||||
from danswer.configs.constants import MessageType
|
||||
from danswer.natural_language_processing.utils import BaseTokenizer
|
||||
from danswer.one_shot_answer.models import ThreadMessage
|
||||
from danswer.utils.logger import setup_logger
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
def simulate_streaming_response(model_out: str) -> Generator[str, None, None]:
|
||||
"""Mock streaming by generating the passed in model output, character by character"""
|
||||
for token in model_out:
|
||||
yield token
|
||||
|
||||
|
||||
def combine_message_thread(
|
||||
messages: list[ThreadMessage],
|
||||
max_tokens: int | None,
|
||||
llm_tokenizer: BaseTokenizer,
|
||||
) -> str:
|
||||
"""Used to create a single combined message context from threads"""
|
||||
if not messages:
|
||||
return ""
|
||||
|
||||
message_strs: list[str] = []
|
||||
total_token_count = 0
|
||||
|
||||
for message in reversed(messages):
|
||||
if message.role == MessageType.USER:
|
||||
role_str = message.role.value.upper()
|
||||
if message.sender:
|
||||
role_str += " " + message.sender
|
||||
else:
|
||||
# Since other messages might have the user identifying information
|
||||
# better to use Unknown for symmetry
|
||||
role_str += " Unknown"
|
||||
else:
|
||||
role_str = message.role.value.upper()
|
||||
|
||||
msg_str = f"{role_str}:\n{message.message}"
|
||||
message_token_count = len(llm_tokenizer.encode(msg_str))
|
||||
|
||||
if (
|
||||
max_tokens is not None
|
||||
and total_token_count + message_token_count > max_tokens
|
||||
):
|
||||
break
|
||||
|
||||
message_strs.insert(0, msg_str)
|
||||
total_token_count += message_token_count
|
||||
|
||||
return "\n\n".join(message_strs)
|
||||
|
||||
|
||||
def slackify_message(message: ThreadMessage) -> str:
|
||||
if message.role != MessageType.USER:
|
||||
return message.message
|
||||
|
||||
return f"{message.sender or 'Unknown User'} said in Slack:\n{message.message}"
|
||||
|
||||
|
||||
def slackify_message_thread(messages: list[ThreadMessage]) -> str:
|
||||
if not messages:
|
||||
return ""
|
||||
|
||||
message_strs: list[str] = []
|
||||
for message in messages:
|
||||
if message.role == MessageType.USER:
|
||||
message_text = (
|
||||
f"{message.sender or 'Unknown User'} said in Slack:\n{message.message}"
|
||||
)
|
||||
elif message.role == MessageType.ASSISTANT:
|
||||
message_text = f"DanswerBot said in Slack:\n{message.message}"
|
||||
else:
|
||||
message_text = (
|
||||
f"{message.role.value.upper()} said in Slack:\n{message.message}"
|
||||
)
|
||||
message_strs.append(message_text)
|
||||
|
||||
return "\n\n".join(message_strs)
|
||||
@@ -133,6 +133,8 @@ class RedisConnectorPermissionSync:
|
||||
lock: RedisLock | None,
|
||||
new_permissions: list[DocExternalAccess],
|
||||
source_string: str,
|
||||
connector_id: int,
|
||||
credential_id: int,
|
||||
) -> int | None:
|
||||
last_lock_time = time.monotonic()
|
||||
async_results = []
|
||||
@@ -155,6 +157,8 @@ class RedisConnectorPermissionSync:
|
||||
tenant_id=self.tenant_id,
|
||||
serialized_doc_external_access=doc_perm.to_dict(),
|
||||
source_string=source_string,
|
||||
connector_id=connector_id,
|
||||
credential_id=credential_id,
|
||||
),
|
||||
queue=DanswerCeleryQueues.DOC_PERMISSIONS_UPSERT,
|
||||
task_id=custom_task_id,
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from pydantic import BaseModel
|
||||
@@ -15,7 +16,6 @@ from danswer.danswerbot.slack.config import VALID_SLACK_FILTERS
|
||||
from danswer.db.models import AllowedAnswerFilters
|
||||
from danswer.db.models import ChannelConfig
|
||||
from danswer.db.models import SlackBot as SlackAppModel
|
||||
from danswer.db.models import SlackBotResponseType
|
||||
from danswer.db.models import SlackChannelConfig as SlackChannelConfigModel
|
||||
from danswer.db.models import User
|
||||
from danswer.server.features.persona.models import PersonaSnapshot
|
||||
@@ -148,6 +148,12 @@ class SlackBotTokens(BaseModel):
|
||||
model_config = ConfigDict(frozen=True)
|
||||
|
||||
|
||||
# TODO No longer in use, remove later
|
||||
class SlackBotResponseType(str, Enum):
|
||||
QUOTES = "quotes"
|
||||
CITATIONS = "citations"
|
||||
|
||||
|
||||
class SlackChannelConfigCreationRequest(BaseModel):
|
||||
slack_bot_id: int
|
||||
# currently, a persona is created for each Slack channel config
|
||||
@@ -197,7 +203,6 @@ class SlackChannelConfig(BaseModel):
|
||||
id: int
|
||||
persona: PersonaSnapshot | None
|
||||
channel_config: ChannelConfig
|
||||
response_type: SlackBotResponseType
|
||||
# XXX this is going away soon
|
||||
standard_answer_categories: list[StandardAnswerCategory]
|
||||
enable_auto_filters: bool
|
||||
@@ -217,7 +222,6 @@ class SlackChannelConfig(BaseModel):
|
||||
else None
|
||||
),
|
||||
channel_config=slack_channel_config_model.channel_config,
|
||||
response_type=slack_channel_config_model.response_type,
|
||||
# XXX this is going away soon
|
||||
standard_answer_categories=[
|
||||
StandardAnswerCategory.from_model(standard_answer_category_model)
|
||||
|
||||
@@ -118,7 +118,6 @@ def create_slack_channel_config(
|
||||
slack_bot_id=slack_channel_config_creation_request.slack_bot_id,
|
||||
persona_id=persona_id,
|
||||
channel_config=channel_config,
|
||||
response_type=slack_channel_config_creation_request.response_type,
|
||||
standard_answer_category_ids=slack_channel_config_creation_request.standard_answer_categories,
|
||||
db_session=db_session,
|
||||
enable_auto_filters=slack_channel_config_creation_request.enable_auto_filters,
|
||||
@@ -182,7 +181,6 @@ def patch_slack_channel_config(
|
||||
slack_channel_config_id=slack_channel_config_id,
|
||||
persona_id=persona_id,
|
||||
channel_config=channel_config,
|
||||
response_type=slack_channel_config_creation_request.response_type,
|
||||
standard_answer_category_ids=slack_channel_config_creation_request.standard_answer_categories,
|
||||
enable_auto_filters=slack_channel_config_creation_request.enable_auto_filters,
|
||||
)
|
||||
|
||||
@@ -26,7 +26,6 @@ from danswer.auth.noauth_user import fetch_no_auth_user
|
||||
from danswer.auth.noauth_user import set_no_auth_user_preferences
|
||||
from danswer.auth.schemas import UserRole
|
||||
from danswer.auth.schemas import UserStatus
|
||||
from danswer.auth.users import BasicAuthenticationError
|
||||
from danswer.auth.users import current_admin_user
|
||||
from danswer.auth.users import current_curator_or_admin_user
|
||||
from danswer.auth.users import current_user
|
||||
@@ -60,6 +59,7 @@ from danswer.server.manage.models import UserRoleUpdateRequest
|
||||
from danswer.server.models import FullUserSnapshot
|
||||
from danswer.server.models import InvitedUserSnapshot
|
||||
from danswer.server.models import MinimalUserSnapshot
|
||||
from danswer.server.utils import BasicAuthenticationError
|
||||
from danswer.server.utils import send_user_email_invite
|
||||
from danswer.utils.logger import setup_logger
|
||||
from danswer.utils.variable_functionality import fetch_ee_implementation_or_noop
|
||||
|
||||
@@ -109,6 +109,7 @@ def process_run_in_background(
|
||||
prompt_id=chat_session.persona.prompts[0].id,
|
||||
search_doc_ids=None,
|
||||
retrieval_options=search_tool_retrieval_details, # Adjust as needed
|
||||
rerank_settings=None,
|
||||
query_override=None,
|
||||
regenerate=None,
|
||||
llm_override=None,
|
||||
|
||||
@@ -5,12 +5,14 @@ from uuid import UUID
|
||||
from pydantic import BaseModel
|
||||
from pydantic import model_validator
|
||||
|
||||
from danswer.chat.models import PersonaOverrideConfig
|
||||
from danswer.chat.models import RetrievalDocs
|
||||
from danswer.configs.constants import DocumentSource
|
||||
from danswer.configs.constants import MessageType
|
||||
from danswer.configs.constants import SearchFeedbackType
|
||||
from danswer.context.search.models import BaseFilters
|
||||
from danswer.context.search.models import ChunkContext
|
||||
from danswer.context.search.models import RerankingDetails
|
||||
from danswer.context.search.models import RetrievalDetails
|
||||
from danswer.context.search.models import SearchDoc
|
||||
from danswer.context.search.models import Tag
|
||||
@@ -87,6 +89,8 @@ class CreateChatMessageRequest(ChunkContext):
|
||||
# If search_doc_ids provided, then retrieval options are unused
|
||||
search_doc_ids: list[int] | None
|
||||
retrieval_options: RetrievalDetails | None
|
||||
# Useable via the APIs but not recommended for most flows
|
||||
rerank_settings: RerankingDetails | None = None
|
||||
# allows the caller to specify the exact search query they want to use
|
||||
# will disable Query Rewording if specified
|
||||
query_override: str | None = None
|
||||
@@ -102,6 +106,10 @@ class CreateChatMessageRequest(ChunkContext):
|
||||
# allow user to specify an alternate assistnat
|
||||
alternate_assistant_id: int | None = None
|
||||
|
||||
# This takes the priority over the prompt_override
|
||||
# This won't be a type that's passed in directly from the API
|
||||
persona_override_config: PersonaOverrideConfig | None = None
|
||||
|
||||
# used for seeded chats to kick off the generation of an AI answer
|
||||
use_existing_user_message: bool = False
|
||||
|
||||
@@ -145,7 +153,7 @@ class RenameChatSessionResponse(BaseModel):
|
||||
|
||||
class ChatSessionDetails(BaseModel):
|
||||
id: UUID
|
||||
name: str
|
||||
name: str | None
|
||||
persona_id: int | None = None
|
||||
time_created: str
|
||||
shared_status: ChatSessionSharedStatus
|
||||
@@ -198,14 +206,14 @@ class ChatMessageDetail(BaseModel):
|
||||
|
||||
class SearchSessionDetailResponse(BaseModel):
|
||||
search_session_id: UUID
|
||||
description: str
|
||||
description: str | None
|
||||
documents: list[SearchDoc]
|
||||
messages: list[ChatMessageDetail]
|
||||
|
||||
|
||||
class ChatSessionDetailResponse(BaseModel):
|
||||
chat_session_id: UUID
|
||||
description: str
|
||||
description: str | None
|
||||
persona_id: int | None = None
|
||||
persona_name: str | None
|
||||
messages: list[ChatMessageDetail]
|
||||
|
||||
@@ -1,15 +1,11 @@
|
||||
import json
|
||||
from collections.abc import Generator
|
||||
from uuid import UUID
|
||||
|
||||
from fastapi import APIRouter
|
||||
from fastapi import Depends
|
||||
from fastapi import HTTPException
|
||||
from fastapi.responses import StreamingResponse
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from danswer.auth.users import current_curator_or_admin_user
|
||||
from danswer.auth.users import current_limited_user
|
||||
from danswer.auth.users import current_user
|
||||
from danswer.configs.constants import DocumentSource
|
||||
from danswer.configs.constants import MessageType
|
||||
@@ -32,8 +28,6 @@ from danswer.db.search_settings import get_current_search_settings
|
||||
from danswer.db.tag import find_tags
|
||||
from danswer.document_index.factory import get_default_document_index
|
||||
from danswer.document_index.vespa.index import VespaIndex
|
||||
from danswer.one_shot_answer.answer_question import stream_search_answer
|
||||
from danswer.one_shot_answer.models import DirectQARequest
|
||||
from danswer.server.query_and_chat.models import AdminSearchRequest
|
||||
from danswer.server.query_and_chat.models import AdminSearchResponse
|
||||
from danswer.server.query_and_chat.models import ChatSessionDetails
|
||||
@@ -41,7 +35,6 @@ from danswer.server.query_and_chat.models import ChatSessionsResponse
|
||||
from danswer.server.query_and_chat.models import SearchSessionDetailResponse
|
||||
from danswer.server.query_and_chat.models import SourceTag
|
||||
from danswer.server.query_and_chat.models import TagResponse
|
||||
from danswer.server.query_and_chat.token_limit import check_token_rate_limits
|
||||
from danswer.utils.logger import setup_logger
|
||||
|
||||
logger = setup_logger()
|
||||
@@ -140,7 +133,7 @@ def get_user_search_sessions(
|
||||
|
||||
try:
|
||||
search_sessions = get_chat_sessions_by_user(
|
||||
user_id=user_id, deleted=False, db_session=db_session, only_one_shot=True
|
||||
user_id=user_id, deleted=False, db_session=db_session
|
||||
)
|
||||
except ValueError:
|
||||
raise HTTPException(
|
||||
@@ -229,29 +222,3 @@ def get_search_session(
|
||||
],
|
||||
)
|
||||
return response
|
||||
|
||||
|
||||
@basic_router.post("/stream-answer-with-quote")
|
||||
def get_answer_with_quote(
|
||||
query_request: DirectQARequest,
|
||||
user: User = Depends(current_limited_user),
|
||||
_: None = Depends(check_token_rate_limits),
|
||||
) -> StreamingResponse:
|
||||
query = query_request.messages[0].message
|
||||
|
||||
logger.notice(f"Received query for one shot answer with quotes: {query}")
|
||||
|
||||
def stream_generator() -> Generator[str, None, None]:
|
||||
try:
|
||||
for packet in stream_search_answer(
|
||||
query_req=query_request,
|
||||
user=user,
|
||||
max_document_tokens=None,
|
||||
max_history_tokens=0,
|
||||
):
|
||||
yield json.dumps(packet) if isinstance(packet, dict) else packet
|
||||
except Exception as e:
|
||||
logger.exception("Error in search answer streaming")
|
||||
yield json.dumps({"error": str(e)})
|
||||
|
||||
return StreamingResponse(stream_generator(), media_type="application/json")
|
||||
|
||||
@@ -6,6 +6,9 @@ from email.mime.text import MIMEText
|
||||
from textwrap import dedent
|
||||
from typing import Any
|
||||
|
||||
from fastapi import HTTPException
|
||||
from fastapi import status
|
||||
|
||||
from danswer.configs.app_configs import SMTP_PASS
|
||||
from danswer.configs.app_configs import SMTP_PORT
|
||||
from danswer.configs.app_configs import SMTP_SERVER
|
||||
@@ -14,6 +17,11 @@ from danswer.configs.app_configs import WEB_DOMAIN
|
||||
from danswer.db.models import User
|
||||
|
||||
|
||||
class BasicAuthenticationError(HTTPException):
|
||||
def __init__(self, detail: str):
|
||||
super().__init__(status_code=status.HTTP_403_FORBIDDEN, detail=detail)
|
||||
|
||||
|
||||
class DateTimeEncoder(json.JSONEncoder):
|
||||
"""Custom JSON encoder that converts datetime objects to ISO format strings."""
|
||||
|
||||
|
||||
@@ -25,6 +25,9 @@ class ToolCallSummary(BaseModel__v1):
|
||||
tool_call_request: AIMessage
|
||||
tool_call_result: ToolMessage
|
||||
|
||||
class Config:
|
||||
arbitrary_types_allowed = True
|
||||
|
||||
|
||||
def tool_call_tokens(
|
||||
tool_call_summary: ToolCallSummary, llm_tokenizer: BaseTokenizer
|
||||
|
||||
@@ -13,6 +13,7 @@ from danswer.configs.chat_configs import BING_API_KEY
|
||||
from danswer.configs.model_configs import GEN_AI_TEMPERATURE
|
||||
from danswer.context.search.enums import LLMEvaluationType
|
||||
from danswer.context.search.models import InferenceSection
|
||||
from danswer.context.search.models import RerankingDetails
|
||||
from danswer.context.search.models import RetrievalDetails
|
||||
from danswer.db.llm import fetch_existing_llm_providers
|
||||
from danswer.db.models import Persona
|
||||
@@ -102,11 +103,14 @@ class SearchToolConfig(BaseModel):
|
||||
default_factory=DocumentPruningConfig
|
||||
)
|
||||
retrieval_options: RetrievalDetails = Field(default_factory=RetrievalDetails)
|
||||
rerank_settings: RerankingDetails | None = None
|
||||
selected_sections: list[InferenceSection] | None = None
|
||||
chunks_above: int = 0
|
||||
chunks_below: int = 0
|
||||
full_doc: bool = False
|
||||
latest_query_files: list[InMemoryChatFile] | None = None
|
||||
# Use with care, should only be used for DanswerBot in channels with multiple users
|
||||
bypass_acl: bool = False
|
||||
|
||||
|
||||
class InternetSearchToolConfig(BaseModel):
|
||||
@@ -170,6 +174,8 @@ def construct_tools(
|
||||
if persona.llm_relevance_filter
|
||||
else LLMEvaluationType.SKIP
|
||||
),
|
||||
rerank_settings=search_tool_config.rerank_settings,
|
||||
bypass_acl=search_tool_config.bypass_acl,
|
||||
)
|
||||
tool_dict[db_tool_model.id] = [search_tool]
|
||||
|
||||
|
||||
@@ -19,6 +19,7 @@ from danswer.context.search.enums import QueryFlow
|
||||
from danswer.context.search.enums import SearchType
|
||||
from danswer.context.search.models import IndexFilters
|
||||
from danswer.context.search.models import InferenceSection
|
||||
from danswer.context.search.models import RerankingDetails
|
||||
from danswer.context.search.models import RetrievalDetails
|
||||
from danswer.context.search.models import SearchRequest
|
||||
from danswer.context.search.pipeline import SearchPipeline
|
||||
@@ -103,6 +104,7 @@ class SearchTool(Tool):
|
||||
chunks_below: int | None = None,
|
||||
full_doc: bool = False,
|
||||
bypass_acl: bool = False,
|
||||
rerank_settings: RerankingDetails | None = None,
|
||||
) -> None:
|
||||
self.user = user
|
||||
self.persona = persona
|
||||
@@ -118,6 +120,9 @@ class SearchTool(Tool):
|
||||
self.bypass_acl = bypass_acl
|
||||
self.db_session = db_session
|
||||
|
||||
# Only used via API
|
||||
self.rerank_settings = rerank_settings
|
||||
|
||||
self.chunks_above = (
|
||||
chunks_above
|
||||
if chunks_above is not None
|
||||
@@ -292,6 +297,7 @@ class SearchTool(Tool):
|
||||
self.retrieval_options.offset if self.retrieval_options else None
|
||||
),
|
||||
limit=self.retrieval_options.limit if self.retrieval_options else None,
|
||||
rerank_settings=self.rerank_settings,
|
||||
chunks_above=self.chunks_above,
|
||||
chunks_below=self.chunks_below,
|
||||
full_doc=self.full_doc,
|
||||
|
||||
41
backend/ee/danswer/chat/process_message.py
Normal file
41
backend/ee/danswer/chat/process_message.py
Normal file
@@ -0,0 +1,41 @@
|
||||
from danswer.chat.models import AllCitations
|
||||
from danswer.chat.models import DanswerAnswerPiece
|
||||
from danswer.chat.models import DanswerContexts
|
||||
from danswer.chat.models import LLMRelevanceFilterResponse
|
||||
from danswer.chat.models import QADocsResponse
|
||||
from danswer.chat.models import StreamingError
|
||||
from danswer.chat.process_message import ChatPacketStream
|
||||
from danswer.server.query_and_chat.models import ChatMessageDetail
|
||||
from danswer.utils.timing import log_function_time
|
||||
from ee.danswer.server.query_and_chat.models import OneShotQAResponse
|
||||
|
||||
|
||||
@log_function_time()
|
||||
def gather_stream_for_answer_api(
|
||||
packets: ChatPacketStream,
|
||||
) -> OneShotQAResponse:
|
||||
response = OneShotQAResponse()
|
||||
|
||||
answer = ""
|
||||
for packet in packets:
|
||||
if isinstance(packet, DanswerAnswerPiece) and packet.answer_piece:
|
||||
answer += packet.answer_piece
|
||||
elif isinstance(packet, QADocsResponse):
|
||||
response.docs = packet
|
||||
# Extraneous, provided for backwards compatibility
|
||||
response.rephrase = packet.rephrased_query
|
||||
elif isinstance(packet, StreamingError):
|
||||
response.error_msg = packet.error
|
||||
elif isinstance(packet, ChatMessageDetail):
|
||||
response.chat_message_id = packet.message_id
|
||||
elif isinstance(packet, LLMRelevanceFilterResponse):
|
||||
response.llm_selected_doc_indices = packet.llm_selected_doc_indices
|
||||
elif isinstance(packet, AllCitations):
|
||||
response.citations = packet.citations
|
||||
elif isinstance(packet, DanswerContexts):
|
||||
response.contexts = packet
|
||||
|
||||
if answer:
|
||||
response.answer = answer
|
||||
|
||||
return response
|
||||
@@ -155,7 +155,6 @@ def _handle_standard_answers(
|
||||
else 0,
|
||||
danswerbot_flow=True,
|
||||
slack_thread_id=slack_thread_id,
|
||||
one_shot=True,
|
||||
)
|
||||
|
||||
root_message = get_or_create_root_message(
|
||||
|
||||
@@ -37,10 +37,15 @@ def get_cc_pairs_by_source(
|
||||
source_type: DocumentSource,
|
||||
only_sync: bool,
|
||||
) -> list[ConnectorCredentialPair]:
|
||||
"""
|
||||
Get all cc_pairs for a given source type (and optionally only sync)
|
||||
result is sorted by cc_pair id
|
||||
"""
|
||||
query = (
|
||||
db_session.query(ConnectorCredentialPair)
|
||||
.join(ConnectorCredentialPair.connector)
|
||||
.filter(Connector.source == source_type)
|
||||
.order_by(ConnectorCredentialPair.id)
|
||||
)
|
||||
|
||||
if only_sync:
|
||||
|
||||
@@ -55,9 +55,10 @@ def upsert_document_external_perms(
|
||||
doc_id: str,
|
||||
external_access: ExternalAccess,
|
||||
source_type: DocumentSource,
|
||||
) -> None:
|
||||
) -> bool:
|
||||
"""
|
||||
This sets the permissions for a document in postgres.
|
||||
This sets the permissions for a document in postgres. Returns True if the
|
||||
a new document was created, False otherwise.
|
||||
NOTE: this will replace any existing external access, it will not do a union
|
||||
"""
|
||||
document = db_session.scalars(
|
||||
@@ -85,7 +86,7 @@ def upsert_document_external_perms(
|
||||
)
|
||||
db_session.add(document)
|
||||
db_session.commit()
|
||||
return
|
||||
return True
|
||||
|
||||
# If the document exists, we need to check if the external access has changed
|
||||
if (
|
||||
@@ -98,3 +99,5 @@ def upsert_document_external_perms(
|
||||
document.is_public = external_access.is_public
|
||||
document.last_modified = datetime.now(timezone.utc)
|
||||
db_session.commit()
|
||||
|
||||
return False
|
||||
|
||||
@@ -33,12 +33,7 @@ def get_empty_chat_messages_entries__paginated(
|
||||
|
||||
message_skeletons: list[ChatMessageSkeleton] = []
|
||||
for chat_session in chat_sessions:
|
||||
if chat_session.one_shot:
|
||||
flow_type = FlowType.SEARCH
|
||||
elif chat_session.danswerbot_flow:
|
||||
flow_type = FlowType.SLACK
|
||||
else:
|
||||
flow_type = FlowType.CHAT
|
||||
flow_type = FlowType.SLACK if chat_session.danswerbot_flow else FlowType.CHAT
|
||||
|
||||
for message in chat_session.messages:
|
||||
# Only count user messages
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user