Compare commits

..

34 Commits

Author SHA1 Message Date
pablodanswer
09e6bd3c9c k 2024-12-18 20:01:44 -08:00
pablodanswer
c1803cdd56 log 2024-12-18 19:20:55 -08:00
pablodanswer
a5b9c76012 validation 2024-12-18 19:13:09 -08:00
rkuo-danswer
e9b10e8b41 temporarily disabling validate indexing fences (#3502)
* temporarily disabling validate indexing fences

* add back a few startup checks in the cloud

* use common vespa client to perform health check

* log vespa url and try using http1 on light worker index methods

---------

Co-authored-by: Richard Kuo <rkuo@rkuo.com>
Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
2024-12-19 01:32:09 +00:00
pablonyx
a0fa4adb60 Ensure password validation errors propagate (#3509)
* ensure password validation errors propagate

* copy update

* support o1

* improve typing

* Revert "support o1"

This reverts commit 9b7aa6008c.
2024-12-19 00:05:57 +00:00
pablonyx
ca9ba925bd Support o1 (#3510)
* support o1

* nit
2024-12-19 00:05:00 +00:00
rkuo-danswer
833cc5c97c Merge pull request #3497 from emerzon/new_icons
New model icons for LLM Picker
2024-12-18 16:38:31 -08:00
Chris Weaver
23ecf654ed Add support for custom LLM error messages (#3501)
* Add support for custom LLM error messages

* Fix mypy
2024-12-17 22:58:17 -08:00
pablonyx
ddc6a6d2b3 Wrap nits (#3496) 2024-12-17 18:03:38 -08:00
pablonyx
571c8ece32 Slack Workspace Alembic Updates
Old alembic migration + restore workspace
2024-12-17 16:28:59 -08:00
pablodanswer
884bdb4b01 old alembic migration + restore workspace 2024-12-17 16:28:05 -08:00
pablonyx
b3ecf0d59f Migrate user milestone logic (#3493) 2024-12-17 15:59:56 -08:00
Emerson Gomes
f56fda27c9 Add also Microsoft models 2024-12-17 16:37:52 -06:00
Emerson Gomes
b1e4d4ea8d Adds icons for Amazon, Meta and Mistral models (when proxied via LiteLLM) 2024-12-17 16:20:46 -06:00
pablonyx
8db6d49fe5 IAM Auth for RDS (#3479)
* k

* functional iam auth

* k

* k

* improve typing

* add deployment options

* cleanup

* quick clean up

* minor cleanup

* additional clarity for db session operations

* nit

* k

* k

* update configs

* docker compose spacing
2024-12-17 22:02:37 +00:00
pablonyx
28598694b1 Add delete all chats option (#2515)
* Add delete all chats option

* post rebase fixes

* final validation

* minor cleanup

* move up
2024-12-17 02:55:35 +00:00
Emerson Gomes
b5d0df90b9 Remove hardcoded root path for HF models 2024-12-16 19:03:15 -08:00
pablonyx
48be6338ec Update Hubpost tracking form submission (#3261)
* Update Hubpost tracking form submission

* minor cleanup

* validated

* validate

* nit

* k
2024-12-17 02:31:09 +00:00
pablonyx
ed9014f03d Use logotypes where feasible (#3478)
* Use logotypes where feasible

* quick nit

* minor cleanup
2024-12-17 02:13:45 +00:00
rkuo-danswer
2dd51230ed clear indexing fences with no celery tasks queued (#3482)
* allow beat tasks to expire. it isn't important that they all run

* validate fences are in a good state and cancel/fail them if not

* add function timings for important beat tasks

* optimize lookups, add lots of comments

* review changes

---------

Co-authored-by: Richard Kuo <rkuo@rkuo.com>
Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
2024-12-17 00:55:58 +00:00
pablonyx
8b249cbe63 Proper display priority seeding (#3468)
* proper seeding

* k

* clean up
2024-12-17 00:19:45 +00:00
pablonyx
6b50f86cd2 Improved theming (#3204) 2024-12-16 22:24:32 +00:00
pablonyx
bd2805b6df Update llm override defaults (#3230)
* update llm override defaults

* post rebase fix
2024-12-16 22:18:21 +00:00
pablonyx
2847ab003e Prompting (#3372)
* auto generate start prompts

* post rebase clean up

* update for clarity
2024-12-16 21:34:43 +00:00
pablodanswer
1df6a506ec Revert "update pre-commit black version (#3250)"
This reverts commit d954914a0a.
2024-12-16 13:57:56 -08:00
pablonyx
f1541d1fbe Update default assistant to search for new users (#3317)
* update default assistant to search for new users

* update!
2024-12-16 21:15:33 +00:00
rkuo-danswer
dd0c4b64df errors in the summary row should be counting last_finished_status as reflected in the per connector rows (#3484)
Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
2024-12-16 20:53:19 +00:00
pablonyx
788b3015bc fix single quote block in llm answer (#3139) 2024-12-16 20:37:47 +00:00
pablonyx
cbbf10f450 remove tenant id logs (#3063) 2024-12-16 20:24:09 +00:00
pablonyx
d954914a0a update pre-commit black version (#3250) 2024-12-16 20:04:42 +00:00
pablodanswer
bee74ac360 mark slack perm sync as flaky 2024-12-16 11:50:03 -08:00
pablonyx
29ef64272a Update chat provider values
Update chat provider values
2024-12-16 11:46:53 -08:00
pablodanswer
01bf6ee4b7 quick clean up 2024-12-16 11:43:34 -08:00
pablodanswer
0502417cbe update chat provider values 2024-12-16 11:39:25 -08:00
165 changed files with 2558 additions and 4197 deletions

4
.gitignore vendored
View File

@@ -7,6 +7,4 @@
.vscode/
*.sw?
/backend/tests/regression/answer_quality/search_test_config.yaml
/web/test-results/
backend/onyx/agent_search/main/test_data.json
backend/tests/regression/answer_quality/test_data.json
/web/test-results/

View File

@@ -49,9 +49,3 @@ BING_API_KEY=<REPLACE THIS>
# Enable the full set of Danswer Enterprise Edition features
# NOTE: DO NOT ENABLE THIS UNLESS YOU HAVE A PAID ENTERPRISE LICENSE (or if you are using this for local testing/development)
ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=False
# Agent Search configs # TODO: Remove give proper namings
AGENT_RETRIEVAL_STATS=False # Note: This setting will incur substantial re-ranking effort
AGENT_RERANKING_STATS=True
AGENT_MAX_QUERY_RETRIEVAL_RESULTS=20
AGENT_RERANKING_MAX_QUERY_RETRIEVAL_RESULTS=20

View File

@@ -1,39 +1,49 @@
from typing import Any, Literal
from onyx.db.engine import get_iam_auth_token
from onyx.configs.app_configs import USE_IAM_AUTH
from onyx.configs.app_configs import POSTGRES_HOST
from onyx.configs.app_configs import POSTGRES_PORT
from onyx.configs.app_configs import POSTGRES_USER
from onyx.configs.app_configs import AWS_REGION
from onyx.db.engine import build_connection_string
from onyx.db.engine import get_all_tenant_ids
from sqlalchemy import event
from sqlalchemy import pool
from sqlalchemy import text
from sqlalchemy.engine.base import Connection
from typing import Literal
import os
import ssl
import asyncio
from logging.config import fileConfig
import logging
from logging.config import fileConfig
from alembic import context
from sqlalchemy import pool
from sqlalchemy.ext.asyncio import create_async_engine
from sqlalchemy.sql import text
from sqlalchemy.sql.schema import SchemaItem
from shared_configs.configs import MULTI_TENANT
from onyx.db.engine import build_connection_string
from onyx.configs.constants import SSL_CERT_FILE
from shared_configs.configs import MULTI_TENANT, POSTGRES_DEFAULT_SCHEMA
from onyx.db.models import Base
from celery.backends.database.session import ResultModelBase # type: ignore
from onyx.db.engine import get_all_tenant_ids
from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA
# Alembic Config object
config = context.config
# Interpret the config file for Python logging.
if config.config_file_name is not None and config.attributes.get(
"configure_logger", True
):
fileConfig(config.config_file_name)
# Add your model's MetaData object here for 'autogenerate' support
target_metadata = [Base.metadata, ResultModelBase.metadata]
EXCLUDE_TABLES = {"kombu_queue", "kombu_message"}
# Set up logging
logger = logging.getLogger(__name__)
ssl_context: ssl.SSLContext | None = None
if USE_IAM_AUTH:
if not os.path.exists(SSL_CERT_FILE):
raise FileNotFoundError(f"Expected {SSL_CERT_FILE} when USE_IAM_AUTH is true.")
ssl_context = ssl.create_default_context(cafile=SSL_CERT_FILE)
def include_object(
object: SchemaItem,
@@ -49,20 +59,12 @@ def include_object(
reflected: bool,
compare_to: SchemaItem | None,
) -> bool:
"""
Determines whether a database object should be included in migrations.
Excludes specified tables from migrations.
"""
if type_ == "table" and name in EXCLUDE_TABLES:
return False
return True
def get_schema_options() -> tuple[str, bool, bool]:
"""
Parses command-line options passed via '-x' in Alembic commands.
Recognizes 'schema', 'create_schema', and 'upgrade_all_tenants' options.
"""
x_args_raw = context.get_x_argument()
x_args = {}
for arg in x_args_raw:
@@ -90,16 +92,12 @@ def get_schema_options() -> tuple[str, bool, bool]:
def do_run_migrations(
connection: Connection, schema_name: str, create_schema: bool
) -> None:
"""
Executes migrations in the specified schema.
"""
logger.info(f"About to migrate schema: {schema_name}")
if create_schema:
connection.execute(text(f'CREATE SCHEMA IF NOT EXISTS "{schema_name}"'))
connection.execute(text("COMMIT"))
# Set search_path to the target schema
connection.execute(text(f'SET search_path TO "{schema_name}"'))
context.configure(
@@ -117,11 +115,25 @@ def do_run_migrations(
context.run_migrations()
def provide_iam_token_for_alembic(
dialect: Any, conn_rec: Any, cargs: Any, cparams: Any
) -> None:
if USE_IAM_AUTH:
# Database connection settings
region = AWS_REGION
host = POSTGRES_HOST
port = POSTGRES_PORT
user = POSTGRES_USER
# Get IAM authentication token
token = get_iam_auth_token(host, port, user, region)
# For Alembic / SQLAlchemy in this context, set SSL and password
cparams["password"] = token
cparams["ssl"] = ssl_context
async def run_async_migrations() -> None:
"""
Determines whether to run migrations for a single schema or all schemas,
and executes migrations accordingly.
"""
schema_name, create_schema, upgrade_all_tenants = get_schema_options()
engine = create_async_engine(
@@ -129,10 +141,16 @@ async def run_async_migrations() -> None:
poolclass=pool.NullPool,
)
if upgrade_all_tenants:
# Run migrations for all tenant schemas sequentially
tenant_schemas = get_all_tenant_ids()
if USE_IAM_AUTH:
@event.listens_for(engine.sync_engine, "do_connect")
def event_provide_iam_token_for_alembic(
dialect: Any, conn_rec: Any, cargs: Any, cparams: Any
) -> None:
provide_iam_token_for_alembic(dialect, conn_rec, cargs, cparams)
if upgrade_all_tenants:
tenant_schemas = get_all_tenant_ids()
for schema in tenant_schemas:
try:
logger.info(f"Migrating schema: {schema}")
@@ -162,15 +180,20 @@ async def run_async_migrations() -> None:
def run_migrations_offline() -> None:
"""
Run migrations in 'offline' mode.
"""
schema_name, _, upgrade_all_tenants = get_schema_options()
url = build_connection_string()
if upgrade_all_tenants:
# Run offline migrations for all tenant schemas
engine = create_async_engine(url)
if USE_IAM_AUTH:
@event.listens_for(engine.sync_engine, "do_connect")
def event_provide_iam_token_for_alembic_offline(
dialect: Any, conn_rec: Any, cargs: Any, cparams: Any
) -> None:
provide_iam_token_for_alembic(dialect, conn_rec, cargs, cparams)
tenant_schemas = get_all_tenant_ids()
engine.sync_engine.dispose()
@@ -207,9 +230,6 @@ def run_migrations_offline() -> None:
def run_migrations_online() -> None:
"""
Runs migrations in 'online' mode using an asynchronous engine.
"""
asyncio.run(run_async_migrations())

View File

@@ -0,0 +1,121 @@
"""properly_cascade
Revision ID: 35e518e0ddf4
Revises: 91a0a4d62b14
Create Date: 2024-09-20 21:24:04.891018
"""
from alembic import op
# revision identifiers, used by Alembic.
revision = "35e518e0ddf4"
down_revision = "91a0a4d62b14"
branch_labels = None
depends_on = None
def upgrade() -> None:
# Update chat_message foreign key constraint
op.drop_constraint(
"chat_message_chat_session_id_fkey", "chat_message", type_="foreignkey"
)
op.create_foreign_key(
"chat_message_chat_session_id_fkey",
"chat_message",
"chat_session",
["chat_session_id"],
["id"],
ondelete="CASCADE",
)
# Update chat_message__search_doc foreign key constraints
op.drop_constraint(
"chat_message__search_doc_chat_message_id_fkey",
"chat_message__search_doc",
type_="foreignkey",
)
op.drop_constraint(
"chat_message__search_doc_search_doc_id_fkey",
"chat_message__search_doc",
type_="foreignkey",
)
op.create_foreign_key(
"chat_message__search_doc_chat_message_id_fkey",
"chat_message__search_doc",
"chat_message",
["chat_message_id"],
["id"],
ondelete="CASCADE",
)
op.create_foreign_key(
"chat_message__search_doc_search_doc_id_fkey",
"chat_message__search_doc",
"search_doc",
["search_doc_id"],
["id"],
ondelete="CASCADE",
)
# Add CASCADE delete for tool_call foreign key
op.drop_constraint("tool_call_message_id_fkey", "tool_call", type_="foreignkey")
op.create_foreign_key(
"tool_call_message_id_fkey",
"tool_call",
"chat_message",
["message_id"],
["id"],
ondelete="CASCADE",
)
def downgrade() -> None:
# Revert chat_message foreign key constraint
op.drop_constraint(
"chat_message_chat_session_id_fkey", "chat_message", type_="foreignkey"
)
op.create_foreign_key(
"chat_message_chat_session_id_fkey",
"chat_message",
"chat_session",
["chat_session_id"],
["id"],
)
# Revert chat_message__search_doc foreign key constraints
op.drop_constraint(
"chat_message__search_doc_chat_message_id_fkey",
"chat_message__search_doc",
type_="foreignkey",
)
op.drop_constraint(
"chat_message__search_doc_search_doc_id_fkey",
"chat_message__search_doc",
type_="foreignkey",
)
op.create_foreign_key(
"chat_message__search_doc_chat_message_id_fkey",
"chat_message__search_doc",
"chat_message",
["chat_message_id"],
["id"],
)
op.create_foreign_key(
"chat_message__search_doc_search_doc_id_fkey",
"chat_message__search_doc",
"search_doc",
["search_doc_id"],
["id"],
)
# Revert tool_call foreign key constraint
op.drop_constraint("tool_call_message_id_fkey", "tool_call", type_="foreignkey")
op.create_foreign_key(
"tool_call_message_id_fkey",
"tool_call",
"chat_message",
["message_id"],
["id"],
)

View File

@@ -0,0 +1,87 @@
"""delete workspace
Revision ID: c0aab6edb6dd
Revises: 35e518e0ddf4
Create Date: 2024-12-17 14:37:07.660631
"""
from alembic import op
# revision identifiers, used by Alembic.
revision = "c0aab6edb6dd"
down_revision = "35e518e0ddf4"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.execute(
"""
UPDATE connector
SET connector_specific_config = connector_specific_config - 'workspace'
WHERE source = 'SLACK'
"""
)
def downgrade() -> None:
import json
from sqlalchemy import text
from slack_sdk import WebClient
conn = op.get_bind()
# Fetch all Slack credentials
creds_result = conn.execute(
text("SELECT id, credential_json FROM credential WHERE source = 'SLACK'")
)
all_slack_creds = creds_result.fetchall()
if not all_slack_creds:
return
for cred_row in all_slack_creds:
credential_id, credential_json = cred_row
credential_json = (
credential_json.tobytes().decode("utf-8")
if isinstance(credential_json, memoryview)
else credential_json.decode("utf-8")
)
credential_data = json.loads(credential_json)
slack_bot_token = credential_data.get("slack_bot_token")
if not slack_bot_token:
print(
f"No slack_bot_token found for credential {credential_id}. "
"Your Slack connector will not function until you upgrade and provide a valid token."
)
continue
client = WebClient(token=slack_bot_token)
try:
auth_response = client.auth_test()
workspace = auth_response["url"].split("//")[1].split(".")[0]
# Update only the connectors linked to this credential
# (and which are Slack connectors).
op.execute(
f"""
UPDATE connector AS c
SET connector_specific_config = jsonb_set(
connector_specific_config,
'{{workspace}}',
to_jsonb('{workspace}'::text)
)
FROM connector_credential_pair AS ccp
WHERE ccp.connector_id = c.id
AND c.source = 'SLACK'
AND ccp.credential_id = {credential_id}
"""
)
except Exception:
print(
f"We were unable to get the workspace url for your Slack Connector with id {credential_id}."
)
print("This connector will no longer work until you upgrade.")
continue

View File

@@ -53,3 +53,5 @@ OAUTH_GOOGLE_DRIVE_CLIENT_SECRET = os.environ.get(
# when the capture is called. These defaults prevent Posthog issues from breaking the Onyx app
POSTHOG_API_KEY = os.environ.get("POSTHOG_API_KEY") or "FooBar"
POSTHOG_HOST = os.environ.get("POSTHOG_HOST") or "https://us.i.posthog.com"
HUBSPOT_TRACKING_URL = os.environ.get("HUBSPOT_TRACKING_URL")

View File

@@ -3,12 +3,15 @@ import logging
import uuid
import aiohttp # Async HTTP client
import httpx
from fastapi import HTTPException
from fastapi import Request
from sqlalchemy import select
from sqlalchemy.orm import Session
from ee.onyx.configs.app_configs import ANTHROPIC_DEFAULT_API_KEY
from ee.onyx.configs.app_configs import COHERE_DEFAULT_API_KEY
from ee.onyx.configs.app_configs import HUBSPOT_TRACKING_URL
from ee.onyx.configs.app_configs import OPENAI_DEFAULT_API_KEY
from ee.onyx.server.tenants.access import generate_data_plane_token
from ee.onyx.server.tenants.models import TenantCreationPayload
@@ -47,13 +50,16 @@ from shared_configs.enums import EmbeddingProvider
logger = logging.getLogger(__name__)
async def get_or_create_tenant_id(
email: str, referral_source: str | None = None
async def get_or_provision_tenant(
email: str, referral_source: str | None = None, request: Request | None = None
) -> str:
"""Get existing tenant ID for an email or create a new tenant if none exists."""
if not MULTI_TENANT:
return POSTGRES_DEFAULT_SCHEMA
if referral_source and request:
await submit_to_hubspot(email, referral_source, request)
try:
tenant_id = get_tenant_id_for_email(email)
except exceptions.UserNotExists:
@@ -281,3 +287,36 @@ def configure_default_api_keys(db_session: Session) -> None:
logger.info(
"COHERE_DEFAULT_API_KEY not set, skipping Cohere embedding provider configuration"
)
async def submit_to_hubspot(
email: str, referral_source: str | None, request: Request
) -> None:
if not HUBSPOT_TRACKING_URL:
logger.info("HUBSPOT_TRACKING_URL not set, skipping HubSpot submission")
return
# HubSpot tracking cookie
hubspot_cookie = request.cookies.get("hubspotutk")
# IP address
ip_address = request.client.host if request.client else None
data = {
"fields": [
{"name": "email", "value": email},
{"name": "referral_source", "value": referral_source or ""},
],
"context": {
"hutk": hubspot_cookie,
"ipAddress": ip_address,
"pageUri": str(request.url),
"pageName": "User Registration",
},
}
async with httpx.AsyncClient() as client:
response = await client.post(HUBSPOT_TRACKING_URL, json=data)
if response.status_code != 200:
logger.error(f"Failed to submit to HubSpot: {response.text}")

View File

@@ -1,14 +1,38 @@
from typing import Any
from posthog import Posthog
from ee.onyx.configs.app_configs import POSTHOG_API_KEY
from ee.onyx.configs.app_configs import POSTHOG_HOST
from onyx.utils.logger import setup_logger
posthog = Posthog(project_api_key=POSTHOG_API_KEY, host=POSTHOG_HOST)
logger = setup_logger()
def posthog_on_error(error: Any, items: Any) -> None:
logger.error(f"PostHog error: {error}, items: {items}")
posthog = Posthog(
project_api_key=POSTHOG_API_KEY,
host=POSTHOG_HOST,
debug=True,
on_error=posthog_on_error,
)
def event_telemetry(
distinct_id: str,
event: str,
properties: dict | None = None,
distinct_id: str, event: str, properties: dict | None = None
) -> None:
posthog.capture(distinct_id, event, properties)
logger.info(f"Capturing Posthog event: {distinct_id} {event} {properties}")
print("API KEY", POSTHOG_API_KEY)
print("HOST", POSTHOG_HOST)
try:
print(type(distinct_id))
print(type(event))
print(type(properties))
response = posthog.capture(distinct_id, event, properties)
posthog.flush()
print(response)
except Exception as e:
logger.error(f"Error capturing Posthog event: {e}")

View File

@@ -27,8 +27,8 @@ from shared_configs.configs import SENTRY_DSN
os.environ["TOKENIZERS_PARALLELISM"] = "false"
os.environ["HF_HUB_DISABLE_TELEMETRY"] = "1"
HF_CACHE_PATH = Path("/root/.cache/huggingface/")
TEMP_HF_CACHE_PATH = Path("/root/.cache/temp_huggingface/")
HF_CACHE_PATH = Path(os.path.expanduser("~")) / ".cache/huggingface"
TEMP_HF_CACHE_PATH = Path(os.path.expanduser("~")) / ".cache/temp_huggingface"
transformer_logging.set_verbosity_error()

View File

@@ -1,20 +0,0 @@
from collections.abc import Hashable
from langgraph.types import Send
from onyx.agent_search.answer_question.states import AnswerQuestionInput
from onyx.agent_search.core_state import in_subgraph_extract_core_fields
from onyx.agent_search.expanded_retrieval.states import ExpandedRetrievalInput
def send_to_expanded_retrieval(state: AnswerQuestionInput) -> Send | Hashable:
print("sending to expanded retrieval via edge")
return Send(
"decomped_expanded_retrieval",
ExpandedRetrievalInput(
**in_subgraph_extract_core_fields(state),
question=state["question"],
dummy="1"
),
)

View File

@@ -1,106 +0,0 @@
from langgraph.graph import END
from langgraph.graph import START
from langgraph.graph import StateGraph
from onyx.agent_search.answer_question.edges import send_to_expanded_retrieval
from onyx.agent_search.answer_question.nodes.answer_check import answer_check
from onyx.agent_search.answer_question.nodes.answer_generation import answer_generation
from onyx.agent_search.answer_question.nodes.format_answer import format_answer
from onyx.agent_search.answer_question.nodes.ingest_retrieval import ingest_retrieval
from onyx.agent_search.answer_question.states import AnswerQuestionInput
from onyx.agent_search.answer_question.states import AnswerQuestionOutput
from onyx.agent_search.answer_question.states import AnswerQuestionState
from onyx.agent_search.expanded_retrieval.graph_builder import (
expanded_retrieval_graph_builder,
)
def answer_query_graph_builder() -> StateGraph:
graph = StateGraph(
state_schema=AnswerQuestionState,
input=AnswerQuestionInput,
output=AnswerQuestionOutput,
)
### Add nodes ###
expanded_retrieval = expanded_retrieval_graph_builder().compile()
graph.add_node(
node="decomped_expanded_retrieval",
action=expanded_retrieval,
)
graph.add_node(
node="answer_check",
action=answer_check,
)
graph.add_node(
node="answer_generation",
action=answer_generation,
)
graph.add_node(
node="format_answer",
action=format_answer,
)
graph.add_node(
node="ingest_retrieval",
action=ingest_retrieval,
)
### Add edges ###
graph.add_conditional_edges(
source=START,
path=send_to_expanded_retrieval,
path_map=["decomped_expanded_retrieval"],
)
graph.add_edge(
start_key="decomped_expanded_retrieval",
end_key="ingest_retrieval",
)
graph.add_edge(
start_key="ingest_retrieval",
end_key="answer_generation",
)
graph.add_edge(
start_key="answer_generation",
end_key="answer_check",
)
graph.add_edge(
start_key="answer_check",
end_key="format_answer",
)
graph.add_edge(
start_key="format_answer",
end_key=END,
)
return graph
if __name__ == "__main__":
from onyx.db.engine import get_session_context_manager
from onyx.llm.factory import get_default_llms
from onyx.context.search.models import SearchRequest
graph = answer_query_graph_builder()
compiled_graph = graph.compile()
primary_llm, fast_llm = get_default_llms()
search_request = SearchRequest(
query="what can you do with onyx or danswer?",
)
with get_session_context_manager() as db_session:
inputs = AnswerQuestionInput(
search_request=search_request,
primary_llm=primary_llm,
fast_llm=fast_llm,
db_session=db_session,
question="what can you do with onyx?",
)
for thing in compiled_graph.stream(
input=inputs,
# debug=True,
# subgraphs=True,
):
print(thing)
# output = compiled_graph.invoke(inputs)
# print(output)

View File

@@ -1,19 +0,0 @@
from pydantic import BaseModel
from onyx.agent_search.shared_graph_utils.models import AgentChunkStats
from onyx.context.search.models import InferenceSection
### Models ###
class AnswerRetrievalStats(BaseModel):
answer_retrieval_stats: dict[str, float | int]
class QuestionAnswerResults(BaseModel):
question: str
answer: str
quality: str
# expanded_retrieval_results: list[QueryResult]
documents: list[InferenceSection]
sub_question_retrieval_stats: AgentChunkStats

View File

@@ -1,30 +0,0 @@
from langchain_core.messages import HumanMessage
from langchain_core.messages import merge_message_runs
from onyx.agent_search.answer_question.states import AnswerQuestionState
from onyx.agent_search.answer_question.states import QACheckUpdate
from onyx.agent_search.shared_graph_utils.prompts import SUB_CHECK_PROMPT
def answer_check(state: AnswerQuestionState) -> QACheckUpdate:
msg = [
HumanMessage(
content=SUB_CHECK_PROMPT.format(
question=state["question"],
base_answer=state["answer"],
)
)
]
fast_llm = state["subgraph_fast_llm"]
response = list(
fast_llm.stream(
prompt=msg,
)
)
quality_str = merge_message_runs(response, chunk_separator="")[0].content
return QACheckUpdate(
answer_quality=quality_str,
)

View File

@@ -1,36 +0,0 @@
from langchain_core.messages import HumanMessage
from langchain_core.messages import merge_message_runs
from onyx.agent_search.answer_question.states import AnswerQuestionState
from onyx.agent_search.answer_question.states import QAGenerationUpdate
from onyx.agent_search.shared_graph_utils.prompts import BASE_RAG_PROMPT
from onyx.agent_search.shared_graph_utils.utils import format_docs
def answer_generation(state: AnswerQuestionState) -> QAGenerationUpdate:
question = state["question"]
docs = state["documents"]
print(f"Number of verified retrieval docs: {len(docs)}")
msg = [
HumanMessage(
content=BASE_RAG_PROMPT.format(
question=question,
context=format_docs(docs),
original_question=state["subgraph_search_request"].query,
)
)
]
fast_llm = state["subgraph_fast_llm"]
response = list(
fast_llm.stream(
prompt=msg,
)
)
answer_str = merge_message_runs(response, chunk_separator="")[0].content
return QAGenerationUpdate(
answer=answer_str,
)

View File

@@ -1,28 +0,0 @@
from onyx.agent_search.answer_question.states import AnswerQuestionOutput
from onyx.agent_search.answer_question.states import AnswerQuestionState
from onyx.agent_search.answer_question.states import QuestionAnswerResults
def format_answer(state: AnswerQuestionState) -> AnswerQuestionOutput:
# sub_question_retrieval_stats = state["sub_question_retrieval_stats"]
# if sub_question_retrieval_stats is None:
# sub_question_retrieval_stats = []
# elif isinstance(sub_question_retrieval_stats, list):
# sub_question_retrieval_stats = sub_question_retrieval_stats
# if isinstance(sub_question_retrieval_stats[0], list):
# sub_question_retrieval_stats = sub_question_retrieval_stats[0]
# else:
# sub_question_retrieval_stats = [sub_question_retrieval_stats]
return AnswerQuestionOutput(
answer_results=[
QuestionAnswerResults(
question=state["question"],
quality=state["answer_quality"],
answer=state["answer"],
# expanded_retrieval_results=state["expanded_retrieval_results"],
documents=state["documents"],
sub_question_retrieval_stats=state["sub_question_retrieval_stats"],
)
],
)

View File

@@ -1,19 +0,0 @@
from onyx.agent_search.answer_question.states import RetrievalIngestionUpdate
from onyx.agent_search.expanded_retrieval.states import ExpandedRetrievalOutput
from onyx.agent_search.shared_graph_utils.models import AgentChunkStats
def ingest_retrieval(state: ExpandedRetrievalOutput) -> RetrievalIngestionUpdate:
sub_question_retrieval_stats = state[
"expanded_retrieval_result"
].sub_question_retrieval_stats
if sub_question_retrieval_stats is None:
sub_question_retrieval_stats = AgentChunkStats()
return RetrievalIngestionUpdate(
expanded_retrieval_results=state[
"expanded_retrieval_result"
].expanded_queries_results,
documents=state["expanded_retrieval_result"].all_documents,
sub_question_retrieval_stats=sub_question_retrieval_stats,
)

View File

@@ -1,58 +0,0 @@
from operator import add
from typing import Annotated
from typing import TypedDict
from onyx.agent_search.answer_question.models import QuestionAnswerResults
from onyx.agent_search.core_state import SubgraphCoreState
from onyx.agent_search.expanded_retrieval.models import QueryResult
from onyx.agent_search.shared_graph_utils.models import AgentChunkStats
from onyx.agent_search.shared_graph_utils.operators import dedup_inference_sections
from onyx.context.search.models import InferenceSection
## Update States
class QACheckUpdate(TypedDict):
answer_quality: str
class QAGenerationUpdate(TypedDict):
answer: str
# answer_stat: AnswerStats
class RetrievalIngestionUpdate(TypedDict):
expanded_retrieval_results: list[QueryResult]
documents: Annotated[list[InferenceSection], dedup_inference_sections]
sub_question_retrieval_stats: AgentChunkStats
## Graph Input State
class AnswerQuestionInput(SubgraphCoreState):
question: str
## Graph State
class AnswerQuestionState(
AnswerQuestionInput,
QAGenerationUpdate,
QACheckUpdate,
RetrievalIngestionUpdate,
):
pass
## Graph Output State
class AnswerQuestionOutput(TypedDict):
"""
This is a list of results even though each call of this subgraph only returns one result.
This is because if we parallelize the answer query subgraph, there will be multiple
results in a list so the add operator is used to add them together.
"""
answer_results: Annotated[list[QuestionAnswerResults], add]

View File

@@ -1,70 +0,0 @@
from langgraph.graph import END
from langgraph.graph import START
from langgraph.graph import StateGraph
from onyx.agent_search.base_raw_search.nodes.format_raw_search_results import (
format_raw_search_results,
)
from onyx.agent_search.base_raw_search.nodes.generate_raw_search_data import (
generate_raw_search_data,
)
from onyx.agent_search.base_raw_search.states import BaseRawSearchInput
from onyx.agent_search.base_raw_search.states import BaseRawSearchOutput
from onyx.agent_search.base_raw_search.states import BaseRawSearchState
from onyx.agent_search.expanded_retrieval.graph_builder import (
expanded_retrieval_graph_builder,
)
def base_raw_search_graph_builder() -> StateGraph:
graph = StateGraph(
state_schema=BaseRawSearchState,
input=BaseRawSearchInput,
output=BaseRawSearchOutput,
)
### Add nodes ###
expanded_retrieval = expanded_retrieval_graph_builder().compile()
graph.add_node(
node="generate_raw_search_data",
action=generate_raw_search_data,
)
graph.add_node(
node="expanded_retrieval_base_search",
action=expanded_retrieval,
)
graph.add_node(
node="format_raw_search_results",
action=format_raw_search_results,
)
### Add edges ###
graph.add_edge(start_key=START, end_key="generate_raw_search_data")
graph.add_edge(
start_key="generate_raw_search_data",
end_key="expanded_retrieval_base_search",
)
graph.add_edge(
start_key="expanded_retrieval_base_search",
end_key="format_raw_search_results",
)
# graph.add_edge(
# start_key="expanded_retrieval_base_search",
# end_key=END,
# )
graph.add_edge(
start_key="format_raw_search_results",
end_key=END,
)
return graph
if __name__ == "__main__":
pass

View File

@@ -1,20 +0,0 @@
from pydantic import BaseModel
from onyx.agent_search.expanded_retrieval.models import QueryResult
from onyx.agent_search.shared_graph_utils.models import AgentChunkStats
from onyx.context.search.models import InferenceSection
### Models ###
class AnswerRetrievalStats(BaseModel):
answer_retrieval_stats: dict[str, float | int]
class QuestionAnswerResults(BaseModel):
question: str
answer: str
quality: str
expanded_retrieval_results: list[QueryResult]
documents: list[InferenceSection]
sub_question_retrieval_stats: list[AgentChunkStats]

View File

@@ -1,11 +0,0 @@
from onyx.agent_search.base_raw_search.states import BaseRawSearchOutput
from onyx.agent_search.expanded_retrieval.states import ExpandedRetrievalOutput
def format_raw_search_results(state: ExpandedRetrievalOutput) -> BaseRawSearchOutput:
print("format_raw_search_results")
return BaseRawSearchOutput(
base_expanded_retrieval_result=state["expanded_retrieval_result"],
# base_retrieval_results=[state["expanded_retrieval_result"]],
# base_search_documents=[],
)

View File

@@ -1,15 +0,0 @@
from onyx.agent_search.core_state import CoreState
from onyx.agent_search.expanded_retrieval.states import ExpandedRetrievalInput
def generate_raw_search_data(state: CoreState) -> ExpandedRetrievalInput:
print("generate_raw_search_data")
return ExpandedRetrievalInput(
subgraph_search_request=state["search_request"],
subgraph_primary_llm=state["primary_llm"],
subgraph_fast_llm=state["fast_llm"],
subgraph_db_session=state["db_session"],
question=state["search_request"].query,
dummy="7",
base_search=True,
)

View File

@@ -1,40 +0,0 @@
from typing import TypedDict
from onyx.agent_search.core_state import CoreState
from onyx.agent_search.core_state import SubgraphCoreState
from onyx.agent_search.expanded_retrieval.models import ExpandedRetrievalResult
## Update States
## Graph Input State
class BaseRawSearchInput(CoreState, SubgraphCoreState):
pass
## Graph Output State
class BaseRawSearchOutput(TypedDict):
"""
This is a list of results even though each call of this subgraph only returns one result.
This is because if we parallelize the answer query subgraph, there will be multiple
results in a list so the add operator is used to add them together.
"""
# base_search_documents: Annotated[list[InferenceSection], dedup_inference_sections]
# base_retrieval_results: Annotated[list[ExpandedRetrievalResult], add]
base_expanded_retrieval_result: ExpandedRetrievalResult
## Graph State
class BaseRawSearchState(
BaseRawSearchInput,
BaseRawSearchOutput,
):
pass

View File

@@ -1,61 +0,0 @@
from operator import add
from typing import Annotated
from typing import TypedDict
from typing import TypeVar
from sqlalchemy.orm import Session
from onyx.context.search.models import SearchRequest
from onyx.llm.interfaces import LLM
class CoreState(TypedDict, total=False):
"""
This is the core state that is shared across all subgraphs.
"""
search_request: SearchRequest
primary_llm: LLM
fast_llm: LLM
# a single session for the entire agent search
# is fine if we are only reading
db_session: Session
log_messages: Annotated[list[str], add]
dummy: str
class SubgraphCoreState(TypedDict, total=False):
"""
This is the core state that is shared across all subgraphs.
"""
subgraph_search_request: SearchRequest
subgraph_primary_llm: LLM
subgraph_fast_llm: LLM
# a single session for the entire agent search
# is fine if we are only reading
subgraph_db_session: Session
# This ensures that the state passed in extends the CoreState
T = TypeVar("T", bound=CoreState)
T_SUBGRAPH = TypeVar("T_SUBGRAPH", bound=SubgraphCoreState)
def extract_core_fields(state: T) -> CoreState:
filtered_dict = {k: v for k, v in state.items() if k in CoreState.__annotations__}
return CoreState(**dict(filtered_dict)) # type: ignore
def extract_core_fields_for_subgraph(state: T) -> SubgraphCoreState:
filtered_dict = {
"subgraph_" + k: v for k, v in state.items() if k in CoreState.__annotations__
}
return SubgraphCoreState(**dict(filtered_dict)) # type: ignore
def in_subgraph_extract_core_fields(state: T_SUBGRAPH) -> SubgraphCoreState:
filtered_dict = {
k: v for k, v in state.items() if k in SubgraphCoreState.__annotations__
}
return SubgraphCoreState(**dict(filtered_dict))

View File

@@ -1,114 +0,0 @@
from typing import Any
from langchain_core.messages import HumanMessage
from onyx.agent_search.main.states import MainState
from onyx.agent_search.shared_graph_utils.prompts import COMBINED_CONTEXT
from onyx.agent_search.shared_graph_utils.prompts import MODIFIED_RAG_PROMPT
from onyx.agent_search.shared_graph_utils.utils import format_docs
from onyx.agent_search.shared_graph_utils.utils import normalize_whitespace
# aggregate sub questions and answers
def deep_answer_generation(state: MainState) -> dict[str, Any]:
"""
Generate answer
Args:
state (messages): The current state
Returns:
dict: The updated state with re-phrased question
"""
print("---DEEP GENERATE---")
question = state["original_question"]
docs = state["deduped_retrieval_docs"]
deep_answer_context = state["core_answer_dynamic_context"]
print(f"Number of verified retrieval docs - deep: {len(docs)}")
combined_context = normalize_whitespace(
COMBINED_CONTEXT.format(
deep_answer_context=deep_answer_context, formated_docs=format_docs(docs)
)
)
msg = [
HumanMessage(
content=MODIFIED_RAG_PROMPT.format(
question=question, combined_context=combined_context
)
)
]
# Grader
model = state["fast_llm"]
response = model.invoke(msg)
return {
"deep_answer": response.content,
}
def final_stuff(state: MainState) -> dict[str, Any]:
"""
Invokes the agent model to generate a response based on the current state. Given
the question, it will decide to retrieve using the retriever tool, or simply end.
Args:
state (messages): The current state
Returns:
dict: The updated state with the agent response appended to messages
"""
print("---FINAL---")
messages = state["log_messages"]
time_ordered_messages = [x.pretty_repr() for x in messages]
time_ordered_messages.sort()
print("Message Log:")
# print("\n".join(time_ordered_messages))
initial_sub_qas = state["initial_sub_qas"]
initial_sub_qa_list = []
for initial_sub_qa in initial_sub_qas:
if initial_sub_qa["sub_answer_check"] == "yes":
initial_sub_qa_list.append(
f' Question:\n {initial_sub_qa["sub_question"]}\n --\n Answer:\n {initial_sub_qa["sub_answer"]}\n -----'
)
initial_sub_qa_context = "\n".join(initial_sub_qa_list)
base_answer = state["base_answer"]
print(f"Final Base Answer:\n{base_answer}")
print("--------------------------------")
print(f"Initial Answered Sub Questions:\n{initial_sub_qa_context}")
print("--------------------------------")
if not state.get("deep_answer"):
print("No Deep Answer was required")
return {}
deep_answer = state["deep_answer"]
sub_qas = state["sub_qas"]
sub_qa_list = []
for sub_qa in sub_qas:
if sub_qa["sub_answer_check"] == "yes":
sub_qa_list.append(
f' Question:\n {sub_qa["sub_question"]}\n --\n Answer:\n {sub_qa["sub_answer"]}\n -----'
)
sub_qa_context = "\n".join(sub_qa_list)
print(f"Final Base Answer:\n{base_answer}")
print("--------------------------------")
print(f"Final Deep Answer:\n{deep_answer}")
print("--------------------------------")
print("Sub Questions and Answers:")
print(sub_qa_context)
return {}

View File

@@ -1,78 +0,0 @@
import json
import re
from datetime import datetime
from typing import Any
from langchain_core.messages import HumanMessage
from onyx.agent_search.main.states import MainState
from onyx.agent_search.shared_graph_utils.prompts import DEEP_DECOMPOSE_PROMPT
from onyx.agent_search.shared_graph_utils.utils import format_entity_term_extraction
from onyx.agent_search.shared_graph_utils.utils import generate_log_message
def decompose(state: MainState) -> dict[str, Any]:
""" """
node_start_time = datetime.now()
question = state["original_question"]
base_answer = state["base_answer"]
# get the entity term extraction dict and properly format it
entity_term_extraction_dict = state["retrieved_entities_relationships"][
"retrieved_entities_relationships"
]
entity_term_extraction_str = format_entity_term_extraction(
entity_term_extraction_dict
)
initial_question_answers = state["initial_sub_qas"]
addressed_question_list = [
x["sub_question"]
for x in initial_question_answers
if x["sub_answer_check"] == "yes"
]
failed_question_list = [
x["sub_question"]
for x in initial_question_answers
if x["sub_answer_check"] == "no"
]
msg = [
HumanMessage(
content=DEEP_DECOMPOSE_PROMPT.format(
question=question,
entity_term_extraction_str=entity_term_extraction_str,
base_answer=base_answer,
answered_sub_questions="\n - ".join(addressed_question_list),
failed_sub_questions="\n - ".join(failed_question_list),
),
)
]
# Grader
model = state["fast_llm"]
response = model.invoke(msg)
cleaned_response = re.sub(r"```json\n|\n```", "", response.pretty_repr())
parsed_response = json.loads(cleaned_response)
sub_questions_dict = {}
for sub_question_nr, sub_question_dict in enumerate(
parsed_response["sub_questions"]
):
sub_question_dict["answered"] = False
sub_question_dict["verified"] = False
sub_questions_dict[sub_question_nr] = sub_question_dict
return {
"decomposed_sub_questions_dict": sub_questions_dict,
"log_messages": generate_log_message(
message="deep - decompose",
node_start_time=node_start_time,
graph_start_time=state["graph_start_time"],
),
}

View File

@@ -1,40 +0,0 @@
import json
import re
from typing import Any
from langchain_core.messages import HumanMessage
from langchain_core.messages import merge_message_runs
from onyx.agent_search.main.states import MainState
from onyx.agent_search.shared_graph_utils.prompts import ENTITY_TERM_PROMPT
from onyx.agent_search.shared_graph_utils.utils import format_docs
def entity_term_extraction(state: MainState) -> dict[str, Any]:
"""Extract entities and terms from the question and context"""
question = state["original_question"]
docs = state["deduped_retrieval_docs"]
doc_context = format_docs(docs)
msg = [
HumanMessage(
content=ENTITY_TERM_PROMPT.format(question=question, context=doc_context),
)
]
fast_llm = state["fast_llm"]
# Grader
llm_response_list = list(
fast_llm.stream(
prompt=msg,
)
)
llm_response = merge_message_runs(llm_response_list, chunk_separator="")[0].content
cleaned_response = re.sub(r"```json\n|\n```", "", llm_response)
parsed_response = json.loads(cleaned_response)
return {
"retrieved_entities_relationships": parsed_response,
}

View File

@@ -1,30 +0,0 @@
from typing import Any
from onyx.agent_search.main.states import MainState
# aggregate sub questions and answers
def sub_qa_level_aggregator(state: MainState) -> dict[str, Any]:
sub_qas = state["sub_qas"]
dynamic_context_list = [
"Below you will find useful information to answer the original question:"
]
checked_sub_qas = []
for core_answer_sub_qa in sub_qas:
question = core_answer_sub_qa["sub_question"]
answer = core_answer_sub_qa["sub_answer"]
verified = core_answer_sub_qa["sub_answer_check"]
if verified == "yes":
dynamic_context_list.append(
f"Question:\n{question}\n\nAnswer:\n{answer}\n\n---\n\n"
)
checked_sub_qas.append({"sub_question": question, "sub_answer": answer})
dynamic_context = "\n".join(dynamic_context_list)
return {
"core_answer_dynamic_context": dynamic_context,
"checked_sub_qas": checked_sub_qas,
}

View File

@@ -1,19 +0,0 @@
from typing import Any
from onyx.agent_search.main.states import MainState
def sub_qa_manager(state: MainState) -> dict[str, Any]:
""" """
sub_questions_dict = state["decomposed_sub_questions_dict"]
sub_questions = {}
for sub_question_nr, sub_question_dict in sub_questions_dict.items():
sub_questions[sub_question_nr] = sub_question_dict["sub_question"]
return {
"sub_questions": sub_questions,
"num_new_question_iterations": 0,
}

View File

@@ -1,24 +0,0 @@
from collections.abc import Hashable
from langgraph.types import Send
from onyx.agent_search.core_state import in_subgraph_extract_core_fields
from onyx.agent_search.expanded_retrieval.nodes.doc_retrieval import RetrievalInput
from onyx.agent_search.expanded_retrieval.states import ExpandedRetrievalState
def parallel_retrieval_edge(state: ExpandedRetrievalState) -> list[Send | Hashable]:
question = state.get("question", state["subgraph_search_request"].query)
query_expansions = state.get("expanded_queries", []) + [question]
return [
Send(
"doc_retrieval",
RetrievalInput(
query_to_retrieve=query,
question=question,
**in_subgraph_extract_core_fields(state),
),
)
for query in query_expansions
]

View File

@@ -1,111 +0,0 @@
from langgraph.graph import END
from langgraph.graph import START
from langgraph.graph import StateGraph
from onyx.agent_search.expanded_retrieval.edges import parallel_retrieval_edge
from onyx.agent_search.expanded_retrieval.nodes.doc_reranking import doc_reranking
from onyx.agent_search.expanded_retrieval.nodes.doc_retrieval import doc_retrieval
from onyx.agent_search.expanded_retrieval.nodes.doc_verification import (
doc_verification,
)
from onyx.agent_search.expanded_retrieval.nodes.expand_queries import expand_queries
from onyx.agent_search.expanded_retrieval.nodes.format_results import format_results
from onyx.agent_search.expanded_retrieval.nodes.verification_kickoff import (
verification_kickoff,
)
from onyx.agent_search.expanded_retrieval.states import ExpandedRetrievalInput
from onyx.agent_search.expanded_retrieval.states import ExpandedRetrievalOutput
from onyx.agent_search.expanded_retrieval.states import ExpandedRetrievalState
def expanded_retrieval_graph_builder() -> StateGraph:
graph = StateGraph(
state_schema=ExpandedRetrievalState,
input=ExpandedRetrievalInput,
output=ExpandedRetrievalOutput,
)
### Add nodes ###
graph.add_node(
node="expand_queries",
action=expand_queries,
)
graph.add_node(
node="doc_retrieval",
action=doc_retrieval,
)
graph.add_node(
node="verification_kickoff",
action=verification_kickoff,
)
graph.add_node(
node="doc_verification",
action=doc_verification,
)
graph.add_node(
node="doc_reranking",
action=doc_reranking,
)
graph.add_node(
node="format_results",
action=format_results,
)
### Add edges ###
graph.add_edge(
start_key=START,
end_key="expand_queries",
)
graph.add_conditional_edges(
source="expand_queries",
path=parallel_retrieval_edge,
path_map=["doc_retrieval"],
)
graph.add_edge(
start_key="doc_retrieval",
end_key="verification_kickoff",
)
graph.add_edge(
start_key="doc_verification",
end_key="doc_reranking",
)
graph.add_edge(
start_key="doc_reranking",
end_key="format_results",
)
graph.add_edge(
start_key="format_results",
end_key=END,
)
return graph
if __name__ == "__main__":
from onyx.db.engine import get_session_context_manager
from onyx.llm.factory import get_default_llms
from onyx.context.search.models import SearchRequest
graph = expanded_retrieval_graph_builder()
compiled_graph = graph.compile()
primary_llm, fast_llm = get_default_llms()
search_request = SearchRequest(
query="what can you do with onyx or danswer?",
)
with get_session_context_manager() as db_session:
inputs = ExpandedRetrievalInput(
search_request=search_request,
primary_llm=primary_llm,
fast_llm=fast_llm,
db_session=db_session,
question="what can you do with onyx?",
)
for thing in compiled_graph.stream(
input=inputs,
# debug=True,
subgraphs=True,
):
print(thing)

View File

@@ -1,19 +0,0 @@
from pydantic import BaseModel
from onyx.agent_search.shared_graph_utils.models import AgentChunkStats
from onyx.agent_search.shared_graph_utils.models import RetrievalFitStats
from onyx.context.search.models import InferenceSection
### Models ###
class QueryResult(BaseModel):
query: str
search_results: list[InferenceSection]
stats: RetrievalFitStats | None
class ExpandedRetrievalResult(BaseModel):
expanded_queries_results: list[QueryResult]
all_documents: list[InferenceSection]
sub_question_retrieval_stats: AgentChunkStats

View File

@@ -1,47 +0,0 @@
from onyx.agent_search.expanded_retrieval.states import DocRerankingUpdate
from onyx.agent_search.expanded_retrieval.states import ExpandedRetrievalState
from onyx.agent_search.shared_graph_utils.calculations import get_fit_scores
from onyx.agent_search.shared_graph_utils.models import RetrievalFitStats
from onyx.configs.dev_configs import AGENT_RERANKING_MAX_QUERY_RETRIEVAL_RESULTS
from onyx.configs.dev_configs import AGENT_RERANKING_STATS
from onyx.context.search.pipeline import InferenceSection
from onyx.context.search.pipeline import retrieval_preprocessing
from onyx.context.search.pipeline import search_postprocessing
from onyx.context.search.pipeline import SearchRequest
def doc_reranking(state: ExpandedRetrievalState) -> DocRerankingUpdate:
verified_documents = state["verified_documents"]
# Rerank post retrieval and verification. First, create a search query
# then create the list of reranked sections
question = state.get("question", state["subgraph_search_request"].query)
_search_query = retrieval_preprocessing(
search_request=SearchRequest(query=question),
user=None,
llm=state["subgraph_fast_llm"],
db_session=state["subgraph_db_session"],
)
reranked_documents = list(
search_postprocessing(
search_query=_search_query,
retrieved_sections=verified_documents,
llm=state["subgraph_fast_llm"],
)
)[
0
] # only get the reranked szections, not the SectionRelevancePiece
if AGENT_RERANKING_STATS:
fit_scores = get_fit_scores(verified_documents, reranked_documents)
else:
fit_scores = RetrievalFitStats(fit_score_lift=0, rerank_effect=0, fit_scores={})
return DocRerankingUpdate(
reranked_documents=[
doc for doc in reranked_documents if type(doc) == InferenceSection
][:AGENT_RERANKING_MAX_QUERY_RETRIEVAL_RESULTS],
sub_question_retrieval_stats=fit_scores,
)

View File

@@ -1,55 +0,0 @@
from onyx.agent_search.expanded_retrieval.models import QueryResult
from onyx.agent_search.expanded_retrieval.states import DocRetrievalUpdate
from onyx.agent_search.expanded_retrieval.states import RetrievalInput
from onyx.agent_search.shared_graph_utils.calculations import get_fit_scores
from onyx.configs.dev_configs import AGENT_MAX_QUERY_RETRIEVAL_RESULTS
from onyx.configs.dev_configs import AGENT_RETRIEVAL_STATS
from onyx.context.search.models import SearchRequest
from onyx.context.search.pipeline import SearchPipeline
def doc_retrieval(state: RetrievalInput) -> DocRetrievalUpdate:
"""
Retrieve documents
Args:
state (RetrievalInput): Primary state + the query to retrieve
Updates:
expanded_retrieval_results: list[ExpandedRetrievalResult]
retrieved_documents: list[InferenceSection]
"""
llm = state["subgraph_primary_llm"]
fast_llm = state["subgraph_fast_llm"]
query_to_retrieve = state["query_to_retrieve"]
search_results = SearchPipeline(
search_request=SearchRequest(
query=query_to_retrieve,
),
user=None,
llm=llm,
fast_llm=fast_llm,
db_session=state["subgraph_db_session"],
)
retrieved_docs = search_results._get_sections()[:AGENT_MAX_QUERY_RETRIEVAL_RESULTS]
if AGENT_RETRIEVAL_STATS:
fit_scores = get_fit_scores(
retrieved_docs,
search_results.reranked_sections[:AGENT_MAX_QUERY_RETRIEVAL_RESULTS],
)
else:
fit_scores = None
expanded_retrieval_result = QueryResult(
query=query_to_retrieve,
search_results=retrieved_docs,
stats=fit_scores,
)
return DocRetrievalUpdate(
expanded_retrieval_results=[expanded_retrieval_result],
retrieved_documents=retrieved_docs,
)

View File

@@ -1,41 +0,0 @@
from langchain_core.messages import HumanMessage
from onyx.agent_search.expanded_retrieval.states import DocVerificationInput
from onyx.agent_search.expanded_retrieval.states import DocVerificationUpdate
from onyx.agent_search.shared_graph_utils.prompts import VERIFIER_PROMPT
def doc_verification(state: DocVerificationInput) -> DocVerificationUpdate:
"""
Check whether the document is relevant for the original user question
Args:
state (DocVerificationInput): The current state
Updates:
verified_documents: list[InferenceSection]
"""
question = state["question"]
doc_to_verify = state["doc_to_verify"]
document_content = doc_to_verify.combined_content
msg = [
HumanMessage(
content=VERIFIER_PROMPT.format(
question=question, document_content=document_content
)
)
]
fast_llm = state["subgraph_fast_llm"]
response = fast_llm.invoke(msg)
verified_documents = []
if "yes" in response.content.lower():
verified_documents.append(doc_to_verify)
return DocVerificationUpdate(
verified_documents=verified_documents,
)

View File

@@ -1,30 +0,0 @@
from langchain_core.messages import HumanMessage
from langchain_core.messages import merge_message_runs
from onyx.agent_search.expanded_retrieval.states import ExpandedRetrievalInput
from onyx.agent_search.expanded_retrieval.states import QueryExpansionUpdate
from onyx.agent_search.shared_graph_utils.prompts import REWRITE_PROMPT_MULTI_ORIGINAL
from onyx.llm.interfaces import LLM
def expand_queries(state: ExpandedRetrievalInput) -> QueryExpansionUpdate:
question = state.get("question")
llm: LLM = state["subgraph_fast_llm"]
msg = [
HumanMessage(
content=REWRITE_PROMPT_MULTI_ORIGINAL.format(question=question),
)
]
llm_response_list = list(
llm.stream(
prompt=msg,
)
)
llm_response = merge_message_runs(llm_response_list, chunk_separator="")[0].content
rewritten_queries = llm_response.split("--")
return QueryExpansionUpdate(
expanded_queries=rewritten_queries,
)

View File

@@ -1,99 +0,0 @@
from collections import defaultdict
import numpy as np
from onyx.agent_search.expanded_retrieval.models import ExpandedRetrievalResult
from onyx.agent_search.expanded_retrieval.models import QueryResult
from onyx.agent_search.expanded_retrieval.states import ExpandedRetrievalOutput
from onyx.agent_search.expanded_retrieval.states import ExpandedRetrievalState
from onyx.agent_search.expanded_retrieval.states import InferenceSection
from onyx.agent_search.shared_graph_utils.models import AgentChunkStats
def _calculate_sub_question_retrieval_stats(
verified_documents: list[InferenceSection],
expanded_retrieval_results: list[QueryResult],
) -> AgentChunkStats:
chunk_scores: dict[str, dict[str, list[int | float]]] = defaultdict(
lambda: defaultdict(list)
)
for expanded_retrieval_result in expanded_retrieval_results:
for doc in expanded_retrieval_result.search_results:
doc_chunk_id = f"{doc.center_chunk.document_id}_{doc.center_chunk.chunk_id}"
if doc.center_chunk.score is not None:
chunk_scores[doc_chunk_id]["score"].append(doc.center_chunk.score)
verified_doc_chunk_ids = [
f"{verified_document.center_chunk.document_id}_{verified_document.center_chunk.chunk_id}"
for verified_document in verified_documents
]
dismissed_doc_chunk_ids = []
raw_chunk_stats_counts: dict[str, int] = defaultdict(int)
raw_chunk_stats_scores: dict[str, float] = defaultdict(float)
for doc_chunk_id, chunk_data in chunk_scores.items():
if doc_chunk_id in verified_doc_chunk_ids:
raw_chunk_stats_counts["verified_count"] += 1
valid_chunk_scores = [
score for score in chunk_data["score"] if score is not None
]
raw_chunk_stats_scores["verified_scores"] += float(
np.mean(valid_chunk_scores)
)
else:
raw_chunk_stats_counts["rejected_count"] += 1
valid_chunk_scores = [
score for score in chunk_data["score"] if score is not None
]
raw_chunk_stats_scores["rejected_scores"] += float(
np.mean(valid_chunk_scores)
)
dismissed_doc_chunk_ids.append(doc_chunk_id)
if raw_chunk_stats_counts["verified_count"] == 0:
verified_avg_scores = 0.0
else:
verified_avg_scores = raw_chunk_stats_scores["verified_scores"] / float(
raw_chunk_stats_counts["verified_count"]
)
rejected_scores = raw_chunk_stats_scores.get("rejected_scores", None)
if rejected_scores is not None:
rejected_avg_scores = rejected_scores / float(
raw_chunk_stats_counts["rejected_count"]
)
else:
rejected_avg_scores = None
chunk_stats = AgentChunkStats(
verified_count=raw_chunk_stats_counts["verified_count"],
verified_avg_scores=verified_avg_scores,
rejected_count=raw_chunk_stats_counts["rejected_count"],
rejected_avg_scores=rejected_avg_scores,
verified_doc_chunk_ids=verified_doc_chunk_ids,
dismissed_doc_chunk_ids=dismissed_doc_chunk_ids,
)
return chunk_stats
def format_results(state: ExpandedRetrievalState) -> ExpandedRetrievalOutput:
sub_question_retrieval_stats = _calculate_sub_question_retrieval_stats(
verified_documents=state["verified_documents"],
expanded_retrieval_results=state["expanded_retrieval_results"],
)
if sub_question_retrieval_stats is None:
sub_question_retrieval_stats = AgentChunkStats()
# else:
# sub_question_retrieval_stats = [sub_question_retrieval_stats]
return ExpandedRetrievalOutput(
expanded_retrieval_result=ExpandedRetrievalResult(
expanded_queries_results=state["expanded_retrieval_results"],
all_documents=state["reranked_documents"],
sub_question_retrieval_stats=sub_question_retrieval_stats,
),
)

View File

@@ -1,33 +0,0 @@
from typing import Literal
from langgraph.types import Command
from langgraph.types import Send
from onyx.agent_search.core_state import in_subgraph_extract_core_fields
from onyx.agent_search.expanded_retrieval.nodes.doc_verification import (
DocVerificationInput,
)
from onyx.agent_search.expanded_retrieval.states import ExpandedRetrievalState
def verification_kickoff(
state: ExpandedRetrievalState,
) -> Command[Literal["doc_verification"]]:
documents = state["retrieved_documents"]
verification_question = state.get(
"question", state["subgraph_search_request"].query
)
return Command(
update={},
goto=[
Send(
node="doc_verification",
arg=DocVerificationInput(
doc_to_verify=doc,
question=verification_question,
**in_subgraph_extract_core_fields(state),
),
)
for doc in documents
],
)

View File

@@ -1,76 +0,0 @@
from operator import add
from typing import Annotated
from typing import TypedDict
from onyx.agent_search.core_state import SubgraphCoreState
from onyx.agent_search.expanded_retrieval.models import ExpandedRetrievalResult
from onyx.agent_search.expanded_retrieval.models import QueryResult
from onyx.agent_search.shared_graph_utils.models import RetrievalFitStats
from onyx.agent_search.shared_graph_utils.operators import dedup_inference_sections
from onyx.context.search.models import InferenceSection
### States ###
## Graph Input State
class ExpandedRetrievalInput(SubgraphCoreState):
question: str
dummy: str
base_search: bool = False
## Update/Return States
class QueryExpansionUpdate(TypedDict):
expanded_queries: list[str]
class DocVerificationUpdate(TypedDict):
verified_documents: Annotated[list[InferenceSection], dedup_inference_sections]
class DocRetrievalUpdate(TypedDict):
expanded_retrieval_results: Annotated[list[QueryResult], add]
retrieved_documents: Annotated[list[InferenceSection], dedup_inference_sections]
class DocRerankingUpdate(TypedDict):
reranked_documents: Annotated[list[InferenceSection], dedup_inference_sections]
sub_question_retrieval_stats: RetrievalFitStats | None
## Graph Output State
class ExpandedRetrievalOutput(TypedDict):
expanded_retrieval_result: ExpandedRetrievalResult
base_expanded_retrieval_result: ExpandedRetrievalResult
## Graph State
class ExpandedRetrievalState(
# This includes the core state
ExpandedRetrievalInput,
QueryExpansionUpdate,
DocRetrievalUpdate,
DocVerificationUpdate,
DocRerankingUpdate,
ExpandedRetrievalOutput,
):
pass
## Conditional Input States
class DocVerificationInput(ExpandedRetrievalInput):
doc_to_verify: InferenceSection
class RetrievalInput(ExpandedRetrievalInput):
query_to_retrieve: str

View File

@@ -1,87 +0,0 @@
from collections.abc import Hashable
from langgraph.types import Send
from onyx.agent_search.answer_question.states import AnswerQuestionInput
from onyx.agent_search.answer_question.states import AnswerQuestionOutput
from onyx.agent_search.core_state import extract_core_fields_for_subgraph
from onyx.agent_search.expanded_retrieval.states import ExpandedRetrievalInput
from onyx.agent_search.main.states import MainInput
from onyx.agent_search.main.states import MainState
def parallelize_decompozed_answer_queries(state: MainState) -> list[Send | Hashable]:
if len(state["initial_decomp_questions"]) > 0:
return [
Send(
"answer_query",
AnswerQuestionInput(
**extract_core_fields_for_subgraph(state),
question=question,
),
)
for question in state["initial_decomp_questions"]
]
else:
return [
Send(
"ingest_answers",
AnswerQuestionOutput(),
)
]
def send_to_initial_retrieval(state: MainInput) -> list[Send | Hashable]:
print("sending to initial retrieval via edge")
return [
Send(
"initial_retrieval",
ExpandedRetrievalInput(
question=state["search_request"].query,
**extract_core_fields_for_subgraph(state),
),
)
]
# def continue_to_answer_sub_questions(state: QAState) -> Union[Hashable, list[Hashable]]:
# # Routes re-written queries to the (parallel) retrieval steps
# # Notice the 'Send()' API that takes care of the parallelization
# return [
# Send(
# "sub_answers_graph",
# ResearchQAState(
# sub_question=sub_question["sub_question_str"],
# sub_question_nr=sub_question["sub_question_nr"],
# graph_start_time=state["graph_start_time"],
# primary_llm=state["primary_llm"],
# fast_llm=state["fast_llm"],
# ),
# )
# for sub_question in state["sub_questions"]
# ]
# def continue_to_deep_answer(state: QAState) -> Union[Hashable, list[Hashable]]:
# print("---GO TO DEEP ANSWER OR END---")
# base_answer = state["base_answer"]
# question = state["original_question"]
# BASE_CHECK_MESSAGE = [
# HumanMessage(
# content=BASE_CHECK_PROMPT.format(question=question, base_answer=base_answer)
# )
# ]
# model = state["fast_llm"]
# response = model.invoke(BASE_CHECK_MESSAGE)
# print(f"CAN WE CONTINUE W/O GENERATING A DEEP ANSWER? - {response.pretty_repr()}")
# if response.pretty_repr() == "no":
# return "decompose"
# else:
# return "end"

View File

@@ -1,459 +0,0 @@
from langgraph.graph import END
from langgraph.graph import START
from langgraph.graph import StateGraph
from onyx.agent_search.answer_question.graph_builder import answer_query_graph_builder
from onyx.agent_search.base_raw_search.graph_builder import (
base_raw_search_graph_builder,
)
from onyx.agent_search.main.edges import parallelize_decompozed_answer_queries
from onyx.agent_search.main.nodes.base_decomp import main_decomp_base
from onyx.agent_search.main.nodes.generate_initial_answer import (
generate_initial_answer,
)
from onyx.agent_search.main.nodes.ingest_answers import ingest_answers
from onyx.agent_search.main.nodes.ingest_initial_retrieval import (
ingest_initial_retrieval,
)
from onyx.agent_search.main.states import MainInput
from onyx.agent_search.main.states import MainState
test_mode = False
def main_graph_builder(test_mode: bool = False) -> StateGraph:
graph = StateGraph(
state_schema=MainState,
input=MainInput,
)
graph_component = "both"
# graph_component = "right"
# graph_component = "left"
if graph_component == "left":
### Add nodes ###
graph.add_node(
node="base_decomp",
action=main_decomp_base,
)
answer_query_subgraph = answer_query_graph_builder().compile()
graph.add_node(
node="answer_query",
action=answer_query_subgraph,
)
# graph.add_node(
# node="prep_for_initial_retrieval",
# action=prep_for_initial_retrieval,
# )
# expanded_retrieval_subgraph = expanded_retrieval_graph_builder().compile()
# graph.add_node(
# node="initial_retrieval",
# action=expanded_retrieval_subgraph,
# )
# base_raw_search_subgraph = base_raw_search_graph_builder().compile()
# graph.add_node(
# node="base_raw_search_data",
# action=base_raw_search_subgraph,
# )
# graph.add_node(
# node="ingest_initial_retrieval",
# action=ingest_initial_retrieval,
# )
graph.add_node(
node="ingest_answers",
action=ingest_answers,
)
graph.add_node(
node="generate_initial_answer",
action=generate_initial_answer,
)
# if test_mode:
# graph.add_node(
# node="generate_initial_base_answer",
# action=generate_initial_base_answer,
# )
### Add edges ###
# graph.add_conditional_edges(
# source=START,
# path=send_to_initial_retrieval,
# path_map=["initial_retrieval"],
# )
# graph.add_edge(
# start_key=START,
# end_key="prep_for_initial_retrieval",
# )
# graph.add_edge(
# start_key="prep_for_initial_retrieval",
# end_key="initial_retrieval",
# )
# graph.add_edge(
# start_key="initial_retrieval",
# end_key="ingest_initial_retrieval",
# )
# graph.add_edge(
# start_key=START,
# end_key="base_raw_search_data"
# )
# # graph.add_edge(
# # start_key="base_raw_search_data",
# # end_key=END
# # )
# graph.add_edge(
# start_key="base_raw_search_data",
# end_key="ingest_initial_retrieval",
# )
# graph.add_edge(
# start_key="ingest_initial_retrieval",
# end_key=END
# )
graph.add_edge(
start_key=START,
end_key="base_decomp",
)
graph.add_conditional_edges(
source="base_decomp",
path=parallelize_decompozed_answer_queries,
path_map=["answer_query"],
)
graph.add_edge(
start_key="answer_query",
end_key="ingest_answers",
)
graph.add_edge(
start_key="ingest_answers",
end_key="generate_initial_answer",
)
# graph.add_edge(
# start_key=["ingest_answers", "ingest_initial_retrieval"],
# end_key="generate_initial_answer",
# )
graph.add_edge(
start_key="generate_initial_answer",
end_key=END,
)
# graph.add_edge(
# start_key="ingest_answers",
# end_key="generate_initial_answer",
# )
# if test_mode:
# graph.add_edge(
# start_key=["ingest_answers", "ingest_initial_retrieval"],
# end_key="generate_initial_base_answer",
# )
# graph.add_edge(
# start_key=["generate_initial_answer", "generate_initial_base_answer"],
# end_key=END,
# )
# else:
# graph.add_edge(
# start_key="generate_initial_answer",
# end_key=END,
# )
elif graph_component == "right":
### Add nodes ###
# graph.add_node(
# node="base_decomp",
# action=main_decomp_base,
# )
# answer_query_subgraph = answer_query_graph_builder().compile()
# graph.add_node(
# node="answer_query",
# action=answer_query_subgraph,
# )
# graph.add_node(
# node="prep_for_initial_retrieval",
# action=prep_for_initial_retrieval,
# )
# expanded_retrieval_subgraph = expanded_retrieval_graph_builder().compile()
# graph.add_node(
# node="initial_retrieval",
# action=expanded_retrieval_subgraph,
# )
base_raw_search_subgraph = base_raw_search_graph_builder().compile()
graph.add_node(
node="base_raw_search_data",
action=base_raw_search_subgraph,
)
graph.add_node(
node="ingest_initial_retrieval",
action=ingest_initial_retrieval,
)
# graph.add_node(
# node="ingest_answers",
# action=ingest_answers,
# )
graph.add_node(
node="generate_initial_answer",
action=generate_initial_answer,
)
# if test_mode:
# graph.add_node(
# node="generate_initial_base_answer",
# action=generate_initial_base_answer,
# )
### Add edges ###
# graph.add_conditional_edges(
# source=START,
# path=send_to_initial_retrieval,
# path_map=["initial_retrieval"],
# )
# graph.add_edge(
# start_key=START,
# end_key="prep_for_initial_retrieval",
# )
# graph.add_edge(
# start_key="prep_for_initial_retrieval",
# end_key="initial_retrieval",
# )
# graph.add_edge(
# start_key="initial_retrieval",
# end_key="ingest_initial_retrieval",
# )
graph.add_edge(start_key=START, end_key="base_raw_search_data")
# # graph.add_edge(
# # start_key="base_raw_search_data",
# # end_key=END
# # )
graph.add_edge(
start_key="base_raw_search_data",
end_key="ingest_initial_retrieval",
)
# graph.add_edge(
# start_key="ingest_initial_retrieval",
# end_key=END
# )
# graph.add_edge(
# start_key=START,
# end_key="base_decomp",
# )
# graph.add_conditional_edges(
# source="base_decomp",
# path=parallelize_decompozed_answer_queries,
# path_map=["answer_query"],
# )
# graph.add_edge(
# start_key="answer_query",
# end_key="ingest_answers",
# )
# graph.add_edge(
# start_key="ingest_answers",
# end_key="generate_initial_answer",
# )
graph.add_edge(
start_key="ingest_initial_retrieval",
end_key="generate_initial_answer",
)
# graph.add_edge(
# start_key=["ingest_answers", "ingest_initial_retrieval"],
# end_key="generate_initial_answer",
# )
graph.add_edge(
start_key="generate_initial_answer",
end_key=END,
)
# graph.add_edge(
# start_key="ingest_answers",
# end_key="generate_initial_answer",
# )
# if test_mode:
# graph.add_edge(
# start_key=["ingest_answers", "ingest_initial_retrieval"],
# end_key="generate_initial_base_answer",
# )
# graph.add_edge(
# start_key=["generate_initial_answer", "generate_initial_base_answer"],
# end_key=END,
# )
# else:
# graph.add_edge(
# start_key="generate_initial_answer",
# end_key=END,
# )
else:
graph.add_node(
node="base_decomp",
action=main_decomp_base,
)
answer_query_subgraph = answer_query_graph_builder().compile()
graph.add_node(
node="answer_query",
action=answer_query_subgraph,
)
# graph.add_node(
# node="prep_for_initial_retrieval",
# action=prep_for_initial_retrieval,
# )
# expanded_retrieval_subgraph = expanded_retrieval_graph_builder().compile()
# graph.add_node(
# node="initial_retrieval",
# action=expanded_retrieval_subgraph,
# )
base_raw_search_subgraph = base_raw_search_graph_builder().compile()
graph.add_node(
node="base_raw_search_data",
action=base_raw_search_subgraph,
)
graph.add_node(
node="ingest_initial_retrieval",
action=ingest_initial_retrieval,
)
graph.add_node(
node="ingest_answers",
action=ingest_answers,
)
graph.add_node(
node="generate_initial_answer",
action=generate_initial_answer,
)
# if test_mode:
# graph.add_node(
# node="generate_initial_base_answer",
# action=generate_initial_base_answer,
# )
### Add edges ###
# graph.add_conditional_edges(
# source=START,
# path=send_to_initial_retrieval,
# path_map=["initial_retrieval"],
# )
# graph.add_edge(
# start_key=START,
# end_key="prep_for_initial_retrieval",
# )
# graph.add_edge(
# start_key="prep_for_initial_retrieval",
# end_key="initial_retrieval",
# )
# graph.add_edge(
# start_key="initial_retrieval",
# end_key="ingest_initial_retrieval",
# )
graph.add_edge(start_key=START, end_key="base_raw_search_data")
# # graph.add_edge(
# # start_key="base_raw_search_data",
# # end_key=END
# # )
graph.add_edge(
start_key="base_raw_search_data",
end_key="ingest_initial_retrieval",
)
# graph.add_edge(
# start_key="ingest_initial_retrieval",
# end_key=END
# )
graph.add_edge(
start_key=START,
end_key="base_decomp",
)
graph.add_conditional_edges(
source="base_decomp",
path=parallelize_decompozed_answer_queries,
path_map=["answer_query"],
)
graph.add_edge(
start_key="answer_query",
end_key="ingest_answers",
)
# graph.add_edge(
# start_key="ingest_answers",
# end_key="generate_initial_answer",
# )
graph.add_edge(
start_key=["ingest_answers", "ingest_initial_retrieval"],
end_key="generate_initial_answer",
)
graph.add_edge(
start_key="generate_initial_answer",
end_key=END,
)
# graph.add_edge(
# start_key="ingest_answers",
# end_key="generate_initial_answer",
# )
# if test_mode:
# graph.add_edge(
# start_key=["ingest_answers", "ingest_initial_retrieval"],
# end_key="generate_initial_base_answer",
# )
# graph.add_edge(
# start_key=["generate_initial_answer", "generate_initial_base_answer"],
# end_key=END,
# )
# else:
# graph.add_edge(
# start_key="generate_initial_answer",
# end_key=END,
# )
return graph
if __name__ == "__main__":
pass
from onyx.db.engine import get_session_context_manager
from onyx.llm.factory import get_default_llms
from onyx.context.search.models import SearchRequest
graph = main_graph_builder()
compiled_graph = graph.compile()
primary_llm, fast_llm = get_default_llms()
with get_session_context_manager() as db_session:
search_request = SearchRequest(query="Who created Excel?")
inputs = MainInput(
search_request=search_request,
primary_llm=primary_llm,
fast_llm=fast_llm,
db_session=db_session,
)
for thing in compiled_graph.stream(
input=inputs,
# stream_mode="debug",
# debug=True,
subgraphs=True,
):
# print(thing)
print()

View File

@@ -1,33 +0,0 @@
from langchain_core.messages import HumanMessage
from onyx.agent_search.main.states import BaseDecompUpdate
from onyx.agent_search.main.states import MainState
from onyx.agent_search.shared_graph_utils.prompts import (
INITIAL_DECOMPOSITION_PROMPT_QUESTIONS,
)
from onyx.agent_search.shared_graph_utils.utils import clean_and_parse_list_string
def main_decomp_base(state: MainState) -> BaseDecompUpdate:
question = state["search_request"].query
msg = [
HumanMessage(
content=INITIAL_DECOMPOSITION_PROMPT_QUESTIONS.format(question=question),
)
]
# Get the rewritten queries in a defined format
model = state["fast_llm"]
response = model.invoke(msg)
content = response.pretty_repr()
list_of_subquestions = clean_and_parse_list_string(content)
decomp_list: list[str] = [
sub_question["sub_question"].strip() for sub_question in list_of_subquestions
]
return BaseDecompUpdate(
initial_decomp_questions=decomp_list,
)

View File

@@ -1,33 +0,0 @@
from langchain_core.messages import HumanMessage
from onyx.agent_search.main.states import InitialAnswerBASEUpdate
from onyx.agent_search.main.states import MainState
from onyx.agent_search.shared_graph_utils.prompts import INITIAL_RAG_BASE_PROMPT
from onyx.agent_search.shared_graph_utils.utils import format_docs
def generate_initial_base_answer(state: MainState) -> InitialAnswerBASEUpdate:
print("---GENERATE INITIAL BASE ANSWER---")
question = state["search_request"].query
original_question_docs = state["all_original_question_documents"]
msg = [
HumanMessage(
content=INITIAL_RAG_BASE_PROMPT.format(
question=question,
context=format_docs(original_question_docs),
)
)
]
# Grader
model = state["fast_llm"]
response = model.invoke(msg)
answer = response.pretty_repr()
print()
print(
f"\n\n---INITIAL BASE ANSWER START---\n\nBase: {answer}\n\n ---INITIAL BASE ANSWER END---\n\n"
)
return InitialAnswerBASEUpdate(initial_base_answer=answer)

View File

@@ -1,180 +0,0 @@
from langchain_core.messages import HumanMessage
from onyx.agent_search.answer_question.states import QuestionAnswerResults
from onyx.agent_search.main.states import InitialAnswerUpdate
from onyx.agent_search.main.states import MainState
from onyx.agent_search.shared_graph_utils.models import AgentChunkStats
from onyx.agent_search.shared_graph_utils.models import InitialAgentResultStats
from onyx.agent_search.shared_graph_utils.operators import dedup_inference_sections
from onyx.agent_search.shared_graph_utils.prompts import INITIAL_RAG_PROMPT
from onyx.agent_search.shared_graph_utils.prompts import (
INITIAL_RAG_PROMPT_NO_SUB_QUESTIONS,
)
from onyx.agent_search.shared_graph_utils.utils import format_docs
def _calculate_initial_agent_stats(
decomp_answer_results: list[QuestionAnswerResults],
original_question_stats: AgentChunkStats,
) -> InitialAgentResultStats:
initial_agent_result_stats: InitialAgentResultStats = InitialAgentResultStats(
sub_questions={},
original_question={},
agent_effectiveness={},
)
orig_verified = original_question_stats.verified_count
orig_support_score = original_question_stats.verified_avg_scores
verified_document_chunk_ids = []
support_scores = 0.0
for decomp_answer_result in decomp_answer_results:
verified_document_chunk_ids += (
decomp_answer_result.sub_question_retrieval_stats.verified_doc_chunk_ids
)
if (
decomp_answer_result.sub_question_retrieval_stats.verified_avg_scores
is not None
):
support_scores += (
decomp_answer_result.sub_question_retrieval_stats.verified_avg_scores
)
verified_document_chunk_ids = list(set(verified_document_chunk_ids))
# Calculate sub-question stats
if (
verified_document_chunk_ids
and len(verified_document_chunk_ids) > 0
and support_scores is not None
):
sub_question_stats: dict[str, float | int | None] = {
"num_verified_documents": len(verified_document_chunk_ids),
"verified_avg_score": float(support_scores / len(decomp_answer_results)),
}
else:
sub_question_stats = {"num_verified_documents": 0, "verified_avg_score": None}
initial_agent_result_stats.sub_questions.update(sub_question_stats)
# Get original question stats
initial_agent_result_stats.original_question.update(
{
"num_verified_documents": original_question_stats.verified_count,
"verified_avg_score": original_question_stats.verified_avg_scores,
}
)
# Calculate chunk utilization ratio
sub_verified = initial_agent_result_stats.sub_questions["num_verified_documents"]
chunk_ratio: float | None = None
if sub_verified is not None and orig_verified is not None and orig_verified > 0:
chunk_ratio = (float(sub_verified) / orig_verified) if sub_verified > 0 else 0.0
elif sub_verified is not None and sub_verified > 0:
chunk_ratio = 10.0
initial_agent_result_stats.agent_effectiveness["utilized_chunk_ratio"] = chunk_ratio
if (
orig_support_score is None
and initial_agent_result_stats.sub_questions["verified_avg_score"] is None
):
initial_agent_result_stats.agent_effectiveness["support_ratio"] = None
elif orig_support_score is None:
initial_agent_result_stats.agent_effectiveness["support_ratio"] = 10
elif initial_agent_result_stats.sub_questions["verified_avg_score"] is None:
initial_agent_result_stats.agent_effectiveness["support_ratio"] = 0
else:
initial_agent_result_stats.agent_effectiveness["support_ratio"] = (
initial_agent_result_stats.sub_questions["verified_avg_score"]
/ orig_support_score
)
return initial_agent_result_stats
def generate_initial_answer(state: MainState) -> InitialAnswerUpdate:
print("---GENERATE INITIAL---")
question = state["search_request"].query
sub_question_docs = state["documents"]
all_original_question_documents = state["all_original_question_documents"]
relevant_docs = dedup_inference_sections(
sub_question_docs, all_original_question_documents
)
net_new_original_question_docs = []
for all_original_question_doc in all_original_question_documents:
if all_original_question_doc not in sub_question_docs:
net_new_original_question_docs.append(all_original_question_doc)
decomp_answer_results = state["decomp_answer_results"]
good_qa_list: list[str] = []
decomp_questions = []
_SUB_QUESTION_ANSWER_TEMPLATE = """
Sub-Question:\n - {sub_question}\n --\nAnswer:\n - {sub_answer}\n\n
"""
for decomp_answer_result in decomp_answer_results:
decomp_questions.append(decomp_answer_result.question)
if (
decomp_answer_result.quality.lower().startswith("yes")
and len(decomp_answer_result.answer) > 0
and decomp_answer_result.answer != "I don't know"
):
good_qa_list.append(
_SUB_QUESTION_ANSWER_TEMPLATE.format(
sub_question=decomp_answer_result.question,
sub_answer=decomp_answer_result.answer,
)
)
sub_question_answer_str = "\n\n------\n\n".join(good_qa_list)
if len(good_qa_list) > 0:
msg = [
HumanMessage(
content=INITIAL_RAG_PROMPT.format(
question=question,
answered_sub_questions=sub_question_answer_str,
relevant_docs=format_docs(relevant_docs),
)
)
]
else:
msg = [
HumanMessage(
content=INITIAL_RAG_PROMPT_NO_SUB_QUESTIONS.format(
question=question,
relevant_docs=format_docs(relevant_docs),
)
)
]
# Grader
model = state["fast_llm"]
response = model.invoke(msg)
answer = response.pretty_repr()
initial_agent_stats = _calculate_initial_agent_stats(
state["decomp_answer_results"], state["original_question_retrieval_stats"]
)
print(f"\n\n---INITIAL AGENT ANSWER START---\n\n Answer:\n Agent: {answer}")
print(f"\n\nSub-Questions:\n\n{sub_question_answer_str}\n\nStas:\n\n")
if initial_agent_stats:
print(initial_agent_stats.original_question)
print(initial_agent_stats.sub_questions)
print(initial_agent_stats.agent_effectiveness)
print("\n\n ---INITIAL AGENT ANSWER END---\n\n")
return InitialAnswerUpdate(
initial_answer=answer,
initial_agent_stats=initial_agent_stats,
generated_sub_questions=decomp_questions,
)

View File

@@ -1,16 +0,0 @@
from onyx.agent_search.answer_question.states import AnswerQuestionOutput
from onyx.agent_search.main.states import DecompAnswersUpdate
from onyx.agent_search.shared_graph_utils.operators import dedup_inference_sections
def ingest_answers(state: AnswerQuestionOutput) -> DecompAnswersUpdate:
documents = []
answer_results = state.get("answer_results", [])
for answer_result in answer_results:
documents.extend(answer_result.documents)
return DecompAnswersUpdate(
# Deduping is done by the documents operator for the main graph
# so we might not need to dedup here
documents=dedup_inference_sections(documents, []),
decomp_answer_results=answer_results,
)

View File

@@ -1,23 +0,0 @@
from onyx.agent_search.base_raw_search.states import BaseRawSearchOutput
from onyx.agent_search.main.states import ExpandedRetrievalUpdate
from onyx.agent_search.shared_graph_utils.models import AgentChunkStats
def ingest_initial_retrieval(state: BaseRawSearchOutput) -> ExpandedRetrievalUpdate:
sub_question_retrieval_stats = state[
"base_expanded_retrieval_result"
].sub_question_retrieval_stats
if sub_question_retrieval_stats is None:
sub_question_retrieval_stats = AgentChunkStats()
else:
sub_question_retrieval_stats = sub_question_retrieval_stats
return ExpandedRetrievalUpdate(
original_question_retrieval_results=state[
"base_expanded_retrieval_result"
].expanded_queries_results,
all_original_question_documents=state[
"base_expanded_retrieval_result"
].all_documents,
original_question_retrieval_stats=sub_question_retrieval_stats,
)

View File

@@ -1,12 +0,0 @@
from onyx.agent_search.core_state import extract_core_fields_for_subgraph
from onyx.agent_search.expanded_retrieval.states import ExpandedRetrievalInput
from onyx.agent_search.main.states import MainState
def prep_for_initial_retrieval(state: MainState) -> ExpandedRetrievalInput:
print("prepping")
return ExpandedRetrievalInput(
question=state["search_request"].query,
dummy="0",
**extract_core_fields_for_subgraph(state)
)

View File

@@ -1,77 +0,0 @@
from operator import add
from typing import Annotated
from typing import TypedDict
from onyx.agent_search.answer_question.states import QuestionAnswerResults
from onyx.agent_search.core_state import CoreState
from onyx.agent_search.expanded_retrieval.models import ExpandedRetrievalResult
from onyx.agent_search.expanded_retrieval.models import QueryResult
from onyx.agent_search.shared_graph_utils.models import AgentChunkStats
from onyx.agent_search.shared_graph_utils.models import InitialAgentResultStats
from onyx.agent_search.shared_graph_utils.operators import dedup_inference_sections
from onyx.context.search.models import InferenceSection
### States ###
## Update States
class BaseDecompUpdate(TypedDict):
initial_decomp_questions: list[str]
class InitialAnswerBASEUpdate(TypedDict):
initial_base_answer: str
class InitialAnswerUpdate(TypedDict):
initial_answer: str
initial_agent_stats: InitialAgentResultStats
generated_sub_questions: list[str]
class DecompAnswersUpdate(TypedDict):
documents: Annotated[list[InferenceSection], dedup_inference_sections]
decomp_answer_results: Annotated[list[QuestionAnswerResults], add]
class ExpandedRetrievalUpdate(TypedDict):
all_original_question_documents: Annotated[
list[InferenceSection], dedup_inference_sections
]
original_question_retrieval_results: list[QueryResult]
original_question_retrieval_stats: AgentChunkStats
## Graph Input State
class MainInput(CoreState):
pass
## Graph State
class MainState(
# This includes the core state
MainInput,
BaseDecompUpdate,
InitialAnswerUpdate,
InitialAnswerBASEUpdate,
DecompAnswersUpdate,
ExpandedRetrievalUpdate,
):
# expanded_retrieval_result: Annotated[list[ExpandedRetrievalResult], add]
base_raw_search_result: Annotated[list[ExpandedRetrievalResult], add]
## Graph Output State
class MainOutput(TypedDict):
initial_answer: str
initial_base_answer: str
initial_agent_stats: dict
generated_sub_questions: list[str]

View File

@@ -1,120 +0,0 @@
import asyncio
from collections.abc import AsyncIterable
from collections.abc import Iterable
from langchain_core.runnables.schema import StreamEvent
from langgraph.graph.state import CompiledStateGraph
from onyx.agent_search.main.graph_builder import main_graph_builder
from onyx.agent_search.main.states import MainInput
from onyx.chat.answer import AnswerStream
from onyx.chat.models import AnswerQuestionPossibleReturn
from onyx.chat.models import OnyxAnswerPiece
from onyx.context.search.models import SearchRequest
from onyx.db.engine import get_session_context_manager
from onyx.llm.interfaces import LLM
from onyx.tools.models import ToolResponse
from onyx.tools.tool_runner import ToolCallKickoff
def _parse_agent_event(
event: StreamEvent,
) -> AnswerQuestionPossibleReturn | ToolCallKickoff | ToolResponse | None:
"""
Parse the event into a typed object.
Return None if we are not interested in the event.
"""
# if event["name"] == "LangGraph":
# return None
event_type = event["event"]
langgraph_node = event["metadata"].get("langgraph_node", "_graph_")
if "input" in event["data"] and isinstance(event["data"]["input"], str):
input_data = f'\nINPUT: {langgraph_node} -- {str(event["data"]["input"])}'
else:
input_data = ""
if "output" in event["data"] and isinstance(event["data"]["output"], str):
output_data = f'\nOUTPUT: {langgraph_node} -- {str(event["data"]["output"])}'
else:
output_data = ""
if len(input_data) > 0 or len(output_data) > 0:
return input_data + output_data
event_type = event["event"]
if event_type == "tool_call_kickoff":
return ToolCallKickoff(**event["data"])
elif event_type == "tool_response":
return ToolResponse(**event["data"])
elif event_type == "on_chat_model_stream":
return OnyxAnswerPiece(answer_piece=event["data"]["chunk"].content)
return None
def _manage_async_event_streaming(
compiled_graph: CompiledStateGraph,
graph_input: MainInput,
) -> Iterable[StreamEvent]:
async def _run_async_event_stream() -> AsyncIterable[StreamEvent]:
async for event in compiled_graph.astream_events(
input=graph_input,
# indicating v2 here deserves further scrutiny
version="v2",
):
yield event
# This might be able to be simplified
def _yield_async_to_sync() -> Iterable[StreamEvent]:
loop = asyncio.new_event_loop()
try:
# Get the async generator
async_gen = _run_async_event_stream()
# Convert to AsyncIterator
async_iter = async_gen.__aiter__()
while True:
try:
# Create a coroutine by calling anext with the async iterator
next_coro = anext(async_iter)
# Run the coroutine to get the next event
event = loop.run_until_complete(next_coro)
yield event
except StopAsyncIteration:
break
finally:
loop.close()
return _yield_async_to_sync()
def run_graph(
compiled_graph: CompiledStateGraph,
search_request: SearchRequest,
primary_llm: LLM,
fast_llm: LLM,
) -> AnswerStream:
with get_session_context_manager() as db_session:
input = MainInput(
search_request=search_request,
primary_llm=primary_llm,
fast_llm=fast_llm,
db_session=db_session,
)
for event in _manage_async_event_streaming(
compiled_graph=compiled_graph, graph_input=input
):
if parsed_object := _parse_agent_event(event):
yield parsed_object
if __name__ == "__main__":
from onyx.llm.factory import get_default_llms
from onyx.context.search.models import SearchRequest
graph = main_graph_builder()
compiled_graph = graph.compile()
primary_llm, fast_llm = get_default_llms()
search_request = SearchRequest(
query="what can you do with onyx or danswer?",
)
for output in run_graph(compiled_graph, search_request, primary_llm, fast_llm):
print("a")
# print(output)

View File

@@ -1,98 +0,0 @@
import numpy as np
from onyx.agent_search.shared_graph_utils.models import RetrievalFitScoreMetrics
from onyx.agent_search.shared_graph_utils.models import RetrievalFitStats
from onyx.chat.models import SectionRelevancePiece
from onyx.context.search.models import InferenceSection
from onyx.utils.logger import setup_logger
logger = setup_logger()
def unique_chunk_id(doc: InferenceSection) -> str:
return f"{doc.center_chunk.document_id}_{doc.center_chunk.chunk_id}"
def calculate_rank_shift(list1: list, list2: list, top_n: int = 20) -> float:
shift = 0
for rank_first, doc_id in enumerate(list1[:top_n], 1):
try:
rank_second = list2.index(doc_id) + 1
except ValueError:
rank_second = len(list2) # Document not found in second list
shift += np.abs(rank_first - rank_second) / np.log(1 + rank_first * rank_second)
return shift / top_n
def get_fit_scores(
pre_reranked_results: list[InferenceSection],
post_reranked_results: list[InferenceSection] | list[SectionRelevancePiece],
) -> RetrievalFitStats | None:
"""
Calculate retrieval metrics for search purposes
"""
if len(pre_reranked_results) == 0 or len(post_reranked_results) == 0:
return None
ranked_sections = {
"initial": pre_reranked_results,
"reranked": post_reranked_results,
}
fit_eval: RetrievalFitStats = RetrievalFitStats(
fit_score_lift=0,
rerank_effect=0,
fit_scores={
"initial": RetrievalFitScoreMetrics(scores={}, chunk_ids=[]),
"reranked": RetrievalFitScoreMetrics(scores={}, chunk_ids=[]),
},
)
for rank_type, docs in ranked_sections.items():
print(f"rank_type: {rank_type}")
for i in [1, 5, 10]:
fit_eval.fit_scores[rank_type].scores[str(i)] = (
sum(
[
float(doc.center_chunk.score)
for doc in docs[:i]
if type(doc) == InferenceSection
and doc.center_chunk.score is not None
]
)
/ i
)
fit_eval.fit_scores[rank_type].scores["fit_score"] = (
1
/ 3
* (
fit_eval.fit_scores[rank_type].scores["1"]
+ fit_eval.fit_scores[rank_type].scores["5"]
+ fit_eval.fit_scores[rank_type].scores["10"]
)
)
fit_eval.fit_scores[rank_type].scores["fit_score"] = fit_eval.fit_scores[
rank_type
].scores["1"]
fit_eval.fit_scores[rank_type].chunk_ids = [
unique_chunk_id(doc) for doc in docs if type(doc) == InferenceSection
]
fit_eval.fit_score_lift = (
fit_eval.fit_scores["reranked"].scores["fit_score"]
/ fit_eval.fit_scores["initial"].scores["fit_score"]
)
fit_eval.rerank_effect = calculate_rank_shift(
fit_eval.fit_scores["initial"].chunk_ids,
fit_eval.fit_scores["reranked"].chunk_ids,
)
return fit_eval

View File

@@ -1,47 +0,0 @@
from typing import Literal
from pydantic import BaseModel
# Pydantic models for structured outputs
class RewrittenQueries(BaseModel):
rewritten_queries: list[str]
class BinaryDecision(BaseModel):
decision: Literal["yes", "no"]
class BinaryDecisionWithReasoning(BaseModel):
reasoning: str
decision: Literal["yes", "no"]
class RetrievalFitScoreMetrics(BaseModel):
scores: dict[str, float]
chunk_ids: list[str]
class RetrievalFitStats(BaseModel):
fit_score_lift: float
rerank_effect: float
fit_scores: dict[str, RetrievalFitScoreMetrics]
class AgentChunkScores(BaseModel):
scores: dict[str, dict[str, list[int | float]]]
class AgentChunkStats(BaseModel):
verified_count: int | None
verified_avg_scores: float | None
rejected_count: int | None
rejected_avg_scores: float | None
verified_doc_chunk_ids: list[str]
dismissed_doc_chunk_ids: list[str]
class InitialAgentResultStats(BaseModel):
sub_questions: dict[str, float | int | None]
original_question: dict[str, float | int | None]
agent_effectiveness: dict[str, float | int | None]

View File

@@ -1,9 +0,0 @@
from onyx.chat.prune_and_merge import _merge_sections
from onyx.context.search.models import InferenceSection
def dedup_inference_sections(
list1: list[InferenceSection], list2: list[InferenceSection]
) -> list[InferenceSection]:
deduped = _merge_sections(list1 + list2)
return deduped

View File

@@ -1,543 +0,0 @@
REWRITE_PROMPT_MULTI_ORIGINAL = """ \n
Please convert an initial user question into a 2-3 more appropriate short and pointed search queries for retrievel from a
document store. Particularly, try to think about resolving ambiguities and make the search queries more specific,
enabling the system to search more broadly.
Also, try to make the search queries not redundant, i.e. not too similar! \n\n
Here is the initial question:
\n ------- \n
{question}
\n ------- \n
Formulate the queries separated by '--' (Do not say 'Query 1: ...', just write the querytext): """
REWRITE_PROMPT_MULTI = """ \n
Please create a list of 2-3 sample documents that could answer an original question. Each document
should be about as long as the original question. \n
Here is the initial question:
\n ------- \n
{question}
\n ------- \n
Formulate the sample documents separated by '--' (Do not say 'Document 1: ...', just write the text): """
BASE_RAG_PROMPT = """ \n
You are an assistant for question-answering tasks. Use the context provided below - and only the
provided context - to answer the given question. (Note that the answer is in service of anserwing a broader
question, given below as 'motivation'.)
Again, only use the provided context and do not use your internal knowledge! If you cannot answer the
question based on the context, say "I don't know". It is a matter of life and death that you do NOT
use your internal knowledge, just the provided information!
Make sure that you keep all relevant information, specifically as it concerns to the ultimate goal.
(But keep other details as well.)
\nContext:\n {context} \n
Motivation:\n {original_question} \n\n
\n\n
And here is the question I want you to answer based on the context above (with the motivation in mind):
\n--\n {question} \n--\n
"""
SUB_CHECK_PROMPT = """
Your task is to see whether a given answer addresses a given question.
Please do not use any internal knowledge you may have - just focus on whether the answer
as given seems to largely address the question as given, or at least addresses part of the question.
Here is the question:
\n ------- \n
{question}
\n ------- \n
Here is the suggested answer:
\n ------- \n
{base_answer}
\n ------- \n
Does the suggested answer address the question? Please answer with yes or no:"""
BASE_CHECK_PROMPT = """ \n
Please check whether 1) the suggested answer seems to fully address the original question AND 2)the
original question requests a simple, factual answer, and there are no ambiguities, judgements,
aggregations, or any other complications that may require extra context. (I.e., if the question is
somewhat addressed, but the answer would benefit from more context, then answer with 'no'.)
Please only answer with 'yes' or 'no' \n
Here is the initial question:
\n ------- \n
{question}
\n ------- \n
Here is the proposed answer:
\n ------- \n
{base_answer}
\n ------- \n
Please answer with yes or no:"""
VERIFIER_PROMPT = """
You are supposed to judge whether a document text contains data or information that is potentially relevant for a question.
Here is a document text that you can take as a fact:
--
DOCUMENT INFORMATION:
{document_content}
--
Do you think that this information is useful and relevant to answer the following question?
(Other documents may supply additional information, so do not worry if the provided information
is not enough to answer the question, but it needs to be relevant to the question.)
--
QUESTION:
{question}
--
Please answer with 'yes' or 'no':
Answer:
"""
INITIAL_DECOMPOSITION_PROMPT_BASIC = """ \n
If you think it is helpful, please decompose an initial user question into not more
than 4 appropriate sub-questions that help to answer the original question.
The purpose for this decomposition is to isolate individulal entities
(i.e., 'compare sales of company A and company B' -> 'what are sales for company A' + 'what are sales
for company B'), split ambiguous terms (i.e., 'what is our success with company A' -> 'what are our
sales with company A' + 'what is our market share with company A' + 'is company A a reference customer
for us'), etc. Each sub-question should be realistically be answerable by a good RAG system.
Importantly, if you think it is not needed or helpful, please just return an empty list. That is ok too.
Here is the initial question:
\n ------- \n
{question}
\n ------- \n
Please formulate your answer as a list of subquestions:
Answer:
"""
REWRITE_PROMPT_SINGLE = """ \n
Please convert an initial user question into a more appropriate search query for retrievel from a
document store. \n
Here is the initial question:
\n ------- \n
{question}
\n ------- \n
Formulate the query: """
MODIFIED_RAG_PROMPT = """You are an assistant for question-answering tasks. Use the context provided below
- and only this context - to answer the question. If you don't know the answer, just say "I don't know".
Use three sentences maximum and keep the answer concise.
Pay also particular attention to the sub-questions and their answers, at least it may enrich the answer.
Again, only use the provided context and do not use your internal knowledge! If you cannot answer the
question based on the context, say "I don't know". It is a matter of life and death that you do NOT
use your internal knowledge, just the provided information!
\nQuestion: {question}
\nContext: {combined_context} \n
Answer:"""
ORIG_DEEP_DECOMPOSE_PROMPT = """ \n
An initial user question needs to be answered. An initial answer has been provided but it wasn't quite
good enough. Also, some sub-questions had been answered and this information has been used to provide
the initial answer. Some other subquestions may have been suggested based on little knowledge, but they
were not directly answerable. Also, some entities, relationships and terms are givenm to you so that
you have an idea of how the avaiolable data looks like.
Your role is to generate 3-5 new sub-questions that would help to answer the initial question,
considering:
1) The initial question
2) The initial answer that was found to be unsatisfactory
3) The sub-questions that were answered
4) The sub-questions that were suggested but not answered
5) The entities, relationships and terms that were extracted from the context
The individual questions should be answerable by a good RAG system.
So a good idea would be to use the sub-questions to resolve ambiguities and/or to separate the
question for different entities that may be involved in the original question, but in a way that does
not duplicate questions that were already tried.
Additional Guidelines:
- The sub-questions should be specific to the question and provide richer context for the question,
resolve ambiguities, or address shortcoming of the initial answer
- Each sub-question - when answered - should be relevant for the answer to the original question
- The sub-questions should be free from comparisions, ambiguities,judgements, aggregations, or any
other complications that may require extra context.
- The sub-questions MUST have the full context of the original question so that it can be executed by
a RAG system independently without the original question available
(Example:
- initial question: "What is the capital of France?"
- bad sub-question: "What is the name of the river there?"
- good sub-question: "What is the name of the river that flows through Paris?"
- For each sub-question, please provide a short explanation for why it is a good sub-question. So
generate a list of dictionaries with the following format:
[{{"sub_question": <sub-question>, "explanation": <explanation>, "search_term": <rewrite the
sub-question using as a search phrase for the document store>}}, ...]
\n\n
Here is the initial question:
\n ------- \n
{question}
\n ------- \n
Here is the initial sub-optimal answer:
\n ------- \n
{base_answer}
\n ------- \n
Here are the sub-questions that were answered:
\n ------- \n
{answered_sub_questions}
\n ------- \n
Here are the sub-questions that were suggested but not answered:
\n ------- \n
{failed_sub_questions}
\n ------- \n
And here are the entities, relationships and terms extracted from the context:
\n ------- \n
{entity_term_extraction_str}
\n ------- \n
Please generate the list of good, fully contextualized sub-questions that would help to address the
main question. Again, please find questions that are NOT overlapping too much with the already answered
sub-questions or those that already were suggested and failed.
In other words - what can we try in addition to what has been tried so far?
Please think through it step by step and then generate the list of json dictionaries with the following
format:
{{"sub_questions": [{{"sub_question": <sub-question>,
"explanation": <explanation>,
"search_term": <rewrite the sub-question using as a search phrase for the document store>}},
...]}} """
DEEP_DECOMPOSE_PROMPT = """ \n
An initial user question needs to be answered. An initial answer has been provided but it wasn't quite
good enough. Also, some sub-questions had been answered and this information has been used to provide
the initial answer. Some other subquestions may have been suggested based on little knowledge, but they
were not directly answerable. Also, some entities, relationships and terms are givenm to you so that
you have an idea of how the avaiolable data looks like.
Your role is to generate 4-6 new sub-questions that would help to answer the initial question,
considering:
1) The initial question
2) The initial answer that was found to be unsatisfactory
3) The sub-questions that were answered
4) The sub-questions that were suggested but not answered
5) The entities, relationships and terms that were extracted from the context
The individual questions should be answerable by a good RAG system.
So a good idea would be to use the sub-questions to resolve ambiguities and/or to separate the
question for different entities that may be involved in the original question, but in a way that does
not duplicate questions that were already tried.
Additional Guidelines:
- The sub-questions should be specific to the question and provide richer context for the question,
resolve ambiguities, or address shortcoming of the initial answer
- Each sub-question - when answered - should be relevant for the answer to the original question
- The sub-questions should be free from comparisions, ambiguities,judgements, aggregations, or any
other complications that may require extra context.
- The sub-questions MUST have the full context of the original question so that it can be executed by
a RAG system independently without the original question available
(Example:
- initial question: "What is the capital of France?"
- bad sub-question: "What is the name of the river there?"
- good sub-question: "What is the name of the river that flows through Paris?"
- For each sub-question, please also provide a search term that can be used to retrieve relevant
documents from a document store.
\n\n
Here is the initial question:
\n ------- \n
{question}
\n ------- \n
Here is the initial sub-optimal answer:
\n ------- \n
{base_answer}
\n ------- \n
Here are the sub-questions that were answered:
\n ------- \n
{answered_sub_questions}
\n ------- \n
Here are the sub-questions that were suggested but not answered:
\n ------- \n
{failed_sub_questions}
\n ------- \n
And here are the entities, relationships and terms extracted from the context:
\n ------- \n
{entity_term_extraction_str}
\n ------- \n
Please generate the list of good, fully contextualized sub-questions that would help to address the
main question. Again, please find questions that are NOT overlapping too much with the already answered
sub-questions or those that already were suggested and failed.
In other words - what can we try in addition to what has been tried so far?
Generate the list of json dictionaries with the following format:
{{"sub_questions": [{{"sub_question": <sub-question>,
"search_term": <rewrite the sub-question using as a search phrase for the document store>}},
...]}} """
DECOMPOSE_PROMPT = """ \n
For an initial user question, please generate at 5-10 individual sub-questions whose answers would help
\n to answer the initial question. The individual questions should be answerable by a good RAG system.
So a good idea would be to \n use the sub-questions to resolve ambiguities and/or to separate the
question for different entities that may be involved in the original question.
In order to arrive at meaningful sub-questions, please also consider the context retrieved from the
document store, expressed as entities, relationships and terms. You can also think about the types
mentioned in brackets
Guidelines:
- The sub-questions should be specific to the question and provide richer context for the question,
and or resolve ambiguities
- Each sub-question - when answered - should be relevant for the answer to the original question
- The sub-questions should be free from comparisions, ambiguities,judgements, aggregations, or any
other complications that may require extra context.
- The sub-questions MUST have the full context of the original question so that it can be executed by
a RAG system independently without the original question available
(Example:
- initial question: "What is the capital of France?"
- bad sub-question: "What is the name of the river there?"
- good sub-question: "What is the name of the river that flows through Paris?"
- For each sub-question, please provide a short explanation for why it is a good sub-question. So
generate a list of dictionaries with the following format:
[{{"sub_question": <sub-question>, "explanation": <explanation>, "search_term": <rewrite the
sub-question using as a search phrase for the document store>}}, ...]
\n\n
Here is the initial question:
\n ------- \n
{question}
\n ------- \n
And here are the entities, relationships and terms extracted from the context:
\n ------- \n
{entity_term_extraction_str}
\n ------- \n
Please generate the list of good, fully contextualized sub-questions that would help to address the
main question. Don't be too specific unless the original question is specific.
Please think through it step by step and then generate the list of json dictionaries with the following
format:
{{"sub_questions": [{{"sub_question": <sub-question>,
"explanation": <explanation>,
"search_term": <rewrite the sub-question using as a search phrase for the document store>}},
...]}} """
#### Consolidations
COMBINED_CONTEXT = """-------
Below you will find useful information to answer the original question. First, you see a number of
sub-questions with their answers. This information should be considered to be more focussed and
somewhat more specific to the original question as it tries to contextualized facts.
After that will see the documents that were considered to be relevant to answer the original question.
Here are the sub-questions and their answers:
\n\n {deep_answer_context} \n\n
\n\n Here are the documents that were considered to be relevant to answer the original question:
\n\n {formated_docs} \n\n
----------------
"""
SUB_QUESTION_EXPLANATION_RANKER_PROMPT = """-------
Below you will find a question that we ultimately want to answer (the original question) and a list of
motivations in arbitrary order for generated sub-questions that are supposed to help us answering the
original question. The motivations are formatted as <motivation number>: <motivation explanation>.
(Again, the numbering is arbitrary and does not necessarily mean that 1 is the most relevant
motivation and 2 is less relevant.)
Please rank the motivations in order of relevance for answering the original question. Also, try to
ensure that the top questions do not duplicate too much, i.e. that they are not too similar.
Ultimately, create a list with the motivation numbers where the number of the most relevant
motivations comes first.
Here is the original question:
\n\n {original_question} \n\n
\n\n Here is the list of sub-question motivations:
\n\n {sub_question_explanations} \n\n
----------------
Please think step by step and then generate the ranked list of motivations.
Please format your answer as a json object in the following format:
{{"reasonning": <explain your reasoning for the ranking>,
"ranked_motivations": <ranked list of motivation numbers>}}
"""
INITIAL_DECOMPOSITION_PROMPT_QUESTIONS = """
If you think it is helpful, please decompose an initial user question into 2 or 4 appropriate sub-questions that help to
answer the original question. The purpose for this decomposition is to
1) isolate individual entities (i.e., 'compare sales of company A and company B' -> ['what are sales for company A',
'what are sales for company B')]
2) clarify or disambiguate ambiguous terms (i.e., 'what is our success with company A' -> ['what are our sales with company A',
'what is our market share with company A', 'is company A a reference customer for us', etc.])
3) if a term or a metric is essentially clear, but it could relate to various components of an entity and you are generally
familiar with the entity, then you can decompose the question into sub-questions that are more specific to components
(i.e., 'what do we do to improve scalability of product X', 'what do we to to improve scalability of product X',
'what do we do to improve stability of product X', ...])
If you think that a decomposition is not needed or helpful, please just return an empty list. That is ok too.
Here is the initial question:
-------
{question}
-------
Please formulate your answer as a list of json objects with the following format:
[{{"sub_question": <sub-question>}}, ...]
Answer:"""
INITIAL_DECOMPOSITION_PROMPT = """ \n
Please decompose an initial user question into 2 or 3 appropriate sub-questions that help to
answer the original question. The purpose for this decomposition is to isolate individulal entities
(i.e., 'compare sales of company A and company B' -> 'what are sales for company A' + 'what are sales
for company B'), split ambiguous terms (i.e., 'what is our success with company A' -> 'what are our
sales with company A' + 'what is our market share with company A' + 'is company A a reference customer
for us'), etc. Each sub-question should be realistically be answerable by a good RAG system. \n
For each sub-question, please also create one search term that can be used to retrieve relevant
documents from a document store.
Here is the initial question:
\n ------- \n
{question}
\n ------- \n
Please formulate your answer as a list of json objects with the following format:
[{{"sub_question": <sub-question>, "search_term": <search term>}}, ...]
Answer:
"""
INITIAL_RAG_BASE_PROMPT = """ \n
You are an assistant for question-answering tasks. Use the information provided below - and only the
provided information - to answer the provided question.
The information provided below consists ofa number of documents that were deemed relevant for the question.
IMPORTANT RULES:
- If you cannot reliably answer the question solely using the provided information, say that you cannot reliably answer.
You may give some additional facts you learned, but do not try to invent an answer.
- If the information is empty or irrelevant, just say "I don't know".
- If the information is relevant but not fully conclusive, specify that the information is not conclusive and say why.
Try to keep your answer concise.
Here is the contextual information from the document store:
\n ------- \n
{context} \n\n\n
\n ------- \n
And here is the question I want you to answer based on the context above (with the motivation in mind):
\n--\n {question} \n--\n
Answer:"""
INITIAL_RAG_PROMPT = """ \n
You are an assistant for question-answering tasks. Use the information provided below - and only the
provided information - to answer the provided question.
The information provided below consists of:
1) a number of answered sub-questions - these are very important(!) and definitely should be
considered to answer the question.
2) a number of documents that were also deemed relevant for the question.
IMPORTANT RULES:
- If you cannot reliably answer the question solely using the provided information, say that you cannot reliably answer.
You may give some additional facts you learned, but do not try to invent an answer.
- If the information is empty or irrelevant, just say "I don't know".
- If the information is relevant but not fully conclusive, specify that the information is not conclusive and say why.
Again, you should be sure that the answer is supported by the information provided!
Try to keep your answer concise. But also highlight uncertainties you may have should there be substantial ones,
or assumptions you made.
Here is the contextual information:
\n-------\n
*Answered Sub-questions (these should really matter!):
{answered_sub_questions}
And here are relevant document information that support the sub-question answers, or that are relevant for the actual question:\n
{relevant_docs}
\n-------\n
\n
And here is the question I want you to answer based on the information above:
\n--\n
{question}
\n--\n\n
Answer:"""
INITIAL_RAG_PROMPT_NO_SUB_QUESTIONS = """
You are an assistant for question-answering tasks. Use the information provided below
- and only the provided information - to answer the provided question.
The information provided below consists of a number of documents that were deemed relevant for the question.
IMPORTANT RULES:
- If you cannot reliably answer the question solely using the provided information, say that you cannot reliably answer.
You may give some additional facts you learned, but do not try to invent an answer.
- If the information is irrelevant, just say "I don't know".
- If the information is relevant but not fully conclusive, specify that the information is not conclusive and say why.
Again, you should be sure that the answer is supported by the information provided!
Try to keep your answer concise.
Here are is the relevant context information:
\n-------\n
{relevant_docs}
\n-------\n
And here is the question I want you to answer based on the context above
\n--\n
{question}
\n--\n
Answer:"""
ENTITY_TERM_PROMPT = """ \n
Based on the original question and the context retieved from a dataset, please generate a list of
entities (e.g. companies, organizations, industries, products, locations, etc.), terms and concepts
(e.g. sales, revenue, etc.) that are relevant for the question, plus their relations to each other.
\n\n
Here is the original question:
\n ------- \n
{question}
\n ------- \n
And here is the context retrieved:
\n ------- \n
{context}
\n ------- \n
Please format your answer as a json object in the following format:
{{"retrieved_entities_relationships": {{
"entities": [{{
"entity_name": <assign a name for the entity>,
"entity_type": <specify a short type name for the entity, such as 'company', 'location',...>
}}],
"relationships": [{{
"name": <assign a name for the relationship>,
"type": <specify a short type name for the relationship, such as 'sales_to', 'is_location_of',...>,
"entities": [<related entity name 1>, <related entity name 2>]
}}],
"terms": [{{
"term_name": <assign a name for the term>,
"term_type": <specify a short type name for the term, such as 'revenue', 'market_share',...>,
"similar_to": <list terms that are similar to this term>
}}]
}}
}}
"""

View File

@@ -1,101 +0,0 @@
import ast
import json
import re
from collections.abc import Sequence
from datetime import datetime
from datetime import timedelta
from typing import Any
from onyx.context.search.models import InferenceSection
def normalize_whitespace(text: str) -> str:
"""Normalize whitespace in text to single spaces and strip leading/trailing whitespace."""
import re
return re.sub(r"\s+", " ", text.strip())
# Post-processing
def format_docs(docs: Sequence[InferenceSection]) -> str:
return "\n\n".join(doc.combined_content for doc in docs)
def clean_and_parse_list_string(json_string: str) -> list[dict]:
# Remove any prefixes/labels before the actual JSON content
json_string = re.sub(r"^.*?(?=\[)", "", json_string, flags=re.DOTALL)
# Remove markdown code block markers and any newline prefixes
cleaned_string = re.sub(r"```json\n|\n```", "", json_string)
cleaned_string = cleaned_string.replace("\\n", " ").replace("\n", " ")
cleaned_string = " ".join(cleaned_string.split())
# Try parsing with json.loads first, fall back to ast.literal_eval
try:
return json.loads(cleaned_string)
except json.JSONDecodeError:
try:
return ast.literal_eval(cleaned_string)
except (ValueError, SyntaxError) as e:
raise ValueError(f"Failed to parse JSON string: {cleaned_string}") from e
def clean_and_parse_json_string(json_string: str) -> dict[str, Any]:
# Remove markdown code block markers and any newline prefixes
cleaned_string = re.sub(r"```json\n|\n```", "", json_string)
cleaned_string = cleaned_string.replace("\\n", " ").replace("\n", " ")
cleaned_string = " ".join(cleaned_string.split())
# Parse the cleaned string into a Python dictionary
return json.loads(cleaned_string)
def format_entity_term_extraction(entity_term_extraction_dict: dict[str, Any]) -> str:
entities = entity_term_extraction_dict["entities"]
terms = entity_term_extraction_dict["terms"]
relationships = entity_term_extraction_dict["relationships"]
entity_strs = ["\nEntities:\n"]
for entity in entities:
entity_str = f"{entity['entity_name']} ({entity['entity_type']})"
entity_strs.append(entity_str)
entity_str = "\n - ".join(entity_strs)
relationship_strs = ["\n\nRelationships:\n"]
for relationship in relationships:
relationship_str = f"{relationship['name']} ({relationship['type']}): {relationship['entities']}"
relationship_strs.append(relationship_str)
relationship_str = "\n - ".join(relationship_strs)
term_strs = ["\n\nTerms:\n"]
for term in terms:
term_str = f"{term['term_name']} ({term['term_type']}): similar to {term['similar_to']}"
term_strs.append(term_str)
term_str = "\n - ".join(term_strs)
return "\n".join(entity_strs + relationship_strs + term_strs)
def _format_time_delta(time: timedelta) -> str:
seconds_from_start = f"{((time).seconds):03d}"
microseconds_from_start = f"{((time).microseconds):06d}"
return f"{seconds_from_start}.{microseconds_from_start}"
def generate_log_message(
message: str,
node_start_time: datetime,
graph_start_time: datetime | None = None,
) -> str:
current_time = datetime.now()
if graph_start_time is not None:
graph_time_str = _format_time_delta(current_time - graph_start_time)
else:
graph_time_str = "N/A"
node_time_str = _format_time_delta(current_time - node_start_time)
return f"{graph_time_str} ({node_time_str} s): {message}"

View File

@@ -5,6 +5,7 @@ from datetime import datetime
from datetime import timezone
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from typing import cast
from typing import Dict
from typing import List
from typing import Optional
@@ -228,18 +229,26 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
safe: bool = False,
request: Optional[Request] = None,
) -> User:
# We verify the password here to make sure it's valid before we proceed
await self.validate_password(
user_create.password, cast(schemas.UC, user_create)
)
user_count: int | None = None
referral_source = None
if request is not None:
referral_source = request.cookies.get("referral_source", None)
referral_source = (
request.cookies.get("referral_source", None)
if request is not None
else None
)
tenant_id = await fetch_ee_implementation_or_noop(
"onyx.server.tenants.provisioning",
"get_or_create_tenant_id",
"get_or_provision_tenant",
async_return_default_schema,
)(
email=user_create.email,
referral_source=referral_source,
request=request,
)
async with get_async_session_with_tenant(tenant_id) as db_session:
@@ -282,25 +291,6 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
finally:
CURRENT_TENANT_ID_CONTEXTVAR.reset(token)
# Blocking but this should be very quick
with get_session_with_tenant(tenant_id) as db_session:
if not user_count:
create_milestone_and_report(
user=user,
distinct_id=user.email,
event_type=MilestoneRecordType.USER_SIGNED_UP,
properties=None,
db_session=db_session,
)
else:
create_milestone_and_report(
user=user,
distinct_id=user.email,
event_type=MilestoneRecordType.MULTIPLE_USERS,
properties=None,
db_session=db_session,
)
return user
async def validate_password(self, password: str, _: schemas.UC | models.UP) -> None:
@@ -346,17 +336,18 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
associate_by_email: bool = False,
is_verified_by_default: bool = False,
) -> User:
referral_source = None
if request:
referral_source = getattr(request.state, "referral_source", None)
referral_source = (
getattr(request.state, "referral_source", None) if request else None
)
tenant_id = await fetch_ee_implementation_or_noop(
"onyx.server.tenants.provisioning",
"get_or_create_tenant_id",
"get_or_provision_tenant",
async_return_default_schema,
)(
email=account_email,
referral_source=referral_source,
request=request,
)
if not tenant_id:
@@ -418,6 +409,7 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
# Add OAuth account
await self.user_db.add_oauth_account(user, oauth_account_dict)
await self.on_after_register(user, request)
else:
@@ -471,6 +463,39 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
async def on_after_register(
self, user: User, request: Optional[Request] = None
) -> None:
tenant_id = await fetch_ee_implementation_or_noop(
"onyx.server.tenants.provisioning",
"get_or_provision_tenant",
async_return_default_schema,
)(
email=user.email,
request=request,
)
token = CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)
try:
user_count = await get_user_count()
with get_session_with_tenant(tenant_id=tenant_id) as db_session:
if user_count == 1:
create_milestone_and_report(
user=user,
distinct_id=user.email,
event_type=MilestoneRecordType.USER_SIGNED_UP,
properties=None,
db_session=db_session,
)
else:
create_milestone_and_report(
user=user,
distinct_id=user.email,
event_type=MilestoneRecordType.MULTIPLE_USERS,
properties=None,
db_session=db_session,
)
finally:
CURRENT_TENANT_ID_CONTEXTVAR.reset(token)
logger.notice(f"User {user.id} has registered.")
optional_telemetry(
record_type=RecordType.SIGN_UP,
@@ -502,7 +527,7 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
# Get tenant_id from mapping table
tenant_id = await fetch_ee_implementation_or_noop(
"onyx.server.tenants.provisioning",
"get_or_create_tenant_id",
"get_or_provision_tenant",
async_return_default_schema,
)(
email=email,
@@ -563,7 +588,7 @@ class TenantAwareJWTStrategy(JWTStrategy):
async def _create_token_data(self, user: User, impersonate: bool = False) -> dict:
tenant_id = await fetch_ee_implementation_or_noop(
"onyx.server.tenants.provisioning",
"get_or_create_tenant_id",
"get_or_provision_tenant",
async_return_default_schema,
)(
email=user.email,

View File

@@ -3,11 +3,12 @@ import multiprocessing
import time
from typing import Any
import requests
import sentry_sdk
from celery import Task
from celery.app import trace
from celery.exceptions import WorkerShutdown
from celery.signals import task_postrun
from celery.signals import task_prerun
from celery.states import READY_STATES
from celery.utils.log import get_task_logger
from celery.worker import strategy # type: ignore
@@ -21,6 +22,7 @@ from onyx.background.celery.apps.task_formatters import CeleryTaskPlainFormatter
from onyx.background.celery.celery_utils import celery_is_worker_primary
from onyx.configs.constants import OnyxRedisLocks
from onyx.db.engine import get_sqlalchemy_engine
from onyx.document_index.vespa.shared_utils.utils import get_vespa_http_client
from onyx.document_index.vespa_constants import VESPA_CONFIG_SERVER_URL
from onyx.redis.redis_connector import RedisConnector
from onyx.redis.redis_connector_credential_pair import RedisConnectorCredentialPair
@@ -34,8 +36,11 @@ from onyx.redis.redis_usergroup import RedisUserGroup
from onyx.utils.logger import ColoredFormatter
from onyx.utils.logger import PlainFormatter
from onyx.utils.logger import setup_logger
from shared_configs.configs import MULTI_TENANT
from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA
from shared_configs.configs import SENTRY_DSN
from shared_configs.configs import TENANT_ID_PREFIX
from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
logger = setup_logger()
@@ -56,8 +61,8 @@ def on_task_prerun(
sender: Any | None = None,
task_id: str | None = None,
task: Task | None = None,
args: tuple | None = None,
kwargs: dict | None = None,
args: tuple[Any, ...] | None = None,
kwargs: dict[str, Any] | None = None,
**kwds: Any,
) -> None:
pass
@@ -257,7 +262,8 @@ def wait_for_vespa(sender: Any, **kwargs: Any) -> None:
logger.info("Vespa: Readiness probe starting.")
while True:
try:
response = requests.get(f"{VESPA_CONFIG_SERVER_URL}/state/v1/health")
client = get_vespa_http_client()
response = client.get(f"{VESPA_CONFIG_SERVER_URL}/state/v1/health")
response.raise_for_status()
response_dict = response.json()
@@ -346,26 +352,36 @@ def on_worker_shutdown(sender: Any, **kwargs: Any) -> None:
def on_setup_logging(
loglevel: Any, logfile: Any, format: Any, colorize: Any, **kwargs: Any
loglevel: int,
logfile: str | None,
format: str,
colorize: bool,
**kwargs: Any,
) -> None:
# TODO: could unhardcode format and colorize and accept these as options from
# celery's config
# reformats the root logger
root_logger = logging.getLogger()
root_logger.handlers = []
root_handler = logging.StreamHandler() # Set up a handler for the root logger
# Define the log format
log_format = (
"%(levelname)-8s %(asctime)s %(filename)15s:%(lineno)-4d: %(name)s %(message)s"
)
# Set up the root handler
root_handler = logging.StreamHandler()
root_formatter = ColoredFormatter(
"%(asctime)s %(filename)30s %(lineno)4s: %(message)s",
log_format,
datefmt="%m/%d/%Y %I:%M:%S %p",
)
root_handler.setFormatter(root_formatter)
root_logger.addHandler(root_handler) # Apply the handler to the root logger
root_logger.addHandler(root_handler)
if logfile:
root_file_handler = logging.FileHandler(logfile)
root_file_formatter = PlainFormatter(
"%(asctime)s %(filename)30s %(lineno)4s: %(message)s",
log_format,
datefmt="%m/%d/%Y %I:%M:%S %p",
)
root_file_handler.setFormatter(root_file_formatter)
@@ -373,19 +389,23 @@ def on_setup_logging(
root_logger.setLevel(loglevel)
# reformats celery's task logger
# Configure the task logger
task_logger.handlers = []
task_handler = logging.StreamHandler()
task_handler.addFilter(TenantContextFilter())
task_formatter = CeleryTaskColoredFormatter(
"%(asctime)s %(filename)30s %(lineno)4s: %(message)s",
log_format,
datefmt="%m/%d/%Y %I:%M:%S %p",
)
task_handler = logging.StreamHandler() # Set up a handler for the task logger
task_handler.setFormatter(task_formatter)
task_logger.addHandler(task_handler) # Apply the handler to the task logger
task_logger.addHandler(task_handler)
if logfile:
task_file_handler = logging.FileHandler(logfile)
task_file_handler.addFilter(TenantContextFilter())
task_file_formatter = CeleryTaskPlainFormatter(
"%(asctime)s %(filename)30s %(lineno)4s: %(message)s",
log_format,
datefmt="%m/%d/%Y %I:%M:%S %p",
)
task_file_handler.setFormatter(task_file_formatter)
@@ -394,10 +414,55 @@ def on_setup_logging(
task_logger.setLevel(loglevel)
task_logger.propagate = False
# hide celery task received spam
# e.g. "Task check_for_pruning[a1e96171-0ba8-4e00-887b-9fbf7442eab3] received"
# Hide celery task received and succeeded/failed messages
strategy.logger.setLevel(logging.WARNING)
# hide celery task succeeded/failed spam
# e.g. "Task check_for_pruning[a1e96171-0ba8-4e00-887b-9fbf7442eab3] succeeded in 0.03137450001668185s: None"
trace.logger.setLevel(logging.WARNING)
class TenantContextFilter(logging.Filter):
"""Logging filter to inject tenant ID into the logger's name."""
def filter(self, record: logging.LogRecord) -> bool:
if not MULTI_TENANT:
record.name = ""
return True
tenant_id = CURRENT_TENANT_ID_CONTEXTVAR.get()
if tenant_id:
tenant_id = tenant_id.split(TENANT_ID_PREFIX)[-1][:5]
record.name = f"[t:{tenant_id}]"
else:
record.name = ""
return True
@task_prerun.connect
def set_tenant_id(
sender: Any | None = None,
task_id: str | None = None,
task: Task | None = None,
args: tuple[Any, ...] | None = None,
kwargs: dict[str, Any] | None = None,
**other_kwargs: Any,
) -> None:
"""Signal handler to set tenant ID in context var before task starts."""
tenant_id = (
kwargs.get("tenant_id", POSTGRES_DEFAULT_SCHEMA)
if kwargs
else POSTGRES_DEFAULT_SCHEMA
)
CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)
@task_postrun.connect
def reset_tenant_id(
sender: Any | None = None,
task_id: str | None = None,
task: Task | None = None,
args: tuple[Any, ...] | None = None,
kwargs: dict[str, Any] | None = None,
**other_kwargs: Any,
) -> None:
"""Signal handler to reset tenant ID in context var after task ends."""
CURRENT_TENANT_ID_CONTEXTVAR.set(POSTGRES_DEFAULT_SCHEMA)

View File

@@ -13,7 +13,6 @@ from onyx.db.engine import SqlEngine
from onyx.utils.logger import setup_logger
from onyx.utils.variable_functionality import fetch_versioned_implementation
from shared_configs.configs import IGNORED_SYNCING_TENANT_LIST
from shared_configs.configs import MULTI_TENANT
logger = setup_logger(__name__)
@@ -44,18 +43,18 @@ class DynamicTenantScheduler(PersistentScheduler):
self._last_reload is None
or (now - self._last_reload) > self._reload_interval
):
logger.info("Reload interval reached, initiating tenant task update")
logger.info("Reload interval reached, initiating task update")
self._update_tenant_tasks()
self._last_reload = now
logger.info("Tenant task update completed, reset reload timer")
logger.info("Task update completed, reset reload timer")
return retval
def _update_tenant_tasks(self) -> None:
logger.info("Starting tenant task update process")
logger.info("Starting task update process")
try:
logger.info("Fetching all tenant IDs")
logger.info("Fetching all IDs")
tenant_ids = get_all_tenant_ids()
logger.info(f"Found {len(tenant_ids)} tenants")
logger.info(f"Found {len(tenant_ids)} IDs")
logger.info("Fetching tasks to schedule")
tasks_to_schedule = fetch_versioned_implementation(
@@ -70,7 +69,7 @@ class DynamicTenantScheduler(PersistentScheduler):
for task_name, _ in current_schedule:
if "-" in task_name:
existing_tenants.add(task_name.split("-")[-1])
logger.info(f"Found {len(existing_tenants)} existing tenants in schedule")
logger.info(f"Found {len(existing_tenants)} existing items in schedule")
for tenant_id in tenant_ids:
if (
@@ -83,7 +82,7 @@ class DynamicTenantScheduler(PersistentScheduler):
continue
if tenant_id not in existing_tenants:
logger.info(f"Processing new tenant: {tenant_id}")
logger.info(f"Processing new item: {tenant_id}")
for task in tasks_to_schedule():
task_name = f"{task['name']}-{tenant_id}"
@@ -129,11 +128,10 @@ class DynamicTenantScheduler(PersistentScheduler):
logger.info("Schedule update completed successfully")
else:
logger.info("Schedule is up to date, no changes needed")
except (AttributeError, KeyError):
logger.exception("Failed to process task configuration")
except Exception:
logger.exception("Unexpected error updating tenant tasks")
except (AttributeError, KeyError) as e:
logger.exception(f"Failed to process task configuration: {str(e)}")
except Exception as e:
logger.exception(f"Unexpected error updating tasks: {str(e)}")
def _should_update_schedule(
self, current_schedule: dict, new_schedule: dict
@@ -155,10 +153,6 @@ def on_beat_init(sender: Any, **kwargs: Any) -> None:
SqlEngine.set_app_name(POSTGRES_CELERY_BEAT_APP_NAME)
SqlEngine.init_engine(pool_size=2, max_overflow=0)
# Startup checks are not needed in multi-tenant case
if MULTI_TENANT:
return
app_base.wait_for_redis(sender, **kwargs)

View File

@@ -61,13 +61,14 @@ def on_worker_init(sender: Any, **kwargs: Any) -> None:
SqlEngine.set_app_name(POSTGRES_CELERY_WORKER_HEAVY_APP_NAME)
SqlEngine.init_engine(pool_size=4, max_overflow=12)
# Startup checks are not needed in multi-tenant case
if MULTI_TENANT:
return
app_base.wait_for_redis(sender, **kwargs)
app_base.wait_for_db(sender, **kwargs)
app_base.wait_for_vespa(sender, **kwargs)
# Less startup checks in multi-tenant case
if MULTI_TENANT:
return
app_base.on_secondary_worker_init(sender, **kwargs)

View File

@@ -62,13 +62,14 @@ def on_worker_init(sender: Any, **kwargs: Any) -> None:
SqlEngine.set_app_name(POSTGRES_CELERY_WORKER_INDEXING_APP_NAME)
SqlEngine.init_engine(pool_size=sender.concurrency, max_overflow=sender.concurrency)
# Startup checks are not needed in multi-tenant case
if MULTI_TENANT:
return
app_base.wait_for_redis(sender, **kwargs)
app_base.wait_for_db(sender, **kwargs)
app_base.wait_for_vespa(sender, **kwargs)
# Less startup checks in multi-tenant case
if MULTI_TENANT:
return
app_base.on_secondary_worker_init(sender, **kwargs)

View File

@@ -60,13 +60,15 @@ def on_worker_init(sender: Any, **kwargs: Any) -> None:
SqlEngine.set_app_name(POSTGRES_CELERY_WORKER_LIGHT_APP_NAME)
SqlEngine.init_engine(pool_size=sender.concurrency, max_overflow=8)
# Startup checks are not needed in multi-tenant case
if MULTI_TENANT:
return
app_base.wait_for_redis(sender, **kwargs)
app_base.wait_for_db(sender, **kwargs)
app_base.wait_for_vespa(sender, **kwargs)
# Less startup checks in multi-tenant case
if MULTI_TENANT:
return
app_base.on_secondary_worker_init(sender, **kwargs)

View File

@@ -84,14 +84,14 @@ def on_worker_init(sender: Any, **kwargs: Any) -> None:
SqlEngine.set_app_name(POSTGRES_CELERY_WORKER_PRIMARY_APP_NAME)
SqlEngine.init_engine(pool_size=8, max_overflow=0)
# Startup checks are not needed in multi-tenant case
if MULTI_TENANT:
return
app_base.wait_for_redis(sender, **kwargs)
app_base.wait_for_db(sender, **kwargs)
app_base.wait_for_vespa(sender, **kwargs)
# Less startup checks in multi-tenant case
if MULTI_TENANT:
return
logger.info("Running as the primary celery worker.")
# This is singleton work that should be done on startup exactly once

View File

@@ -1,4 +1,6 @@
# These are helper objects for tracking the keys we need to write in redis
import json
from typing import Any
from typing import cast
from redis import Redis
@@ -23,3 +25,25 @@ def celery_get_queue_length(queue: str, r: Redis) -> int:
total_length += cast(int, length)
return total_length
def celery_find_task(task_id: str, queue: str, r: Redis) -> int:
"""This is a redis specific way to find a task for a particular queue in redis.
It is priority aware and knows how to look through the multiple redis lists
used to implement task prioritization.
This operation is not atomic.
This is a linear search O(n) ... so be careful using it when the task queues can be larger.
Returns true if the id is in the queue, False if not.
"""
for priority in range(len(OnyxCeleryPriority)):
queue_name = f"{queue}{CELERY_SEPARATOR}{priority}" if priority > 0 else queue
tasks = cast(list[bytes], r.lrange(queue_name, 0, -1))
for task in tasks:
task_dict: dict[str, Any] = json.loads(task.decode("utf-8"))
if task_dict.get("headers", {}).get("id") == task_id:
return True
return False

View File

@@ -4,55 +4,80 @@ from typing import Any
from onyx.configs.constants import OnyxCeleryPriority
from onyx.configs.constants import OnyxCeleryTask
# we set expires because it isn't necessary to queue up these tasks
# it's only important that they run relatively regularly
tasks_to_schedule = [
{
"name": "check-for-vespa-sync",
"task": OnyxCeleryTask.CHECK_FOR_VESPA_SYNC_TASK,
"schedule": timedelta(seconds=20),
"options": {"priority": OnyxCeleryPriority.HIGH},
"options": {
"priority": OnyxCeleryPriority.HIGH,
"expires": 60,
},
},
{
"name": "check-for-connector-deletion",
"task": OnyxCeleryTask.CHECK_FOR_CONNECTOR_DELETION,
"schedule": timedelta(seconds=20),
"options": {"priority": OnyxCeleryPriority.HIGH},
"options": {
"priority": OnyxCeleryPriority.HIGH,
"expires": 60,
},
},
{
"name": "check-for-indexing",
"task": OnyxCeleryTask.CHECK_FOR_INDEXING,
"schedule": timedelta(seconds=15),
"options": {"priority": OnyxCeleryPriority.HIGH},
"options": {
"priority": OnyxCeleryPriority.HIGH,
"expires": 60,
},
},
{
"name": "check-for-prune",
"task": OnyxCeleryTask.CHECK_FOR_PRUNING,
"schedule": timedelta(seconds=15),
"options": {"priority": OnyxCeleryPriority.HIGH},
"options": {
"priority": OnyxCeleryPriority.HIGH,
"expires": 60,
},
},
{
"name": "kombu-message-cleanup",
"task": OnyxCeleryTask.KOMBU_MESSAGE_CLEANUP_TASK,
"schedule": timedelta(seconds=3600),
"options": {"priority": OnyxCeleryPriority.LOWEST},
"options": {
"priority": OnyxCeleryPriority.LOWEST,
"expires": 60,
},
},
{
"name": "monitor-vespa-sync",
"task": OnyxCeleryTask.MONITOR_VESPA_SYNC,
"schedule": timedelta(seconds=5),
"options": {"priority": OnyxCeleryPriority.HIGH},
"options": {
"priority": OnyxCeleryPriority.HIGH,
"expires": 60,
},
},
{
"name": "check-for-doc-permissions-sync",
"task": OnyxCeleryTask.CHECK_FOR_DOC_PERMISSIONS_SYNC,
"schedule": timedelta(seconds=30),
"options": {"priority": OnyxCeleryPriority.HIGH},
"options": {
"priority": OnyxCeleryPriority.HIGH,
"expires": 60,
},
},
{
"name": "check-for-external-group-sync",
"task": OnyxCeleryTask.CHECK_FOR_EXTERNAL_GROUP_SYNC,
"schedule": timedelta(seconds=20),
"options": {"priority": OnyxCeleryPriority.HIGH},
"options": {
"priority": OnyxCeleryPriority.HIGH,
"expires": 60,
},
},
]

View File

@@ -76,7 +76,7 @@ def check_for_connector_deletion_task(self: Task, *, tenant_id: str | None) -> N
"Soft time limit exceeded, task is being terminated gracefully."
)
except Exception:
task_logger.exception(f"Unexpected exception: tenant={tenant_id}")
task_logger.exception("Unexpected exception during connector deletion check")
finally:
if lock_beat.owned():
lock_beat.release()
@@ -131,14 +131,14 @@ def try_generate_document_cc_pair_cleanup_tasks(
redis_connector_index = redis_connector.new_index(search_settings.id)
if redis_connector_index.fenced:
raise TaskDependencyError(
f"Connector deletion - Delayed (indexing in progress): "
"Connector deletion - Delayed (indexing in progress): "
f"cc_pair={cc_pair_id} "
f"search_settings={search_settings.id}"
)
if redis_connector.prune.fenced:
raise TaskDependencyError(
f"Connector deletion - Delayed (pruning in progress): "
"Connector deletion - Delayed (pruning in progress): "
f"cc_pair={cc_pair_id}"
)
@@ -175,7 +175,7 @@ def try_generate_document_cc_pair_cleanup_tasks(
# return 0
task_logger.info(
f"RedisConnectorDeletion.generate_tasks finished. "
"RedisConnectorDeletion.generate_tasks finished. "
f"cc_pair={cc_pair_id} tasks_generated={tasks_generated}"
)

View File

@@ -1,7 +1,9 @@
import time
from datetime import datetime
from datetime import timezone
from http import HTTPStatus
from time import sleep
from typing import Any
import redis
import sentry_sdk
@@ -15,6 +17,7 @@ from redis.lock import Lock as RedisLock
from sqlalchemy.orm import Session
from onyx.background.celery.apps.app_base import task_logger
from onyx.background.celery.celery_redis import celery_find_task
from onyx.background.indexing.job_client import SimpleJobClient
from onyx.background.indexing.run_indexing import run_indexing_entrypoint
from onyx.configs.app_configs import DISABLE_INDEX_UPDATE_ON_SWAP
@@ -162,11 +165,19 @@ def get_unfenced_index_attempt_ids(db_session: Session, r: redis.Redis) -> list[
bind=True,
)
def check_for_indexing(self: Task, *, tenant_id: str | None) -> int | None:
"""a lightweight task used to kick off indexing tasks.
Occcasionally does some validation of existing state to clear up error conditions"""
time_start = time.monotonic()
tasks_created = 0
locked = False
r = get_redis_client(tenant_id=tenant_id)
redis_client = get_redis_client(tenant_id=tenant_id)
lock_beat: RedisLock = r.lock(
# we need to use celery's redis client to access its redis data
# (which lives on a different db number)
# redis_client_celery: Redis = self.app.broker_connection().channel().client # type: ignore
lock_beat: RedisLock = redis_client.lock(
OnyxRedisLocks.CHECK_INDEXING_BEAT_LOCK,
timeout=CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT,
)
@@ -271,7 +282,7 @@ def check_for_indexing(self: Task, *, tenant_id: str | None) -> int | None:
search_settings_instance,
reindex,
db_session,
r,
redis_client,
tenant_id,
)
if attempt_id:
@@ -286,7 +297,9 @@ def check_for_indexing(self: Task, *, tenant_id: str | None) -> int | None:
# Fail any index attempts in the DB that don't have fences
# This shouldn't ever happen!
with get_session_with_tenant(tenant_id) as db_session:
unfenced_attempt_ids = get_unfenced_index_attempt_ids(db_session, r)
unfenced_attempt_ids = get_unfenced_index_attempt_ids(
db_session, redis_client
)
for attempt_id in unfenced_attempt_ids:
lock_beat.reacquire()
@@ -304,12 +317,31 @@ def check_for_indexing(self: Task, *, tenant_id: str | None) -> int | None:
mark_attempt_failed(
attempt.id, db_session, failure_reason=failure_reason
)
# rkuo: The following code logically appears to work, but the celery inspect code may be unstable
# turning off for the moment to see if it helps cloud stability
# we want to run this less frequently than the overall task
# if not redis_client.exists(OnyxRedisSignals.VALIDATE_INDEXING_FENCES):
# # clear any indexing fences that don't have associated celery tasks in progress
# # tasks can be in the queue in redis, in reserved tasks (prefetched by the worker),
# # or be currently executing
# try:
# task_logger.info("Validating indexing fences...")
# validate_indexing_fences(
# tenant_id, self.app, redis_client, redis_client_celery, lock_beat
# )
# except Exception:
# task_logger.exception("Exception while validating indexing fences")
# redis_client.set(OnyxRedisSignals.VALIDATE_INDEXING_FENCES, 1, ex=60)
except SoftTimeLimitExceeded:
task_logger.info(
"Soft time limit exceeded, task is being terminated gracefully."
)
except Exception:
task_logger.exception(f"Unexpected exception: tenant={tenant_id}")
task_logger.exception("Unexpected exception during indexing check")
finally:
if locked:
if lock_beat.owned():
@@ -320,9 +352,190 @@ def check_for_indexing(self: Task, *, tenant_id: str | None) -> int | None:
f"tenant={tenant_id}"
)
time_elapsed = time.monotonic() - time_start
task_logger.info(f"check_for_indexing finished: elapsed={time_elapsed:.2f}")
return tasks_created
def validate_indexing_fences(
tenant_id: str | None,
celery_app: Celery,
r: Redis,
r_celery: Redis,
lock_beat: RedisLock,
) -> None:
reserved_indexing_tasks: set[str] = set()
active_indexing_tasks: set[str] = set()
indexing_worker_names: list[str] = []
# filter for and create an indexing specific inspect object
inspect = celery_app.control.inspect()
workers: dict[str, Any] = inspect.ping() # type: ignore
if not workers:
raise ValueError("No workers found!")
for worker_name in list(workers.keys()):
if "indexing" in worker_name:
indexing_worker_names.append(worker_name)
if len(indexing_worker_names) == 0:
raise ValueError("No indexing workers found!")
inspect_indexing = celery_app.control.inspect(destination=indexing_worker_names)
# NOTE: each dict entry is a map of worker name to a list of tasks
# we want sets for reserved task and active task id's to optimize
# subsequent validation lookups
# get the list of reserved tasks
reserved_tasks: dict[str, list] | None = inspect_indexing.reserved() # type: ignore
if reserved_tasks is None:
raise ValueError("inspect_indexing.reserved() returned None!")
for _, task_list in reserved_tasks.items():
for task in task_list:
reserved_indexing_tasks.add(task["id"])
# get the list of active tasks
active_tasks: dict[str, list] | None = inspect_indexing.active() # type: ignore
if active_tasks is None:
raise ValueError("inspect_indexing.active() returned None!")
for _, task_list in active_tasks.items():
for task in task_list:
active_indexing_tasks.add(task["id"])
# validate all existing indexing jobs
for key_bytes in r.scan_iter(RedisConnectorIndex.FENCE_PREFIX + "*"):
lock_beat.reacquire()
with get_session_with_tenant(tenant_id) as db_session:
validate_indexing_fence(
tenant_id,
key_bytes,
reserved_indexing_tasks,
active_indexing_tasks,
r_celery,
db_session,
)
return
def validate_indexing_fence(
tenant_id: str | None,
key_bytes: bytes,
reserved_tasks: set[str],
active_tasks: set[str],
r_celery: Redis,
db_session: Session,
) -> None:
"""Checks for the error condition where an indexing fence is set but the associated celery tasks don't exist.
This can happen if the indexing worker hard crashes or is terminated.
Being in this bad state means the fence will never clear without help, so this function
gives the help.
How this works:
1. Active signal is renewed with a 5 minute TTL
1.1 When the fence is created
1.2. When the task is seen in the redis queue
1.3. When the task is seen in the reserved or active list for a worker
2. The TTL allows us to get through the transitions on fence startup
and when the task starts executing.
More TTL clarification: it is seemingly impossible to exactly query Celery for
whether a task is in the queue or currently executing.
1. An unknown task id is always returned as state PENDING.
2. Redis can be inspected for the task id, but the task id is gone between the time a worker receives the task
and the time it actually starts on the worker.
"""
# if the fence doesn't exist, there's nothing to do
fence_key = key_bytes.decode("utf-8")
composite_id = RedisConnector.get_id_from_fence_key(fence_key)
if composite_id is None:
task_logger.warning(
f"validate_indexing_fence - could not parse composite_id from {fence_key}"
)
return
# parse out metadata and initialize the helper class with it
parts = composite_id.split("/")
if len(parts) != 2:
return
cc_pair_id = int(parts[0])
search_settings_id = int(parts[1])
redis_connector = RedisConnector(tenant_id, cc_pair_id)
redis_connector_index = redis_connector.new_index(search_settings_id)
if not redis_connector_index.fenced:
return
payload = redis_connector_index.payload
if not payload:
return
# OK, there's actually something for us to validate
if payload.celery_task_id is None:
# the fence is just barely set up.
if redis_connector_index.active():
return
# it would be odd to get here as there isn't that much that can go wrong during
# initial fence setup, but it's still worth making sure we can recover
logger.info(
f"validate_indexing_fence - Resetting fence in basic state without any activity: fence={fence_key}"
)
redis_connector_index.reset()
return
found = celery_find_task(
payload.celery_task_id, OnyxCeleryQueues.CONNECTOR_INDEXING, r_celery
)
if found:
# the celery task exists in the redis queue
redis_connector_index.set_active()
return
if payload.celery_task_id in reserved_tasks:
# the celery task was prefetched and is reserved within the indexing worker
redis_connector_index.set_active()
return
if payload.celery_task_id in active_tasks:
# the celery task is active (aka currently executing)
redis_connector_index.set_active()
return
# we may want to enable this check if using the active task list somehow isn't good enough
# if redis_connector_index.generator_locked():
# logger.info(f"{payload.celery_task_id} is currently executing.")
# we didn't find any direct indication that associated celery tasks exist, but they still might be there
# due to gaps in our ability to check states during transitions
# Rely on the active signal (which has a duration that allows us to bridge those gaps)
if redis_connector_index.active():
return
# celery tasks don't exist and the active signal has expired, possibly due to a crash. Clean it up.
logger.warning(
f"validate_indexing_fence - Resetting fence because no associated celery tasks were found: fence={fence_key}"
)
if payload.index_attempt_id:
try:
mark_attempt_failed(
payload.index_attempt_id,
db_session,
"validate_indexing_fence - Canceling index attempt due to missing celery tasks",
)
except Exception:
logger.exception(
"validate_indexing_fence - Exception while marking index attempt as failed."
)
redis_connector_index.reset()
return
def _should_index(
cc_pair: ConnectorCredentialPair,
last_index: IndexAttempt | None,
@@ -469,6 +682,7 @@ def try_creating_indexing_task(
celery_task_id=None,
)
redis_connector_index.set_active()
redis_connector_index.set_fence(payload)
# create the index attempt for tracking purposes
@@ -502,13 +716,14 @@ def try_creating_indexing_task(
raise RuntimeError("send_task for connector_indexing_proxy_task failed.")
# now fill out the fence with the rest of the data
redis_connector_index.set_active()
payload.index_attempt_id = index_attempt_id
payload.celery_task_id = result.id
redis_connector_index.set_fence(payload)
except Exception:
task_logger.exception(
f"try_creating_indexing_task - Unexpected exception: "
f"tenant={tenant_id} "
f"cc_pair={cc_pair.id} "
f"search_settings={search_settings.id}"
)
@@ -540,7 +755,6 @@ def connector_indexing_proxy_task(
"""celery tasks are forked, but forking is unstable. This proxies work to a spawned task."""
task_logger.info(
f"Indexing watchdog - starting: attempt={index_attempt_id} "
f"tenant={tenant_id} "
f"cc_pair={cc_pair_id} "
f"search_settings={search_settings_id}"
)
@@ -563,15 +777,14 @@ def connector_indexing_proxy_task(
if not job:
task_logger.info(
f"Indexing watchdog - spawn failed: attempt={index_attempt_id} "
f"tenant={tenant_id} "
f"cc_pair={cc_pair_id} "
f"search_settings={search_settings_id}"
)
return
task_logger.info(
f"Indexing proxy - spawn succeeded: attempt={index_attempt_id} "
f"Indexing watchdog - spawn succeeded: attempt={index_attempt_id} "
f"tenant={tenant_id} "
f"cc_pair={cc_pair_id} "
f"search_settings={search_settings_id}"
)
@@ -586,7 +799,6 @@ def connector_indexing_proxy_task(
task_logger.warning(
"Indexing watchdog - termination signal detected: "
f"attempt={index_attempt_id} "
f"tenant={tenant_id} "
f"cc_pair={cc_pair_id} "
f"search_settings={search_settings_id}"
)
@@ -646,7 +858,7 @@ def connector_indexing_proxy_task(
if job.process:
exit_code = job.process.exitcode
# seeing non-deterministic behavior where spawned tasks occasionally return exit code 1
# seeing odd behavior where spawned tasks usually return exit code 1 in the cloud,
# even though logging clearly indicates that they completed successfully
# to work around this, we ignore the job error state if the completion signal is OK
status_int = redis_connector_index.get_completion()
@@ -681,7 +893,6 @@ def connector_indexing_proxy_task(
task_logger.info(
f"Indexing watchdog - finished: attempt={index_attempt_id} "
f"tenant={tenant_id} "
f"cc_pair={cc_pair_id} "
f"search_settings={search_settings_id}"
)
@@ -877,6 +1088,7 @@ def connector_indexing_task(
f"search_settings={search_settings_id}"
)
# This is where the heavy/real work happens
run_indexing_entrypoint(
index_attempt_id,
tenant_id,
@@ -906,7 +1118,6 @@ def connector_indexing_task(
logger.info(
f"Indexing spawned task finished: attempt={index_attempt_id} "
f"tenant={tenant_id} "
f"cc_pair={cc_pair_id} "
f"search_settings={search_settings_id}"
)

View File

@@ -122,7 +122,7 @@ def check_for_pruning(self: Task, *, tenant_id: str | None) -> None:
"Soft time limit exceeded, task is being terminated gracefully."
)
except Exception:
task_logger.exception(f"Unexpected exception: tenant={tenant_id}")
task_logger.exception("Unexpected exception during pruning check")
finally:
if lock_beat.owned():
lock_beat.release()
@@ -308,7 +308,7 @@ def connector_pruning_generator_task(
doc_ids_to_remove = list(all_indexed_document_ids - all_connector_doc_ids)
task_logger.info(
f"Pruning set collected: "
"Pruning set collected: "
f"cc_pair={cc_pair_id} "
f"connector_source={cc_pair.connector.source} "
f"docs_to_remove={len(doc_ids_to_remove)}"
@@ -324,7 +324,7 @@ def connector_pruning_generator_task(
return None
task_logger.info(
f"RedisConnector.prune.generate_tasks finished. "
"RedisConnector.prune.generate_tasks finished. "
f"cc_pair={cc_pair_id} tasks_generated={tasks_generated}"
)

View File

@@ -60,7 +60,7 @@ def document_by_cc_pair_cleanup_task(
connector / credential pair from the access list
(6) delete all relevant entries from postgres
"""
task_logger.debug(f"Task start: tenant={tenant_id} doc={document_id}")
task_logger.debug(f"Task start: doc={document_id}")
try:
with get_session_with_tenant(tenant_id) as db_session:
@@ -129,16 +129,13 @@ def document_by_cc_pair_cleanup_task(
db_session.commit()
task_logger.info(
f"tenant={tenant_id} "
f"doc={document_id} "
f"action={action} "
f"refcount={count} "
f"chunks={chunks_affected}"
)
except SoftTimeLimitExceeded:
task_logger.info(
f"SoftTimeLimitExceeded exception. tenant={tenant_id} doc={document_id}"
)
task_logger.info(f"SoftTimeLimitExceeded exception. doc={document_id}")
return False
except Exception as ex:
if isinstance(ex, RetryError):
@@ -157,15 +154,12 @@ def document_by_cc_pair_cleanup_task(
if e.response.status_code == HTTPStatus.BAD_REQUEST:
task_logger.exception(
f"Non-retryable HTTPStatusError: "
f"tenant={tenant_id} "
f"doc={document_id} "
f"status={e.response.status_code}"
)
return False
task_logger.exception(
f"Unexpected exception: tenant={tenant_id} doc={document_id}"
)
task_logger.exception(f"Unexpected exception: doc={document_id}")
if self.request.retries < DOCUMENT_BY_CC_PAIR_CLEANUP_MAX_RETRIES:
# Still retrying. Exponential backoff from 2^4 to 2^6 ... i.e. 16, 32, 64
@@ -176,7 +170,7 @@ def document_by_cc_pair_cleanup_task(
# eventually gets fixed out of band via stale document reconciliation
task_logger.warning(
f"Max celery task retries reached. Marking doc as dirty for reconciliation: "
f"tenant={tenant_id} doc={document_id}"
f"doc={document_id}"
)
with get_session_with_tenant(tenant_id) as db_session:
# delete the cc pair relationship now and let reconciliation clean it up

View File

@@ -1,3 +1,4 @@
import time
import traceback
from datetime import datetime
from datetime import timezone
@@ -89,10 +90,11 @@ logger = setup_logger()
def check_for_vespa_sync_task(self: Task, *, tenant_id: str | None) -> None:
"""Runs periodically to check if any document needs syncing.
Generates sets of tasks for Celery if syncing is needed."""
time_start = time.monotonic()
r = get_redis_client(tenant_id=tenant_id)
lock_beat = r.lock(
lock_beat: RedisLock = r.lock(
OnyxRedisLocks.CHECK_VESPA_SYNC_BEAT_LOCK,
timeout=CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT,
)
@@ -156,11 +158,15 @@ def check_for_vespa_sync_task(self: Task, *, tenant_id: str | None) -> None:
"Soft time limit exceeded, task is being terminated gracefully."
)
except Exception:
task_logger.exception(f"Unexpected exception: tenant={tenant_id}")
task_logger.exception("Unexpected exception during vespa metadata sync")
finally:
if lock_beat.owned():
lock_beat.release()
time_elapsed = time.monotonic() - time_start
task_logger.info(f"check_for_vespa_sync_task finished: elapsed={time_elapsed:.2f}")
return
def try_generate_stale_document_sync_tasks(
celery_app: Celery,
@@ -730,6 +736,7 @@ def monitor_vespa_sync(self: Task, tenant_id: str | None) -> bool:
Returns True if the task actually did work, False if it exited early to prevent overlap
"""
time_start = time.monotonic()
r = get_redis_client(tenant_id=tenant_id)
lock_beat: RedisLock = r.lock(
@@ -824,6 +831,8 @@ def monitor_vespa_sync(self: Task, tenant_id: str | None) -> bool:
if lock_beat.owned():
lock_beat.release()
time_elapsed = time.monotonic() - time_start
task_logger.info(f"monitor_vespa_sync finished: elapsed={time_elapsed:.2f}")
return True
@@ -873,13 +882,9 @@ def vespa_metadata_sync_task(
# the sync might repeat again later
mark_document_as_synced(document_id, db_session)
task_logger.info(
f"tenant={tenant_id} doc={document_id} action=sync chunks={chunks_affected}"
)
task_logger.info(f"doc={document_id} action=sync chunks={chunks_affected}")
except SoftTimeLimitExceeded:
task_logger.info(
f"SoftTimeLimitExceeded exception. tenant={tenant_id} doc={document_id}"
)
task_logger.info(f"SoftTimeLimitExceeded exception. doc={document_id}")
except Exception as ex:
if isinstance(ex, RetryError):
task_logger.warning(
@@ -897,14 +902,13 @@ def vespa_metadata_sync_task(
if e.response.status_code == HTTPStatus.BAD_REQUEST:
task_logger.exception(
f"Non-retryable HTTPStatusError: "
f"tenant={tenant_id} "
f"doc={document_id} "
f"status={e.response.status_code}"
)
return False
task_logger.exception(
f"Unexpected exception: tenant={tenant_id} doc={document_id}"
f"Unexpected exception during vespa metadata sync: doc={document_id}"
)
# Exponential backoff from 2^4 to 2^6 ... i.e. 16, 32, 64

View File

@@ -65,7 +65,7 @@ class CitationProcessor:
# Handle code blocks without language tags
if "`" in self.curr_segment:
if self.curr_segment.endswith("`"):
return
pass
elif "```" in self.curr_segment:
piece_that_comes_after = self.curr_segment.split("```")[1][0]
if piece_that_comes_after == "\n" and in_code_block(self.llm_out):

View File

@@ -1,6 +1,7 @@
import json
import os
import urllib.parse
from typing import cast
from onyx.configs.constants import AuthType
from onyx.configs.constants import DocumentIndexType
@@ -144,6 +145,7 @@ POSTGRES_PASSWORD = urllib.parse.quote_plus(
POSTGRES_HOST = os.environ.get("POSTGRES_HOST") or "localhost"
POSTGRES_PORT = os.environ.get("POSTGRES_PORT") or "5432"
POSTGRES_DB = os.environ.get("POSTGRES_DB") or "postgres"
AWS_REGION = os.environ.get("AWS_REGION") or "us-east-2"
POSTGRES_API_SERVER_POOL_SIZE = int(
os.environ.get("POSTGRES_API_SERVER_POOL_SIZE") or 40
@@ -174,6 +176,9 @@ try:
except ValueError:
POSTGRES_IDLE_SESSIONS_TIMEOUT = POSTGRES_IDLE_SESSIONS_TIMEOUT_DEFAULT
USE_IAM_AUTH = os.getenv("USE_IAM_AUTH", "False").lower() == "true"
REDIS_SSL = os.getenv("REDIS_SSL", "").lower() == "true"
REDIS_HOST = os.environ.get("REDIS_HOST") or "localhost"
REDIS_PORT = int(os.environ.get("REDIS_PORT", 6379))
@@ -483,6 +488,21 @@ SYSTEM_RECURSION_LIMIT = int(os.environ.get("SYSTEM_RECURSION_LIMIT") or "1000")
PARSE_WITH_TRAFILATURA = os.environ.get("PARSE_WITH_TRAFILATURA", "").lower() == "true"
# allow for custom error messages for different errors returned by litellm
# for example, can specify: {"Violated content safety policy": "EVIL REQUEST!!!"}
# to make it so that if an LLM call returns an error containing "Violated content safety policy"
# the end user will see "EVIL REQUEST!!!" instead of the default error message.
_LITELLM_CUSTOM_ERROR_MESSAGE_MAPPINGS = os.environ.get(
"LITELLM_CUSTOM_ERROR_MESSAGE_MAPPINGS", ""
)
LITELLM_CUSTOM_ERROR_MESSAGE_MAPPINGS: dict[str, str] | None = None
try:
LITELLM_CUSTOM_ERROR_MESSAGE_MAPPINGS = cast(
dict[str, str], json.loads(_LITELLM_CUSTOM_ERROR_MESSAGE_MAPPINGS)
)
except json.JSONDecodeError:
pass
#####
# Enterprise Edition Configs
#####

View File

@@ -63,6 +63,10 @@ LANGUAGE_CHAT_NAMING_HINT = (
or "The name of the conversation must be in the same language as the user query."
)
# Number of prompts each persona should have
NUM_PERSONA_PROMPTS = 4
NUM_PERSONA_PROMPT_GENERATION_CHUNKS = 5
# Agentic search takes significantly more tokens and therefore has much higher cost.
# This configuration allows users to get a search-only experience with instant results
# and no involvement from the LLM.

View File

@@ -49,6 +49,7 @@ POSTGRES_CELERY_WORKER_INDEXING_CHILD_APP_NAME = "celery_worker_indexing_child"
POSTGRES_PERMISSIONS_APP_NAME = "permissions"
POSTGRES_UNKNOWN_APP_NAME = "unknown"
SSL_CERT_FILE = "bundle.pem"
# API Keys
DANSWER_API_KEY_PREFIX = "API_KEY__"
DANSWER_API_KEY_DUMMY_EMAIL_DOMAIN = "onyxapikey.ai"
@@ -274,6 +275,10 @@ class OnyxRedisLocks:
SLACK_BOT_HEARTBEAT_PREFIX = "da_heartbeat:slack_bot"
class OnyxRedisSignals:
VALIDATE_INDEXING_FENCES = "signal:validate_indexing_fences"
class OnyxCeleryPriority(int, Enum):
HIGHEST = 0
HIGH = auto()

View File

@@ -1,57 +0,0 @@
import os
from .chat_configs import NUM_RETURNED_HITS
#####
# Agent Configs
#####
agent_retrieval_stats_os: bool | str | None = os.environ.get(
"AGENT_RETRIEVAL_STATS", False
)
AGENT_RETRIEVAL_STATS: bool = False
if isinstance(agent_retrieval_stats_os, str) and agent_retrieval_stats_os == "True":
AGENT_RETRIEVAL_STATS = True
elif isinstance(agent_retrieval_stats_os, bool) and agent_retrieval_stats_os:
AGENT_RETRIEVAL_STATS = True
agent_max_query_retrieval_results_os: int | str = os.environ.get(
"AGENT_MAX_QUERY_RETRIEVAL_RESULTS", NUM_RETURNED_HITS
)
AGENT_MAX_QUERY_RETRIEVAL_RESULTS: int = NUM_RETURNED_HITS
try:
atmqrr = int(agent_max_query_retrieval_results_os)
AGENT_MAX_QUERY_RETRIEVAL_RESULTS = atmqrr
except ValueError:
raise ValueError(
f"MAX_AGENT_QUERY_RETRIEVAL_RESULTS must be an integer, got {AGENT_MAX_QUERY_RETRIEVAL_RESULTS}"
)
# Reranking agent configs
agent_reranking_stats_os: bool | str | None = os.environ.get(
"AGENT_RERANKING_TEST", False
)
AGENT_RERANKING_STATS: bool = False
if isinstance(agent_reranking_stats_os, str) and agent_reranking_stats_os == "True":
AGENT_RERANKING_STATS = True
elif isinstance(agent_reranking_stats_os, bool) and agent_reranking_stats_os:
AGENT_RERANKING_STATS = True
agent_reranking_max_query_retrieval_results_os: int | str = os.environ.get(
"AGENT_RERANKING_MAX_QUERY_RETRIEVAL_RESULTS", NUM_RETURNED_HITS
)
AGENT_RERANKING_MAX_QUERY_RETRIEVAL_RESULTS: int = NUM_RETURNED_HITS
try:
atmqrr = int(agent_reranking_max_query_retrieval_results_os)
AGENT_RERANKING_MAX_QUERY_RETRIEVAL_RESULTS = atmqrr
except ValueError:
raise ValueError(
f"AGENT_RERANKING_MAX_QUERY_RETRIEVAL_RESULTS must be an integer, got {AGENT_RERANKING_MAX_QUERY_RETRIEVAL_RESULTS}"
)

View File

@@ -316,6 +316,23 @@ def update_chat_session(
return chat_session
def delete_all_chat_sessions_for_user(
user: User | None, db_session: Session, hard_delete: bool = HARD_DELETE_CHATS
) -> None:
user_id = user.id if user is not None else None
query = db_session.query(ChatSession).filter(
ChatSession.user_id == user_id, ChatSession.onyxbot_flow.is_(False)
)
if hard_delete:
query.delete(synchronize_session=False)
else:
query.update({ChatSession.deleted: True}, synchronize_session=False)
db_session.commit()
def delete_chat_session(
user_id: UUID | None,
chat_session_id: UUID,

View File

@@ -1,5 +1,7 @@
import contextlib
import os
import re
import ssl
import threading
import time
from collections.abc import AsyncGenerator
@@ -10,6 +12,8 @@ from datetime import datetime
from typing import Any
from typing import ContextManager
import asyncpg # type: ignore
import boto3
import jwt
from fastapi import HTTPException
from fastapi import Request
@@ -23,6 +27,7 @@ from sqlalchemy.ext.asyncio import create_async_engine
from sqlalchemy.orm import Session
from sqlalchemy.orm import sessionmaker
from onyx.configs.app_configs import AWS_REGION
from onyx.configs.app_configs import LOG_POSTGRES_CONN_COUNTS
from onyx.configs.app_configs import LOG_POSTGRES_LATENCY
from onyx.configs.app_configs import POSTGRES_API_SERVER_POOL_OVERFLOW
@@ -37,6 +42,7 @@ from onyx.configs.app_configs import POSTGRES_PORT
from onyx.configs.app_configs import POSTGRES_USER
from onyx.configs.app_configs import USER_AUTH_SECRET
from onyx.configs.constants import POSTGRES_UNKNOWN_APP_NAME
from onyx.configs.constants import SSL_CERT_FILE
from onyx.server.utils import BasicAuthenticationError
from onyx.utils.logger import setup_logger
from shared_configs.configs import MULTI_TENANT
@@ -49,28 +55,87 @@ logger = setup_logger()
SYNC_DB_API = "psycopg2"
ASYNC_DB_API = "asyncpg"
# global so we don't create more than one engine per process
# outside of being best practice, this is needed so we can properly pool
# connections and not create a new pool on every request
USE_IAM_AUTH = os.getenv("USE_IAM_AUTH", "False").lower() == "true"
# Global so we don't create more than one engine per process
_ASYNC_ENGINE: AsyncEngine | None = None
SessionFactory: sessionmaker[Session] | None = None
def create_ssl_context_if_iam() -> ssl.SSLContext | None:
"""Create an SSL context if IAM authentication is enabled, else return None."""
if USE_IAM_AUTH:
return ssl.create_default_context(cafile=SSL_CERT_FILE)
return None
ssl_context = create_ssl_context_if_iam()
def get_iam_auth_token(
host: str, port: str, user: str, region: str = "us-east-2"
) -> str:
"""
Generate an IAM authentication token using boto3.
"""
client = boto3.client("rds", region_name=region)
token = client.generate_db_auth_token(
DBHostname=host, Port=int(port), DBUsername=user
)
return token
def configure_psycopg2_iam_auth(
cparams: dict[str, Any], host: str, port: str, user: str, region: str
) -> None:
"""
Configure cparams for psycopg2 with IAM token and SSL.
"""
token = get_iam_auth_token(host, port, user, region)
cparams["password"] = token
cparams["sslmode"] = "require"
cparams["sslrootcert"] = SSL_CERT_FILE
def build_connection_string(
*,
db_api: str = ASYNC_DB_API,
user: str = POSTGRES_USER,
password: str = POSTGRES_PASSWORD,
host: str = POSTGRES_HOST,
port: str = POSTGRES_PORT,
db: str = POSTGRES_DB,
app_name: str | None = None,
use_iam: bool = USE_IAM_AUTH,
region: str = "us-west-2",
) -> str:
if use_iam:
base_conn_str = f"postgresql+{db_api}://{user}@{host}:{port}/{db}"
else:
base_conn_str = f"postgresql+{db_api}://{user}:{password}@{host}:{port}/{db}"
# For asyncpg, do not include application_name in the connection string
if app_name and db_api != "asyncpg":
if "?" in base_conn_str:
return f"{base_conn_str}&application_name={app_name}"
else:
return f"{base_conn_str}?application_name={app_name}"
return base_conn_str
if LOG_POSTGRES_LATENCY:
# Function to log before query execution
@event.listens_for(Engine, "before_cursor_execute")
def before_cursor_execute( # type: ignore
conn, cursor, statement, parameters, context, executemany
):
conn.info["query_start_time"] = time.time()
# Function to log after query execution
@event.listens_for(Engine, "after_cursor_execute")
def after_cursor_execute( # type: ignore
conn, cursor, statement, parameters, context, executemany
):
total_time = time.time() - conn.info["query_start_time"]
# don't spam TOO hard
if total_time > 0.1:
logger.debug(
f"Query Complete: {statement}\n\nTotal Time: {total_time:.4f} seconds"
@@ -78,7 +143,6 @@ if LOG_POSTGRES_LATENCY:
if LOG_POSTGRES_CONN_COUNTS:
# Global counter for connection checkouts and checkins
checkout_count = 0
checkin_count = 0
@@ -105,21 +169,13 @@ if LOG_POSTGRES_CONN_COUNTS:
logger.debug(f"Total connection checkins: {checkin_count}")
"""END DEBUGGING LOGGING"""
def get_db_current_time(db_session: Session) -> datetime:
"""Get the current time from Postgres representing the start of the transaction
Within the same transaction this value will not update
This datetime object returned should be timezone aware, default Postgres timezone is UTC
"""
result = db_session.execute(text("SELECT NOW()")).scalar()
if result is None:
raise ValueError("Database did not return a time")
return result
# Regular expression to validate schema names to prevent SQL injection
SCHEMA_NAME_REGEX = re.compile(r"^[a-zA-Z0-9_-]+$")
@@ -128,16 +184,9 @@ def is_valid_schema_name(name: str) -> bool:
class SqlEngine:
"""Class to manage a global SQLAlchemy engine (needed for proper resource control).
Will eventually subsume most of the standalone functions in this file.
Sync only for now.
"""
_engine: Engine | None = None
_lock: threading.Lock = threading.Lock()
_app_name: str = POSTGRES_UNKNOWN_APP_NAME
# Default parameters for engine creation
DEFAULT_ENGINE_KWARGS = {
"pool_size": 20,
"max_overflow": 5,
@@ -145,33 +194,27 @@ class SqlEngine:
"pool_recycle": POSTGRES_POOL_RECYCLE,
}
def __init__(self) -> None:
pass
@classmethod
def _init_engine(cls, **engine_kwargs: Any) -> Engine:
"""Private helper method to create and return an Engine."""
connection_string = build_connection_string(
db_api=SYNC_DB_API, app_name=cls._app_name + "_sync"
db_api=SYNC_DB_API, app_name=cls._app_name + "_sync", use_iam=USE_IAM_AUTH
)
merged_kwargs = {**cls.DEFAULT_ENGINE_KWARGS, **engine_kwargs}
return create_engine(connection_string, **merged_kwargs)
engine = create_engine(connection_string, **merged_kwargs)
if USE_IAM_AUTH:
event.listen(engine, "do_connect", provide_iam_token)
return engine
@classmethod
def init_engine(cls, **engine_kwargs: Any) -> None:
"""Allow the caller to init the engine with extra params. Different clients
such as the API server and different Celery workers and tasks
need different settings.
"""
with cls._lock:
if not cls._engine:
cls._engine = cls._init_engine(**engine_kwargs)
@classmethod
def get_engine(cls) -> Engine:
"""Gets the SQLAlchemy engine. Will init a default engine if init hasn't
already been called. You probably want to init first!
"""
if not cls._engine:
with cls._lock:
if not cls._engine:
@@ -180,12 +223,10 @@ class SqlEngine:
@classmethod
def set_app_name(cls, app_name: str) -> None:
"""Class method to set the app name."""
cls._app_name = app_name
@classmethod
def get_app_name(cls) -> str:
"""Class method to get current app name."""
if not cls._app_name:
return ""
return cls._app_name
@@ -217,56 +258,71 @@ def get_all_tenant_ids() -> list[str] | list[None]:
for tenant in tenant_ids
if tenant is None or tenant.startswith(TENANT_ID_PREFIX)
]
return valid_tenants
def build_connection_string(
*,
db_api: str = ASYNC_DB_API,
user: str = POSTGRES_USER,
password: str = POSTGRES_PASSWORD,
host: str = POSTGRES_HOST,
port: str = POSTGRES_PORT,
db: str = POSTGRES_DB,
app_name: str | None = None,
) -> str:
if app_name:
return f"postgresql+{db_api}://{user}:{password}@{host}:{port}/{db}?application_name={app_name}"
return f"postgresql+{db_api}://{user}:{password}@{host}:{port}/{db}"
def get_sqlalchemy_engine() -> Engine:
return SqlEngine.get_engine()
async def get_async_connection() -> Any:
"""
Custom connection function for async engine when using IAM auth.
"""
host = POSTGRES_HOST
port = POSTGRES_PORT
user = POSTGRES_USER
db = POSTGRES_DB
token = get_iam_auth_token(host, port, user, AWS_REGION)
# asyncpg requires 'ssl="require"' if SSL needed
return await asyncpg.connect(
user=user, password=token, host=host, port=int(port), database=db, ssl="require"
)
def get_sqlalchemy_async_engine() -> AsyncEngine:
global _ASYNC_ENGINE
if _ASYNC_ENGINE is None:
# Underlying asyncpg cannot accept application_name directly in the connection string
# https://github.com/MagicStack/asyncpg/issues/798
connection_string = build_connection_string()
app_name = SqlEngine.get_app_name() + "_async"
connection_string = build_connection_string(
db_api=ASYNC_DB_API,
use_iam=USE_IAM_AUTH,
)
connect_args: dict[str, Any] = {}
if app_name:
connect_args["server_settings"] = {"application_name": app_name}
connect_args["ssl"] = ssl_context
_ASYNC_ENGINE = create_async_engine(
connection_string,
connect_args={
"server_settings": {
"application_name": SqlEngine.get_app_name() + "_async"
}
},
# async engine is only used by API server, so we can use those values
# here as well
connect_args=connect_args,
pool_size=POSTGRES_API_SERVER_POOL_SIZE,
max_overflow=POSTGRES_API_SERVER_POOL_OVERFLOW,
pool_pre_ping=POSTGRES_POOL_PRE_PING,
pool_recycle=POSTGRES_POOL_RECYCLE,
)
if USE_IAM_AUTH:
@event.listens_for(_ASYNC_ENGINE.sync_engine, "do_connect")
def provide_iam_token_async(
dialect: Any, conn_rec: Any, cargs: Any, cparams: Any
) -> None:
# For async engine using asyncpg, we still need to set the IAM token here.
host = POSTGRES_HOST
port = POSTGRES_PORT
user = POSTGRES_USER
token = get_iam_auth_token(host, port, user, AWS_REGION)
cparams["password"] = token
cparams["ssl"] = ssl_context
return _ASYNC_ENGINE
# Dependency to get the current tenant ID
# If no token is present, uses the default schema for this use case
def get_current_tenant_id(request: Request) -> str:
"""Dependency that extracts the tenant ID from the JWT token in the request and sets the context variable."""
if not MULTI_TENANT:
tenant_id = POSTGRES_DEFAULT_SCHEMA
CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)
@@ -275,7 +331,6 @@ def get_current_tenant_id(request: Request) -> str:
token = request.cookies.get("fastapiusersauth")
if not token:
current_value = CURRENT_TENANT_ID_CONTEXTVAR.get()
# If no token is present, use the default schema or handle accordingly
return current_value
try:
@@ -289,7 +344,6 @@ def get_current_tenant_id(request: Request) -> str:
if not is_valid_schema_name(tenant_id):
raise HTTPException(status_code=400, detail="Invalid tenant ID format")
CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)
return tenant_id
except jwt.InvalidTokenError:
return CURRENT_TENANT_ID_CONTEXTVAR.get()
@@ -316,7 +370,6 @@ async def get_async_session_with_tenant(
async with async_session_factory() as session:
try:
# Set the search_path to the tenant's schema
await session.execute(text(f'SET search_path = "{tenant_id}"'))
if POSTGRES_IDLE_SESSIONS_TIMEOUT:
await session.execute(
@@ -326,8 +379,6 @@ async def get_async_session_with_tenant(
)
except Exception:
logger.exception("Error setting search_path.")
# You can choose to re-raise the exception or handle it
# Here, we'll re-raise to prevent proceeding with an incorrect session
raise
else:
yield session
@@ -335,9 +386,6 @@ async def get_async_session_with_tenant(
@contextmanager
def get_session_with_default_tenant() -> Generator[Session, None, None]:
"""
Get a database session using the current tenant ID from the context variable.
"""
tenant_id = CURRENT_TENANT_ID_CONTEXTVAR.get()
with get_session_with_tenant(tenant_id) as session:
yield session
@@ -349,7 +397,6 @@ def get_session_with_tenant(
) -> Generator[Session, None, None]:
"""
Generate a database session for a specific tenant.
This function:
1. Sets the database schema to the specified tenant's schema.
2. Preserves the tenant ID across the session.
@@ -357,27 +404,20 @@ def get_session_with_tenant(
4. Uses the default schema if no tenant ID is provided.
"""
engine = get_sqlalchemy_engine()
# Store the previous tenant ID
previous_tenant_id = CURRENT_TENANT_ID_CONTEXTVAR.get() or POSTGRES_DEFAULT_SCHEMA
if tenant_id is None:
tenant_id = POSTGRES_DEFAULT_SCHEMA
CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)
event.listen(engine, "checkout", set_search_path_on_checkout)
if not is_valid_schema_name(tenant_id):
raise HTTPException(status_code=400, detail="Invalid tenant ID")
try:
# Establish a raw connection
with engine.connect() as connection:
# Access the raw DBAPI connection and set the search_path
dbapi_connection = connection.connection
# Set the search_path outside of any transaction
cursor = dbapi_connection.cursor()
try:
cursor.execute(f'SET search_path = "{tenant_id}"')
@@ -390,21 +430,17 @@ def get_session_with_tenant(
finally:
cursor.close()
# Bind the session to the connection
with Session(bind=connection, expire_on_commit=False) as session:
try:
yield session
finally:
# Reset search_path to default after the session is used
if MULTI_TENANT:
cursor = dbapi_connection.cursor()
try:
cursor.execute('SET search_path TO "$user", public')
finally:
cursor.close()
finally:
# Restore the previous tenant ID
CURRENT_TENANT_ID_CONTEXTVAR.set(previous_tenant_id)
@@ -424,12 +460,9 @@ def get_session_generator_with_tenant() -> Generator[Session, None, None]:
def get_session() -> Generator[Session, None, None]:
"""Generate a database session with the appropriate tenant schema set."""
tenant_id = CURRENT_TENANT_ID_CONTEXTVAR.get()
if tenant_id == POSTGRES_DEFAULT_SCHEMA and MULTI_TENANT:
raise BasicAuthenticationError(
detail="User must authenticate",
)
raise BasicAuthenticationError(detail="User must authenticate")
engine = get_sqlalchemy_engine()
@@ -437,20 +470,17 @@ def get_session() -> Generator[Session, None, None]:
if MULTI_TENANT:
if not is_valid_schema_name(tenant_id):
raise HTTPException(status_code=400, detail="Invalid tenant ID")
# Set the search_path to the tenant's schema
session.execute(text(f'SET search_path = "{tenant_id}"'))
yield session
async def get_async_session() -> AsyncGenerator[AsyncSession, None]:
"""Generate an async database session with the appropriate tenant schema set."""
tenant_id = CURRENT_TENANT_ID_CONTEXTVAR.get()
engine = get_sqlalchemy_async_engine()
async with AsyncSession(engine, expire_on_commit=False) as async_session:
if MULTI_TENANT:
if not is_valid_schema_name(tenant_id):
raise HTTPException(status_code=400, detail="Invalid tenant ID")
# Set the search_path to the tenant's schema
await async_session.execute(text(f'SET search_path = "{tenant_id}"'))
yield async_session
@@ -461,7 +491,6 @@ def get_session_context_manager() -> ContextManager[Session]:
def get_session_factory() -> sessionmaker[Session]:
"""Get a session factory."""
global SessionFactory
if SessionFactory is None:
SessionFactory = sessionmaker(bind=get_sqlalchemy_engine())
@@ -489,3 +518,13 @@ async def warm_up_connections(
await async_conn.execute(text("SELECT 1"))
for async_conn in async_connections:
await async_conn.close()
def provide_iam_token(dialect: Any, conn_rec: Any, cargs: Any, cparams: Any) -> None:
if USE_IAM_AUTH:
host = POSTGRES_HOST
port = POSTGRES_PORT
user = POSTGRES_USER
region = os.getenv("AWS_REGION", "us-east-2")
# Configure for psycopg2 with IAM token
configure_psycopg2_iam_auth(cparams, host, port, user, region)

View File

@@ -5,6 +5,8 @@ from typing import Literal
from typing import NotRequired
from typing import Optional
from uuid import uuid4
from pydantic import BaseModel
from typing_extensions import TypedDict # noreorder
from uuid import UUID
@@ -1008,7 +1010,7 @@ class ChatSession(Base):
"ChatFolder", back_populates="chat_sessions"
)
messages: Mapped[list["ChatMessage"]] = relationship(
"ChatMessage", back_populates="chat_session"
"ChatMessage", back_populates="chat_session", cascade="all, delete-orphan"
)
persona: Mapped["Persona"] = relationship("Persona")
@@ -1076,6 +1078,8 @@ class ChatMessage(Base):
"SearchDoc",
secondary=ChatMessage__SearchDoc.__table__,
back_populates="chat_messages",
cascade="all, delete-orphan",
single_parent=True,
)
tool_call: Mapped["ToolCall"] = relationship(
@@ -1344,6 +1348,11 @@ class StarterMessage(TypedDict):
message: str
class StarterMessageModel(BaseModel):
name: str
message: str
class Persona(Base):
__tablename__ = "persona"

View File

@@ -543,6 +543,10 @@ def upsert_persona(
if tools is not None:
existing_persona.tools = tools or []
# We should only update display priority if it is not already set
if existing_persona.display_priority is None:
existing_persona.display_priority = display_priority
persona = existing_persona
else:

View File

@@ -369,6 +369,19 @@ class AdminCapable(abc.ABC):
raise NotImplementedError
class RandomCapable(abc.ABC):
"""Class must implement random document retrieval capability"""
@abc.abstractmethod
def random_retrieval(
self,
filters: IndexFilters,
num_to_retrieve: int = 10,
) -> list[InferenceChunkUncleaned]:
"""Retrieve random chunks matching the filters"""
raise NotImplementedError
class BaseIndex(
Verifiable,
Indexable,
@@ -376,6 +389,7 @@ class BaseIndex(
Deletable,
AdminCapable,
IdRetrievalCapable,
RandomCapable,
abc.ABC,
):
"""

View File

@@ -218,4 +218,10 @@ schema DANSWER_CHUNK_NAME {
expression: bm25(content) + (5 * bm25(title))
}
}
rank-profile random_ {
first-phase {
expression: random.match
}
}
}

View File

@@ -2,6 +2,7 @@ import concurrent.futures
import io
import logging
import os
import random
import re
import time
import urllib
@@ -534,7 +535,7 @@ class VespaIndex(DocumentIndex):
if self.secondary_index_name:
index_names.append(self.secondary_index_name)
with get_vespa_http_client() as http_client:
with get_vespa_http_client(http2=False) as http_client:
for index_name in index_names:
params = httpx.QueryParams(
{
@@ -545,8 +546,12 @@ class VespaIndex(DocumentIndex):
while True:
try:
vespa_url = (
f"{DOCUMENT_ID_ENDPOINT.format(index_name=self.index_name)}"
)
logger.debug(f'update_single PUT on URL "{vespa_url}"')
resp = http_client.put(
f"{DOCUMENT_ID_ENDPOINT.format(index_name=self.index_name)}",
vespa_url,
params=params,
headers={"Content-Type": "application/json"},
json=update_dict,
@@ -618,7 +623,7 @@ class VespaIndex(DocumentIndex):
if self.secondary_index_name:
index_names.append(self.secondary_index_name)
with get_vespa_http_client() as http_client:
with get_vespa_http_client(http2=False) as http_client:
for index_name in index_names:
params = httpx.QueryParams(
{
@@ -629,8 +634,12 @@ class VespaIndex(DocumentIndex):
while True:
try:
vespa_url = (
f"{DOCUMENT_ID_ENDPOINT.format(index_name=index_name)}"
)
logger.debug(f'delete_single DELETE on URL "{vespa_url}"')
resp = http_client.delete(
f"{DOCUMENT_ID_ENDPOINT.format(index_name=index_name)}",
vespa_url,
params=params,
)
resp.raise_for_status()
@@ -903,6 +912,32 @@ class VespaIndex(DocumentIndex):
logger.info("Batch deletion completed")
def random_retrieval(
self,
filters: IndexFilters,
num_to_retrieve: int = 10,
) -> list[InferenceChunkUncleaned]:
"""Retrieve random chunks matching the filters using Vespa's random ranking
This method is currently used for random chunk retrieval in the context of
assistant starter message creation (passed as sample context for usage by the assistant).
"""
vespa_where_clauses = build_vespa_filters(filters, remove_trailing_and=True)
yql = YQL_BASE.format(index_name=self.index_name) + vespa_where_clauses
random_seed = random.randint(0, 1000000)
params: dict[str, str | int | float] = {
"yql": yql,
"hits": num_to_retrieve,
"timeout": VESPA_TIMEOUT,
"ranking.profile": "random_",
"ranking.properties.random.seed": random_seed,
}
return query_vespa(params)
class _VespaDeleteRequest:
def __init__(self, document_id: str, index_name: str) -> None:

View File

@@ -55,7 +55,9 @@ def remove_invalid_unicode_chars(text: str) -> str:
return _illegal_xml_chars_RE.sub("", text)
def get_vespa_http_client(no_timeout: bool = False) -> httpx.Client:
def get_vespa_http_client(
no_timeout: bool = False, http2: bool = False
) -> httpx.Client:
"""
Configure and return an HTTP client for communicating with Vespa,
including authentication if needed.
@@ -67,5 +69,5 @@ def get_vespa_http_client(no_timeout: bool = False) -> httpx.Client:
else None,
verify=False if not MANAGED_VESPA else True,
timeout=None if no_timeout else VESPA_REQUEST_TIMEOUT,
http2=True,
http2=http2,
)

View File

@@ -19,7 +19,12 @@ from onyx.utils.logger import setup_logger
logger = setup_logger()
def build_vespa_filters(filters: IndexFilters, include_hidden: bool = False) -> str:
def build_vespa_filters(
filters: IndexFilters,
*,
include_hidden: bool = False,
remove_trailing_and: bool = False, # Set to True when using as a complete Vespa query
) -> str:
def _build_or_filters(key: str, vals: list[str] | None) -> str:
if vals is None:
return ""
@@ -78,6 +83,9 @@ def build_vespa_filters(filters: IndexFilters, include_hidden: bool = False) ->
filter_str += _build_time_filter(filters.time_cutoff)
if remove_trailing_and and filter_str.endswith(" and "):
filter_str = filter_str[:-5] # We remove the trailing " and "
return filter_str

View File

@@ -453,7 +453,9 @@ class DefaultMultiLLM(LLM):
if LOG_DANSWER_MODEL_INTERACTIONS:
self.log_model_configs()
if DISABLE_LITELLM_STREAMING:
if (
DISABLE_LITELLM_STREAMING or self.config.model_name == "o1-2024-12-17"
): # TODO: remove once litellm supports streaming
yield self.invoke(prompt, tools, tool_choice, structured_response_format)
return

View File

@@ -29,6 +29,7 @@ OPENAI_PROVIDER_NAME = "openai"
OPEN_AI_MODEL_NAMES = [
"o1-mini",
"o1-preview",
"o1-2024-12-17",
"gpt-4",
"gpt-4o",
"gpt-4o-mini",

View File

@@ -28,6 +28,7 @@ from litellm.exceptions import RateLimitError # type: ignore
from litellm.exceptions import Timeout # type: ignore
from litellm.exceptions import UnprocessableEntityError # type: ignore
from onyx.configs.app_configs import LITELLM_CUSTOM_ERROR_MESSAGE_MAPPINGS
from onyx.configs.constants import MessageType
from onyx.configs.model_configs import GEN_AI_MAX_TOKENS
from onyx.configs.model_configs import GEN_AI_MODEL_FALLBACK_MAX_TOKENS
@@ -45,10 +46,19 @@ logger = setup_logger()
def litellm_exception_to_error_msg(
e: Exception, llm: LLM, fallback_to_error_msg: bool = False
e: Exception,
llm: LLM,
fallback_to_error_msg: bool = False,
custom_error_msg_mappings: dict[str, str]
| None = LITELLM_CUSTOM_ERROR_MESSAGE_MAPPINGS,
) -> str:
error_msg = str(e)
if custom_error_msg_mappings:
for error_msg_pattern, custom_error_msg in custom_error_msg_mappings.items():
if error_msg_pattern in error_msg:
return custom_error_msg
if isinstance(e, BadRequestError):
error_msg = "Bad request: The server couldn't process your request. Please check your input."
elif isinstance(e, AuthenticationError):

View File

@@ -0,0 +1,46 @@
PERSONA_CATEGORY_GENERATION_PROMPT = """
Based on the assistant's name, description, and instructions, generate a list of {num_categories}
**unique and diverse** categories that represent different types of starter messages a user
might send to initiate a conversation with this chatbot assistant.
**Ensure that the categories are varied and cover a wide range of topics related to the assistant's capabilities.**
Provide the categories as a JSON array of strings **without any code fences or additional text**.
**Context about the assistant:**
- **Name**: {name}
- **Description**: {description}
- **Instructions**: {instructions}
""".strip()
PERSONA_STARTER_MESSAGE_CREATION_PROMPT = """
Create a starter message that a **user** might send to initiate a conversation with a chatbot assistant.
**Category**: {category}
Your response should include two parts:
1. **Title**: A short, engaging title that reflects the user's intent
(e.g., 'Need Travel Advice', 'Question About Coding', 'Looking for Book Recommendations').
2. **Message**: The actual message that the user would send to the assistant.
This should be natural, engaging, and encourage a helpful response from the assistant.
**Avoid overly specific details; keep the message general and broadly applicable.**
For example:
- Instead of "I've just adopted a 6-month-old Labrador puppy who's pulling on the leash,"
write "I'm having trouble training my new puppy to walk nicely on a leash."
Ensure each part is clearly labeled and separated as shown above.
Do not provide any additional text or explanation and be extremely concise
**Context about the assistant:**
- **Name**: {name}
- **Description**: {description}
- **Instructions**: {instructions}
""".strip()
if __name__ == "__main__":
print(PERSONA_CATEGORY_GENERATION_PROMPT)
print(PERSONA_STARTER_MESSAGE_CREATION_PROMPT)

View File

@@ -31,6 +31,10 @@ class RedisConnectorIndex:
TERMINATE_PREFIX = PREFIX + "_terminate" # connectorindexing_terminate
# used to signal the overall workflow is still active
# it's difficult to prevent
ACTIVE_PREFIX = PREFIX + "_active"
def __init__(
self,
tenant_id: str | None,
@@ -54,6 +58,7 @@ class RedisConnectorIndex:
f"{self.GENERATOR_LOCK_PREFIX}_{id}/{search_settings_id}"
)
self.terminate_key = f"{self.TERMINATE_PREFIX}_{id}/{search_settings_id}"
self.active_key = f"{self.ACTIVE_PREFIX}_{id}/{search_settings_id}"
@classmethod
def fence_key_with_ids(cls, cc_pair_id: int, search_settings_id: int) -> str:
@@ -107,6 +112,26 @@ class RedisConnectorIndex:
# 10 minute TTL is good.
self.redis.set(f"{self.terminate_key}_{celery_task_id}", 0, ex=600)
def set_active(self) -> None:
"""This sets a signal to keep the indexing flow from getting cleaned up within
the expiration time.
The slack in timing is needed to avoid race conditions where simply checking
the celery queue and task status could result in race conditions."""
self.redis.set(self.active_key, 0, ex=300)
def active(self) -> bool:
if self.redis.exists(self.active_key):
return True
return False
def generator_locked(self) -> bool:
if self.redis.exists(self.generator_lock_key):
return True
return False
def set_generator_complete(self, payload: int | None) -> None:
if not payload:
self.redis.delete(self.generator_complete_key)
@@ -138,6 +163,7 @@ class RedisConnectorIndex:
return status
def reset(self) -> None:
self.redis.delete(self.active_key)
self.redis.delete(self.generator_lock_key)
self.redis.delete(self.generator_progress_key)
self.redis.delete(self.generator_complete_key)

View File

@@ -0,0 +1,271 @@
import json
import re
from typing import Any
from typing import cast
from typing import Dict
from typing import List
from litellm import get_supported_openai_params
from sqlalchemy.orm import Session
from onyx.configs.chat_configs import NUM_PERSONA_PROMPT_GENERATION_CHUNKS
from onyx.configs.chat_configs import NUM_PERSONA_PROMPTS
from onyx.context.search.models import IndexFilters
from onyx.context.search.models import InferenceChunk
from onyx.context.search.postprocessing.postprocessing import cleanup_chunks
from onyx.context.search.preprocessing.access_filters import (
build_access_filters_for_user,
)
from onyx.db.document_set import get_document_sets_by_ids
from onyx.db.models import StarterMessageModel as StarterMessage
from onyx.db.models import User
from onyx.document_index.document_index_utils import get_both_index_names
from onyx.document_index.factory import get_default_document_index
from onyx.llm.factory import get_default_llms
from onyx.prompts.starter_messages import PERSONA_CATEGORY_GENERATION_PROMPT
from onyx.prompts.starter_messages import PERSONA_STARTER_MESSAGE_CREATION_PROMPT
from onyx.utils.logger import setup_logger
from onyx.utils.threadpool_concurrency import FunctionCall
from onyx.utils.threadpool_concurrency import run_functions_in_parallel
logger = setup_logger()
def get_random_chunks_from_doc_sets(
doc_sets: List[str], db_session: Session, user: User | None = None
) -> List[InferenceChunk]:
"""
Retrieves random chunks from the specified document sets.
"""
curr_ind_name, sec_ind_name = get_both_index_names(db_session)
document_index = get_default_document_index(curr_ind_name, sec_ind_name)
acl_filters = build_access_filters_for_user(user, db_session)
filters = IndexFilters(document_set=doc_sets, access_control_list=acl_filters)
chunks = document_index.random_retrieval(
filters=filters, num_to_retrieve=NUM_PERSONA_PROMPT_GENERATION_CHUNKS
)
return cleanup_chunks(chunks)
def parse_categories(content: str) -> List[str]:
"""
Parses the JSON array of categories from the LLM response.
"""
# Clean the response to remove code fences and extra whitespace
content = content.strip().strip("```").strip()
if content.startswith("json"):
content = content[4:].strip()
try:
categories = json.loads(content)
if not isinstance(categories, list):
logger.error("Categories are not a list.")
return []
return categories
except json.JSONDecodeError as e:
logger.error(f"Failed to parse categories: {e}")
return []
def generate_start_message_prompts(
name: str,
description: str,
instructions: str,
categories: List[str],
chunk_contents: str,
supports_structured_output: bool,
fast_llm: Any,
) -> List[FunctionCall]:
"""
Generates the list of FunctionCall objects for starter message generation.
"""
functions = []
for category in categories:
# Create a prompt specific to the category
start_message_generation_prompt = (
PERSONA_STARTER_MESSAGE_CREATION_PROMPT.format(
name=name,
description=description,
instructions=instructions,
category=category,
)
)
if chunk_contents:
start_message_generation_prompt += (
"\n\nExample content this assistant has access to:\n"
"'''\n"
f"{chunk_contents}"
"\n'''"
)
if supports_structured_output:
functions.append(
FunctionCall(
fast_llm.invoke,
(start_message_generation_prompt, None, None, StarterMessage),
)
)
else:
functions.append(
FunctionCall(
fast_llm.invoke,
(start_message_generation_prompt,),
)
)
return functions
def parse_unstructured_output(output: str) -> Dict[str, str]:
"""
Parses the assistant's unstructured output into a dictionary with keys:
- 'name' (Title)
- 'message' (Message)
"""
# Debug output
logger.debug(f"LLM Output for starter message creation: {output}")
# Patterns to match
title_pattern = r"(?i)^\**Title\**\s*:\s*(.+)"
message_pattern = r"(?i)^\**Message\**\s*:\s*(.+)"
# Initialize the response dictionary
response_dict = {}
# Split the output into lines
lines = output.strip().split("\n")
# Variables to keep track of the current key being processed
current_key = None
current_value_lines = []
for line in lines:
# Check for title
title_match = re.match(title_pattern, line.strip())
if title_match:
# Save previous key-value pair if any
if current_key and current_value_lines:
response_dict[current_key] = " ".join(current_value_lines).strip()
current_value_lines = []
current_key = "name"
current_value_lines.append(title_match.group(1).strip())
continue
# Check for message
message_match = re.match(message_pattern, line.strip())
if message_match:
if current_key and current_value_lines:
response_dict[current_key] = " ".join(current_value_lines).strip()
current_value_lines = []
current_key = "message"
current_value_lines.append(message_match.group(1).strip())
continue
# If the line doesn't match a new key, append it to the current value
if current_key:
current_value_lines.append(line.strip())
# Add the last key-value pair
if current_key and current_value_lines:
response_dict[current_key] = " ".join(current_value_lines).strip()
# Validate that the necessary keys are present
if not all(k in response_dict for k in ["name", "message"]):
raise ValueError("Failed to parse the assistant's response.")
return response_dict
def generate_starter_messages(
name: str,
description: str,
instructions: str,
document_set_ids: List[int],
db_session: Session,
user: User | None,
) -> List[StarterMessage]:
"""
Generates starter messages by first obtaining categories and then generating messages for each category.
On failure, returns an empty list (or list with processed starter messages if some messages are processed successfully).
"""
_, fast_llm = get_default_llms(temperature=0.5)
provider = fast_llm.config.model_provider
model = fast_llm.config.model_name
params = get_supported_openai_params(model=model, custom_llm_provider=provider)
supports_structured_output = (
isinstance(params, list) and "response_format" in params
)
# Generate categories
category_generation_prompt = PERSONA_CATEGORY_GENERATION_PROMPT.format(
name=name,
description=description,
instructions=instructions,
num_categories=NUM_PERSONA_PROMPTS,
)
category_response = fast_llm.invoke(category_generation_prompt)
categories = parse_categories(cast(str, category_response.content))
if not categories:
logger.error("No categories were generated.")
return []
# Fetch example content if document sets are provided
if document_set_ids:
document_sets = get_document_sets_by_ids(
document_set_ids=document_set_ids,
db_session=db_session,
)
chunks = get_random_chunks_from_doc_sets(
doc_sets=[doc_set.name for doc_set in document_sets],
db_session=db_session,
user=user,
)
# Add example content context
chunk_contents = "\n".join(chunk.content.strip() for chunk in chunks)
else:
chunk_contents = ""
# Generate prompts for starter messages
functions = generate_start_message_prompts(
name,
description,
instructions,
categories,
chunk_contents,
supports_structured_output,
fast_llm,
)
# Run LLM calls in parallel
if not functions:
logger.error("No functions to execute for starter message generation.")
return []
results = run_functions_in_parallel(function_calls=functions)
prompts = []
for response in results.values():
try:
if supports_structured_output:
response_dict = json.loads(response.content)
else:
response_dict = parse_unstructured_output(response.content)
starter_message = StarterMessage(
name=response_dict["name"],
message=response_dict["message"],
)
prompts.append(starter_message)
except (json.JSONDecodeError, ValueError) as e:
logger.error(f"Failed to parse starter message: {e}")
continue
return prompts

View File

@@ -48,6 +48,7 @@ def load_personas_from_yaml(
data = yaml.safe_load(file)
all_personas = data.get("personas", [])
for persona in all_personas:
doc_set_names = persona["document_sets"]
doc_sets: list[DocumentSetDBModel] = [
@@ -127,6 +128,7 @@ def load_personas_from_yaml(
display_priority=(
existing_persona.display_priority
if existing_persona is not None
and persona.get("display_priority") is None
else persona.get("display_priority")
),
is_visible=(

View File

@@ -39,7 +39,7 @@ personas:
document_sets: []
icon_shape: 23013
icon_color: "#6FB1FF"
display_priority: 1
display_priority: 0
is_visible: true
starter_messages:
- name: "Give me an overview of what's here"
@@ -64,7 +64,7 @@ personas:
document_sets: []
icon_shape: 50910
icon_color: "#FF6F6F"
display_priority: 0
display_priority: 1
is_visible: true
starter_messages:
- name: "Summarize a document"

View File

@@ -19,6 +19,7 @@ from onyx.configs.constants import MilestoneRecordType
from onyx.configs.constants import NotificationType
from onyx.db.engine import get_current_tenant_id
from onyx.db.engine import get_session
from onyx.db.models import StarterMessageModel as StarterMessage
from onyx.db.models import User
from onyx.db.notification import create_notification
from onyx.db.persona import create_assistant_category
@@ -36,7 +37,11 @@ from onyx.db.persona import update_persona_shared_users
from onyx.db.persona import update_persona_visibility
from onyx.file_store.file_store import get_default_file_store
from onyx.file_store.models import ChatFileType
from onyx.secondary_llm_flows.starter_message_creation import (
generate_starter_messages,
)
from onyx.server.features.persona.models import CreatePersonaRequest
from onyx.server.features.persona.models import GenerateStarterMessageRequest
from onyx.server.features.persona.models import ImageGenerationToolStatus
from onyx.server.features.persona.models import PersonaCategoryCreate
from onyx.server.features.persona.models import PersonaCategoryResponse
@@ -377,3 +382,26 @@ def build_final_template_prompt(
retrieval_disabled=retrieval_disabled,
)
)
@basic_router.post("/assistant-prompt-refresh")
def build_assistant_prompts(
generate_persona_prompt_request: GenerateStarterMessageRequest,
db_session: Session = Depends(get_session),
user: User | None = Depends(current_user),
) -> list[StarterMessage]:
try:
logger.info(
"Generating starter messages for user: %s", user.id if user else "Anonymous"
)
return generate_starter_messages(
name=generate_persona_prompt_request.name,
description=generate_persona_prompt_request.description,
instructions=generate_persona_prompt_request.instructions,
document_set_ids=generate_persona_prompt_request.document_set_ids,
db_session=db_session,
user=user,
)
except Exception as e:
logger.exception("Failed to generate starter messages")
raise HTTPException(status_code=500, detail=str(e))

View File

@@ -17,6 +17,14 @@ from onyx.utils.logger import setup_logger
logger = setup_logger()
# More minimal request for generating a persona prompt
class GenerateStarterMessageRequest(BaseModel):
name: str
description: str
instructions: str
document_set_ids: list[int]
class CreatePersonaRequest(BaseModel):
name: str
description: str

View File

@@ -35,6 +35,7 @@ from onyx.configs.model_configs import LITELLM_PASS_THROUGH_HEADERS
from onyx.db.chat import add_chats_to_session_from_slack_thread
from onyx.db.chat import create_chat_session
from onyx.db.chat import create_new_chat_message
from onyx.db.chat import delete_all_chat_sessions_for_user
from onyx.db.chat import delete_chat_session
from onyx.db.chat import duplicate_chat_session_for_user_from_slack
from onyx.db.chat import get_chat_message
@@ -280,6 +281,17 @@ def patch_chat_session(
return None
@router.delete("/delete-all-chat-sessions")
def delete_all_chat_sessions(
user: User | None = Depends(current_user),
db_session: Session = Depends(get_session),
) -> None:
try:
delete_all_chat_sessions_for_user(user=user, db_session=db_session)
except ValueError as e:
raise HTTPException(status_code=400, detail=str(e))
@router.delete("/delete-chat-session/{session_id}")
def delete_chat_session_by_id(
session_id: UUID,

View File

@@ -11,6 +11,7 @@ from onyx.chat.models import RetrievalDocs
from onyx.configs.constants import DocumentSource
from onyx.configs.constants import MessageType
from onyx.configs.constants import SearchFeedbackType
from onyx.configs.constants import SessionType
from onyx.context.search.models import BaseFilters
from onyx.context.search.models import ChunkContext
from onyx.context.search.models import RerankingDetails
@@ -151,6 +152,10 @@ class ChatSessionUpdateRequest(BaseModel):
sharing_status: ChatSessionSharedStatus
class DeleteAllSessionsRequest(BaseModel):
session_type: SessionType
class RenameChatSessionResponse(BaseModel):
new_name: str # This is only really useful if the name is generated

View File

@@ -25,11 +25,6 @@ class ToolCallSummary(BaseModel__v1):
tool_call_request: AIMessage
tool_call_result: ToolMessage
# This is a workaround to allow arbitrary types in the model
# TODO: Remove this once we have a better solution
class Config:
arbitrary_types_allowed = True
def tool_call_tokens(
tool_call_summary: ToolCallSummary, llm_tokenizer: BaseTokenizer

View File

@@ -22,6 +22,7 @@ from onyx.utils.variable_functionality import (
from onyx.utils.variable_functionality import noop_fallback
from shared_configs.configs import MULTI_TENANT
_DANSWER_TELEMETRY_ENDPOINT = "https://telemetry.onyx.app/anonymous_telemetry"
_CACHED_UUID: str | None = None
_CACHED_INSTANCE_DOMAIN: str | None = None
@@ -117,9 +118,12 @@ def mt_cloud_telemetry(
event: MilestoneRecordType,
properties: dict | None = None,
) -> None:
print(f"mt_cloud_telemetry {distinct_id} {event} {properties}")
if not MULTI_TENANT:
print("mt_cloud_telemetry not MULTI_TENANT")
return
print("mt_cloud_telemetry MULTI_TENANT")
# MIT version should not need to include any Posthog code
# This is only for Onyx MT Cloud, this code should also never be hit, no reason for any orgs to
# be running the Multi Tenant version of Onyx.
@@ -137,8 +141,11 @@ def create_milestone_and_report(
properties: dict | None,
db_session: Session,
) -> None:
print(f"create_milestone_and_report {user} {event_type} {db_session}")
_, is_new = create_milestone_if_not_exists(user, event_type, db_session)
print(f"create_milestone_and_report {is_new}")
if is_new:
print("create_milestone_and_report is_new")
mt_cloud_telemetry(
distinct_id=distinct_id,
event=event_type,

View File

@@ -26,15 +26,10 @@ huggingface-hub==0.20.1
jira==3.5.1
jsonref==1.1.0
trafilatura==1.12.2
langchain==0.3.7
langchain-core==0.3.24
langchain-openai==0.2.9
langchain-text-splitters==0.3.2
langchainhub==0.1.21
langgraph==0.2.59
langgraph-checkpoint==2.0.5
langgraph-sdk==0.1.44
litellm==1.53.1
langchain==0.1.17
langchain-core==0.1.50
langchain-text-splitters==0.0.1
litellm==1.55.4
lxml==5.3.0
lxml_html_clean==0.2.2
llama-index==0.9.45

Some files were not shown because too many files have changed in this diff Show More