Compare commits

..

29 Commits

Author SHA1 Message Date
joachim-danswer
1c23cf574c Nits 2025-02-10 17:13:16 -08:00
joachim-danswer
0ccf78ac52 reused error strings & BaseMessage_Content 2025-02-10 16:57:25 -08:00
joachim-danswer
02b4b4bf0d remove execs 2025-02-10 16:21:59 -08:00
joachim-danswer
dade11a2e6 EL - OVERRIDE 2025-02-10 14:41:55 -08:00
joachim-danswer
188a5f0d62 EL comments
- overwrite -> override
- enums for error types
- some nits
2025-02-10 14:33:58 -08:00
joachim-danswer
89c0b1ad37 YS comments 2025-02-10 13:29:12 -08:00
pablodanswer
8b20fd31b6 quick update 2025-02-08 17:14:01 -08:00
pablodanswer
6a73245986 quick ux update 2025-02-07 23:40:26 -08:00
joachim-danswer
dd73fdcd08 timeout prep backend 2025-02-07 18:21:35 -08:00
joachim-danswer
768456609a Removal of defaults from various input states + removal of bas 2025-02-07 18:19:18 -08:00
rkuo-danswer
ae37f01f62 event driven indexing/docset/usergroup triggers (#3918)
* WIP

* trigger indexing immediately when the ccpair is created

* add some logging and indexing trigger to the mock-credential endpoint

* better comments

* fix integration test

---------

Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
2025-02-07 22:53:51 +00:00
pablodanswer
ef31e14518 remove debug logs for integration tests 2025-02-07 10:46:24 -08:00
evan-danswer
9b0cba367e small linear connector improvements (#3929)
* small linear connector improvements

* add todo for url handling
2025-02-07 01:31:49 +00:00
pablonyx
48ac690a70 Multi tenant tests (#3919)
* ensure fail on multi tenant successfully

* attempted fix

* update integration tests

* minor update

* improve

* improve workflow

* fix migrations

* many more logs

* quick fix

* improve

* fix typo

* quick nit

* attempted fix

* very minor clean up
2025-02-07 01:24:00 +00:00
pablodanswer
bfa4fbd691 minor delay 2025-02-06 16:28:38 -08:00
rkuo-danswer
58fdc86d41 fix chromatic save/upload (#3927)
* try adding back some params

* raise timeout

* update chromatic version

* fix typo

* use chromatic imports

* update gitignore

* slim down the config file

* update readme

---------

Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
2025-02-06 22:02:14 +00:00
pablonyx
6ff452a2e1 Update popup + misc standardization (#3906)
* pop

* various minor improvements

* improvement

* finalize

* update
2025-02-06 21:22:06 +00:00
pablonyx
e9b892301b Improvements to Redis + Vespa debugging
Improvements to Redis + Vespa debugging
2025-02-06 13:30:32 -08:00
pablodanswer
a202e2bf9d Improvements to Redis + Vespa debugging 2025-02-06 13:30:06 -08:00
pablonyx
3bc4e0d12f Very minor robustification (#3926)
* very minor robustification

* robust
2025-02-06 19:55:38 +00:00
trial-danswer
2fc41cd5df Helm Chart Fixes (#3900)
* initial commit for helm chart refactoring

* Continue refactoring helm. I was able to use helm to deploy all of the apps to a cluster in aws. The bottleneck was setting up PVC dynamic provisioning.

* use default storage class

* Fix linter errors

* Fix broken helm test

* update

* Helm chart fixes

* remove reference to ebsstorage

* Fix linter errors

---------

Co-authored-by: jpb80 <jordan.buttkevitz@gmail.com>
2025-02-06 10:41:09 -08:00
pablodanswer
8c42ff2ff8 slackbot configuration fix 2025-02-06 09:36:58 -08:00
rkuo-danswer
6ccb3f085a select only doc_id (#3920)
* select only doc_id

* select more doc ids

* fix user group

---------

Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
2025-02-06 07:00:40 +00:00
pablonyx
a0a1b431be Various UX improvements
Various improvements
2025-02-05 21:13:22 -08:00
pablodanswer
f137fc78a6 various UX improvements 2025-02-05 21:12:55 -08:00
pablonyx
396f096dda Allows for Slackbots that do not have search enabled
Allow no search
2025-02-05 19:20:20 -08:00
pablodanswer
e04b2d6ff3 Allows for Slackbots that do not have search enabled 2025-02-05 19:19:50 -08:00
pablonyx
cbd8b094bd Minor misc docset updates
Minor misc docset updates
2025-02-05 19:14:32 -08:00
pablodanswer
5c7487e91f ensure tests pass 2025-02-05 17:02:49 -08:00
134 changed files with 3251 additions and 1074 deletions

View File

@@ -67,6 +67,7 @@ jobs:
NEXT_PUBLIC_SENTRY_DSN=${{ secrets.SENTRY_DSN }}
NEXT_PUBLIC_GTM_ENABLED=true
NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED=true
NEXT_PUBLIC_INCLUDE_ERROR_POPUP_SUPPORT_LINK=true
NODE_OPTIONS=--max-old-space-size=8192
# needed due to weird interactions with the builds for different platforms
no-cache: true

View File

@@ -94,16 +94,19 @@ jobs:
cd deployment/docker_compose
ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true \
MULTI_TENANT=true \
AUTH_TYPE=basic \
AUTH_TYPE=cloud \
REQUIRE_EMAIL_VERIFICATION=false \
DISABLE_TELEMETRY=true \
IMAGE_TAG=test \
docker compose -f docker-compose.dev.yml -p danswer-stack up -d
DEV_MODE=true \
docker compose -f docker-compose.multitenant-dev.yml -p danswer-stack up -d
id: start_docker_multi_tenant
# In practice, `cloud` Auth type would require OAUTH credentials to be set.
- name: Run Multi-Tenant Integration Tests
run: |
echo "Waiting for 3 minutes to ensure API server is ready..."
sleep 180
echo "Running integration tests..."
docker run --rm --network danswer-stack_default \
--name test-runner \
@@ -119,6 +122,10 @@ jobs:
-e TEST_WEB_HOSTNAME=test-runner \
-e AUTH_TYPE=cloud \
-e MULTI_TENANT=true \
-e REQUIRE_EMAIL_VERIFICATION=false \
-e DISABLE_TELEMETRY=true \
-e IMAGE_TAG=test \
-e DEV_MODE=true \
onyxdotapp/onyx-integration:test \
/app/tests/integration/multitenant_tests
continue-on-error: true
@@ -126,17 +133,17 @@ jobs:
- name: Check multi-tenant test results
run: |
if [ ${{ steps.run_tests.outcome }} == 'failure' ]; then
echo "Integration tests failed. Exiting with error."
if [ ${{ steps.run_multitenant_tests.outcome }} == 'failure' ]; then
echo "Multi-tenant integration tests failed. Exiting with error."
exit 1
else
echo "All integration tests passed successfully."
echo "All multi-tenant integration tests passed successfully."
fi
- name: Stop multi-tenant Docker containers
run: |
cd deployment/docker_compose
docker compose -f docker-compose.dev.yml -p danswer-stack down -v
docker compose -f docker-compose.multitenant-dev.yml -p danswer-stack down -v
- name: Start Docker containers
run: |
@@ -216,27 +223,30 @@ jobs:
echo "All integration tests passed successfully."
fi
# save before stopping the containers so the logs can be captured
- name: Save Docker logs
if: success() || failure()
# ------------------------------------------------------------
# Always gather logs BEFORE "down":
- name: Dump API server logs
if: always()
run: |
cd deployment/docker_compose
docker compose -f docker-compose.dev.yml -p danswer-stack logs > docker-compose.log
mv docker-compose.log ${{ github.workspace }}/docker-compose.log
docker compose -f docker-compose.dev.yml -p danswer-stack logs --no-color api_server > $GITHUB_WORKSPACE/api_server.log || true
- name: Stop Docker containers
- name: Dump all-container logs (optional)
if: always()
run: |
cd deployment/docker_compose
docker compose -f docker-compose.dev.yml -p danswer-stack down -v
docker compose -f docker-compose.dev.yml -p danswer-stack logs --no-color > $GITHUB_WORKSPACE/docker-compose.log || true
- name: Upload logs
if: success() || failure()
if: always()
uses: actions/upload-artifact@v4
with:
name: docker-logs
name: docker-all-logs
path: ${{ github.workspace }}/docker-compose.log
# ------------------------------------------------------------
- name: Stop Docker containers
if: always()
run: |
cd deployment/docker_compose
docker compose -f docker-compose.dev.yml -p danswer-stack down -v

View File

@@ -101,7 +101,8 @@ COPY ./alembic_tenants /app/alembic_tenants
COPY ./alembic.ini /app/alembic.ini
COPY supervisord.conf /usr/etc/supervisord.conf
# Escape hatch
# Escape hatch scripts
COPY ./scripts/debugging /app/scripts/debugging
COPY ./scripts/force_delete_connector_by_id.py /app/scripts/force_delete_connector_by_id.py
# Put logo in assets

View File

@@ -5,7 +5,6 @@ Revises: 47e5bef3a1d7
Create Date: 2024-11-06 13:15:53.302644
"""
import logging
from typing import cast
from alembic import op
import sqlalchemy as sa
@@ -20,13 +19,8 @@ down_revision = "47e5bef3a1d7"
branch_labels: None = None
depends_on: None = None
# Configure logging
logger = logging.getLogger("alembic.runtime.migration")
logger.setLevel(logging.INFO)
def upgrade() -> None:
logger.info(f"{revision}: create_table: slack_bot")
# Create new slack_bot table
op.create_table(
"slack_bot",
@@ -63,7 +57,6 @@ def upgrade() -> None:
)
# Handle existing Slack bot tokens first
logger.info(f"{revision}: Checking for existing Slack bot.")
bot_token = None
app_token = None
first_row_id = None
@@ -71,15 +64,12 @@ def upgrade() -> None:
try:
tokens = cast(dict, get_kv_store().load("slack_bot_tokens_config_key"))
except Exception:
logger.warning("No existing Slack bot tokens found.")
tokens = {}
bot_token = tokens.get("bot_token")
app_token = tokens.get("app_token")
if bot_token and app_token:
logger.info(f"{revision}: Found bot and app tokens.")
session = Session(bind=op.get_bind())
new_slack_bot = SlackBot(
name="Slack Bot (Migrated)",
@@ -170,10 +160,9 @@ def upgrade() -> None:
# Clean up old tokens if they existed
try:
if bot_token and app_token:
logger.info(f"{revision}: Removing old bot and app tokens.")
get_kv_store().delete("slack_bot_tokens_config_key")
except Exception:
logger.warning("tried to delete tokens in dynamic config but failed")
pass
# Rename the table
op.rename_table(
"slack_bot_config__standard_answer_category",
@@ -190,8 +179,6 @@ def upgrade() -> None:
# Drop the table with CASCADE to handle dependent objects
op.execute("DROP TABLE slack_bot_config CASCADE")
logger.info(f"{revision}: Migration complete.")
def downgrade() -> None:
# Recreate the old slack_bot_config table
@@ -273,7 +260,7 @@ def downgrade() -> None:
}
get_kv_store().store("slack_bot_tokens_config_key", tokens)
except Exception:
logger.warning("Failed to save tokens back to KV store")
pass
# Drop the new tables in reverse order
op.drop_table("slack_channel_config")

View File

@@ -52,7 +52,11 @@ def upgrade() -> None:
slack_bot_id, persona_id, channel_config, enable_auto_filters, is_default
) VALUES (
:bot_id, NULL,
'{"channel_name": null, "respond_member_group_list": [], "answer_filters": [], "follow_up_tags": []}',
'{"channel_name": null, '
'"respond_member_group_list": [], '
'"answer_filters": [], '
'"follow_up_tags": [], '
'"respond_tag_only": true}',
FALSE, TRUE
)
"""

View File

@@ -0,0 +1,53 @@
"""delete non-search assistants
Revision ID: f5437cc136c5
Revises: eaa3b5593925
Create Date: 2025-02-04 16:17:15.677256
"""
from alembic import op
# revision identifiers, used by Alembic.
revision = "f5437cc136c5"
down_revision = "eaa3b5593925"
branch_labels = None
depends_on = None
def upgrade() -> None:
pass
def downgrade() -> None:
# Fix: split the statements into multiple op.execute() calls
op.execute(
"""
WITH personas_without_search AS (
SELECT p.id
FROM persona p
LEFT JOIN persona__tool pt ON p.id = pt.persona_id
LEFT JOIN tool t ON pt.tool_id = t.id
GROUP BY p.id
HAVING COUNT(CASE WHEN t.in_code_tool_id = 'run_search' THEN 1 END) = 0
)
UPDATE slack_channel_config
SET persona_id = NULL
WHERE is_default = TRUE AND persona_id IN (SELECT id FROM personas_without_search)
"""
)
op.execute(
"""
WITH personas_without_search AS (
SELECT p.id
FROM persona p
LEFT JOIN persona__tool pt ON p.id = pt.persona_id
LEFT JOIN tool t ON pt.tool_id = t.id
GROUP BY p.id
HAVING COUNT(CASE WHEN t.in_code_tool_id = 'run_search' THEN 1 END) = 0
)
DELETE FROM slack_channel_config
WHERE is_default = FALSE AND persona_id IN (SELECT id FROM personas_without_search)
"""
)
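
A minimal demo of why the downgrade needs two separate op.execute() calls: a WITH clause scopes only the single statement that follows it, so the UPDATE and the DELETE must each carry their own CTE. The schema below is a simplified SQLite stand-in for the real persona/tool join, not the actual tables:

import sqlite3

conn = sqlite3.connect(":memory:")
conn.executescript(
    """
    CREATE TABLE persona (id INTEGER PRIMARY KEY, has_search INTEGER);
    CREATE TABLE slack_channel_config (
        id INTEGER PRIMARY KEY, persona_id INTEGER, is_default INTEGER);
    INSERT INTO persona VALUES (1, 0), (2, 1);  -- persona 1 lacks the search tool
    INSERT INTO slack_channel_config VALUES (10, 1, 1), (11, 1, 0), (12, 2, 0);
    """
)
cte = "WITH personas_without_search AS (SELECT id FROM persona WHERE has_search = 0) "
# Statement 1: detach default configs pointing at non-search personas.
conn.execute(
    cte + "UPDATE slack_channel_config SET persona_id = NULL "
    "WHERE is_default = 1 AND persona_id IN (SELECT id FROM personas_without_search)"
)
# Statement 2: delete non-default configs pointing at non-search personas.
conn.execute(
    cte + "DELETE FROM slack_channel_config "
    "WHERE is_default = 0 AND persona_id IN (SELECT id FROM personas_without_search)"
)
assert conn.execute(
    "SELECT persona_id FROM slack_channel_config WHERE id = 10"
).fetchone() == (None,)
assert [r[0] for r in conn.execute("SELECT id FROM slack_channel_config ORDER BY id")] == [10, 12]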

View File

@@ -2,8 +2,11 @@ from uuid import UUID
from sqlalchemy.orm import Session
from onyx.configs.constants import NotificationType
from onyx.db.models import Persona__User
from onyx.db.models import Persona__UserGroup
from onyx.db.notification import create_notification
from onyx.server.features.persona.models import PersonaSharedNotificationData
def make_persona_private(
@@ -23,6 +26,14 @@ def make_persona_private(
for user_uuid in user_ids:
db_session.add(Persona__User(persona_id=persona_id, user_id=user_uuid))
create_notification(
user_id=user_uuid,
notif_type=NotificationType.PERSONA_SHARED,
db_session=db_session,
additional_data=PersonaSharedNotificationData(
persona_id=persona_id,
).model_dump(),
)
if group_ids:
for group_id in group_ids:
db_session.add(
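
The additional_data payload built above is a pydantic model serialized with model_dump(); a standalone sketch of that call (the real PersonaSharedNotificationData may define more fields than assumed here):

from pydantic import BaseModel

class PersonaSharedNotificationData(BaseModel):
    persona_id: int

payload = PersonaSharedNotificationData(persona_id=7).model_dump()
assert payload == {"persona_id": 7}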

View File

@@ -218,14 +218,14 @@ def fetch_user_groups_for_user(
return db_session.scalars(stmt).all()
def construct_document_select_by_usergroup(
def construct_document_id_select_by_usergroup(
user_group_id: int,
) -> Select:
"""This returns a statement that should be executed using
.yield_per() to minimize overhead. The primary consumers of this function
are background processing task generators."""
stmt = (
select(Document)
select(Document.id)
.join(
DocumentByConnectorCredentialPair,
Document.id == DocumentByConnectorCredentialPair.id,
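
A standalone illustration of what the rename buys (hypothetical two-column schema): select(Document.id) streams bare ids instead of hydrating full ORM objects, which keeps .yield_per() batches cheap for the background task generators named in the docstring:

from sqlalchemy import Column, String, create_engine, select
from sqlalchemy.orm import Session, declarative_base

Base = declarative_base()

class Document(Base):
    __tablename__ = "document"
    id = Column(String, primary_key=True)
    blob = Column(String)  # stand-in for heavy columns the task never reads

engine = create_engine("sqlite://")
Base.metadata.create_all(engine)

with Session(engine) as session:
    session.add_all(Document(id=f"doc-{i}", blob="x" * 1024) for i in range(100))
    session.commit()
    # select(Document) would build 100 ORM objects; select(Document.id) yields
    # plain strings in lightweight batches of 50.
    stmt = select(Document.id).execution_options(yield_per=50)
    assert len(list(session.scalars(stmt))) == 100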

View File

@@ -64,6 +64,7 @@ async def _get_tenant_id_from_request(
try:
# Look up token data in Redis
token_data = await retrieve_auth_token_data_from_redis(request)
if not token_data:
@@ -87,13 +88,14 @@ async def _get_tenant_id_from_request(
if not is_valid_schema_name(tenant_id):
raise HTTPException(status_code=400, detail="Invalid tenant ID format")
return tenant_id
except Exception as e:
logger.error(f"Unexpected error in _get_tenant_id_from_request: {str(e)}")
raise HTTPException(status_code=500, detail="Internal server error")
finally:
if tenant_id:
return tenant_id
# As a final step, check for explicit tenant_id cookie
tenant_id_cookie = request.cookies.get(TENANT_ID_COOKIE_NAME)
if tenant_id_cookie and is_valid_schema_name(tenant_id_cookie):
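
A simplified, synchronous sketch of the resolution order this hunk moves to: the Redis-backed token wins when present, and the explicit tenant_id cookie is only a final fallback. Everything below except that ordering and the is_valid_schema_name gate is a stand-in for the real async plumbing:

import re

def is_valid_schema_name(name: str) -> bool:  # stand-in for the real validator
    return re.fullmatch(r"[A-Za-z0-9_]+", name) is not None

def resolve_tenant_id(token_tenant: str | None, cookie_tenant: str | None) -> str | None:
    if token_tenant and is_valid_schema_name(token_tenant):
        return token_tenant  # returned inside the try block in the real code
    if cookie_tenant and is_valid_schema_name(cookie_tenant):
        return cookie_tenant
    return None

assert resolve_tenant_id(None, "tenant_42") == "tenant_42"
assert resolve_tenant_id("tenant_7", "tenant_42") == "tenant_7"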

View File

@@ -24,6 +24,7 @@ from ee.onyx.server.tenants.user_mapping import get_tenant_id_for_email
from ee.onyx.server.tenants.user_mapping import user_owns_a_tenant
from onyx.auth.users import exceptions
from onyx.configs.app_configs import CONTROL_PLANE_API_BASE_URL
from onyx.configs.app_configs import DEV_MODE
from onyx.configs.constants import MilestoneRecordType
from onyx.db.engine import get_session_with_tenant
from onyx.db.engine import get_sqlalchemy_engine
@@ -85,7 +86,8 @@ async def create_tenant(email: str, referral_source: str | None = None) -> str:
# Provision tenant on data plane
await provision_tenant(tenant_id, email)
# Notify control plane
await notify_control_plane(tenant_id, email, referral_source)
if not DEV_MODE:
await notify_control_plane(tenant_id, email, referral_source)
except Exception as e:
logger.error(f"Tenant provisioning failed: {e}")
await rollback_tenant_provisioning(tenant_id)

View File

@@ -9,7 +9,6 @@ class CoreState(BaseModel):
This is the core state that is shared across all subgraphs.
"""
base_question: str = ""
log_messages: Annotated[list[str], add] = []
@@ -18,4 +17,4 @@ class SubgraphCoreState(BaseModel):
This is the core state that is shared across all subgraphs.
"""
log_messages: Annotated[list[str], add]
log_messages: Annotated[list[str], add] = []

View File

@@ -1,8 +1,8 @@
from datetime import datetime
from typing import cast
from langchain_core.messages import BaseMessage
from langchain_core.messages import HumanMessage
from langchain_core.messages import merge_message_runs
from langchain_core.runnables.config import RunnableConfig
from onyx.agents.agent_search.deep_search.initial.generate_individual_sub_answer.states import (
@@ -12,12 +12,39 @@ from onyx.agents.agent_search.deep_search.initial.generate_individual_sub_answer
SubQuestionAnswerCheckUpdate,
)
from onyx.agents.agent_search.models import GraphConfig
from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
binary_string_test,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AGENT_LLM_RATELIMIT_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AGENT_LLM_TIMEOUT_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AGENT_POSITIVE_VALUE_STR,
)
from onyx.agents.agent_search.shared_graph_utils.constants import AgentLLMErrorType
from onyx.agents.agent_search.shared_graph_utils.models import AgentErrorLoggingFormat
from onyx.agents.agent_search.shared_graph_utils.models import LLMNodeErrorStrings
from onyx.agents.agent_search.shared_graph_utils.utils import (
get_langgraph_node_log_string,
)
from onyx.agents.agent_search.shared_graph_utils.utils import parse_question_id
from onyx.configs.agent_configs import AGENT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_CHECK
from onyx.llm.chat_llm import LLMRateLimitError
from onyx.llm.chat_llm import LLMTimeoutError
from onyx.prompts.agent_search import SUB_ANSWER_CHECK_PROMPT
from onyx.prompts.agent_search import UNKNOWN_ANSWER
from onyx.utils.logger import setup_logger
logger = setup_logger()
_llm_node_error_strings = LLMNodeErrorStrings(
timeout="LLM Timeout Error. The sub-answer will be treated as 'relevant'",
rate_limit="LLM Rate Limit Error. The sub-answer will be treated as 'relevant'",
general_error="General LLM Error. The sub-answer will be treated as 'relevant'",
)
def check_sub_answer(
@@ -53,14 +80,46 @@ def check_sub_answer(
graph_config = cast(GraphConfig, config["metadata"]["config"])
fast_llm = graph_config.tooling.fast_llm
response = list(
fast_llm.stream(
agent_error: AgentErrorLoggingFormat | None = None
response: BaseMessage | None = None
try:
response = fast_llm.invoke(
prompt=msg,
timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_CHECK,
)
)
quality_str: str = merge_message_runs(response, chunk_separator="")[0].content
answer_quality = "yes" in quality_str.lower()
except LLMTimeoutError:
agent_error = AgentErrorLoggingFormat(
error_type=AgentLLMErrorType.TIMEOUT,
error_message=AGENT_LLM_TIMEOUT_MESSAGE,
error_result=_llm_node_error_strings.timeout,
)
logger.error("LLM Timeout Error - check sub answer")
except LLMRateLimitError:
agent_error = AgentErrorLoggingFormat(
error_type=AgentLLMErrorType.RATE_LIMIT,
error_message=AGENT_LLM_RATELIMIT_MESSAGE,
error_result=_llm_node_error_strings.rate_limit,
)
logger.error("LLM Rate Limit Error - check sub answer")
if agent_error:
answer_quality = True
log_result = agent_error.error_result
else:
if response:
quality_str: str = cast(str, response.content)
answer_quality = binary_string_test(
text=quality_str, positive_value=AGENT_POSITIVE_VALUE_STR
)
else:
answer_quality = True
quality_str = "yes - because LLM error"
log_result = f"Answer quality: {quality_str}"
return SubQuestionAnswerCheckUpdate(
answer_quality=answer_quality,
@@ -69,7 +128,7 @@ def check_sub_answer(
graph_component="initial - generate individual sub answer",
node_name="check sub answer",
node_start_time=node_start_time,
result=f"Answer quality: {quality_str}",
result=log_result,
)
],
)
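
The success path above delegates to binary_string_test. The real helper lives in shared_graph_utils.agent_prompt_ops; judging by its call site, its behavior is roughly a case-insensitive containment check, sketched here as an assumption:

def binary_string_test(text: str, positive_value: str = "yes") -> bool:
    # stand-in, not the real implementation
    return positive_value.lower() in text.lower()

assert binary_string_test("Yes - the sub-answer addresses the question.")
assert not binary_string_test("No, it does not.")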

View File

@@ -16,6 +16,20 @@ from onyx.agents.agent_search.models import GraphConfig
from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
build_sub_question_answer_prompt,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AGENT_LLM_RATELIMIT_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AGENT_LLM_TIMEOUT_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AgentLLMErrorType,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
LLM_ANSWER_ERROR_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.models import AgentErrorLoggingFormat
from onyx.agents.agent_search.shared_graph_utils.models import LLMNodeErrorStrings
from onyx.agents.agent_search.shared_graph_utils.utils import get_answer_citation_ids
from onyx.agents.agent_search.shared_graph_utils.utils import (
get_langgraph_node_log_string,
@@ -30,11 +44,20 @@ from onyx.chat.models import StreamStopInfo
from onyx.chat.models import StreamStopReason
from onyx.chat.models import StreamType
from onyx.configs.agent_configs import AGENT_MAX_ANSWER_CONTEXT_DOCS
from onyx.configs.agent_configs import AGENT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_GENERATION
from onyx.llm.chat_llm import LLMRateLimitError
from onyx.llm.chat_llm import LLMTimeoutError
from onyx.prompts.agent_search import NO_RECOVERED_DOCS
from onyx.utils.logger import setup_logger
logger = setup_logger()
_llm_node_error_strings = LLMNodeErrorStrings(
timeout="LLM Timeout Error. A sub-answer could not be constructed and the sub-question will be ignored.",
rate_limit="LLM Rate Limit Error. A sub-answer could not be constructed and the sub-question will be ignored.",
general_error="General LLM Error. A sub-answer could not be constructed and the sub-question will be ignored.",
)
def generate_sub_answer(
state: AnswerQuestionState,
@@ -57,6 +80,8 @@ def generate_sub_answer(
if len(context_docs) == 0:
answer_str = NO_RECOVERED_DOCS
cited_documents: list = []
log_results = "No documents retrieved"
write_custom_event(
"sub_answers",
AgentAnswerPiece(
@@ -79,41 +104,67 @@ def generate_sub_answer(
response: list[str | list[str | dict[str, Any]]] = []
dispatch_timings: list[float] = []
for message in fast_llm.stream(
prompt=msg,
):
# TODO: in principle, the answer here COULD contain images, but we don't support that yet
content = message.content
if not isinstance(content, str):
raise ValueError(
f"Expected content to be a string, but got {type(content)}"
agent_error: AgentErrorLoggingFormat | None = None
try:
for message in fast_llm.stream(
prompt=msg,
timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_GENERATION,
):
# TODO: in principle, the answer here COULD contain images, but we don't support that yet
content = message.content
if not isinstance(content, str):
raise ValueError(
f"Expected content to be a string, but got {type(content)}"
)
start_stream_token = datetime.now()
write_custom_event(
"sub_answers",
AgentAnswerPiece(
answer_piece=content,
level=level,
level_question_num=question_num,
answer_type="agent_sub_answer",
),
writer,
)
start_stream_token = datetime.now()
write_custom_event(
"sub_answers",
AgentAnswerPiece(
answer_piece=content,
level=level,
level_question_num=question_num,
answer_type="agent_sub_answer",
),
writer,
)
end_stream_token = datetime.now()
dispatch_timings.append(
(end_stream_token - start_stream_token).microseconds
)
response.append(content)
end_stream_token = datetime.now()
dispatch_timings.append(
(end_stream_token - start_stream_token).microseconds
)
response.append(content)
answer_str = merge_message_runs(response, chunk_separator="")[0].content
logger.debug(
f"Average dispatch time: {sum(dispatch_timings) / len(dispatch_timings)}"
)
except LLMTimeoutError:
agent_error = AgentErrorLoggingFormat(
error_type=AgentLLMErrorType.TIMEOUT,
error_message=AGENT_LLM_TIMEOUT_MESSAGE,
error_result=_llm_node_error_strings.timeout,
)
logger.error("LLM Timeout Error - generate sub answer")
except LLMRateLimitError:
agent_error = AgentErrorLoggingFormat(
error_type=AgentLLMErrorType.RATE_LIMIT,
error_message=AGENT_LLM_RATELIMIT_MESSAGE,
error_result=_llm_node_error_strings.rate_limit,
)
logger.error("LLM Rate Limit Error - generate sub answer")
answer_citation_ids = get_answer_citation_ids(answer_str)
cited_documents = [
context_docs[id] for id in answer_citation_ids if id < len(context_docs)
]
if agent_error:
answer_str = LLM_ANSWER_ERROR_MESSAGE
cited_documents = []
log_results = (
agent_error.error_result
or "Sub-answer generation failed due to LLM error"
)
else:
answer_str = merge_message_runs(response, chunk_separator="")[0].content
answer_citation_ids = get_answer_citation_ids(answer_str)
cited_documents = [
context_docs[id] for id in answer_citation_ids if id < len(context_docs)
]
log_results = None
stop_event = StreamStopInfo(
stop_reason=StreamStopReason.FINISHED,
@@ -131,7 +182,7 @@ def generate_sub_answer(
graph_component="initial - generate individual sub answer",
node_name="generate sub answer",
node_start_time=node_start_time,
result="",
result=log_results or "",
)
],
)
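
Several nodes in this diff converge on the same shape: catch LLMTimeoutError and LLMRateLimitError separately, record a structured AgentErrorLoggingFormat, and substitute a per-node fallback. A self-contained sketch with stand-ins for the onyx.llm.chat_llm exceptions and the shared models:

from dataclasses import dataclass
from enum import Enum

class AgentLLMErrorType(str, Enum):
    TIMEOUT = "timeout"
    RATE_LIMIT = "rate_limit"

class LLMTimeoutError(Exception): ...
class LLMRateLimitError(Exception): ...

@dataclass
class AgentErrorLoggingFormat:
    error_type: AgentLLMErrorType
    error_message: str
    error_result: str

def invoke_with_fallback(invoke, fallback: str):
    """Return (result, error); on timeout/rate limit, substitute the fallback."""
    try:
        return invoke(), None
    except LLMTimeoutError:
        err = AgentErrorLoggingFormat(
            AgentLLMErrorType.TIMEOUT, "LLM Timeout Error", fallback)
    except LLMRateLimitError:
        err = AgentErrorLoggingFormat(
            AgentLLMErrorType.RATE_LIMIT, "LLM Rate Limit Error", fallback)
    return fallback, err

def flaky_llm() -> str:
    raise LLMTimeoutError()

result, err = invoke_with_fallback(flaky_llm, "sub-question ignored")
assert result == "sub-question ignored"
assert err is not None and err.error_type is AgentLLMErrorType.TIMEOUT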

View File

@@ -42,10 +42,8 @@ class SubQuestionRetrievalIngestionUpdate(LoggerUpdate, BaseModel):
class SubQuestionAnsweringInput(SubgraphCoreState):
question: str = ""
question_id: str = (
"" # 0_0 is original question, everything else is <level>_<question_num>.
)
question: str
question_id: str
# level 0 is original question and first decomposition, level 1 is follow up, etc
# question_num is a unique number per original question per level.

View File

@@ -26,7 +26,18 @@ from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
trim_prompt_piece,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AGENT_LLM_RATELIMIT_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AGENT_LLM_TIMEOUT_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AgentLLMErrorType,
)
from onyx.agents.agent_search.shared_graph_utils.models import AgentErrorLoggingFormat
from onyx.agents.agent_search.shared_graph_utils.models import InitialAgentResultStats
from onyx.agents.agent_search.shared_graph_utils.models import LLMNodeErrorStrings
from onyx.agents.agent_search.shared_graph_utils.operators import (
dedup_inference_sections,
)
@@ -42,12 +53,16 @@ from onyx.agents.agent_search.shared_graph_utils.utils import remove_document_ci
from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
from onyx.chat.models import AgentAnswerPiece
from onyx.chat.models import ExtendedToolResponse
from onyx.chat.models import StreamingError
from onyx.configs.agent_configs import AGENT_MAX_ANSWER_CONTEXT_DOCS
from onyx.configs.agent_configs import AGENT_MIN_ORIG_QUESTION_DOCS
from onyx.context.search.models import InferenceSection
from onyx.prompts.agent_search import (
INITIAL_ANSWER_PROMPT_W_SUB_QUESTIONS,
from onyx.configs.agent_configs import (
AGENT_TIMEOUT_OVERRIDE_LLM_INITIAL_ANSWER_GENERATION,
)
from onyx.context.search.models import InferenceSection
from onyx.llm.chat_llm import LLMRateLimitError
from onyx.llm.chat_llm import LLMTimeoutError
from onyx.prompts.agent_search import INITIAL_ANSWER_PROMPT_W_SUB_QUESTIONS
from onyx.prompts.agent_search import (
INITIAL_ANSWER_PROMPT_WO_SUB_QUESTIONS,
)
@@ -57,6 +72,12 @@ from onyx.prompts.agent_search import (
from onyx.prompts.agent_search import UNKNOWN_ANSWER
from onyx.tools.tool_implementations.search.search_tool import yield_search_responses
_llm_node_error_strings = LLMNodeErrorStrings(
timeout="LLM Timeout Error. The initial answer could not be generated.",
rate_limit="LLM Rate Limit Error. The initial answer could not be generated.",
general_error="General LLM Error. The initial answer could not be generated.",
)
def generate_initial_answer(
state: SubQuestionRetrievalState,
@@ -224,30 +245,82 @@ def generate_initial_answer(
streamed_tokens: list[str | list[str | dict[str, Any]]] = [""]
dispatch_timings: list[float] = []
for message in model.stream(msg):
# TODO: in principle, the answer here COULD contain images, but we don't support that yet
content = message.content
if not isinstance(content, str):
raise ValueError(
f"Expected content to be a string, but got {type(content)}"
)
start_stream_token = datetime.now()
agent_error: AgentErrorLoggingFormat | None = None
try:
for message in model.stream(
msg,
timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_INITIAL_ANSWER_GENERATION,
):
# TODO: in principle, the answer here COULD contain images, but we don't support that yet
content = message.content
if not isinstance(content, str):
raise ValueError(
f"Expected content to be a string, but got {type(content)}"
)
start_stream_token = datetime.now()
write_custom_event(
"initial_agent_answer",
AgentAnswerPiece(
answer_piece=content,
level=0,
level_question_num=0,
answer_type="agent_level_answer",
),
writer,
)
end_stream_token = datetime.now()
dispatch_timings.append(
(end_stream_token - start_stream_token).microseconds
)
streamed_tokens.append(content)
except LLMTimeoutError:
agent_error = AgentErrorLoggingFormat(
error_type=AgentLLMErrorType.TIMEOUT,
error_message=AGENT_LLM_TIMEOUT_MESSAGE,
error_result=_llm_node_error_strings.timeout,
)
logger.error("LLM Timeout Error - generate initial answer")
except LLMRateLimitError:
agent_error = AgentErrorLoggingFormat(
error_type=AgentLLMErrorType.RATE_LIMIT,
error_message=AGENT_LLM_RATELIMIT_MESSAGE,
error_result=_llm_node_error_strings.rate_limit,
)
logger.error("LLM Rate Limit Error - generate initial answer")
if agent_error:
write_custom_event(
"initial_agent_answer",
AgentAnswerPiece(
answer_piece=content,
level=0,
level_question_num=0,
answer_type="agent_level_answer",
StreamingError(
error=AGENT_LLM_TIMEOUT_MESSAGE,
),
writer,
)
end_stream_token = datetime.now()
dispatch_timings.append(
(end_stream_token - start_stream_token).microseconds
return InitialAnswerUpdate(
initial_answer=None,
error=AgentErrorLoggingFormat(
error_message=agent_error.error_message or "An LLM error occurred",
error_type=agent_error.error_type,
error_result=agent_error.error_result,
),
initial_agent_stats=None,
generated_sub_questions=sub_questions,
agent_base_end_time=None,
agent_base_metrics=None,
log_messages=[
get_langgraph_node_log_string(
graph_component="initial - generate initial answer",
node_name="generate initial answer",
node_start_time=node_start_time,
result=agent_error.error_result or "An LLM error occurred",
)
],
)
streamed_tokens.append(content)
logger.debug(
f"Average dispatch time for initial answer: {sum(dispatch_timings) / len(dispatch_timings)}"

View File

@@ -25,7 +25,7 @@ def validate_initial_answer(
f"--------{node_start_time}--------Checking for base answer validity - for not set True/False manually"
)
verdict = True
verdict = True # not actually required as already streamed out. Refinement will do similar
return InitialAnswerQualityUpdate(
initial_answer_quality_eval=verdict,

View File

@@ -12,8 +12,9 @@ from onyx.agents.agent_search.deep_search.initial.generate_initial_answer.states
from onyx.agents.agent_search.deep_search.main.models import (
AgentRefinedMetrics,
)
from onyx.agents.agent_search.deep_search.main.operations import dispatch_subquestion
from onyx.agents.agent_search.deep_search.main.operations import (
dispatch_subquestion,
dispatch_subquestion_sep,
)
from onyx.agents.agent_search.deep_search.main.states import (
InitialQuestionDecompositionUpdate,
@@ -22,6 +23,18 @@ from onyx.agents.agent_search.models import GraphConfig
from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
build_history_prompt,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AGENT_LLM_RATELIMIT_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AGENT_LLM_TIMEOUT_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AgentLLMErrorType,
)
from onyx.agents.agent_search.shared_graph_utils.models import AgentErrorLoggingFormat
from onyx.agents.agent_search.shared_graph_utils.models import BaseMessage_Content
from onyx.agents.agent_search.shared_graph_utils.models import LLMNodeErrorStrings
from onyx.agents.agent_search.shared_graph_utils.utils import dispatch_separated
from onyx.agents.agent_search.shared_graph_utils.utils import (
get_langgraph_node_log_string,
@@ -32,6 +45,11 @@ from onyx.chat.models import StreamStopReason
from onyx.chat.models import StreamType
from onyx.chat.models import SubQuestionPiece
from onyx.configs.agent_configs import AGENT_NUM_DOCS_FOR_DECOMPOSITION
from onyx.configs.agent_configs import (
AGENT_TIMEOUT_OVERRIDE_LLM_SUBQUESTION_GENERATION,
)
from onyx.llm.chat_llm import LLMRateLimitError
from onyx.llm.chat_llm import LLMTimeoutError
from onyx.prompts.agent_search import (
INITIAL_DECOMPOSITION_PROMPT_QUESTIONS_AFTER_SEARCH,
)
@@ -42,6 +60,12 @@ from onyx.utils.logger import setup_logger
logger = setup_logger()
_llm_node_error_strings = LLMNodeErrorStrings(
timeout="LLM Timeout Error. Sub-questions could not be generated.",
rate_limit="LLM Rate Limit Error. Sub-questions could not be generated.",
general_error="General LLM Error. Sub-questions could not be generated.",
)
def decompose_orig_question(
state: SubQuestionRetrievalState,
@@ -109,10 +133,37 @@ def decompose_orig_question(
),
writer,
)
# dispatches custom events for subquestion tokens, adding in subquestion ids.
streamed_tokens = dispatch_separated(
model.stream(msg), dispatch_subquestion(0, writer)
)
agent_error: AgentErrorLoggingFormat | None = None
streamed_tokens: list[BaseMessage_Content] = []
try:
streamed_tokens = dispatch_separated(
model.stream(
msg,
timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_SUBQUESTION_GENERATION,
),
dispatch_subquestion(0, writer),
sep_callback=dispatch_subquestion_sep(0, writer),
)
except LLMTimeoutError as e:
agent_error = AgentErrorLoggingFormat(
error_type=AgentLLMErrorType.TIMEOUT,
error_message=AGENT_LLM_TIMEOUT_MESSAGE,
error_result=_llm_node_error_strings.timeout,
)
logger.error("LLM Timeout Error - decompose orig question")
raise e # fail loudly on this critical step
except LLMRateLimitError as e:
agent_error = AgentErrorLoggingFormat(
error_type=AgentLLMErrorType.RATE_LIMIT,
error_message=AGENT_LLM_RATELIMIT_MESSAGE,
error_result=_llm_node_error_strings.rate_limit,
)
logger.error("LLM Rate Limit Error - decompose orig question")
raise e
stop_event = StreamStopInfo(
stop_reason=StreamStopReason.FINISHED,
@@ -121,19 +172,19 @@ def decompose_orig_question(
)
write_custom_event("stream_finished", stop_event, writer)
deomposition_response = merge_content(*streamed_tokens)
if agent_error:
initial_sub_questions: list[str] = []
log_result = agent_error.error_result
else:
deomposition_response = merge_content(*streamed_tokens)
# this call should only return strings. Commenting out for efficiency
# assert [type(tok) == str for tok in streamed_tokens]
list_of_subqs = cast(str, deomposition_response).split("\n")
# use no-op cast() instead of str() which runs code
# list_of_subquestions = clean_and_parse_list_string(cast(str, response))
list_of_subqs = cast(str, deomposition_response).split("\n")
decomp_list: list[str] = [sq.strip() for sq in list_of_subqs if sq.strip() != ""]
initial_sub_questions = [sq.strip() for sq in list_of_subqs if sq.strip() != ""]
log_result = f"decomposed original question into {len(initial_sub_questions)} subquestions"
return InitialQuestionDecompositionUpdate(
initial_sub_questions=decomp_list,
initial_sub_questions=initial_sub_questions,
agent_start_time=agent_start_time,
agent_refined_start_time=None,
agent_refined_end_time=None,
@@ -147,7 +198,7 @@ def decompose_orig_question(
graph_component="initial - generate sub answers",
node_name="decompose original question",
node_start_time=node_start_time,
result=f"decomposed original question into {len(decomp_list)} subquestions",
result=log_result,
)
],
)
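
dispatch_separated and its new sep_callback hook are onyx internals; as a rough contract implied by the call sites (an assumption, not the real implementation): content tokens flow through the per-item dispatcher, and the separator callback fires each time a newline closes out one sub-question:

from collections.abc import Callable, Iterator

def dispatch_separated(
    tokens: Iterator[str],
    dispatch: Callable[[int, str], None],
    sep_callback: Callable[[int], None] | None = None,
    sep: str = "\n",
) -> list[str]:
    streamed: list[str] = []
    item_num = 1
    for token in tokens:
        if sep in token:
            if sep_callback is not None:
                sep_callback(item_num)  # one sub-question finished streaming
            item_num += 1
        else:
            dispatch(item_num, token)
        streamed.append(token)
    return streamed

seen: list[tuple[int, str]] = []
finished: list[int] = []
dispatch_separated(
    iter(["What is X?", "\n", "How does Y work?", "\n"]),
    lambda num, tok: seen.append((num, tok)),
    sep_callback=finished.append,
)
assert seen == [(1, "What is X?"), (2, "How does Y work?")]
assert finished == [1, 2]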

View File

@@ -252,9 +252,7 @@ if __name__ == "__main__":
db_session, primary_llm, fast_llm, search_request
)
inputs = MainInput(
base_question=graph_config.inputs.search_request.query, log_messages=[]
)
inputs = MainInput(log_messages=[])
for thing in compiled_graph.stream(
input=inputs,

View File

@@ -1,6 +1,7 @@
from datetime import datetime
from typing import cast
from langchain_core.messages import BaseMessage
from langchain_core.messages import HumanMessage
from langchain_core.runnables import RunnableConfig
from langgraph.types import StreamWriter
@@ -10,14 +11,37 @@ from onyx.agents.agent_search.deep_search.main.states import (
)
from onyx.agents.agent_search.deep_search.main.states import MainState
from onyx.agents.agent_search.models import GraphConfig
from onyx.agents.agent_search.shared_graph_utils.constants import (
AGENT_LLM_RATELIMIT_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AGENT_LLM_TIMEOUT_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AgentLLMErrorType,
)
from onyx.agents.agent_search.shared_graph_utils.models import AgentErrorLoggingFormat
from onyx.agents.agent_search.shared_graph_utils.models import LLMNodeErrorStrings
from onyx.agents.agent_search.shared_graph_utils.utils import (
get_langgraph_node_log_string,
)
from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
from onyx.chat.models import RefinedAnswerImprovement
from onyx.configs.agent_configs import AGENT_TIMEOUT_OVERRIDE_LLM_COMPARE_ANSWERS
from onyx.llm.chat_llm import LLMRateLimitError
from onyx.llm.chat_llm import LLMTimeoutError
from onyx.prompts.agent_search import (
INITIAL_REFINED_ANSWER_COMPARISON_PROMPT,
)
from onyx.utils.logger import setup_logger
logger = setup_logger()
_llm_node_error_strings = LLMNodeErrorStrings(
timeout="The LLM timed out, and the answers could not be compared.",
rate_limit="The LLM encountered a rate limit, and the answers could not be compared.",
general_error="The LLM encountered an error, and the answers could not be compared.",
)
def compare_answers(
@@ -40,15 +64,46 @@ def compare_answers(
msg = [HumanMessage(content=compare_answers_prompt)]
agent_error: AgentErrorLoggingFormat | None = None
# Get the rewritten queries in a defined format
model = graph_config.tooling.fast_llm
resp: BaseMessage | None = None
refined_answer_improvement: bool | None = None
# no need to stream this
resp = model.invoke(msg)
try:
resp = model.invoke(
msg, timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_COMPARE_ANSWERS
)
refined_answer_improvement = (
isinstance(resp.content, str) and "yes" in resp.content.lower()
)
except LLMTimeoutError:
agent_error = AgentErrorLoggingFormat(
error_type=AgentLLMErrorType.TIMEOUT,
error_message=AGENT_LLM_TIMEOUT_MESSAGE,
error_result=_llm_node_error_strings.timeout,
)
logger.error("LLM Timeout Error - compare answers")
# continue as True in this support step
except LLMRateLimitError:
agent_error = AgentErrorLoggingFormat(
error_type=AgentLLMErrorType.RATE_LIMIT,
error_message=AGENT_LLM_RATELIMIT_MESSAGE,
error_result=_llm_node_error_strings.rate_limit,
)
logger.error("LLM Rate Limit Error - compare answers")
# continue as True in this support step
if agent_error or resp is None:
refined_answer_improvement = True
if agent_error:
log_result = agent_error.error_result
else:
log_result = "An answer could not be generated."
else:
refined_answer_improvement = (
isinstance(resp.content, str) and "yes" in resp.content.lower()
)
log_result = f"Answer comparison: {refined_answer_improvement}"
write_custom_event(
"refined_answer_improvement",
@@ -65,7 +120,7 @@ def compare_answers(
graph_component="main",
node_name="compare answers",
node_start_time=node_start_time,
result=f"Answer comparison: {refined_answer_improvement}",
result=log_result,
)
],
)

View File

@@ -9,8 +9,9 @@ from langgraph.types import StreamWriter
from onyx.agents.agent_search.deep_search.main.models import (
RefinementSubQuestion,
)
from onyx.agents.agent_search.deep_search.main.operations import dispatch_subquestion
from onyx.agents.agent_search.deep_search.main.operations import (
dispatch_subquestion,
dispatch_subquestion_sep,
)
from onyx.agents.agent_search.deep_search.main.states import MainState
from onyx.agents.agent_search.deep_search.main.states import (
@@ -20,6 +21,18 @@ from onyx.agents.agent_search.models import GraphConfig
from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
build_history_prompt,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AGENT_LLM_RATELIMIT_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AGENT_LLM_TIMEOUT_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AgentLLMErrorType,
)
from onyx.agents.agent_search.shared_graph_utils.models import AgentErrorLoggingFormat
from onyx.agents.agent_search.shared_graph_utils.models import BaseMessage_Content
from onyx.agents.agent_search.shared_graph_utils.models import LLMNodeErrorStrings
from onyx.agents.agent_search.shared_graph_utils.utils import dispatch_separated
from onyx.agents.agent_search.shared_graph_utils.utils import (
format_entity_term_extraction,
@@ -29,10 +42,25 @@ from onyx.agents.agent_search.shared_graph_utils.utils import (
)
from onyx.agents.agent_search.shared_graph_utils.utils import make_question_id
from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
from onyx.chat.models import StreamingError
from onyx.configs.agent_configs import (
AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_SUBQUESTION_GENERATION,
)
from onyx.llm.chat_llm import LLMRateLimitError
from onyx.llm.chat_llm import LLMTimeoutError
from onyx.prompts.agent_search import (
REFINEMENT_QUESTION_DECOMPOSITION_PROMPT,
)
from onyx.tools.models import ToolCallKickoff
from onyx.utils.logger import setup_logger
logger = setup_logger()
_llm_node_error_strings = LLMNodeErrorStrings(
timeout="The LLM timed out. The sub-questions could not be generated.",
rate_limit="The LLM encountered a rate limit. The sub-questions could not be generated.",
general_error="The LLM encountered an error. The sub-questions could not be generated.",
)
def create_refined_sub_questions(
@@ -95,27 +123,65 @@ def create_refined_sub_questions(
# Grader
model = graph_config.tooling.fast_llm
streamed_tokens = dispatch_separated(
model.stream(msg), dispatch_subquestion(1, writer)
)
response = merge_content(*streamed_tokens)
agent_error: AgentErrorLoggingFormat | None = None
streamed_tokens: list[BaseMessage_Content] = []
try:
streamed_tokens = dispatch_separated(
model.stream(
msg,
timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_SUBQUESTION_GENERATION,
),
dispatch_subquestion(1, writer),
sep_callback=dispatch_subquestion_sep(1, writer),
)
except LLMTimeoutError:
agent_error = AgentErrorLoggingFormat(
error_type=AgentLLMErrorType.TIMEOUT,
error_message=AGENT_LLM_TIMEOUT_MESSAGE,
error_result=_llm_node_error_strings.timeout,
)
logger.error("LLM Timeout Error - create refined sub questions")
if isinstance(response, str):
parsed_response = [q for q in response.split("\n") if q.strip() != ""]
else:
raise ValueError("LLM response is not a string")
except LLMRateLimitError:
agent_error = AgentErrorLoggingFormat(
error_type=AgentLLMErrorType.RATE_LIMIT,
error_message=AGENT_LLM_RATELIMIT_MESSAGE,
error_result=_llm_node_error_strings.rate_limit,
)
logger.error("LLM Rate Limit Error - create refined sub questions")
refined_sub_question_dict = {}
for sub_question_num, sub_question in enumerate(parsed_response):
refined_sub_question = RefinementSubQuestion(
sub_question=sub_question,
sub_question_id=make_question_id(1, sub_question_num + 1),
verified=False,
answered=False,
answer="",
if agent_error:
refined_sub_question_dict: dict[int, RefinementSubQuestion] = {}
log_result = agent_error.error_result
write_custom_event(
"refined_sub_question_creation_error",
StreamingError(
error="Your LLM was not able to create refined sub questions in time and timed out. Please try again.",
),
writer,
)
refined_sub_question_dict[sub_question_num + 1] = refined_sub_question
else:
response = merge_content(*streamed_tokens)
if isinstance(response, str):
parsed_response = [q for q in response.split("\n") if q.strip() != ""]
else:
raise ValueError("LLM response is not a string")
refined_sub_question_dict = {}
for sub_question_num, sub_question in enumerate(parsed_response):
refined_sub_question = RefinementSubQuestion(
sub_question=sub_question,
sub_question_id=make_question_id(1, sub_question_num + 1),
verified=False,
answered=False,
answer="",
)
refined_sub_question_dict[sub_question_num + 1] = refined_sub_question
log_result = f"Created {len(refined_sub_question_dict)} refined sub questions"
return RefinedQuestionDecompositionUpdate(
refined_sub_questions=refined_sub_question_dict,
@@ -125,7 +191,7 @@ def create_refined_sub_questions(
graph_component="main",
node_name="create refined sub questions",
node_start_time=node_start_time,
result=f"Created {len(refined_sub_question_dict)} refined sub questions",
result=log_result,
)
],
)

View File

@@ -26,6 +26,19 @@ def decide_refinement_need(
decision = True # TODO: just for current testing purposes
if state.error:
return RequireRefinemenEvalUpdate(
require_refined_answer_eval=False,
log_messages=[
get_langgraph_node_log_string(
graph_component="main",
node_name="decide refinement need",
node_start_time=node_start_time,
result="Timeout Error",
)
],
)
log_messages = [
get_langgraph_node_log_string(
graph_component="main",

View File

@@ -21,6 +21,9 @@ from onyx.agents.agent_search.shared_graph_utils.utils import format_docs
from onyx.agents.agent_search.shared_graph_utils.utils import (
get_langgraph_node_log_string,
)
from onyx.configs.agent_configs import (
AGENT_TIMEOUT_OVERRIDE_LLM_ENTITY_TERM_EXTRACTION,
)
from onyx.configs.constants import NUM_EXPLORATORY_DOCS
from onyx.prompts.agent_search import ENTITY_TERM_EXTRACTION_PROMPT
from onyx.prompts.agent_search import ENTITY_TERM_EXTRACTION_PROMPT_JSON_EXAMPLE
@@ -81,6 +84,7 @@ def extract_entities_terms(
# Grader
llm_response = fast_llm.invoke(
prompt=msg,
timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_ENTITY_TERM_EXTRACTION,
)
cleaned_response = (

View File

@@ -11,7 +11,6 @@ from onyx.agents.agent_search.deep_search.main.models import (
AgentRefinedMetrics,
)
from onyx.agents.agent_search.deep_search.main.operations import get_query_info
from onyx.agents.agent_search.deep_search.main.operations import logger
from onyx.agents.agent_search.deep_search.main.states import MainState
from onyx.agents.agent_search.deep_search.main.states import (
RefinedAnswerUpdate,
@@ -23,7 +22,18 @@ from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
trim_prompt_piece,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AGENT_LLM_RATELIMIT_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AGENT_LLM_TIMEOUT_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AgentLLMErrorType,
)
from onyx.agents.agent_search.shared_graph_utils.models import AgentErrorLoggingFormat
from onyx.agents.agent_search.shared_graph_utils.models import InferenceSection
from onyx.agents.agent_search.shared_graph_utils.models import LLMNodeErrorStrings
from onyx.agents.agent_search.shared_graph_utils.models import RefinedAgentStats
from onyx.agents.agent_search.shared_graph_utils.operators import (
dedup_inference_sections,
@@ -43,8 +53,14 @@ from onyx.agents.agent_search.shared_graph_utils.utils import (
from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
from onyx.chat.models import AgentAnswerPiece
from onyx.chat.models import ExtendedToolResponse
from onyx.chat.models import StreamingError
from onyx.configs.agent_configs import AGENT_MAX_ANSWER_CONTEXT_DOCS
from onyx.configs.agent_configs import AGENT_MIN_ORIG_QUESTION_DOCS
from onyx.configs.agent_configs import (
AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_GENERATION,
)
from onyx.llm.chat_llm import LLMRateLimitError
from onyx.llm.chat_llm import LLMTimeoutError
from onyx.prompts.agent_search import (
REFINED_ANSWER_PROMPT_W_SUB_QUESTIONS,
)
@@ -56,6 +72,15 @@ from onyx.prompts.agent_search import (
)
from onyx.prompts.agent_search import UNKNOWN_ANSWER
from onyx.tools.tool_implementations.search.search_tool import yield_search_responses
from onyx.utils.logger import setup_logger
logger = setup_logger()
_llm_node_error_strings = LLMNodeErrorStrings(
timeout="The LLM timed out. The refined answer could not be generated.",
rate_limit="The LLM encountered a rate limit. The refined answer could not be generated.",
general_error="The LLM encountered an error. The refined answer could not be generated.",
)
def generate_refined_answer(
@@ -231,28 +256,80 @@ def generate_refined_answer(
streamed_tokens: list[str | list[str | dict[str, Any]]] = [""]
dispatch_timings: list[float] = []
for message in model.stream(msg):
# TODO: in principle, the answer here COULD contain images, but we don't support that yet
content = message.content
if not isinstance(content, str):
raise ValueError(
f"Expected content to be a string, but got {type(content)}"
)
agent_error: AgentErrorLoggingFormat | None = None
start_stream_token = datetime.now()
try:
for message in model.stream(
msg, timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_GENERATION
):
# TODO: in principle, the answer here COULD contain images, but we don't support that yet
content = message.content
if not isinstance(content, str):
raise ValueError(
f"Expected content to be a string, but got {type(content)}"
)
start_stream_token = datetime.now()
write_custom_event(
"refined_agent_answer",
AgentAnswerPiece(
answer_piece=content,
level=1,
level_question_num=0,
answer_type="agent_level_answer",
),
writer,
)
end_stream_token = datetime.now()
dispatch_timings.append(
(end_stream_token - start_stream_token).microseconds
)
streamed_tokens.append(content)
except LLMTimeoutError:
agent_error = AgentErrorLoggingFormat(
error_type=AgentLLMErrorType.TIMEOUT,
error_message=AGENT_LLM_TIMEOUT_MESSAGE,
error_result=_llm_node_error_strings.timeout,
)
logger.error("LLM Timeout Error - generate refined answer")
except LLMRateLimitError:
agent_error = AgentErrorLoggingFormat(
error_type=AgentLLMErrorType.RATE_LIMIT,
error_message=AGENT_LLM_RATELIMIT_MESSAGE,
error_result=_llm_node_error_strings.rate_limit,
)
logger.error("LLM Rate Limit Error - generate refined answer")
if agent_error:
write_custom_event(
"refined_agent_answer",
AgentAnswerPiece(
answer_piece=content,
level=1,
level_question_num=0,
answer_type="agent_level_answer",
"initial_agent_answer",
StreamingError(
error=AGENT_LLM_TIMEOUT_MESSAGE,
),
writer,
)
end_stream_token = datetime.now()
dispatch_timings.append((end_stream_token - start_stream_token).microseconds)
streamed_tokens.append(content)
return RefinedAnswerUpdate(
refined_answer=None,
refined_answer_quality=False, # TODO: replace this with the actual check value
refined_agent_stats=None,
agent_refined_end_time=None,
agent_refined_metrics=AgentRefinedMetrics(
refined_doc_boost_factor=0.0,
refined_question_boost_factor=0.0,
duration_s=None,
),
log_messages=[
get_langgraph_node_log_string(
graph_component="main",
node_name="generate refined answer",
node_start_time=node_start_time,
result=agent_error.error_result or "An LLM error occurred",
)
],
)
logger.debug(
f"Average dispatch time for refined answer: {sum(dispatch_timings) / len(dispatch_timings)}"
@@ -266,49 +343,6 @@ def generate_refined_answer(
revision_question_efficiency=revision_question_efficiency,
)
logger.debug(f"\n\n---INITIAL ANSWER ---\n\n Answer:\n Agent: {initial_answer}")
logger.debug("-" * 10)
logger.debug(f"\n\n---REVISED AGENT ANSWER ---\n\n Answer:\n Agent: {answer}")
logger.debug("-" * 100)
if state.initial_agent_stats:
initial_doc_boost_factor = state.initial_agent_stats.agent_effectiveness.get(
"utilized_chunk_ratio", "--"
)
initial_support_boost_factor = (
state.initial_agent_stats.agent_effectiveness.get("support_ratio", "--")
)
num_initial_verified_docs = state.initial_agent_stats.original_question.get(
"num_verified_documents", "--"
)
initial_verified_docs_avg_score = (
state.initial_agent_stats.original_question.get("verified_avg_score", "--")
)
initial_sub_questions_verified_docs = (
state.initial_agent_stats.sub_questions.get("num_verified_documents", "--")
)
logger.debug("INITIAL AGENT STATS")
logger.debug(f"Document Boost Factor: {initial_doc_boost_factor}")
logger.debug(f"Support Boost Factor: {initial_support_boost_factor}")
logger.debug(f"Originally Verified Docs: {num_initial_verified_docs}")
logger.debug(
f"Originally Verified Docs Avg Score: {initial_verified_docs_avg_score}"
)
logger.debug(
f"Sub-Questions Verified Docs: {initial_sub_questions_verified_docs}"
)
if refined_agent_stats:
logger.debug("-" * 10)
logger.debug("REFINED AGENT STATS")
logger.debug(
f"Revision Doc Factor: {refined_agent_stats.revision_doc_efficiency}"
)
logger.debug(
f"Revision Question Factor: {refined_agent_stats.revision_question_efficiency}"
)
agent_refined_end_time = datetime.now()
if state.agent_refined_start_time:
agent_refined_duration = (

View File

@@ -9,6 +9,9 @@ from onyx.agents.agent_search.shared_graph_utils.models import (
SubQuestionAnswerResults,
)
from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
from onyx.chat.models import StreamStopInfo
from onyx.chat.models import StreamStopReason
from onyx.chat.models import StreamType
from onyx.chat.models import SubQuestionPiece
from onyx.context.search.models import IndexFilters
from onyx.tools.models import SearchQueryInfo
@@ -34,6 +37,22 @@ def dispatch_subquestion(
return _helper
def dispatch_subquestion_sep(level: int, writer: StreamWriter) -> Callable[[int], None]:
def _helper(sep_num: int) -> None:
write_custom_event(
"stream_finished",
StreamStopInfo(
stop_reason=StreamStopReason.FINISHED,
stream_type=StreamType.SUB_QUESTIONS,
level=level,
level_question_num=sep_num,
),
writer,
)
return _helper
def calculate_initial_agent_stats(
decomp_answer_results: list[SubQuestionAnswerResults],
original_question_stats: AgentChunkRetrievalStats,
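
dispatch_subquestion_sep is a callback factory: it binds (level, writer) once and returns the per-separator function handed to dispatch_separated. A toy standalone version, with a list append standing in for the StreamWriter:

from collections.abc import Callable

def make_sep_callback(level: int, writer: Callable[[str], None]) -> Callable[[int], None]:
    def _helper(sep_num: int) -> None:
        # the real _helper writes a StreamStopInfo "stream_finished" event
        writer(f"stream_finished level={level} question={sep_num}")
    return _helper

events: list[str] = []
cb = make_sep_callback(0, events.append)
cb(1)
cb(2)
assert events == [
    "stream_finished level=0 question=1",
    "stream_finished level=0 question=2",
]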

View File

@@ -17,6 +17,7 @@ from onyx.agents.agent_search.orchestration.states import ToolCallUpdate
from onyx.agents.agent_search.orchestration.states import ToolChoiceInput
from onyx.agents.agent_search.orchestration.states import ToolChoiceUpdate
from onyx.agents.agent_search.shared_graph_utils.models import AgentChunkRetrievalStats
from onyx.agents.agent_search.shared_graph_utils.models import AgentErrorLoggingFormat
from onyx.agents.agent_search.shared_graph_utils.models import (
EntityRelationshipTermExtraction,
)
@@ -76,6 +77,7 @@ class InitialAnswerUpdate(LoggerUpdate):
"""
initial_answer: str | None = None
error: AgentErrorLoggingFormat | None = None
initial_agent_stats: InitialAgentResultStats | None = None
generated_sub_questions: list[str] = []
agent_base_end_time: datetime | None = None
@@ -88,6 +90,7 @@ class RefinedAnswerUpdate(RefinedAgentEndStats, LoggerUpdate):
"""
refined_answer: str | None = None
error: AgentErrorLoggingFormat | None = None
refined_agent_stats: RefinedAgentStats | None = None
refined_answer_quality: bool = False

View File

@@ -16,14 +16,40 @@ from onyx.agents.agent_search.deep_search.shared.expanded_retrieval.states impor
QueryExpansionUpdate,
)
from onyx.agents.agent_search.models import GraphConfig
from onyx.agents.agent_search.shared_graph_utils.constants import (
AGENT_LLM_RATELIMIT_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AGENT_LLM_TIMEOUT_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AgentLLMErrorType,
)
from onyx.agents.agent_search.shared_graph_utils.models import AgentErrorLoggingFormat
from onyx.agents.agent_search.shared_graph_utils.models import BaseMessage_Content
from onyx.agents.agent_search.shared_graph_utils.models import LLMNodeErrorStrings
from onyx.agents.agent_search.shared_graph_utils.utils import dispatch_separated
from onyx.agents.agent_search.shared_graph_utils.utils import (
get_langgraph_node_log_string,
)
from onyx.agents.agent_search.shared_graph_utils.utils import parse_question_id
from onyx.configs.agent_configs import (
AGENT_TIMEOUT_OVERRIDE_LLM_QUERY_REWRITING_GENERATION,
)
from onyx.llm.chat_llm import LLMRateLimitError
from onyx.llm.chat_llm import LLMTimeoutError
from onyx.prompts.agent_search import (
QUERY_REWRITING_PROMPT,
)
from onyx.utils.logger import setup_logger
logger = setup_logger()
_llm_node_error_strings = LLMNodeErrorStrings(
timeout="Query rewriting failed due to LLM timeout - the original question will be used.",
rate_limit="Query rewriting failed due to LLM rate limit - the original question will be used.",
general_error="Query rewriting failed due to LLM error - the original question will be used.",
)
def expand_queries(
@@ -54,13 +80,43 @@ def expand_queries(
)
]
llm_response_list = dispatch_separated(
llm.stream(prompt=msg), dispatch_subquery(level, question_num, writer)
)
agent_error: AgentErrorLoggingFormat | None = None
llm_response_list: list[BaseMessage_Content] = []
llm_response = merge_message_runs(llm_response_list, chunk_separator="")[0].content
try:
llm_response_list = dispatch_separated(
llm.stream(
prompt=msg,
timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_QUERY_REWRITING_GENERATION,
),
dispatch_subquery(level, question_num, writer),
)
except LLMTimeoutError:
agent_error = AgentErrorLoggingFormat(
error_type=AgentLLMErrorType.TIMEOUT,
error_message=AGENT_LLM_TIMEOUT_MESSAGE,
error_result=_llm_node_error_strings.timeout,
)
logger.error("LLM Timeout Error - expand queries")
rewritten_queries = llm_response.split("\n")
except LLMRateLimitError:
agent_error = AgentErrorLoggingFormat(
error_type=AgentLLMErrorType.RATE_LIMIT,
error_message=AGENT_LLM_RATELIMIT_MESSAGE,
error_result=_llm_node_error_strings.rate_limit,
)
logger.error("LLM Rate Limit Error - expand queries")
# use subquestion as query if query generation fails
if agent_error:
llm_response = ""
rewritten_queries = [question]
log_result = agent_error.error_result
else:
llm_response = merge_message_runs(llm_response_list, chunk_separator="")[
0
].content
rewritten_queries = llm_response.split("\n")
log_result = f"Number of expanded queries: {len(rewritten_queries)}"
return QueryExpansionUpdate(
expanded_queries=rewritten_queries,
@@ -69,7 +125,7 @@ def expand_queries(
graph_component="shared - expanded retrieval",
node_name="expand queries",
node_start_time=node_start_time,
result=f"Number of expanded queries: {len(rewritten_queries)}",
result=log_result,
)
],
)

View File

@@ -1,5 +1,6 @@
from typing import cast
from langchain_core.messages import BaseMessage
from langchain_core.messages import HumanMessage
from langchain_core.runnables.config import RunnableConfig
@@ -10,12 +11,41 @@ from onyx.agents.agent_search.deep_search.shared.expanded_retrieval.states impor
DocVerificationUpdate,
)
from onyx.agents.agent_search.models import GraphConfig
from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
binary_string_test,
)
from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
trim_prompt_piece,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AGENT_LLM_RATELIMIT_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AGENT_LLM_TIMEOUT_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AGENT_POSITIVE_VALUE_STR,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AgentLLMErrorType,
)
from onyx.agents.agent_search.shared_graph_utils.models import AgentErrorLoggingFormat
from onyx.agents.agent_search.shared_graph_utils.models import LLMNodeErrorStrings
from onyx.configs.agent_configs import AGENT_TIMEOUT_OVERRIDE_LLM_DOCUMENT_VERIFICATION
from onyx.llm.chat_llm import LLMRateLimitError
from onyx.llm.chat_llm import LLMTimeoutError
from onyx.prompts.agent_search import (
DOCUMENT_VERIFICATION_PROMPT,
)
from onyx.utils.logger import setup_logger
logger = setup_logger()
_llm_node_error_strings = LLMNodeErrorStrings(
timeout="The LLM timed out. The document could not be verified. The document will be treated as 'relevant'",
rate_limit="The LLM encountered a rate limit. The document could not be verified. The document will be treated as 'relevant'",
general_error="The LLM encountered an error. The document could not be verified. The document will be treated as 'relevant'",
)
def verify_documents(
@@ -26,7 +56,7 @@ def verify_documents(
Args:
state (DocVerificationInput): The current state
config (RunnableConfig): Configuration containing ProSearchConfig
config (RunnableConfig): Configuration containing AgentSearchConfig
Updates:
verified_documents: list[InferenceSection]
@@ -51,11 +81,42 @@ def verify_documents(
)
]
response = fast_llm.invoke(msg)
agent_error: AgentErrorLoggingFormat | None = None
response: BaseMessage | None = None
verified_documents = []
if isinstance(response.content, str) and "yes" in response.content.lower():
verified_documents.append(retrieved_document_to_verify)
try:
response = fast_llm.invoke(
msg, timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_DOCUMENT_VERIFICATION
)
except LLMTimeoutError:
# In this case, we decide to continue and not raise an error, as there is
# little harm in letting through some docs that are less relevant.
agent_error = AgentErrorLoggingFormat(
error_type=AgentLLMErrorType.TIMEOUT,
error_message=AGENT_LLM_TIMEOUT_MESSAGE,
error_result=_llm_node_error_strings.timeout,
)
logger.error("LLM Timeout Error - verify documents")
except LLMRateLimitError:
# In this case, we decide to continue and not raise an error, as there is
# little harm in letting through some docs that are less relevant.
agent_error = AgentErrorLoggingFormat(
error_type=AgentLLMErrorType.RATE_LIMIT,
error_message=AGENT_LLM_RATELIMIT_MESSAGE,
error_result=_llm_node_error_strings.rate_limit,
)
logger.error("LLM Rate Limit Error - verify documents")
if agent_error or response is None:
verified_documents = [retrieved_document_to_verify]
else:
verified_documents = []
if isinstance(response.content, str) and binary_string_test(
text=response.content, positive_value=AGENT_POSITIVE_VALUE_STR
):
verified_documents.append(retrieved_document_to_verify)
return DocVerificationUpdate(
verified_documents=verified_documents,

View File

@@ -21,9 +21,13 @@ from onyx.context.search.models import InferenceSection
class ExpandedRetrievalInput(SubgraphCoreState):
question: str = ""
base_search: bool = False
# exception to the 'no default value' rule for LangGraph input states.
# Here, a sub_question_id default of None implies usage for the
# original question. This is sometimes needed for nested sub-graphs.
sub_question_id: str | None = None
question: str
base_search: bool
## Update/Return States
@@ -88,4 +92,4 @@ class DocVerificationInput(ExpandedRetrievalInput):
class RetrievalInput(ExpandedRetrievalInput):
query_to_retrieve: str = ""
query_to_retrieve: str

View File

@@ -12,7 +12,7 @@ from onyx.agents.agent_search.deep_search.main.graph_builder import (
main_graph_builder as main_graph_builder_a,
)
from onyx.agents.agent_search.deep_search.main.states import (
MainInput as MainInput_a,
MainInput as MainInput,
)
from onyx.agents.agent_search.models import GraphConfig
from onyx.agents.agent_search.shared_graph_utils.utils import get_test_config
@@ -21,6 +21,7 @@ from onyx.chat.models import AnswerPacket
from onyx.chat.models import AnswerStream
from onyx.chat.models import ExtendedToolResponse
from onyx.chat.models import RefinedAnswerImprovement
from onyx.chat.models import StreamingError
from onyx.chat.models import StreamStopInfo
from onyx.chat.models import SubQueryPiece
from onyx.chat.models import SubQuestionPiece
@@ -33,6 +34,7 @@ from onyx.llm.factory import get_default_llms
from onyx.tools.tool_runner import ToolCallKickoff
from onyx.utils.logger import setup_logger
logger = setup_logger()
_COMPILED_GRAPH: CompiledStateGraph | None = None
@@ -72,13 +74,15 @@ def _parse_agent_event(
return cast(AnswerPacket, event["data"])
elif event["name"] == "refined_answer_improvement":
return cast(RefinedAnswerImprovement, event["data"])
elif event["name"] == "refined_sub_question_creation_error":
return cast(StreamingError, event["data"])
return None
def manage_sync_streaming(
compiled_graph: CompiledStateGraph,
config: GraphConfig,
graph_input: BasicInput | MainInput_a,
graph_input: BasicInput | MainInput,
) -> Iterable[StreamEvent]:
message_id = config.persistence.message_id if config.persistence else None
for event in compiled_graph.stream(
@@ -92,7 +96,7 @@ def manage_sync_streaming(
def run_graph(
compiled_graph: CompiledStateGraph,
config: GraphConfig,
input: BasicInput | MainInput_a,
input: BasicInput | MainInput,
) -> AnswerStream:
config.behavior.perform_initial_search_decomposition = (
INITIAL_SEARCH_DECOMPOSITION_ENABLED
@@ -123,9 +127,7 @@ def run_main_graph(
) -> AnswerStream:
compiled_graph = load_compiled_graph()
input = MainInput_a(
base_question=config.inputs.search_request.query, log_messages=[]
)
input = MainInput(log_messages=[])
# Agent search is not a Tool per se, but this is helpful for the frontend
yield ToolCallKickoff(
@@ -172,9 +174,7 @@ if __name__ == "__main__":
# search_request.persona = get_persona_by_id(1, None, db_session)
# config.perform_initial_search_path_decision = False
config.behavior.perform_initial_search_decomposition = True
input = MainInput_a(
base_question=config.inputs.search_request.query, log_messages=[]
)
input = MainInput(log_messages=[])
tool_responses: list = []
for output in run_graph(compiled_graph, config, input):

View File

@@ -150,3 +150,17 @@ def get_prompt_enrichment_components(
history=history,
date_str=date_str,
)
def binary_string_test(text: str, positive_value: str = "yes") -> bool:
"""
Tests if a string contains a positive value (case-insensitive).
Args:
text: The string to test
positive_value: The value to look for (defaults to "yes")
Returns:
True if the positive value is found in the text
"""
return positive_value.lower() in text.lower()
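A minimal usage sketch of this helper (inputs are hypothetical); note the containment semantics, where any substring match counts:

# Sketch: binary_string_test does a case-insensitive containment check.
assert binary_string_test("Yes, the document is relevant.")
assert not binary_string_test("No.")
# Caveat of containment semantics: "eyes" contains "yes".
assert binary_string_test("eyes", positive_value="yes")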

View File

@@ -0,0 +1,17 @@
from enum import Enum
AGENT_LLM_TIMEOUT_MESSAGE = "The agent timed out. Please try again."
AGENT_LLM_ERROR_MESSAGE = "The agent encountered an error. Please try again."
AGENT_LLM_RATELIMIT_MESSAGE = (
"The agent encountered a rate limit error. Please try again."
)
LLM_ANSWER_ERROR_MESSAGE = "The question was not answered due to an LLM error."
AGENT_POSITIVE_VALUE_STR = "yes"
AGENT_NEGATIVE_VALUE_STR = "no"
class AgentLLMErrorType(str, Enum):
TIMEOUT = "timeout"
RATE_LIMIT = "rate_limit"
GENERAL_ERROR = "general_error"
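Because AgentLLMErrorType subclasses str, its members compare equal to their raw values and can be stored in plain-string fields such as AgentErrorLoggingFormat.error_type; a quick sketch:

# str-enum behavior: members are usable anywhere a plain string is expected.
assert AgentLLMErrorType.TIMEOUT == "timeout"
assert isinstance(AgentLLMErrorType.RATE_LIMIT, str)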

View File

@@ -1,3 +1,5 @@
from typing import Any
from pydantic import BaseModel
from onyx.agents.agent_search.deep_search.main.models import (
@@ -56,6 +58,12 @@ class InitialAgentResultStats(BaseModel):
agent_effectiveness: dict[str, float | int | None]
class AgentErrorLoggingFormat(BaseModel):
error_message: str
error_type: str
error_result: str | None = None
class RefinedAgentStats(BaseModel):
revision_doc_efficiency: float | None
revision_question_efficiency: float | None
@@ -126,3 +134,12 @@ class AgentPromptEnrichmentComponents(BaseModel):
persona_prompts: PersonaPromptExpressions
history: str
date_str: str
class LLMNodeErrorStrings(BaseModel):
timeout: str = "LLM Timeout Error"
rate_limit: str = "LLM Rate Limit Error"
general_error: str = "General LLM Error"
BaseMessage_Content = str | list[str | dict[str, Any]]

View File

@@ -20,6 +20,7 @@ from onyx.agents.agent_search.models import GraphInputs
from onyx.agents.agent_search.models import GraphPersistence
from onyx.agents.agent_search.models import GraphSearchConfig
from onyx.agents.agent_search.models import GraphTooling
from onyx.agents.agent_search.shared_graph_utils.models import BaseMessage_Content
from onyx.agents.agent_search.shared_graph_utils.models import (
EntityRelationshipTermExtraction,
)
@@ -34,6 +35,9 @@ from onyx.chat.models import StreamStopInfo
from onyx.chat.models import StreamStopReason
from onyx.chat.models import StreamType
from onyx.chat.prompt_builder.answer_prompt_builder import AnswerPromptBuilder
from onyx.configs.agent_configs import (
AGENT_TIMEOUT_OVERRIDE_LLM_HISTORY_SUMMARY_GENERATION,
)
from onyx.configs.chat_configs import CHAT_TARGET_CHUNK_PERCENTAGE
from onyx.configs.chat_configs import MAX_CHUNKS_FED_TO_CHAT
from onyx.configs.constants import DEFAULT_PERSONA_ID
@@ -46,6 +50,8 @@ from onyx.context.search.models import SearchRequest
from onyx.db.engine import get_session_context_manager
from onyx.db.persona import get_persona_by_id
from onyx.db.persona import Persona
from onyx.llm.chat_llm import LLMRateLimitError
from onyx.llm.chat_llm import LLMTimeoutError
from onyx.llm.interfaces import LLM
from onyx.prompts.agent_search import (
ASSISTANT_SYSTEM_PROMPT_DEFAULT,
@@ -65,8 +71,9 @@ from onyx.tools.tool_implementations.search.search_tool import (
from onyx.tools.tool_implementations.search.search_tool import SearchResponseSummary
from onyx.tools.tool_implementations.search.search_tool import SearchTool
from onyx.tools.utils import explicit_tool_calling_supported
from onyx.utils.logger import setup_logger
BaseMessage_Content = str | list[str | dict[str, Any]]
logger = setup_logger()
# Post-processing
@@ -295,6 +302,7 @@ def _dispatch_nonempty(
def dispatch_separated(
tokens: Iterator[BaseMessage],
dispatch_event: Callable[[str, int], None],
sep_callback: Callable[[int], None] | None = None,
sep: str = DISPATCH_SEP_CHAR,
) -> list[BaseMessage_Content]:
num = 1
@@ -304,6 +312,10 @@ def dispatch_separated(
if sep in content:
sub_question_parts = content.split(sep)
_dispatch_nonempty(sub_question_parts[0], dispatch_event, num)
if sep_callback:
sep_callback(num)
num += 1
_dispatch_nonempty(
"".join(sub_question_parts[1:]).strip(), dispatch_event, num
@@ -312,6 +324,9 @@ def dispatch_separated(
_dispatch_nonempty(content, dispatch_event, num)
streamed_tokens.append(content)
if sep_callback:
sep_callback(num)
return streamed_tokens
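A sketch of how the new sep_callback hook might be consumed; the callback below is hypothetical and simply records segment numbers as they close:

# Hypothetical consumer of sep_callback: it fires once per separator hit
# and once more after the stream ends, receiving the current segment number.
closed_segments: list[int] = []

def on_separator(segment_num: int) -> None:
    closed_segments.append(segment_num)

streamed = dispatch_separated(
    llm.stream(prompt=msg),
    dispatch_subquery(level, question_num, writer),
    sep_callback=on_separator,
)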
@@ -364,8 +379,24 @@ def summarize_history(
)
)
history_response = llm.invoke(history_context_prompt)
try:
history_response = llm.invoke(
history_context_prompt,
timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_HISTORY_SUMMARY_GENERATION,
)
except LLMTimeoutError:
logger.error("LLM Timeout Error - summarize history")
return (
history # this is what was done at this point anyway, so we default to it
)
except LLMRateLimitError:
logger.error("LLM Rate Limit Error - summarize history")
return (
history # this is what was done at this point anyway, so we default to it
)
assert isinstance(history_response.content, str)
return history_response.content

View File

@@ -179,11 +179,14 @@ def try_generate_document_cc_pair_cleanup_tasks(
if tasks_generated is None:
raise ValueError("RedisConnectorDeletion.generate_tasks returned None")
insert_sync_record(
db_session=db_session,
entity_id=cc_pair_id,
sync_type=SyncType.CONNECTOR_DELETION,
)
try:
insert_sync_record(
db_session=db_session,
entity_id=cc_pair_id,
sync_type=SyncType.CONNECTOR_DELETION,
)
except Exception:
pass
except TaskDependencyError:
redis_connector.delete.set_fence(None)

View File

@@ -13,6 +13,21 @@ AGENT_DEFAULT_MIN_ORIG_QUESTION_DOCS = 3
AGENT_DEFAULT_MAX_ANSWER_CONTEXT_DOCS = 10
AGENT_DEFAULT_MAX_STATIC_HISTORY_WORD_LENGTH = 2000
AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_GENERAL_GENERATION = 30 # in seconds
AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_HISTORY_SUMMARY_GENERATION = 10 # in seconds
AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_ENTITY_TERM_EXTRACTION = 25 # in seconds
AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_QUERY_REWRITING_GENERATION = 4 # in seconds
AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_DOCUMENT_VERIFICATION = 3 # in seconds
AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_SUBQUESTION_GENERATION = 8 # in seconds
AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_GENERATION = 12 # in seconds
AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_CHECK = 8 # in seconds
AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_INITIAL_ANSWER_GENERATION = 25 # in seconds
AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_REFINED_SUBQUESTION_GENERATION = 6 # in seconds
AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_GENERATION = 25 # in seconds
AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_COMPARE_ANSWERS = 8 # in seconds
#####
# Agent Configs
#####
@@ -77,4 +92,76 @@ AGENT_MAX_STATIC_HISTORY_WORD_LENGTH = int(
or AGENT_DEFAULT_MAX_STATIC_HISTORY_WORD_LENGTH
) # 2000
AGENT_TIMEOUT_OVERRIDE_LLM_ENTITY_TERM_EXTRACTION = int(
os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_ENTITY_TERM_EXTRACTION")
or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_ENTITY_TERM_EXTRACTION
) # 25
AGENT_TIMEOUT_OVERRIDE_LLM_DOCUMENT_VERIFICATION = int(
os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_DOCUMENT_VERIFICATION")
or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_DOCUMENT_VERIFICATION
) # 3
AGENT_TIMEOUT_OVERRIDE_LLM_GENERAL_GENERATION = int(
os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_GENERAL_GENERATION")
or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_GENERAL_GENERATION
) # 30
AGENT_TIMEOUT_OVERRIDE_LLM_SUBQUESTION_GENERATION = int(
os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_SUBQUESTION_GENERATION")
or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_SUBQUESTION_GENERATION
) # 8
AGENT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_GENERATION = int(
os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_GENERATION")
or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_GENERATION
) # 12
AGENT_TIMEOUT_OVERRIDE_LLM_INITIAL_ANSWER_GENERATION = int(
os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_INITIAL_ANSWER_GENERATION")
or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_INITIAL_ANSWER_GENERATION
) # 25
AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_GENERATION = int(
os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_GENERATION")
or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_GENERATION
) # 25
AGENT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_CHECK = int(
os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_CHECK")
or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_CHECK
) # 8
AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_SUBQUESTION_GENERATION = int(
os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_SUBQUESTION_GENERATION")
or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_REFINED_SUBQUESTION_GENERATION
) # 6
AGENT_TIMEOUT_OVERRIDE_LLM_QUERY_REWRITING_GENERATION = int(
os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_QUERY_REWRITING_GENERATION")
or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_QUERY_REWRITING_GENERATION
) # 4
AGENT_TIMEOUT_OVERRIDE_LLM_HISTORY_SUMMARY_GENERATION = int(
os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_HISTORY_SUMMARY_GENERATION")
or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_HISTORY_SUMMARY_GENERATION
) # 10
AGENT_TIMEOUT_OVERRIDE_LLM_COMPARE_ANSWERS = int(
os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_COMPARE_ANSWERS")
or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_COMPARE_ANSWERS
) # 8
GRAPH_VERSION_NAME: str = "a"
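Each timeout above follows the same env-or-default pattern; a sketch of overriding one knob (the override must be in the environment before onyx.configs.agent_configs is imported):

import os

# Hypothetical override: raise the query-rewriting timeout from 4s to 10s.
os.environ["AGENT_TIMEOUT_OVERRIDE_LLM_QUERY_REWRITING_GENERATION"] = "10"

from onyx.configs.agent_configs import (
    AGENT_TIMEOUT_OVERRIDE_LLM_QUERY_REWRITING_GENERATION,
)

assert AGENT_TIMEOUT_OVERRIDE_LLM_QUERY_REWRITING_GENERATION == 10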

View File

@@ -263,6 +263,11 @@ class PostgresAdvisoryLocks(Enum):
class OnyxCeleryQueues:
# "celery" is the default queue defined by celery and also the queue
# we are running in the primary worker to run system tasks
# Tasks running in this queue should be designed specifically to run quickly
PRIMARY = "celery"
# Light queue
VESPA_METADATA_SYNC = "vespa_metadata_sync"
DOC_PERMISSIONS_UPSERT = "doc_permissions_upsert"

View File

@@ -91,6 +91,7 @@ class LinearConnector(LoadConnector, PollConnector, OAuthConnector):
f"&response_type=code"
f"&scope=read"
f"&state={state}"
f"&prompt=consent" # prompts user for access; allows choosing workspace
)
@classmethod

View File

@@ -105,6 +105,32 @@ def construct_document_select_for_connector_credential_pair_by_needs_sync(
return stmt
def construct_document_id_select_for_connector_credential_pair_by_needs_sync(
connector_id: int, credential_id: int
) -> Select:
initial_doc_ids_stmt = select(DocumentByConnectorCredentialPair.id).where(
and_(
DocumentByConnectorCredentialPair.connector_id == connector_id,
DocumentByConnectorCredentialPair.credential_id == credential_id,
)
)
stmt = (
select(DbDocument.id)
.where(
DbDocument.id.in_(initial_doc_ids_stmt),
or_(
DbDocument.last_modified
> DbDocument.last_synced, # last_modified is newer than last_synced
DbDocument.last_synced.is_(None), # never synced
),
)
.distinct()
)
return stmt
def get_all_documents_needing_vespa_sync_for_cc_pair(
db_session: Session, cc_pair_id: int
) -> list[DbDocument]:

View File

@@ -545,7 +545,7 @@ def fetch_documents_for_document_set_paginated(
return documents, documents[-1].id if documents else None
def construct_document_select_by_docset(
def construct_document_id_select_by_docset(
document_set_id: int,
current_only: bool = True,
) -> Select:
@@ -554,7 +554,7 @@ def construct_document_select_by_docset(
are background processing task generators."""
stmt = (
select(Document)
select(Document.id)
.join(
DocumentByConnectorCredentialPair,
DocumentByConnectorCredentialPair.id == Document.id,

View File

@@ -11,6 +11,7 @@ from sqlalchemy import Select
from sqlalchemy import select
from sqlalchemy import update
from sqlalchemy.orm import aliased
from sqlalchemy.orm import joinedload
from sqlalchemy.orm import selectinload
from sqlalchemy.orm import Session
@@ -19,6 +20,7 @@ from onyx.configs.app_configs import DISABLE_AUTH
from onyx.configs.chat_configs import BING_API_KEY
from onyx.configs.chat_configs import CONTEXT_CHUNKS_ABOVE
from onyx.configs.chat_configs import CONTEXT_CHUNKS_BELOW
from onyx.configs.constants import NotificationType
from onyx.context.search.enums import RecencyBiasSetting
from onyx.db.constants import SLACK_BOT_PERSONA_PREFIX
from onyx.db.models import DocumentSet
@@ -32,6 +34,8 @@ from onyx.db.models import Tool
from onyx.db.models import User
from onyx.db.models import User__UserGroup
from onyx.db.models import UserGroup
from onyx.db.notification import create_notification
from onyx.server.features.persona.models import PersonaSharedNotificationData
from onyx.server.features.persona.models import PersonaSnapshot
from onyx.server.features.persona.models import PersonaUpsertRequest
from onyx.utils.logger import setup_logger
@@ -169,6 +173,15 @@ def make_persona_private(
for user_uuid in user_ids:
db_session.add(Persona__User(persona_id=persona_id, user_id=user_uuid))
create_notification(
user_id=user_uuid,
notif_type=NotificationType.PERSONA_SHARED,
db_session=db_session,
additional_data=PersonaSharedNotificationData(
persona_id=persona_id,
).model_dump(),
)
db_session.commit()
# May cause error if someone switches down to MIT from EE
@@ -708,3 +721,15 @@ def update_persona_label(
def delete_persona_label(label_id: int, db_session: Session) -> None:
db_session.query(PersonaLabel).filter(PersonaLabel.id == label_id).delete()
db_session.commit()
def persona_has_search_tool(persona_id: int, db_session: Session) -> bool:
persona = (
db_session.query(Persona)
.options(joinedload(Persona.tools))
.filter(Persona.id == persona_id)
.one_or_none()
)
if persona is None:
raise ValueError(f"Persona with ID {persona_id} does not exist")
return any(tool.in_code_tool_id == "run_search" for tool in persona.tools)

View File

@@ -256,7 +256,7 @@ def fetch_slack_channel_config_for_channel_or_default(
db_session: Session, slack_bot_id: int, channel_name: str | None
) -> SlackChannelConfig | None:
# attempt to find channel-specific config first
if channel_name:
if channel_name is not None:
sc_config = db_session.scalar(
select(SlackChannelConfig).where(
SlackChannelConfig.slack_bot_id == slack_bot_id,

View File

@@ -50,6 +50,18 @@ litellm.telemetry = False
_LLM_PROMPT_LONG_TERM_LOG_CATEGORY = "llm_prompt"
class LLMTimeoutError(Exception):
"""
Exception raised when an LLM call times out.
"""
class LLMRateLimitError(Exception):
"""
Exception raised when an LLM call is rate limited.
"""
def _base_msg_to_role(msg: BaseMessage) -> str:
if isinstance(msg, HumanMessage) or isinstance(msg, HumanMessageChunk):
return "user"
@@ -380,6 +392,7 @@ class DefaultMultiLLM(LLM):
tool_choice: ToolChoiceOptions | None,
stream: bool,
structured_response_format: dict | None = None,
timeout_override: int | None = None,
) -> litellm.ModelResponse | litellm.CustomStreamWrapper:
# litellm doesn't accept LangChain BaseMessage objects, so we need to convert them
# to a dict representation
@@ -405,7 +418,7 @@ class DefaultMultiLLM(LLM):
stream=stream,
# model params
temperature=0,
timeout=self._timeout,
timeout=timeout_override or self._timeout,
# For now, we don't support parallel tool calls
# NOTE: we can't pass this in if tools are not specified
# or else OpenAI throws an error
@@ -424,6 +437,12 @@ class DefaultMultiLLM(LLM):
except Exception as e:
self._record_error(processed_prompt, e)
# for break pointing
if isinstance(e, litellm.Timeout):
raise LLMTimeoutError(e)
elif isinstance(e, litellm.RateLimitError):
raise LLMRateLimitError(e)
raise e
@property
@@ -444,6 +463,7 @@ class DefaultMultiLLM(LLM):
tools: list[dict] | None = None,
tool_choice: ToolChoiceOptions | None = None,
structured_response_format: dict | None = None,
timeout_override: int | None = None,
) -> BaseMessage:
if LOG_DANSWER_MODEL_INTERACTIONS:
self.log_model_configs()
@@ -451,7 +471,12 @@ class DefaultMultiLLM(LLM):
response = cast(
litellm.ModelResponse,
self._completion(
prompt, tools, tool_choice, False, structured_response_format
prompt=prompt,
tools=tools,
tool_choice=tool_choice,
stream=False,
structured_response_format=structured_response_format,
timeout_override=timeout_override,
),
)
choice = response.choices[0]
@@ -469,19 +494,31 @@ class DefaultMultiLLM(LLM):
tools: list[dict] | None = None,
tool_choice: ToolChoiceOptions | None = None,
structured_response_format: dict | None = None,
timeout_override: int | None = None,
) -> Iterator[BaseMessage]:
if LOG_DANSWER_MODEL_INTERACTIONS:
self.log_model_configs()
if DISABLE_LITELLM_STREAMING:
yield self.invoke(prompt, tools, tool_choice, structured_response_format)
yield self.invoke(
prompt,
tools,
tool_choice,
structured_response_format,
timeout_override,
)
return
output = None
response = cast(
litellm.CustomStreamWrapper,
self._completion(
prompt, tools, tool_choice, True, structured_response_format
prompt=prompt,
tools=tools,
tool_choice=tool_choice,
stream=True,
structured_response_format=structured_response_format,
timeout_override=timeout_override,
),
)
try:

View File

@@ -81,6 +81,7 @@ class CustomModelServer(LLM):
tools: list[dict] | None = None,
tool_choice: ToolChoiceOptions | None = None,
structured_response_format: dict | None = None,
timeout_override: int | None = None,
) -> BaseMessage:
return self._execute(prompt)
@@ -90,5 +91,6 @@ class CustomModelServer(LLM):
tools: list[dict] | None = None,
tool_choice: ToolChoiceOptions | None = None,
structured_response_format: dict | None = None,
timeout_override: int | None = None,
) -> Iterator[BaseMessage]:
yield self._execute(prompt)

View File

@@ -90,12 +90,13 @@ class LLM(abc.ABC):
tools: list[dict] | None = None,
tool_choice: ToolChoiceOptions | None = None,
structured_response_format: dict | None = None,
timeout_override: int | None = None,
) -> BaseMessage:
self._precall(prompt)
# TODO add a postcall to log model outputs independent of concrete class
# implementation
return self._invoke_implementation(
prompt, tools, tool_choice, structured_response_format
prompt, tools, tool_choice, structured_response_format, timeout_override
)
@abc.abstractmethod
@@ -105,6 +106,7 @@ class LLM(abc.ABC):
tools: list[dict] | None = None,
tool_choice: ToolChoiceOptions | None = None,
structured_response_format: dict | None = None,
timeout_override: int | None = None,
) -> BaseMessage:
raise NotImplementedError
@@ -114,12 +116,13 @@ class LLM(abc.ABC):
tools: list[dict] | None = None,
tool_choice: ToolChoiceOptions | None = None,
structured_response_format: dict | None = None,
timeout_override: int | None = None,
) -> Iterator[BaseMessage]:
self._precall(prompt)
# TODO add a postcall to log model outputs independent of concrete class
# implementation
messages = self._stream_implementation(
prompt, tools, tool_choice, structured_response_format
prompt, tools, tool_choice, structured_response_format, timeout_override
)
tokens = []
@@ -138,5 +141,6 @@ class LLM(abc.ABC):
tools: list[dict] | None = None,
tool_choice: ToolChoiceOptions | None = None,
structured_response_format: dict | None = None,
timeout_override: int | None = None,
) -> Iterator[BaseMessage]:
raise NotImplementedError

View File

@@ -1,4 +1,5 @@
from datetime import datetime
from typing import cast
import pytz
import timeago # type: ignore
@@ -338,6 +339,23 @@ def _build_citations_blocks(
return citations_block
def _build_answer_blocks(
answer: ChatOnyxBotResponse, fallback_answer: str
) -> list[SectionBlock]:
if not answer.answer:
answer_blocks = [SectionBlock(text=fallback_answer)]
else:
# replaces markdown links with slack format links
formatted_answer = format_slack_message(answer.answer)
answer_processed = decode_escapes(
remove_slack_text_interactions(formatted_answer)
)
answer_blocks = [
SectionBlock(text=text) for text in _split_text(answer_processed)
]
return answer_blocks
def _build_qa_response_blocks(
answer: ChatOnyxBotResponse,
) -> list[Block]:
@@ -376,21 +394,10 @@ def _build_qa_response_blocks(
filter_block = SectionBlock(text=f"_{filter_text}_")
if not answer.answer:
answer_blocks = [
SectionBlock(
text="Sorry, I was unable to find an answer, but I did find some potentially relevant docs 🤓"
)
]
else:
# replaces markdown links with slack format links
formatted_answer = format_slack_message(answer.answer)
answer_processed = decode_escapes(
remove_slack_text_interactions(formatted_answer)
)
answer_blocks = [
SectionBlock(text=text) for text in _split_text(answer_processed)
]
answer_blocks = _build_answer_blocks(
answer=answer,
fallback_answer="Sorry, I was unable to find an answer, but I did find some potentially relevant docs 🤓",
)
response_blocks: list[Block] = []
@@ -481,6 +488,7 @@ def build_slack_response_blocks(
use_citations: bool,
feedback_reminder_id: str | None,
skip_ai_feedback: bool = False,
expecting_search_result: bool = False,
) -> list[Block]:
"""
This function is a top level function that builds all the blocks for the Slack response.
@@ -491,9 +499,19 @@ def build_slack_response_blocks(
message_info.thread_messages[-1].message, message_info.is_bot_msg
)
answer_blocks = _build_qa_response_blocks(
answer=answer,
)
if expecting_search_result:
answer_blocks = _build_qa_response_blocks(
answer=answer,
)
else:
answer_blocks = cast(
list[Block],
_build_answer_blocks(
answer=answer,
fallback_answer="Sorry, I was unable to generate an answer.",
),
)
web_follow_up_block = []
if channel_conf and channel_conf.get("show_continue_in_web_ui"):

View File

@@ -27,6 +27,7 @@ from onyx.db.engine import get_session_with_tenant
from onyx.db.models import SlackChannelConfig
from onyx.db.models import User
from onyx.db.persona import get_persona_by_id
from onyx.db.persona import persona_has_search_tool
from onyx.db.users import get_user_by_email
from onyx.onyxbot.slack.blocks import build_slack_response_blocks
from onyx.onyxbot.slack.handlers.utils import send_team_member_message
@@ -106,7 +107,8 @@ def handle_regular_answer(
]
prompt = persona.prompts[0] if persona.prompts else None
should_respond_even_with_no_docs = persona.num_chunks == 0 if persona else False
with get_session_with_tenant(tenant_id) as db_session:
expecting_search_result = persona_has_search_tool(persona.id, db_session)
# TODO: Add in support for Slack to truncate messages based on max LLM context
# llm, _ = get_llms_for_persona(persona)
@@ -303,12 +305,12 @@ def handle_regular_answer(
return True
retrieval_info = answer.docs
if not retrieval_info:
if not retrieval_info and expecting_search_result:
# This should not happen; even with no docs retrieved, there is still info returned
raise RuntimeError("Failed to retrieve docs, cannot answer question.")
top_docs = retrieval_info.top_documents
if not top_docs and not should_respond_even_with_no_docs:
top_docs = retrieval_info.top_documents if retrieval_info else []
if not top_docs and expecting_search_result:
logger.error(
f"Unable to answer question: '{user_message}' - no documents found"
)
@@ -337,7 +339,8 @@ def handle_regular_answer(
)
if (
only_respond_if_citations
expecting_search_result
and only_respond_if_citations
and not answer.citations
and not message_info.bypass_filters
):
@@ -363,6 +366,7 @@ def handle_regular_answer(
channel_conf=channel_conf,
use_citations=True, # No longer supporting quotes
feedback_reminder_id=feedback_reminder_id,
expecting_search_result=expecting_search_result,
)
try:

View File

@@ -801,18 +801,6 @@ def process_message(
channel_name=channel_name,
)
# Be careful about this default, don't want to accidentally spam every channel
# Users should be able to DM slack bot in their private channels though
if (
not respond_every_channel
# Can't have configs for DMs so don't toss them out
and not is_dm
# If /OnyxBot (is_bot_msg) or @OnyxBot (bypass_filters)
# always respond with the default configs
and not (details.is_bot_msg or details.bypass_filters)
):
return
follow_up = bool(
slack_channel_config.channel_config
and slack_channel_config.channel_config.get("follow_up_tags")

View File

@@ -5,8 +5,6 @@ UNKNOWN_ANSWER = "I do not have enough information to answer this question."
NO_RECOVERED_DOCS = "No relevant information recovered"
YES = "yes"
NO = "no"
# Framing/Support/Template Prompts
HISTORY_FRAMING_PROMPT = f"""
For more context, here is the history of the conversation so far that preceded this question:

View File

@@ -16,9 +16,8 @@ from onyx.configs.constants import OnyxCeleryTask
from onyx.configs.constants import OnyxRedisConstants
from onyx.db.connector_credential_pair import get_connector_credential_pair_from_id
from onyx.db.document import (
construct_document_select_for_connector_credential_pair_by_needs_sync,
construct_document_id_select_for_connector_credential_pair_by_needs_sync,
)
from onyx.db.models import Document
from onyx.redis.redis_object_helper import RedisObjectHelper
@@ -72,7 +71,8 @@ class RedisConnectorCredentialPair(RedisObjectHelper):
last_lock_time = time.monotonic()
async_results = []
num_tasks_sent = 0
cc_pair = get_connector_credential_pair_from_id(
db_session=db_session,
cc_pair_id=int(self._id),
@@ -80,14 +80,14 @@ class RedisConnectorCredentialPair(RedisObjectHelper):
if not cc_pair:
return None
stmt = construct_document_select_for_connector_credential_pair_by_needs_sync(
stmt = construct_document_id_select_for_connector_credential_pair_by_needs_sync(
cc_pair.connector_id, cc_pair.credential_id
)
num_docs = 0
for doc in db_session.scalars(stmt).yield_per(DB_YIELD_PER_DEFAULT):
doc = cast(Document, doc)
for doc_id in db_session.scalars(stmt).yield_per(DB_YIELD_PER_DEFAULT):
doc_id = cast(str, doc_id)
current_time = time.monotonic()
if current_time - last_lock_time >= (
CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT / 4
@@ -98,7 +98,7 @@ class RedisConnectorCredentialPair(RedisObjectHelper):
num_docs += 1
# check if we should skip the document (typically because it's already syncing)
if doc.id in self.skip_docs:
if doc_id in self.skip_docs:
continue
# celery's default task id format is "dd32ded3-00aa-4884-8b21-42f8332e7fac"
@@ -114,21 +114,21 @@ class RedisConnectorCredentialPair(RedisObjectHelper):
)
# Priority on sync's triggered by new indexing should be medium
result = celery_app.send_task(
celery_app.send_task(
OnyxCeleryTask.VESPA_METADATA_SYNC_TASK,
kwargs=dict(document_id=doc.id, tenant_id=tenant_id),
kwargs=dict(document_id=doc_id, tenant_id=tenant_id),
queue=OnyxCeleryQueues.VESPA_METADATA_SYNC,
task_id=custom_task_id,
priority=OnyxCeleryPriority.MEDIUM,
)
async_results.append(result)
self.skip_docs.add(doc.id)
num_tasks_sent += 1
self.skip_docs.add(doc_id)
if len(async_results) >= max_tasks:
if num_tasks_sent >= max_tasks:
break
return len(async_results), num_docs
return num_tasks_sent, num_docs
class RedisGlobalConnectorCredentialPair:

View File

@@ -14,8 +14,7 @@ from onyx.configs.constants import OnyxCeleryPriority
from onyx.configs.constants import OnyxCeleryQueues
from onyx.configs.constants import OnyxCeleryTask
from onyx.configs.constants import OnyxRedisConstants
from onyx.db.document_set import construct_document_select_by_docset
from onyx.db.models import Document
from onyx.db.document_set import construct_document_id_select_by_docset
from onyx.redis.redis_object_helper import RedisObjectHelper
@@ -66,10 +65,11 @@ class RedisDocumentSet(RedisObjectHelper):
"""
last_lock_time = time.monotonic()
async_results = []
stmt = construct_document_select_by_docset(int(self._id), current_only=False)
for doc in db_session.scalars(stmt).yield_per(DB_YIELD_PER_DEFAULT):
doc = cast(Document, doc)
num_tasks_sent = 0
stmt = construct_document_id_select_by_docset(int(self._id), current_only=False)
for doc_id in db_session.scalars(stmt).yield_per(DB_YIELD_PER_DEFAULT):
doc_id = cast(str, doc_id)
current_time = time.monotonic()
if current_time - last_lock_time >= (
CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT / 4
@@ -86,17 +86,17 @@ class RedisDocumentSet(RedisObjectHelper):
# add to the set BEFORE creating the task.
redis_client.sadd(self.taskset_key, custom_task_id)
result = celery_app.send_task(
celery_app.send_task(
OnyxCeleryTask.VESPA_METADATA_SYNC_TASK,
kwargs=dict(document_id=doc.id, tenant_id=tenant_id),
kwargs=dict(document_id=doc_id, tenant_id=tenant_id),
queue=OnyxCeleryQueues.VESPA_METADATA_SYNC,
task_id=custom_task_id,
priority=OnyxCeleryPriority.LOW,
)
async_results.append(result)
num_tasks_sent += 1
return len(async_results), len(async_results)
return num_tasks_sent, num_tasks_sent
def reset(self) -> None:
self.redis.srem(OnyxRedisConstants.ACTIVE_FENCES, self.fence_key)

View File

@@ -14,7 +14,6 @@ from onyx.configs.constants import OnyxCeleryPriority
from onyx.configs.constants import OnyxCeleryQueues
from onyx.configs.constants import OnyxCeleryTask
from onyx.configs.constants import OnyxRedisConstants
from onyx.db.models import Document
from onyx.redis.redis_object_helper import RedisObjectHelper
from onyx.utils.variable_functionality import fetch_versioned_implementation
from onyx.utils.variable_functionality import global_version
@@ -66,23 +65,22 @@ class RedisUserGroup(RedisObjectHelper):
user group up to date over multiple batches.
"""
last_lock_time = time.monotonic()
async_results = []
num_tasks_sent = 0
if not global_version.is_ee_version():
return 0, 0
try:
construct_document_select_by_usergroup = fetch_versioned_implementation(
construct_document_id_select_by_usergroup = fetch_versioned_implementation(
"onyx.db.user_group",
"construct_document_select_by_usergroup",
"construct_document_id_select_by_usergroup",
)
except ModuleNotFoundError:
return 0, 0
stmt = construct_document_select_by_usergroup(int(self._id))
for doc in db_session.scalars(stmt).yield_per(DB_YIELD_PER_DEFAULT):
doc = cast(Document, doc)
stmt = construct_document_id_select_by_usergroup(int(self._id))
for doc_id in db_session.scalars(stmt).yield_per(DB_YIELD_PER_DEFAULT):
doc_id = cast(str, doc_id)
current_time = time.monotonic()
if current_time - last_lock_time >= (
CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT / 4
@@ -99,17 +97,17 @@ class RedisUserGroup(RedisObjectHelper):
# add to the set BEFORE creating the task.
redis_client.sadd(self.taskset_key, custom_task_id)
result = celery_app.send_task(
celery_app.send_task(
OnyxCeleryTask.VESPA_METADATA_SYNC_TASK,
kwargs=dict(document_id=doc.id, tenant_id=tenant_id),
kwargs=dict(document_id=doc_id, tenant_id=tenant_id),
queue=OnyxCeleryQueues.VESPA_METADATA_SYNC,
task_id=custom_task_id,
priority=OnyxCeleryPriority.LOW,
)
async_results.append(result)
num_tasks_sent += 1
return len(async_results), len(async_results)
return num_tasks_sent, num_tasks_sent
def reset(self) -> None:
self.redis.srem(OnyxRedisConstants.ACTIVE_FENCES, self.fence_key)

View File

@@ -22,6 +22,8 @@ from onyx.background.celery.tasks.pruning.tasks import (
try_creating_prune_generator_task,
)
from onyx.background.celery.versioned_apps.primary import app as primary_app
from onyx.configs.constants import OnyxCeleryPriority
from onyx.configs.constants import OnyxCeleryTask
from onyx.db.connector_credential_pair import add_credential_to_connector
from onyx.db.connector_credential_pair import (
get_connector_credential_pair_from_id_for_user,
@@ -228,6 +230,13 @@ def update_cc_pair_status(
db_session.commit()
# this speeds up the start of indexing by firing the check immediately
primary_app.send_task(
OnyxCeleryTask.CHECK_FOR_INDEXING,
kwargs=dict(tenant_id=tenant_id),
priority=OnyxCeleryPriority.HIGH,
)
return JSONResponse(
status_code=HTTPStatus.OK, content={"message": str(HTTPStatus.OK)}
)
@@ -540,7 +549,14 @@ def associate_credential_to_connector(
metadata: ConnectorCredentialPairMetadata,
user: User | None = Depends(current_curator_or_admin_user),
db_session: Session = Depends(get_session),
tenant_id: str = Depends(get_current_tenant_id),
) -> StatusResponse[int]:
"""NOTE(rkuo): internally discussed and the consensus is this endpoint
and create_connector_with_mock_credential should be combined.
The intent of this endpoint is to handle connectors that actually need credentials.
"""
fetch_ee_implementation_or_noop(
"onyx.db.user_group", "validate_object_creation_for_user", None
)(
@@ -563,6 +579,18 @@ def associate_credential_to_connector(
groups=metadata.groups,
)
# trigger indexing immediately
primary_app.send_task(
OnyxCeleryTask.CHECK_FOR_INDEXING,
priority=OnyxCeleryPriority.HIGH,
kwargs={"tenant_id": tenant_id},
)
logger.info(
f"associate_credential_to_connector - running check_for_indexing: "
f"cc_pair={response.data}"
)
return response
except IntegrityError as e:
logger.error(f"IntegrityError: {e}")

View File

@@ -804,6 +804,14 @@ def create_connector_with_mock_credential(
db_session: Session = Depends(get_session),
tenant_id: str = Depends(get_current_tenant_id),
) -> StatusResponse:
"""NOTE(rkuo): internally discussed and the consensus is this endpoint
and associate_credential_to_connector should be combined.
The intent of this endpoint is to handle connectors that don't need credentials,
AKA web, file, etc., but there isn't any reason a single endpoint couldn't
serve this purpose.
"""
fetch_ee_implementation_or_noop(
"onyx.db.user_group", "validate_object_creation_for_user", None
)(
@@ -841,6 +849,18 @@ def create_connector_with_mock_credential(
groups=connector_data.groups,
)
# trigger indexing immediately
primary_app.send_task(
OnyxCeleryTask.CHECK_FOR_INDEXING,
priority=OnyxCeleryPriority.HIGH,
kwargs={"tenant_id": tenant_id},
)
logger.info(
f"create_connector_with_mock_credential - running check_for_indexing: "
f"cc_pair={response.data}"
)
create_milestone_and_report(
user=user,
distinct_id=user.email if user else tenant_id or "N/A",
@@ -1005,6 +1025,8 @@ def connector_run_once(
kwargs={"tenant_id": tenant_id},
)
logger.info("connector_run_once - running check_for_indexing")
msg = f"Marked {num_triggers} index attempts with indexing triggers."
return StatusResponse(
success=True,

View File

@@ -179,12 +179,10 @@ def oauth_callback(
db_session=db_session,
)
# TODO: use a library for url handling
sep = "&" if "?" in desired_return_url else "?"
return CallbackResponse(
redirect_url=(
f"{desired_return_url}?credentialId={credential.id}"
if "?" not in desired_return_url
else f"{desired_return_url}&credentialId={credential.id}"
)
redirect_url=f"{desired_return_url}{sep}credentialId={credential.id}"
)
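Per the TODO above, a library-based approach could look like this urllib.parse sketch (the helper name is hypothetical, not part of this PR):

from urllib.parse import parse_qsl, urlencode, urlparse, urlunparse

def append_query_param(url: str, key: str, value: str) -> str:
    # Preserves any existing query string instead of testing for "?" manually.
    parts = urlparse(url)
    query = dict(parse_qsl(parts.query))
    query[key] = value
    return urlunparse(parts._replace(query=urlencode(query)))

# append_query_param("https://app/return?foo=1", "credentialId", "42")
# -> "https://app/return?foo=1&credentialId=42"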

View File

@@ -6,11 +6,15 @@ from sqlalchemy.orm import Session
from onyx.auth.users import current_curator_or_admin_user
from onyx.auth.users import current_user
from onyx.background.celery.versioned_apps.primary import app as primary_app
from onyx.configs.constants import OnyxCeleryPriority
from onyx.configs.constants import OnyxCeleryTask
from onyx.db.document_set import check_document_sets_are_public
from onyx.db.document_set import fetch_all_document_sets_for_user
from onyx.db.document_set import insert_document_set
from onyx.db.document_set import mark_document_set_as_to_be_deleted
from onyx.db.document_set import update_document_set
from onyx.db.engine import get_current_tenant_id
from onyx.db.engine import get_session
from onyx.db.models import User
from onyx.server.features.document_set.models import CheckDocSetPublicRequest
@@ -29,6 +33,7 @@ def create_document_set(
document_set_creation_request: DocumentSetCreationRequest,
user: User = Depends(current_curator_or_admin_user),
db_session: Session = Depends(get_session),
tenant_id: str = Depends(get_current_tenant_id),
) -> int:
fetch_ee_implementation_or_noop(
"onyx.db.user_group", "validate_object_creation_for_user", None
@@ -46,6 +51,13 @@ def create_document_set(
)
except Exception as e:
raise HTTPException(status_code=400, detail=str(e))
primary_app.send_task(
OnyxCeleryTask.CHECK_FOR_VESPA_SYNC_TASK,
kwargs={"tenant_id": tenant_id},
priority=OnyxCeleryPriority.HIGH,
)
return document_set_db_model.id
@@ -54,6 +66,7 @@ def patch_document_set(
document_set_update_request: DocumentSetUpdateRequest,
user: User = Depends(current_curator_or_admin_user),
db_session: Session = Depends(get_session),
tenant_id: str = Depends(get_current_tenant_id),
) -> None:
fetch_ee_implementation_or_noop(
"onyx.db.user_group", "validate_object_creation_for_user", None
@@ -72,12 +85,19 @@ def patch_document_set(
except Exception as e:
raise HTTPException(status_code=400, detail=str(e))
primary_app.send_task(
OnyxCeleryTask.CHECK_FOR_VESPA_SYNC_TASK,
kwargs={"tenant_id": tenant_id},
priority=OnyxCeleryPriority.HIGH,
)
@router.delete("/admin/document-set/{document_set_id}")
def delete_document_set(
document_set_id: int,
user: User = Depends(current_curator_or_admin_user),
db_session: Session = Depends(get_session),
tenant_id: str = Depends(get_current_tenant_id),
) -> None:
try:
mark_document_set_as_to_be_deleted(
@@ -88,6 +108,12 @@ def delete_document_set(
except Exception as e:
raise HTTPException(status_code=400, detail=str(e))
primary_app.send_task(
OnyxCeleryTask.CHECK_FOR_VESPA_SYNC_TASK,
kwargs={"tenant_id": tenant_id},
priority=OnyxCeleryPriority.HIGH,
)
"""Endpoints for non-admins"""

View File

@@ -197,6 +197,11 @@ def create_deletion_attempt_for_connector_id(
kwargs={"tenant_id": tenant_id},
)
logger.info(
f"create_deletion_attempt_for_connector_id - running check_for_connector_deletion: "
f"cc_pair={cc_pair.id}"
)
if cc_pair.connector.source == DocumentSource.FILE:
connector = cc_pair.connector
file_store = get_default_file_store(db_session)

View File

@@ -247,6 +247,7 @@ def create_bot(
respond_member_group_list=[],
answer_filters=[],
follow_up_tags=[],
respond_tag_only=True,
)
insert_slack_channel_config(
db_session=db_session,

View File

@@ -34,6 +34,7 @@ from onyx.auth.users import current_curator_or_admin_user
from onyx.auth.users import current_user
from onyx.auth.users import optional_user
from onyx.configs.app_configs import AUTH_TYPE
from onyx.configs.app_configs import DEV_MODE
from onyx.configs.app_configs import ENABLE_EMAIL_INVITES
from onyx.configs.app_configs import SESSION_EXPIRE_TIME_SECONDS
from onyx.configs.app_configs import VALID_EMAIL_DOMAINS
@@ -286,7 +287,7 @@ def bulk_invite_users(
detail=f"Invalid email address: {email} - {str(e)}",
)
if MULTI_TENANT:
if MULTI_TENANT and not DEV_MODE:
try:
fetch_ee_implementation_or_noop(
"onyx.server.tenants.provisioning", "add_users_to_tenant", None

View File

@@ -717,15 +717,14 @@ def upload_files_for_chat(
else ChatFileType.PLAIN_TEXT
)
if file_type == ChatFileType.IMAGE:
file_content_io = file.file
# NOTE: Image conversion to JPEG used to be enforced here.
# This was removed to:
# 1. Preserve original file content for downloads
# 2. Maintain transparency in formats like PNG
# 3. Ameliorate issue with file conversion
else:
file_content_io = io.BytesIO(file.file.read())
file_content = file.file.read() # Read the file content
# NOTE: Image conversion to JPEG used to be enforced here.
# This was removed to:
# 1. Preserve original file content for downloads
# 2. Maintain transparency in formats like PNG
# 3. Ameliorate issue with file conversion
file_content_io = io.BytesIO(file_content)
new_content_type = file.content_type
@@ -747,6 +746,7 @@ def upload_files_for_chat(
file_name=file.filename or "",
)
text_file_id = str(uuid.uuid4())
file_store.save_file(
file_name=text_file_id,
content=io.BytesIO(extracted_text.encode()),

View File

@@ -10,6 +10,8 @@ from uuid import UUID
from redis import Redis
from ee.onyx.server.tenants.user_mapping import get_tenant_id_for_email
from onyx.auth.invited_users import get_invited_users
from onyx.auth.invited_users import write_invited_users
from onyx.configs.app_configs import REDIS_AUTH_KEY_PREFIX
from onyx.configs.app_configs import REDIS_DB_NUMBER
from onyx.configs.app_configs import REDIS_HOST
@@ -21,6 +23,7 @@ from onyx.db.users import get_user_by_email
from onyx.redis.redis_pool import RedisPool
from shared_configs.configs import MULTI_TENANT
from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA
from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
# Tool to run helpful operations on Redis in production
# This is targeted for internal usage and may not have all the necessary parameters
@@ -310,6 +313,13 @@ if __name__ == "__main__":
required=False,
)
parser.add_argument(
"--tenant-id",
type=str,
help="Tenant ID for get, delete user token, or add to invited users",
required=False,
)
parser.add_argument(
"--batch",
type=int,
@@ -328,11 +338,32 @@ if __name__ == "__main__":
parser.add_argument(
"--user-email",
type=str,
help="User email for get or delete user token",
help="User email for get, delete user token, or add to invited users",
required=False,
)
args = parser.parse_args()
if args.tenant_id:
CURRENT_TENANT_ID_CONTEXTVAR.set(args.tenant_id)
if args.command == "add_invited_user":
if not args.user_email:
print("Error: --user-email is required for add_invited_user command")
sys.exit(1)
current_invited_users = get_invited_users()
if args.user_email not in current_invited_users:
current_invited_users.append(args.user_email)
if args.dry_run:
print(f"(DRY-RUN) Would add {args.user_email} to invited users")
else:
write_invited_users(current_invited_users)
print(f"Added {args.user_email} to invited users")
else:
print(f"{args.user_email} is already in the invited users list")
sys.exit(0)
exitcode = onyx_redis(
command=args.command,
batch=args.batch,

View File

@@ -255,6 +255,24 @@ def get_documents_for_tenant_connector(
print_documents(documents)
def search_for_document(
index_name: str, document_id: str, max_hits: int | None = 10
) -> List[Dict[str, Any]]:
yql_query = (
f'select * from sources {index_name} where document_id contains "{document_id}"'
)
params: dict[str, Any] = {"yql": yql_query}
if max_hits is not None:
params["hits"] = max_hits
with get_vespa_http_client() as client:
response = client.get(f"{SEARCH_ENDPOINT}/search/", params=params)
response.raise_for_status()
result = response.json()
documents = result.get("root", {}).get("children", [])
logger.info(f"Found {len(documents)} documents from query.")
return documents
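A usage sketch (index name and document id below are hypothetical; passing max_hits=None omits the hits parameter, so Vespa's own default limit applies):

# Hypothetical call: list up to 10 chunk hits for one document id.
chunks = search_for_document("danswer_chunk", "web__https://example.com/page")
print(f"{len(chunks)} chunks returned")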
def search_documents(
tenant_id: str, connector_id: int, query: str, n: int = 10
) -> None:
@@ -440,10 +458,98 @@ def get_document_acls(
print("-" * 80)
def get_current_chunk_count(
document_id: str, index_name: str, tenant_id: str
) -> int | None:
with get_session_with_tenant(tenant_id=tenant_id) as session:
return (
session.query(Document.chunk_count)
.filter(Document.id == document_id)
.scalar()
)
def get_number_of_chunks_we_think_exist(
document_id: str, index_name: str, tenant_id: str
) -> int:
current_chunk_count = get_current_chunk_count(document_id, index_name, tenant_id)
print(f"Current chunk count: {current_chunk_count}")
doc_info = VespaIndex.enrich_basic_chunk_info(
index_name=index_name,
http_client=get_vespa_http_client(),
document_id=document_id,
previous_chunk_count=current_chunk_count,
new_chunk_count=0,
)
chunk_ids = get_document_chunk_ids(
enriched_document_info_list=[doc_info],
tenant_id=tenant_id,
large_chunks_enabled=False,
)
return len(chunk_ids)
class VespaDebugging:
# Class for managing Vespa debugging actions.
def __init__(self, tenant_id: str | None = None):
self.tenant_id = POSTGRES_DEFAULT_SCHEMA if not tenant_id else tenant_id
self.index_name = get_index_name(self.tenant_id)
def sample_document_counts(self) -> None:
# Sample random documents and compare chunk counts
mismatches = []
no_chunks = []
with get_session_with_tenant(tenant_id=self.tenant_id) as session:
# Get a sample of random documents
from sqlalchemy import func
sample_docs = (
session.query(Document.id, Document.link, Document.semantic_id)
.order_by(func.random())
.limit(1000)
.all()
)
for doc in sample_docs:
document_id, link, semantic_id = doc
(
number_of_chunks_in_vespa,
number_of_chunks_we_think_exist,
) = self.compare_chunk_count(document_id)
if number_of_chunks_in_vespa != number_of_chunks_we_think_exist:
mismatches.append(
(
document_id,
link,
semantic_id,
number_of_chunks_in_vespa,
number_of_chunks_we_think_exist,
)
)
elif number_of_chunks_in_vespa == 0:
no_chunks.append((document_id, link, semantic_id))
# Print results
print("\nDocuments with mismatched chunk counts:")
for doc_id, link, semantic_id, vespa_count, expected_count in mismatches:
print(f"Document ID: {doc_id}")
print(f"Link: {link}")
print(f"Semantic ID: {semantic_id}")
print(f"Chunks in Vespa: {vespa_count}")
print(f"Expected chunks: {expected_count}")
print("-" * 80)
print("\nDocuments with no chunks in Vespa:")
for doc_id, link, semantic_id in no_chunks:
print(f"Document ID: {doc_id}")
print(f"Link: {link}")
print(f"Semantic ID: {semantic_id}")
print("-" * 80)
print(f"\nTotal mismatches: {len(mismatches)}")
print(f"Total documents with no chunks: {len(no_chunks)}")
def print_config(self) -> None:
# Print Vespa config.
@@ -457,6 +563,16 @@ class VespaDebugging:
# List documents for a tenant.
list_documents(n, self.tenant_id)
def compare_chunk_count(self, document_id: str) -> tuple[int, int]:
docs = search_for_document(self.index_name, document_id, max_hits=None)
number_of_chunks_we_think_exist = get_number_of_chunks_we_think_exist(
document_id, self.index_name, self.tenant_id
)
print(
f"Number of chunks in Vespa: {len(docs)}, Number of chunks we think exist: {number_of_chunks_we_think_exist}"
)
return len(docs), number_of_chunks_we_think_exist
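A sketch of driving this debugging entry point (tenant and document id are hypothetical):

# Hypothetical session: spot-check one document for chunk-count drift.
dbg = VespaDebugging(tenant_id="public")
in_vespa, expected = dbg.compare_chunk_count("web__https://example.com/page")
if in_vespa != expected:
    print("chunk drift detected - consider resyncing this document")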
def search_documents(self, connector_id: int, query: str, n: int = 10) -> None:
# Search documents for a tenant and connector.
search_documents(self.tenant_id, connector_id, query, n)
@@ -464,9 +580,11 @@ class VespaDebugging:
def update_document(
self, connector_id: int, doc_id: str, fields: Dict[str, Any]
) -> None:
# Update a document.
update_document(self.tenant_id, connector_id, doc_id, fields)
def search_for_document(self, document_id: str) -> List[Dict[str, Any]]:
return search_for_document(self.index_name, document_id)
def delete_document(self, connector_id: int, doc_id: str) -> None:
# Delete a document.
delete_document(self.tenant_id, connector_id, doc_id)
@@ -483,7 +601,6 @@ class VespaDebugging:
def main() -> None:
# Main CLI entry point.
parser = argparse.ArgumentParser(description="Vespa debugging tool")
parser.add_argument(
"--action",

View File

@@ -70,6 +70,7 @@ COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf
# Set up application files
COPY ./onyx /app/onyx
COPY ./shared_configs /app/shared_configs
COPY ./alembic_tenants /app/alembic_tenants
COPY ./alembic /app/alembic
COPY ./alembic.ini /app/alembic.ini
COPY ./pytest.ini /app/pytest.ini

View File

@@ -24,35 +24,6 @@ def generate_auth_token() -> str:
class TenantManager:
@staticmethod
def create(
tenant_id: str | None = None,
initial_admin_email: str | None = None,
referral_source: str | None = None,
) -> dict[str, str]:
body = {
"tenant_id": tenant_id,
"initial_admin_email": initial_admin_email,
"referral_source": referral_source,
}
token = generate_auth_token()
headers = {
"Authorization": f"Bearer {token}",
"X-API-KEY": "",
"Content-Type": "application/json",
}
response = requests.post(
url=f"{API_SERVER_URL}/tenants/create",
json=body,
headers=headers,
)
response.raise_for_status()
return response.json()
@staticmethod
def get_all_users(
user_performing_action: DATestUser | None = None,

View File

@@ -92,6 +92,7 @@ class UserManager:
# Set cookies in the headers
test_user.headers["Cookie"] = f"fastapiusersauth={session_cookie}; "
test_user.cookies = {"fastapiusersauth": session_cookie}
return test_user
@staticmethod
@@ -102,6 +103,7 @@ class UserManager:
response = requests.get(
url=f"{API_SERVER_URL}/me",
headers=user_to_verify.headers,
cookies=user_to_verify.cookies,
)
if user_to_verify.is_active is False:

View File

@@ -242,6 +242,18 @@ def reset_postgres_multitenant() -> None:
schema_name = schema[0]
cur.execute(f'DROP SCHEMA "{schema_name}" CASCADE')
# Drop tables in the public schema
cur.execute(
"""
SELECT tablename FROM pg_tables
WHERE schemaname = 'public'
"""
)
public_tables = cur.fetchall()
for table in public_tables:
table_name = table[0]
cur.execute(f'DROP TABLE IF EXISTS public."{table_name}" CASCADE')
cur.close()
conn.close()

View File

@@ -44,6 +44,7 @@ class DATestUser(BaseModel):
headers: dict
role: UserRole
is_active: bool
cookies: dict = {}
class DATestPersonaLabel(BaseModel):

View File

@@ -4,7 +4,6 @@ from tests.integration.common_utils.managers.cc_pair import CCPairManager
from tests.integration.common_utils.managers.chat import ChatSessionManager
from tests.integration.common_utils.managers.document import DocumentManager
from tests.integration.common_utils.managers.llm_provider import LLMProviderManager
from tests.integration.common_utils.managers.tenant import TenantManager
from tests.integration.common_utils.managers.user import UserManager
from tests.integration.common_utils.test_models import DATestAPIKey
from tests.integration.common_utils.test_models import DATestCCPair
@@ -13,25 +12,28 @@ from tests.integration.common_utils.test_models import DATestUser
def test_multi_tenant_access_control(reset_multitenant: None) -> None:
# Create Tenant 1 and its Admin User
TenantManager.create("tenant_dev1", "test1@test.com", "Data Plane Registration")
test_user1: DATestUser = UserManager.create(name="test1", email="test1@test.com")
assert UserManager.is_role(test_user1, UserRole.ADMIN)
# Creating an admin user (the first user created is automatically an admin and also provisions the tenant)
admin_user1: DATestUser = UserManager.create(
email="admin@onyx-test.com",
)
assert UserManager.is_role(admin_user1, UserRole.ADMIN)
# Create Tenant 2 and its Admin User
TenantManager.create("tenant_dev2", "test2@test.com", "Data Plane Registration")
test_user2: DATestUser = UserManager.create(name="test2", email="test2@test.com")
assert UserManager.is_role(test_user2, UserRole.ADMIN)
admin_user2: DATestUser = UserManager.create(
email="admin2@onyx-test.com",
)
assert UserManager.is_role(admin_user2, UserRole.ADMIN)
# Create connectors for Tenant 1
cc_pair_1: DATestCCPair = CCPairManager.create_from_scratch(
user_performing_action=test_user1,
user_performing_action=admin_user1,
)
api_key_1: DATestAPIKey = APIKeyManager.create(
user_performing_action=test_user1,
user_performing_action=admin_user1,
)
api_key_1.headers.update(test_user1.headers)
LLMProviderManager.create(user_performing_action=test_user1)
api_key_1.headers.update(admin_user1.headers)
LLMProviderManager.create(user_performing_action=admin_user1)
# Seed documents for Tenant 1
cc_pair_1.documents = []
@@ -49,13 +51,13 @@ def test_multi_tenant_access_control(reset_multitenant: None) -> None:
# Create connectors for Tenant 2
cc_pair_2: DATestCCPair = CCPairManager.create_from_scratch(
user_performing_action=test_user2,
user_performing_action=admin_user2,
)
api_key_2: DATestAPIKey = APIKeyManager.create(
user_performing_action=test_user2,
user_performing_action=admin_user2,
)
api_key_2.headers.update(test_user2.headers)
LLMProviderManager.create(user_performing_action=test_user2)
api_key_2.headers.update(admin_user2.headers)
LLMProviderManager.create(user_performing_action=admin_user2)
# Seed documents for Tenant 2
cc_pair_2.documents = []
@@ -76,17 +78,17 @@ def test_multi_tenant_access_control(reset_multitenant: None) -> None:
# Create chat sessions for each user
chat_session1: DATestChatSession = ChatSessionManager.create(
user_performing_action=test_user1
user_performing_action=admin_user1
)
chat_session2: DATestChatSession = ChatSessionManager.create(
user_performing_action=test_user2
user_performing_action=admin_user2
)
# User 1 sends a message and gets a response
response1 = ChatSessionManager.send_message(
chat_session_id=chat_session1.id,
message="What is in Tenant 1's documents?",
user_performing_action=test_user1,
user_performing_action=admin_user1,
)
# Assert that the search tool was used
assert response1.tool_name == "run_search"
@@ -100,14 +102,16 @@ def test_multi_tenant_access_control(reset_multitenant: None) -> None:
), "Tenant 2 document IDs should not be in the response"
# Assert that the contents are correct
for doc in response1.tool_result or []:
assert doc["content"] == "Tenant 1 Document Content"
assert any(
doc["content"] == "Tenant 1 Document Content"
for doc in response1.tool_result or []
), "Tenant 1 Document Content not found in any document"
# User 2 sends a message and gets a response
response2 = ChatSessionManager.send_message(
chat_session_id=chat_session2.id,
message="What is in Tenant 2's documents?",
user_performing_action=test_user2,
user_performing_action=admin_user2,
)
# Assert that the search tool was used
assert response2.tool_name == "run_search"
@@ -119,15 +123,18 @@ def test_multi_tenant_access_control(reset_multitenant: None) -> None:
assert not response_doc_ids.intersection(
tenant1_doc_ids
), "Tenant 1 document IDs should not be in the response"
# Assert that the contents are correct
for doc in response2.tool_result or []:
assert doc["content"] == "Tenant 2 Document Content"
assert any(
doc["content"] == "Tenant 2 Document Content"
for doc in response2.tool_result or []
), "Tenant 2 Document Content not found in any document"
# User 1 tries to access Tenant 2's documents
response_cross = ChatSessionManager.send_message(
chat_session_id=chat_session1.id,
message="What is in Tenant 2's documents?",
user_performing_action=test_user1,
user_performing_action=admin_user1,
)
# Assert that the search tool was used
assert response_cross.tool_name == "run_search"
@@ -140,7 +147,7 @@ def test_multi_tenant_access_control(reset_multitenant: None) -> None:
response_cross2 = ChatSessionManager.send_message(
chat_session_id=chat_session2.id,
message="What is in Tenant 1's documents?",
user_performing_action=test_user2,
user_performing_action=admin_user2,
)
# Assert that the search tool was used
assert response_cross2.tool_name == "run_search"

View File

@@ -4,14 +4,12 @@ from onyx.db.models import UserRole
from tests.integration.common_utils.managers.cc_pair import CCPairManager
from tests.integration.common_utils.managers.connector import ConnectorManager
from tests.integration.common_utils.managers.credential import CredentialManager
from tests.integration.common_utils.managers.tenant import TenantManager
from tests.integration.common_utils.managers.user import UserManager
from tests.integration.common_utils.test_models import DATestUser
# Test flow from creating a tenant to registering as a user
def test_tenant_creation(reset_multitenant: None) -> None:
TenantManager.create("tenant_dev", "test@test.com", "Data Plane Registration")
test_user: DATestUser = UserManager.create(name="test", email="test@test.com")
assert UserManager.is_role(test_user, UserRole.ADMIN)

View File

@@ -1,23 +1,23 @@
import time
from datetime import datetime
from onyx.db.models import IndexingStatus
from tests.integration.common_utils.managers.cc_pair import CCPairManager
from tests.integration.common_utils.managers.index_attempt import IndexAttemptManager
from tests.integration.common_utils.managers.user import UserManager
from tests.integration.common_utils.test_models import DATestIndexAttempt
from tests.integration.common_utils.test_models import DATestUser
def _verify_index_attempt_pagination(
cc_pair_id: int,
index_attempts: list[DATestIndexAttempt],
index_attempt_ids: list[int],
page_size: int = 5,
user_performing_action: DATestUser | None = None,
) -> None:
retrieved_attempts: list[int] = []
last_time_started = None # Track the last time_started seen
for i in range(0, len(index_attempts), page_size):
for i in range(0, len(index_attempt_ids), page_size):
paginated_result = IndexAttemptManager.get_index_attempt_page(
cc_pair_id=cc_pair_id,
page=(i // page_size),
@@ -26,9 +26,9 @@ def _verify_index_attempt_pagination(
)
# Verify that the total items is equal to the length of the index attempts list
assert paginated_result.total_items == len(index_attempts)
assert paginated_result.total_items == len(index_attempt_ids)
# Verify that the number of items in the page is equal to the page size
assert len(paginated_result.items) == min(page_size, len(index_attempts) - i)
assert len(paginated_result.items) == min(page_size, len(index_attempt_ids) - i)
# Verify time ordering within the page (descending order)
for attempt in paginated_result.items:
@@ -42,7 +42,7 @@ def _verify_index_attempt_pagination(
retrieved_attempts.extend([attempt.id for attempt in paginated_result.items])
# Create a set of all the expected index attempt IDs
all_expected_attempts = set(attempt.id for attempt in index_attempts)
all_expected_attempts = set(index_attempt_ids)
# Create a set of all the retrieved index attempt IDs
all_retrieved_attempts = set(retrieved_attempts)
@@ -51,6 +51,9 @@ def _verify_index_attempt_pagination(
def test_index_attempt_pagination(reset: None) -> None:
MAX_WAIT = 60
all_attempt_ids: list[int] = []
# Create an admin user to perform actions
user_performing_action: DATestUser = UserManager.create(
name="admin_performing_action",
@@ -62,20 +65,49 @@ def test_index_attempt_pagination(reset: None) -> None:
user_performing_action=user_performing_action,
)
# Create 300 successful index attempts
# Creating a CC pair will create an index attempt as well. Wait for it.
start = time.monotonic()
while True:
paginated_result = IndexAttemptManager.get_index_attempt_page(
cc_pair_id=cc_pair.id,
page=0,
page_size=5,
user_performing_action=user_performing_action,
)
if paginated_result.total_items == 1:
all_attempt_ids.append(paginated_result.items[0].id)
print("Initial index attempt from cc_pair creation detected. Continuing...")
break
elapsed = time.monotonic() - start
if elapsed > MAX_WAIT:
raise TimeoutError(
f"Initial index attempt: Not detected within {MAX_WAIT} seconds."
)
print(
f"Waiting for initial index attempt: elapsed={elapsed:.2f} timeout={MAX_WAIT}"
)
time.sleep(1)
# Create 299 successful index attempts (for 300 total)
base_time = datetime.now()
all_attempts = IndexAttemptManager.create_test_index_attempts(
num_attempts=300,
generated_attempts = IndexAttemptManager.create_test_index_attempts(
num_attempts=299,
cc_pair_id=cc_pair.id,
status=IndexingStatus.SUCCESS,
base_time=base_time,
)
for attempt in generated_attempts:
all_attempt_ids.append(attempt.id)
# Verify basic pagination with different page sizes
print("Verifying basic pagination with page size 5")
_verify_index_attempt_pagination(
cc_pair_id=cc_pair.id,
index_attempts=all_attempts,
index_attempt_ids=all_attempt_ids,
page_size=5,
user_performing_action=user_performing_action,
)
@@ -84,7 +116,7 @@ def test_index_attempt_pagination(reset: None) -> None:
print("Verifying pagination with page size 100")
_verify_index_attempt_pagination(
cc_pair_id=cc_pair.id,
index_attempts=all_attempts,
index_attempt_ids=all_attempt_ids,
page_size=100,
user_performing_action=user_performing_action,
)
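
The busy-wait added above (poll, log progress, time out) is a pattern that recurs in these tests and could be factored into a small helper. A minimal sketch — the `wait_until` name and signature are illustrative, not part of this diff:

```
import time
from typing import Callable


def wait_until(
    condition: Callable[[], bool],
    timeout: float = 60.0,
    interval: float = 1.0,
    description: str = "condition",
) -> None:
    """Poll `condition` until it returns True or `timeout` seconds elapse."""
    start = time.monotonic()
    while not condition():
        elapsed = time.monotonic() - start
        if elapsed > timeout:
            raise TimeoutError(f"{description}: not met within {timeout} seconds.")
        print(f"Waiting for {description}: elapsed={elapsed:.2f} timeout={timeout}")
        time.sleep(interval)
```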

View File

@@ -0,0 +1,423 @@
services:
api_server:
image: onyxdotapp/onyx-backend:${IMAGE_TAG:-latest}
build:
context: ../../backend
dockerfile: Dockerfile
command: >
/bin/sh -c "
alembic -n schema_private upgrade head &&
echo \"Starting Onyx Api Server\" &&
uvicorn onyx.main:app --host 0.0.0.0 --port 8080"
depends_on:
- relational_db
- index
- cache
- inference_model_server
restart: always
ports:
- "8080:8080"
environment:
- ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true
- MULTI_TENANT=true
- LOG_LEVEL=DEBUG
- AUTH_TYPE=cloud
- REQUIRE_EMAIL_VERIFICATION=false
- DISABLE_TELEMETRY=true
- IMAGE_TAG=test
- DEV_MODE=true
# Auth Settings
- SESSION_EXPIRE_TIME_SECONDS=${SESSION_EXPIRE_TIME_SECONDS:-}
- ENCRYPTION_KEY_SECRET=${ENCRYPTION_KEY_SECRET:-}
- VALID_EMAIL_DOMAINS=${VALID_EMAIL_DOMAINS:-}
- GOOGLE_OAUTH_CLIENT_ID=${GOOGLE_OAUTH_CLIENT_ID:-}
- GOOGLE_OAUTH_CLIENT_SECRET=${GOOGLE_OAUTH_CLIENT_SECRET:-}
- SMTP_SERVER=${SMTP_SERVER:-}
- SMTP_PORT=${SMTP_PORT:-587}
- SMTP_USER=${SMTP_USER:-}
- SMTP_PASS=${SMTP_PASS:-}
- ENABLE_EMAIL_INVITES=${ENABLE_EMAIL_INVITES:-}
- EMAIL_FROM=${EMAIL_FROM:-}
- OAUTH_CLIENT_ID=${OAUTH_CLIENT_ID:-}
- OAUTH_CLIENT_SECRET=${OAUTH_CLIENT_SECRET:-}
- OPENID_CONFIG_URL=${OPENID_CONFIG_URL:-}
- TRACK_EXTERNAL_IDP_EXPIRY=${TRACK_EXTERNAL_IDP_EXPIRY:-}
- CORS_ALLOWED_ORIGIN=${CORS_ALLOWED_ORIGIN:-}
# Gen AI Settings
- GEN_AI_MAX_TOKENS=${GEN_AI_MAX_TOKENS:-}
- QA_TIMEOUT=${QA_TIMEOUT:-}
- MAX_CHUNKS_FED_TO_CHAT=${MAX_CHUNKS_FED_TO_CHAT:-}
- DISABLE_LLM_CHOOSE_SEARCH=${DISABLE_LLM_CHOOSE_SEARCH:-}
- DISABLE_LLM_QUERY_REPHRASE=${DISABLE_LLM_QUERY_REPHRASE:-}
- DISABLE_GENERATIVE_AI=${DISABLE_GENERATIVE_AI:-}
- DISABLE_LITELLM_STREAMING=${DISABLE_LITELLM_STREAMING:-}
- LITELLM_EXTRA_HEADERS=${LITELLM_EXTRA_HEADERS:-}
- BING_API_KEY=${BING_API_KEY:-}
- DISABLE_LLM_DOC_RELEVANCE=${DISABLE_LLM_DOC_RELEVANCE:-}
- GEN_AI_API_KEY=${GEN_AI_API_KEY:-}
- TOKEN_BUDGET_GLOBALLY_ENABLED=${TOKEN_BUDGET_GLOBALLY_ENABLED:-}
# Query Options
- DOC_TIME_DECAY=${DOC_TIME_DECAY:-}
- HYBRID_ALPHA=${HYBRID_ALPHA:-}
- EDIT_KEYWORD_QUERY=${EDIT_KEYWORD_QUERY:-}
- MULTILINGUAL_QUERY_EXPANSION=${MULTILINGUAL_QUERY_EXPANSION:-}
- LANGUAGE_HINT=${LANGUAGE_HINT:-}
- LANGUAGE_CHAT_NAMING_HINT=${LANGUAGE_CHAT_NAMING_HINT:-}
- QA_PROMPT_OVERRIDE=${QA_PROMPT_OVERRIDE:-}
# Other services
- POSTGRES_HOST=relational_db
- POSTGRES_DEFAULT_SCHEMA=${POSTGRES_DEFAULT_SCHEMA:-}
- VESPA_HOST=index
- REDIS_HOST=cache
- WEB_DOMAIN=${WEB_DOMAIN:-}
# Don't change the NLP model configs unless you know what you're doing
- EMBEDDING_BATCH_SIZE=${EMBEDDING_BATCH_SIZE:-}
- DOCUMENT_ENCODER_MODEL=${DOCUMENT_ENCODER_MODEL:-}
- DOC_EMBEDDING_DIM=${DOC_EMBEDDING_DIM:-}
- NORMALIZE_EMBEDDINGS=${NORMALIZE_EMBEDDINGS:-}
- ASYM_QUERY_PREFIX=${ASYM_QUERY_PREFIX:-}
- DISABLE_RERANK_FOR_STREAMING=${DISABLE_RERANK_FOR_STREAMING:-}
- MODEL_SERVER_HOST=${MODEL_SERVER_HOST:-inference_model_server}
- MODEL_SERVER_PORT=${MODEL_SERVER_PORT:-}
- LOG_ALL_MODEL_INTERACTIONS=${LOG_ALL_MODEL_INTERACTIONS:-}
- LOG_DANSWER_MODEL_INTERACTIONS=${LOG_DANSWER_MODEL_INTERACTIONS:-}
- LOG_INDIVIDUAL_MODEL_TOKENS=${LOG_INDIVIDUAL_MODEL_TOKENS:-}
- LOG_VESPA_TIMING_INFORMATION=${LOG_VESPA_TIMING_INFORMATION:-}
- LOG_ENDPOINT_LATENCY=${LOG_ENDPOINT_LATENCY:-}
- LOG_POSTGRES_LATENCY=${LOG_POSTGRES_LATENCY:-}
- LOG_POSTGRES_CONN_COUNTS=${LOG_POSTGRES_CONN_COUNTS:-}
- CELERY_BROKER_POOL_LIMIT=${CELERY_BROKER_POOL_LIMIT:-}
- LITELLM_CUSTOM_ERROR_MESSAGE_MAPPINGS=${LITELLM_CUSTOM_ERROR_MESSAGE_MAPPINGS:-}
# Egnyte OAuth Configs
- EGNYTE_CLIENT_ID=${EGNYTE_CLIENT_ID:-}
- EGNYTE_CLIENT_SECRET=${EGNYTE_CLIENT_SECRET:-}
- EGNYTE_LOCALHOST_OVERRIDE=${EGNYTE_LOCALHOST_OVERRIDE:-}
# Linear OAuth Configs
- LINEAR_CLIENT_ID=${LINEAR_CLIENT_ID:-}
- LINEAR_CLIENT_SECRET=${LINEAR_CLIENT_SECRET:-}
# Analytics Configs
- SENTRY_DSN=${SENTRY_DSN:-}
# Chat Configs
- HARD_DELETE_CHATS=${HARD_DELETE_CHATS:-}
# Enables the use of Bedrock models or IAM auth
- AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID:-}
- AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY:-}
- AWS_REGION_NAME=${AWS_REGION_NAME:-}
- API_KEY_HASH_ROUNDS=${API_KEY_HASH_ROUNDS:-}
# Seeding configuration
- USE_IAM_AUTH=${USE_IAM_AUTH:-}
extra_hosts:
- "host.docker.internal:host-gateway"
logging:
driver: json-file
options:
max-size: "50m"
max-file: "6"
background:
image: onyxdotapp/onyx-backend:${IMAGE_TAG:-latest}
build:
context: ../../backend
dockerfile: Dockerfile
command: >
/bin/sh -c "
if [ -f /etc/ssl/certs/custom-ca.crt ]; then
update-ca-certificates;
fi &&
/usr/bin/supervisord -c /etc/supervisor/conf.d/supervisord.conf"
depends_on:
- relational_db
- index
- cache
- inference_model_server
- indexing_model_server
restart: always
environment:
- ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true
- MULTI_TENANT=true
- LOG_LEVEL=DEBUG
- AUTH_TYPE=cloud
- REQUIRE_EMAIL_VERIFICATION=false
- DISABLE_TELEMETRY=true
- IMAGE_TAG=test
- ENCRYPTION_KEY_SECRET=${ENCRYPTION_KEY_SECRET:-}
- JWT_PUBLIC_KEY_URL=${JWT_PUBLIC_KEY_URL:-}
# Gen AI Settings (Needed by OnyxBot)
- GEN_AI_MAX_TOKENS=${GEN_AI_MAX_TOKENS:-}
- QA_TIMEOUT=${QA_TIMEOUT:-}
- MAX_CHUNKS_FED_TO_CHAT=${MAX_CHUNKS_FED_TO_CHAT:-}
- DISABLE_LLM_CHOOSE_SEARCH=${DISABLE_LLM_CHOOSE_SEARCH:-}
- DISABLE_LLM_QUERY_REPHRASE=${DISABLE_LLM_QUERY_REPHRASE:-}
- DISABLE_GENERATIVE_AI=${DISABLE_GENERATIVE_AI:-}
- GENERATIVE_MODEL_ACCESS_CHECK_FREQ=${GENERATIVE_MODEL_ACCESS_CHECK_FREQ:-}
- DISABLE_LITELLM_STREAMING=${DISABLE_LITELLM_STREAMING:-}
- LITELLM_EXTRA_HEADERS=${LITELLM_EXTRA_HEADERS:-}
- GEN_AI_API_KEY=${GEN_AI_API_KEY:-}
- BING_API_KEY=${BING_API_KEY:-}
# Query Options
- DOC_TIME_DECAY=${DOC_TIME_DECAY:-}
- HYBRID_ALPHA=${HYBRID_ALPHA:-}
- EDIT_KEYWORD_QUERY=${EDIT_KEYWORD_QUERY:-}
- MULTILINGUAL_QUERY_EXPANSION=${MULTILINGUAL_QUERY_EXPANSION:-}
- LANGUAGE_HINT=${LANGUAGE_HINT:-}
- LANGUAGE_CHAT_NAMING_HINT=${LANGUAGE_CHAT_NAMING_HINT:-}
- QA_PROMPT_OVERRIDE=${QA_PROMPT_OVERRIDE:-}
# Other Services
- POSTGRES_HOST=relational_db
- POSTGRES_USER=${POSTGRES_USER:-}
- POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-}
- POSTGRES_DB=${POSTGRES_DB:-}
- POSTGRES_DEFAULT_SCHEMA=${POSTGRES_DEFAULT_SCHEMA:-}
- VESPA_HOST=index
- REDIS_HOST=cache
- WEB_DOMAIN=${WEB_DOMAIN:-}
# Don't change the NLP model configs unless you know what you're doing
- DOCUMENT_ENCODER_MODEL=${DOCUMENT_ENCODER_MODEL:-}
- DOC_EMBEDDING_DIM=${DOC_EMBEDDING_DIM:-}
- NORMALIZE_EMBEDDINGS=${NORMALIZE_EMBEDDINGS:-}
- ASYM_QUERY_PREFIX=${ASYM_QUERY_PREFIX:-}
- ASYM_PASSAGE_PREFIX=${ASYM_PASSAGE_PREFIX:-}
- MODEL_SERVER_HOST=${MODEL_SERVER_HOST:-inference_model_server}
- MODEL_SERVER_PORT=${MODEL_SERVER_PORT:-}
- INDEXING_MODEL_SERVER_HOST=${INDEXING_MODEL_SERVER_HOST:-indexing_model_server}
# Indexing Configs
- VESPA_SEARCHER_THREADS=${VESPA_SEARCHER_THREADS:-}
- NUM_INDEXING_WORKERS=${NUM_INDEXING_WORKERS:-}
- ENABLED_CONNECTOR_TYPES=${ENABLED_CONNECTOR_TYPES:-}
- DISABLE_INDEX_UPDATE_ON_SWAP=${DISABLE_INDEX_UPDATE_ON_SWAP:-}
- DASK_JOB_CLIENT_ENABLED=${DASK_JOB_CLIENT_ENABLED:-}
- CONTINUE_ON_CONNECTOR_FAILURE=${CONTINUE_ON_CONNECTOR_FAILURE:-}
- EXPERIMENTAL_CHECKPOINTING_ENABLED=${EXPERIMENTAL_CHECKPOINTING_ENABLED:-}
- CONFLUENCE_CONNECTOR_LABELS_TO_SKIP=${CONFLUENCE_CONNECTOR_LABELS_TO_SKIP:-}
- JIRA_CONNECTOR_LABELS_TO_SKIP=${JIRA_CONNECTOR_LABELS_TO_SKIP:-}
- WEB_CONNECTOR_VALIDATE_URLS=${WEB_CONNECTOR_VALIDATE_URLS:-}
- JIRA_API_VERSION=${JIRA_API_VERSION:-}
- GONG_CONNECTOR_START_TIME=${GONG_CONNECTOR_START_TIME:-}
- NOTION_CONNECTOR_ENABLE_RECURSIVE_PAGE_LOOKUP=${NOTION_CONNECTOR_ENABLE_RECURSIVE_PAGE_LOOKUP:-}
- GITHUB_CONNECTOR_BASE_URL=${GITHUB_CONNECTOR_BASE_URL:-}
- MAX_DOCUMENT_CHARS=${MAX_DOCUMENT_CHARS:-}
- MAX_FILE_SIZE_BYTES=${MAX_FILE_SIZE_BYTES:-}
# Egnyte OAuth Configs
- EGNYTE_CLIENT_ID=${EGNYTE_CLIENT_ID:-}
- EGNYTE_CLIENT_SECRET=${EGNYTE_CLIENT_SECRET:-}
- EGNYTE_LOCALHOST_OVERRIDE=${EGNYTE_LOCALHOST_OVERRIDE:-}
# Linear OAuth Configs
- LINEAR_CLIENT_ID=${LINEAR_CLIENT_ID:-}
- LINEAR_CLIENT_SECRET=${LINEAR_CLIENT_SECRET:-}
# Celery Configs (defaults are set in the supervisord.conf file.
# prefer doing that to have one source of defaults)
- CELERY_WORKER_INDEXING_CONCURRENCY=${CELERY_WORKER_INDEXING_CONCURRENCY:-}
- CELERY_WORKER_LIGHT_CONCURRENCY=${CELERY_WORKER_LIGHT_CONCURRENCY:-}
- CELERY_WORKER_LIGHT_PREFETCH_MULTIPLIER=${CELERY_WORKER_LIGHT_PREFETCH_MULTIPLIER:-}
# Onyx SlackBot Configs
- DANSWER_BOT_DISABLE_DOCS_ONLY_ANSWER=${DANSWER_BOT_DISABLE_DOCS_ONLY_ANSWER:-}
- DANSWER_BOT_FEEDBACK_VISIBILITY=${DANSWER_BOT_FEEDBACK_VISIBILITY:-}
- DANSWER_BOT_DISPLAY_ERROR_MSGS=${DANSWER_BOT_DISPLAY_ERROR_MSGS:-}
- DANSWER_BOT_RESPOND_EVERY_CHANNEL=${DANSWER_BOT_RESPOND_EVERY_CHANNEL:-}
- DANSWER_BOT_DISABLE_COT=${DANSWER_BOT_DISABLE_COT:-} # Currently unused
- NOTIFY_SLACKBOT_NO_ANSWER=${NOTIFY_SLACKBOT_NO_ANSWER:-}
- DANSWER_BOT_MAX_QPM=${DANSWER_BOT_MAX_QPM:-}
- DANSWER_BOT_MAX_WAIT_TIME=${DANSWER_BOT_MAX_WAIT_TIME:-}
# Logging
# Leave this on pretty please? Nothing sensitive is collected!
# https://docs.onyx.app/more/telemetry
- DISABLE_TELEMETRY=${DISABLE_TELEMETRY:-}
- LOG_LEVEL=${LOG_LEVEL:-info} # Set to debug to get more fine-grained logs
- LOG_ALL_MODEL_INTERACTIONS=${LOG_ALL_MODEL_INTERACTIONS:-} # LiteLLM Verbose Logging
# Log all of Onyx prompts and interactions with the LLM
- LOG_DANSWER_MODEL_INTERACTIONS=${LOG_DANSWER_MODEL_INTERACTIONS:-}
- LOG_INDIVIDUAL_MODEL_TOKENS=${LOG_INDIVIDUAL_MODEL_TOKENS:-}
- LOG_VESPA_TIMING_INFORMATION=${LOG_VESPA_TIMING_INFORMATION:-}
# Analytics Configs
- SENTRY_DSN=${SENTRY_DSN:-}
# Enterprise Edition stuff
- ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=${ENABLE_PAID_ENTERPRISE_EDITION_FEATURES:-false}
- USE_IAM_AUTH=${USE_IAM_AUTH:-}
- AWS_REGION_NAME=${AWS_REGION_NAME:-}
- AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID-}
- AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY-}
# Uncomment the volume mount below if USE_IAM_AUTH is true and you are using IAM auth for Postgres
# volumes:
# - ./bundle.pem:/app/bundle.pem:ro
extra_hosts:
- "host.docker.internal:host-gateway"
logging:
driver: json-file
options:
max-size: "50m"
max-file: "6"
# Uncomment the following lines if you need to include a custom CA certificate
# This section enables the use of a custom CA certificate
# If present, the custom CA certificate is mounted as a volume
# The container checks for its existence and updates the system's CA certificates
# This allows for secure communication with services using custom SSL certificates
# Optional volume mount for CA certificate
# volumes:
# # Maps to the CA_CERT_PATH environment variable in the Dockerfile
# - ${CA_CERT_PATH:-./custom-ca.crt}:/etc/ssl/certs/custom-ca.crt:ro
web_server:
image: onyxdotapp/onyx-web-server:${IMAGE_TAG:-latest}
build:
context: ../../web
dockerfile: Dockerfile
args:
- NEXT_PUBLIC_DISABLE_STREAMING=${NEXT_PUBLIC_DISABLE_STREAMING:-false}
- NEXT_PUBLIC_NEW_CHAT_DIRECTS_TO_SAME_PERSONA=${NEXT_PUBLIC_NEW_CHAT_DIRECTS_TO_SAME_PERSONA:-false}
- NEXT_PUBLIC_POSITIVE_PREDEFINED_FEEDBACK_OPTIONS=${NEXT_PUBLIC_POSITIVE_PREDEFINED_FEEDBACK_OPTIONS:-}
- NEXT_PUBLIC_NEGATIVE_PREDEFINED_FEEDBACK_OPTIONS=${NEXT_PUBLIC_NEGATIVE_PREDEFINED_FEEDBACK_OPTIONS:-}
- NEXT_PUBLIC_DISABLE_LOGOUT=${NEXT_PUBLIC_DISABLE_LOGOUT:-}
- NEXT_PUBLIC_DEFAULT_SIDEBAR_OPEN=${NEXT_PUBLIC_DEFAULT_SIDEBAR_OPEN:-}
- NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED=${NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED:-}
# Enterprise Edition only
- NEXT_PUBLIC_THEME=${NEXT_PUBLIC_THEME:-}
# DO NOT TURN ON unless you have EXPLICIT PERMISSION from Onyx.
- NEXT_PUBLIC_DO_NOT_USE_TOGGLE_OFF_DANSWER_POWERED=${NEXT_PUBLIC_DO_NOT_USE_TOGGLE_OFF_DANSWER_POWERED:-false}
depends_on:
- api_server
restart: always
environment:
- INTERNAL_URL=http://api_server:8080
- WEB_DOMAIN=${WEB_DOMAIN:-}
- THEME_IS_DARK=${THEME_IS_DARK:-}
- DISABLE_LLM_DOC_RELEVANCE=${DISABLE_LLM_DOC_RELEVANCE:-}
# Enterprise Edition only
- ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=${ENABLE_PAID_ENTERPRISE_EDITION_FEATURES:-false}
- NEXT_PUBLIC_CUSTOM_REFRESH_URL=${NEXT_PUBLIC_CUSTOM_REFRESH_URL:-}
inference_model_server:
image: onyxdotapp/onyx-model-server:${IMAGE_TAG:-latest}
build:
context: ../../backend
dockerfile: Dockerfile.model_server
command: >
/bin/sh -c "if [ \"${DISABLE_MODEL_SERVER:-false}\" = \"True\" ]; then
echo 'Skipping service...';
exit 0;
else
exec uvicorn model_server.main:app --host 0.0.0.0 --port 9000;
fi"
restart: on-failure
environment:
- MIN_THREADS_ML_MODELS=${MIN_THREADS_ML_MODELS:-}
# Set to debug to get more fine-grained logs
- LOG_LEVEL=${LOG_LEVEL:-info}
# Analytics Configs
- SENTRY_DSN=${SENTRY_DSN:-}
volumes:
# Not necessary, this is just to reduce download time during startup
- model_cache_huggingface:/root/.cache/huggingface/
logging:
driver: json-file
options:
max-size: "50m"
max-file: "6"
indexing_model_server:
image: onyxdotapp/onyx-model-server:${IMAGE_TAG:-latest}
build:
context: ../../backend
dockerfile: Dockerfile.model_server
command: >
/bin/sh -c "if [ \"${DISABLE_MODEL_SERVER:-false}\" = \"True\" ]; then
echo 'Skipping service...';
exit 0;
else
exec uvicorn model_server.main:app --host 0.0.0.0 --port 9000;
fi"
restart: on-failure
environment:
- INDEX_BATCH_SIZE=${INDEX_BATCH_SIZE:-}
- MIN_THREADS_ML_MODELS=${MIN_THREADS_ML_MODELS:-}
- INDEXING_ONLY=True
# Set to debug to get more fine-grained logs
- LOG_LEVEL=${LOG_LEVEL:-info}
- CLIENT_EMBEDDING_TIMEOUT=${CLIENT_EMBEDDING_TIMEOUT:-}
# Analytics Configs
- SENTRY_DSN=${SENTRY_DSN:-}
volumes:
# Not necessary, this is just to reduce download time during startup
- indexing_huggingface_model_cache:/root/.cache/huggingface/
logging:
driver: json-file
options:
max-size: "50m"
max-file: "6"
relational_db:
image: postgres:15.2-alpine
command: -c 'max_connections=250'
restart: always
environment:
- POSTGRES_USER=${POSTGRES_USER:-postgres}
- POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-password}
ports:
- "5432:5432"
volumes:
- db_volume:/var/lib/postgresql/data
# This container name cannot contain an underscore because Vespa expects a valid hostname in the URL
index:
image: vespaengine/vespa:8.277.17
restart: always
ports:
- "19071:19071"
- "8081:8081"
volumes:
- vespa_volume:/opt/vespa/var
logging:
driver: json-file
options:
max-size: "50m"
max-file: "6"
nginx:
image: nginx:1.23.4-alpine
restart: always
# nginx will immediately crash with `nginx: [emerg] host not found in upstream`
# if api_server / web_server are not up
depends_on:
- api_server
- web_server
environment:
- DOMAIN=localhost
ports:
- "80:80"
- "3000:80" # allow for localhost:3000 usage, since that is the norm
volumes:
- ../data/nginx:/etc/nginx/conf.d
logging:
driver: json-file
options:
max-size: "50m"
max-file: "6"
# The specified script waits for the api_server to start up.
# Without this we've seen issues where nginx shows no error logs but
# does not receive any traffic
# NOTE: we have to use dos2unix to remove Carriage Return chars from the file
# in order to make this work on both Unix-like systems and Windows
command: >
/bin/sh -c "dos2unix /etc/nginx/conf.d/run-nginx.sh
&& /etc/nginx/conf.d/run-nginx.sh app.conf.template.dev"
cache:
image: redis:7.4-alpine
restart: always
ports:
- "6379:6379"
# Docker silently mounts /data even without an explicit volume mount, which enables
# persistence. Explicitly setting save and appendonly forces ephemeral behavior.
command: redis-server --save "" --appendonly no
volumes:
db_volume:
vespa_volume: # Created by the container itself
model_cache_huggingface:
indexing_huggingface_model_cache:

View File

@@ -4,12 +4,12 @@ dependencies:
version: 14.3.1
- name: vespa
repository: https://onyx-dot-app.github.io/vespa-helm-charts
version: 0.2.18
version: 0.2.20
- name: nginx
repository: oci://registry-1.docker.io/bitnamicharts
version: 15.14.0
- name: redis
repository: https://charts.bitnami.com/bitnami
version: 20.1.0
digest: sha256:5c9eb3d55d5f8e3beb64f26d26f686c8d62755daa10e2e6d87530bdf2fbbf957
generated: "2024-12-10T10:47:35.812483-08:00"
digest: sha256:4615c033064a987e3f66a48f4744d2e88bd1cc932c79453c4928455695a72778
generated: "2025-02-04T11:45:05.39228-08:00"

View File

@@ -23,7 +23,7 @@ dependencies:
repository: https://charts.bitnami.com/bitnami
condition: postgresql.enabled
- name: vespa
version: 0.2.18
version: 0.2.20
repository: https://onyx-dot-app.github.io/vespa-helm-charts
condition: vespa.enabled
- name: nginx

View File

@@ -7,10 +7,10 @@ metadata:
data:
INTERNAL_URL: "http://{{ include "onyx-stack.fullname" . }}-api-service:{{ .Values.api.service.port | default 8080 }}"
POSTGRES_HOST: {{ .Release.Name }}-postgresql
VESPA_HOST: da-vespa-0.vespa-service
VESPA_HOST: {{ .Values.vespa.name }}.{{ .Values.vespa.service.name }}.{{ .Release.Namespace }}.svc.cluster.local
REDIS_HOST: {{ .Release.Name }}-redis-master
MODEL_SERVER_HOST: "{{ include "onyx-stack.fullname" . }}-inference-model-service"
INDEXING_MODEL_SERVER_HOST: "{{ include "onyx-stack.fullname" . }}-indexing-model-service"
{{- range $key, $value := .Values.configMap }}
{{ $key }}: "{{ $value }}"
{{- end }}
{{- end }}
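
For context on the templated `VESPA_HOST` above: it replaces the hard-coded `da-vespa-0.vespa-service` with the fully-qualified DNS name Kubernetes assigns to a StatefulSet pod behind a headless service, `<pod>.<service>.<namespace>.svc.cluster.local`. A tiny sketch of the pattern (hypothetical helper, shown for illustration only; the `onyx` namespace is an assumption):

```
def statefulset_pod_fqdn(pod: str, service: str, namespace: str) -> str:
    # Stable per-pod DNS name for a StatefulSet behind a headless service
    return f"{pod}.{service}.{namespace}.svc.cluster.local"


assert (
    statefulset_pod_fqdn("da-vespa-0", "vespa-service", "onyx")
    == "da-vespa-0.vespa-service.onyx.svc.cluster.local"
)
```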

View File

@@ -5,6 +5,7 @@
postgresql:
primary:
persistence:
storageClass: ""
size: 5Gi
enabled: true
auth:
@@ -12,13 +13,52 @@ postgresql:
secretKeys:
# overwriting as postgres typically expects 'postgres-password'
adminPasswordKey: postgres_password
imagePullSecrets: []
nameOverride: ""
fullnameOverride: ""
vespa:
name: da-vespa-0
service:
name: vespa-service
volumeClaimTemplates:
- metadata:
name: vespa-storage
spec:
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 1Gi
storageClassName: ""
enabled: true
replicaCount: 1
image:
repository: vespa
pullPolicy: IfNotPresent
tag: "8.277.17"
podAnnotations: {}
podLabels:
app: vespa
app.kubernetes.io/instance: onyx
app.kubernetes.io/name: vespa
securityContext:
privileged: true
runAsUser: 0
resources:
# The Vespa Helm chart specifies default resources, which are quite modest. We override
# them here to increase chances of the chart running successfully.
requests:
cpu: 1500m
memory: 4000Mi
limits:
cpu: 1500m
memory: 4000Mi
persistent:
storageClassName: ""
imagePullSecrets: []
nameOverride: ""
fullnameOverride: ""
inferenceCapability:
service:
portName: modelserver
@@ -272,15 +312,9 @@ background:
podSecurityContext:
{}
# fsGroup: 2000
securityContext:
{}
# capabilities:
# drop:
# - ALL
# readOnlyRootFilesystem: true
# runAsNonRoot: true
# runAsUser: 1000
privileged: true
runAsUser: 0
enableMiniChunk: "true"
resources: {}
# We usually recommend not to specify default resources and to leave this as a conscious
@@ -316,50 +350,6 @@ background:
nodeSelector: {}
tolerations: []
vespa:
volumeClaimTemplates:
- metadata:
name: vespa-storage
spec:
accessModes:
- ReadWriteOnce
storageClassName: ""
resources:
requests:
storage: 1Gi
enabled: true
replicaCount: 1
image:
repository: vespa
pullPolicy: IfNotPresent
tag: "8.277.17"
podAnnotations: {}
podLabels:
app: vespa
app.kubernetes.io/instance: onyx
app.kubernetes.io/name: vespa
podSecurityContext:
{}
# fsGroup: 2000
securityContext:
privileged: true
runAsUser: 0
resources:
# The Vespa Helm chart specifies default resources, which are quite modest. We override
# them here to increase chances of the chart running successfully.
requests:
cpu: 1500m
memory: 4000Mi
limits:
cpu: 1500m
memory: 4000Mi
nodeSelector: {}
tolerations: []
affinity: {}
redis:

web/.gitignore
View File

@@ -35,6 +35,8 @@ yarn-error.log*
*.tsbuildinfo
next-env.d.ts
# playwright testing temp files
/admin_auth.json
/user_auth.json
/build-archive.log
/test-results

View File

@@ -81,6 +81,9 @@ ENV NEXT_PUBLIC_GTM_ENABLED=${NEXT_PUBLIC_GTM_ENABLED}
ARG NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED
ENV NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED=${NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED}
ARG NEXT_PUBLIC_INCLUDE_ERROR_POPUP_SUPPORT_LINK
ENV NEXT_PUBLIC_INCLUDE_ERROR_POPUP_SUPPORT_LINK=${NEXT_PUBLIC_INCLUDE_ERROR_POPUP_SUPPORT_LINK}
# Use NODE_OPTIONS in the build command
RUN NODE_OPTIONS="${NODE_OPTIONS}" npx next build
@@ -160,6 +163,9 @@ ENV NEXT_PUBLIC_GTM_ENABLED=${NEXT_PUBLIC_GTM_ENABLED}
ARG NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED
ENV NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED=${NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED}
ARG NEXT_PUBLIC_INCLUDE_ERROR_POPUP_SUPPORT_LINK
ENV NEXT_PUBLIC_INCLUDE_ERROR_POPUP_SUPPORT_LINK=${NEXT_PUBLIC_INCLUDE_ERROR_POPUP_SUPPORT_LINK}
# Note: Don't expose ports here, Compose will handle that for us if necessary.
# If you want to run this without compose, specify the ports to
# expose via cli

View File

@@ -21,3 +21,42 @@ Open [http://localhost:3000](http://localhost:3000) with your browser to see the
_Note:_ If you are having problems accessing the page above, try setting the `WEB_DOMAIN` env variable to
`http://127.0.0.1:3000` and accessing it there.
## Testing
This testing process resets your application to a clean state.
Don't run these tests if you don't want that!
Bring up the entire application.
1. Reset the instance
```
cd backend
export PYTEST_IGNORE_SKIP=true
pytest -s tests/integration/tests/playwright/test_playwright.py
```
(A sketch of the skip guard this flag bypasses appears after this list.)
2. Run playwright
```
cd web
npx playwright test
```
3. Inspect results
By default, playwright.config.ts is configured to output the results to:
```
web/test-results
```
4. Upload results to Chromatic (Optional)
This step would normally not be run by third-party developers, but first-party devs
may use this for local troubleshooting and testing.
```
cd web
npx chromatic --playwright --project-token={your token here}
```
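
As referenced in step 1, the reset test is destructive, so it is normally skipped; `PYTEST_IGNORE_SKIP=true` bypasses that guard. A hypothetical sketch of what such a module-level guard looks like (illustrative only — the actual test may gate differently):

```
import os

import pytest

# Destructive reset tests stay skipped unless the caller opts in explicitly
if os.environ.get("PYTEST_IGNORE_SKIP", "").lower() != "true":
    pytest.skip(
        "Set PYTEST_IGNORE_SKIP=true to run destructive reset tests",
        allow_module_level=True,
    )
```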

web/package-lock.json
View File

@@ -15,6 +15,7 @@
"@headlessui/react": "^2.2.0",
"@headlessui/tailwindcss": "^0.2.1",
"@phosphor-icons/react": "^2.0.8",
"@radix-ui/react-accordion": "^1.2.2",
"@radix-ui/react-checkbox": "^1.1.2",
"@radix-ui/react-collapsible": "^1.1.2",
"@radix-ui/react-dialog": "^1.1.2",
@@ -83,11 +84,11 @@
"yup": "^1.4.0"
},
"devDependencies": {
"@chromatic-com/playwright": "^0.10.0",
"@chromatic-com/playwright": "^0.10.2",
"@tailwindcss/typography": "^0.5.10",
"@types/chrome": "^0.0.287",
"@types/jest": "^29.5.14",
"chromatic": "^11.18.1",
"chromatic": "^11.25.2",
"eslint": "^8.48.0",
"eslint-config-next": "^14.1.0",
"jest": "^29.7.0",
@@ -756,9 +757,9 @@
"license": "MIT"
},
"node_modules/@chromatic-com/playwright": {
"version": "0.10.0",
"resolved": "https://registry.npmjs.org/@chromatic-com/playwright/-/playwright-0.10.0.tgz",
"integrity": "sha512-QjKnOfuIcq9Y97QwA3MMVzOceXn1ikelUeC8gy60d2PbsQ2NNxH2n/PrAJ8Sllr225mXD1ts9xBH+Hq3+Blo5A==",
"version": "0.10.2",
"resolved": "https://registry.npmjs.org/@chromatic-com/playwright/-/playwright-0.10.2.tgz",
"integrity": "sha512-SfP4I0rWPeSNW5VtV7eiuNSsZYK9IdVPTBT1SnUFJd3lACS1YJJd5s8pTisJvgh5Q8u9VNGWXfeuV3ddGJyRtw==",
"dev": true,
"license": "MIT",
"dependencies": {
@@ -3442,6 +3443,140 @@
"integrity": "sha512-4Z8dn6Upk0qk4P74xBhZ6Hd/w0mPEzOOLxy4xiPXOXqjF7jZS0VAKk7/x/H6FyY2zCkYJqePf1G5KmkmNJ4RBA==",
"license": "MIT"
},
"node_modules/@radix-ui/react-accordion": {
"version": "1.2.2",
"resolved": "https://registry.npmjs.org/@radix-ui/react-accordion/-/react-accordion-1.2.2.tgz",
"integrity": "sha512-b1oh54x4DMCdGsB4/7ahiSrViXxaBwRPotiZNnYXjLha9vfuURSAZErki6qjDoSIV0eXx5v57XnTGVtGwnfp2g==",
"license": "MIT",
"dependencies": {
"@radix-ui/primitive": "1.1.1",
"@radix-ui/react-collapsible": "1.1.2",
"@radix-ui/react-collection": "1.1.1",
"@radix-ui/react-compose-refs": "1.1.1",
"@radix-ui/react-context": "1.1.1",
"@radix-ui/react-direction": "1.1.0",
"@radix-ui/react-id": "1.1.0",
"@radix-ui/react-primitive": "2.0.1",
"@radix-ui/react-use-controllable-state": "1.1.0"
},
"peerDependencies": {
"@types/react": "*",
"@types/react-dom": "*",
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
"react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
},
"peerDependenciesMeta": {
"@types/react": {
"optional": true
},
"@types/react-dom": {
"optional": true
}
}
},
"node_modules/@radix-ui/react-accordion/node_modules/@radix-ui/primitive": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/@radix-ui/primitive/-/primitive-1.1.1.tgz",
"integrity": "sha512-SJ31y+Q/zAyShtXJc8x83i9TYdbAfHZ++tUZnvjJJqFjzsdUnKsxPL6IEtBlxKkU7yzer//GQtZSV4GbldL3YA==",
"license": "MIT"
},
"node_modules/@radix-ui/react-accordion/node_modules/@radix-ui/react-collection": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/@radix-ui/react-collection/-/react-collection-1.1.1.tgz",
"integrity": "sha512-LwT3pSho9Dljg+wY2KN2mrrh6y3qELfftINERIzBUO9e0N+t0oMTyn3k9iv+ZqgrwGkRnLpNJrsMv9BZlt2yuA==",
"license": "MIT",
"dependencies": {
"@radix-ui/react-compose-refs": "1.1.1",
"@radix-ui/react-context": "1.1.1",
"@radix-ui/react-primitive": "2.0.1",
"@radix-ui/react-slot": "1.1.1"
},
"peerDependencies": {
"@types/react": "*",
"@types/react-dom": "*",
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
"react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
},
"peerDependenciesMeta": {
"@types/react": {
"optional": true
},
"@types/react-dom": {
"optional": true
}
}
},
"node_modules/@radix-ui/react-accordion/node_modules/@radix-ui/react-compose-refs": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/@radix-ui/react-compose-refs/-/react-compose-refs-1.1.1.tgz",
"integrity": "sha512-Y9VzoRDSJtgFMUCoiZBDVo084VQ5hfpXxVE+NgkdNsjiDBByiImMZKKhxMwCbdHvhlENG6a833CbFkOQvTricw==",
"license": "MIT",
"peerDependencies": {
"@types/react": "*",
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
},
"peerDependenciesMeta": {
"@types/react": {
"optional": true
}
}
},
"node_modules/@radix-ui/react-accordion/node_modules/@radix-ui/react-context": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/@radix-ui/react-context/-/react-context-1.1.1.tgz",
"integrity": "sha512-UASk9zi+crv9WteK/NU4PLvOoL3OuE6BWVKNF6hPRBtYBDXQ2u5iu3O59zUlJiTVvkyuycnqrztsHVJwcK9K+Q==",
"license": "MIT",
"peerDependencies": {
"@types/react": "*",
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
},
"peerDependenciesMeta": {
"@types/react": {
"optional": true
}
}
},
"node_modules/@radix-ui/react-accordion/node_modules/@radix-ui/react-primitive": {
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.0.1.tgz",
"integrity": "sha512-sHCWTtxwNn3L3fH8qAfnF3WbUZycW93SM1j3NFDzXBiz8D6F5UTTy8G1+WFEaiCdvCVRJWj6N2R4Xq6HdiHmDg==",
"license": "MIT",
"dependencies": {
"@radix-ui/react-slot": "1.1.1"
},
"peerDependencies": {
"@types/react": "*",
"@types/react-dom": "*",
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
"react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
},
"peerDependenciesMeta": {
"@types/react": {
"optional": true
},
"@types/react-dom": {
"optional": true
}
}
},
"node_modules/@radix-ui/react-accordion/node_modules/@radix-ui/react-slot": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.1.1.tgz",
"integrity": "sha512-RApLLOcINYJA+dMVbOju7MYv1Mb2EBp2nH4HdDzXTSyaR5optlm6Otrz1euW3HbdOR8UmmFK06TD+A9frYWv+g==",
"license": "MIT",
"dependencies": {
"@radix-ui/react-compose-refs": "1.1.1"
},
"peerDependencies": {
"@types/react": "*",
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
},
"peerDependenciesMeta": {
"@types/react": {
"optional": true
}
}
},
"node_modules/@radix-ui/react-arrow": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/@radix-ui/react-arrow/-/react-arrow-1.1.0.tgz",
@@ -8442,9 +8577,9 @@
}
},
"node_modules/chromatic": {
"version": "11.18.1",
"resolved": "https://registry.npmjs.org/chromatic/-/chromatic-11.18.1.tgz",
"integrity": "sha512-hkNT9vA6K9+PnE/khhZYBnRCOm8NonaQDs7RZ8YHFo7/lh1b/x/uFMkTjWjaj/mkM6QOR/evu5VcZMtcaauSlw==",
"version": "11.25.2",
"resolved": "https://registry.npmjs.org/chromatic/-/chromatic-11.25.2.tgz",
"integrity": "sha512-/9eQWn6BU1iFsop86t8Au21IksTRxwXAl7if8YHD05L2AbuMjClLWZo5cZojqrJHGKDhTqfrC2X2xE4uSm0iKw==",
"dev": true,
"license": "MIT",
"bin": {

View File

@@ -18,6 +18,7 @@
"@headlessui/react": "^2.2.0",
"@headlessui/tailwindcss": "^0.2.1",
"@phosphor-icons/react": "^2.0.8",
"@radix-ui/react-accordion": "^1.2.2",
"@radix-ui/react-checkbox": "^1.1.2",
"@radix-ui/react-collapsible": "^1.1.2",
"@radix-ui/react-dialog": "^1.1.2",
@@ -86,11 +87,11 @@
"yup": "^1.4.0"
},
"devDependencies": {
"@chromatic-com/playwright": "^0.10.0",
"@chromatic-com/playwright": "^0.10.2",
"@tailwindcss/typography": "^0.5.10",
"@types/chrome": "^0.0.287",
"@types/jest": "^29.5.14",
"chromatic": "^11.18.1",
"chromatic": "^11.25.2",
"eslint": "^8.48.0",
"eslint-config-next": "^14.1.0",
"jest": "^29.7.0",

View File

@@ -2,7 +2,19 @@ import { defineConfig, devices } from "@playwright/test";
export default defineConfig({
globalSetup: require.resolve("./tests/e2e/global-setup"),
timeout: 30000, // 30 seconds timeout
timeout: 60000, // 60 seconds timeout
reporter: [
["list"],
// Warning: uncommenting the html reporter may cause the chromatic-archives
// directory to be deleted after the test run, which will break CI.
// [
// 'html',
// {
// outputFolder: 'test-results', // or whatever directory you want
// open: 'never', // can be 'always' | 'on-failure' | 'never'
// },
// ],
],
projects: [
{
name: "admin",

View File

@@ -28,6 +28,7 @@ import { Spinner } from "@/components/Spinner";
import { deleteApiKey, regenerateApiKey } from "./lib";
import { OnyxApiKeyForm } from "./OnyxApiKeyForm";
import { APIKey } from "./types";
import CreateButton from "@/components/ui/createButton";
const API_KEY_TEXT = `API Keys allow you to access Onyx APIs programmatically. Click the button below to generate a new API Key.`;
@@ -111,14 +112,7 @@ function Main() {
}
const newApiKeyButton = (
<Button
variant="navigate"
size="sm"
className="mt-3"
onClick={() => setShowCreateUpdateForm(true)}
>
Create API Key
</Button>
<CreateButton href="/admin/api-key/new" text="Create API Key" />
);
if (apiKeys.length === 0) {

View File

@@ -40,7 +40,12 @@ import * as Yup from "yup";
import CollapsibleSection from "./CollapsibleSection";
import { SuccessfulPersonaUpdateRedirectType } from "./enums";
import { Persona, PersonaLabel, StarterMessage } from "./interfaces";
import { PersonaUpsertParameters, createPersona, updatePersona } from "./lib";
import {
PersonaUpsertParameters,
createPersona,
updatePersona,
deletePersona,
} from "./lib";
import {
CameraIcon,
GroupsIconSkeleton,
@@ -71,7 +76,6 @@ import { LLMSelector } from "@/components/llm/LLMSelector";
import useSWR from "swr";
import { errorHandlingFetcher } from "@/lib/fetcher";
import { DeleteEntityModal } from "@/components/modals/DeleteEntityModal";
import { DeletePersonaButton } from "./[id]/DeletePersonaButton";
import Title from "@/components/ui/title";
import { SEARCH_TOOL_ID } from "@/app/chat/tools/constants";
@@ -322,10 +326,39 @@ export function AssistantEditor({
}));
};
const [deleteModalOpen, setDeleteModalOpen] = useState(false);
if (!labels) {
return <></>;
}
const openDeleteModal = () => {
setDeleteModalOpen(true);
};
const closeDeleteModal = () => {
setDeleteModalOpen(false);
};
const handleDeletePersona = async () => {
if (existingPersona) {
const response = await deletePersona(existingPersona.id);
if (response.ok) {
await refreshAssistants();
router.push(
redirectType === SuccessfulPersonaUpdateRedirectType.ADMIN
? `/admin/assistants?u=${Date.now()}`
: `/chat`
);
} else {
setPopup({
type: "error",
message: `Failed to delete persona - ${await response.text()}`,
});
}
}
};
return (
<div className="mx-auto max-w-4xl">
<style>
@@ -364,6 +397,14 @@ export function AssistantEditor({
}}
/>
)}
{deleteModalOpen && existingPersona && (
<DeleteEntityModal
entityType="Persona"
entityName={existingPersona.name}
onClose={closeDeleteModal}
onSubmit={handleDeletePersona}
/>
)}
{popup}
<Formik
enableReinitialize={true}
@@ -1312,14 +1353,6 @@ export function AssistantEditor({
explanationLink="https://docs.onyx.app/guides/assistants"
className="[&_textarea]:placeholder:text-text-muted/50"
/>
<div className="flex justify-end">
{existingPersona && (
<DeletePersonaButton
personaId={existingPersona!.id}
redirectType={SuccessfulPersonaUpdateRedirectType.ADMIN}
/>
)}
</div>
</>
)}
@@ -1338,6 +1371,18 @@ export function AssistantEditor({
Cancel
</Button>
</div>
<div className="flex justify-end">
{existingPersona && (
<Button
variant="destructive"
onClick={openDeleteModal}
type="button"
>
Delete
</Button>
)}
</div>
</Form>
);
}}

View File

@@ -17,6 +17,7 @@ import { FiEdit2 } from "react-icons/fi";
import { TrashIcon } from "@/components/icons/icons";
import { useUser } from "@/components/user/UserProvider";
import { useAssistants } from "@/components/context/AssistantsContext";
import { DeleteEntityModal } from "@/components/modals/DeleteEntityModal";
function PersonaTypeDisplay({ persona }: { persona: Persona }) {
if (persona.builtin_persona) {
@@ -53,6 +54,8 @@ export function PersonasTable() {
}, [editablePersonas]);
const [finalPersonas, setFinalPersonas] = useState<Persona[]>([]);
const [deleteModalOpen, setDeleteModalOpen] = useState(false);
const [personaToDelete, setPersonaToDelete] = useState<Persona | null>(null);
useEffect(() => {
const editable = editablePersonas.sort(personaComparator);
@@ -98,9 +101,42 @@ export function PersonasTable() {
await refreshUser();
};
const openDeleteModal = (persona: Persona) => {
setPersonaToDelete(persona);
setDeleteModalOpen(true);
};
const closeDeleteModal = () => {
setDeleteModalOpen(false);
setPersonaToDelete(null);
};
const handleDeletePersona = async () => {
if (personaToDelete) {
const response = await deletePersona(personaToDelete.id);
if (response.ok) {
await refreshAssistants();
closeDeleteModal();
} else {
setPopup({
type: "error",
message: `Failed to delete persona - ${await response.text()}`,
});
}
}
};
return (
<div>
{popup}
{deleteModalOpen && personaToDelete && (
<DeleteEntityModal
entityType="Persona"
entityName={personaToDelete.name}
onClose={closeDeleteModal}
onSubmit={handleDeletePersona}
/>
)}
<DraggableTable
headers={["Name", "Description", "Type", "Is Visible", "Delete"]}
@@ -170,16 +206,7 @@ export function PersonasTable() {
{!persona.builtin_persona && isEditable ? (
<div
className="hover:bg-hover rounded p-1 cursor-pointer"
onClick={async () => {
const response = await deletePersona(persona.id);
if (response.ok) {
await refreshAssistants();
} else {
alert(
`Failed to delete persona - ${await response.text()}`
);
}
}}
onClick={() => openDeleteModal(persona)}
>
<TrashIcon />
</div>

View File

@@ -1,15 +1,12 @@
"use client";
import { PersonasTable } from "./PersonaTable";
import { FiPlusSquare } from "react-icons/fi";
import Link from "next/link";
import Text from "@/components/ui/text";
import Title from "@/components/ui/title";
import { Separator } from "@/components/ui/separator";
import { AssistantsIcon } from "@/components/icons/icons";
import { AdminPageTitle } from "@/components/admin/Title";
import LabelManagement from "./LabelManagement";
import { SubLabel } from "@/components/admin/connectors/Field";
import CreateButton from "@/components/ui/createButton";
export default async function Page() {
return (
<div className="mx-auto container">
@@ -33,15 +30,7 @@ export default async function Page() {
<Separator />
<Title>Create an Assistant</Title>
<Link
href="/admin/assistants/new"
className="flex py-2 px-4 mt-2 border border-border h-fit cursor-pointer hover:bg-hover text-sm w-40"
>
<div className="mx-auto flex">
<FiPlusSquare className="my-auto mr-2" />
New Assistant
</div>
</Link>
<CreateButton href="/admin/assistants/new" text="New Assistant" />
<Separator />

View File

@@ -49,7 +49,7 @@ export function SlackChannelConfigsTable({
}}
>
<FiSettings />
Edit Default Config
Edit Default Configuration
</Button>
<Link href={`/admin/bots/${slackBotId}/channels/new`}>
<Button variant="outline">

View File

@@ -45,13 +45,26 @@ export const SlackChannelConfigCreationForm = ({
const existingSlackBotUsesPersona = existingSlackChannelConfig?.persona
? !isPersonaASlackBotPersona(existingSlackChannelConfig.persona)
: false;
const existingPersonaHasSearchTool = existingSlackChannelConfig?.persona
? existingSlackChannelConfig.persona.tools.some(
(tool) => tool.in_code_tool_id === SEARCH_TOOL_ID
)
: false;
const searchEnabledAssistants = useMemo(() => {
return personas.filter((persona) => {
return persona.tools.some(
(tool) => tool.in_code_tool_id == SEARCH_TOOL_ID
);
});
const [searchEnabledAssistants, nonSearchAssistants] = useMemo(() => {
return personas.reduce(
(acc, persona) => {
if (
persona.tools.some((tool) => tool.in_code_tool_id === SEARCH_TOOL_ID)
) {
acc[0].push(persona);
} else {
acc[1].push(persona);
}
return acc;
},
[[], []] as [Persona[], Persona[]]
);
}, [personas]);
return (
@@ -105,7 +118,9 @@ export const SlackChannelConfigCreationForm = ({
standard_answer_categories:
existingSlackChannelConfig?.standard_answer_categories || [],
knowledge_source: existingSlackBotUsesPersona
? "assistant"
? existingPersonaHasSearchTool
? "assistant"
: "non_search_assistant"
: existingSlackChannelConfig?.persona
? "document_sets"
: "all_public",
@@ -148,7 +163,12 @@ export const SlackChannelConfigCreationForm = ({
}),
standard_answer_categories: Yup.array(),
knowledge_source: Yup.string()
.oneOf(["all_public", "document_sets", "assistant"])
.oneOf([
"all_public",
"document_sets",
"assistant",
"non_search_assistant",
])
.required(),
})}
onSubmit={async (values, formikHelpers) => {
@@ -159,13 +179,16 @@ export const SlackChannelConfigCreationForm = ({
slack_bot_id,
channel_name: values.channel_name,
respond_member_group_list: values.respond_member_group_list,
usePersona: values.knowledge_source === "assistant",
usePersona:
values.knowledge_source === "assistant" ||
values.knowledge_source === "non_search_assistant",
document_sets:
values.knowledge_source === "document_sets"
? values.document_sets
: [],
persona_id:
values.knowledge_source === "assistant"
values.knowledge_source === "assistant" ||
values.knowledge_source === "non_search_assistant"
? values.persona_id
: null,
standard_answer_categories: values.standard_answer_categories.map(
@@ -204,7 +227,7 @@ export const SlackChannelConfigCreationForm = ({
}
}}
>
{({ isSubmitting, values, setFieldValue }) => (
{({ isSubmitting, values, setFieldValue, ...formikProps }) => (
<Form>
<div className="pb-6 w-full">
<SlackChannelConfigFormFields
@@ -213,9 +236,11 @@ export const SlackChannelConfigCreationForm = ({
isDefault={isDefault}
documentSets={documentSets}
searchEnabledAssistants={searchEnabledAssistants}
nonSearchAssistants={nonSearchAssistants}
standardAnswerCategoryResponse={standardAnswerCategoryResponse}
setPopup={setPopup}
slack_bot_id={slack_bot_id}
formikProps={formikProps}
/>
</div>
</Form>

View File

@@ -10,7 +10,6 @@ import {
} from "formik";
import { CCPairDescriptor, DocumentSet } from "@/lib/types";
import {
BooleanFormField,
Label,
SelectorFormField,
SubLabel,
@@ -42,18 +41,29 @@ import { fetchSlackChannels } from "../lib";
import { Badge } from "@/components/ui/badge";
import useSWR from "swr";
import { ThreeDotsLoader } from "@/components/Loading";
import {
Accordion,
AccordionContent,
AccordionItem,
AccordionTrigger,
} from "@/components/ui/accordion";
import { Separator } from "@/components/ui/separator";
import { CheckFormField } from "@/components/ui/CheckField";
export interface SlackChannelConfigFormFieldsProps {
isUpdate: boolean;
isDefault: boolean;
documentSets: DocumentSet[];
searchEnabledAssistants: Persona[];
nonSearchAssistants: Persona[];
standardAnswerCategoryResponse: StandardAnswerCategoryResponse;
setPopup: (popup: {
message: string;
type: "error" | "success" | "warning";
}) => void;
slack_bot_id: number;
formikProps: any;
}
export function SlackChannelConfigFormFields({
@@ -61,15 +71,15 @@ export function SlackChannelConfigFormFields({
isDefault,
documentSets,
searchEnabledAssistants,
nonSearchAssistants,
standardAnswerCategoryResponse,
setPopup,
slack_bot_id,
formikProps,
}: SlackChannelConfigFormFieldsProps) {
const router = useRouter();
const { values, setFieldValue } = useFormikContext<any>();
const [showAdvancedOptions, setShowAdvancedOptions] = useState(false);
const [viewUnselectableSets, setViewUnselectableSets] = useState(false);
const [currentSearchTerm, setCurrentSearchTerm] = useState("");
const [viewSyncEnabledAssistants, setViewSyncEnabledAssistants] =
useState(false);
@@ -178,6 +188,7 @@ export function SlackChannelConfigFormFields({
}));
}
);
if (isLoading) {
return <ThreeDotsLoader />;
}
@@ -194,7 +205,7 @@ export function SlackChannelConfigFormFields({
<>
<label
htmlFor="channel_name"
className="block font-medium text-base mb-2"
className="block text-text font-medium text-base mb-2"
>
Select A Slack Channel:
</label>{" "}
@@ -204,11 +215,9 @@ export function SlackChannelConfigFormFields({
options={channelOptions || []}
onSelect={(selected) => {
form.setFieldValue("channel_name", selected.name);
setCurrentSearchTerm(selected.name);
}}
initialSearchTerm={field.value}
onSearchTermChange={(term) => {
setCurrentSearchTerm(term);
form.setFieldValue("channel_name", term);
}}
/>
@@ -242,9 +251,15 @@ export function SlackChannelConfigFormFields({
<RadioGroupItemField
value="assistant"
id="assistant"
label="Specific Assistant"
label="Search Assistant"
sublabel="Control both the documents and the prompt to use for answering questions"
/>
<RadioGroupItemField
value="non_search_assistant"
id="non_search_assistant"
label="Non-Search Assistant"
sublabel="Chat with an assistant that does not use documents"
/>
</RadioGroup>
</div>
{values.knowledge_source === "document_sets" &&
@@ -408,118 +423,165 @@ export function SlackChannelConfigFormFields({
)}
</div>
)}
</div>
{values.knowledge_source === "non_search_assistant" && (
<div className="mt-4">
<SubLabel>
<>
Select the non-search assistant OnyxBot will use while answering
questions in Slack.
{syncEnabledAssistants.length > 0 && (
<>
<br />
<span className="text-sm text-text-dark/80">
Note: Some of your assistants have auto-synced connectors
in their document sets. You cannot select these assistants
as they will not be able to answer questions in Slack.{" "}
<button
type="button"
onClick={() =>
setViewSyncEnabledAssistants(
(viewSyncEnabledAssistants) =>
!viewSyncEnabledAssistants
)
}
className="text-sm text-link"
>
{viewSyncEnabledAssistants
? "Hide un-selectable "
: "View all "}
assistants
</button>
</span>
</>
)}
</>
</SubLabel>
<div className="mt-6">
<AdvancedOptionsToggle
showAdvancedOptions={showAdvancedOptions}
setShowAdvancedOptions={setShowAdvancedOptions}
/>
</div>
{showAdvancedOptions && (
<div className="mt-2 space-y-4">
<div className="w-64">
<SelectorFormField
name="response_type"
label="Answer Type"
tooltip="Controls the format of OnyxBot's responses."
options={[
{ name: "Standard", value: "citations" },
{ name: "Detailed", value: "quotes" },
]}
name="persona_id"
options={nonSearchAssistants.map((persona) => ({
name: persona.name,
value: persona.id,
}))}
/>
</div>
)}
</div>
<Separator className="my-4" />
<Accordion type="multiple" className=" gap-y-2 w-full">
{values.knowledge_source !== "non_search_assistant" && (
<AccordionItem value="search-options">
<AccordionTrigger className="text-text">
Search Configuration
</AccordionTrigger>
<AccordionContent>
<div className="space-y-4">
<div className="w-64">
<SelectorFormField
name="response_type"
label="Answer Type"
tooltip="Controls the format of OnyxBot's responses."
options={[
{ name: "Standard", value: "citations" },
{ name: "Detailed", value: "quotes" },
]}
/>
</div>
<CheckFormField
name="enable_auto_filters"
label="Enable LLM Autofiltering"
tooltip="If set, the LLM will generate source and time filters based on the user's query"
/>
<BooleanFormField
name="show_continue_in_web_ui"
removeIndent
label="Show Continue in Web UI button"
tooltip="If set, will show a button at the bottom of the response that allows the user to continue the conversation in the Onyx Web UI"
/>
<CheckFormField
name="answer_validity_check_enabled"
label="Only respond if citations found"
tooltip="If set, will only answer questions where the model successfully produces citations"
/>
</div>
</AccordionContent>
</AccordionItem>
)}
<AccordionItem className="mt-4" value="general-options">
<AccordionTrigger>General Configuration</AccordionTrigger>
<AccordionContent>
<div className="space-y-4">
<CheckFormField
name="show_continue_in_web_ui"
label="Show Continue in Web UI button"
tooltip="If set, will show a button at the bottom of the response that allows the user to continue the conversation in the Onyx Web UI"
/>
<CheckFormField
name="still_need_help_enabled"
onChange={(checked: boolean) => {
setFieldValue("still_need_help_enabled", checked);
if (!checked) {
setFieldValue("follow_up_tags", []);
}
}}
label={'Give a "Still need help?" button'}
tooltip={`OnyxBot's response will include a button at the bottom
of the response that asks the user if they still need help.`}
/>
{values.still_need_help_enabled && (
<CollapsibleSection prompt="Configure Still Need Help Button">
<TextArrayField
name="follow_up_tags"
label="(Optional) Users / Groups to Tag"
values={values}
subtext={
<div>
The Slack users / groups we should tag if the user
clicks the &quot;Still need help?&quot; button. If no
emails are provided, we will not tag anyone and will
just react with a 🆘 emoji to the original message.
</div>
}
placeholder="User email or user group name..."
/>
</CollapsibleSection>
)}
<CheckFormField
name="questionmark_prefilter_enabled"
label="Only respond to questions"
tooltip="If set, OnyxBot will only respond to messages that contain a question mark"
/>
<CheckFormField
name="respond_tag_only"
label="Respond to @OnyxBot Only"
tooltip="If set, OnyxBot will only respond when directly tagged"
/>
<CheckFormField
name="respond_to_bots"
label="Respond to Bot messages"
tooltip="If not set, OnyxBot will always ignore messages from Bots"
/>
<BooleanFormField
name="still_need_help_enabled"
removeIndent
onChange={(checked: boolean) => {
setFieldValue("still_need_help_enabled", checked);
if (!checked) {
setFieldValue("follow_up_tags", []);
}
}}
label={'Give a "Still need help?" button'}
tooltip={`OnyxBot's response will include a button at the bottom
of the response that asks the user if they still need help.`}
/>
{values.still_need_help_enabled && (
<CollapsibleSection prompt="Configure Still Need Help Button">
<TextArrayField
name="follow_up_tags"
label="(Optional) Users / Groups to Tag"
values={values}
name="respond_member_group_list"
label="(Optional) Respond to Certain Users / Groups"
subtext={
<div>
The Slack users / groups we should tag if the user clicks
the &quot;Still need help?&quot; button. If no emails are
provided, we will not tag anyone and will just react with a
🆘 emoji to the original message.
</div>
"If specified, OnyxBot responses will only " +
"be visible to the members or groups in this list."
}
values={values}
placeholder="User email or user group name..."
/>
</CollapsibleSection>
)}
<BooleanFormField
name="answer_validity_check_enabled"
removeIndent
label="Only respond if citations found"
tooltip="If set, will only answer questions where the model successfully produces citations"
/>
<BooleanFormField
name="questionmark_prefilter_enabled"
removeIndent
label="Only respond to questions"
tooltip="If set, OnyxBot will only respond to messages that contain a question mark"
/>
<BooleanFormField
name="respond_tag_only"
removeIndent
label="Respond to @OnyxBot Only"
tooltip="If set, OnyxBot will only respond when directly tagged"
/>
<BooleanFormField
name="respond_to_bots"
removeIndent
label="Respond to Bot messages"
tooltip="If not set, OnyxBot will always ignore messages from Bots"
/>
<BooleanFormField
name="enable_auto_filters"
removeIndent
label="Enable LLM Autofiltering"
tooltip="If set, the LLM will generate source and time filters based on the user's query"
/>
<TextArrayField
name="respond_member_group_list"
label="(Optional) Respond to Certain Users / Groups"
subtext={
"If specified, OnyxBot responses will only " +
"be visible to the members or groups in this list."
}
values={values}
placeholder="User email or user group name..."
/>
<StandardAnswerCategoryDropdownField
standardAnswerCategoryResponse={standardAnswerCategoryResponse}
categories={values.standard_answer_categories}
setCategories={(categories: any) =>
setFieldValue("standard_answer_categories", categories)
}
/>
</div>
)}
</div>
</AccordionContent>
</AccordionItem>
</Accordion>
<div className="flex mt-8 gap-x-2 w-full justify-end">
{shouldShowPrivacyAlert && (
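The still_need_help_enabled toggle above clears its dependent follow_up_tags list whenever it is unchecked, so stale tags are never submitted after the button is disabled. A minimal sketch of that pattern, assuming a Formik-style setFieldValue; the field names match the form, but the harness is illustrative:

// Illustrative sketch, not the actual form component.
type SetFieldValue = (field: string, value: unknown) => void;

function onStillNeedHelpToggle(checked: boolean, setFieldValue: SetFieldValue) {
  setFieldValue("still_need_help_enabled", checked);
  if (!checked) {
    // Reset the dependent list so stale tags are not submitted later.
    setFieldValue("follow_up_tags", []);
  }
}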

View File

@@ -11,6 +11,7 @@ import { SourceIcon } from "@/components/SourceIcon";
import { SlackBotTable } from "./SlackBotTable";
import { useSlackBots } from "./[bot-id]/hooks";
import { ValidSources } from "@/lib/types";
import CreateButton from "@/components/ui/createButton";
const Main = () => {
const {
@@ -71,27 +72,7 @@ const Main = () => {
found in the Onyx documentation to get started!
</p>
<Link
className="
flex
py-2
px-4
mt-2
border
border-border
h-fit
cursor-pointer
hover:bg-hover
text-sm
w-40
"
href="/admin/bots/new"
>
<div className="mx-auto flex">
<FiPlusSquare className="my-auto mr-2" />
New Slack Bot
</div>
</Link>
<CreateButton href="/admin/bots/new" text="New Slack Bot" />
<SlackBotTable slackBots={slackBots} />
</div>
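This hunk, and the matching ones in the token-rate-limit and tools pages below, swap a hand-styled Link for a shared <CreateButton href text />. A plausible sketch of such a component, reconstructed from the removed markup and assuming Next.js Link; the real @/components/ui/createButton may differ:

// Hypothetical reconstruction, not the actual @/components/ui/createButton.
import Link from "next/link";
import { FiPlusSquare } from "react-icons/fi";

interface CreateButtonProps {
  href: string;
  text: string;
}

export default function CreateButton({ href, text }: CreateButtonProps) {
  return (
    <Link
      href={href}
      className="flex py-2 px-4 mt-2 border border-border h-fit cursor-pointer hover:bg-hover text-sm w-40"
    >
      <div className="mx-auto flex">
        <FiPlusSquare className="my-auto mr-2" />
        {text}
      </div>
    </Link>
  );
}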

View File

@@ -11,6 +11,7 @@ import {
GeminiIcon,
OpenSourceIcon,
AnthropicSVG,
IconProps,
} from "@/components/icons/icons";
import { FaRobot } from "react-icons/fa";
@@ -74,29 +75,36 @@ export interface LLMProviderDescriptor {
}
export const getProviderIcon = (providerName: string, modelName?: string) => {
const modelNameToIcon = (
modelName: string,
fallbackIcon: ({ size, className }: IconProps) => JSX.Element
): (({ size, className }: IconProps) => JSX.Element) => {
if (modelName?.toLowerCase().includes("amazon")) {
return AmazonIcon;
}
if (modelName?.toLowerCase().includes("phi")) {
return MicrosoftIconSVG;
}
if (modelName?.toLowerCase().includes("mistral")) {
return MistralIcon;
}
if (modelName?.toLowerCase().includes("llama")) {
return MetaIcon;
}
if (modelName?.toLowerCase().includes("gemini")) {
return GeminiIcon;
}
if (modelName?.toLowerCase().includes("claude")) {
return AnthropicIcon;
} else {
return fallbackIcon;
}
};
switch (providerName) {
case "openai":
// Special cases for openai based on modelName
if (modelName?.toLowerCase().includes("amazon")) {
return AmazonIcon;
}
if (modelName?.toLowerCase().includes("phi")) {
return MicrosoftIconSVG;
}
if (modelName?.toLowerCase().includes("mistral")) {
return MistralIcon;
}
if (modelName?.toLowerCase().includes("llama")) {
return MetaIcon;
}
if (modelName?.toLowerCase().includes("gemini")) {
return GeminiIcon;
}
if (modelName?.toLowerCase().includes("claude")) {
return AnthropicIcon;
}
return OpenAIIcon; // Default for openai
return modelNameToIcon(modelName || "", OpenAIIcon);
case "anthropic":
return AnthropicSVG;
case "bedrock":
@@ -104,7 +112,7 @@ export const getProviderIcon = (providerName: string, modelName?: string) => {
case "azure":
return AzureIcon;
default:
return CPUIcon;
return modelNameToIcon(modelName || "", CPUIcon);
}
};
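The refactor above hoists the model-name substring checks into modelNameToIcon, so non-OpenAI providers (including the default branch) can get model-specific icons instead of always falling back to OpenAIIcon or CPUIcon. A standalone sketch of the same idea using a lookup table; the icon components here are string placeholders, not the real ones from @/components/icons/icons:

// Standalone sketch; string placeholders stand in for the real icon components.
type IconComponent = (props: { size?: number; className?: string }) => string;

const placeholder = (name: string): IconComponent => () => name;

const MODEL_ICON_RULES: Array<[string, IconComponent]> = [
  ["amazon", placeholder("AmazonIcon")],
  ["phi", placeholder("MicrosoftIconSVG")],
  ["mistral", placeholder("MistralIcon")],
  ["llama", placeholder("MetaIcon")],
  ["gemini", placeholder("GeminiIcon")],
  ["claude", placeholder("AnthropicIcon")],
];

function modelNameToIcon(modelName: string, fallback: IconComponent): IconComponent {
  const lower = modelName.toLowerCase();
  // First substring match wins; otherwise use the provider's fallback icon.
  const match = MODEL_ICON_RULES.find(([needle]) => lower.includes(needle));
  return match ? match[1] : fallback;
}

console.log(modelNameToIcon("mistral-large", placeholder("CPUIcon"))({})); // "MistralIcon"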

View File

@@ -18,7 +18,11 @@ import AdvancedFormPage from "./pages/Advanced";
import DynamicConnectionForm from "./pages/DynamicConnectorCreationForm";
import CreateCredential from "@/components/credentials/actions/CreateCredential";
import ModifyCredential from "@/components/credentials/actions/ModifyCredential";
import { ConfigurableSources, oauthSupportedSources } from "@/lib/types";
import {
ConfigurableSources,
oauthSupportedSources,
ValidSources,
} from "@/lib/types";
import {
Credential,
credentialTemplates,
@@ -444,7 +448,7 @@ export default function AddConnector({
<CardSection>
<Title className="mb-2 text-lg">Select a credential</Title>
{connector == "gmail" ? (
{connector == ValidSources.Gmail ? (
<GmailMain />
) : (
<>
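Replacing the raw "gmail" literal with ValidSources.Gmail lets the compiler catch typos. A small sketch, with a trimmed stand-in for the real enum in @/lib/types:

// Trimmed stand-in for the real ValidSources enum.
enum ValidSources {
  Gmail = "gmail",
  Slack = "slack",
}

const connector: string = "gmail";
// A misspelled literal like "gamil" compiles silently; a misspelled enum
// member like ValidSources.Gamil is a compile-time error.
if (connector === ValidSources.Gmail) {
  console.log("render GmailMain");
}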

View File

@@ -40,6 +40,7 @@ import {
TooltipProvider,
TooltipTrigger,
} from "@/components/ui/tooltip";
import CreateButton from "@/components/ui/createButton";
const numToDisplay = 50;
@@ -305,9 +306,13 @@ const Main = () => {
<div className="mb-3"></div>
<div className="flex mb-6">
<Link href="/admin/documents/sets/new">
<CreateButton
href="/admin/documents/sets/new"
text="New Document Set"
/>
{/* <Link href="/admin/documents/sets/new">
<Button variant="navigate">New Document Set</Button>
</Link> */}
</div>
{documentSets.length > 0 && (

View File

@@ -231,7 +231,7 @@ export function SettingsForm() {
<Checkbox
label="Pro Search Disabled"
sublabel="If set, users will not be able to use Pro Search."
checked={settings.pro_search_disabled}
checked={settings.pro_search_disabled ?? false}
onChange={(e) =>
handleToggleSettingsField("pro_search_disabled", e.target.checked)
}

View File

@@ -10,7 +10,7 @@ export interface Settings {
notifications: Notification[];
needs_reindexing: boolean;
gpu_enabled: boolean;
pro_search_disabled: boolean;
pro_search_disabled: boolean | null;
product_gating: GatingType;
auto_scroll: boolean;
}
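Widening pro_search_disabled to boolean | null pairs with the ?? false in SettingsForm above: ?? falls back only on null/undefined, so the checkbox never receives null and an explicit false from the server is preserved. A minimal illustration:

// Minimal illustration of why `??` is the right operator here.
const fromServer: boolean | null = null;

const checkedStrict = fromServer ?? false; // false only when null/undefined
const checkedLoose = fromServer || false; // would also coerce other falsy values

console.log(checkedStrict, checkedLoose); // false false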

View File

@@ -18,6 +18,7 @@ import { usePopup } from "@/components/admin/connectors/Popup";
import { CreateRateLimitModal } from "./CreateRateLimitModal";
import { usePaidEnterpriseFeaturesEnabled } from "@/components/settings/usePaidEnterpriseFeaturesEnabled";
import { ShieldIcon } from "@/components/icons/icons";
import CreateButton from "@/components/ui/createButton";
const BASE_URL = "/api/admin/token-rate-limits";
const GLOBAL_TOKEN_FETCH_URL = `${BASE_URL}/global`;
@@ -138,15 +139,10 @@ function Main() {
</li>
</ul>
<Button
variant="navigate"
size="sm"
className="my-4"
onClick={() => setModalIsOpen(true)}
>
Create a Token Rate Limit
</Button>
<CreateButton
href="/admin/token-rate-limits/new"
text="Create a Token Rate Limit"
/>
{isPaidEnterpriseFeaturesEnabled && (
<Tabs
value={tabIndex.toString()}

View File

@@ -9,6 +9,7 @@ import { fetchSS } from "@/lib/utilsSS";
import { ErrorCallout } from "@/components/ErrorCallout";
import { AdminPageTitle } from "@/components/admin/Title";
import { ToolIcon } from "@/components/icons/icons";
import CreateButton from "@/components/ui/createButton";
export default async function Page() {
const toolResponse = await fetchSS("/tool");
@@ -39,27 +40,7 @@ export default async function Page() {
<Separator />
<Title>Create a Tool</Title>
<Link
href="/admin/tools/new"
className="
flex
py-2
px-4
mt-2
border
border-border
h-fit
cursor-pointer
hover:bg-hover
text-sm
w-40
"
>
<div className="mx-auto flex">
<FiPlusSquare className="my-auto mr-2" />
New Tool
</div>
</Link>
<CreateButton href="/admin/tools/new" text="New Tool" />
<Separator />

View File

@@ -471,9 +471,6 @@ export function ChatPage({
}
return;
}
const shouldScrollToBottom =
visibleRange.get(existingChatSessionId) === undefined ||
visibleRange.get(existingChatSessionId)?.end == 0;
clearSelectedDocuments();
setIsFetchingChatMessages(true);
@@ -511,16 +508,13 @@ export function ChatPage({
// go to bottom. If initial load, then do a scroll,
// otherwise just appear at the bottom
if (shouldScrollToBottom) {
scrollInitialized.current = false;
}
if (shouldScrollToBottom) {
if (!hasPerformedInitialScroll && autoScrollEnabled) {
clientScrollToBottom();
} else if (isChatSessionSwitch && autoScrollEnabled) {
clientScrollToBottom(true);
}
scrollInitialized.current = false;
if (!hasPerformedInitialScroll) {
clientScrollToBottom();
} else if (isChatSessionSwitch) {
clientScrollToBottom(true);
}
setIsFetchingChatMessages(false);
@@ -1034,6 +1028,7 @@ export function ChatPage({
) {
setDocumentSidebarToggled(false);
}
clientScrollToBottom();
}, [chatSessionIdRef.current]);
const loadNewPageLogic = (event: MessageEvent) => {
@@ -1068,7 +1063,6 @@ export function ChatPage({
if (!documentSidebarInitialWidth && maxDocumentSidebarWidth) {
documentSidebarInitialWidth = Math.min(700, maxDocumentSidebarWidth);
}
class CurrentMessageFIFO {
private stack: PacketType[] = [];
isComplete: boolean = false;
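Only a fragment of CurrentMessageFIFO is visible in this hunk. A plausible sketch of such a first-in-first-out packet buffer, with PacketType and the methods assumed rather than taken from the actual implementation:

// Assumed shape, not the real CurrentMessageFIFO.
type PacketType = Record<string, unknown>;

class PacketFIFO {
  private queue: PacketType[] = [];
  isComplete = false;

  push(packet: PacketType) {
    this.queue.push(packet);
  }

  nextPacket(): PacketType | undefined {
    // FIFO: consume from the front, in arrival order.
    return this.queue.shift();
  }

  isEmpty(): boolean {
    return this.queue.length === 0;
  }
}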
@@ -1127,6 +1121,7 @@ export function ChatPage({
"Continue Generating (pick up exactly where you left off)",
});
};
const [uncaughtError, setUncaughtError] = useState<string | null>(null);
const onSubmit = async ({
messageIdToResend,
@@ -1332,7 +1327,9 @@ export function ChatPage({
searchParams.get(SEARCH_PARAM_NAMES.SYSTEM_PROMPT) || undefined,
useExistingUserMessage: isSeededChat,
useLanggraph:
!settings?.settings.pro_search_disabled && proSearchEnabled,
!settings?.settings.pro_search_disabled &&
proSearchEnabled &&
retrievalEnabled,
});
const delay = (ms: number) => {
@@ -1440,21 +1437,22 @@ export function ChatPage({
}
}
// Continuously refine the sub_questions based on the packets that we receive
// // Continuously refine the sub_questions based on the packets that we receive
if (
Object.hasOwn(packet, "stop_reason") &&
Object.hasOwn(packet, "level_question_num")
) {
// sub_questions = constructSubQuestions(
// sub_questions,
// packet as StreamStopInfo
// );
sub_questions = constructSubQuestions(
sub_questions,
packet as StreamStopInfo
);
} else if (Object.hasOwn(packet, "sub_question")) {
is_generating = true;
sub_questions = constructSubQuestions(
sub_questions,
packet as SubQuestionPiece
);
setAgenticGenerating(true);
} else if (Object.hasOwn(packet, "sub_query")) {
sub_questions = constructSubQuestions(
sub_questions,
@@ -1552,8 +1550,23 @@ export function ChatPage({
}
);
} else if (Object.hasOwn(packet, "error")) {
error = (packet as StreamingError).error;
stackTrace = (packet as StreamingError).stack_trace;
if (
sub_questions.length > 0 &&
sub_questions
.filter((q) => q.level === 0)
.every((q) => q.is_stopped === true)
) {
setUncaughtError((packet as StreamingError).error);
updateChatState("input");
setAgenticGenerating(false);
setAlternativeGeneratingAssistant(null);
setSubmittedMessage("");
return;
// throw new Error((packet as StreamingError).error);
} else {
error = (packet as StreamingError).error;
stackTrace = (packet as StreamingError).stack_trace;
}
} else if (Object.hasOwn(packet, "message_id")) {
finalMessage = packet as BackendMessage;
} else if (Object.hasOwn(packet, "stop_reason")) {
@@ -1663,6 +1676,7 @@ export function ChatPage({
completeMessageMapOverride: currentMessageMap(completeMessageDetail),
});
}
setAgenticGenerating(false);
resetRegenerationState(currentSessionId());
updateChatState("input");
@@ -1790,6 +1804,7 @@ export function ChatPage({
// Used to maintain a "time out" for history sidebar so our existing refs can have time to process change
const [untoggled, setUntoggled] = useState(false);
const [loadingError, setLoadingError] = useState<string | null>(null);
const [agenticGenerating, setAgenticGenerating] = useState(false);
const explicitlyUntoggle = () => {
setShowHistorySidebar(false);
@@ -1834,17 +1849,17 @@ export function ChatPage({
const autoScrollEnabled =
user?.preferences?.auto_scroll == null
? settings?.enterpriseSettings?.auto_scroll || false
: user?.preferences?.auto_scroll!;
: user?.preferences?.auto_scroll! && !agenticGenerating;
// useScrollonStream({
// chatState: currentSessionChatState,
// scrollableDivRef,
// scrollDist,
// endDivRef,
// debounceNumber,
// mobile: settings?.isMobile,
// enableAutoScroll: autoScrollEnabled,
// });
useScrollonStream({
chatState: currentSessionChatState,
scrollableDivRef,
scrollDist,
endDivRef,
debounceNumber,
mobile: settings?.isMobile,
enableAutoScroll: autoScrollEnabled,
});
// Virtualization + Scrolling related effects and functions
const scrollInitialized = useRef(false);
@@ -2040,6 +2055,7 @@ export function ChatPage({
}
const data = await response.json();
router.push(data.redirect_url);
} catch (error) {
console.error("Error seeding chat from Slack:", error);
@@ -2634,6 +2650,7 @@ export function ChatPage({
{message.sub_questions &&
message.sub_questions.length > 0 ? (
<AgenticMessage
error={uncaughtError}
docSidebarToggled={
documentSidebarToggled &&
(selectedMessageForDocDisplay ==
@@ -3058,20 +3075,19 @@ export function ChatPage({
</div>
<div
ref={inputRef}
className="absolute bottom-0 z-10 w-full"
className="absolute pointer-events-none bottom-0 z-10 w-full"
>
<div className="w-[95%] mx-auto relative mb-8">
{aboveHorizon && (
<div className="pointer-events-none w-full bg-transparent flex sticky justify-center">
<button
onClick={() => clientScrollToBottom()}
className="p-1 pointer-events-auto rounded-2xl bg-background-strong border border-border mb-2 mx-auto "
>
<FiArrowDown size={18} />
</button>
</div>
)}
{aboveHorizon && (
<div className="mx-auto w-fit !pointer-events-none flex sticky justify-center">
<button
onClick={() => clientScrollToBottom()}
className="p-1 pointer-events-auto rounded-2xl bg-background-strong border border-border mx-auto "
>
<FiArrowDown size={18} />
</button>
</div>
)}
<div className="pointer-events-auto w-[95%] mx-auto relative mb-8">
<ChatInputBar
proSearchEnabled={proSearchEnabled}
setProSearchEnabled={() => toggleProSearch()}
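Throughout the streaming loop above, packets are discriminated by property presence with Object.hasOwn rather than a tag field. A trimmed sketch of that dispatch style; the packet shapes are stand-ins for the real StreamingError, StreamStopInfo, and SubQuestionPiece:

// Trimmed stand-in packet shapes; only the discriminating properties are kept.
interface StreamingErrorLike { error: string; stack_trace?: string }
interface StopInfoLike { stop_reason: string; level_question_num?: number }
interface SubQuestionLike { sub_question: string; level: number }

type Packet = StreamingErrorLike | StopInfoLike | SubQuestionLike;

function handlePacket(packet: Packet): string {
  // Dispatch on which property the packet carries, as the hunks above do.
  if (Object.hasOwn(packet, "error")) {
    return `error: ${(packet as StreamingErrorLike).error}`;
  }
  if (Object.hasOwn(packet, "stop_reason")) {
    return `stopped: ${(packet as StopInfoLike).stop_reason}`;
  }
  return `sub-question: ${(packet as SubQuestionLike).sub_question}`;
}

console.log(handlePacket({ error: "LLM timeout" })); // error: LLM timeout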

View File

@@ -1,5 +1,6 @@
import React, { useContext, useEffect, useRef, useState } from "react";
import { FiPlusCircle, FiPlus, FiInfo, FiX, FiFilter } from "react-icons/fi";
import { FiLoader } from "react-icons/fi";
import { ChatInputOption } from "./ChatInputOption";
import { Persona } from "@/app/admin/assistants/interfaces";
import LLMPopover from "./LLMPopover";
@@ -36,6 +37,9 @@ import { buildImgUrl } from "../files/images/utils";
import { useUser } from "@/components/user/UserProvider";
import { AgenticToggle } from "./AgenticToggle";
import { SettingsContext } from "@/components/settings/SettingsProvider";
import { LoadingIndicator } from "react-select/dist/declarations/src/components/indicators";
import { FidgetSpinner } from "react-loader-spinner";
import { LoadingAnimation } from "@/components/Loading";
const MAX_INPUT_HEIGHT = 200;
export const SourceChip2 = ({
@@ -709,12 +713,16 @@ export function ChatInputBar({
<SourceChip
key={`file-${index}`}
icon={
<img
className="h-full py-.5 object-cover rounded-lg bg-background cursor-pointer"
src={buildImgUrl(file.id)}
/>
file.isUploading ? (
<FiLoader className="animate-spin" />
) : (
<img
className="h-full py-.5 object-cover rounded-lg bg-background cursor-pointer"
src={buildImgUrl(file.id)}
/>
)
}
title={file.name || "File"}
title={file.name || "File" + file.id}
onRemove={() => {
setFiles(
files.filter(
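The chip icon above now reflects upload state: a spinner while file.isUploading is set, the image thumbnail once the upload finishes. A minimal sketch of that conditional, with trimmed stand-in types:

// Trimmed stand-ins for the real SourceChip icon logic.
import { FiLoader } from "react-icons/fi";

interface ChatFile {
  id: string;
  name?: string;
  isUploading?: boolean;
}

function fileChipIcon(file: ChatFile, buildImgUrl: (id: string) => string) {
  return file.isUploading ? (
    <FiLoader className="animate-spin" />
  ) : (
    <img
      className="h-full py-.5 object-cover rounded-lg bg-background"
      src={buildImgUrl(file.id)}
    />
  );
}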

View File

@@ -5,7 +5,7 @@ import {
PopoverTrigger,
} from "@/components/ui/popover";
import { ChatInputOption } from "./ChatInputOption";
import { getDisplayNameForModel } from "@/lib/hooks";
import { defaultModelsByProvider, getDisplayNameForModel } from "@/lib/hooks";
import {
checkLLMSupportsImageInput,
destructureValue,
@@ -61,22 +61,23 @@ export default function LLMPopover({
llmOptionsByProvider[llmProvider.provider] = [];
}
(llmProvider.display_model_names || llmProvider.model_names).forEach(
(modelName) => {
if (!uniqueModelNames.has(modelName)) {
uniqueModelNames.add(modelName);
llmOptionsByProvider[llmProvider.provider].push({
name: modelName,
value: structureValue(
llmProvider.name,
llmProvider.provider,
modelName
),
icon: getProviderIcon(llmProvider.provider, modelName),
});
}
(
llmProvider.display_model_names ||
defaultModelsByProvider[llmProvider.provider]
).forEach((modelName) => {
if (!uniqueModelNames.has(modelName)) {
uniqueModelNames.add(modelName);
llmOptionsByProvider[llmProvider.provider].push({
name: modelName,
value: structureValue(
llmProvider.name,
llmProvider.provider,
modelName
),
icon: getProviderIcon(llmProvider.provider, modelName),
});
}
);
});
});
const llmOptions = Object.entries(llmOptionsByProvider).flatMap(
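The popover now falls back to a curated per-provider default list when an admin has not set display_model_names. A sketch of the pattern; the real defaultModelsByProvider lives in @/lib/hooks and its contents here are assumed placeholders (the extra || [] guard is a defensive addition for providers with no defaults):

// Assumed placeholder contents, not the real defaultModelsByProvider.
const defaultModelsByProvider: Record<string, string[]> = {
  openai: ["gpt-4o", "gpt-4o-mini"],
  anthropic: ["claude-3-5-sonnet-20241022"],
};

function modelsToShow(provider: string, displayModelNames?: string[] | null): string[] {
  // Prefer the admin-curated list; else the provider defaults; else nothing.
  return displayModelNames || defaultModelsByProvider[provider] || [];
}

console.log(modelsToShow("openai", null)); // ["gpt-4o", "gpt-4o-mini"]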

View File

@@ -218,6 +218,7 @@ export interface SubQuestionDetail extends BaseQuestionIdentifier {
sub_queries?: SubQueryDetail[] | null;
context_docs?: { top_documents: OnyxDocument[] } | null;
is_complete?: boolean;
is_stopped?: boolean;
}
export interface SubQueryDetail {
@@ -249,14 +250,13 @@ export const constructSubQuestions = (
// );
if ("stop_reason" in newDetail) {
console.log("STOP REASON");
console.log(newDetail);
const { level, level_question_num } = newDetail;
let subQuestion = updatedSubQuestions.find(
(sq) => sq.level === level && sq.level_question_num === level_question_num
);
if (subQuestion) {
// subQuestion.is_complete = true;
subQuestion.is_complete = true;
subQuestion.is_stopped = true;
}
} else if ("top_documents" in newDetail) {
const { level, level_question_num, top_documents } = newDetail;
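The stop_reason branch above locates the sub-question a stop packet addresses and flags it both complete and stopped, which is what lets the UI treat a user-stopped question differently from an errored one. A trimmed sketch:

// Trimmed stand-in types; mirrors the stop handling shown above.
interface SubQuestion {
  level: number;
  level_question_num: number;
  is_complete?: boolean;
  is_stopped?: boolean;
}

function markStopped(
  subQuestions: SubQuestion[],
  stop: { level: number; level_question_num: number }
): SubQuestion[] {
  // Shallow copy, then mutate the matching entry, as the original does.
  const updated = [...subQuestions];
  const match = updated.find(
    (sq) =>
      sq.level === stop.level &&
      sq.level_question_num === stop.level_question_num
  );
  if (match) {
    match.is_complete = true;
    match.is_stopped = true; // distinguishes "stopped" from "errored" downstream
  }
  return updated;
}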

View File

@@ -80,6 +80,7 @@ export const AgenticMessage = ({
agenticDocs,
secondLevelSubquestions,
toggleDocDisplay,
error,
}: {
docSidebarToggled?: boolean;
isImprovement?: boolean | null;
@@ -110,6 +111,7 @@ export const AgenticMessage = ({
regenerate?: (modelOverRide: LlmOverride) => Promise<void>;
setPresentingDocument?: (document: OnyxDocument) => void;
toggleDocDisplay?: (agentic: boolean) => void;
error?: string | null;
}) => {
const [noShowingMessage, setNoShowingMessage] = useState(isComplete);
@@ -483,11 +485,28 @@ export const AgenticMessage = ({
) : (
content
)}
{error && (
<p className="mt-2 text-red-700 text-sm my-auto">
{error}
</p>
)}
</div>
</div>
</>
) : isComplete ? null : (
<></>
) : isComplete ? (
error && (
<p className="mt-2 mx-4 text-red-700 text-sm my-auto">
{error}
</p>
)
) : (
<>
{error && (
<p className="mt-2 mx-4 text-red-700 text-sm my-auto">
{error}
</p>
)}
</>
)}
{handleFeedback &&
(isActive ? (

Some files were not shown because too many files have changed in this diff.