Compare commits

..

21 Commits

Author SHA1 Message Date
pablonyx
b8bc300ab8 update 2025-04-02 10:19:27 -07:00
rkuo-danswer
8a8526dbbb harden join function (#4424)
* harden join function

* remove log spam

* use time.monotonic

* add pid logging

* client only celery app

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-04-02 01:04:00 -07:00
Weves
be20586ba1 Add retries for confluence calls 2025-04-01 23:00:37 -07:00
Weves
a314462d1e Fix migrations 2025-04-01 21:48:32 -07:00
rkuo-danswer
155f53c3d7 Revert "Add user invitation test (#4161)" (#4422)
This reverts commit 806de92feb.

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-04-01 19:55:04 -07:00
pablonyx
7c027df186 Fix cc pair doc deletion (#4420) 2025-04-01 18:44:15 -07:00
pablonyx
0a5db96026 update (#4415) 2025-04-02 00:42:42 +00:00
joachim-danswer
daef985b02 Simpler approach (#4414) 2025-04-01 16:52:59 -07:00
Weves
b7ece296e0 Additional logging to salesforce perm sync 2025-04-01 16:19:50 -07:00
Richard Kuo (Onyx)
d7063e0a1d expose acl link feature in onyx_vespa 2025-04-01 16:19:50 -07:00
pablonyx
ee073f6d30 Tracking things (#4352) 2025-04-01 16:19:50 -07:00
Raunak Bhagat
2e524816a0 Regen (#4409)
* Edit styling of regeneration dropdown

* Finish regeneration style changes

* Remove invalid props

* Update web/src/app/chat/input/ChatInputBar.tsx

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>

* Remove unused variables

---------

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-04-01 16:19:50 -07:00
pablonyx
47ef0c8658 Still delete cookies (#4404) 2025-04-01 16:19:50 -07:00
pablonyx
806de92feb Add user invitation test (#4161) 2025-04-01 16:19:50 -07:00
pablonyx
da39f32fea Validate advanced fields + proper yup assurances for lists (#4399) 2025-04-01 16:19:50 -07:00
pablonyx
2a87837ce1 Very minor auth standardization (#4400) 2025-04-01 16:19:50 -07:00
pablonyx
7491cdd0f0 Update migration (#4410) 2025-04-01 16:19:50 -07:00
SubashMohan
aabd698295 refactor tests for Highspot connector to use mocking for API key retrieval (#4346) 2025-04-01 16:19:50 -07:00
Weves
4b725e4d1a Init engine in slackbot 2025-04-01 16:19:50 -07:00
rkuo-danswer
34d2d92fa8 also set permission upsert to medium priority (#4405)
Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-04-01 16:19:50 -07:00
pablonyx
3a3b2a2f8d add user files (#4152) 2025-04-01 16:19:44 -07:00
30 changed files with 473 additions and 320 deletions

View File

@@ -0,0 +1,50 @@
"""add prompt length limit
Revision ID: f71470ba9274
Revises: 6a804aeb4830
Create Date: 2025-04-01 15:07:14.977435
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "f71470ba9274"
down_revision = "6a804aeb4830"
branch_labels = None
depends_on = None
def upgrade() -> None:
    """Cap both prompt text columns at 8000 characters.

    Converts ``prompt.system_prompt`` and ``prompt.task_prompt`` from
    unbounded TEXT to VARCHAR(8000); nullability is unchanged.
    """
    # Both columns receive the identical type change, so drive the two
    # alter_column calls from a single tuple of column names.
    for column_name in ("system_prompt", "task_prompt"):
        op.alter_column(
            "prompt",
            column_name,
            existing_type=sa.TEXT(),
            type_=sa.String(length=8000),
            existing_nullable=False,
        )
def downgrade() -> None:
    """Revert both prompt text columns back to unbounded TEXT.

    Mirror of ``upgrade``: widens ``prompt.system_prompt`` and
    ``prompt.task_prompt`` from VARCHAR(8000) back to TEXT.
    """
    # Same symmetric change for both columns — loop instead of repeating
    # the alter_column call verbatim.
    for column_name in ("system_prompt", "task_prompt"):
        op.alter_column(
            "prompt",
            column_name,
            existing_type=sa.String(length=8000),
            type_=sa.TEXT(),
            existing_nullable=False,
        )

View File

@@ -0,0 +1,77 @@
"""updated constraints for ccpairs
Revision ID: f7505c5b0284
Revises: f71470ba9274
Create Date: 2025-04-01 17:50:42.504818
"""
from alembic import op
# revision identifiers, used by Alembic.
revision = "f7505c5b0284"
down_revision = "f71470ba9274"
branch_labels = None
depends_on = None
def upgrade() -> None:
    """Recreate the document_by_connector_credential_pair FKs with ON DELETE CASCADE.

    Drops the existing connector_id / credential_id foreign keys and
    re-adds them so that deleting a connector or credential cascades to
    the join rows.
    """
    # (constraint name, referenced table, local column) for each FK.
    _FK_SPECS = (
        (
            "document_by_connector_credential_pair_connector_id_fkey",
            "connector",
            "connector_id",
        ),
        (
            "document_by_connector_credential_pair_credential_id_fkey",
            "credential",
            "credential_id",
        ),
    )

    # Phase 1: drop the old foreign-key constraints.
    for constraint_name, _, _ in _FK_SPECS:
        op.drop_constraint(
            constraint_name,
            "document_by_connector_credential_pair",
            type_="foreignkey",
        )

    # Phase 2: re-add them with ondelete='CASCADE'.
    for constraint_name, referent_table, local_col in _FK_SPECS:
        op.create_foreign_key(
            constraint_name,
            source_table="document_by_connector_credential_pair",
            referent_table=referent_table,
            local_cols=[local_col],
            remote_cols=["id"],
            ondelete="CASCADE",
        )
def downgrade() -> None:
    """Restore the original FKs without ON DELETE CASCADE.

    Mirror of ``upgrade``: drops the cascading foreign keys on
    document_by_connector_credential_pair and recreates them without a
    delete rule.
    """
    # (constraint name, referenced table, local column) for each FK.
    _FK_SPECS = (
        (
            "document_by_connector_credential_pair_connector_id_fkey",
            "connector",
            "connector_id",
        ),
        (
            "document_by_connector_credential_pair_credential_id_fkey",
            "credential",
            "credential_id",
        ),
    )

    # Phase 1: drop the cascading constraints.
    for constraint_name, _, _ in _FK_SPECS:
        op.drop_constraint(
            constraint_name,
            "document_by_connector_credential_pair",
            type_="foreignkey",
        )

    # Phase 2: recreate without CASCADE (default NO ACTION behavior).
    for constraint_name, referent_table, local_col in _FK_SPECS:
        op.create_foreign_key(
            constraint_name,
            "document_by_connector_credential_pair",
            referent_table,
            [local_col],
            ["id"],
        )

View File

@@ -58,6 +58,7 @@ def _get_objects_access_for_user_email_from_salesforce(
f"Time taken to get Salesforce user ID: {end_time - start_time} seconds"
)
if user_id is None:
logger.warning(f"User '{user_email}' not found in Salesforce")
return None
# This is the only query that is not cached in the function
@@ -65,6 +66,7 @@ def _get_objects_access_for_user_email_from_salesforce(
object_id_to_access = get_objects_access_for_user_id(
salesforce_client, user_id, list(object_ids)
)
logger.debug(f"Object ID to access: {object_id_to_access}")
return object_id_to_access

View File

@@ -56,6 +56,7 @@ from httpx_oauth.oauth2 import OAuth2Token
from pydantic import BaseModel
from sqlalchemy.ext.asyncio import AsyncSession
from ee.onyx.configs.app_configs import ANONYMOUS_USER_COOKIE_NAME
from onyx.auth.api_key import get_hashed_api_key_from_request
from onyx.auth.email_utils import send_forgot_password_email
from onyx.auth.email_utils import send_user_verification_email
@@ -513,6 +514,25 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
return user
async def on_after_login(
self,
user: User,
request: Optional[Request] = None,
response: Optional[Response] = None,
) -> None:
try:
if response and request and ANONYMOUS_USER_COOKIE_NAME in request.cookies:
response.delete_cookie(
ANONYMOUS_USER_COOKIE_NAME,
# Ensure cookie deletion doesn't override other cookies by setting the same path/domain
path="/",
domain=None,
secure=WEB_DOMAIN.startswith("https"),
)
logger.debug(f"Deleted anonymous user cookie for user {user.email}")
except Exception:
logger.exception("Error deleting anonymous user cookie")
async def on_after_register(
self, user: User, request: Optional[Request] = None
) -> None:
@@ -1302,6 +1322,7 @@ def get_oauth_router(
# Login user
response = await backend.login(strategy, user)
await user_manager.on_after_login(user, request, response)
# Prepare redirect response
if tenant_id is None:
# Use URL utility to add parameters
@@ -1311,9 +1332,14 @@ def get_oauth_router(
# No parameters to add
redirect_response = RedirectResponse(next_url, status_code=302)
# Copy headers and other attributes from 'response' to 'redirect_response'
# Copy headers from auth response to redirect response, with special handling for Set-Cookie
for header_name, header_value in response.headers.items():
redirect_response.headers[header_name] = header_value
# FastAPI can have multiple Set-Cookie headers as a list
if header_name.lower() == "set-cookie" and isinstance(header_value, list):
for cookie_value in header_value:
redirect_response.headers.append(header_name, cookie_value)
else:
redirect_response.headers[header_name] = header_value
if hasattr(response, "body"):
redirect_response.body = response.body

View File

@@ -1,5 +1,6 @@
import logging
import multiprocessing
import os
import time
from typing import Any
from typing import cast
@@ -305,7 +306,7 @@ def wait_for_db(sender: Any, **kwargs: Any) -> None:
def on_secondary_worker_init(sender: Any, **kwargs: Any) -> None:
logger.info("Running as a secondary celery worker.")
logger.info(f"Running as a secondary celery worker: pid={os.getpid()}")
# Set up variables for waiting on primary worker
WAIT_INTERVAL = 5

View File

@@ -0,0 +1,7 @@
# Client-only Celery app: used purely to enqueue tasks from API code
# (send_task / control.revoke); it never registers or runs workers itself.
from celery import Celery

import onyx.background.celery.apps.app_base as app_base

celery_app = Celery(__name__)
# Broker/result-backend settings shared with the worker apps.
celery_app.config_from_object("onyx.background.celery.configs.client")
# Ensure tasks sent through this app carry tenant context.
celery_app.Task = app_base.TenantAwareTask  # type: ignore [misc]

View File

@@ -1,4 +1,5 @@
import logging
import os
from typing import Any
from typing import cast
@@ -95,7 +96,7 @@ def on_worker_init(sender: Worker, **kwargs: Any) -> None:
app_base.wait_for_db(sender, **kwargs)
app_base.wait_for_vespa_or_shutdown(sender, **kwargs)
logger.info("Running as the primary celery worker.")
logger.info(f"Running as the primary celery worker: pid={os.getpid()}")
# Less startup checks in multi-tenant case
if MULTI_TENANT:

View File

@@ -0,0 +1,16 @@
# Celery configuration for the client-only app. Re-exports the shared
# broker/redis/result settings so a task-sending process is configured
# identically to the workers, without any worker-specific options.
import onyx.background.celery.configs.base as shared_config

# Broker connection settings.
broker_url = shared_config.broker_url
broker_connection_retry_on_startup = shared_config.broker_connection_retry_on_startup
broker_pool_limit = shared_config.broker_pool_limit
broker_transport_options = shared_config.broker_transport_options

# Redis socket/health settings.
redis_socket_keepalive = shared_config.redis_socket_keepalive
redis_retry_on_timeout = shared_config.redis_retry_on_timeout
redis_backend_health_check_interval = shared_config.redis_backend_health_check_interval

# Result backend settings.
result_backend = shared_config.result_backend
result_expires = shared_config.result_expires  # 86400 seconds is the default

# Task defaults applied to tasks sent from this client.
task_default_priority = shared_config.task_default_priority
task_acks_late = shared_config.task_acks_late

View File

@@ -0,0 +1,20 @@
"""Factory stub for running celery worker / celery beat.
This code is different from the primary/beat stubs because there is no EE version to
fetch. Port over the code in those files if we add an EE version of this worker.
This is an app stub purely for sending tasks as a client.
"""
from celery import Celery
from onyx.utils.variable_functionality import set_is_ee_based_on_env_variable
set_is_ee_based_on_env_variable()
def get_app() -> Celery:
from onyx.background.celery.apps.client import celery_app
return celery_app
app = get_app()

View File

@@ -13,6 +13,7 @@ from typing import TYPE_CHECKING
from typing import TypeVar
from urllib.parse import parse_qs
from urllib.parse import quote
from urllib.parse import urljoin
from urllib.parse import urlparse
import requests
@@ -342,9 +343,14 @@ def build_confluence_document_id(
Returns:
str: The document id
"""
if is_cloud and not base_url.endswith("/wiki"):
base_url += "/wiki"
return f"{base_url}{content_url}"
# NOTE: urljoin is tricky and will drop the last segment of the base if it doesn't
# end with "/" because it believes that makes it a file.
final_url = base_url.rstrip("/") + "/"
if is_cloud and not final_url.endswith("/wiki/"):
final_url = urljoin(final_url, "wiki") + "/"
final_url = urljoin(final_url, content_url.lstrip("/"))
return final_url
def datetime_from_string(datetime_string: str) -> datetime:
@@ -454,6 +460,19 @@ def _handle_http_error(e: requests.HTTPError, attempt: int) -> int:
logger.warning("HTTPError with `None` as response or as headers")
raise e
# Confluence Server returns 403 when rate limited
if e.response.status_code == 403:
FORBIDDEN_MAX_RETRY_ATTEMPTS = 7
FORBIDDEN_RETRY_DELAY = 10
if attempt < FORBIDDEN_MAX_RETRY_ATTEMPTS:
logger.warning(
"403 error. This sometimes happens when we hit "
f"Confluence rate limits. Retrying in {FORBIDDEN_RETRY_DELAY} seconds..."
)
return FORBIDDEN_RETRY_DELAY
raise e
if (
e.response.status_code != 429
and RATE_LIMIT_MESSAGE_LOWERCASE not in e.response.text.lower()

View File

@@ -5,11 +5,13 @@ from typing import cast
from sqlalchemy.orm import Session
from onyx.chat.models import ContextualPruningConfig
from onyx.chat.models import PromptConfig
from onyx.chat.models import SectionRelevancePiece
from onyx.chat.prune_and_merge import _merge_sections
from onyx.chat.prune_and_merge import ChunkRange
from onyx.chat.prune_and_merge import merge_chunk_intervals
from onyx.chat.prune_and_merge import prune_and_merge_sections
from onyx.configs.chat_configs import DISABLE_LLM_DOC_RELEVANCE
from onyx.context.search.enums import LLMEvaluationType
from onyx.context.search.enums import QueryFlow
@@ -61,6 +63,7 @@ class SearchPipeline:
| None = None,
rerank_metrics_callback: Callable[[RerankMetricsContainer], None] | None = None,
prompt_config: PromptConfig | None = None,
contextual_pruning_config: ContextualPruningConfig | None = None,
):
# NOTE: The Search Request contains a lot of fields that are overrides, many of them can be None
# and typically are None. The preprocessing will fetch default values to replace these empty overrides.
@@ -77,6 +80,9 @@ class SearchPipeline:
self.search_settings = get_current_search_settings(db_session)
self.document_index = get_default_document_index(self.search_settings, None)
self.prompt_config: PromptConfig | None = prompt_config
self.contextual_pruning_config: ContextualPruningConfig | None = (
contextual_pruning_config
)
# Preprocessing steps generate this
self._search_query: SearchQuery | None = None
@@ -420,7 +426,26 @@ class SearchPipeline:
if self._final_context_sections is not None:
return self._final_context_sections
self._final_context_sections = _merge_sections(sections=self.reranked_sections)
if (
self.contextual_pruning_config is not None
and self.prompt_config is not None
):
self._final_context_sections = prune_and_merge_sections(
sections=self.reranked_sections,
section_relevance_list=None,
prompt_config=self.prompt_config,
llm_config=self.llm.config,
question=self.search_query.query,
contextual_pruning_config=self.contextual_pruning_config,
)
else:
logger.error(
"Contextual pruning or prompt config not set, using default merge"
)
self._final_context_sections = _merge_sections(
sections=self.reranked_sections
)
return self._final_context_sections
@property

View File

@@ -703,7 +703,11 @@ class Connector(Base):
)
documents_by_connector: Mapped[
list["DocumentByConnectorCredentialPair"]
] = relationship("DocumentByConnectorCredentialPair", back_populates="connector")
] = relationship(
"DocumentByConnectorCredentialPair",
back_populates="connector",
passive_deletes=True,
)
# synchronize this validation logic with RefreshFrequencySchema etc on front end
# until we have a centralized validation schema
@@ -757,7 +761,11 @@ class Credential(Base):
)
documents_by_credential: Mapped[
list["DocumentByConnectorCredentialPair"]
] = relationship("DocumentByConnectorCredentialPair", back_populates="credential")
] = relationship(
"DocumentByConnectorCredentialPair",
back_populates="credential",
passive_deletes=True,
)
user: Mapped[User | None] = relationship("User", back_populates="credentials")
@@ -1110,10 +1118,10 @@ class DocumentByConnectorCredentialPair(Base):
id: Mapped[str] = mapped_column(ForeignKey("document.id"), primary_key=True)
# TODO: transition this to use the ConnectorCredentialPair id directly
connector_id: Mapped[int] = mapped_column(
ForeignKey("connector.id"), primary_key=True
ForeignKey("connector.id", ondelete="CASCADE"), primary_key=True
)
credential_id: Mapped[int] = mapped_column(
ForeignKey("credential.id"), primary_key=True
ForeignKey("credential.id", ondelete="CASCADE"), primary_key=True
)
# used to better keep track of document counts at a connector level
@@ -1123,10 +1131,10 @@ class DocumentByConnectorCredentialPair(Base):
has_been_indexed: Mapped[bool] = mapped_column(Boolean)
connector: Mapped[Connector] = relationship(
"Connector", back_populates="documents_by_connector"
"Connector", back_populates="documents_by_connector", passive_deletes=True
)
credential: Mapped[Credential] = relationship(
"Credential", back_populates="documents_by_credential"
"Credential", back_populates="documents_by_credential", passive_deletes=True
)
__table_args__ = (
@@ -1650,8 +1658,8 @@ class Prompt(Base):
)
name: Mapped[str] = mapped_column(String)
description: Mapped[str] = mapped_column(String)
system_prompt: Mapped[str] = mapped_column(Text)
task_prompt: Mapped[str] = mapped_column(Text)
system_prompt: Mapped[str] = mapped_column(String(length=8000))
task_prompt: Mapped[str] = mapped_column(String(length=8000))
include_citations: Mapped[bool] = mapped_column(Boolean, default=True)
datetime_aware: Mapped[bool] = mapped_column(Boolean, default=True)
# Default prompts are configured via backend during deployment

View File

@@ -5,6 +5,7 @@ from datetime import timezone
from onyx.configs.constants import INDEX_SEPARATOR
from onyx.context.search.models import IndexFilters
from onyx.document_index.interfaces import VespaChunkRequest
from onyx.document_index.vespa_constants import ACCESS_CONTROL_LIST
from onyx.document_index.vespa_constants import CHUNK_ID
from onyx.document_index.vespa_constants import DOC_UPDATED_AT
from onyx.document_index.vespa_constants import DOCUMENT_ID
@@ -74,8 +75,10 @@ def build_vespa_filters(
filter_str += f'({TENANT_ID} contains "{filters.tenant_id}") and '
# ACL filters
# if filters.access_control_list is not None:
# filter_str += _build_or_filters(ACCESS_CONTROL_LIST, filters.access_control_list)
if filters.access_control_list is not None:
filter_str += _build_or_filters(
ACCESS_CONTROL_LIST, filters.access_control_list
)
# Source type filters
source_strs = (

View File

@@ -602,7 +602,7 @@ def get_max_input_tokens(
)
if input_toks <= 0:
raise RuntimeError("No tokens for input for the LLM given settings")
return GEN_AI_MODEL_FALLBACK_MAX_TOKENS
return input_toks

View File

@@ -21,7 +21,7 @@ from onyx.background.celery.tasks.external_group_syncing.tasks import (
from onyx.background.celery.tasks.pruning.tasks import (
try_creating_prune_generator_task,
)
from onyx.background.celery.versioned_apps.primary import app as primary_app
from onyx.background.celery.versioned_apps.client import app as client_app
from onyx.background.indexing.models import IndexAttemptErrorPydantic
from onyx.configs.constants import OnyxCeleryPriority
from onyx.configs.constants import OnyxCeleryTask
@@ -219,7 +219,7 @@ def update_cc_pair_status(
continue
# Revoke the task to prevent it from running
primary_app.control.revoke(index_payload.celery_task_id)
client_app.control.revoke(index_payload.celery_task_id)
# If it is running, then signaling for termination will get the
# watchdog thread to kill the spawned task
@@ -238,7 +238,7 @@ def update_cc_pair_status(
db_session.commit()
# this speeds up the start of indexing by firing the check immediately
primary_app.send_task(
client_app.send_task(
OnyxCeleryTask.CHECK_FOR_INDEXING,
kwargs=dict(tenant_id=tenant_id),
priority=OnyxCeleryPriority.HIGH,
@@ -376,7 +376,7 @@ def prune_cc_pair(
f"{cc_pair.connector.name} connector."
)
payload_id = try_creating_prune_generator_task(
primary_app, cc_pair, db_session, r, tenant_id
client_app, cc_pair, db_session, r, tenant_id
)
if not payload_id:
raise HTTPException(
@@ -450,7 +450,7 @@ def sync_cc_pair(
f"{cc_pair.connector.name} connector."
)
payload_id = try_creating_permissions_sync_task(
primary_app, cc_pair_id, r, tenant_id
client_app, cc_pair_id, r, tenant_id
)
if not payload_id:
raise HTTPException(
@@ -524,7 +524,7 @@ def sync_cc_pair_groups(
f"{cc_pair.connector.name} connector."
)
payload_id = try_creating_external_group_sync_task(
primary_app, cc_pair_id, r, tenant_id
client_app, cc_pair_id, r, tenant_id
)
if not payload_id:
raise HTTPException(
@@ -634,7 +634,7 @@ def associate_credential_to_connector(
)
# trigger indexing immediately
primary_app.send_task(
client_app.send_task(
OnyxCeleryTask.CHECK_FOR_INDEXING,
priority=OnyxCeleryPriority.HIGH,
kwargs={"tenant_id": tenant_id},

View File

@@ -20,7 +20,7 @@ from onyx.auth.users import current_admin_user
from onyx.auth.users import current_chat_accessible_user
from onyx.auth.users import current_curator_or_admin_user
from onyx.auth.users import current_user
from onyx.background.celery.versioned_apps.primary import app as primary_app
from onyx.background.celery.versioned_apps.client import app as client_app
from onyx.configs.app_configs import ENABLED_CONNECTOR_TYPES
from onyx.configs.app_configs import MOCK_CONNECTOR_FILE_PATH
from onyx.configs.constants import DocumentSource
@@ -928,7 +928,7 @@ def create_connector_with_mock_credential(
)
# trigger indexing immediately
primary_app.send_task(
client_app.send_task(
OnyxCeleryTask.CHECK_FOR_INDEXING,
priority=OnyxCeleryPriority.HIGH,
kwargs={"tenant_id": tenant_id},
@@ -1314,7 +1314,7 @@ def trigger_indexing_for_cc_pair(
# run the beat task to pick up the triggers immediately
priority = OnyxCeleryPriority.HIGHEST if is_user_file else OnyxCeleryPriority.HIGH
logger.info(f"Sending indexing check task with priority {priority}")
primary_app.send_task(
client_app.send_task(
OnyxCeleryTask.CHECK_FOR_INDEXING,
priority=priority,
kwargs={"tenant_id": tenant_id},

View File

@@ -6,7 +6,7 @@ from sqlalchemy.orm import Session
from onyx.auth.users import current_curator_or_admin_user
from onyx.auth.users import current_user
from onyx.background.celery.versioned_apps.primary import app as primary_app
from onyx.background.celery.versioned_apps.client import app as client_app
from onyx.configs.constants import OnyxCeleryPriority
from onyx.configs.constants import OnyxCeleryTask
from onyx.db.document_set import check_document_sets_are_public
@@ -52,7 +52,7 @@ def create_document_set(
except Exception as e:
raise HTTPException(status_code=400, detail=str(e))
primary_app.send_task(
client_app.send_task(
OnyxCeleryTask.CHECK_FOR_VESPA_SYNC_TASK,
kwargs={"tenant_id": tenant_id},
priority=OnyxCeleryPriority.HIGH,
@@ -85,7 +85,7 @@ def patch_document_set(
except Exception as e:
raise HTTPException(status_code=400, detail=str(e))
primary_app.send_task(
client_app.send_task(
OnyxCeleryTask.CHECK_FOR_VESPA_SYNC_TASK,
kwargs={"tenant_id": tenant_id},
priority=OnyxCeleryPriority.HIGH,
@@ -108,7 +108,7 @@ def delete_document_set(
except Exception as e:
raise HTTPException(status_code=400, detail=str(e))
primary_app.send_task(
client_app.send_task(
OnyxCeleryTask.CHECK_FOR_VESPA_SYNC_TASK,
kwargs={"tenant_id": tenant_id},
priority=OnyxCeleryPriority.HIGH,

View File

@@ -10,7 +10,7 @@ from sqlalchemy.orm import Session
from onyx.auth.users import current_admin_user
from onyx.auth.users import current_curator_or_admin_user
from onyx.background.celery.versioned_apps.primary import app as primary_app
from onyx.background.celery.versioned_apps.client import app as client_app
from onyx.configs.app_configs import GENERATIVE_MODEL_ACCESS_CHECK_FREQ
from onyx.configs.constants import DocumentSource
from onyx.configs.constants import KV_GEN_AI_KEY_CHECK_TIME
@@ -192,7 +192,7 @@ def create_deletion_attempt_for_connector_id(
db_session.commit()
# run the beat task to pick up this deletion from the db immediately
primary_app.send_task(
client_app.send_task(
OnyxCeleryTask.CHECK_FOR_CONNECTOR_DELETION,
priority=OnyxCeleryPriority.HIGH,
kwargs={"tenant_id": tenant_id},

View File

@@ -376,6 +376,7 @@ class SearchTool(Tool[SearchToolOverrideKwargs]):
db_session=alternate_db_session or self.db_session,
prompt_config=self.prompt_config,
retrieved_sections_callback=retrieved_sections_callback,
contextual_pruning_config=self.contextual_pruning_config,
)
search_query_info = SearchQueryInfo(
@@ -447,6 +448,7 @@ class SearchTool(Tool[SearchToolOverrideKwargs]):
db_session=self.db_session,
bypass_acl=self.bypass_acl,
prompt_config=self.prompt_config,
contextual_pruning_config=self.contextual_pruning_config,
)
# Log what we're doing

View File

@@ -887,6 +887,7 @@ def main() -> None:
type=int,
help="Maximum number of documents to delete (for delete-all-documents)",
)
parser.add_argument("--link", help="Document link (for get_acls filter)")
args = parser.parse_args()
vespa_debug = VespaDebugging(args.tenant_id)
@@ -924,7 +925,11 @@ def main() -> None:
elif args.action == "get_acls":
if args.cc_pair_id is None:
parser.error("--cc-pair-id is required for get_acls action")
vespa_debug.acls(args.cc_pair_id, args.n)
if args.link is None:
vespa_debug.acls(args.cc_pair_id, args.n)
else:
vespa_debug.acls_by_link(args.cc_pair_id, args.link)
if __name__ == "__main__":

View File

@@ -165,17 +165,18 @@ class DocumentManager:
doc["fields"]["document_id"]: doc["fields"] for doc in retrieved_docs_dict
}
# NOTE(rkuo): too much log spam
# Left this here for debugging purposes.
import json
# import json
print("DEBUGGING DOCUMENTS")
print(retrieved_docs)
for doc in retrieved_docs.values():
printable_doc = doc.copy()
print(printable_doc.keys())
printable_doc.pop("embeddings")
printable_doc.pop("title_embedding")
print(json.dumps(printable_doc, indent=2))
# print("DEBUGGING DOCUMENTS")
# print(retrieved_docs)
# for doc in retrieved_docs.values():
# printable_doc = doc.copy()
# print(printable_doc.keys())
# printable_doc.pop("embeddings")
# printable_doc.pop("title_embedding")
# print(json.dumps(printable_doc, indent=2))
for document in cc_pair.documents:
retrieved_doc = retrieved_docs.get(document.id)

View File

@@ -1,3 +1,4 @@
import time
from datetime import datetime
from datetime import timedelta
from urllib.parse import urlencode
@@ -191,7 +192,7 @@ class IndexAttemptManager:
user_performing_action: DATestUser | None = None,
) -> None:
"""Wait for an IndexAttempt to complete"""
start = datetime.now()
start = time.monotonic()
while True:
index_attempt = IndexAttemptManager.get_index_attempt_by_id(
index_attempt_id=index_attempt_id,
@@ -203,7 +204,7 @@ class IndexAttemptManager:
print(f"IndexAttempt {index_attempt_id} completed")
return
elapsed = (datetime.now() - start).total_seconds()
elapsed = time.monotonic() - start
if elapsed > timeout:
raise TimeoutError(
f"IndexAttempt {index_attempt_id} did not complete within {timeout} seconds"

View File

@@ -242,15 +242,7 @@ export function AssistantEditor({
enabledToolsMap[tool.id] = personaCurrentToolIds.includes(tool.id);
});
const {
selectedFiles,
selectedFolders,
addSelectedFile,
removeSelectedFile,
addSelectedFolder,
removeSelectedFolder,
clearSelectedItems,
} = useDocumentsContext();
const { selectedFiles, selectedFolders } = useDocumentsContext();
const [showVisibilityWarning, setShowVisibilityWarning] = useState(false);

View File

@@ -302,11 +302,17 @@ export default function AddConnector({
...connector_specific_config
} = values;
// Apply transforms from connectors.ts configuration
// Apply special transforms according to application logic
const transformedConnectorSpecificConfig = Object.entries(
connector_specific_config
).reduce(
(acc, [key, value]) => {
// Filter out empty strings from arrays
if (Array.isArray(value)) {
value = (value as any[]).filter(
(item) => typeof item !== "string" || item.trim() !== ""
);
}
const matchingConfigValue = configuration.values.find(
(configValue) => configValue.name === key
);

View File

@@ -4,177 +4,62 @@ import {
LlmDescriptor,
useLlmManager,
} from "@/lib/hooks";
import { StringOrNumberOption } from "@/components/Dropdown";
import { Persona } from "@/app/admin/assistants/interfaces";
import { destructureValue, getFinalLLM, structureValue } from "@/lib/llm/utils";
import { destructureValue } from "@/lib/llm/utils";
import { useState } from "react";
import { Hoverable } from "@/components/Hoverable";
import { Popover } from "@/components/popover/Popover";
import { IconType } from "react-icons";
import { FiRefreshCw, FiCheck } from "react-icons/fi";
export function RegenerateDropdown({
options,
selected,
onSelect,
side,
maxHeight,
alternate,
onDropdownVisibleChange,
}: {
alternate?: string;
options: StringOrNumberOption[];
selected: string | null;
onSelect: (value: string | number | null) => void;
includeDefault?: boolean;
side?: "top" | "right" | "bottom" | "left";
maxHeight?: string;
onDropdownVisibleChange: (isVisible: boolean) => void;
}) {
const [isOpen, setIsOpen] = useState(false);
const toggleDropdownVisible = (isVisible: boolean) => {
setIsOpen(isVisible);
onDropdownVisibleChange(isVisible);
};
const Dropdown = (
<div className="overflow-y-auto border border-neutral-800 py-2 min-w-fit bg-neutral-50 dark:bg-neutral-900 rounded-md shadow-lg">
<div className="mb-1 flex items-center justify-between px-4 pt-2">
<span className="text-sm text-neutral-600 dark:text-neutral-400">
Regenerate with
</span>
</div>
{options.map((option) => (
<div
key={option.value}
role="menuitem"
className={`flex items-center m-1.5 p-1.5 text-sm cursor-pointer focus-visible:outline-0 group relative hover:bg-neutral-200 dark:hover:bg-neutral-800 rounded-md my-0 px-3 mx-2 gap-2.5 py-3 !pr-3 ${
option.value === selected
? "bg-neutral-200 dark:bg-neutral-800"
: ""
}`}
onClick={() => onSelect(option.value)}
>
<div className="flex grow items-center justify-between gap-2">
<div>
<div className="flex items-center gap-3">
<div>{getDisplayNameForModel(option.name)}</div>
</div>
</div>
</div>
{option.value === selected && (
<FiCheck className="text-neutral-700 dark:text-neutral-300" />
)}
</div>
))}
</div>
);
return (
<Popover
open={isOpen}
onOpenChange={toggleDropdownVisible}
content={
<div onClick={() => toggleDropdownVisible(!isOpen)}>
{!alternate ? (
<Hoverable size={16} icon={FiRefreshCw as IconType} />
) : (
<Hoverable
size={16}
icon={FiRefreshCw as IconType}
hoverText={getDisplayNameForModel(alternate)}
/>
)}
</div>
}
popover={Dropdown}
align="start"
side={side}
sideOffset={5}
triggerMaxWidth
/>
);
}
import { FiRefreshCw } from "react-icons/fi";
import LLMPopover from "./input/LLMPopover";
export default function RegenerateOption({
selectedAssistant,
regenerate,
overriddenModel,
onHoverChange,
onDropdownVisibleChange,
}: {
selectedAssistant: Persona;
regenerate: (modelOverRide: LlmDescriptor) => Promise<void>;
overriddenModel?: string;
onHoverChange: (isHovered: boolean) => void;
onDropdownVisibleChange: (isVisible: boolean) => void;
}) {
const { llmProviders } = useChatContext();
const llmManager = useLlmManager(llmProviders);
const [_, llmName] = getFinalLLM(llmProviders, selectedAssistant, null);
const llmOptionsByProvider: {
[provider: string]: { name: string; value: string }[];
} = {};
const uniqueModelNames = new Set<string>();
llmProviders.forEach((llmProvider) => {
if (!llmOptionsByProvider[llmProvider.provider]) {
llmOptionsByProvider[llmProvider.provider] = [];
}
(llmProvider.display_model_names || llmProvider.model_names).forEach(
(modelName) => {
if (!uniqueModelNames.has(modelName)) {
uniqueModelNames.add(modelName);
llmOptionsByProvider[llmProvider.provider].push({
name: modelName,
value: structureValue(
llmProvider.name,
llmProvider.provider,
modelName
),
});
}
}
);
});
const llmOptions = Object.entries(llmOptionsByProvider).flatMap(
([provider, options]) => [...options]
);
const currentModelName =
llmManager?.currentLlm.modelName ||
(selectedAssistant
? selectedAssistant.llm_model_version_override || llmName
: llmName);
const [isOpen, setIsOpen] = useState(false);
const toggleDropdownVisible = (isVisible: boolean) => {
setIsOpen(isVisible);
onDropdownVisibleChange(isVisible);
};
return (
<div
className="group flex items-center relative"
onMouseEnter={() => onHoverChange(true)}
onMouseLeave={() => onHoverChange(false)}
>
<RegenerateDropdown
onDropdownVisibleChange={onDropdownVisibleChange}
alternate={overriddenModel}
options={llmOptions}
selected={currentModelName}
onSelect={(value) => {
const { name, provider, modelName } = destructureValue(
value as string
);
regenerate({
name: name,
provider: provider,
modelName: modelName,
});
}}
/>
</div>
<LLMPopover
llmManager={llmManager}
llmProviders={llmProviders}
requiresImageGeneration={false}
currentAssistant={selectedAssistant}
currentModelName={overriddenModel}
trigger={
<div onClick={() => toggleDropdownVisible(!isOpen)}>
{!overriddenModel ? (
<Hoverable size={16} icon={FiRefreshCw as IconType} />
) : (
<Hoverable
size={16}
icon={FiRefreshCw as IconType}
hoverText={getDisplayNameForModel(overriddenModel)}
/>
)}
</div>
}
onSelect={(value) => {
const { name, provider, modelName } = destructureValue(value as string);
regenerate({
name: name,
provider: provider,
modelName: modelName,
});
}}
/>
);
}

View File

@@ -6,7 +6,7 @@ import { Persona } from "@/app/admin/assistants/interfaces";
import LLMPopover from "./LLMPopover";
import { InputPrompt } from "@/app/chat/interfaces";
import { FilterManager, LlmManager } from "@/lib/hooks";
import { FilterManager, getDisplayNameForModel, LlmManager } from "@/lib/hooks";
import { useChatContext } from "@/components/context/ChatContext";
import { ChatFileType, FileDescriptor } from "../interfaces";
import {
@@ -38,6 +38,7 @@ import { useUser } from "@/components/user/UserProvider";
import { useDocumentSelection } from "../useDocumentSelection";
import { AgenticToggle } from "./AgenticToggle";
import { SettingsContext } from "@/components/settings/SettingsProvider";
import { getProviderIcon } from "@/app/admin/configuration/llm/interfaces";
import { LoadingIndicator } from "react-select/dist/declarations/src/components/indicators";
import { FidgetSpinner } from "react-loader-spinner";
import { LoadingAnimation } from "@/components/Loading";
@@ -799,6 +800,27 @@ export function ChatInputBar({
llmManager={llmManager}
requiresImageGeneration={false}
currentAssistant={selectedAssistant}
trigger={
<button
className="dark:text-white text-black focus:outline-none"
data-testid="llm-popover-trigger"
>
<ChatInputOption
minimize
toggle
flexPriority="stiff"
name={getDisplayNameForModel(
llmManager?.currentLlm.modelName || "Models"
)}
Icon={getProviderIcon(
llmManager?.currentLlm.provider || "anthropic",
llmManager?.currentLlm.modelName ||
"claude-3-5-sonnet-20240620"
)}
tooltipContent="Switch models"
/>
</button>
}
/>
{retrievalEnabled && (

View File

@@ -1,16 +1,9 @@
import React, {
useState,
useEffect,
useCallback,
useLayoutEffect,
useMemo,
} from "react";
import React, { useState, useEffect, useCallback, useMemo } from "react";
import {
Popover,
PopoverContent,
PopoverTrigger,
} from "@/components/ui/popover";
import { ChatInputOption } from "./ChatInputOption";
import { getDisplayNameForModel } from "@/lib/hooks";
import {
checkLLMSupportsImageInput,
@@ -35,12 +28,16 @@ import { FiAlertTriangle } from "react-icons/fi";
import { Slider } from "@/components/ui/slider";
import { useUser } from "@/components/user/UserProvider";
import { TruncatedText } from "@/components/ui/truncatedText";
import { ChatInputOption } from "./ChatInputOption";
interface LLMPopoverProps {
llmProviders: LLMProviderDescriptor[];
llmManager: LlmManager;
requiresImageGeneration?: boolean;
currentAssistant?: Persona;
trigger?: React.ReactElement;
onSelect?: (value: string) => void;
currentModelName?: string;
}
export default function LLMPopover({
@@ -48,70 +45,69 @@ export default function LLMPopover({
llmManager,
requiresImageGeneration,
currentAssistant,
trigger,
onSelect,
currentModelName,
}: LLMPopoverProps) {
const [isOpen, setIsOpen] = useState(false);
const { user } = useUser();
// Memoize the options to prevent unnecessary recalculations
const {
llmOptionsByProvider,
llmOptions,
defaultProvider,
defaultModelDisplayName,
} = useMemo(() => {
const llmOptionsByProvider: {
[provider: string]: {
name: string;
value: string;
icon: React.FC<{ size?: number; className?: string }>;
}[];
} = {};
const { llmOptions, defaultProvider, defaultModelDisplayName } =
useMemo(() => {
const llmOptionsByProvider: {
[provider: string]: {
name: string;
value: string;
icon: React.FC<{ size?: number; className?: string }>;
}[];
} = {};
const uniqueModelNames = new Set<string>();
const uniqueModelNames = new Set<string>();
llmProviders.forEach((llmProvider) => {
if (!llmOptionsByProvider[llmProvider.provider]) {
llmOptionsByProvider[llmProvider.provider] = [];
}
(llmProvider.display_model_names || llmProvider.model_names).forEach(
(modelName) => {
if (!uniqueModelNames.has(modelName)) {
uniqueModelNames.add(modelName);
llmOptionsByProvider[llmProvider.provider].push({
name: modelName,
value: structureValue(
llmProvider.name,
llmProvider.provider,
modelName
),
icon: getProviderIcon(llmProvider.provider, modelName),
});
}
llmProviders.forEach((llmProvider) => {
if (!llmOptionsByProvider[llmProvider.provider]) {
llmOptionsByProvider[llmProvider.provider] = [];
}
(llmProvider.display_model_names || llmProvider.model_names).forEach(
(modelName) => {
if (!uniqueModelNames.has(modelName)) {
uniqueModelNames.add(modelName);
llmOptionsByProvider[llmProvider.provider].push({
name: modelName,
value: structureValue(
llmProvider.name,
llmProvider.provider,
modelName
),
icon: getProviderIcon(llmProvider.provider, modelName),
});
}
}
);
});
const llmOptions = Object.entries(llmOptionsByProvider).flatMap(
([provider, options]) => [...options]
);
});
const llmOptions = Object.entries(llmOptionsByProvider).flatMap(
([provider, options]) => [...options]
);
const defaultProvider = llmProviders.find(
(llmProvider) => llmProvider.is_default_provider
);
const defaultProvider = llmProviders.find(
(llmProvider) => llmProvider.is_default_provider
);
const defaultModelName = defaultProvider?.default_model_name;
const defaultModelDisplayName = defaultModelName
? getDisplayNameForModel(defaultModelName)
: null;
const defaultModelName = defaultProvider?.default_model_name;
const defaultModelDisplayName = defaultModelName
? getDisplayNameForModel(defaultModelName)
: null;
return {
llmOptionsByProvider,
llmOptions,
defaultProvider,
defaultModelDisplayName,
};
}, [llmProviders]);
return {
llmOptionsByProvider,
llmOptions,
defaultProvider,
defaultModelDisplayName,
};
}, [llmProviders]);
const [localTemperature, setLocalTemperature] = useState(
llmManager.temperature ?? 0.5
@@ -135,32 +131,34 @@ export default function LLMPopover({
// Memoize trigger content to prevent rerendering
const triggerContent = useMemo(
() => (
<button
className="dark:text-[#fff] text-[#000] focus:outline-none"
data-testid="llm-popover-trigger"
>
<ChatInputOption
minimize
toggle
flexPriority="stiff"
name={getDisplayNameForModel(
llmManager?.currentLlm.modelName ||
defaultModelDisplayName ||
"Models"
)}
Icon={getProviderIcon(
llmManager?.currentLlm.provider ||
defaultProvider?.provider ||
"anthropic",
llmManager?.currentLlm.modelName ||
defaultProvider?.default_model_name ||
"claude-3-5-sonnet-20240620"
)}
tooltipContent="Switch models"
/>
</button>
),
trigger
? () => trigger
: () => (
<button
className="dark:text-[#fff] text-[#000] focus:outline-none"
data-testid="llm-popover-trigger"
>
<ChatInputOption
minimize
toggle
flexPriority="stiff"
name={getDisplayNameForModel(
llmManager?.currentLlm.modelName ||
defaultModelDisplayName ||
"Models"
)}
Icon={getProviderIcon(
llmManager?.currentLlm.provider ||
defaultProvider?.provider ||
"anthropic",
llmManager?.currentLlm.modelName ||
defaultProvider?.default_model_name ||
"claude-3-5-sonnet-20240620"
)}
tooltipContent="Switch models"
/>
</button>
),
[defaultModelDisplayName, defaultProvider, llmManager?.currentLlm]
);
@@ -178,12 +176,14 @@ export default function LLMPopover({
<button
key={index}
className={`w-full flex items-center gap-x-2 px-3 py-2 text-sm text-left hover:bg-background-100 dark:hover:bg-neutral-800 transition-colors duration-150 ${
llmManager.currentLlm.modelName === name
(currentModelName || llmManager.currentLlm.modelName) ===
name
? "bg-background-100 dark:bg-neutral-900 text-text"
: "text-text-darker"
}`}
onClick={() => {
llmManager.updateCurrentLlm(destructureValue(value));
onSelect?.(value);
setIsOpen(false);
}}
>

View File

@@ -178,7 +178,6 @@ export const AgenticMessage = ({
const [isViewingInitialAnswer, setIsViewingInitialAnswer] = useState(true);
const [canShowResponse, setCanShowResponse] = useState(isComplete);
const [isRegenerateHovered, setIsRegenerateHovered] = useState(false);
const [isRegenerateDropdownVisible, setIsRegenerateDropdownVisible] =
useState(false);
@@ -597,7 +596,6 @@ export const AgenticMessage = ({
onDropdownVisibleChange={
setIsRegenerateDropdownVisible
}
onHoverChange={setIsRegenerateHovered}
selectedAssistant={currentPersona!}
regenerate={regenerate}
overriddenModel={overriddenModel}
@@ -613,16 +611,10 @@ export const AgenticMessage = ({
absolute -bottom-5
z-10
invisible ${
(isHovering ||
isRegenerateHovered ||
settings?.isMobile) &&
"!visible"
(isHovering || settings?.isMobile) && "!visible"
}
opacity-0 ${
(isHovering ||
isRegenerateHovered ||
settings?.isMobile) &&
"!opacity-100"
(isHovering || settings?.isMobile) && "!opacity-100"
}
translate-y-2 ${
(isHovering || settings?.isMobile) &&
@@ -697,7 +689,6 @@ export const AgenticMessage = ({
}
regenerate={regenerate}
overriddenModel={overriddenModel}
onHoverChange={setIsRegenerateHovered}
/>
</CustomTooltip>
)}

View File

@@ -301,7 +301,6 @@ export const AIMessage = ({
const finalContent = processContent(content as string);
const [isRegenerateHovered, setIsRegenerateHovered] = useState(false);
const [isRegenerateDropdownVisible, setIsRegenerateDropdownVisible] =
useState(false);
const { isHovering, trackedElementRef, hoverElementRef } = useMouseTracking();
@@ -728,7 +727,6 @@ export const AIMessage = ({
onDropdownVisibleChange={
setIsRegenerateDropdownVisible
}
onHoverChange={setIsRegenerateHovered}
selectedAssistant={currentPersona!}
regenerate={regenerate}
overriddenModel={overriddenModel}
@@ -744,16 +742,10 @@ export const AIMessage = ({
absolute -bottom-5
z-10
invisible ${
(isHovering ||
isRegenerateHovered ||
settings?.isMobile) &&
"!visible"
(isHovering || settings?.isMobile) && "!visible"
}
opacity-0 ${
(isHovering ||
isRegenerateHovered ||
settings?.isMobile) &&
"!opacity-100"
(isHovering || settings?.isMobile) && "!opacity-100"
}
flex md:flex-row gap-x-0.5 bg-background-125/40 -mx-1.5 p-1.5 rounded-lg
`}
@@ -818,7 +810,6 @@ export const AIMessage = ({
}
regenerate={regenerate}
overriddenModel={overriddenModel}
onHoverChange={setIsRegenerateHovered}
/>
</CustomTooltip>
)}

View File

@@ -1333,10 +1333,10 @@ export function createConnectorValidationSchema(
): Yup.ObjectSchema<Record<string, any>> {
const configuration = connectorConfigs[connector];
return Yup.object().shape({
const object = Yup.object().shape({
access_type: Yup.string().required("Access Type is required"),
name: Yup.string().required("Connector Name is required"),
...configuration.values.reduce(
...[...configuration.values, ...configuration.advanced_values].reduce(
(acc, field) => {
let schema: any =
field.type === "select"
@@ -1363,6 +1363,8 @@ export function createConnectorValidationSchema(
pruneFreq: Yup.number().min(0, "Prune frequency must be non-negative"),
refreshFreq: Yup.number().min(0, "Refresh frequency must be non-negative"),
});
return object;
}
export const defaultPruneFreqDays = 30; // 30 days