Compare commits

...

13 Commits

Author SHA1 Message Date
Wenxi Onyx
a702192607 bump braintrust 2025-12-03 20:58:59 -08:00
dependabot[bot]
ef1b448b4c chore(deps): Bump next from 16.0.1 to 16.0.7 in /web (#6563)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Wenxi Onyx <wenxi@onyx.app>
2025-12-03 20:58:59 -08:00
Chris Weaver
f80cf23591 fix: jira attribute error (#6584) 2025-12-03 20:58:59 -08:00
Chris Weaver
81b43ea0de fix: improve jira perm sync handling (#6575) 2025-12-03 20:58:59 -08:00
Chris Weaver
dfb961f6d0 fix: workaround for bugged Confluence API (#6311) 2025-12-03 20:58:59 -08:00
Evan Lohn
6d3bca381f fix: expand special casing around sharepoint shared drives (#6539) 2025-12-03 20:58:59 -08:00
Emerson Gomes
42ec352fa2 fix: prevent heartbeat timeout state pollution in validation loop (#5782)
Co-authored-by: Claude <noreply@anthropic.com>
2025-12-03 20:58:59 -08:00
Emerson Gomes
70b1312461 db: remove duplicate chunk_stats deletion in delete_documents_complete__no_commit (#5792) 2025-12-03 20:58:59 -08:00
Nikolas Garza
a1df56df13 chore: remove fed slack entities button on doc set edit page (#6385) 2025-12-02 16:50:12 -08:00
Nikolas Garza
90c206d9e1 fix: eager load persona in slack channel config (#6535) 2025-12-02 16:50:12 -08:00
きわみざむらい
5e1c89d673 fix: Add proper DISABLE_MODEL_SERVER environment variable support (#6468)
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2025-12-02 16:50:12 -08:00
Emerson Gomes
2239a58b1d Harden markdown link protocol handling (#6517) 2025-12-02 16:50:12 -08:00
Justin Tahara
825edba531 fix(feedback): API Endpoint fix (#6500) 2025-12-02 16:50:12 -08:00
33 changed files with 1372 additions and 445 deletions

View File

@@ -29,6 +29,9 @@ env:
CONFLUENCE_ACCESS_TOKEN: ${{ secrets.CONFLUENCE_ACCESS_TOKEN }}
CONFLUENCE_ACCESS_TOKEN_SCOPED: ${{ secrets.CONFLUENCE_ACCESS_TOKEN_SCOPED }}
# Jira
JIRA_ADMIN_API_TOKEN: ${{ secrets.JIRA_ADMIN_API_TOKEN }}
# LLMs
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}

View File

@@ -41,6 +41,10 @@ CONFLUENCE_ANONYMOUS_ACCESS_IS_PUBLIC = (
JIRA_PERMISSION_DOC_SYNC_FREQUENCY = int(
os.environ.get("JIRA_PERMISSION_DOC_SYNC_FREQUENCY") or 30 * 60
)
# In seconds, default is 30 minutes
JIRA_PERMISSION_GROUP_SYNC_FREQUENCY = int(
os.environ.get("JIRA_PERMISSION_GROUP_SYNC_FREQUENCY") or 30 * 60
)
#####
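
A note on the fallback pattern above: `int(os.environ.get(NAME) or default)` treats an unset and an empty env var the same, because `or` tests truthiness, whereas `os.environ.get(NAME, default)` would happily return "" and crash the int() call. A minimal sketch of the behavior:

import os

# Empty string is falsy, so `or` falls through to the 30-minute default.
os.environ["JIRA_PERMISSION_GROUP_SYNC_FREQUENCY"] = ""
freq = int(os.environ.get("JIRA_PERMISSION_GROUP_SYNC_FREQUENCY") or 30 * 60)
assert freq == 1800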

View File

@@ -3,12 +3,15 @@ from collections.abc import Generator
from ee.onyx.db.external_perm import ExternalUserGroup
from ee.onyx.external_permissions.confluence.constants import ALL_CONF_EMAILS_GROUP_NAME
from onyx.background.error_logging import emit_background_error
from onyx.configs.app_configs import CONFLUENCE_USE_ONYX_USERS_FOR_GROUP_SYNC
from onyx.connectors.confluence.onyx_confluence import (
get_user_email_from_username__server,
)
from onyx.connectors.confluence.onyx_confluence import OnyxConfluence
from onyx.connectors.credentials_provider import OnyxDBCredentialsProvider
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.models import ConnectorCredentialPair
from onyx.db.users import get_all_users
from onyx.utils.logger import setup_logger
logger = setup_logger()
@@ -19,7 +22,7 @@ def _build_group_member_email_map(
) -> dict[str, set[str]]:
group_member_emails: dict[str, set[str]] = {}
for user in confluence_client.paginated_cql_user_retrieval():
logger.debug(f"Processing groups for user: {user}")
logger.info(f"Processing groups for user: {user}")
email = user.email
if not email:
@@ -31,6 +34,8 @@ def _build_group_member_email_map(
confluence_client=confluence_client,
user_name=user_name,
)
else:
logger.error(f"user result missing username field: {user}")
if not email:
# If we still don't have an email, skip this user
@@ -64,6 +69,92 @@ def _build_group_member_email_map(
return group_member_emails
def _build_group_member_email_map_from_onyx_users(
confluence_client: OnyxConfluence,
) -> dict[str, set[str]]:
"""Hacky, but it's the only way to do this as long as the
Confluence APIs are broken.
This is fixed in Confluence Data Center 10.1.0, so first choice
is to tell users to upgrade to 10.1.0.
https://jira.atlassian.com/browse/CONFSERVER-95999
"""
with get_session_with_current_tenant() as db_session:
# don't include external since they are handled by the "through confluence"
# user fetching mechanism
user_emails = [
user.email for user in get_all_users(db_session, include_external=False)
]
def _infer_username_from_email(email: str) -> str:
return email.split("@")[0]
group_member_emails: dict[str, set[str]] = {}
for email in user_emails:
logger.info(f"Processing groups for user with email: {email}")
try:
user_name = _infer_username_from_email(email)
response = confluence_client.get_user_details_by_username(user_name)
user_key = response.get("userKey")
if not user_key:
logger.error(f"User key not found for user with email {email}")
continue
all_users_groups: set[str] = set()
for group in confluence_client.paginated_groups_by_user_retrieval(user_key):
# group name uniqueness is enforced by Confluence, so we can use it as a group ID
group_id = group["name"]
group_member_emails.setdefault(group_id, set()).add(email)
all_users_groups.add(group_id)
if not all_users_groups:
msg = f"No groups found for user with email: {email}"
logger.error(msg)
else:
logger.info(
f"Found groups {all_users_groups} for user with email {email}"
)
except Exception:
logger.exception(f"Error getting user details for user with email {email}")
return group_member_emails
def _build_final_group_to_member_email_map(
confluence_client: OnyxConfluence,
cc_pair_id: int,
# if set, will infer confluence usernames from onyx users in addition to using the
# confluence users API. This is a hacky workaround for the fact that the Confluence
# users API is broken before Confluence Data Center 10.1.0.
use_onyx_users: bool = CONFLUENCE_USE_ONYX_USERS_FOR_GROUP_SYNC,
) -> dict[str, set[str]]:
group_to_member_email_map = _build_group_member_email_map(
confluence_client=confluence_client,
cc_pair_id=cc_pair_id,
)
group_to_member_email_map_from_onyx_users = (
(
_build_group_member_email_map_from_onyx_users(
confluence_client=confluence_client,
)
)
if use_onyx_users
else {}
)
all_group_ids = set(group_to_member_email_map.keys()) | set(
group_to_member_email_map_from_onyx_users.keys()
)
final_group_to_member_email_map = {}
for group_id in all_group_ids:
group_member_emails = group_to_member_email_map.get(
group_id, set()
) | group_to_member_email_map_from_onyx_users.get(group_id, set())
final_group_to_member_email_map[group_id] = group_member_emails
return final_group_to_member_email_map
def confluence_group_sync(
tenant_id: str,
cc_pair: ConnectorCredentialPair,
@@ -87,13 +178,12 @@ def confluence_group_sync(
confluence_client._probe_connection(**probe_kwargs)
confluence_client._initialize_connection(**final_kwargs)
-    group_member_email_map = _build_group_member_email_map(
-        confluence_client=confluence_client,
-        cc_pair_id=cc_pair.id,
+    group_to_member_email_map = _build_final_group_to_member_email_map(
+        confluence_client, cc_pair.id
     )
     all_found_emails = set()
-    for group_id, group_member_emails in group_member_email_map.items():
+    for group_id, group_member_emails in group_to_member_email_map.items():
yield (
ExternalUserGroup(
id=group_id,
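
The final map above is simply the per-group union of the two sources. A standalone sketch of that dict-of-sets merge, with illustrative group names and placeholder example.com addresses:

def merge_group_maps(
    a: dict[str, set[str]], b: dict[str, set[str]]
) -> dict[str, set[str]]:
    # Union membership per group; a group present in only one map passes through.
    return {gid: a.get(gid, set()) | b.get(gid, set()) for gid in a.keys() | b.keys()}

from_confluence_api = {"eng": {"alice@example.com"}}
from_onyx_users = {"eng": {"bob@example.com"}, "sales": {"carol@example.com"}}
assert merge_group_maps(from_confluence_api, from_onyx_users) == {
    "eng": {"alice@example.com", "bob@example.com"},
    "sales": {"carol@example.com"},
}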

View File

@@ -0,0 +1,136 @@
from collections.abc import Generator
from jira import JIRA
from ee.onyx.db.external_perm import ExternalUserGroup
from onyx.connectors.jira.utils import build_jira_client
from onyx.db.models import ConnectorCredentialPair
from onyx.utils.logger import setup_logger
logger = setup_logger()
def _get_jira_group_members_email(
jira_client: JIRA,
group_name: str,
) -> list[str]:
"""Get all member emails for a Jira group.
Filters out app accounts (bots, integrations) and only returns real user emails.
"""
emails: list[str] = []
try:
# group_members returns an OrderedDict of account_id -> member_info
members = jira_client.group_members(group=group_name)
if not members:
logger.warning(f"No members found for group {group_name}")
return emails
for account_id, member_info in members.items():
# member_info is a dict with keys like 'fullname', 'email', 'active'
email = member_info.get("email")
# Skip "hidden" emails - these are typically app accounts
if email and email != "hidden":
emails.append(email)
else:
# For cloud, we might need to fetch user details separately
try:
user = jira_client.user(id=account_id)
# Skip app accounts (bots, integrations, etc.)
if hasattr(user, "accountType") and user.accountType == "app":
logger.info(
f"Skipping app account {account_id} for group {group_name}"
)
continue
if hasattr(user, "emailAddress") and user.emailAddress:
emails.append(user.emailAddress)
else:
logger.warning(f"User {account_id} has no email address")
except Exception as e:
logger.warning(
f"Could not fetch email for user {account_id} in group {group_name}: {e}"
)
except Exception as e:
logger.error(f"Error fetching members for group {group_name}: {e}")
return emails
def _build_group_member_email_map(
jira_client: JIRA,
) -> dict[str, set[str]]:
"""Build a map of group names to member emails."""
group_member_emails: dict[str, set[str]] = {}
try:
# Get all groups from Jira - returns a list of group name strings
group_names = jira_client.groups()
if not group_names:
logger.warning("No groups found in Jira")
return group_member_emails
logger.info(f"Found {len(group_names)} groups in Jira")
for group_name in group_names:
if not group_name:
continue
member_emails = _get_jira_group_members_email(
jira_client=jira_client,
group_name=group_name,
)
if member_emails:
group_member_emails[group_name] = set(member_emails)
logger.debug(
f"Found {len(member_emails)} members for group {group_name}"
)
else:
logger.debug(f"No members found for group {group_name}")
except Exception as e:
logger.error(f"Error building group member email map: {e}")
return group_member_emails
def jira_group_sync(
tenant_id: str,
cc_pair: ConnectorCredentialPair,
) -> Generator[ExternalUserGroup, None, None]:
"""
Sync Jira groups and their members.
This function fetches all groups from Jira and yields ExternalUserGroup
objects containing the group ID and member emails.
"""
jira_base_url = cc_pair.connector.connector_specific_config.get("jira_base_url", "")
scoped_token = cc_pair.connector.connector_specific_config.get(
"scoped_token", False
)
if not jira_base_url:
raise ValueError("No jira_base_url found in connector config")
jira_client = build_jira_client(
credentials=cc_pair.credential.credential_json,
jira_base=jira_base_url,
scoped_token=scoped_token,
)
group_member_email_map = _build_group_member_email_map(jira_client=jira_client)
if not group_member_email_map:
raise ValueError(f"No groups with members found for cc_pair_id={cc_pair.id}")
for group_id, group_member_emails in group_member_email_map.items():
yield ExternalUserGroup(
id=group_id,
user_emails=list(group_member_emails),
)
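
Jira masks the email of app accounts (and of users who hide their address) as the literal string "hidden", which is what the filter above keys on before falling back to a per-user lookup. A self-contained sketch of the same check against sample payloads shaped like jira_client.group_members() values (the data is made up):

members = {
    "acct-1": {"fullname": "Alice", "email": "alice@example.com", "active": True},
    "acct-2": {"fullname": "CI Bot", "email": "hidden", "active": True},
}
emails = [
    info["email"]
    for info in members.values()
    if info.get("email") and info["email"] != "hidden"
]
assert emails == ["alice@example.com"]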

View File

@@ -16,6 +16,10 @@ HolderMap = dict[str, list[Holder]]
logger = setup_logger()
def _get_role_id(holder: Holder) -> str | None:
return holder.get("value") or holder.get("parameter")
def _build_holder_map(permissions: list[dict]) -> dict[str, list[Holder]]:
"""
A "Holder" in JIRA is a person / entity who "holds" the corresponding permission.
@@ -110,80 +114,137 @@ def _get_user_emails(user_holders: list[Holder]) -> list[str]:
return emails
-def _get_user_emails_from_project_roles(
+def _get_user_emails_and_groups_from_project_roles(
     jira_client: JIRA,
     jira_project: str,
     project_role_holders: list[Holder],
-) -> list[str]:
-    # NOTE (@raunakab) a `parallel_yield` may be helpful here...?
+) -> tuple[list[str], list[str]]:
+    """
+    Get user emails and group names from project roles.
+    Returns a tuple of (emails, group_names).
+    """
+    # Get role IDs - Cloud uses "value", Data Center uses "parameter"
+    role_ids = []
+    for holder in project_role_holders:
+        role_id = _get_role_id(holder)
+        if role_id:
+            role_ids.append(role_id)
+        else:
+            logger.warning(f"No value or parameter in projectRole holder: {holder}")
     roles = [
-        jira_client.project_role(project=jira_project, id=project_role_holder["value"])
-        for project_role_holder in project_role_holders
-        if "value" in project_role_holder
+        jira_client.project_role(project=jira_project, id=role_id)
+        for role_id in role_ids
     ]
     emails = []
+    groups = []
     for role in roles:
+        if not hasattr(role, "actors"):
+            logger.warning(f"Project role {role} has no actors attribute")
+            continue
         for actor in role.actors:
-            if not hasattr(actor, "actorUser") or not hasattr(
-                actor.actorUser, "accountId"
-            ):
+            # Handle group actors
+            if hasattr(actor, "actorGroup"):
+                group_name = getattr(actor.actorGroup, "name", None) or getattr(
+                    actor.actorGroup, "displayName", None
+                )
+                if group_name:
+                    groups.append(group_name)
                 continue
-            user = jira_client.user(id=actor.actorUser.accountId)
-            if not hasattr(user, "accountType") or user.accountType != "atlassian":
+            # Handle user actors
+            if hasattr(actor, "actorUser"):
+                account_id = getattr(actor.actorUser, "accountId", None)
+                if not account_id:
+                    logger.error(f"No accountId in actorUser: {actor.actorUser}")
+                    continue
+                user = jira_client.user(id=account_id)
+                if not hasattr(user, "accountType") or user.accountType != "atlassian":
+                    logger.info(
+                        f"Skipping user {account_id} because it is not an atlassian user"
+                    )
+                    continue
+                if not hasattr(user, "emailAddress"):
+                    msg = f"User's email address was not able to be retrieved; {actor.actorUser.accountId=}"
+                    if hasattr(user, "displayName"):
+                        msg += f" {actor.displayName=}"
+                    logger.warning(msg)
+                    continue
+                emails.append(user.emailAddress)
+                continue
-            if not hasattr(user, "emailAddress"):
-                msg = f"User's email address was not able to be retrieved; {actor.actorUser.accountId=}"
-                if hasattr(user, "displayName"):
-                    msg += f" {actor.displayName=}"
-                logger.warn(msg)
-                continue
+            logger.debug(f"Skipping actor type: {actor}")
-            emails.append(user.emailAddress)
-    return emails
+    return emails, groups
def _build_external_access_from_holder_map(
jira_client: JIRA, jira_project: str, holder_map: HolderMap
) -> ExternalAccess:
"""
# Note:
If the `holder_map` contains an instance of "anyone", then this is a public JIRA project.
Otherwise, we fetch the "projectRole"s (i.e., the user-groups in JIRA speak), and the user emails.
"""
Build ExternalAccess from the holder map.
Holder types handled:
- "anyone": Public project, anyone can access
- "applicationRole": All users with a Jira license can access (treated as public)
- "user": Specific users with access
- "projectRole": Project roles containing users and/or groups
- "group": Groups directly assigned in the permission scheme
"""
# Public access - anyone can view
if "anyone" in holder_map:
return ExternalAccess(
external_user_emails=set(), external_user_group_ids=set(), is_public=True
)
# applicationRole means all users with a Jira license can access - treat as public
if "applicationRole" in holder_map:
return ExternalAccess(
external_user_emails=set(), external_user_group_ids=set(), is_public=True
)
# Get emails from explicit user holders
user_emails = (
_get_user_emails(user_holders=holder_map["user"])
if "user" in holder_map
else []
)
-    project_role_user_emails = (
-        _get_user_emails_from_project_roles(
-            jira_client=jira_client,
-            jira_project=jira_project,
-            project_role_holders=holder_map["projectRole"],
+    # Get emails and groups from project roles
+    project_role_user_emails: list[str] = []
+    project_role_groups: list[str] = []
+    if "projectRole" in holder_map:
+        project_role_user_emails, project_role_groups = (
+            _get_user_emails_and_groups_from_project_roles(
+                jira_client=jira_client,
+                jira_project=jira_project,
+                project_role_holders=holder_map["projectRole"],
+            )
+        )
-        if "projectRole" in holder_map
-        else []
-    )
# Get groups directly assigned in permission scheme (common in Data Center)
# Format: {'type': 'group', 'parameter': 'group-name', 'expand': 'group'}
direct_groups: list[str] = []
if "group" in holder_map:
for group_holder in holder_map["group"]:
group_name = _get_role_id(group_holder)
if group_name:
direct_groups.append(group_name)
else:
logger.error(f"No parameter/value in group holder: {group_holder}")
external_user_emails = set(user_emails + project_role_user_emails)
external_user_group_ids = set(project_role_groups + direct_groups)
return ExternalAccess(
external_user_emails=external_user_emails,
-        external_user_group_ids=set(),
+        external_user_group_ids=external_user_group_ids,
is_public=False,
)
@@ -197,9 +258,11 @@ def get_project_permissions(
)
if not hasattr(project_permissions, "permissions"):
logger.error(f"Project {jira_project} has no permissions attribute")
return None
if not isinstance(project_permissions.permissions, list):
logger.error(f"Project {jira_project} permissions is not a list")
return None
holder_map = _build_holder_map(permissions=project_permissions.permissions)
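
The Cloud/Data Center split that `_get_role_id` papers over comes down to which key the permission holder dict carries: Cloud sends "value", Data Center sends "parameter". A quick sketch with representative holder payloads (the field values are illustrative):

def get_role_id(holder: dict) -> str | None:
    # Jira Cloud puts the id under "value"; Data Center uses "parameter".
    return holder.get("value") or holder.get("parameter")

cloud_holder = {"type": "projectRole", "value": "10002"}
dc_holder = {"type": "group", "parameter": "jira-users", "expand": "group"}
assert get_role_id(cloud_holder) == "10002"
assert get_role_id(dc_holder) == "jira-users"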

View File

@@ -15,6 +15,7 @@ from ee.onyx.db.external_perm import ExternalUserGroup
from onyx.access.models import ExternalAccess
from onyx.access.utils import build_ext_group_name_for_onyx
from onyx.configs.constants import DocumentSource
+from onyx.connectors.sharepoint.connector import SHARED_DOCUMENTS_MAP_REVERSE
from onyx.connectors.sharepoint.connector import sleep_and_retry
from onyx.utils.logger import setup_logger
@@ -511,8 +512,8 @@ def get_external_access_from_sharepoint(
f"Failed to get SharePoint list item ID for item {drive_item.id}"
)
-    if drive_name == "Shared Documents":
-        drive_name = "Documents"
+    if drive_name in SHARED_DOCUMENTS_MAP_REVERSE:
+        drive_name = SHARED_DOCUMENTS_MAP_REVERSE[drive_name]
item = client_context.web.lists.get_by_title(drive_name).items.get_by_id(
item_id

View File

@@ -11,6 +11,7 @@ from ee.onyx.configs.app_configs import GITHUB_PERMISSION_DOC_SYNC_FREQUENCY
from ee.onyx.configs.app_configs import GITHUB_PERMISSION_GROUP_SYNC_FREQUENCY
from ee.onyx.configs.app_configs import GOOGLE_DRIVE_PERMISSION_GROUP_SYNC_FREQUENCY
from ee.onyx.configs.app_configs import JIRA_PERMISSION_DOC_SYNC_FREQUENCY
from ee.onyx.configs.app_configs import JIRA_PERMISSION_GROUP_SYNC_FREQUENCY
from ee.onyx.configs.app_configs import SHAREPOINT_PERMISSION_DOC_SYNC_FREQUENCY
from ee.onyx.configs.app_configs import SHAREPOINT_PERMISSION_GROUP_SYNC_FREQUENCY
from ee.onyx.configs.app_configs import SLACK_PERMISSION_DOC_SYNC_FREQUENCY
@@ -23,6 +24,7 @@ from ee.onyx.external_permissions.gmail.doc_sync import gmail_doc_sync
from ee.onyx.external_permissions.google_drive.doc_sync import gdrive_doc_sync
from ee.onyx.external_permissions.google_drive.group_sync import gdrive_group_sync
from ee.onyx.external_permissions.jira.doc_sync import jira_doc_sync
from ee.onyx.external_permissions.jira.group_sync import jira_group_sync
from ee.onyx.external_permissions.perm_sync_types import CensoringFuncType
from ee.onyx.external_permissions.perm_sync_types import DocSyncFuncType
from ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsFunction
@@ -110,6 +112,11 @@ _SOURCE_TO_SYNC_CONFIG: dict[DocumentSource, SyncConfig] = {
doc_sync_func=jira_doc_sync,
initial_index_should_sync=True,
),
group_sync_config=GroupSyncConfig(
group_sync_frequency=JIRA_PERMISSION_GROUP_SYNC_FREQUENCY,
group_sync_func=jira_group_sync,
group_sync_is_cc_pair_agnostic=True,
),
),
# Groups are not needed for Slack.
# All channel access is done at the individual user level.

View File

@@ -156,8 +156,6 @@ def validate_active_indexing_attempts(
"""
logger.info("Validating active indexing attempts")
-    heartbeat_timeout_seconds = HEARTBEAT_TIMEOUT_SECONDS
with get_session_with_current_tenant() as db_session:
# Find all active indexing attempts
@@ -175,6 +173,9 @@ def validate_active_indexing_attempts(
for attempt in active_attempts:
lock_beat.reacquire()
+        # Initialize timeout for each attempt to prevent state pollution
+        heartbeat_timeout_seconds = HEARTBEAT_TIMEOUT_SECONDS
# Double-check the attempt still exists and has the same status
fresh_attempt = get_index_attempt(db_session, attempt.id)
if not fresh_attempt or fresh_attempt.status.is_terminal():
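
The bug class this fixes: a value initialized once before a loop but mutated while handling one item silently carries over to the next item. A stripped-down illustration of why the reset belongs inside the loop (the doubling condition is invented for the example, not taken from the real validation code):

HEARTBEAT_TIMEOUT_SECONDS = 300

def effective_timeouts(attempts: list[dict]) -> list[int]:
    timeouts = []
    for attempt in attempts:
        # Reset per attempt; hoisting this above the loop would let the
        # doubled timeout from one attempt leak into every later one.
        timeout = HEARTBEAT_TIMEOUT_SECONDS
        if attempt.get("needs_grace_period"):
            timeout *= 2
        timeouts.append(timeout)
    return timeouts

assert effective_timeouts([{"needs_grace_period": True}, {}]) == [600, 300]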

View File

@@ -528,6 +528,10 @@ CONFLUENCE_TIMEZONE_OFFSET = float(
os.environ.get("CONFLUENCE_TIMEZONE_OFFSET", get_current_tz_offset())
)
CONFLUENCE_USE_ONYX_USERS_FOR_GROUP_SYNC = (
os.environ.get("CONFLUENCE_USE_ONYX_USERS_FOR_GROUP_SYNC", "").lower() == "true"
)
GOOGLE_DRIVE_CONNECTOR_SIZE_THRESHOLD = int(
os.environ.get("GOOGLE_DRIVE_CONNECTOR_SIZE_THRESHOLD", 10 * 1024 * 1024)
)

View File

@@ -68,6 +68,13 @@ logger = setup_logger()
SLIM_BATCH_SIZE = 1000
SHARED_DOCUMENTS_MAP = {
"Documents": "Shared Documents",
"Dokumente": "Freigegebene Dokumente",
"Documentos": "Documentos compartidos",
}
SHARED_DOCUMENTS_MAP_REVERSE = {v: k for k, v in SHARED_DOCUMENTS_MAP.items()}
ASPX_EXTENSION = ".aspx"
@@ -778,7 +785,10 @@ class SharepointConnector(
             drive
             for drive in drives
             if (drive.name and drive.name.lower() == drive_name.lower())
-            or (drive.name == "Documents" and drive_name == "Shared Documents")
+            or (
+                drive.name in SHARED_DOCUMENTS_MAP
+                and SHARED_DOCUMENTS_MAP[drive.name] == drive_name
+            )
         ]
drive = drives[0] if len(drives) > 0 else None
if drive is None:
@@ -885,10 +895,12 @@ class SharepointConnector(
             for drive in drives
             if drive.name == site_descriptor.drive_name
             or (
-                drive.name == "Documents"
-                and site_descriptor.drive_name == "Shared Documents"
+                drive.name in SHARED_DOCUMENTS_MAP
+                and SHARED_DOCUMENTS_MAP[drive.name]
+                == site_descriptor.drive_name
             )
-        ]
+        ]  # NOTE: right now we only support english, german and spanish drive names
+        # add to SHARED_DOCUMENTS_MAP if you want to support more languages
if not drives:
logger.warning(f"Drive '{site_descriptor.drive_name}' not found")
return []
@@ -914,9 +926,11 @@ class SharepointConnector(
)
# Use "Shared Documents" as the library name for the default "Documents" drive
# NOTE: right now we only support english, german and spanish drive names
# add to SHARED_DOCUMENTS_MAP if you want to support more languages
drive_name = (
"Shared Documents"
if drive.name == "Documents"
SHARED_DOCUMENTS_MAP[drive.name]
if drive.name in SHARED_DOCUMENTS_MAP
else cast(str, drive.name)
)
@@ -1455,10 +1469,8 @@ class SharepointConnector(
# Clear current drive and continue to next
checkpoint.current_drive_name = None
return checkpoint
-        current_drive_name = (
-            "Shared Documents"
-            if current_drive_name == "Documents"
-            else current_drive_name
+        current_drive_name = SHARED_DOCUMENTS_MAP.get(
+            current_drive_name, current_drive_name
         )
for driveitem in driveitems:
driveitem_extension = get_file_ext(driveitem.name)
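
SHARED_DOCUMENTS_MAP is used in both directions: localized display name to SharePoint library name when matching drives, and back again (via the reversed dict) when resolving list items on the permission-sync side. A small sketch of the two lookups:

SHARED_DOCUMENTS_MAP = {
    "Documents": "Shared Documents",
    "Dokumente": "Freigegebene Dokumente",
    "Documentos": "Documentos compartidos",
}
SHARED_DOCUMENTS_MAP_REVERSE = {v: k for k, v in SHARED_DOCUMENTS_MAP.items()}

# Graph reports the localized drive name; the connector compares it against
# the configured "Shared Documents"-style name, and permission sync maps back.
assert SHARED_DOCUMENTS_MAP.get("Dokumente", "Dokumente") == "Freigegebene Dokumente"
assert SHARED_DOCUMENTS_MAP_REVERSE["Documentos compartidos"] == "Documentos"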

View File

@@ -686,11 +686,6 @@ def delete_documents_complete__no_commit(
document_ids=document_ids,
)
-    delete_chunk_stats_by_connector_credential_pair__no_commit(
-        db_session=db_session,
-        document_ids=document_ids,
-    )
delete_documents_by_connector_credential_pair__no_commit(db_session, document_ids)
delete_document_feedback_for_documents__no_commit(
document_ids=document_ids, db_session=db_session

View File

@@ -2,6 +2,7 @@ from collections.abc import Sequence
from typing import Any
from sqlalchemy import select
from sqlalchemy.orm import joinedload
from sqlalchemy.orm import Session
from onyx.configs.chat_configs import MAX_CHUNKS_FED_TO_CHAT
@@ -269,7 +270,9 @@ def fetch_slack_channel_config_for_channel_or_default(
# attempt to find channel-specific config first
if channel_name is not None:
sc_config = db_session.scalar(
-            select(SlackChannelConfig).where(
+            select(SlackChannelConfig)
+            .options(joinedload(SlackChannelConfig.persona))
+            .where(
SlackChannelConfig.slack_bot_id == slack_bot_id,
SlackChannelConfig.channel_config["channel_name"].astext
== channel_name,
@@ -283,7 +286,9 @@ def fetch_slack_channel_config_for_channel_or_default(
# if none found, see if there is a default
default_sc = db_session.scalar(
-        select(SlackChannelConfig).where(
+        select(SlackChannelConfig)
+        .options(joinedload(SlackChannelConfig.persona))
+        .where(
SlackChannelConfig.slack_bot_id == slack_bot_id,
SlackChannelConfig.is_default == True, # noqa: E712
)
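
For reference, joinedload folds the relationship into the same SELECT, so `.persona` is populated before the session goes away; a plain lazy relationship would instead try to emit a query on first access and fail once the object is detached. A minimal self-contained sketch of the pattern with toy models (not the Onyx schema):

from sqlalchemy import ForeignKey, create_engine, select
from sqlalchemy.orm import (
    DeclarativeBase,
    Mapped,
    Session,
    joinedload,
    mapped_column,
    relationship,
)

class Base(DeclarativeBase):
    pass

class Persona(Base):
    __tablename__ = "persona"
    id: Mapped[int] = mapped_column(primary_key=True)

class ChannelConfig(Base):
    __tablename__ = "channel_config"
    id: Mapped[int] = mapped_column(primary_key=True)
    persona_id: Mapped[int] = mapped_column(ForeignKey("persona.id"))
    persona: Mapped[Persona] = relationship()

engine = create_engine("sqlite://")
Base.metadata.create_all(engine)
with Session(engine) as session:
    session.add(ChannelConfig(id=1, persona=Persona(id=7)))
    session.commit()
with Session(engine) as session:
    cfg = session.scalar(
        select(ChannelConfig).options(joinedload(ChannelConfig.persona))
    )
# The relationship was loaded eagerly, so it survives session close.
assert cfg is not None and cfg.persona.id == 7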

View File

@@ -1066,6 +1066,17 @@ class InformationContentClassificationModel:
self,
queries: list[str],
) -> list[ContentClassificationPrediction]:
if os.environ.get("DISABLE_MODEL_SERVER", "").lower() == "true":
logger.info(
"DISABLE_MODEL_SERVER is set, returning default classifications"
)
return [
ContentClassificationPrediction(
predicted_label=1, content_boost_factor=1.0
)
for _ in queries
]
response = requests.post(self.content_server_endpoint, json=queries)
response.raise_for_status()
@@ -1092,6 +1103,14 @@ class ConnectorClassificationModel:
query: str,
available_connectors: list[str],
) -> list[str]:
# Check if model server is disabled
if os.environ.get("DISABLE_MODEL_SERVER", "").lower() == "true":
logger.info(
"DISABLE_MODEL_SERVER is set, returning all available connectors"
)
# Return all available connectors when model server is disabled
return available_connectors
connector_classification_request = ConnectorClassificationRequest(
available_connectors=available_connectors,
query=query,
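
Note the flag only trips on the exact string "true" (case-insensitive): "1", "yes", or an empty value leave the model server enabled, which the tests further down pin explicitly. The predicate in isolation:

import os

def model_server_disabled() -> bool:
    # Only a case-insensitive "true" disables; "1", "yes", "" do not.
    return os.environ.get("DISABLE_MODEL_SERVER", "").lower() == "true"

os.environ["DISABLE_MODEL_SERVER"] = "True"
assert model_server_disabled()
os.environ["DISABLE_MODEL_SERVER"] = "1"
assert not model_server_disabled()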

View File

@@ -117,12 +117,14 @@ def handle_regular_answer(
# This way slack flow always has a persona
persona = slack_channel_config.persona
if not persona:
logger.warning("No persona found for channel config, using default persona")
with get_session_with_current_tenant() as db_session:
persona = get_persona_by_id(DEFAULT_PERSONA_ID, user, db_session)
document_set_names = [
document_set.name for document_set in persona.document_sets
]
else:
logger.info(f"Using persona {persona.name} for channel config")
document_set_names = [
document_set.name for document_set in persona.document_sets
]

View File

@@ -1,3 +1,4 @@
import os
from functools import lru_cache
import requests
@@ -13,16 +14,14 @@ logger = setup_logger()
def _get_gpu_status_from_model_server(indexing: bool) -> bool:
if os.environ.get("DISABLE_MODEL_SERVER", "").lower() == "true":
logger.info("DISABLE_MODEL_SERVER is set, assuming no GPU available")
return False
if indexing:
model_server_url = f"{INDEXING_MODEL_SERVER_HOST}:{INDEXING_MODEL_SERVER_PORT}"
else:
model_server_url = f"{MODEL_SERVER_HOST}:{MODEL_SERVER_PORT}"
# If model server is disabled, return False (no GPU available)
if model_server_url in ["disabled", "disabled:9000"]:
logger.info("Model server is disabled, assuming no GPU available")
return False
if "http" not in model_server_url:
model_server_url = f"http://{model_server_url}"

View File

@@ -85,7 +85,7 @@ botocore==1.39.11
# s3transfer
botocore-stubs==1.40.74
# via boto3-stubs
-braintrust==0.2.6
+braintrust==0.3.9
# via braintrust-langchain
braintrust-langchain==0.0.4
bytecode==0.17.0
@@ -1150,6 +1150,7 @@ werkzeug==3.1.1
wrapt==1.17.3
# via
# aiobotocore
# braintrust
# ddtrace
# deprecated
# langfuse

View File

@@ -10,14 +10,24 @@ SLACK_CHANNEL_ID = "channel_id"
# Default to True (skip warmup) if not set, otherwise respect the value
SKIP_WARM_UP = os.environ.get("SKIP_WARM_UP", "true").lower() == "true"
-MODEL_SERVER_HOST = os.environ.get("MODEL_SERVER_HOST") or "localhost"
-MODEL_SERVER_ALLOWED_HOST = os.environ.get("MODEL_SERVER_HOST") or "0.0.0.0"
+# Check if model server is disabled
+DISABLE_MODEL_SERVER = os.environ.get("DISABLE_MODEL_SERVER", "").lower() == "true"
+# If model server is disabled, use "disabled" as host to trigger proper handling
+if DISABLE_MODEL_SERVER:
+    MODEL_SERVER_HOST = "disabled"
+    MODEL_SERVER_ALLOWED_HOST = "disabled"
+    INDEXING_MODEL_SERVER_HOST = "disabled"
+else:
+    MODEL_SERVER_HOST = os.environ.get("MODEL_SERVER_HOST") or "localhost"
+    MODEL_SERVER_ALLOWED_HOST = os.environ.get("MODEL_SERVER_HOST") or "0.0.0.0"
+    INDEXING_MODEL_SERVER_HOST = (
+        os.environ.get("INDEXING_MODEL_SERVER_HOST") or MODEL_SERVER_HOST
+    )
 MODEL_SERVER_PORT = int(os.environ.get("MODEL_SERVER_PORT") or "9000")
 # Model server for indexing should use a separate one to not allow indexing to introduce delay
 # for inference
-INDEXING_MODEL_SERVER_HOST = (
-    os.environ.get("INDEXING_MODEL_SERVER_HOST") or MODEL_SERVER_HOST
-)
 INDEXING_MODEL_SERVER_PORT = int(
     os.environ.get("INDEXING_MODEL_SERVER_PORT") or MODEL_SERVER_PORT
 )
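
A side effect worth noting: because the hosts are pinned to "disabled" at import time, any URL assembled from them is deliberately unroutable, so a code path that forgets to check the flag fails loudly instead of quietly hitting localhost. A sketch of the resulting sentinel (assuming MODEL_SERVER_PORT is unset):

import os

os.environ["DISABLE_MODEL_SERVER"] = "true"
disabled = os.environ.get("DISABLE_MODEL_SERVER", "").lower() == "true"
host = "disabled" if disabled else "localhost"
port = int(os.environ.get("MODEL_SERVER_PORT") or "9000")
# This is the "disabled:9000" string the old gpu_utils check matched on;
# the new env-var check short-circuits before any URL is built.
assert f"{host}:{port}" == "disabled:9000"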

View File

@@ -0,0 +1,29 @@
import os
from typing import Any
import pytest
@pytest.fixture
def jira_connector_config() -> dict[str, Any]:
jira_base_url = os.environ.get("JIRA_BASE_URL", "https://danswerai.atlassian.net")
return {
"jira_base_url": jira_base_url,
"project_key": "", # Empty to sync all projects
"scoped_token": False,
}
@pytest.fixture
def jira_credential_json() -> dict[str, Any]:
user_email = os.environ.get("JIRA_ADMIN_USER_EMAIL", "chris@onyx.app")
api_token = os.environ.get("JIRA_ADMIN_API_TOKEN")
assert user_email, "JIRA_ADMIN_USER_EMAIL environment variable is required"
assert api_token, "JIRA_ADMIN_API_TOKEN environment variable is required"
return {
"jira_user_email": user_email,
"jira_api_token": api_token,
}

View File

@@ -0,0 +1,228 @@
from typing import Any
from pydantic import BaseModel
from sqlalchemy.orm import Session
from ee.onyx.external_permissions.jira.doc_sync import jira_doc_sync
from onyx.access.models import DocExternalAccess
from onyx.configs.constants import DocumentSource
from onyx.connectors.models import InputType
from onyx.db.enums import AccessType
from onyx.db.enums import ConnectorCredentialPairStatus
from onyx.db.models import Connector
from onyx.db.models import ConnectorCredentialPair
from onyx.db.models import Credential
from onyx.db.utils import DocumentRow
from onyx.db.utils import SortOrder
from onyx.utils.variable_functionality import global_version
# In order to get these tests to run, use the credentials from Bitwarden.
# Search up "ENV vars for local and Github tests", and find the Jira relevant key-value pairs.
# Required env vars: JIRA_USER_EMAIL, JIRA_API_TOKEN
class DocExternalAccessSet(BaseModel):
"""A version of DocExternalAccess that uses sets for comparison."""
doc_id: str
external_user_emails: set[str]
external_user_group_ids: set[str]
is_public: bool
@classmethod
def from_doc_external_access(
cls, doc_external_access: DocExternalAccess
) -> "DocExternalAccessSet":
return cls(
doc_id=doc_external_access.doc_id,
external_user_emails=doc_external_access.external_access.external_user_emails,
external_user_group_ids=doc_external_access.external_access.external_user_group_ids,
is_public=doc_external_access.external_access.is_public,
)
def test_jira_doc_sync(
db_session: Session,
jira_connector_config: dict[str, Any],
jira_credential_json: dict[str, Any],
) -> None:
"""Test that Jira doc sync returns documents with correct permissions.
This test uses the AS project which has applicationRole permission,
meaning all documents should be marked as public.
"""
# NOTE: must set EE on or else the connector will skip the perm syncing
global_version.set_ee()
try:
# Use AS project specifically for this test
connector_config = {
**jira_connector_config,
"project_key": "AS", # DailyConnectorTestProject
}
connector = Connector(
name="Test Jira Doc Sync Connector",
source=DocumentSource.JIRA,
input_type=InputType.POLL,
connector_specific_config=connector_config,
refresh_freq=None,
prune_freq=None,
indexing_start=None,
)
db_session.add(connector)
db_session.flush()
credential = Credential(
source=DocumentSource.JIRA,
credential_json=jira_credential_json,
)
db_session.add(credential)
db_session.flush()
cc_pair = ConnectorCredentialPair(
connector_id=connector.id,
credential_id=credential.id,
name="Test Jira Doc Sync CC Pair",
status=ConnectorCredentialPairStatus.ACTIVE,
access_type=AccessType.SYNC,
auto_sync_options=None,
)
db_session.add(cc_pair)
db_session.flush()
db_session.refresh(cc_pair)
# Mock functions - we don't have existing docs in the test DB
def fetch_all_existing_docs_fn(
sort_order: SortOrder | None = None,
) -> list[DocumentRow]:
return []
def fetch_all_existing_docs_ids_fn() -> list[str]:
return []
doc_sync_iter = jira_doc_sync(
cc_pair=cc_pair,
fetch_all_existing_docs_fn=fetch_all_existing_docs_fn,
fetch_all_existing_docs_ids_fn=fetch_all_existing_docs_ids_fn,
)
# Expected documents from the danswerai.atlassian.net Jira instance
# The AS project has applicationRole permission, so all docs should be public
_EXPECTED_JIRA_DOCS = [
DocExternalAccessSet(
doc_id="https://danswerai.atlassian.net/browse/AS-3",
external_user_emails=set(),
external_user_group_ids=set(),
is_public=True,
),
DocExternalAccessSet(
doc_id="https://danswerai.atlassian.net/browse/AS-4",
external_user_emails=set(),
external_user_group_ids=set(),
is_public=True,
),
]
expected_docs = {doc.doc_id: doc for doc in _EXPECTED_JIRA_DOCS}
actual_docs = {
doc.doc_id: DocExternalAccessSet.from_doc_external_access(doc)
for doc in doc_sync_iter
}
assert expected_docs == actual_docs, (
f"Expected docs: {expected_docs}\n" f"Actual docs: {actual_docs}"
)
finally:
db_session.rollback()
def test_jira_doc_sync_with_specific_permissions(
db_session: Session,
jira_connector_config: dict[str, Any],
jira_credential_json: dict[str, Any],
) -> None:
"""Test that Jira doc sync returns documents with specific permissions.
This test uses a project that has specific user permissions to verify
that specific users are correctly extracted.
"""
# NOTE: must set EE on or else the connector will skip the perm syncing
global_version.set_ee()
try:
# Use SUP project which has specific user permissions
connector_config = {
**jira_connector_config,
"project_key": "SUP",
}
connector = Connector(
name="Test Jira Doc Sync with Groups Connector",
source=DocumentSource.JIRA,
input_type=InputType.POLL,
connector_specific_config=connector_config,
refresh_freq=None,
prune_freq=None,
indexing_start=None,
)
db_session.add(connector)
db_session.flush()
credential = Credential(
source=DocumentSource.JIRA,
credential_json=jira_credential_json,
)
db_session.add(credential)
db_session.flush()
cc_pair = ConnectorCredentialPair(
connector_id=connector.id,
credential_id=credential.id,
name="Test Jira Doc Sync with Groups CC Pair",
status=ConnectorCredentialPairStatus.ACTIVE,
access_type=AccessType.SYNC,
auto_sync_options=None,
)
db_session.add(cc_pair)
db_session.flush()
db_session.refresh(cc_pair)
# Mock functions
def fetch_all_existing_docs_fn(
sort_order: SortOrder | None = None,
) -> list[DocumentRow]:
return []
def fetch_all_existing_docs_ids_fn() -> list[str]:
return []
doc_sync_iter = jira_doc_sync(
cc_pair=cc_pair,
fetch_all_existing_docs_fn=fetch_all_existing_docs_fn,
fetch_all_existing_docs_ids_fn=fetch_all_existing_docs_ids_fn,
)
docs = list(doc_sync_iter)
# SUP project should have user-specific permissions (not public)
assert len(docs) > 0, "Expected at least one document from SUP project"
_EXPECTED_USER_EMAILS = set(
["yuhong@onyx.app", "chris@onyx.app", "founders@onyx.app"]
)
_EXPECTED_USER_GROUP_IDS = set(["jira-users-danswerai"])
for doc in docs:
assert doc.doc_id.startswith("https://danswerai.atlassian.net/browse/SUP-")
# SUP project has specific users assigned, not applicationRole
assert (
not doc.external_access.is_public
), f"Document {doc.doc_id} should not be public"
# Should have user emails
assert doc.external_access.external_user_emails == _EXPECTED_USER_EMAILS
assert (
doc.external_access.external_user_group_ids == _EXPECTED_USER_GROUP_IDS
)
finally:
db_session.rollback()

View File

@@ -0,0 +1,133 @@
from typing import Any
from sqlalchemy.orm import Session
from ee.onyx.external_permissions.jira.group_sync import jira_group_sync
from onyx.configs.constants import DocumentSource
from onyx.connectors.models import InputType
from onyx.db.enums import AccessType
from onyx.db.enums import ConnectorCredentialPairStatus
from onyx.db.models import Connector
from onyx.db.models import ConnectorCredentialPair
from onyx.db.models import Credential
from shared_configs.contextvars import get_current_tenant_id
from tests.daily.connectors.confluence.models import ExternalUserGroupSet
# In order to get these tests to run, use the credentials from Bitwarden.
# Search up "ENV vars for local and Github tests", and find the Jira relevant key-value pairs.
# Required env vars: JIRA_USER_EMAIL, JIRA_API_TOKEN
# Expected groups from the danswerai.atlassian.net Jira instance
# Note: These groups are shared with Confluence since they're both Atlassian products
# App accounts (bots, integrations) are filtered out
_EXPECTED_JIRA_GROUPS = [
ExternalUserGroupSet(
id="Yuhong Only No Chris Allowed",
user_emails={"yuhong@onyx.app"},
gives_anyone_access=False,
),
ExternalUserGroupSet(
id="confluence-admins-danswerai",
user_emails={"chris@onyx.app", "yuhong@onyx.app"},
gives_anyone_access=False,
),
ExternalUserGroupSet(
id="confluence-user-access-admins-danswerai",
user_emails={"hagen@danswer.ai"},
gives_anyone_access=False,
),
ExternalUserGroupSet(
id="confluence-users-danswerai",
user_emails={
"chris@onyx.app",
"founders@onyx.app",
"hagen@danswer.ai",
"pablo@onyx.app",
"yuhong@onyx.app",
},
gives_anyone_access=False,
),
ExternalUserGroupSet(
id="jira-admins-danswerai",
user_emails={"founders@onyx.app", "hagen@danswer.ai", "pablo@onyx.app"},
gives_anyone_access=False,
),
ExternalUserGroupSet(
id="jira-user-access-admins-danswerai",
user_emails={"hagen@danswer.ai"},
gives_anyone_access=False,
),
ExternalUserGroupSet(
id="jira-users-danswerai",
user_emails={
"chris@onyx.app",
"founders@onyx.app",
"hagen@danswer.ai",
"pablo@onyx.app",
},
gives_anyone_access=False,
),
ExternalUserGroupSet(
id="org-admins",
user_emails={
"chris@onyx.app",
"founders@onyx.app",
"yuhong@onyx.app",
},
gives_anyone_access=False,
),
]
def test_jira_group_sync(
db_session: Session,
jira_connector_config: dict[str, Any],
jira_credential_json: dict[str, Any],
) -> None:
try:
connector = Connector(
name="Test Jira Connector",
source=DocumentSource.JIRA,
input_type=InputType.POLL,
connector_specific_config=jira_connector_config,
refresh_freq=None,
prune_freq=None,
indexing_start=None,
)
db_session.add(connector)
db_session.flush()
credential = Credential(
source=DocumentSource.JIRA,
credential_json=jira_credential_json,
)
db_session.add(credential)
db_session.flush()
cc_pair = ConnectorCredentialPair(
connector_id=connector.id,
credential_id=credential.id,
name="Test Jira CC Pair",
status=ConnectorCredentialPairStatus.ACTIVE,
access_type=AccessType.SYNC,
auto_sync_options=None,
)
db_session.add(cc_pair)
db_session.flush()
db_session.refresh(cc_pair)
tenant_id = get_current_tenant_id()
group_sync_iter = jira_group_sync(
tenant_id=tenant_id,
cc_pair=cc_pair,
)
expected_groups = {group.id: group for group in _EXPECTED_JIRA_GROUPS}
actual_groups = {
group.id: ExternalUserGroupSet.from_model(external_user_group=group)
for group in group_sync_iter
}
assert expected_groups == actual_groups
finally:
db_session.rollback()

View File

@@ -6,9 +6,11 @@ from unittest.mock import patch
from uuid import uuid4
# Set environment variables to disable model server for testing
os.environ["DISABLE_MODEL_SERVER"] = "true"
os.environ["MODEL_SERVER_HOST"] = "disabled"
os.environ["MODEL_SERVER_PORT"] = "9000"
from sqlalchemy import inspect
from sqlalchemy.orm import Session
from slack_sdk.errors import SlackApiError
@@ -760,3 +762,76 @@ def test_multiple_missing_scopes_resilience(
# Should still return available channels
assert len(result) == 1, f"Expected 1 channel, got {len(result)}"
assert result["C1234567890"]["name"] == "general"
def test_slack_channel_config_eager_loads_persona(db_session: Session) -> None:
"""Test that fetch_slack_channel_config_for_channel_or_default eagerly loads persona.
This prevents lazy loading failures when the session context changes later
in the request handling flow (e.g., in handle_regular_answer).
"""
from onyx.db.slack_channel_config import (
fetch_slack_channel_config_for_channel_or_default,
)
unique_id = str(uuid4())[:8]
# Create a persona (using same fields as _create_test_persona_with_slack_config)
persona = Persona(
name=f"test_eager_load_persona_{unique_id}",
description="Test persona for eager loading test",
chunks_above=0,
chunks_below=0,
llm_relevance_filter=True,
llm_filter_extraction=True,
recency_bias=RecencyBiasSetting.AUTO,
system_prompt="You are a helpful assistant.",
task_prompt="Answer the user's question.",
)
db_session.add(persona)
db_session.flush()
# Create a slack bot
slack_bot = SlackBot(
name=f"Test Bot {unique_id}",
bot_token=f"xoxb-test-{unique_id}",
app_token=f"xapp-test-{unique_id}",
enabled=True,
)
db_session.add(slack_bot)
db_session.flush()
# Create slack channel config with persona
channel_name = f"test-channel-{unique_id}"
slack_channel_config = SlackChannelConfig(
slack_bot_id=slack_bot.id,
persona_id=persona.id,
channel_config={"channel_name": channel_name, "disabled": False},
enable_auto_filters=False,
is_default=False,
)
db_session.add(slack_channel_config)
db_session.commit()
# Fetch the config using the function under test
fetched_config = fetch_slack_channel_config_for_channel_or_default(
db_session=db_session,
slack_bot_id=slack_bot.id,
channel_name=channel_name,
)
assert fetched_config is not None, "Should find the channel config"
# Check that persona relationship is already loaded (not pending lazy load)
insp = inspect(fetched_config)
assert insp is not None, "Should be able to inspect the config"
assert "persona" not in insp.unloaded, (
"Persona should be eagerly loaded, not pending lazy load. "
"This is required to prevent fallback to default persona when "
"session context changes in handle_regular_answer."
)
# Verify the persona is correct
assert fetched_config.persona is not None, "Persona should not be None"
assert fetched_config.persona.id == persona.id, "Should load the correct persona"
assert fetched_config.persona.name == persona.name

View File

@@ -0,0 +1,200 @@
from __future__ import annotations
from collections import deque
from collections.abc import Sequence
from datetime import datetime
from types import SimpleNamespace
from typing import Any
import pytest
from onyx.connectors.sharepoint.connector import SHARED_DOCUMENTS_MAP
from onyx.connectors.sharepoint.connector import SharepointConnector
from onyx.connectors.sharepoint.connector import SharepointConnectorCheckpoint
from onyx.connectors.sharepoint.connector import SiteDescriptor
class _FakeQuery:
def __init__(self, payload: Sequence[Any]) -> None:
self._payload = payload
def execute_query(self) -> Sequence[Any]:
return self._payload
class _FakeFolder:
def __init__(self, items: Sequence[Any]) -> None:
self._items = items
self.name = "root"
def get_by_path(self, _path: str) -> _FakeFolder:
return self
def get_files(
self, *, recursive: bool, page_size: int
) -> _FakeQuery: # noqa: ARG002
return _FakeQuery(self._items)
class _FakeDrive:
def __init__(self, name: str, items: Sequence[Any]) -> None:
self.name = name
self.root = _FakeFolder(items)
class _FakeDrivesCollection:
def __init__(self, drives: Sequence[_FakeDrive]) -> None:
self._drives = drives
def get(self) -> _FakeQuery:
return _FakeQuery(list(self._drives))
class _FakeSite:
def __init__(self, drives: Sequence[_FakeDrive]) -> None:
self.drives = _FakeDrivesCollection(drives)
class _FakeSites:
def __init__(self, drives: Sequence[_FakeDrive]) -> None:
self._drives = drives
def get_by_url(self, _url: str) -> _FakeSite:
return _FakeSite(self._drives)
class _FakeGraphClient:
def __init__(self, drives: Sequence[_FakeDrive]) -> None:
self.sites = _FakeSites(drives)
def _build_connector(drives: Sequence[_FakeDrive]) -> SharepointConnector:
connector = SharepointConnector()
connector._graph_client = _FakeGraphClient(drives) # type: ignore[assignment]
return connector
@pytest.mark.parametrize(
("requested_drive_name", "graph_drive_name"),
[
("Shared Documents", "Documents"),
("Freigegebene Dokumente", "Dokumente"),
("Documentos compartidos", "Documentos"),
],
)
def test_fetch_driveitems_matches_international_drive_names(
requested_drive_name: str, graph_drive_name: str
) -> None:
item = SimpleNamespace(parent_reference=SimpleNamespace(path=None))
connector = _build_connector([_FakeDrive(graph_drive_name, [item])])
site_descriptor = SiteDescriptor(
url="https://example.sharepoint.com/sites/sample",
drive_name=requested_drive_name,
folder_path=None,
)
results = connector._fetch_driveitems(site_descriptor=site_descriptor)
assert len(results) == 1
drive_item, returned_drive_name = results[0]
assert drive_item is item
assert returned_drive_name == requested_drive_name
@pytest.mark.parametrize(
("requested_drive_name", "graph_drive_name"),
[
("Shared Documents", "Documents"),
("Freigegebene Dokumente", "Dokumente"),
("Documentos compartidos", "Documentos"),
],
)
def test_get_drive_items_for_drive_name_matches_map(
requested_drive_name: str, graph_drive_name: str
) -> None:
item = SimpleNamespace()
connector = _build_connector([_FakeDrive(graph_drive_name, [item])])
site_descriptor = SiteDescriptor(
url="https://example.sharepoint.com/sites/sample",
drive_name=requested_drive_name,
folder_path=None,
)
results = connector._get_drive_items_for_drive_name(
site_descriptor=site_descriptor,
drive_name=requested_drive_name,
)
assert len(results) == 1
assert results[0] is item
def test_load_from_checkpoint_maps_drive_name(monkeypatch: pytest.MonkeyPatch) -> None:
connector = SharepointConnector()
connector._graph_client = object() # type: ignore[assignment]
connector.include_site_pages = False
captured_drive_names: list[str] = []
def fake_get_drive_items(
self: SharepointConnector,
site_descriptor: SiteDescriptor,
drive_name: str,
start: datetime | None,
end: datetime | None,
) -> list[SimpleNamespace]:
assert drive_name == "Documents"
return [
SimpleNamespace(
name="sample.pdf",
web_url="https://example.sharepoint.com/sites/sample/sample.pdf",
)
]
def fake_convert(
driveitem: SimpleNamespace,
drive_name: str,
ctx: Any,
graph_client: Any,
include_permissions: bool,
) -> SimpleNamespace:
captured_drive_names.append(drive_name)
return SimpleNamespace(sections=["content"])
monkeypatch.setattr(
SharepointConnector,
"_get_drive_items_for_drive_name",
fake_get_drive_items,
)
monkeypatch.setattr(
"onyx.connectors.sharepoint.connector._convert_driveitem_to_document_with_permissions",
fake_convert,
)
checkpoint = SharepointConnectorCheckpoint(has_more=True)
checkpoint.cached_site_descriptors = deque()
checkpoint.current_site_descriptor = SiteDescriptor(
url="https://example.sharepoint.com/sites/sample",
drive_name=SHARED_DOCUMENTS_MAP["Documents"],
folder_path=None,
)
checkpoint.cached_drive_names = deque(["Documents"])
checkpoint.current_drive_name = None
checkpoint.process_site_pages = False
generator = connector._load_from_checkpoint(
start=0,
end=0,
checkpoint=checkpoint,
include_permissions=False,
)
documents: list[Any] = []
try:
while True:
documents.append(next(generator))
except StopIteration:
pass
assert len(documents) == 1
assert captured_drive_names == [SHARED_DOCUMENTS_MAP["Documents"]]

View File

@@ -1,3 +1,4 @@
import os
from collections.abc import AsyncGenerator
from typing import List
from unittest.mock import AsyncMock
@@ -9,6 +10,12 @@ from httpx import AsyncClient
from litellm.exceptions import RateLimitError
from onyx.natural_language_processing.search_nlp_models import CloudEmbedding
from onyx.natural_language_processing.search_nlp_models import (
ConnectorClassificationModel,
)
from onyx.natural_language_processing.search_nlp_models import (
InformationContentClassificationModel,
)
from shared_configs.enums import EmbeddingProvider
from shared_configs.enums import EmbedTextType
@@ -81,3 +88,95 @@ async def test_rate_limit_handling() -> None:
model_name="fake-model",
text_type=EmbedTextType.QUERY,
)
class TestInformationContentClassificationModel:
"""Test cases for InformationContentClassificationModel with DISABLE_MODEL_SERVER"""
@patch.dict(os.environ, {"DISABLE_MODEL_SERVER": "true"})
def test_predict_with_disable_model_server(self) -> None:
"""Test that predict returns default classifications when DISABLE_MODEL_SERVER is true"""
model = InformationContentClassificationModel()
queries = ["What is AI?", "How does Python work?"]
results = model.predict(queries)
assert len(results) == 2
for result in results:
assert result.predicted_label == 1 # 1 indicates informational content
assert result.content_boost_factor == 1.0 # Default boost factor
@patch.dict(os.environ, {"DISABLE_MODEL_SERVER": "false"})
@patch("requests.post")
def test_predict_with_model_server_enabled(self, mock_post: MagicMock) -> None:
"""Test that predict makes request when DISABLE_MODEL_SERVER is false"""
mock_response = MagicMock()
mock_response.json.return_value = [
{"predicted_label": 1, "content_boost_factor": 1.0},
{"predicted_label": 0, "content_boost_factor": 0.8},
]
mock_post.return_value = mock_response
model = InformationContentClassificationModel()
queries = ["test1", "test2"]
results = model.predict(queries)
assert len(results) == 2
assert results[0].predicted_label == 1
assert results[0].content_boost_factor == 1.0
assert results[1].predicted_label == 0
assert results[1].content_boost_factor == 0.8
mock_post.assert_called_once()
class TestConnectorClassificationModel:
"""Test cases for ConnectorClassificationModel with DISABLE_MODEL_SERVER"""
@patch.dict(os.environ, {"DISABLE_MODEL_SERVER": "true"})
def test_predict_with_disable_model_server(self) -> None:
"""Test that predict returns all connectors when DISABLE_MODEL_SERVER is true"""
model = ConnectorClassificationModel()
query = "Search for documentation"
available_connectors = ["confluence", "slack", "github"]
results = model.predict(query, available_connectors)
assert results == available_connectors
@patch.dict(os.environ, {"DISABLE_MODEL_SERVER": "false"})
@patch("requests.post")
def test_predict_with_model_server_enabled(self, mock_post: MagicMock) -> None:
"""Test that predict makes request when DISABLE_MODEL_SERVER is false"""
mock_response = MagicMock()
mock_response.json.return_value = {"connectors": ["confluence", "github"]}
mock_post.return_value = mock_response
model = ConnectorClassificationModel()
query = "Search for documentation"
available_connectors = ["confluence", "slack", "github"]
results = model.predict(query, available_connectors)
assert results == ["confluence", "github"]
mock_post.assert_called_once()
@patch.dict(os.environ, {"DISABLE_MODEL_SERVER": "1"})
@patch("requests.post")
def test_predict_with_disable_model_server_numeric(
self, mock_post: MagicMock
) -> None:
"""Test that predict makes request when DISABLE_MODEL_SERVER is 1 (not 'true')"""
# "1" should NOT trigger disable (only "true" should)
mock_response = MagicMock()
mock_response.json.return_value = {"connectors": ["github"]}
mock_post.return_value = mock_response
model = ConnectorClassificationModel()
query = "Find issues"
available_connectors = ["jira", "github"]
results = model.predict(query, available_connectors)
assert results == ["github"]
mock_post.assert_called_once()

View File

@@ -0,0 +1,103 @@
"""
Test cases for onyx/utils/gpu_utils.py with DISABLE_MODEL_SERVER environment variable
"""
import os
from unittest import TestCase
from unittest.mock import MagicMock
from unittest.mock import patch
import requests
from onyx.utils.gpu_utils import _get_gpu_status_from_model_server
class TestGPUUtils(TestCase):
"""Test cases for GPU utilities with DISABLE_MODEL_SERVER support"""
@patch.dict(os.environ, {"DISABLE_MODEL_SERVER": "true"})
def test_disable_model_server_true(self) -> None:
"""Test that GPU status returns False when DISABLE_MODEL_SERVER is true"""
result = _get_gpu_status_from_model_server(indexing=False)
assert result is False
@patch.dict(os.environ, {"DISABLE_MODEL_SERVER": "True"})
def test_disable_model_server_capital_true(self) -> None:
"""Test that GPU status returns False when DISABLE_MODEL_SERVER is True (capital)"""
# "True" WILL trigger disable because .lower() is called
result = _get_gpu_status_from_model_server(indexing=False)
assert result is False
@patch.dict(os.environ, {"DISABLE_MODEL_SERVER": "1"})
@patch("requests.get")
def test_disable_model_server_one(self, mock_get: MagicMock) -> None:
"""Test that GPU status makes request when DISABLE_MODEL_SERVER is 1"""
# "1" should NOT trigger disable (only "true" should)
mock_response = MagicMock()
mock_response.json.return_value = {"gpu_available": True}
mock_get.return_value = mock_response
result = _get_gpu_status_from_model_server(indexing=False)
assert result is True
mock_get.assert_called_once()
@patch.dict(os.environ, {"DISABLE_MODEL_SERVER": "yes"})
@patch("requests.get")
def test_disable_model_server_yes(self, mock_get: MagicMock) -> None:
"""Test that GPU status makes request when DISABLE_MODEL_SERVER is yes"""
# "yes" should NOT trigger disable (only "true" should)
mock_response = MagicMock()
mock_response.json.return_value = {"gpu_available": False}
mock_get.return_value = mock_response
result = _get_gpu_status_from_model_server(indexing=True)
assert result is False
mock_get.assert_called_once()
@patch.dict(os.environ, {"DISABLE_MODEL_SERVER": "false"})
@patch("requests.get")
def test_disable_model_server_false(self, mock_get: MagicMock) -> None:
"""Test that GPU status makes request when DISABLE_MODEL_SERVER is false"""
mock_response = MagicMock()
mock_response.json.return_value = {"gpu_available": True}
mock_get.return_value = mock_response
result = _get_gpu_status_from_model_server(indexing=True)
assert result is True
mock_get.assert_called_once()
@patch.dict(os.environ, {}, clear=True)
@patch("requests.get")
def test_disable_model_server_not_set(self, mock_get: MagicMock) -> None:
"""Test that GPU status makes request when DISABLE_MODEL_SERVER is not set"""
mock_response = MagicMock()
mock_response.json.return_value = {"gpu_available": False}
mock_get.return_value = mock_response
result = _get_gpu_status_from_model_server(indexing=False)
assert result is False
mock_get.assert_called_once()
@patch.dict(os.environ, {"DISABLE_MODEL_SERVER": "true"})
def test_disabled_host_fallback(self) -> None:
"""Test that disabled host is handled correctly via environment variable"""
result = _get_gpu_status_from_model_server(indexing=True)
assert result is False
@patch.dict(os.environ, {"DISABLE_MODEL_SERVER": "false"})
@patch("requests.get")
def test_request_exception_handling(self, mock_get: MagicMock) -> None:
"""Test that exceptions are properly raised when GPU status request fails"""
mock_get.side_effect = requests.RequestException("Connection error")
with self.assertRaises(requests.RequestException):
_get_gpu_status_from_model_server(indexing=False)
@patch.dict(os.environ, {"DISABLE_MODEL_SERVER": "true"})
@patch("requests.get")
def test_gpu_status_request_with_disable(self, mock_get: MagicMock) -> None:
"""Test that no request is made when DISABLE_MODEL_SERVER is true"""
result = _get_gpu_status_from_model_server(indexing=True)
assert result is False
# Verify that no HTTP request was made
mock_get.assert_not_called()

View File

@@ -123,7 +123,7 @@ backend = [
"mistune==0.8.4",
"sendgrid==6.11.0",
"exa_py==1.15.4",
"braintrust[openai-agents]==0.2.6",
"braintrust[openai-agents]==0.3.9",
"braintrust-langchain==0.0.4",
"openai-agents==0.4.2",
"langfuse==3.10.0",

uv.lock (generated, 9 changed lines)
View File

@@ -533,7 +533,7 @@ wheels = [
[[package]]
name = "braintrust"
version = "0.2.6"
version = "0.3.9"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "chevron" },
@@ -545,10 +545,11 @@ dependencies = [
{ name = "sseclient-py" },
{ name = "tqdm" },
{ name = "typing-extensions" },
{ name = "wrapt" },
]
sdist = { url = "https://files.pythonhosted.org/packages/e8/66/d8bf0a99a29ec7c6cbf47a1e9416ea2f9dcab7bf6d6539cd6a8e755ec22f/braintrust-0.2.6.tar.gz", hash = "sha256:e9c2ae3cce09a8562fa436bc6ec20039c495a6f82928ac36050b5fbc7b89743a", size = 180921, upload-time = "2025-08-27T18:13:01.804Z" }
sdist = { url = "https://files.pythonhosted.org/packages/fb/5c/d4086bcf843a49916cefdd0379a138d9024d5fe75e6cea1e672da19cb272/braintrust-0.3.9.tar.gz", hash = "sha256:8c56ccb214bb102291968cdda67d7bf36a4b01fefc6b1998e53c272379eacab3", size = 237224, upload-time = "2025-11-25T22:20:24.368Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/c2/86/8d4bb3b0ab6dd40d2d428bb0b48d29601ed2a344aa6c618378311a73844a/braintrust-0.2.6-py3-none-any.whl", hash = "sha256:d07f1666881c25a8d65a04cfb84f45a0016531928eb3133172942fae49f9a8f8", size = 210465, upload-time = "2025-08-27T18:13:00.503Z" },
{ url = "https://files.pythonhosted.org/packages/ed/b6/e21ddf4e815c3fc845c61b5084bd4d35b882700d78ec054e9e9803aa04db/braintrust-0.3.9-py3-none-any.whl", hash = "sha256:2b903671837dea85d74e984403f154b60b77a3dde4c0bffa17121fdcb24f18d0", size = 280622, upload-time = "2025-11-25T22:20:22.553Z" },
]
[package.optional-dependencies]
@@ -3656,7 +3657,7 @@ backend = [
{ name = "beautifulsoup4", specifier = "==4.12.3" },
{ name = "boto3", specifier = "==1.39.11" },
{ name = "boto3-stubs", extras = ["s3"], specifier = "==1.39.11" },
{ name = "braintrust", extras = ["openai-agents"], specifier = "==0.2.6" },
{ name = "braintrust", extras = ["openai-agents"], specifier = "==0.3.9" },
{ name = "braintrust-langchain", specifier = "==0.0.4" },
{ name = "celery", specifier = "==5.5.1" },
{ name = "chardet", specifier = "==5.2.0" },

web/package-lock.json generated
View File

@@ -56,7 +56,7 @@
"lowlight": "^3.3.0",
"lucide-react": "^0.454.0",
"mdast-util-find-and-replace": "^3.0.1",
"next": "^16.0.1",
"next": "^16.0.7",
"next-themes": "^0.4.4",
"npm": "^10.8.0",
"postcss": "^8.5.6",
@@ -914,8 +914,6 @@
},
"node_modules/@eslint/eslintrc/node_modules/ajv": {
"version": "6.12.6",
"resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.6.tgz",
"integrity": "sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g==",
"dev": true,
"license": "MIT",
"dependencies": {
@@ -942,8 +940,6 @@
},
"node_modules/@eslint/eslintrc/node_modules/json-schema-traverse": {
"version": "0.4.1",
"resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-0.4.1.tgz",
"integrity": "sha512-xbbCH5dCYU5T8LcEhhuh7HJ88HXuW3qsI3Y0zOZFKfZEHcpWiHU/Jxzk629Brsab/mMiHQti9wMP+845RPe3Vg==",
"dev": true,
"license": "MIT"
},
@@ -1285,9 +1281,7 @@
}
},
"node_modules/@istanbuljs/load-nyc-config/node_modules/js-yaml": {
"version": "3.14.2",
"resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-3.14.2.tgz",
"integrity": "sha512-PMSmkqxr106Xa156c2M265Z+FTrPl+oxd/rgOQy2tijQeK5TxQ43psO1ZCwhVOSdnn+RzkzlRz/eY4BgJBYVpg==",
"version": "3.14.1",
"dev": true,
"license": "MIT",
"dependencies": {
@@ -1684,7 +1678,9 @@
}
},
"node_modules/@next/env": {
"version": "16.0.1",
"version": "16.0.7",
"resolved": "https://registry.npmjs.org/@next/env/-/env-16.0.7.tgz",
"integrity": "sha512-gpaNgUh5nftFKRkRQGnVi5dpcYSKGcZZkQffZ172OrG/XkrnS7UBTQ648YY+8ME92cC4IojpI2LqTC8sTDhAaw==",
"license": "MIT"
},
"node_modules/@next/eslint-plugin-next": {
@@ -1722,7 +1718,9 @@
}
},
"node_modules/@next/swc-darwin-arm64": {
"version": "16.0.1",
"version": "16.0.7",
"resolved": "https://registry.npmjs.org/@next/swc-darwin-arm64/-/swc-darwin-arm64-16.0.7.tgz",
"integrity": "sha512-LlDtCYOEj/rfSnEn/Idi+j1QKHxY9BJFmxx7108A6D8K0SB+bNgfYQATPk/4LqOl4C0Wo3LACg2ie6s7xqMpJg==",
"cpu": [
"arm64"
],
@@ -1736,12 +1734,13 @@
}
},
"node_modules/@next/swc-darwin-x64": {
"version": "16.0.1",
"resolved": "https://registry.npmjs.org/@next/swc-darwin-x64/-/swc-darwin-x64-16.0.1.tgz",
"integrity": "sha512-kETZBocRux3xITiZtOtVoVvXyQLB7VBxN7L6EPqgI5paZiUlnsgYv4q8diTNYeHmF9EiehydOBo20lTttCbHAg==",
"version": "16.0.7",
"resolved": "https://registry.npmjs.org/@next/swc-darwin-x64/-/swc-darwin-x64-16.0.7.tgz",
"integrity": "sha512-rtZ7BhnVvO1ICf3QzfW9H3aPz7GhBrnSIMZyr4Qy6boXF0b5E3QLs+cvJmg3PsTCG2M1PBoC+DANUi4wCOKXpA==",
"cpu": [
"x64"
],
"license": "MIT",
"optional": true,
"os": [
"darwin"
@@ -1751,12 +1750,13 @@
}
},
"node_modules/@next/swc-linux-arm64-gnu": {
"version": "16.0.1",
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-gnu/-/swc-linux-arm64-gnu-16.0.1.tgz",
"integrity": "sha512-hWg3BtsxQuSKhfe0LunJoqxjO4NEpBmKkE+P2Sroos7yB//OOX3jD5ISP2wv8QdUwtRehMdwYz6VB50mY6hqAg==",
"version": "16.0.7",
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-gnu/-/swc-linux-arm64-gnu-16.0.7.tgz",
"integrity": "sha512-mloD5WcPIeIeeZqAIP5c2kdaTa6StwP4/2EGy1mUw8HiexSHGK/jcM7lFuS3u3i2zn+xH9+wXJs6njO7VrAqww==",
"cpu": [
"arm64"
],
"license": "MIT",
"optional": true,
"os": [
"linux"
@@ -1766,12 +1766,13 @@
}
},
"node_modules/@next/swc-linux-arm64-musl": {
"version": "16.0.1",
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-musl/-/swc-linux-arm64-musl-16.0.1.tgz",
"integrity": "sha512-UPnOvYg+fjAhP3b1iQStcYPWeBFRLrugEyK/lDKGk7kLNua8t5/DvDbAEFotfV1YfcOY6bru76qN9qnjLoyHCQ==",
"version": "16.0.7",
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-musl/-/swc-linux-arm64-musl-16.0.7.tgz",
"integrity": "sha512-+ksWNrZrthisXuo9gd1XnjHRowCbMtl/YgMpbRvFeDEqEBd523YHPWpBuDjomod88U8Xliw5DHhekBC3EOOd9g==",
"cpu": [
"arm64"
],
"license": "MIT",
"optional": true,
"os": [
"linux"
@@ -1781,12 +1782,13 @@
}
},
"node_modules/@next/swc-linux-x64-gnu": {
"version": "16.0.1",
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-gnu/-/swc-linux-x64-gnu-16.0.1.tgz",
"integrity": "sha512-Et81SdWkcRqAJziIgFtsFyJizHoWne4fzJkvjd6V4wEkWTB4MX6J0uByUb0peiJQ4WeAt6GGmMszE5KrXK6WKg==",
"version": "16.0.7",
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-gnu/-/swc-linux-x64-gnu-16.0.7.tgz",
"integrity": "sha512-4WtJU5cRDxpEE44Ana2Xro1284hnyVpBb62lIpU5k85D8xXxatT+rXxBgPkc7C1XwkZMWpK5rXLXTh9PFipWsA==",
"cpu": [
"x64"
],
"license": "MIT",
"optional": true,
"os": [
"linux"
@@ -1796,12 +1798,13 @@
}
},
"node_modules/@next/swc-linux-x64-musl": {
"version": "16.0.1",
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-musl/-/swc-linux-x64-musl-16.0.1.tgz",
"integrity": "sha512-qBbgYEBRrC1egcG03FZaVfVxrJm8wBl7vr8UFKplnxNRprctdP26xEv9nJ07Ggq4y1adwa0nz2mz83CELY7N6Q==",
"version": "16.0.7",
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-musl/-/swc-linux-x64-musl-16.0.7.tgz",
"integrity": "sha512-HYlhqIP6kBPXalW2dbMTSuB4+8fe+j9juyxwfMwCe9kQPPeiyFn7NMjNfoFOfJ2eXkeQsoUGXg+O2SE3m4Qg2w==",
"cpu": [
"x64"
],
"license": "MIT",
"optional": true,
"os": [
"linux"
@@ -1811,12 +1814,13 @@
}
},
"node_modules/@next/swc-win32-arm64-msvc": {
"version": "16.0.1",
"resolved": "https://registry.npmjs.org/@next/swc-win32-arm64-msvc/-/swc-win32-arm64-msvc-16.0.1.tgz",
"integrity": "sha512-cPuBjYP6I699/RdbHJonb3BiRNEDm5CKEBuJ6SD8k3oLam2fDRMKAvmrli4QMDgT2ixyRJ0+DTkiODbIQhRkeQ==",
"version": "16.0.7",
"resolved": "https://registry.npmjs.org/@next/swc-win32-arm64-msvc/-/swc-win32-arm64-msvc-16.0.7.tgz",
"integrity": "sha512-EviG+43iOoBRZg9deGauXExjRphhuYmIOJ12b9sAPy0eQ6iwcPxfED2asb/s2/yiLYOdm37kPaiZu8uXSYPs0Q==",
"cpu": [
"arm64"
],
"license": "MIT",
"optional": true,
"os": [
"win32"
@@ -1826,12 +1830,13 @@
}
},
"node_modules/@next/swc-win32-x64-msvc": {
"version": "16.0.1",
"resolved": "https://registry.npmjs.org/@next/swc-win32-x64-msvc/-/swc-win32-x64-msvc-16.0.1.tgz",
"integrity": "sha512-XeEUJsE4JYtfrXe/LaJn3z1pD19fK0Q6Er8Qoufi+HqvdO4LEPyCxLUt4rxA+4RfYo6S9gMlmzCMU2F+AatFqQ==",
"version": "16.0.7",
"resolved": "https://registry.npmjs.org/@next/swc-win32-x64-msvc/-/swc-win32-x64-msvc-16.0.7.tgz",
"integrity": "sha512-gniPjy55zp5Eg0896qSrf3yB1dw4F/3s8VK1ephdsZZ129j2n6e1WqCbE2YgcKhW9hPB9TVZENugquWJD5x0ug==",
"cpu": [
"x64"
],
"license": "MIT",
"optional": true,
"os": [
"win32"
@@ -5608,8 +5613,6 @@
},
"node_modules/ajv": {
"version": "8.17.1",
"resolved": "https://registry.npmjs.org/ajv/-/ajv-8.17.1.tgz",
"integrity": "sha512-B/gBuNg5SiMTrPkC+A2+cW0RszwxYmn6VYxB/inlBStS5nx6xHIt/ehKRhIMhqusl7a8LjQoZnjCs5vhwxOQ1g==",
"license": "MIT",
"dependencies": {
"fast-deep-equal": "^3.1.3",
@@ -5718,8 +5721,6 @@
},
"node_modules/anymatch/node_modules/picomatch": {
"version": "2.3.1",
"resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.1.tgz",
"integrity": "sha512-JU3teHTNjmE2VCGFzuY8EXzCDVwEqB2a8fsIvwaStHhAWJEeVd1o1QD80CU6+ZdEXXSLbSsuLwJjkCBWqRQUVA==",
"license": "MIT",
"engines": {
"node": ">=8.6"
@@ -5937,8 +5938,6 @@
},
"node_modules/autoprefixer": {
"version": "10.4.22",
"resolved": "https://registry.npmjs.org/autoprefixer/-/autoprefixer-10.4.22.tgz",
"integrity": "sha512-ARe0v/t9gO28Bznv6GgqARmVqcWOV3mfgUPn9becPHMiD3o9BwlRgaeccZnwTpZ7Zwqrm+c1sUSsMxIzQzc8Xg==",
"funding": [
{
"type": "opencollective",
@@ -6410,8 +6409,6 @@
},
"node_modules/caniuse-lite": {
"version": "1.0.30001754",
"resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001754.tgz",
"integrity": "sha512-x6OeBXueoAceOmotzx3PO4Zpt4rzpeIFsSr6AAePTZxSkXiYDUmpypEl7e2+8NCd9bD7bXjqyef8CJYPC1jfxg==",
"funding": [
{
"type": "opencollective",
@@ -8135,8 +8132,6 @@
},
"node_modules/eslint/node_modules/ajv": {
"version": "6.12.6",
"resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.6.tgz",
"integrity": "sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g==",
"dev": true,
"license": "MIT",
"dependencies": {
@@ -8152,8 +8147,6 @@
},
"node_modules/eslint/node_modules/json-schema-traverse": {
"version": "0.4.1",
"resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-0.4.1.tgz",
"integrity": "sha512-xbbCH5dCYU5T8LcEhhuh7HJ88HXuW3qsI3Y0zOZFKfZEHcpWiHU/Jxzk629Brsab/mMiHQti9wMP+845RPe3Vg==",
"dev": true,
"license": "MIT"
},
@@ -8522,8 +8515,6 @@
},
"node_modules/fork-ts-checker-webpack-plugin/node_modules/ajv": {
"version": "6.12.6",
"resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.6.tgz",
"integrity": "sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g==",
"dev": true,
"license": "MIT",
"dependencies": {
@@ -8547,8 +8538,6 @@
},
"node_modules/fork-ts-checker-webpack-plugin/node_modules/json-schema-traverse": {
"version": "0.4.1",
"resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-0.4.1.tgz",
"integrity": "sha512-xbbCH5dCYU5T8LcEhhuh7HJ88HXuW3qsI3Y0zOZFKfZEHcpWiHU/Jxzk629Brsab/mMiHQti9wMP+845RPe3Vg==",
"dev": true,
"license": "MIT"
},
@@ -8621,8 +8610,6 @@
},
"node_modules/fraction.js": {
"version": "5.3.4",
"resolved": "https://registry.npmjs.org/fraction.js/-/fraction.js-5.3.4.tgz",
"integrity": "sha512-1X1NTtiJphryn/uLQz3whtY6jK3fTqoE3ohKs0tT+Ujr1W59oopxmoEh7Lu5p6vBaPbgoM0bzveAW4Qi5RyWDQ==",
"license": "MIT",
"engines": {
"node": "*"
@@ -10698,8 +10685,6 @@
},
"node_modules/jest-util/node_modules/picomatch": {
"version": "2.3.1",
"resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.1.tgz",
"integrity": "sha512-JU3teHTNjmE2VCGFzuY8EXzCDVwEqB2a8fsIvwaStHhAWJEeVd1o1QD80CU6+ZdEXXSLbSsuLwJjkCBWqRQUVA==",
"dev": true,
"license": "MIT",
"engines": {
@@ -10790,9 +10775,7 @@
"license": "MIT"
},
"node_modules/js-yaml": {
"version": "4.1.1",
"resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-4.1.1.tgz",
"integrity": "sha512-qQKT4zQxXl8lLwBtHMWwaTcGfFOZviOJet3Oy/xmGk2gZH677CJM9EvtfdSkgWcATZhj/55JZ0rmy3myCT5lsA==",
"version": "4.1.0",
"dev": true,
"license": "MIT",
"dependencies": {
@@ -10875,8 +10858,6 @@
},
"node_modules/json-schema-traverse": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-1.0.0.tgz",
"integrity": "sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug==",
"license": "MIT"
},
"node_modules/json-stable-stringify-without-jsonify": {
@@ -12016,8 +11997,6 @@
},
"node_modules/micromatch/node_modules/picomatch": {
"version": "2.3.1",
"resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.1.tgz",
"integrity": "sha512-JU3teHTNjmE2VCGFzuY8EXzCDVwEqB2a8fsIvwaStHhAWJEeVd1o1QD80CU6+ZdEXXSLbSsuLwJjkCBWqRQUVA==",
"license": "MIT",
"engines": {
"node": ">=8.6"
@@ -12142,10 +12121,12 @@
"license": "MIT"
},
"node_modules/next": {
"version": "16.0.1",
"version": "16.0.7",
"resolved": "https://registry.npmjs.org/next/-/next-16.0.7.tgz",
"integrity": "sha512-3mBRJyPxT4LOxAJI6IsXeFtKfiJUbjCLgvXO02fV8Wy/lIhPvP94Fe7dGhUgHXcQy4sSuYwQNcOLhIfOm0rL0A==",
"license": "MIT",
"dependencies": {
"@next/env": "16.0.1",
"@next/env": "16.0.7",
"@swc/helpers": "0.5.15",
"caniuse-lite": "^1.0.30001579",
"postcss": "8.4.31",
@@ -12158,14 +12139,14 @@
"node": ">=20.9.0"
},
"optionalDependencies": {
"@next/swc-darwin-arm64": "16.0.1",
"@next/swc-darwin-x64": "16.0.1",
"@next/swc-linux-arm64-gnu": "16.0.1",
"@next/swc-linux-arm64-musl": "16.0.1",
"@next/swc-linux-x64-gnu": "16.0.1",
"@next/swc-linux-x64-musl": "16.0.1",
"@next/swc-win32-arm64-msvc": "16.0.1",
"@next/swc-win32-x64-msvc": "16.0.1",
"@next/swc-darwin-arm64": "16.0.7",
"@next/swc-darwin-x64": "16.0.7",
"@next/swc-linux-arm64-gnu": "16.0.7",
"@next/swc-linux-arm64-musl": "16.0.7",
"@next/swc-linux-x64-gnu": "16.0.7",
"@next/swc-linux-x64-musl": "16.0.7",
"@next/swc-win32-arm64-msvc": "16.0.7",
"@next/swc-win32-x64-msvc": "16.0.7",
"sharp": "^0.34.4"
},
"peerDependencies": {
@@ -15259,8 +15240,6 @@
},
"node_modules/picomatch": {
"version": "4.0.3",
"resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz",
"integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==",
"license": "MIT",
"engines": {
"node": ">=12"
@@ -16139,8 +16118,6 @@
},
"node_modules/readdirp/node_modules/picomatch": {
"version": "2.3.1",
"resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.1.tgz",
"integrity": "sha512-JU3teHTNjmE2VCGFzuY8EXzCDVwEqB2a8fsIvwaStHhAWJEeVd1o1QD80CU6+ZdEXXSLbSsuLwJjkCBWqRQUVA==",
"license": "MIT",
"engines": {
"node": ">=8.6"
@@ -18249,8 +18226,6 @@
},
"node_modules/uri-js": {
"version": "4.4.1",
"resolved": "https://registry.npmjs.org/uri-js/-/uri-js-4.4.1.tgz",
"integrity": "sha512-7rKUyy33Q1yc98pQ1DAmLtwX109F7TIfWlW1Ydo8Wl1ii1SeHieeh0HHfPeL2fMXK6z0s8ecKs9frCuLJvndBg==",
"dev": true,
"license": "BSD-2-Clause",
"dependencies": {
@@ -18987,8 +18962,6 @@
},
"node_modules/zustand": {
"version": "5.0.8",
"resolved": "https://registry.npmjs.org/zustand/-/zustand-5.0.8.tgz",
"integrity": "sha512-gyPKpIaxY9XcO2vSMrLbiER7QMAMGOQZVRdJ6Zi782jkbzZygq5GI9nG8g+sMgitRtndwaBSl7uiqC49o1SSiw==",
"license": "MIT",
"engines": {
"node": ">=12.20.0"

View File

@@ -71,7 +71,7 @@
"lowlight": "^3.3.0",
"lucide-react": "^0.454.0",
"mdast-util-find-and-replace": "^3.0.1",
"next": "^16.0.1",
"next": "^16.0.7",
"next-themes": "^0.4.4",
"npm": "^10.8.0",
"postcss": "^8.5.6",

View File

@@ -101,7 +101,7 @@ export default function AIMessage({
// Toggle logic
if (currentFeedback === clickedFeedback) {
// Clicking same button - remove feedback
await handleFeedbackChange(nodeId, null);
await handleFeedbackChange(messageId, null);
}
// Clicking like (will automatically clear dislike if it was active).
@@ -113,12 +113,12 @@ export default function AIMessage({
// Open modal for positive feedback
setFeedbackModalProps({
feedbackType: "like",
messageId: nodeId,
messageId,
});
modal.toggle(true);
} else {
// No modal needed - just submit like (this replaces any existing feedback)
await handleFeedbackChange(nodeId, "like");
await handleFeedbackChange(messageId, "like");
}
}

View File

@@ -3,24 +3,14 @@ import {
FederatedConnectorDetail,
FederatedConnectorConfig,
federatedSourceToRegularSource,
ValidSources,
} from "@/lib/types";
import { SourceIcon } from "@/components/SourceIcon";
import SvgX from "@/icons/x";
import SvgSettings from "@/icons/settings";
import { Label } from "@/components/ui/label";
import { ErrorMessage } from "formik";
import Text from "@/refresh-components/texts/Text";
import IconButton from "@/refresh-components/buttons/IconButton";
import { Input } from "@/components/ui/input";
import InputTypeIn from "@/refresh-components/inputs/InputTypeIn";
import {
Dialog,
DialogContent,
DialogHeader,
DialogTitle,
} from "@/components/ui/dialog";
import Button from "@/refresh-components/buttons/Button";
interface FederatedConnectorSelectorProps {
name: string;
@@ -33,194 +23,6 @@ interface FederatedConnectorSelectorProps {
showError?: boolean;
}
interface EntityConfigDialogProps {
connectorId: number;
connectorName: string;
connectorSource: ValidSources | null;
currentEntities: Record<string, any>;
onSave: (entities: Record<string, any>) => void;
onClose: () => void;
isOpen: boolean;
}
const EntityConfigDialog = ({
connectorId,
connectorName,
connectorSource,
currentEntities,
onSave,
onClose,
isOpen,
}: EntityConfigDialogProps) => {
const [entities, setEntities] =
useState<Record<string, any>>(currentEntities);
const [entitySchema, setEntitySchema] = useState<Record<string, any> | null>(
null
);
const [isLoading, setIsLoading] = useState(false);
const [error, setError] = useState<string | null>(null);
useEffect(() => {
if (isOpen) {
setEntities(currentEntities || {});
}
}, [currentEntities, isOpen]);
useEffect(() => {
if (isOpen && connectorId) {
const fetchEntitySchema = async () => {
setIsLoading(true);
setError(null);
try {
const response = await fetch(
`/api/federated/${connectorId}/entities`
);
if (!response.ok) {
throw new Error(
`Failed to fetch entity schema: ${response.statusText}`
);
}
const data = await response.json();
setEntitySchema(data.entities);
} catch (err) {
setError(
err instanceof Error ? err.message : "Failed to load entity schema"
);
} finally {
setIsLoading(false);
}
};
fetchEntitySchema();
}
}, [isOpen, connectorId]);
const handleSave = () => {
onSave(entities);
onClose();
};
const handleEntityChange = (key: string, value: any) => {
setEntities((prev) => ({
...prev,
[key]: value,
}));
};
if (!connectorSource) {
return null;
}
return (
<Dialog open={isOpen} onOpenChange={onClose}>
<DialogContent className="max-w-md">
<DialogHeader>
<DialogTitle className="flex items-center gap-2">
<SourceIcon
sourceType={federatedSourceToRegularSource(connectorSource)}
iconSize={20}
/>
Configure {connectorName}
</DialogTitle>
</DialogHeader>
<div className="space-y-4">
{isLoading && (
<div className="text-center py-4">
<div className="animate-spin h-6 w-6 border-2 border-blue-500 border-t-transparent rounded-full mx-auto mb-2"></div>
<p className="text-sm text-muted-foreground">
Loading configuration...
</p>
</div>
)}
{error && (
<div className="text-red-500 text-sm p-3 bg-red-50 rounded-md">
{error}
</div>
)}
{entitySchema && !isLoading && (
<div className="space-y-3">
<p className="text-sm text-muted-foreground">
Configure which entities to include from this connector:
</p>
{Object.entries(entitySchema).map(
([key, field]: [string, any]) => (
<div key={key} className="space-y-2">
<Label className="text-sm font-medium">
{field.description || key}
{field.required && (
<span className="text-red-500 ml-1">*</span>
)}
</Label>
{field.type === "list" ? (
<div className="space-y-2">
<Input
type="text"
placeholder={
field.example || `Enter ${key} (comma-separated)`
}
value={
Array.isArray(entities[key])
? entities[key].join(", ")
: ""
}
onChange={(e) => {
const value = e.target.value;
const list = value
? value
.split(",")
.map((item) => item.trim())
.filter(Boolean)
: [];
handleEntityChange(key, list);
}}
/>
<p className="text-xs text-muted-foreground">
{field.description && field.description !== key
? field.description
: `Enter ${key} separated by commas`}
</p>
</div>
) : (
<div className="space-y-2">
<Input
type="text"
placeholder={field.example || `Enter ${key}`}
value={entities[key] || ""}
onChange={(e) =>
handleEntityChange(key, e.target.value)
}
/>
{field.description && field.description !== key && (
<p className="text-xs text-muted-foreground">
{field.description}
</p>
)}
</div>
)}
</div>
)
)}
</div>
)}
<div className="flex justify-end gap-2 pt-4">
<Button secondary onClick={onClose}>
Cancel
</Button>
<Button onClick={handleSave} disabled={isLoading}>
Save Configuration
</Button>
</div>
</div>
</DialogContent>
</Dialog>
);
};
export const FederatedConnectorSelector = ({
name,
label,
@@ -233,19 +35,6 @@ export const FederatedConnectorSelector = ({
}: FederatedConnectorSelectorProps) => {
const [open, setOpen] = useState(false);
const [searchQuery, setSearchQuery] = useState("");
const [configDialogState, setConfigDialogState] = useState<{
isOpen: boolean;
connectorId: number | null;
connectorName: string;
connectorSource: ValidSources | null;
currentEntities: Record<string, any>;
}>({
isOpen: false,
connectorId: null,
connectorName: "",
connectorSource: null,
currentEntities: {},
});
const dropdownRef = useRef<HTMLDivElement>(null);
const inputRef = useRef<HTMLInputElement>(null);
@@ -307,36 +96,6 @@ export const FederatedConnectorSelector = ({
);
};
const openConfigDialog = (connectorId: number) => {
const connector = federatedConnectors.find((c) => c.id === connectorId);
const config = selectedConfigs.find(
(c) => c.federated_connector_id === connectorId
);
if (connector) {
setConfigDialogState({
isOpen: true,
connectorId,
connectorName: connector.name,
connectorSource: connector.source,
currentEntities: config?.entities || {},
});
}
};
const saveEntityConfig = (entities: Record<string, any>) => {
const updatedConfigs = selectedConfigs.map((config) => {
if (config.federated_connector_id === configDialogState.connectorId) {
return {
...config,
entities,
};
}
return config;
});
onChange(updatedConfigs);
};
useEffect(() => {
const handleClickOutside = (event: MouseEvent) => {
if (
@@ -472,14 +231,6 @@ export const FederatedConnectorSelector = ({
)}
</div>
<div className="flex items-center ml-2 gap-1">
<IconButton
internal
type="button"
tooltip="Configure entities"
aria-label="Configure entities"
onClick={() => openConfigDialog(connector.id)}
icon={SvgSettings}
/>
<IconButton
internal
type="button"
@@ -500,18 +251,6 @@ export const FederatedConnectorSelector = ({
</div>
)}
<EntityConfigDialog
connectorId={configDialogState.connectorId!}
connectorName={configDialogState.connectorName}
connectorSource={configDialogState.connectorSource}
currentEntities={configDialogState.currentEntities}
onSave={saveEntityConfig}
onClose={() =>
setConfigDialogState((prev) => ({ ...prev, isOpen: false }))
}
isOpen={configDialogState.isOpen}
/>
{showError && (
<ErrorMessage
name={name}

View File

@@ -813,6 +813,7 @@ export const connectorConfigs: Record<
description: `• If no sites are specified, all sites in your organization will be indexed (Sites.Read.All permission required).
• Specifying 'https://onyxai.sharepoint.com/sites/support', for example, indexes only that site.
• Specifying 'https://onyxai.sharepoint.com/sites/support/subfolder', for example, indexes only that folder.
• Specifying sites currently works for SharePoint instances using English, Spanish, or German. Contact the Onyx team if you need another language supported.
`,
},
],
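
For reference, the three cases in the description above map to "sites" values like these (a hedged illustration; the URLs come from the text itself, and the list shape is assumed from the description):

// Index everything in the organization (requires Sites.Read.All)
const allSites: string[] = [];
// Index a single site
const oneSite = ["https://onyxai.sharepoint.com/sites/support"];
// Index a single folder within a site
const oneFolder = ["https://onyxai.sharepoint.com/sites/support/subfolder"];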

View File

@@ -93,19 +93,8 @@ export const NEXT_PUBLIC_INCLUDE_ERROR_POPUP_SUPPORT_LINK =
export const NEXT_PUBLIC_STRIPE_PUBLISHABLE_KEY =
process.env.NEXT_PUBLIC_STRIPE_PUBLISHABLE_KEY;
// Add support for custom URL protocols in markdown links
export const ALLOWED_URL_PROTOCOLS = [
"http:",
"https:",
"mailto:",
"tel:",
"slack:",
"vscode:",
"file:",
"sms:",
"spotify:",
"zoommtg:",
];
// Restrict markdown links to safe protocols
export const ALLOWED_URL_PROTOCOLS = ["http:", "https:", "mailto:"] as const;
export const MAX_CHARACTERS_PERSONA_DESCRIPTION = 5000000;
export const MAX_STARTER_MESSAGES = 4;
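
Beyond blocking javascript: URLs, shrinking the list means links using the previously allowed custom protocols (slack:, vscode:, zoommtg:, and the rest) are now stripped as well. A quick sketch of how the exact-protocol match behaves on a few illustrative inputs; the helper name here is hypothetical, and the real check lives in transformLinkUri in the next file:

const isAllowedHref = (href: string): boolean => {
  try {
    // Mirrors the real check: compare the parsed, lowercased protocol
    // for exact equality against the allowlist.
    const protocol = new URL(href).protocol.toLowerCase();
    return ALLOWED_URL_PROTOCOLS.some((allowed) => allowed === protocol);
  } catch {
    return false; // relative paths and malformed hrefs fail to parse
  }
};

isAllowedHref("https://example.com");      // true
isAllowedHref("mailto:user@example.com");  // true
isAllowedHref("javascript:alert(1)");      // false, not in the allowlist
isAllowedHref("slack://channel?id=C123");  // false, custom protocols dropped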

View File

@@ -11,28 +11,33 @@ export const truncateString = (str: string, maxLength: number) => {
};
/**
* Custom URL transformer function for ReactMarkdown
* Allows specific protocols to be used in markdown links
* We use this with the urlTransform prop in ReactMarkdown
* Custom URL transformer function for ReactMarkdown.
* Only allows a small, safe set of protocols and strips everything else.
* Returning null removes the href attribute entirely.
*/
export function transformLinkUri(href: string) {
if (!href) return href;
export function transformLinkUri(href: string): string | null {
if (!href) return null;
const trimmedHref = href.trim();
if (!trimmedHref) return null;
const url = href.trim();
try {
const parsedUrl = new URL(url);
if (
ALLOWED_URL_PROTOCOLS.some((protocol) =>
parsedUrl.protocol.startsWith(protocol)
)
) {
return url;
const parsedUrl = new URL(trimmedHref);
const protocol = parsedUrl.protocol.toLowerCase();
if (ALLOWED_URL_PROTOCOLS.some((allowed) => allowed === protocol)) {
return trimmedHref;
}
return null;
} catch {
// If it's not a valid URL with protocol, return the original href
return href;
// Allow relative URLs, but drop anything that looks like a protocol-prefixed link
if (/^[a-zA-Z][a-zA-Z\d+.-]*:\S*/.test(trimmedHref)) {
return null;
}
return trimmedHref;
}
return href;
}
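// Editor's note: the sketch below is illustrative and not part of this diff.
// The docstring above names ReactMarkdown's urlTransform prop as the
// integration point; a minimal (hypothetical) usage site could look like:
import ReactMarkdown from "react-markdown";

export function SafeMarkdown({ content }: { content: string }) {
  // transformLinkUri returns null for unsafe hrefs, which removes the
  // href attribute from the rendered link entirely.
  return (
    <ReactMarkdown urlTransform={transformLinkUri}>{content}</ReactMarkdown>
  );
}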
export function isSubset(parent: string[], child: string[]): boolean {