Compare commits


13 Commits

Author SHA1 Message Date
pablodanswer
ffe8ac168f update foreign key migration 2025-01-31 08:42:28 -08:00
pablonyx
17b280e59e Remove cloud_kubes from public repo
2025-01-30 19:19:09 -08:00
pablonyx
5edba4a7f3 Foreign key input prompts
2025-01-30 19:18:49 -08:00
pablodanswer
d842fed37e foreign key updates 2025-01-30 19:17:32 -08:00
Weves
14981162fd Pin shapely version 2025-01-30 18:02:35 -08:00
Chris Weaver
288daa4e90 Add more airtable logging (#3862)
* Add more airtable logging

* Add multithreading

* Remove empty comment
2025-01-30 17:33:42 -08:00
pablonyx
5e21dc6cb3 Optimize /persona query (#3859)
* k

* delete

* k
2025-01-30 23:20:19 +00:00
Weves
39b3a503b4 Add more group sync logging 2025-01-30 14:42:14 -08:00
pablonyx
a70d472b5c Update e2e frontend tests (#3843)
* fix input prompts

* assistant ordering validation

* k

* Revert "fix input prompts"

This reverts commit a4b577bdd7.

* fix alembic

* foreign key updates

* Revert "foreign key updates"

This reverts commit fe17795a037f831790d69229e1067ccb5aab5bd9.

* improve e2e tests

* fix admin
2025-01-30 20:15:29 +00:00
devin-ai-integration[bot]
0ed2886ad0 Can't create starter messages for existing assistants. (#3825)
* fix: move starter messages out of advanced options for better visibility

Co-Authored-By: Chris Weaver <chris@onyx.app>

* fix: ensure starter message input field is visible in edit flow

Co-Authored-By: Chris Weaver <chris@onyx.app>

* chore: fix prettier formatting

Co-Authored-By: Chris Weaver <chris@onyx.app>

* chore: fix prettier formatting for starter messages description

Co-Authored-By: Chris Weaver <chris@onyx.app>

* chore: fix prettier formatting for starter messages initialization

Co-Authored-By: Chris Weaver <chris@onyx.app>

* fix: prevent unintended deletion of second message in StarterMessagesList

Co-Authored-By: Chris Weaver <chris@onyx.app>

* Fix empty starter messages

---------

Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Co-authored-by: Chris Weaver <chris@onyx.app>
Co-authored-by: Weves <chrisweaver101@gmail.com>
2025-01-30 10:26:54 -08:00
pablodanswer
6b31e2f622 remove cloud_kubes from public repo 2025-01-30 09:52:57 -08:00
hagen-danswer
aabf8a99bc Fixed SharePoint connector polling (#3834)
* Fixed SharePoint connector polling

* finish

* fix sharepoint connector
2025-01-30 17:43:11 +00:00
Chris Weaver
95701db1bd Add more sync records + fix small bug in monitoring task causing deletion metrics to never be emitted (#3837)
Double check we don't double-emit + fix pruning metric

Add log

Fix comment

rename
2025-01-29 18:03:49 -08:00
55 changed files with 764 additions and 1182 deletions

View File

@@ -1,7 +1,7 @@
"""foreign key input prompts
Revision ID: 33ea50e88f24
Revises: 4d58345da04a
Revises: a6df6b88ef81
Create Date: 2025-01-29 10:54:22.141765
"""
@@ -10,22 +10,24 @@ from alembic import op
# revision identifiers, used by Alembic.
revision = "33ea50e88f24"
down_revision = "4d58345da04a"
down_revision = "a6df6b88ef81"
branch_labels = None
depends_on = None
def upgrade() -> None:
# First drop the existing FK constraints
op.drop_constraint(
"inputprompt__user_input_prompt_id_fkey",
"inputprompt__user",
type_="foreignkey",
# Safely drop constraints if exists
op.execute(
"""
ALTER TABLE inputprompt__user
DROP CONSTRAINT IF EXISTS inputprompt__user_input_prompt_id_fkey
"""
)
op.drop_constraint(
"inputprompt__user_user_id_fkey",
"inputprompt__user",
type_="foreignkey",
op.execute(
"""
ALTER TABLE inputprompt__user
DROP CONSTRAINT IF EXISTS inputprompt__user_user_id_fkey
"""
)
# Recreate with ON DELETE CASCADE
@@ -37,10 +39,11 @@ def upgrade() -> None:
["id"],
ondelete="CASCADE",
)
op.create_foreign_key(
"inputprompt__user_user_id_fkey",
"inputprompt__user",
'"user"',
"user",
["user_id"],
["id"],
ondelete="CASCADE",
@@ -71,7 +74,7 @@ def downgrade() -> None:
op.create_foreign_key(
"inputprompt__user_user_id_fkey",
"inputprompt__user",
'"user"',
"user",
["user_id"],
["id"],
)
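The edit above replaces Alembic's `op.drop_constraint`, which raises if the constraint is absent, with raw `DROP CONSTRAINT IF EXISTS`, so the migration tolerates re-runs and databases where the constraint was never created. A minimal sketch of the full drop-then-recreate-with-CASCADE pattern, assuming a standalone revision against the same tables:

from alembic import op

def upgrade() -> None:
    # Idempotent drop: a no-op if the constraint is already gone
    op.execute(
        """
        ALTER TABLE inputprompt__user
        DROP CONSTRAINT IF EXISTS inputprompt__user_user_id_fkey
        """
    )
    # Recreate with ON DELETE CASCADE so deleting a user also removes
    # the join rows that reference it, instead of raising an FK violation
    op.create_foreign_key(
        "inputprompt__user_user_id_fkey",
        "inputprompt__user",
        "user",
        ["user_id"],
        ["id"],
        ondelete="CASCADE",
    )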

View File

@@ -0,0 +1,29 @@
"""remove recent assistants
Revision ID: a6df6b88ef81
Revises: 4d58345da04a
Create Date: 2025-01-29 10:25:52.790407
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = "a6df6b88ef81"
down_revision = "4d58345da04a"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.drop_column("user", "recent_assistants")
def downgrade() -> None:
op.add_column(
"user",
sa.Column(
"recent_assistants", postgresql.JSONB(), server_default="[]", nullable=False
),
)

View File

@@ -14,6 +14,8 @@ def _build_group_member_email_map(
) -> dict[str, set[str]]:
group_member_emails: dict[str, set[str]] = {}
for user_result in confluence_client.paginated_cql_user_retrieval():
logger.debug(f"Processing groups for user: {user_result}")
user = user_result.get("user", {})
if not user:
logger.warning(f"user result missing user field: {user_result}")
@@ -33,10 +35,17 @@ def _build_group_member_email_map(
logger.warning(f"user result missing email field: {user_result}")
continue
all_users_groups: set[str] = set()
for group in confluence_client.paginated_groups_by_user_retrieval(user):
# group name uniqueness is enforced by Confluence, so we can use it as a group ID
group_id = group["name"]
group_member_emails.setdefault(group_id, set()).add(email)
all_users_groups.add(group_id)
if not group_member_emails:
logger.warning(f"No groups found for user with email: {email}")
else:
logger.debug(f"Found groups {all_users_groups} for user with email {email}")
return group_member_emails
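The added logging aside, the loop's core is a group-name to member-emails accumulation; the inline comment notes that Confluence enforces group-name uniqueness, which is what lets the name double as the group ID. A toy sketch of just that accumulation, with hypothetical pairs standing in for the paginated Confluence calls:

group_member_emails: dict[str, set[str]] = {}
# (email, group names) pairs; the real code gets these from
# paginated_cql_user_retrieval / paginated_groups_by_user_retrieval
pairs = [("a@example.com", ["eng", "all"]), ("b@example.com", ["all"])]
for email, group_names in pairs:
    for group_id in group_names:  # group name doubles as the group ID
        group_member_emails.setdefault(group_id, set()).add(email)
assert group_member_emails == {
    "eng": {"a@example.com"},
    "all": {"a@example.com", "b@example.com"},
}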

View File

@@ -111,6 +111,7 @@ async def login_as_anonymous_user(
token = generate_anonymous_user_jwt_token(tenant_id)
response = Response()
response.delete_cookie("fastapiusersauth")
response.set_cookie(
key=ANONYMOUS_USER_COOKIE_NAME,
value=token,

View File

@@ -11,6 +11,7 @@ from celery import Task
from celery.exceptions import SoftTimeLimitExceeded
from redis import Redis
from redis.lock import Lock as RedisLock
from sqlalchemy.orm import Session
from ee.onyx.db.connector_credential_pair import get_all_auto_sync_cc_pairs
from ee.onyx.db.document import upsert_document_external_perms
@@ -31,12 +32,17 @@ from onyx.configs.constants import OnyxCeleryPriority
from onyx.configs.constants import OnyxCeleryQueues
from onyx.configs.constants import OnyxCeleryTask
from onyx.configs.constants import OnyxRedisLocks
from onyx.db.connector import mark_cc_pair_as_permissions_synced
from onyx.db.connector_credential_pair import get_connector_credential_pair_from_id
from onyx.db.document import upsert_document_by_connector_credential_pair
from onyx.db.engine import get_session_with_tenant
from onyx.db.enums import AccessType
from onyx.db.enums import ConnectorCredentialPairStatus
from onyx.db.enums import SyncStatus
from onyx.db.enums import SyncType
from onyx.db.models import ConnectorCredentialPair
from onyx.db.sync_record import insert_sync_record
from onyx.db.sync_record import update_sync_record_status
from onyx.db.users import batch_add_ext_perm_user_if_not_exists
from onyx.redis.redis_connector import RedisConnector
from onyx.redis.redis_connector_doc_perm_sync import (
@@ -57,6 +63,9 @@ LIGHT_SOFT_TIME_LIMIT = 105
LIGHT_TIME_LIMIT = LIGHT_SOFT_TIME_LIMIT + 15
"""Jobs / utils for kicking off doc permissions sync tasks."""
def _is_external_doc_permissions_sync_due(cc_pair: ConnectorCredentialPair) -> bool:
"""Returns boolean indicating if external doc permissions sync is due."""
@@ -174,6 +183,15 @@ def try_creating_permissions_sync_task(
custom_task_id = f"{redis_connector.permissions.generator_task_key}_{uuid4()}"
# create before setting fence to avoid race condition where the monitoring
# task updates the sync record before it is created
with get_session_with_tenant(tenant_id) as db_session:
insert_sync_record(
db_session=db_session,
entity_id=cc_pair_id,
sync_type=SyncType.EXTERNAL_PERMISSIONS,
)
# set a basic fence to start
payload = RedisConnectorPermissionSyncPayload(started=None, celery_task_id=None)
redis_connector.permissions.set_fence(payload)
@@ -400,3 +418,53 @@ def update_external_document_permissions_task(
f"Error Syncing Document Permissions: connector_id={connector_id} doc_id={doc_id}"
)
return False
"""Monitoring CCPair permissions utils, called in monitor_vespa_sync"""
def monitor_ccpair_permissions_taskset(
tenant_id: str | None, key_bytes: bytes, r: Redis, db_session: Session
) -> None:
fence_key = key_bytes.decode("utf-8")
cc_pair_id_str = RedisConnector.get_id_from_fence_key(fence_key)
if cc_pair_id_str is None:
task_logger.warning(
f"monitor_ccpair_permissions_taskset: could not parse cc_pair_id from {fence_key}"
)
return
cc_pair_id = int(cc_pair_id_str)
redis_connector = RedisConnector(tenant_id, cc_pair_id)
if not redis_connector.permissions.fenced:
return
initial = redis_connector.permissions.generator_complete
if initial is None:
return
remaining = redis_connector.permissions.get_remaining()
task_logger.info(
f"Permissions sync progress: cc_pair={cc_pair_id} remaining={remaining} initial={initial}"
)
if remaining > 0:
return
payload: RedisConnectorPermissionSyncPayload | None = (
redis_connector.permissions.payload
)
start_time: datetime | None = payload.started if payload else None
mark_cc_pair_as_permissions_synced(db_session, int(cc_pair_id), start_time)
task_logger.info(f"Successfully synced permissions for cc_pair={cc_pair_id}")
update_sync_record_status(
db_session=db_session,
entity_id=cc_pair_id,
sync_type=SyncType.EXTERNAL_PERMISSIONS,
sync_status=SyncStatus.SUCCESS,
num_docs_synced=initial,
)
redis_connector.permissions.reset()
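The race-condition comment is the key to this hunk: the sync record must exist in the database before the Redis fence goes up, because the fence is what makes the monitoring task go looking for the record. A toy illustration of that ordering guarantee (not the production code; the real record lives in Postgres and the fence in Redis, but the argument is the same):

import threading

records: set[int] = set()
fence = threading.Event()

def monitor(entity_id: int) -> None:
    fence.wait()                  # the monitor only acts once the fence is set
    assert entity_id in records   # safe: the record was created first

def kickoff(entity_id: int) -> None:
    records.add(entity_id)        # 1. create the record the monitor will read
    fence.set()                   # 2. only then raise the fence

t = threading.Thread(target=monitor, args=(42,))
t.start()
kickoff(42)
t.join()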

View File

@@ -33,7 +33,11 @@ from onyx.db.connector_credential_pair import get_connector_credential_pair_from
from onyx.db.engine import get_session_with_tenant
from onyx.db.enums import AccessType
from onyx.db.enums import ConnectorCredentialPairStatus
from onyx.db.enums import SyncStatus
from onyx.db.enums import SyncType
from onyx.db.models import ConnectorCredentialPair
from onyx.db.sync_record import insert_sync_record
from onyx.db.sync_record import update_sync_record_status
from onyx.redis.redis_connector import RedisConnector
from onyx.redis.redis_connector_ext_group_sync import (
RedisConnectorExternalGroupSyncPayload,
@@ -200,6 +204,15 @@ def try_creating_external_group_sync_task(
celery_task_id=result.id,
)
# create before setting fence to avoid race condition where the monitoring
# task updates the sync record before it is created
with get_session_with_tenant(tenant_id) as db_session:
insert_sync_record(
db_session=db_session,
entity_id=cc_pair_id,
sync_type=SyncType.EXTERNAL_GROUP,
)
redis_connector.external_group_sync.set_fence(payload)
except Exception:
@@ -289,11 +302,26 @@ def connector_external_group_sync_generator_task(
)
mark_cc_pair_as_external_group_synced(db_session, cc_pair.id)
update_sync_record_status(
db_session=db_session,
entity_id=cc_pair_id,
sync_type=SyncType.EXTERNAL_GROUP,
sync_status=SyncStatus.SUCCESS,
)
except Exception as e:
task_logger.exception(
f"Failed to run external group sync: cc_pair={cc_pair_id}"
)
with get_session_with_tenant(tenant_id) as db_session:
update_sync_record_status(
db_session=db_session,
entity_id=cc_pair_id,
sync_type=SyncType.EXTERNAL_GROUP,
sync_status=SyncStatus.FAILED,
)
redis_connector.external_group_sync.generator_clear()
redis_connector.external_group_sync.taskset_clear()
raise e

View File

@@ -58,6 +58,11 @@ _SYNC_START_LATENCY_KEY_FMT = (
"sync_start_latency:{sync_type}:{entity_id}:{sync_record_id}"
)
_CONNECTOR_START_TIME_KEY_FMT = "connector_start_time:{cc_pair_id}:{index_attempt_id}"
_CONNECTOR_END_TIME_KEY_FMT = "connector_end_time:{cc_pair_id}:{index_attempt_id}"
_SYNC_START_TIME_KEY_FMT = "sync_start_time:{sync_type}:{entity_id}:{sync_record_id}"
_SYNC_END_TIME_KEY_FMT = "sync_end_time:{sync_type}:{entity_id}:{sync_record_id}"
def _mark_metric_as_emitted(redis_std: Redis, key: str) -> None:
"""Mark a metric as having been emitted by setting a Redis key with expiration"""
@@ -303,8 +308,6 @@ def _build_connector_final_metrics(
)
)
_mark_metric_as_emitted(redis_std, metric_key)
return metrics
@@ -344,6 +347,52 @@ def _collect_connector_metrics(db_session: Session, redis_std: Redis) -> list[Me
if one_hour_ago > most_recent_attempt.time_created:
continue
# Build a job_id for correlation
job_id = build_job_id(
"connector", str(cc_pair.id), str(most_recent_attempt.id)
)
# Add raw start time metric if available
if most_recent_attempt.time_started:
start_time_key = _CONNECTOR_START_TIME_KEY_FMT.format(
cc_pair_id=cc_pair.id,
index_attempt_id=most_recent_attempt.id,
)
metrics.append(
Metric(
key=start_time_key,
name="connector_start_time",
value=most_recent_attempt.time_started.timestamp(),
tags={
"job_id": job_id,
"connector_id": str(cc_pair.connector.id),
"source": str(cc_pair.connector.source),
},
)
)
# Add raw end time metric if available and in terminal state
if (
most_recent_attempt.status.is_terminal()
and most_recent_attempt.time_updated
):
end_time_key = _CONNECTOR_END_TIME_KEY_FMT.format(
cc_pair_id=cc_pair.id,
index_attempt_id=most_recent_attempt.id,
)
metrics.append(
Metric(
key=end_time_key,
name="connector_end_time",
value=most_recent_attempt.time_updated.timestamp(),
tags={
"job_id": job_id,
"connector_id": str(cc_pair.connector.id),
"source": str(cc_pair.connector.source),
},
)
)
# Connector start latency
start_latency_metric = _build_connector_start_latency_metric(
cc_pair, most_recent_attempt, second_most_recent_attempt, redis_std
@@ -365,9 +414,10 @@ def _collect_sync_metrics(db_session: Session, redis_std: Redis) -> list[Metric]
"""
Collect metrics for document set and group syncing:
- Success/failure status
- Start latency (always)
- Start latency (for doc sets / user groups)
- Duration & doc count (only if success)
- Throughput (docs/min) (only if success)
- Raw start/end times for each sync
"""
one_hour_ago = get_db_current_time(db_session) - timedelta(hours=1)
@@ -389,6 +439,43 @@ def _collect_sync_metrics(db_session: Session, redis_std: Redis) -> list[Metric]
# Build a job_id for correlation
job_id = build_job_id("sync_record", str(sync_record.id))
# Add raw start time metric
start_time_key = _SYNC_START_TIME_KEY_FMT.format(
sync_type=sync_record.sync_type,
entity_id=sync_record.entity_id,
sync_record_id=sync_record.id,
)
metrics.append(
Metric(
key=start_time_key,
name="sync_start_time",
value=sync_record.sync_start_time.timestamp(),
tags={
"job_id": job_id,
"sync_type": str(sync_record.sync_type),
},
)
)
# Add raw end time metric if available
if sync_record.sync_end_time:
end_time_key = _SYNC_END_TIME_KEY_FMT.format(
sync_type=sync_record.sync_type,
entity_id=sync_record.entity_id,
sync_record_id=sync_record.id,
)
metrics.append(
Metric(
key=end_time_key,
name="sync_end_time",
value=sync_record.sync_end_time.timestamp(),
tags={
"job_id": job_id,
"sync_type": str(sync_record.sync_type),
},
)
)
# Emit a SUCCESS/FAIL boolean metric
# Use a single Redis key to avoid re-emitting final metrics
final_metric_key = _FINAL_METRIC_KEY_FMT.format(
@@ -439,7 +526,7 @@ def _collect_sync_metrics(db_session: Session, redis_std: Redis) -> list[Metric]
if duration_seconds is not None:
metrics.append(
Metric(
key=None,
key=final_metric_key,
name="sync_duration_seconds",
value=duration_seconds,
tags={
@@ -455,7 +542,7 @@ def _collect_sync_metrics(db_session: Session, redis_std: Redis) -> list[Metric]
metrics.append(
Metric(
key=None,
key=final_metric_key,
name="sync_doc_count",
value=doc_count,
tags={
@@ -468,7 +555,7 @@ def _collect_sync_metrics(db_session: Session, redis_std: Redis) -> list[Metric]
if sync_speed is not None:
metrics.append(
Metric(
key=None,
key=final_metric_key,
name="sync_speed_docs_per_min",
value=sync_speed,
tags={
@@ -482,9 +569,6 @@ def _collect_sync_metrics(db_session: Session, redis_std: Redis) -> list[Metric]
f"Invalid sync record {sync_record.id} with no duration"
)
# Mark final metrics as emitted so we don't re-emit
_mark_metric_as_emitted(redis_std, final_metric_key)
# Emit start latency
start_latency_key = _SYNC_START_LATENCY_KEY_FMT.format(
sync_type=sync_record.sync_type,
@@ -502,22 +586,20 @@ def _collect_sync_metrics(db_session: Session, redis_std: Redis) -> list[Metric]
entity = db_session.scalar(
select(UserGroup).where(UserGroup.id == sync_record.entity_id)
)
else:
task_logger.info(
f"Skipping sync record {sync_record.id} of type {sync_record.sync_type}."
)
continue
if entity is None:
task_logger.error(
f"Could not find entity for sync record {sync_record.id} "
f"(type={sync_record.sync_type}, id={sync_record.entity_id})."
f"Sync record of type {sync_record.sync_type} doesn't have an entity "
f"associated with it (id={sync_record.entity_id}). Skipping start latency metric."
)
continue
# Calculate start latency in seconds:
# (actual sync start) - (last modified time)
if entity.time_last_modified_by_user and sync_record.sync_start_time:
if (
entity is not None
and entity.time_last_modified_by_user
and sync_record.sync_start_time
):
start_latency = (
sync_record.sync_start_time - entity.time_last_modified_by_user
).total_seconds()
@@ -541,8 +623,6 @@ def _collect_sync_metrics(db_session: Session, redis_std: Redis) -> list[Metric]
)
)
_mark_metric_as_emitted(redis_std, start_latency_key)
return metrics
@@ -607,9 +687,12 @@ def monitor_background_processes(self: Task, *, tenant_id: str | None) -> None:
for metric_fn in metric_functions:
metrics = metric_fn()
for metric in metrics:
metric.log()
metric.emit(tenant_id)
if metric.key:
# double check to make sure we aren't double-emitting metrics
if metric.key is not None and not _has_metric_been_emitted(
redis_std, metric.key
):
metric.log()
metric.emit(tenant_id)
_mark_metric_as_emitted(redis_std, metric.key)
task_logger.info("Successfully collected background metrics")

View File

@@ -25,13 +25,18 @@ from onyx.configs.constants import OnyxCeleryTask
from onyx.configs.constants import OnyxRedisLocks
from onyx.connectors.factory import instantiate_connector
from onyx.connectors.models import InputType
from onyx.db.connector import mark_ccpair_as_pruned
from onyx.db.connector_credential_pair import get_connector_credential_pair
from onyx.db.connector_credential_pair import get_connector_credential_pair_from_id
from onyx.db.connector_credential_pair import get_connector_credential_pairs
from onyx.db.document import get_documents_for_connector_credential_pair
from onyx.db.engine import get_session_with_tenant
from onyx.db.enums import ConnectorCredentialPairStatus
from onyx.db.enums import SyncStatus
from onyx.db.enums import SyncType
from onyx.db.models import ConnectorCredentialPair
from onyx.db.sync_record import insert_sync_record
from onyx.db.sync_record import update_sync_record_status
from onyx.redis.redis_connector import RedisConnector
from onyx.redis.redis_pool import get_redis_client
from onyx.utils.logger import pruning_ctx
@@ -40,6 +45,9 @@ from onyx.utils.logger import setup_logger
logger = setup_logger()
"""Jobs / utils for kicking off pruning tasks."""
def _is_pruning_due(cc_pair: ConnectorCredentialPair) -> bool:
"""Returns boolean indicating if pruning is due.
@@ -204,6 +212,14 @@ def try_creating_prune_generator_task(
priority=OnyxCeleryPriority.LOW,
)
# create before setting fence to avoid race condition where the monitoring
# task updates the sync record before it is created
insert_sync_record(
db_session=db_session,
entity_id=cc_pair.id,
sync_type=SyncType.PRUNING,
)
# set this only after all tasks have been added
redis_connector.prune.set_fence(True)
except Exception:
@@ -348,3 +364,52 @@ def connector_pruning_generator_task(
lock.release()
task_logger.info(f"Pruning generator finished: cc_pair={cc_pair_id}")
"""Monitoring pruning utils, called in monitor_vespa_sync"""
def monitor_ccpair_pruning_taskset(
tenant_id: str | None, key_bytes: bytes, r: Redis, db_session: Session
) -> None:
fence_key = key_bytes.decode("utf-8")
cc_pair_id_str = RedisConnector.get_id_from_fence_key(fence_key)
if cc_pair_id_str is None:
task_logger.warning(
f"monitor_ccpair_pruning_taskset: could not parse cc_pair_id from {fence_key}"
)
return
cc_pair_id = int(cc_pair_id_str)
redis_connector = RedisConnector(tenant_id, cc_pair_id)
if not redis_connector.prune.fenced:
return
initial = redis_connector.prune.generator_complete
if initial is None:
return
remaining = redis_connector.prune.get_remaining()
task_logger.info(
f"Connector pruning progress: cc_pair={cc_pair_id} remaining={remaining} initial={initial}"
)
if remaining > 0:
return
mark_ccpair_as_pruned(int(cc_pair_id), db_session)
task_logger.info(
f"Successfully pruned connector credential pair. cc_pair={cc_pair_id}"
)
update_sync_record_status(
db_session=db_session,
entity_id=cc_pair_id,
sync_type=SyncType.PRUNING,
sync_status=SyncStatus.SUCCESS,
num_docs_synced=initial,
)
redis_connector.prune.taskset_clear()
redis_connector.prune.generator_clear()
redis_connector.prune.set_fence(False)

View File

@@ -24,6 +24,10 @@ from onyx.access.access import get_access_for_document
from onyx.background.celery.apps.app_base import task_logger
from onyx.background.celery.celery_redis import celery_get_queue_length
from onyx.background.celery.celery_redis import celery_get_unacked_task_ids
from onyx.background.celery.tasks.doc_permission_syncing.tasks import (
monitor_ccpair_permissions_taskset,
)
from onyx.background.celery.tasks.pruning.tasks import monitor_ccpair_pruning_taskset
from onyx.background.celery.tasks.shared.RetryDocumentIndex import RetryDocumentIndex
from onyx.background.celery.tasks.shared.tasks import LIGHT_SOFT_TIME_LIMIT
from onyx.background.celery.tasks.shared.tasks import LIGHT_TIME_LIMIT
@@ -34,8 +38,6 @@ from onyx.configs.constants import OnyxCeleryQueues
from onyx.configs.constants import OnyxCeleryTask
from onyx.configs.constants import OnyxRedisLocks
from onyx.db.connector import fetch_connector_by_id
from onyx.db.connector import mark_cc_pair_as_permissions_synced
from onyx.db.connector import mark_ccpair_as_pruned
from onyx.db.connector_credential_pair import add_deletion_failure_message
from onyx.db.connector_credential_pair import (
delete_connector_credential_pair__no_commit,
@@ -72,9 +74,6 @@ from onyx.redis.redis_connector import RedisConnector
from onyx.redis.redis_connector_credential_pair import RedisConnectorCredentialPair
from onyx.redis.redis_connector_delete import RedisConnectorDelete
from onyx.redis.redis_connector_doc_perm_sync import RedisConnectorPermissionSync
from onyx.redis.redis_connector_doc_perm_sync import (
RedisConnectorPermissionSyncPayload,
)
from onyx.redis.redis_connector_index import RedisConnectorIndex
from onyx.redis.redis_connector_prune import RedisConnectorPrune
from onyx.redis.redis_document_set import RedisDocumentSet
@@ -653,83 +652,6 @@ def monitor_connector_deletion_taskset(
redis_connector.delete.reset()
def monitor_ccpair_pruning_taskset(
tenant_id: str | None, key_bytes: bytes, r: Redis, db_session: Session
) -> None:
fence_key = key_bytes.decode("utf-8")
cc_pair_id_str = RedisConnector.get_id_from_fence_key(fence_key)
if cc_pair_id_str is None:
task_logger.warning(
f"monitor_ccpair_pruning_taskset: could not parse cc_pair_id from {fence_key}"
)
return
cc_pair_id = int(cc_pair_id_str)
redis_connector = RedisConnector(tenant_id, cc_pair_id)
if not redis_connector.prune.fenced:
return
initial = redis_connector.prune.generator_complete
if initial is None:
return
remaining = redis_connector.prune.get_remaining()
task_logger.info(
f"Connector pruning progress: cc_pair={cc_pair_id} remaining={remaining} initial={initial}"
)
if remaining > 0:
return
mark_ccpair_as_pruned(int(cc_pair_id), db_session)
task_logger.info(
f"Successfully pruned connector credential pair. cc_pair={cc_pair_id}"
)
redis_connector.prune.taskset_clear()
redis_connector.prune.generator_clear()
redis_connector.prune.set_fence(False)
def monitor_ccpair_permissions_taskset(
tenant_id: str | None, key_bytes: bytes, r: Redis, db_session: Session
) -> None:
fence_key = key_bytes.decode("utf-8")
cc_pair_id_str = RedisConnector.get_id_from_fence_key(fence_key)
if cc_pair_id_str is None:
task_logger.warning(
f"monitor_ccpair_permissions_taskset: could not parse cc_pair_id from {fence_key}"
)
return
cc_pair_id = int(cc_pair_id_str)
redis_connector = RedisConnector(tenant_id, cc_pair_id)
if not redis_connector.permissions.fenced:
return
initial = redis_connector.permissions.generator_complete
if initial is None:
return
remaining = redis_connector.permissions.get_remaining()
task_logger.info(
f"Permissions sync progress: cc_pair={cc_pair_id} remaining={remaining} initial={initial}"
)
if remaining > 0:
return
payload: RedisConnectorPermissionSyncPayload | None = (
redis_connector.permissions.payload
)
start_time: datetime | None = payload.started if payload else None
mark_cc_pair_as_permissions_synced(db_session, int(cc_pair_id), start_time)
task_logger.info(f"Successfully synced permissions for cc_pair={cc_pair_id}")
redis_connector.permissions.reset()
def monitor_ccpair_indexing_taskset(
tenant_id: str | None, key_bytes: bytes, r: Redis, db_session: Session
) -> None:

View File

@@ -478,6 +478,12 @@ INDEXING_SIZE_WARNING_THRESHOLD = int(
# 0 disables this behavior and is the default.
INDEXING_TRACER_INTERVAL = int(os.environ.get("INDEXING_TRACER_INTERVAL") or 0)
# Enable multi-threaded embedding model calls for parallel processing
# Note: only applies for API-based embedding models
INDEXING_EMBEDDING_MODEL_NUM_THREADS = int(
os.environ.get("INDEXING_EMBEDDING_MODEL_NUM_THREADS") or 1
)
# During an indexing attempt, specifies the number of batches which are allowed to
# exception without aborting the attempt.
INDEXING_EXCEPTION_LIMIT = int(os.environ.get("INDEXING_EXCEPTION_LIMIT") or 0)
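One detail worth noting in this config pattern: `int(os.environ.get(NAME) or 1)` falls back to 1 both when the variable is unset and when it is set to the empty string, whereas `int(os.environ.get(NAME, "1"))` would raise `ValueError` on `""`. A quick demonstration:

import os

os.environ["INDEXING_EMBEDDING_MODEL_NUM_THREADS"] = ""
# "" is falsy, so `or 1` still yields 1 instead of int("") raising
num_threads = int(os.environ.get("INDEXING_EMBEDDING_MODEL_NUM_THREADS") or 1)
assert num_threads == 1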

View File

@@ -1,3 +1,5 @@
from concurrent.futures import as_completed
from concurrent.futures import ThreadPoolExecutor
from io import BytesIO
from typing import Any
@@ -274,6 +276,11 @@ class AirtableConnector(LoadConnector):
field_val = fields.get(field_name)
field_type = field_schema.type
logger.debug(
f"Processing field '{field_name}' of type '{field_type}' "
f"for record '{record_id}'."
)
field_sections, field_metadata = self._process_field(
field_id=field_schema.id,
field_name=field_name,
@@ -327,19 +334,45 @@ class AirtableConnector(LoadConnector):
primary_field_name = field.name
break
record_documents: list[Document] = []
for record in records:
document = self._process_record(
record=record,
table_schema=table_schema,
primary_field_name=primary_field_name,
)
if document:
record_documents.append(document)
logger.info(f"Starting to process Airtable records for {table.name}.")
# Process records in parallel batches using ThreadPoolExecutor
PARALLEL_BATCH_SIZE = 16
max_workers = min(PARALLEL_BATCH_SIZE, len(records))
# Process records in batches
for i in range(0, len(records), PARALLEL_BATCH_SIZE):
batch_records = records[i : i + PARALLEL_BATCH_SIZE]
record_documents: list[Document] = []
with ThreadPoolExecutor(max_workers=max_workers) as executor:
# Submit batch tasks
future_to_record = {
executor.submit(
self._process_record,
record=record,
table_schema=table_schema,
primary_field_name=primary_field_name,
): record
for record in batch_records
}
# Wait for all tasks in this batch to complete
for future in as_completed(future_to_record):
record = future_to_record[future]
try:
document = future.result()
if document:
record_documents.append(document)
except Exception as e:
logger.exception(f"Failed to process record {record['id']}")
raise e
# After batch is complete, yield if we've hit the batch size
if len(record_documents) >= self.batch_size:
yield record_documents
record_documents = []
# Yield any remaining records
if record_documents:
yield record_documents
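The rewrite turns a sequential per-record loop into fixed-size parallel batches: submit up to 16 records at once, gather results as they complete, and yield whenever enough documents accumulate. A stripped-down sketch of that batch-and-gather shape, with a toy `process` standing in for `_process_record`; `as_completed` returns results in completion order, which is fine here because records are independent:

from concurrent.futures import ThreadPoolExecutor, as_completed

def process(record: int) -> int | None:
    return record * 2 if record % 3 else None  # hypothetical per-record work

records = list(range(10))
PARALLEL_BATCH_SIZE = 4  # the connector above uses 16

for i in range(0, len(records), PARALLEL_BATCH_SIZE):
    batch = records[i : i + PARALLEL_BATCH_SIZE]
    with ThreadPoolExecutor(max_workers=min(PARALLEL_BATCH_SIZE, len(batch))) as executor:
        futures = {executor.submit(process, r): r for r in batch}
        docs = []
        for future in as_completed(futures):
            result = future.result()  # re-raises any worker exception
            if result is not None:
                docs.append(result)
    print(docs)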

View File

@@ -1,4 +1,5 @@
import sys
import time
from datetime import datetime
from onyx.connectors.interfaces import BaseConnector
@@ -45,7 +46,17 @@ class ConnectorRunner:
def run(self) -> GenerateDocumentsOutput:
"""Adds additional exception logging to the connector."""
try:
yield from self.doc_batch_generator
start = time.monotonic()
for batch in self.doc_batch_generator:
# log how long the connector took to build this batch
logger.debug(
f"Connector took {time.monotonic() - start} seconds to build a batch."
)
yield batch
start = time.monotonic()
except Exception:
exc_type, _, exc_traceback = sys.exc_info()
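The timing change is subtler than it looks: `start` is reset after `yield batch`, which only runs when the consumer asks for the next batch, so each logged interval covers connector work alone and excludes whatever the caller did with the previous batch. A self-contained sketch of the same wrapper:

import time
from collections.abc import Iterator

def timed(batches: Iterator[list[str]]) -> Iterator[list[str]]:
    start = time.monotonic()
    for batch in batches:
        print(f"batch of {len(batch)} built in {time.monotonic() - start:.3f}s")
        yield batch
        # runs when the generator is resumed, so consumer time is excluded
        start = time.monotonic()

for batch in timed(iter([["a"], ["b", "c"]])):
    pass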

View File

@@ -150,6 +150,16 @@ class Document(DocumentBase):
id: str # This must be unique or during indexing/reindexing, chunks will be overwritten
source: DocumentSource
def get_total_char_length(self) -> int:
"""Calculate the total character length of the document including sections, metadata, and identifiers."""
section_length = sum(len(section.text) for section in self.sections)
identifier_length = len(self.semantic_identifier) + len(self.title or "")
metadata_length = sum(
len(k) + len(v) if isinstance(v, str) else len(k) + sum(len(x) for x in v)
for k, v in self.metadata.items()
)
return section_length + identifier_length + metadata_length
def to_short_descriptor(self) -> str:
"""Used when logging the identity of a document"""
return f"ID: '{self.id}'; Semantic ID: '{self.semantic_identifier}'"

View File

@@ -127,13 +127,6 @@ class SharepointConnector(LoadConnector, PollConnector):
start: datetime | None = None,
end: datetime | None = None,
) -> list[tuple[DriveItem, str]]:
filter_str = ""
if start is not None and end is not None:
filter_str = (
f"last_modified_datetime ge {start.isoformat()} and "
f"last_modified_datetime le {end.isoformat()}"
)
final_driveitems: list[tuple[DriveItem, str]] = []
try:
site = self.graph_client.sites.get_by_url(site_descriptor.url)
@@ -167,9 +160,10 @@ class SharepointConnector(LoadConnector, PollConnector):
root_folder = root_folder.get_by_path(folder_part)
# Get all items recursively
query = root_folder.get_files(True, 1000)
if filter_str:
query = query.filter(filter_str)
query = root_folder.get_files(
recursive=True,
page_size=1000,
)
driveitems = query.execute_query()
logger.debug(
f"Found {len(driveitems)} items in drive '{drive.name}'"
@@ -180,11 +174,12 @@ class SharepointConnector(LoadConnector, PollConnector):
"Shared Documents" if drive.name == "Documents" else drive.name
)
# Filter items based on folder path if specified
if site_descriptor.folder_path:
# Filter items to ensure they're in the specified folder or its subfolders
# The path will be in format: /drives/{drive_id}/root:/folder/path
filtered_driveitems = [
(item, drive_name)
driveitems = [
item
for item in driveitems
if any(
path_part == site_descriptor.folder_path
@@ -196,7 +191,7 @@ class SharepointConnector(LoadConnector, PollConnector):
)[1].split("/")
)
]
if len(filtered_driveitems) == 0:
if len(driveitems) == 0:
all_paths = [
item.parent_reference.path for item in driveitems
]
@@ -204,11 +199,23 @@ class SharepointConnector(LoadConnector, PollConnector):
f"Nothing found for folder '{site_descriptor.folder_path}' "
f"in; any of valid paths: {all_paths}"
)
final_driveitems.extend(filtered_driveitems)
else:
final_driveitems.extend(
[(item, drive_name) for item in driveitems]
# Filter items based on time window if specified
if start is not None and end is not None:
driveitems = [
item
for item in driveitems
if start
<= item.last_modified_datetime.replace(tzinfo=timezone.utc)
<= end
]
logger.debug(
f"Found {len(driveitems)} items within time window in drive '{drive.name}'"
)
for item in driveitems:
final_driveitems.append((item, drive_name))
except Exception as e:
# Some drives might not be accessible
logger.warning(f"Failed to process drive: {str(e)}")

View File

@@ -28,6 +28,9 @@ class SyncType(str, PyEnum):
DOCUMENT_SET = "document_set"
USER_GROUP = "user_group"
CONNECTOR_DELETION = "connector_deletion"
PRUNING = "pruning" # not really a sync, but close enough
EXTERNAL_PERMISSIONS = "external_permissions"
EXTERNAL_GROUP = "external_group"
def __str__(self) -> str:
return self.value

View File

@@ -161,9 +161,7 @@ class User(SQLAlchemyBaseUserTableUUID, Base):
hidden_assistants: Mapped[list[int]] = mapped_column(
postgresql.JSONB(), nullable=False, default=[]
)
recent_assistants: Mapped[list[dict]] = mapped_column(
postgresql.JSONB(), nullable=False, default=list, server_default="[]"
)
pinned_assistants: Mapped[list[int] | None] = mapped_column(
postgresql.JSONB(), nullable=True, default=None
)

View File

@@ -11,7 +11,7 @@ from sqlalchemy import Select
from sqlalchemy import select
from sqlalchemy import update
from sqlalchemy.orm import aliased
from sqlalchemy.orm import joinedload
from sqlalchemy.orm import selectinload
from sqlalchemy.orm import Session
from onyx.auth.schemas import UserRole
@@ -291,8 +291,9 @@ def get_personas_for_user(
include_deleted: bool = False,
joinedload_all: bool = False,
) -> Sequence[Persona]:
stmt = select(Persona).distinct()
stmt = _add_user_filters(stmt=stmt, user=user, get_editable=get_editable)
stmt = select(Persona)
stmt = _add_user_filters(stmt, user, get_editable)
if not include_default:
stmt = stmt.where(Persona.builtin_persona.is_(False))
if not include_slack_bot_personas:
@@ -302,14 +303,16 @@ def get_personas_for_user(
if joinedload_all:
stmt = stmt.options(
joinedload(Persona.prompts),
joinedload(Persona.tools),
joinedload(Persona.document_sets),
joinedload(Persona.groups),
joinedload(Persona.users),
selectinload(Persona.prompts),
selectinload(Persona.tools),
selectinload(Persona.document_sets),
selectinload(Persona.groups),
selectinload(Persona.users),
selectinload(Persona.labels),
)
return db_session.execute(stmt).unique().scalars().all()
results = db_session.execute(stmt).scalars().all()
return results
def get_personas(db_session: Session) -> Sequence[Persona]:
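The switch from `joinedload` to `selectinload` is the heart of this optimization: joining five collections onto one SELECT multiplies rows (hence the old `.distinct()` and `.unique()` calls, both now gone), while `selectinload` issues one additional `SELECT ... WHERE persona_id IN (...)` per relationship and never inflates the base row count. A minimal sketch, assuming the `Persona` model and session from the surrounding code:

from sqlalchemy import select
from sqlalchemy.orm import selectinload

# One query for personas plus one IN-clause query per relationship,
# so no row explosion and no need to de-duplicate afterwards
stmt = select(Persona).options(
    selectinload(Persona.prompts),
    selectinload(Persona.tools),
)
personas = db_session.execute(stmt).scalars().all()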

View File

@@ -380,6 +380,15 @@ def index_doc_batch(
new_docs=0, total_docs=len(filtered_documents), total_chunks=0
)
doc_descriptors = [
{
"doc_id": doc.id,
"doc_length": doc.get_total_char_length(),
}
for doc in ctx.updatable_docs
]
logger.debug(f"Starting indexing process for documents: {doc_descriptors}")
logger.debug("Starting chunking")
chunks: list[DocAwareChunk] = chunker.chunk(ctx.updatable_docs)

View File

@@ -1,6 +1,8 @@
import threading
import time
from collections.abc import Callable
from concurrent.futures import as_completed
from concurrent.futures import ThreadPoolExecutor
from functools import wraps
from typing import Any
@@ -11,6 +13,7 @@ from requests import RequestException
from requests import Response
from retry import retry
from onyx.configs.app_configs import INDEXING_EMBEDDING_MODEL_NUM_THREADS
from onyx.configs.app_configs import LARGE_CHUNK_RATIO
from onyx.configs.app_configs import SKIP_WARM_UP
from onyx.configs.model_configs import BATCH_SIZE_ENCODE_CHUNKS
@@ -155,6 +158,7 @@ class EmbeddingModel:
text_type: EmbedTextType,
batch_size: int,
max_seq_length: int,
num_threads: int = INDEXING_EMBEDDING_MODEL_NUM_THREADS,
) -> list[Embedding]:
text_batches = batch_list(texts, batch_size)
@@ -163,12 +167,15 @@ class EmbeddingModel:
)
embeddings: list[Embedding] = []
for idx, text_batch in enumerate(text_batches, start=1):
def process_batch(
batch_idx: int, text_batch: list[str]
) -> tuple[int, list[Embedding]]:
if self.callback:
if self.callback.should_stop():
raise RuntimeError("_batch_encode_texts detected stop signal")
logger.debug(f"Encoding batch {idx} of {len(text_batches)}")
logger.debug(f"Encoding batch {batch_idx} of {len(text_batches)}")
embed_request = EmbedRequest(
model_name=self.model_name,
texts=text_batch,
@@ -185,10 +192,43 @@ class EmbeddingModel:
)
response = self._make_model_server_request(embed_request)
embeddings.extend(response.embeddings)
return batch_idx, response.embeddings
# only multi thread if:
# 1. num_threads is greater than 1
# 2. we are using an API-based embedding model (provider_type is not None)
# 3. there are more than 1 batch (no point in threading if only 1)
if num_threads >= 1 and self.provider_type and len(text_batches) > 1:
with ThreadPoolExecutor(max_workers=num_threads) as executor:
future_to_batch = {
executor.submit(process_batch, idx, batch): idx
for idx, batch in enumerate(text_batches, start=1)
}
# Collect results in order
batch_results: list[tuple[int, list[Embedding]]] = []
for future in as_completed(future_to_batch):
try:
result = future.result()
batch_results.append(result)
if self.callback:
self.callback.progress("_batch_encode_texts", 1)
except Exception as e:
logger.exception("Embedding model failed to process batch")
raise e
# Sort by batch index and extend embeddings
batch_results.sort(key=lambda x: x[0])
for _, batch_embeddings in batch_results:
embeddings.extend(batch_embeddings)
else:
# Original sequential processing
for idx, text_batch in enumerate(text_batches, start=1):
_, batch_embeddings = process_batch(idx, text_batch)
embeddings.extend(batch_embeddings)
if self.callback:
self.callback.progress("_batch_encode_texts", 1)
if self.callback:
self.callback.progress("_batch_encode_texts", 1)
return embeddings
def encode(
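The threaded branch keeps embeddings in input order by tagging each future with its batch index and sorting before extending. The same idea as a small reusable helper (a sketch, not code from this change):

from collections.abc import Callable
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import TypeVar

T = TypeVar("T")
R = TypeVar("R")

def ordered_parallel_map(fn: Callable[[T], R], items: list[T], max_workers: int) -> list[R]:
    # Completion order is arbitrary, so slot each result by its input index
    results: list[R | None] = [None] * len(items)
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = {executor.submit(fn, item): i for i, item in enumerate(items)}
        for future in as_completed(futures):
            results[futures[future]] = future.result()  # re-raises worker errors
    return results  # type: ignore[return-value]

assert ordered_parallel_map(lambda x: x * x, [1, 2, 3], max_workers=2) == [1, 4, 9]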

View File

@@ -92,7 +92,7 @@ class RedisConnectorPrune:
if fence_bytes is None:
return None
fence_int = cast(int, fence_bytes)
fence_int = int(cast(bytes, fence_bytes))
return fence_int
@generator_complete.setter
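This one-line fix matters because redis-py returns raw bytes: `cast(int, fence_bytes)` only changes the static type, leaving the runtime value as `bytes`, so later comparisons against ints misbehave. `int()` performs the actual conversion and accepts ASCII-digit bytes directly:

fence_bytes = b"3"
# before: cast(int, fence_bytes) left the value as bytes at runtime
# after:  a real conversion
fence_int = int(fence_bytes)
assert fence_int == 3 and isinstance(fence_int, int)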

View File

@@ -44,7 +44,6 @@ class UserPreferences(BaseModel):
chosen_assistants: list[int] | None = None
hidden_assistants: list[int] = []
visible_assistants: list[int] = []
recent_assistants: list[int] | None = None
default_model: str | None = None
auto_scroll: bool | None = None
pinned_assistants: list[int] | None = None

View File

@@ -572,59 +572,6 @@ class ChosenDefaultModelRequest(BaseModel):
default_model: str | None = None
class RecentAssistantsRequest(BaseModel):
current_assistant: int
def update_recent_assistants(
recent_assistants: list[int] | None, current_assistant: int
) -> list[int]:
if recent_assistants is None:
recent_assistants = []
else:
recent_assistants = [x for x in recent_assistants if x != current_assistant]
# Add current assistant to start of list
recent_assistants.insert(0, current_assistant)
# Keep only the 5 most recent assistants
recent_assistants = recent_assistants[:5]
return recent_assistants
@router.patch("/user/recent-assistants")
def update_user_recent_assistants(
request: RecentAssistantsRequest,
user: User | None = Depends(current_user),
db_session: Session = Depends(get_session),
) -> None:
if user is None:
if AUTH_TYPE == AuthType.DISABLED:
store = get_kv_store()
no_auth_user = fetch_no_auth_user(store)
preferences = no_auth_user.preferences
recent_assistants = preferences.recent_assistants
updated_preferences = update_recent_assistants(
recent_assistants, request.current_assistant
)
preferences.recent_assistants = updated_preferences
set_no_auth_user_preferences(store, preferences)
return
else:
raise RuntimeError("This should never happen")
recent_assistants = UserInfo.from_model(user).preferences.recent_assistants
updated_recent_assistants = update_recent_assistants(
recent_assistants, request.current_assistant
)
db_session.execute(
update(User)
.where(User.id == user.id) # type: ignore
.values(recent_assistants=updated_recent_assistants)
)
db_session.commit()
@router.patch("/shortcut-enabled")
def update_user_shortcut_enabled(
shortcut_enabled: bool,
@@ -731,30 +678,6 @@ class ChosenAssistantsRequest(BaseModel):
chosen_assistants: list[int]
@router.patch("/user/assistant-list")
def update_user_assistant_list(
request: ChosenAssistantsRequest,
user: User | None = Depends(current_user),
db_session: Session = Depends(get_session),
) -> None:
if user is None:
if AUTH_TYPE == AuthType.DISABLED:
store = get_kv_store()
no_auth_user = fetch_no_auth_user(store)
no_auth_user.preferences.chosen_assistants = request.chosen_assistants
set_no_auth_user_preferences(store, no_auth_user.preferences)
return
else:
raise RuntimeError("This should never happen")
db_session.execute(
update(User)
.where(User.id == user.id) # type: ignore
.values(chosen_assistants=request.chosen_assistants)
)
db_session.commit()
def update_assistant_visibility(
preferences: UserPreferences, assistant_id: int, show: bool
) -> UserPreferences:

View File

@@ -81,6 +81,7 @@ hubspot-api-client==8.1.0
asana==5.0.8
dropbox==11.36.2
boto3-stubs[s3]==1.34.133
shapely==2.0.6
stripe==10.12.0
urllib3==2.2.3
mistune==0.8.4

View File

@@ -176,3 +176,35 @@ def test_sharepoint_connector_other_library(
for expected in expected_documents:
doc = find_document(found_documents, expected.semantic_identifier)
verify_document_content(doc, expected)
def test_sharepoint_connector_poll(
mock_get_unstructured_api_key: MagicMock,
sharepoint_credentials: dict[str, str],
) -> None:
# Initialize connector with the base site URL
connector = SharepointConnector(
sites=["https://danswerai.sharepoint.com/sites/sharepoint-tests"]
)
# Load credentials
connector.load_credentials(sharepoint_credentials)
# Set time window to only capture test1.docx (modified at 2025-01-28 20:51:42+00:00)
start = datetime(2025, 1, 28, 20, 51, 30, tzinfo=timezone.utc) # 12 seconds before
end = datetime(2025, 1, 28, 20, 51, 50, tzinfo=timezone.utc) # 8 seconds after
# Get documents within the time window
document_batches = list(connector._fetch_from_sharepoint(start=start, end=end))
found_documents: list[Document] = [
doc for batch in document_batches for doc in batch
]
# Should only find test1.docx
assert len(found_documents) == 1, "Should only find one document in the time window"
doc = found_documents[0]
assert doc.semantic_identifier == "test1.docx"
verify_document_metadata(doc)
verify_document_content(
doc, [d for d in EXPECTED_DOCUMENTS if d.semantic_identifier == "test1.docx"][0]
)

View File

@@ -1,75 +0,0 @@
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
name: celery-worker-heavy-hpa
spec:
scaleTargetRef:
apiVersion: apps/v1
kind: Deployment
name: celery-worker-heavy
minReplicas: 1
maxReplicas: 5
metrics:
- type: Resource
resource:
name: cpu
target:
type: Utilization
averageUtilization: 60
---
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
name: celery-worker-light-hpa
spec:
scaleTargetRef:
apiVersion: apps/v1
kind: Deployment
name: celery-worker-light
minReplicas: 1
maxReplicas: 10
metrics:
- type: Resource
resource:
name: cpu
target:
type: Utilization
averageUtilization: 70
---
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
name: celery-worker-indexing-hpa
spec:
scaleTargetRef:
apiVersion: apps/v1
kind: Deployment
name: celery-worker-indexing
minReplicas: 1
maxReplicas: 10
metrics:
- type: Resource
resource:
name: cpu
target:
type: Utilization
averageUtilization: 70
---
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
name: celery-worker-monitoring-hpa
spec:
scaleTargetRef:
apiVersion: apps/v1
kind: Deployment
name: celery-worker-indexing
minReplicas: 1
maxReplicas: 4
metrics:
- type: Resource
resource:
name: cpu
target:
type: Utilization
averageUtilization: 70

View File

@@ -1,13 +0,0 @@
apiVersion: keda.sh/v1alpha1
kind: TriggerAuthentication
metadata:
name: celery-worker-auth
namespace: onyx
spec:
secretTargetRef:
- parameter: host
name: keda-redis-secret
key: host
- parameter: password
name: keda-redis-secret
key: password

View File

@@ -1,53 +0,0 @@
apiVersion: keda.sh/v1alpha1
kind: ScaledObject
metadata:
name: celery-worker-indexing-scaledobject
namespace: onyx
labels:
app: celery-worker-indexing
spec:
scaleTargetRef:
name: celery-worker-indexing
minReplicaCount: 1
maxReplicaCount: 30
triggers:
- type: redis
metadata:
sslEnabled: "true"
port: "6379"
enableTLS: "true"
listName: connector_indexing
listLength: "1"
databaseIndex: "15"
authenticationRef:
name: celery-worker-auth
- type: redis
metadata:
sslEnabled: "true"
port: "6379"
enableTLS: "true"
listName: connector_indexing:2
listLength: "1"
databaseIndex: "15"
authenticationRef:
name: celery-worker-auth
- type: redis
metadata:
sslEnabled: "true"
port: "6379"
enableTLS: "true"
listName: connector_indexing:3
listLength: "1"
databaseIndex: "15"
authenticationRef:
name: celery-worker-auth
- type: cpu
metadata:
type: Utilization
value: "70"
- type: memory
metadata:
type: Utilization
value: "70"

View File

@@ -1,58 +0,0 @@
apiVersion: keda.sh/v1alpha1
kind: ScaledObject
metadata:
name: celery-worker-light-scaledobject
namespace: onyx
labels:
app: celery-worker-light
spec:
scaleTargetRef:
name: celery-worker-light
minReplicaCount: 5
maxReplicaCount: 20
triggers:
- type: redis
metadata:
port: "6379"
enableTLS: "true"
listName: vespa_metadata_sync
listLength: "1"
databaseIndex: "15"
authenticationRef:
name: celery-worker-auth
- type: redis
metadata:
port: "6379"
enableTLS: "true"
listName: vespa_metadata_sync:2
listLength: "1"
databaseIndex: "15"
authenticationRef:
name: celery-worker-auth
- type: redis
metadata:
port: "6379"
enableTLS: "true"
listName: vespa_metadata_sync:3
listLength: "1"
databaseIndex: "15"
authenticationRef:
name: celery-worker-auth
- type: redis
metadata:
port: "6379"
enableTLS: "true"
listName: connector_deletion
listLength: "1"
databaseIndex: "15"
authenticationRef:
name: celery-worker-auth
- type: redis
metadata:
port: "6379"
enableTLS: "true"
listName: connector_deletion:2
listLength: "1"
databaseIndex: "15"
authenticationRef:
name: celery-worker-auth

View File

@@ -1,70 +0,0 @@
apiVersion: keda.sh/v1alpha1
kind: ScaledObject
metadata:
name: celery-worker-primary-scaledobject
namespace: onyx
labels:
app: celery-worker-primary
spec:
scaleTargetRef:
name: celery-worker-primary
pollingInterval: 15 # Check every 15 seconds
cooldownPeriod: 30 # Wait 30 seconds before scaling down
minReplicaCount: 4
maxReplicaCount: 4
triggers:
- type: redis
metadata:
port: "6379"
enableTLS: "true"
listName: celery
listLength: "1"
databaseIndex: "15"
authenticationRef:
name: celery-worker-auth
- type: redis
metadata:
port: "6379"
enableTLS: "true"
listName: celery:1
listLength: "1"
databaseIndex: "15"
authenticationRef:
name: celery-worker-auth
- type: redis
metadata:
port: "6379"
enableTLS: "true"
listName: celery:2
listLength: "1"
databaseIndex: "15"
authenticationRef:
name: celery-worker-auth
- type: redis
metadata:
port: "6379"
enableTLS: "true"
listName: celery:3
listLength: "1"
databaseIndex: "15"
authenticationRef:
name: celery-worker-auth
- type: redis
metadata:
port: "6379"
enableTLS: "true"
listName: periodic_tasks
listLength: "1"
databaseIndex: "15"
authenticationRef:
name: celery-worker-auth
- type: redis
metadata:
port: "6379"
enableTLS: "true"
listName: periodic_tasks:2
listLength: "1"
databaseIndex: "15"
authenticationRef:
name: celery-worker-auth

View File

@@ -1,19 +0,0 @@
apiVersion: keda.sh/v1alpha1
kind: ScaledObject
metadata:
name: indexing-model-server-scaledobject
namespace: onyx
labels:
app: indexing-model-server
spec:
scaleTargetRef:
name: indexing-model-server-deployment
pollingInterval: 15 # Check every 15 seconds
cooldownPeriod: 30 # Wait 30 seconds before scaling down
minReplicaCount: 10
maxReplicaCount: 10
triggers:
- type: cpu
metadata:
type: Utilization
value: "70"

View File

@@ -1,9 +0,0 @@
apiVersion: v1
kind: Secret
metadata:
name: keda-redis-secret
namespace: onyx
type: Opaque
data:
host: { base64 encoded host here }
password: { base64 encoded password here }

View File

@@ -1,44 +0,0 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: celery-beat
spec:
replicas: 1
selector:
matchLabels:
app: celery-beat
template:
metadata:
labels:
app: celery-beat
spec:
containers:
- name: celery-beat
image: onyxdotapp/onyx-backend-cloud:v0.14.0-cloud.beta.21
imagePullPolicy: IfNotPresent
command:
[
"celery",
"-A",
"onyx.background.celery.versioned_apps.beat",
"beat",
"--loglevel=INFO",
]
env:
- name: REDIS_PASSWORD
valueFrom:
secretKeyRef:
name: onyx-secrets
key: redis_password
- name: ONYX_VERSION
value: "v0.11.0-cloud.beta.8"
envFrom:
- configMapRef:
name: env-configmap
resources:
requests:
cpu: "250m"
memory: "512Mi"
limits:
cpu: "500m"
memory: "1Gi"

View File

@@ -1,60 +0,0 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: celery-worker-heavy
spec:
replicas: 2
selector:
matchLabels:
app: celery-worker-heavy
template:
metadata:
labels:
app: celery-worker-heavy
spec:
containers:
- name: celery-worker-heavy
image: onyxdotapp/onyx-backend-cloud:v0.14.0-cloud.beta.21
imagePullPolicy: IfNotPresent
command:
[
"celery",
"-A",
"onyx.background.celery.versioned_apps.heavy",
"worker",
"--loglevel=INFO",
"--hostname=heavy@%n",
"-Q",
"connector_pruning,connector_doc_permissions_sync,connector_external_group_sync",
]
env:
- name: REDIS_PASSWORD
valueFrom:
secretKeyRef:
name: onyx-secrets
key: redis_password
- name: ONYX_VERSION
value: "v0.11.0-cloud.beta.8"
envFrom:
- configMapRef:
name: env-configmap
volumeMounts:
- name: vespa-certificates
mountPath: "/app/certs"
readOnly: true
resources:
requests:
cpu: "1000m"
memory: "2Gi"
limits:
cpu: "2000m"
memory: "4Gi"
volumes:
- name: vespa-certificates
secret:
secretName: vespa-certificates
items:
- key: cert.pem
path: cert.pem
- key: key.pem
path: key.pem

View File

@@ -1,62 +0,0 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: celery-worker-indexing
spec:
replicas: 1
selector:
matchLabels:
app: celery-worker-indexing
template:
metadata:
labels:
app: celery-worker-indexing
spec:
containers:
- name: celery-worker-indexing
image: onyxdotapp/onyx-backend-cloud:v0.14.0-cloud.beta.21
imagePullPolicy: IfNotPresent
command:
[
"celery",
"-A",
"onyx.background.celery.versioned_apps.indexing",
"worker",
"--loglevel=INFO",
"--hostname=indexing@%n",
"-Q",
"connector_indexing",
"--prefetch-multiplier=1",
"--concurrency=10",
]
env:
- name: REDIS_PASSWORD
valueFrom:
secretKeyRef:
name: onyx-secrets
key: redis_password
- name: ONYX_VERSION
value: "v0.11.0-cloud.beta.8"
envFrom:
- configMapRef:
name: env-configmap
volumeMounts:
- name: vespa-certificates
mountPath: "/app/certs"
readOnly: true
resources:
requests:
cpu: "500m"
memory: "4Gi"
limits:
cpu: "1000m"
memory: "8Gi"
volumes:
- name: vespa-certificates
secret:
secretName: vespa-certificates
items:
- key: cert.pem
path: cert.pem
- key: key.pem
path: key.pem

View File

@@ -1,62 +0,0 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: celery-worker-light
spec:
replicas: 1
selector:
matchLabels:
app: celery-worker-light
template:
metadata:
labels:
app: celery-worker-light
spec:
containers:
- name: celery-worker-light
image: onyxdotapp/onyx-backend-cloud:v0.14.0-cloud.beta.21
imagePullPolicy: IfNotPresent
command:
[
"celery",
"-A",
"onyx.background.celery.versioned_apps.light",
"worker",
"--loglevel=INFO",
"--hostname=light@%n",
"-Q",
"vespa_metadata_sync,connector_deletion,doc_permissions_upsert",
"--prefetch-multiplier=1",
"--concurrency=10",
]
env:
- name: REDIS_PASSWORD
valueFrom:
secretKeyRef:
name: onyx-secrets
key: redis_password
- name: ONYX_VERSION
value: "v0.11.0-cloud.beta.8"
envFrom:
- configMapRef:
name: env-configmap
volumeMounts:
- name: vespa-certificates
mountPath: "/app/certs"
readOnly: true
resources:
requests:
cpu: "500m"
memory: "1Gi"
limits:
cpu: "1000m"
memory: "2Gi"
volumes:
- name: vespa-certificates
secret:
secretName: vespa-certificates
items:
- key: cert.pem
path: cert.pem
- key: key.pem
path: key.pem

View File

@@ -1,62 +0,0 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: celery-worker-monitoring
spec:
replicas: 2
selector:
matchLabels:
app: celery-worker-monitoring
template:
metadata:
labels:
app: celery-worker-monitoring
spec:
containers:
- name: celery-worker-monitoring
image: onyxdotapp/onyx-backend-cloud:v0.14.0-cloud.beta.21
imagePullPolicy: IfNotPresent
command:
[
"celery",
"-A",
"onyx.background.celery.versioned_apps.monitoring",
"worker",
"--loglevel=INFO",
"--hostname=monitoring@%n",
"-Q",
"monitoring",
"--prefetch-multiplier=8",
"--concurrency=8",
]
env:
- name: REDIS_PASSWORD
valueFrom:
secretKeyRef:
name: onyx-secrets
key: redis_password
- name: ONYX_VERSION
value: "v0.11.0-cloud.beta.8"
envFrom:
- configMapRef:
name: env-configmap
volumeMounts:
- name: vespa-certificates
mountPath: "/app/certs"
readOnly: true
resources:
requests:
cpu: "1000m"
memory: "1Gi"
limits:
cpu: "1000m"
memory: "1Gi"
volumes:
- name: vespa-certificates
secret:
secretName: vespa-certificates
items:
- key: cert.pem
path: cert.pem
- key: key.pem
path: key.pem

View File

@@ -1,62 +0,0 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: celery-worker-primary
spec:
replicas: 1
selector:
matchLabels:
app: celery-worker-primary
template:
metadata:
labels:
app: celery-worker-primary
spec:
containers:
- name: celery-worker-primary
image: onyxdotapp/onyx-backend-cloud:v0.14.0-cloud.beta.21
imagePullPolicy: IfNotPresent
command:
[
"celery",
"-A",
"onyx.background.celery.versioned_apps.primary",
"worker",
"--loglevel=INFO",
"--hostname=primary@%n",
"-Q",
"celery,periodic_tasks",
"--prefetch-multiplier=1",
"--concurrency=10",
]
env:
- name: REDIS_PASSWORD
valueFrom:
secretKeyRef:
name: onyx-secrets
key: redis_password
- name: ONYX_VERSION
value: "v0.11.0-cloud.beta.8"
envFrom:
- configMapRef:
name: env-configmap
volumeMounts:
- name: vespa-certificates
mountPath: "/app/certs"
readOnly: true
resources:
requests:
cpu: "500m"
memory: "1Gi"
limits:
cpu: "1000m"
memory: "2Gi"
volumes:
- name: vespa-certificates
secret:
secretName: vespa-certificates
items:
- key: cert.pem
path: cert.pem
- key: key.pem
path: key.pem

View File

@@ -1,41 +1,16 @@
import { defineConfig, devices } from "@playwright/test";
export default defineConfig({
workers: 1, // temporary change to see if single threaded testing stabilizes the tests
testDir: "./tests/e2e", // Folder for test files
reporter: "list",
// Configure paths for screenshots
// expect: {
// toMatchSnapshot: {
// threshold: 0.2, // Adjust the threshold for visual diffs
// },
// },
// reporter: [["html", { outputFolder: "test-results/output/report" }]], // HTML report location
// outputDir: "test-results/output/screenshots", // Set output folder for test artifacts
globalSetup: require.resolve("./tests/e2e/global-setup"),
projects: [
{
// dependency for admin workflows
name: "admin_setup",
testMatch: /.*\admin_auth\.setup\.ts/,
},
{
// tests admin workflows
name: "chromium-admin",
grep: /@admin/,
name: "admin",
use: {
...devices["Desktop Chrome"],
// Use prepared auth state.
storageState: "admin_auth.json",
},
dependencies: ["admin_setup"],
},
{
// tests logged out / guest workflows
name: "chromium-guest",
grep: /@guest/,
use: {
...devices["Desktop Chrome"],
},
testIgnore: ["**/codeUtils.test.ts"],
},
],
});

View File

@@ -232,11 +232,9 @@ export function AssistantEditor({
existingPersona?.llm_model_provider_override ?? null,
llm_model_version_override:
existingPersona?.llm_model_version_override ?? null,
starter_messages: existingPersona?.starter_messages ?? [
{
message: "",
},
],
starter_messages: existingPersona?.starter_messages?.length
? existingPersona.starter_messages
: [{ message: "" }],
enabled_tools_map: enabledToolsMap,
icon_color: existingPersona?.icon_color ?? defautIconColor,
icon_shape: existingPersona?.icon_shape ?? defaultIconShape,
@@ -1099,7 +1097,9 @@ export function AssistantEditor({
)}
</div>
</div>
<Separator />
<div className="w-full flex flex-col">
<div className="flex gap-x-2 items-center">
<div className="block font-medium text-sm">
@@ -1110,6 +1110,7 @@ export function AssistantEditor({
<SubLabel>
Sample messages that help users understand what this
assistant can do and how to interact with it effectively.
New input fields will appear automatically as you type.
</SubLabel>
<div className="w-full">

View File

@@ -64,19 +64,16 @@ export default function StarterMessagesList({
size="icon"
onClick={() => {
arrayHelpers.remove(index);
if (
index === values.length - 2 &&
!values[values.length - 1].message
) {
arrayHelpers.pop();
}
}}
className={`text-gray-400 hover:text-red-500 ${
index === values.length - 1 && !starterMessage.message
? "opacity-50 cursor-not-allowed"
: ""
}`}
disabled={index === values.length - 1 && !starterMessage.message}
disabled={
(index === values.length - 1 && !starterMessage.message) ||
(values.length === 1 && index === 0) // should never happen, but just in case
}
>
<FiTrash2 className="h-4 w-4" />
</Button>

View File

@@ -111,6 +111,7 @@ import {
import AssistantModal from "../assistants/mine/AssistantModal";
import { getSourceMetadata } from "@/lib/sources";
import { UserSettingsModal } from "./modal/UserSettingsModal";
import { AlignStartVertical } from "lucide-react";
const TEMP_USER_MESSAGE_ID = -1;
const TEMP_ASSISTANT_MESSAGE_ID = -2;
@@ -189,7 +190,11 @@ export function ChatPage({
const [userSettingsToggled, setUserSettingsToggled] = useState(false);
const { assistants: availableAssistants, finalAssistants } = useAssistants();
const {
assistants: availableAssistants,
finalAssistants,
pinnedAssistants,
} = useAssistants();
const [showApiKeyModal, setShowApiKeyModal] = useState(
!shouldShowWelcomeModal
@@ -272,16 +277,6 @@ export function ChatPage({
SEARCH_PARAM_NAMES.TEMPERATURE
);
const defaultTemperature = search_param_temperature
? parseFloat(search_param_temperature)
: selectedAssistant?.tools.some(
(tool) =>
tool.in_code_tool_id === SEARCH_TOOL_ID ||
tool.in_code_tool_id === INTERNET_SEARCH_TOOL_ID
)
? 0
: 0.7;
const setSelectedAssistantFromId = (assistantId: number) => {
// NOTE: also intentionally look through available assistants here, so that
// even if the user has hidden an assistant they can still go back to it
@@ -297,20 +292,21 @@ export function ChatPage({
const [presentingDocument, setPresentingDocument] =
useState<OnyxDocument | null>(null);
const { recentAssistants, refreshRecentAssistants } = useAssistants();
// Current assistant is decided based on this ordering
// 1. Alternative assistant (assistant selected explicitly by the user)
// 2. Selected assistant (the default assistant for this chat session)
// 3. First pinned assistant (ordered list of pinned assistants)
// 4. First available assistant (ordered list of available assistants)
const liveAssistant: Persona | undefined = useMemo(
() =>
alternativeAssistant ||
selectedAssistant ||
recentAssistants[0] ||
finalAssistants[0] ||
pinnedAssistants[0] ||
availableAssistants[0],
[
alternativeAssistant,
selectedAssistant,
recentAssistants,
finalAssistants,
pinnedAssistants,
availableAssistants,
]
);
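The `||` chain implements that ordering because indexing an empty array yields undefined, which is falsy, so evaluation falls through to the next candidate; a sketch:

const pinnedAssistants: { id: number }[] = [];
const availableAssistants = [{ id: 0 }];

// pinnedAssistants[0] is undefined, so the chain falls through
const liveAssistant = pinnedAssistants[0] || availableAssistants[0]; // -> { id: 0 }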
@@ -816,7 +812,6 @@ export function ChatPage({
setMaxTokens(maxTokens);
}
}
refreshRecentAssistants(liveAssistant?.id);
fetchMaxTokens();
}, [liveAssistant]);

View File

@@ -19,9 +19,7 @@ import {
import { useRouter, useSearchParams } from "next/navigation";
import { ChatSession } from "../interfaces";
import { NEXT_PUBLIC_NEW_CHAT_DIRECTS_TO_SAME_PERSONA } from "@/lib/constants";
import { Folder } from "../folders/interfaces";
import { usePopup } from "@/components/admin/connectors/Popup";
import { SettingsContext } from "@/components/settings/SettingsProvider";
import { DocumentIcon2, NewChatIcon } from "@/components/icons/icons";
@@ -251,9 +249,11 @@ export const HistorySidebar = forwardRef<HTMLDivElement, HistorySidebarProps>(
const handleNewChat = () => {
reset();
console.log("currentChatSession", currentChatSession);
const newChatUrl =
`/${page}` +
(NEXT_PUBLIC_NEW_CHAT_DIRECTS_TO_SAME_PERSONA && currentChatSession
(currentChatSession
? `?assistantId=${currentChatSession.persona_id}`
: "");
router.push(newChatUrl);
@@ -293,8 +293,7 @@ export const HistorySidebar = forwardRef<HTMLDivElement, HistorySidebarProps>(
className="w-full px-2 py-1 rounded-md items-center hover:bg-hover cursor-pointer transition-all duration-150 flex gap-x-2"
href={
`/${page}` +
(NEXT_PUBLIC_NEW_CHAT_DIRECTS_TO_SAME_PERSONA &&
currentChatSession?.persona_id
(currentChatSession
? `?assistantId=${currentChatSession?.persona_id}`
: "")
}
@@ -319,14 +318,6 @@ export const HistorySidebar = forwardRef<HTMLDivElement, HistorySidebarProps>(
<Link
className="w-full px-2 py-1 rounded-md items-center hover:bg-hover cursor-pointer transition-all duration-150 flex gap-x-2"
href="/chat/input-prompts"
onClick={(e) => {
if (e.metaKey || e.ctrlKey) {
return;
}
if (handleNewChat) {
handleNewChat();
}
}}
>
<DocumentIcon2
size={20}

View File

@@ -2,7 +2,6 @@
import { UserDropdown } from "../UserDropdown";
import { FiShare2 } from "react-icons/fi";
import { SetStateAction, useContext, useEffect } from "react";
import { NEXT_PUBLIC_NEW_CHAT_DIRECTS_TO_SAME_PERSONA } from "@/lib/constants";
import { ChatSession } from "@/app/chat/interfaces";
import Link from "next/link";
import { pageType } from "@/app/chat/sessionSidebar/types";
@@ -42,8 +41,7 @@ export default function FunctionalHeader({
event.preventDefault();
window.open(
`/${page}` +
(NEXT_PUBLIC_NEW_CHAT_DIRECTS_TO_SAME_PERSONA &&
currentChatSession
(currentChatSession
? `?assistantId=${currentChatSession.persona_id}`
: ""),
"_self"
@@ -63,7 +61,7 @@ export default function FunctionalHeader({
reset();
const newChatUrl =
`/${page}` +
(NEXT_PUBLIC_NEW_CHAT_DIRECTS_TO_SAME_PERSONA && currentChatSession
(currentChatSession
? `?assistantId=${currentChatSession.persona_id}`
: "");
router.push(newChatUrl);
@@ -128,25 +126,6 @@ export default function FunctionalHeader({
</div>
)}
{/* <div
className={`absolute
${
documentSidebarToggled && !sidebarToggled
? "left-[calc(50%-125px)]"
: !documentSidebarToggled && sidebarToggled
? "left-[calc(50%+125px)]"
: "left-1/2"
}
${
documentSidebarToggled || sidebarToggled
? "mobile:w-[40vw] max-w-[50vw]"
: "mobile:w-[50vw] max-w-[60vw]"
}
top-1/2 transform -translate-x-1/2 -translate-y-1/2 transition-all duration-300`}
>
<ChatBanner />
</div> */}
<div className="invisible">
<LogoWithText
page={page}
@@ -156,8 +135,6 @@ export default function FunctionalHeader({
/>
</div>
{/* className="fixed cursor-pointer flex z-40 left-4 bg-black top-3 h-8" */}
<div className="absolute right-2 mobile:top-1 desktop:top-1 h-8 flex">
{setSharingModalVisible && !hideUserDropdown && (
<div
@@ -179,8 +156,7 @@ export default function FunctionalHeader({
className="desktop:hidden ml-2 my-auto"
href={
`/${page}` +
(NEXT_PUBLIC_NEW_CHAT_DIRECTS_TO_SAME_PERSONA &&
currentChatSession
(currentChatSession
? `?assistantId=${currentChatSession.persona_id}`
: "")
}

View File

@@ -25,8 +25,6 @@ interface AssistantsContextProps {
ownedButHiddenAssistants: Persona[];
refreshAssistants: () => Promise<void>;
isImageGenerationAvailable: boolean;
recentAssistants: Persona[];
refreshRecentAssistants: (currentAssistant: number) => Promise<void>;
// Admin only
editablePersonas: Persona[];
allAssistants: Persona[];
@@ -56,35 +54,28 @@ export const AssistantsProvider: React.FC<{
const [editablePersonas, setEditablePersonas] = useState<Persona[]>([]);
const [allAssistants, setAllAssistants] = useState<Persona[]>([]);
const [pinnedAssistants, setPinnedAssistants] = useState<Persona[]>(
user?.preferences.pinned_assistants
? assistants.filter((assistant) =>
user?.preferences?.pinned_assistants?.includes(assistant.id)
)
: assistants.filter((a) => a.builtin_persona)
);
const [pinnedAssistants, setPinnedAssistants] = useState<Persona[]>(() => {
if (user?.preferences.pinned_assistants) {
return user.preferences.pinned_assistants
.map((id) => assistants.find((assistant) => assistant.id === id))
.filter((assistant): assistant is Persona => assistant !== undefined);
} else {
return assistants.filter((a) => a.builtin_persona);
}
});
useEffect(() => {
setPinnedAssistants(
user?.preferences.pinned_assistants
? assistants.filter((assistant) =>
user?.preferences?.pinned_assistants?.includes(assistant.id)
)
: assistants.filter((a) => a.builtin_persona)
);
setPinnedAssistants(() => {
if (user?.preferences.pinned_assistants) {
return user.preferences.pinned_assistants
.map((id) => assistants.find((assistant) => assistant.id === id))
.filter((assistant): assistant is Persona => assistant !== undefined);
} else {
return assistants.filter((a) => a.builtin_persona);
}
});
}, [user?.preferences?.pinned_assistants, assistants]);
const [recentAssistants, setRecentAssistants] = useState<Persona[]>(
user?.preferences.recent_assistants
?.filter((assistantId) =>
assistants.find((assistant) => assistant.id === assistantId)
)
.map(
(assistantId) =>
assistants.find((assistant) => assistant.id === assistantId)!
) || []
);
const [isImageGenerationAvailable, setIsImageGenerationAvailable] =
useState<boolean>(false);
@@ -135,28 +126,6 @@ export const AssistantsProvider: React.FC<{
fetchPersonas();
}, [isAdmin, isCurator]);
const refreshRecentAssistants = async (currentAssistant: number) => {
const response = await fetch("/api/user/recent-assistants", {
method: "PATCH",
headers: {
"Content-Type": "application/json",
},
body: JSON.stringify({
current_assistant: currentAssistant,
}),
});
if (!response.ok) {
return;
}
setRecentAssistants((recentAssistants) => [
assistants.find((assistant) => assistant.id === currentAssistant)!,
...recentAssistants.filter(
(assistant) => assistant.id !== currentAssistant
),
]);
};
const refreshAssistants = async () => {
try {
const response = await fetch("/api/persona", {
@@ -181,13 +150,6 @@ export const AssistantsProvider: React.FC<{
} catch (error) {
console.error("Error refreshing assistants:", error);
}
setRecentAssistants(
assistants.filter(
(assistant) =>
user?.preferences.recent_assistants?.includes(assistant.id) || false
)
);
};
const {
@@ -230,8 +192,6 @@ export const AssistantsProvider: React.FC<{
editablePersonas,
allAssistants,
isImageGenerationAvailable,
recentAssistants,
refreshRecentAssistants,
setPinnedAssistants,
pinnedAssistants,
}}
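The rewritten pinned-assistants initializer above is not just stylistic: filter returns matches in the order of the assistants array, while mapping over the saved id list preserves the user's pin order and drops ids that no longer resolve. A sketch of the difference:

type P = { id: number };
const assistants: P[] = [{ id: 1 }, { id: 2 }, { id: 3 }];
const pinnedIds = [3, 1, 99]; // 99 no longer exists

// Old approach: source order -> [{ id: 1 }, { id: 3 }]
const byFilter = assistants.filter((a) => pinnedIds.includes(a.id));

// New approach: pin order, missing ids dropped -> [{ id: 3 }, { id: 1 }]
const byMap = pinnedIds
  .map((id) => assistants.find((a) => a.id === id))
  .filter((a): a is P => a !== undefined);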

View File

@@ -2,7 +2,6 @@
import { useContext } from "react";
import { FiSidebar } from "react-icons/fi";
import { SettingsContext } from "../settings/SettingsProvider";
import { NEXT_PUBLIC_NEW_CHAT_DIRECTS_TO_SAME_PERSONA } from "@/lib/constants";
import { LeftToLineIcon, NewChatIcon, RightToLineIcon } from "../icons/icons";
import {
Tooltip,
@@ -90,9 +89,7 @@ export default function LogoWithText({
className="my-auto mobile:hidden"
href={
`/${page}` +
(NEXT_PUBLIC_NEW_CHAT_DIRECTS_TO_SAME_PERSONA && assistantId
? `?assistantId=${assistantId}`
: "")
(assistantId ? `?assistantId=${assistantId}` : "")
}
onClick={(e) => {
if (e.metaKey || e.ctrlKey) {

View File

@@ -18,10 +18,6 @@ export const NEXT_PUBLIC_DO_NOT_USE_TOGGLE_OFF_DANSWER_POWERED =
process.env.NEXT_PUBLIC_DO_NOT_USE_TOGGLE_OFF_DANSWER_POWERED?.toLowerCase() ===
"true";
export const NEXT_PUBLIC_NEW_CHAT_DIRECTS_TO_SAME_PERSONA =
process.env.NEXT_PUBLIC_NEW_CHAT_DIRECTS_TO_SAME_PERSONA?.toLowerCase() ===
"true";
export const GMAIL_AUTH_IS_ADMIN_COOKIE_NAME = "gmail_auth_is_admin";
export const GOOGLE_DRIVE_AUTH_IS_ADMIN_COOKIE_NAME =

View File

@@ -1,24 +1,9 @@
// dependency for all admin user tests
import { test as setup } from "@playwright/test";
import { TEST_CREDENTIALS } from "./constants";

setup("authenticate", async ({ page }) => {
  const { email, password } = TEST_CREDENTIALS;
  await page.goto("http://localhost:3000/chat");
  await page.waitForURL("http://localhost:3000/auth/login?next=%2Fchat");
  await expect(page).toHaveTitle("Onyx");
  await page.fill("#email", email);
  await page.fill("#password", password);
  // Click the login button
  await page.click('button[type="submit"]');
  await page.waitForURL("http://localhost:3000/chat");
  await page.context().storageState({ path: "admin_auth.json" });
});

import { test as setup, expect } from "@playwright/test";

setup("authenticate as admin", async ({ browser }) => {
  const context = await browser.newContext({ storageState: "admin_auth.json" });
  const page = await context.newPage();
  await page.goto("http://localhost:3000/chat");
  await expect(page).toHaveTitle("Onyx");
});

View File

@@ -1,65 +1,43 @@
import { test, expect } from "@chromatic-com/playwright";

test(
  "Admin - OAuth Redirect - Missing Code",
  {
    tag: "@admin",
  },
  async ({ page }, testInfo) => {
    await page.goto(
      "http://localhost:3000/admin/connectors/slack/oauth/callback?state=xyz"
    );
    await expect(page.locator("p.text-text-500")).toHaveText(
      "Missing authorization code."
    );
  }
);

test(
  "Admin - OAuth Redirect - Missing State",
  {
    tag: "@admin",
  },
  async ({ page }, testInfo) => {
    await page.goto(
      "http://localhost:3000/admin/connectors/slack/oauth/callback?code=123"
    );
    await expect(page.locator("p.text-text-500")).toHaveText(
      "Missing state parameter."
    );
  }
);

test(
  "Admin - OAuth Redirect - Invalid Connector",
  {
    tag: "@admin",
  },
  async ({ page }, testInfo) => {
    await page.goto(
      "http://localhost:3000/admin/connectors/invalid-connector/oauth/callback?code=123&state=xyz"
    );
    await expect(page.locator("p.text-text-500")).toHaveText(
      "invalid_connector is not a valid source type."
    );
  }
);

test(
  "Admin - OAuth Redirect - No Session",
  {
    tag: "@admin",
  },
  async ({ page }, testInfo) => {
    await page.goto(
      "http://localhost:3000/admin/connectors/slack/oauth/callback?code=123&state=xyz"
    );
    await expect(page.locator("p.text-text-500")).toHaveText(
      "An error occurred during the OAuth process. Please try again."
    );
  }
);

import { test, expect } from "@playwright/test";

test.use({ storageState: "admin_auth.json" });

test("Admin - OAuth Redirect - Missing Code", async ({ page }) => {
  await page.goto(
    "http://localhost:3000/admin/connectors/slack/oauth/callback?state=xyz"
  );
  await expect(page.locator("p.text-text-500")).toHaveText(
    "Missing authorization code."
  );
});

test("Admin - OAuth Redirect - Missing State", async ({ page }) => {
  await page.goto(
    "http://localhost:3000/admin/connectors/slack/oauth/callback?code=123"
  );
  await expect(page.locator("p.text-text-500")).toHaveText(
    "Missing state parameter."
  );
});

test("Admin - OAuth Redirect - Invalid Connector", async ({ page }) => {
  await page.goto(
    "http://localhost:3000/admin/connectors/invalid-connector/oauth/callback?code=123&state=xyz"
  );
  await expect(page.locator("p.text-text-500")).toHaveText(
    "invalid_connector is not a valid source type."
  );
});

test("Admin - OAuth Redirect - No Session", async ({ page }) => {
  await page.goto(
    "http://localhost:3000/admin/connectors/slack/oauth/callback?code=123&state=xyz"
  );
  await expect(page.locator("p.text-text-500")).toHaveText(
    "An error occurred during the OAuth process. Please try again."
  );
});

View File

@@ -2,6 +2,8 @@ import { test, expect } from "@playwright/test";
import chromaticSnpashots from "./chromaticSnpashots.json";
import type { Page } from "@playwright/test";
test.use({ storageState: "admin_auth.json" });
async function verifyAdminPageNavigation(
page: Page,
path: string,
@@ -13,7 +15,10 @@ async function verifyAdminPageNavigation(
}
) {
await page.goto(`http://localhost:3000/admin/${path}`);
await expect(page.locator("h1.text-3xl")).toHaveText(pageTitle);
await expect(page.locator("h1.text-3xl")).toHaveText(pageTitle, {
timeout: 2000,
});
if (options?.paragraphText) {
await expect(page.locator("p.text-sm").nth(0)).toHaveText(
@@ -35,18 +40,12 @@ async function verifyAdminPageNavigation(
}
for (const chromaticSnapshot of chromaticSnpashots) {
test(
`Admin - ${chromaticSnapshot.name}`,
{
tag: "@admin",
},
async ({ page }) => {
await verifyAdminPageNavigation(
page,
chromaticSnapshot.path,
chromaticSnapshot.pageTitle,
chromaticSnapshot.options
);
}
);
test(`Admin - ${chromaticSnapshot.name}`, async ({ page }) => {
await verifyAdminPageNavigation(
page,
chromaticSnapshot.path,
chromaticSnapshot.pageTitle,
chromaticSnapshot.options
);
});
}

View File

@@ -0,0 +1,54 @@
import { test, expect } from "@playwright/test";
// Use the pre-signed-in "admin" storage state
test.use({
storageState: "admin_auth.json",
});
test("Chat workflow", async ({ page }) => {
// Initial setup
await page.goto("http://localhost:3000/chat", { timeout: 3000 });
// Interact with Art assistant
await page.locator("button").filter({ hasText: "Art" }).click();
await page.getByPlaceholder("Message Art assistant...").fill("Hi");
await page.keyboard.press("Enter");
await page.waitForTimeout(3000);
// Start a new chat
await page.getByRole("link", { name: "Start New Chat" }).click();
await page.waitForNavigation({ waitUntil: "networkidle" });
// Check for expected text
await expect(page.getByText("Assistant for generating")).toBeVisible();
// Interact with General assistant
await page.locator("button").filter({ hasText: "General" }).click();
// Check URL after clicking General assistant
await expect(page).toHaveURL("http://localhost:3000/chat?assistantId=-1", {
timeout: 5000,
});
// Create a new assistant
await page.getByRole("button", { name: "Explore Assistants" }).click();
await page.getByRole("button", { name: "Create" }).click();
await page.getByTestId("name").click();
await page.getByTestId("name").fill("Test Assistant");
await page.getByTestId("description").click();
await page.getByTestId("description").fill("Test Assistant Description");
await page.getByTestId("system_prompt").click();
await page.getByTestId("system_prompt").fill("Test Assistant Instructions");
await page.getByRole("button", { name: "Create" }).click();
// Verify new assistant creation
await expect(page.getByText("Test Assistant Description")).toBeVisible({
timeout: 5000,
});
// Start another new chat
await page.getByRole("link", { name: "Start New Chat" }).click();
await expect(page.getByText("Assistant with access to")).toBeVisible({
timeout: 5000,
});
});

View File

@@ -1,4 +1,9 @@
export const TEST_CREDENTIALS = {
export const TEST_USER_CREDENTIALS = {
email: "user1@test.com",
password: "User1Password123!",
};
export const TEST_ADMIN_CREDENTIALS = {
email: "admin_user@test.com",
password: "TestPassword123!",
};

View File

@@ -0,0 +1,22 @@
import { chromium, FullConfig } from "@playwright/test";
import { loginAs } from "./utils/auth";
async function globalSetup(config: FullConfig) {
const browser = await chromium.launch();
const adminContext = await browser.newContext();
const adminPage = await adminContext.newPage();
await loginAs(adminPage, "admin");
await adminContext.storageState({ path: "admin_auth.json" });
await adminContext.close();
const userContext = await browser.newContext();
const userPage = await userContext.newPage();
await loginAs(userPage, "user");
await userContext.storageState({ path: "user_auth.json" });
await userContext.close();
await browser.close();
}
export default globalSetup;
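Because globalSetup runs once before any project, both auth files exist before the first test starts. A spec can opt into the regular-user session explicitly (a sketch; no such spec is added in this change):

import { test } from "@playwright/test";

// Load the regular-user session captured by globalSetup
test.use({ storageState: "user_auth.json" });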

View File

@@ -1,35 +0,0 @@
// Add this line
import { test, expect, takeSnapshot } from "@chromatic-com/playwright";
import { TEST_CREDENTIALS } from "./constants";
// Then use as normal 👇
test(
"Homepage",
{
tag: "@guest",
},
async ({ page }, testInfo) => {
// Test redirect to login, and redirect to search after login
const { email, password } = TEST_CREDENTIALS;
await page.goto("http://localhost:3000/chat");
await page.waitForURL("http://localhost:3000/auth/login?next=%2Fchat");
await expect(page).toHaveTitle("Onyx");
await takeSnapshot(page, "Before login", testInfo);
await page.fill("#email", email);
await page.fill("#password", password);
// Click the login button
await page.click('button[type="submit"]');
await page.waitForURL("http://localhost:3000/chat");
await page.getByPlaceholder("Send a message or try using @ or /");
await expect(page.locator("body")).not.toContainText("Initializing Onyx");
}
);

View File

@@ -0,0 +1,37 @@
import { Page } from "@playwright/test";
import { TEST_ADMIN_CREDENTIALS, TEST_USER_CREDENTIALS } from "../constants";
// Logs a user (admin or regular) into the application.
// If the post-login redirect to /chat times out, it retries once via the signup flow.
export async function loginAs(page: Page, userType: "admin" | "user") {
const { email, password } =
userType === "admin" ? TEST_ADMIN_CREDENTIALS : TEST_USER_CREDENTIALS;
await page.goto("http://localhost:3000/auth/login", { timeout: 1000 });
await page.fill("#email", email);
await page.fill("#password", password);
// Click the login button
await page.click('button[type="submit"]');
try {
await page.waitForURL("http://localhost:3000/chat", { timeout: 4000 });
} catch (error) {
console.log(`Timeout occurred. Current URL: ${page.url()}`);
// If the redirect to /chat doesn't happen, the account may not exist yet: retry via the signup page
await page.goto("http://localhost:3000/auth/signup", { timeout: 1000 });
await page.fill("#email", email);
await page.fill("#password", password);
// Click the login button
await page.click('button[type="submit"]');
try {
await page.waitForURL("http://localhost:3000/chat", { timeout: 4000 });
} catch (error) {
console.log(`Timeout occurred again. Current URL: ${page.url()}`);
}
}
}
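For a test that needs a fresh login rather than a pre-saved session, loginAs can also be called directly (a sketch assuming a spec file under tests/e2e, not one from this change):

import { test } from "@playwright/test";
import { loginAs } from "./utils/auth";

test("logs in as a regular user from a clean context", async ({ page }) => {
  await loginAs(page, "user");
});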

15
web/user_auth.json Normal file
View File

@@ -0,0 +1,15 @@
{
"cookies": [
{
"name": "fastapiusersauth",
"value": "n_EMYYKHn4tQbuPTEbtN1gJ6dQTGek9omJPhO2GhHoA",
"domain": "localhost",
"path": "/",
"expires": 1738801376.508558,
"httpOnly": true,
"secure": false,
"sameSite": "Lax"
}
],
"origins": []
}