Compare commits

..

3 Commits

Author SHA1 Message Date
pablodanswer
dc089d6842 p 2025-01-25 18:16:21 -08:00
pablodanswer
ef1fc19de7 k 2025-01-25 18:16:01 -08:00
pablodanswer
842fcf4156 update 2025-01-25 18:15:32 -08:00
66 changed files with 478 additions and 3784 deletions

View File

@@ -1,75 +0,0 @@
"""add user files
Revision ID: 9aadf32dfeb4
Revises: f1ca58b2f2ec
Create Date: 2025-01-26 16:08:21.551022
"""
from alembic import op
import sqlalchemy as sa
import datetime
# Revision identifiers, used by Alembic.
# `revision` is this migration's own ID and `down_revision` is the migration
# it revises (both match the "Revision ID" / "Revises" lines in the module
# docstring).
revision = "9aadf32dfeb4"
down_revision = "f1ca58b2f2ec"
# No branch labels or cross-revision dependencies are declared.
branch_labels = None
depends_on = None
def upgrade() -> None:
    """Create the ``user_folder``, ``user_file`` and ``persona__user_file`` tables.

    ``user_folder`` is created first because ``user_file.folder_id`` references
    it, and ``persona__user_file`` is created last because it references
    ``user_file``. The ``user`` and ``persona`` tables targeted by the other
    foreign keys are not created here.
    """
    # NOTE(review): the `default=` arguments below are presumably client-side
    # defaults that are not emitted in the CREATE TABLE DDL -- confirm whether
    # `server_default` was intended.
    utc_now = datetime.datetime.utcnow

    # user_folder: flat folders (no parent_id column).
    folder_columns = [
        sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True),
        sa.Column("user_id", sa.UUID(), sa.ForeignKey("user.id"), nullable=True),
        sa.Column("name", sa.String(length=255), nullable=True),
        sa.Column("description", sa.String(length=255), nullable=True),
        sa.Column("display_priority", sa.Integer(), nullable=True, default=0),
        sa.Column("created_at", sa.DateTime(), default=utc_now),
    ]
    op.create_table("user_folder", *folder_columns)

    # user_file: links to its folder via folder_id (not parent_folder_id).
    file_columns = [
        sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True),
        sa.Column("user_id", sa.UUID(), sa.ForeignKey("user.id"), nullable=True),
        sa.Column(
            "folder_id",
            sa.Integer(),
            sa.ForeignKey("user_folder.id"),
            nullable=True,
        ),
        sa.Column("file_type", sa.String(), nullable=True),
        sa.Column("file_id", sa.String(length=255), nullable=False),
        sa.Column("document_id", sa.String(length=255), nullable=False),
        sa.Column("name", sa.String(length=255), nullable=False),
        sa.Column("created_at", sa.DateTime(), default=utc_now),
    ]
    op.create_table("user_file", *file_columns)

    # persona__user_file: association table with a composite primary key
    # made of both foreign keys.
    association_columns = [
        sa.Column(
            "persona_id", sa.Integer(), sa.ForeignKey("persona.id"), primary_key=True
        ),
        sa.Column(
            "user_file_id",
            sa.Integer(),
            sa.ForeignKey("user_file.id"),
            primary_key=True,
        ),
    ]
    op.create_table("persona__user_file", *association_columns)
def downgrade() -> None:
    """Drop the tables created by ``upgrade`` in reverse creation order.

    The association table goes first, then ``user_file``, then
    ``user_folder``, so no table is dropped while another still references it.
    """
    for table_name in ("persona__user_file", "user_file", "user_folder"):
        op.drop_table(table_name)

View File

@@ -32,7 +32,6 @@ def perform_ttl_management_task(
@celery_app.task(
name="check_ttl_management_task",
ignore_result=True,
soft_time_limit=JOB_TIMEOUT,
)
def check_ttl_management_task(*, tenant_id: str | None) -> None:
@@ -57,7 +56,6 @@ def check_ttl_management_task(*, tenant_id: str | None) -> None:
@celery_app.task(
name="autogenerate_usage_report_task",
ignore_result=True,
soft_time_limit=JOB_TIMEOUT,
)
def autogenerate_usage_report_task(*, tenant_id: str | None) -> None:

View File

@@ -42,22 +42,24 @@ def _fetch_permissions_for_permission_ids(
if not permission_info or not doc_id:
return []
# Check cache first for all permission IDs
permissions = [
_PERMISSION_ID_PERMISSION_MAP[pid]
for pid in permission_ids
if pid in _PERMISSION_ID_PERMISSION_MAP
]
# If we found all permissions in cache, return them
if len(permissions) == len(permission_ids):
return permissions
owner_email = permission_info.get("owner_email")
drive_service = get_drive_service(
creds=google_drive_connector.creds,
user_email=(owner_email or google_drive_connector.primary_admin_email),
)
# Otherwise, fetch all permissions and update cache
fetched_permissions = execute_paginated_retrieval(
retrieval_function=drive_service.permissions().list,
list_key="permissions",
@@ -67,6 +69,7 @@ def _fetch_permissions_for_permission_ids(
)
permissions_for_doc_id = []
# Update cache and return all permissions
for permission in fetched_permissions:
permissions_for_doc_id.append(permission)
_PERMISSION_ID_PERMISSION_MAP[permission["id"]] = permission

View File

@@ -1,5 +1,6 @@
from datetime import timedelta
from typing import Any
from typing import cast
from celery import Celery
from celery import signals
@@ -7,6 +8,7 @@ from celery.beat import PersistentScheduler # type: ignore
from celery.signals import beat_init
import onyx.background.celery.apps.app_base as app_base
from onyx.configs.constants import ONYX_CLOUD_CELERY_TASK_PREFIX
from onyx.configs.constants import POSTGRES_CELERY_BEAT_APP_NAME
from onyx.db.engine import get_all_tenant_ids
from onyx.db.engine import SqlEngine
@@ -130,25 +132,21 @@ class DynamicTenantScheduler(PersistentScheduler):
# get current schedule and extract current tenants
current_schedule = self.schedule.items()
# there are no more per tenant beat tasks, so comment this out
# NOTE: we may not actually need this scheduler any more and should
# test reverting to a regular beat schedule implementation
current_tenants = set()
for task_name, _ in current_schedule:
task_name = cast(str, task_name)
if task_name.startswith(ONYX_CLOUD_CELERY_TASK_PREFIX):
continue
# current_tenants = set()
# for task_name, _ in current_schedule:
# task_name = cast(str, task_name)
# if task_name.startswith(ONYX_CLOUD_CELERY_TASK_PREFIX):
# continue
if "_" in task_name:
# example: "check-for-condition-tenant_12345678-abcd-efgh-ijkl-12345678"
# -> "12345678-abcd-efgh-ijkl-12345678"
current_tenants.add(task_name.split("_")[-1])
logger.info(f"Found {len(current_tenants)} existing items in schedule")
# if "_" in task_name:
# # example: "check-for-condition-tenant_12345678-abcd-efgh-ijkl-12345678"
# # -> "12345678-abcd-efgh-ijkl-12345678"
# current_tenants.add(task_name.split("_")[-1])
# logger.info(f"Found {len(current_tenants)} existing items in schedule")
# for tenant_id in tenant_ids:
# if tenant_id not in current_tenants:
# logger.info(f"Processing new tenant: {tenant_id}")
for tenant_id in tenant_ids:
if tenant_id not in current_tenants:
logger.info(f"Processing new tenant: {tenant_id}")
new_schedule = self._generate_schedule(tenant_ids)

View File

@@ -16,10 +16,6 @@ from shared_configs.configs import MULTI_TENANT
# it's only important that they run relatively regularly
BEAT_EXPIRES_DEFAULT = 15 * 60 # 15 minutes (in seconds)
# hack to slow down task dispatch in the cloud until
# we have a better implementation (backpressure, etc)
CLOUD_BEAT_SCHEDULE_MULTIPLIER = 8
# tasks that only run in the cloud
# the name attribute must start with ONYX_CLOUD_CELERY_TASK_PREFIX = "cloud" to be filtered
# by the DynamicTenantScheduler
@@ -28,7 +24,7 @@ cloud_tasks_to_schedule = [
{
"name": f"{ONYX_CLOUD_CELERY_TASK_PREFIX}_check-alembic",
"task": OnyxCeleryTask.CLOUD_CHECK_ALEMBIC,
"schedule": timedelta(hours=1 * CLOUD_BEAT_SCHEDULE_MULTIPLIER),
"schedule": timedelta(hours=1),
"options": {
"queue": OnyxCeleryQueues.MONITORING,
"priority": OnyxCeleryPriority.HIGH,
@@ -39,7 +35,7 @@ cloud_tasks_to_schedule = [
{
"name": f"{ONYX_CLOUD_CELERY_TASK_PREFIX}_check-for-indexing",
"task": OnyxCeleryTask.CLOUD_BEAT_TASK_GENERATOR,
"schedule": timedelta(seconds=15 * CLOUD_BEAT_SCHEDULE_MULTIPLIER),
"schedule": timedelta(seconds=15),
"options": {
"priority": OnyxCeleryPriority.HIGHEST,
"expires": BEAT_EXPIRES_DEFAULT,
@@ -51,7 +47,7 @@ cloud_tasks_to_schedule = [
{
"name": f"{ONYX_CLOUD_CELERY_TASK_PREFIX}_check-for-connector-deletion",
"task": OnyxCeleryTask.CLOUD_BEAT_TASK_GENERATOR,
"schedule": timedelta(seconds=20 * CLOUD_BEAT_SCHEDULE_MULTIPLIER),
"schedule": timedelta(seconds=20),
"options": {
"priority": OnyxCeleryPriority.HIGHEST,
"expires": BEAT_EXPIRES_DEFAULT,
@@ -63,7 +59,7 @@ cloud_tasks_to_schedule = [
{
"name": f"{ONYX_CLOUD_CELERY_TASK_PREFIX}_check-for-vespa-sync",
"task": OnyxCeleryTask.CLOUD_BEAT_TASK_GENERATOR,
"schedule": timedelta(seconds=20 * CLOUD_BEAT_SCHEDULE_MULTIPLIER),
"schedule": timedelta(seconds=20),
"options": {
"priority": OnyxCeleryPriority.HIGHEST,
"expires": BEAT_EXPIRES_DEFAULT,
@@ -75,7 +71,7 @@ cloud_tasks_to_schedule = [
{
"name": f"{ONYX_CLOUD_CELERY_TASK_PREFIX}_check-for-prune",
"task": OnyxCeleryTask.CLOUD_BEAT_TASK_GENERATOR,
"schedule": timedelta(seconds=15 * CLOUD_BEAT_SCHEDULE_MULTIPLIER),
"schedule": timedelta(seconds=15),
"options": {
"priority": OnyxCeleryPriority.HIGHEST,
"expires": BEAT_EXPIRES_DEFAULT,
@@ -87,7 +83,7 @@ cloud_tasks_to_schedule = [
{
"name": f"{ONYX_CLOUD_CELERY_TASK_PREFIX}_monitor-vespa-sync",
"task": OnyxCeleryTask.CLOUD_BEAT_TASK_GENERATOR,
"schedule": timedelta(seconds=15 * CLOUD_BEAT_SCHEDULE_MULTIPLIER),
"schedule": timedelta(seconds=5),
"options": {
"priority": OnyxCeleryPriority.HIGHEST,
"expires": BEAT_EXPIRES_DEFAULT,
@@ -99,7 +95,7 @@ cloud_tasks_to_schedule = [
{
"name": f"{ONYX_CLOUD_CELERY_TASK_PREFIX}_check-for-doc-permissions-sync",
"task": OnyxCeleryTask.CLOUD_BEAT_TASK_GENERATOR,
"schedule": timedelta(seconds=30 * CLOUD_BEAT_SCHEDULE_MULTIPLIER),
"schedule": timedelta(seconds=30),
"options": {
"priority": OnyxCeleryPriority.HIGHEST,
"expires": BEAT_EXPIRES_DEFAULT,
@@ -111,7 +107,7 @@ cloud_tasks_to_schedule = [
{
"name": f"{ONYX_CLOUD_CELERY_TASK_PREFIX}_check-for-external-group-sync",
"task": OnyxCeleryTask.CLOUD_BEAT_TASK_GENERATOR,
"schedule": timedelta(seconds=20 * CLOUD_BEAT_SCHEDULE_MULTIPLIER),
"schedule": timedelta(seconds=20),
"options": {
"priority": OnyxCeleryPriority.HIGHEST,
"expires": BEAT_EXPIRES_DEFAULT,
@@ -123,7 +119,7 @@ cloud_tasks_to_schedule = [
{
"name": f"{ONYX_CLOUD_CELERY_TASK_PREFIX}_monitor-background-processes",
"task": OnyxCeleryTask.CLOUD_BEAT_TASK_GENERATOR,
"schedule": timedelta(minutes=5 * CLOUD_BEAT_SCHEDULE_MULTIPLIER),
"schedule": timedelta(minutes=5),
"options": {
"priority": OnyxCeleryPriority.HIGHEST,
"expires": BEAT_EXPIRES_DEFAULT,
@@ -141,9 +137,7 @@ if LLM_MODEL_UPDATE_API_URL:
{
"name": f"{ONYX_CLOUD_CELERY_TASK_PREFIX}_check-for-llm-model-update",
"task": OnyxCeleryTask.CLOUD_BEAT_TASK_GENERATOR,
"schedule": timedelta(
hours=1 * CLOUD_BEAT_SCHEDULE_MULTIPLIER
), # Check every hour
"schedule": timedelta(hours=1), # Check every hour
"options": {
"priority": OnyxCeleryPriority.HIGHEST,
"expires": BEAT_EXPIRES_DEFAULT,
@@ -227,7 +221,7 @@ if not MULTI_TENANT:
{
"name": "monitor-background-processes",
"task": OnyxCeleryTask.MONITOR_BACKGROUND_PROCESSES,
"schedule": timedelta(minutes=15),
"schedule": timedelta(minutes=5),
"options": {
"priority": OnyxCeleryPriority.LOW,
"expires": BEAT_EXPIRES_DEFAULT,

View File

@@ -33,7 +33,6 @@ class TaskDependencyError(RuntimeError):
@shared_task(
name=OnyxCeleryTask.CHECK_FOR_CONNECTOR_DELETION,
ignore_result=True,
soft_time_limit=JOB_TIMEOUT,
trail=False,
bind=True,
@@ -140,6 +139,13 @@ def try_generate_document_cc_pair_cleanup_tasks(
submitted=datetime.now(timezone.utc),
)
# create before setting fence to avoid race condition where the monitoring
# task updates the sync record before it is created
insert_sync_record(
db_session=db_session,
entity_id=cc_pair_id,
sync_type=SyncType.CONNECTOR_DELETION,
)
redis_connector.delete.set_fence(fence_payload)
try:
@@ -178,13 +184,6 @@ def try_generate_document_cc_pair_cleanup_tasks(
)
if tasks_generated is None:
raise ValueError("RedisConnectorDeletion.generate_tasks returned None")
insert_sync_record(
db_session=db_session,
entity_id=cc_pair_id,
sync_type=SyncType.CONNECTOR_DELETION,
)
except TaskDependencyError:
redis_connector.delete.set_fence(None)
raise

View File

@@ -91,7 +91,6 @@ def _is_external_doc_permissions_sync_due(cc_pair: ConnectorCredentialPair) -> b
@shared_task(
name=OnyxCeleryTask.CHECK_FOR_DOC_PERMISSIONS_SYNC,
ignore_result=True,
soft_time_limit=JOB_TIMEOUT,
bind=True,
)

View File

@@ -91,7 +91,6 @@ def _is_external_group_sync_due(cc_pair: ConnectorCredentialPair) -> bool:
@shared_task(
name=OnyxCeleryTask.CHECK_FOR_EXTERNAL_GROUP_SYNC,
ignore_result=True,
soft_time_limit=JOB_TIMEOUT,
bind=True,
)

View File

@@ -45,7 +45,6 @@ from onyx.natural_language_processing.search_nlp_models import EmbeddingModel
from onyx.natural_language_processing.search_nlp_models import warm_up_bi_encoder
from onyx.redis.redis_connector import RedisConnector
from onyx.redis.redis_pool import get_redis_client
from onyx.redis.redis_pool import get_redis_replica_client
from onyx.redis.redis_pool import redis_lock_dump
from onyx.utils.logger import setup_logger
from onyx.utils.variable_functionality import global_version
@@ -70,7 +69,6 @@ def check_for_indexing(self: Task, *, tenant_id: str | None) -> int | None:
tasks_created = 0
locked = False
redis_client = get_redis_client(tenant_id=tenant_id)
redis_client_replica = get_redis_replica_client(tenant_id=tenant_id)
# we need to use celery's redis client to access its redis data
# (which lives on a different db number)
@@ -229,7 +227,7 @@ def check_for_indexing(self: Task, *, tenant_id: str | None) -> int | None:
# or be currently executing
try:
validate_indexing_fences(
tenant_id, redis_client_replica, redis_client_celery, lock_beat
tenant_id, self.app, redis_client, redis_client_celery, lock_beat
)
except Exception:
task_logger.exception("Exception while validating indexing fences")

View File

@@ -291,20 +291,17 @@ def validate_indexing_fence(
def validate_indexing_fences(
tenant_id: str | None,
r_replica: Redis,
celery_app: Celery,
r: Redis,
r_celery: Redis,
lock_beat: RedisLock,
) -> None:
"""Validates all indexing fences for this tenant ... aka makes sure
indexing tasks sent to celery are still in flight.
"""
reserved_indexing_tasks = celery_get_unacked_task_ids(
OnyxCeleryQueues.CONNECTOR_INDEXING, r_celery
)
# Use replica for this because the worst thing that happens
# is that we don't run the validation on this pass
for key_bytes in r_replica.scan_iter(
# validate all existing indexing jobs
for key_bytes in r.scan_iter(
RedisConnectorIndex.FENCE_PREFIX + "*", count=SCAN_ITER_COUNT_DEFAULT
):
lock_beat.reacquire()

View File

@@ -54,7 +54,6 @@ def _process_model_list_response(model_list_json: Any) -> list[str]:
@shared_task(
name=OnyxCeleryTask.CHECK_FOR_LLM_MODEL_UPDATE,
ignore_result=True,
soft_time_limit=JOB_TIMEOUT,
trail=False,
bind=True,

View File

@@ -4,7 +4,6 @@ from collections.abc import Callable
from datetime import timedelta
from itertools import islice
from typing import Any
from typing import Literal
from celery import shared_task
from celery import Task
@@ -27,7 +26,6 @@ from onyx.db.engine import get_all_tenant_ids
from onyx.db.engine import get_db_current_time
from onyx.db.engine import get_session_with_tenant
from onyx.db.enums import IndexingStatus
from onyx.db.enums import SyncStatus
from onyx.db.enums import SyncType
from onyx.db.models import ConnectorCredentialPair
from onyx.db.models import DocumentSet
@@ -40,7 +38,6 @@ from onyx.redis.redis_pool import redis_lock_dump
from onyx.utils.telemetry import optional_telemetry
from onyx.utils.telemetry import RecordType
_MONITORING_SOFT_TIME_LIMIT = 60 * 5 # 5 minutes
_MONITORING_TIME_LIMIT = _MONITORING_SOFT_TIME_LIMIT + 60 # 6 minutes
@@ -52,12 +49,6 @@ _CONNECTOR_INDEX_ATTEMPT_RUN_SUCCESS_KEY_FMT = (
"monitoring_connector_index_attempt_run_success:{cc_pair_id}:{index_attempt_id}"
)
_FINAL_METRIC_KEY_FMT = "sync_final_metrics:{sync_type}:{entity_id}:{sync_record_id}"
_SYNC_START_LATENCY_KEY_FMT = (
"sync_start_latency:{sync_type}:{entity_id}:{sync_record_id}"
)
def _mark_metric_as_emitted(redis_std: Redis, key: str) -> None:
"""Mark a metric as having been emitted by setting a Redis key with expiration"""
@@ -120,7 +111,6 @@ class Metric(BaseModel):
}.items()
if v is not None
}
task_logger.info(f"Emitting metric: {data}")
optional_telemetry(
record_type=RecordType.METRIC,
data=data,
@@ -199,371 +189,239 @@ def _build_connector_start_latency_metric(
f"Start latency for index attempt {recent_attempt.id}: {start_latency:.2f}s "
f"(desired: {desired_start_time}, actual: {recent_attempt.time_started})"
)
job_id = build_job_id("connector", str(cc_pair.id), str(recent_attempt.id))
return Metric(
key=metric_key,
name="connector_start_latency",
value=start_latency,
tags={
"job_id": job_id,
"connector_id": str(cc_pair.connector.id),
"source": str(cc_pair.connector.source),
},
tags={},
)
def _build_connector_final_metrics(
def _build_run_success_metrics(
cc_pair: ConnectorCredentialPair,
recent_attempts: list[IndexAttempt],
redis_std: Redis,
) -> list[Metric]:
"""
Final metrics for connector index attempts:
- Boolean success/fail metric
- If success, emit:
* duration (seconds)
* doc_count
"""
metrics = []
for attempt in recent_attempts:
metric_key = _CONNECTOR_INDEX_ATTEMPT_RUN_SUCCESS_KEY_FMT.format(
cc_pair_id=cc_pair.id,
index_attempt_id=attempt.id,
)
if _has_metric_been_emitted(redis_std, metric_key):
task_logger.info(
f"Skipping final metrics for connector {cc_pair.connector.id} "
f"index attempt {attempt.id}, already emitted."
f"Skipping metric for connector {cc_pair.connector.id} "
f"index attempt {attempt.id} because it has already been "
"emitted"
)
continue
# We only emit final metrics if the attempt is in a terminal state
if attempt.status not in [
if attempt.status in [
IndexingStatus.SUCCESS,
IndexingStatus.FAILED,
IndexingStatus.CANCELED,
]:
# Not finished; skip
continue
job_id = build_job_id("connector", str(cc_pair.id), str(attempt.id))
success = attempt.status == IndexingStatus.SUCCESS
metrics.append(
Metric(
key=metric_key, # We'll mark the same key for any final metrics
name="connector_run_succeeded",
value=success,
tags={
"job_id": job_id,
"connector_id": str(cc_pair.connector.id),
"source": str(cc_pair.connector.source),
"status": attempt.status.value,
},
task_logger.info(
f"Adding run success metric for index attempt {attempt.id} with status {attempt.status}"
)
)
if success:
# Make sure we have valid time_started
if attempt.time_started and attempt.time_updated:
duration_seconds = (
attempt.time_updated - attempt.time_started
).total_seconds()
metrics.append(
Metric(
key=None, # No need for a new key, or you can reuse the same if you prefer
name="connector_index_duration_seconds",
value=duration_seconds,
tags={
"job_id": job_id,
"connector_id": str(cc_pair.connector.id),
"source": str(cc_pair.connector.source),
},
)
)
else:
task_logger.error(
f"Index attempt {attempt.id} succeeded but has missing time "
f"(time_started={attempt.time_started}, time_updated={attempt.time_updated})."
)
# For doc counts, choose whichever field is more relevant
doc_count = attempt.total_docs_indexed or 0
metrics.append(
Metric(
key=None,
name="connector_index_doc_count",
value=doc_count,
tags={
"job_id": job_id,
"connector_id": str(cc_pair.connector.id),
"source": str(cc_pair.connector.source),
},
key=metric_key,
name="connector_run_succeeded",
value=attempt.status == IndexingStatus.SUCCESS,
tags={"source": str(cc_pair.connector.source)},
)
)
_mark_metric_as_emitted(redis_std, metric_key)
return metrics
def _collect_connector_metrics(db_session: Session, redis_std: Redis) -> list[Metric]:
"""Collect metrics about connector runs from the past hour"""
# NOTE: use get_db_current_time since the IndexAttempt times are set based on DB time
one_hour_ago = get_db_current_time(db_session) - timedelta(hours=1)
# Get all connector credential pairs
cc_pairs = db_session.scalars(select(ConnectorCredentialPair)).all()
# Might be more than one search setting, or just one
active_search_settings = get_active_search_settings(db_session)
active_search_settings = get_active_search_settings(db_session)
metrics = []
# If you want to process each cc_pair against each search setting:
for cc_pair in cc_pairs:
for search_settings in active_search_settings:
recent_attempts = (
db_session.query(IndexAttempt)
.filter(
IndexAttempt.connector_credential_pair_id == cc_pair.id,
IndexAttempt.search_settings_id == search_settings.id,
)
.order_by(IndexAttempt.time_created.desc())
.limit(2)
.all()
for cc_pair, search_settings in zip(cc_pairs, active_search_settings):
recent_attempts = (
db_session.query(IndexAttempt)
.filter(
IndexAttempt.connector_credential_pair_id == cc_pair.id,
IndexAttempt.search_settings_id == search_settings.id,
)
.order_by(IndexAttempt.time_created.desc())
.limit(2)
.all()
)
if not recent_attempts:
continue
if not recent_attempts:
continue
most_recent_attempt = recent_attempts[0]
second_most_recent_attempt = (
recent_attempts[1] if len(recent_attempts) > 1 else None
)
most_recent_attempt = recent_attempts[0]
second_most_recent_attempt = (
recent_attempts[1] if len(recent_attempts) > 1 else None
)
if one_hour_ago > most_recent_attempt.time_created:
continue
if one_hour_ago > most_recent_attempt.time_created:
continue
# Connector start latency
start_latency_metric = _build_connector_start_latency_metric(
cc_pair, most_recent_attempt, second_most_recent_attempt, redis_std
)
if start_latency_metric:
metrics.append(start_latency_metric)
# Connector start latency
start_latency_metric = _build_connector_start_latency_metric(
cc_pair, most_recent_attempt, second_most_recent_attempt, redis_std
)
if start_latency_metric:
metrics.append(start_latency_metric)
# Connector run success/failure
final_metrics = _build_connector_final_metrics(
cc_pair, recent_attempts, redis_std
)
metrics.extend(final_metrics)
# Connector run success/failure
run_success_metrics = _build_run_success_metrics(
cc_pair, recent_attempts, redis_std
)
metrics.extend(run_success_metrics)
return metrics
def _collect_sync_metrics(db_session: Session, redis_std: Redis) -> list[Metric]:
"""
Collect metrics for document set and group syncing:
- Success/failure status
- Start latency (always)
- Duration & doc count (only if success)
- Throughput (docs/min) (only if success)
"""
"""Collect metrics about document set and group syncing speed"""
# NOTE: use get_db_current_time since the SyncRecord times are set based on DB time
one_hour_ago = get_db_current_time(db_session) - timedelta(hours=1)
# Get all sync records that ended in the last hour
# Get all sync records from the last hour
recent_sync_records = db_session.scalars(
select(SyncRecord)
.where(SyncRecord.sync_end_time.isnot(None))
.where(SyncRecord.sync_end_time >= one_hour_ago)
.order_by(SyncRecord.sync_end_time.desc())
.where(SyncRecord.sync_start_time >= one_hour_ago)
.order_by(SyncRecord.sync_start_time.desc())
).all()
task_logger.info(
f"Collecting sync metrics for {len(recent_sync_records)} sync records"
)
metrics = []
for sync_record in recent_sync_records:
# Build a job_id for correlation
job_id = build_job_id("sync_record", str(sync_record.id))
# Skip if no end time (sync still in progress)
if not sync_record.sync_end_time:
continue
# Emit a SUCCESS/FAIL boolean metric
# Use a single Redis key to avoid re-emitting final metrics
final_metric_key = _FINAL_METRIC_KEY_FMT.format(
sync_type=sync_record.sync_type,
entity_id=sync_record.entity_id,
sync_record_id=sync_record.id,
# Check if we already emitted a metric for this sync record
metric_key = (
f"sync_speed:{sync_record.sync_type}:"
f"{sync_record.entity_id}:{sync_record.id}"
)
if not _has_metric_been_emitted(redis_std, final_metric_key):
# Evaluate success
sync_succeeded = sync_record.sync_status == SyncStatus.SUCCESS
metrics.append(
Metric(
key=final_metric_key,
name="sync_run_succeeded",
value=sync_succeeded,
tags={
"job_id": job_id,
"sync_type": str(sync_record.sync_type),
"status": str(sync_record.sync_status),
},
)
if _has_metric_been_emitted(redis_std, metric_key):
task_logger.info(
f"Skipping metric for sync record {sync_record.id} "
"because it has already been emitted"
)
continue
# If successful, emit additional metrics
if sync_succeeded:
if sync_record.sync_end_time and sync_record.sync_start_time:
duration_seconds = (
sync_record.sync_end_time - sync_record.sync_start_time
).total_seconds()
else:
task_logger.error(
f"Invalid times for sync record {sync_record.id}: "
f"start={sync_record.sync_start_time}, end={sync_record.sync_end_time}"
)
duration_seconds = None
# Calculate sync duration in minutes
sync_duration_mins = (
sync_record.sync_end_time - sync_record.sync_start_time
).total_seconds() / 60.0
doc_count = sync_record.num_docs_synced or 0
sync_speed = None
if duration_seconds and duration_seconds > 0:
duration_mins = duration_seconds / 60.0
sync_speed = (
doc_count / duration_mins if duration_mins > 0 else None
)
# Emit duration, doc count, speed
if duration_seconds is not None:
metrics.append(
Metric(
key=None,
name="sync_duration_seconds",
value=duration_seconds,
tags={
"job_id": job_id,
"sync_type": str(sync_record.sync_type),
},
)
)
else:
task_logger.error(
f"Invalid sync record {sync_record.id} with no duration"
)
metrics.append(
Metric(
key=None,
name="sync_doc_count",
value=doc_count,
tags={
"job_id": job_id,
"sync_type": str(sync_record.sync_type),
},
)
)
if sync_speed is not None:
metrics.append(
Metric(
key=None,
name="sync_speed_docs_per_min",
value=sync_speed,
tags={
"job_id": job_id,
"sync_type": str(sync_record.sync_type),
},
)
)
else:
task_logger.error(
f"Invalid sync record {sync_record.id} with no duration"
)
# Mark final metrics as emitted so we don't re-emit
_mark_metric_as_emitted(redis_std, final_metric_key)
# Emit start latency
start_latency_key = _SYNC_START_LATENCY_KEY_FMT.format(
sync_type=sync_record.sync_type,
entity_id=sync_record.entity_id,
sync_record_id=sync_record.id,
# Calculate sync speed (docs/min) - avoid division by zero
sync_speed = (
sync_record.num_docs_synced / sync_duration_mins
if sync_duration_mins > 0
else None
)
if not _has_metric_been_emitted(redis_std, start_latency_key):
# Get the entity's last update time based on sync type
entity: DocumentSet | UserGroup | None = None
if sync_record.sync_type == SyncType.DOCUMENT_SET:
entity = db_session.scalar(
select(DocumentSet).where(DocumentSet.id == sync_record.entity_id)
)
elif sync_record.sync_type == SyncType.USER_GROUP:
entity = db_session.scalar(
select(UserGroup).where(UserGroup.id == sync_record.entity_id)
)
else:
task_logger.info(
f"Skipping sync record {sync_record.id} of type {sync_record.sync_type}."
)
continue
if entity is None:
task_logger.error(
f"Could not find entity for sync record {sync_record.id} "
f"(type={sync_record.sync_type}, id={sync_record.entity_id})."
)
continue
if sync_speed is None:
task_logger.error(
f"Something went wrong with sync speed calculation. "
f"Sync record: {sync_record.id}, duration: {sync_duration_mins}, "
f"docs synced: {sync_record.num_docs_synced}"
)
continue
# Calculate start latency in seconds:
# (actual sync start) - (last modified time)
if entity.time_last_modified_by_user and sync_record.sync_start_time:
start_latency = (
sync_record.sync_start_time - entity.time_last_modified_by_user
).total_seconds()
task_logger.info(
f"Calculated sync speed for record {sync_record.id}: {sync_speed} docs/min"
)
metrics.append(
Metric(
key=metric_key,
name="sync_speed_docs_per_min",
value=sync_speed,
tags={
"sync_type": str(sync_record.sync_type),
"status": str(sync_record.sync_status),
},
)
)
if start_latency < 0:
task_logger.error(
f"Negative start latency for sync record {sync_record.id} "
f"(start={sync_record.sync_start_time}, entity_modified={entity.time_last_modified_by_user})"
)
continue
# Add sync start latency metric
start_latency_key = (
f"sync_start_latency:{sync_record.sync_type}"
f":{sync_record.entity_id}:{sync_record.id}"
)
if _has_metric_been_emitted(redis_std, start_latency_key):
task_logger.info(
f"Skipping start latency metric for sync record {sync_record.id} "
"because it has already been emitted"
)
continue
metrics.append(
Metric(
key=start_latency_key,
name="sync_start_latency_seconds",
value=start_latency,
tags={
"job_id": job_id,
"sync_type": str(sync_record.sync_type),
},
)
)
# Get the entity's last update time based on sync type
entity: DocumentSet | UserGroup | None = None
if sync_record.sync_type == SyncType.DOCUMENT_SET:
entity = db_session.scalar(
select(DocumentSet).where(DocumentSet.id == sync_record.entity_id)
)
elif sync_record.sync_type == SyncType.USER_GROUP:
entity = db_session.scalar(
select(UserGroup).where(UserGroup.id == sync_record.entity_id)
)
else:
# Skip other sync types
task_logger.info(
f"Skipping sync record {sync_record.id} "
f"with type {sync_record.sync_type} "
f"and id {sync_record.entity_id} "
"because it is not a document set or user group"
)
continue
_mark_metric_as_emitted(redis_std, start_latency_key)
if entity is None:
task_logger.error(
f"Could not find entity for sync record {sync_record.id} "
f"with type {sync_record.sync_type} and id {sync_record.entity_id}"
)
continue
# Calculate start latency in seconds
start_latency = (
sync_record.sync_start_time - entity.time_last_modified_by_user
).total_seconds()
task_logger.info(
f"Calculated start latency for sync record {sync_record.id}: {start_latency} seconds"
)
if start_latency < 0:
task_logger.error(
f"Start latency is negative for sync record {sync_record.id} "
f"with type {sync_record.sync_type} and id {sync_record.entity_id}. "
f"Sync start time: {sync_record.sync_start_time}, "
f"Entity last modified: {entity.time_last_modified_by_user}"
)
continue
metrics.append(
Metric(
key=start_latency_key,
name="sync_start_latency_seconds",
value=start_latency,
tags={
"sync_type": str(sync_record.sync_type),
},
)
)
return metrics
def build_job_id(
job_type: Literal["connector", "sync_record"],
primary_id: str,
secondary_id: str | None = None,
) -> str:
if job_type == "connector":
if secondary_id is None:
raise ValueError(
"secondary_id (attempt_id) is required for connector job_type"
)
return f"connector:{primary_id}:attempt:{secondary_id}"
elif job_type == "sync_record":
return f"sync_record:{primary_id}"
@shared_task(
name=OnyxCeleryTask.MONITOR_BACKGROUND_PROCESSES,
ignore_result=True,
soft_time_limit=_MONITORING_SOFT_TIME_LIMIT,
time_limit=_MONITORING_TIME_LIMIT,
queue=OnyxCeleryQueues.MONITORING,
@@ -601,7 +459,6 @@ def monitor_background_processes(self: Task, *, tenant_id: str | None) -> None:
lambda: _collect_connector_metrics(db_session, redis_std),
lambda: _collect_sync_metrics(db_session, redis_std),
]
# Collect and log each metric
with get_session_with_tenant(tenant_id) as db_session:
for metric_fn in metric_functions:

View File

@@ -78,7 +78,6 @@ def _is_pruning_due(cc_pair: ConnectorCredentialPair) -> bool:
@shared_task(
name=OnyxCeleryTask.CHECK_FOR_PRUNING,
ignore_result=True,
soft_time_limit=JOB_TIMEOUT,
bind=True,
)

View File

@@ -33,7 +33,6 @@ from onyx.document_index.interfaces import VespaDocumentFields
from onyx.redis.redis_pool import get_redis_client
from onyx.redis.redis_pool import redis_lock_dump
from onyx.server.documents.models import ConnectorCredentialPairIdentifier
from shared_configs.configs import IGNORED_SYNCING_TENANT_LIST
DOCUMENT_BY_CC_PAIR_CLEANUP_MAX_RETRIES = 3
@@ -214,7 +213,6 @@ def document_by_cc_pair_cleanup_task(
@shared_task(
name=OnyxCeleryTask.CLOUD_BEAT_TASK_GENERATOR,
ignore_result=True,
trail=False,
bind=True,
)
@@ -249,10 +247,6 @@ def cloud_beat_task_generator(
lock_beat.reacquire()
last_lock_time = current_time
# needed in the cloud
if IGNORED_SYNCING_TENANT_LIST and tenant_id in IGNORED_SYNCING_TENANT_LIST:
continue
self.app.send_task(
task_name,
kwargs=dict(

View File

@@ -78,7 +78,6 @@ from onyx.redis.redis_connector_index import RedisConnectorIndex
from onyx.redis.redis_connector_prune import RedisConnectorPrune
from onyx.redis.redis_document_set import RedisDocumentSet
from onyx.redis.redis_pool import get_redis_client
from onyx.redis.redis_pool import get_redis_replica_client
from onyx.redis.redis_pool import redis_lock_dump
from onyx.redis.redis_pool import SCAN_ITER_COUNT_DEFAULT
from onyx.redis.redis_usergroup import RedisUserGroup
@@ -98,7 +97,6 @@ logger = setup_logger()
# which bloats the result metadata considerably. trail=False prevents this.
@shared_task(
name=OnyxCeleryTask.CHECK_FOR_VESPA_SYNC_TASK,
ignore_result=True,
soft_time_limit=JOB_TIMEOUT,
trail=False,
bind=True,
@@ -873,12 +871,7 @@ def monitor_ccpair_indexing_taskset(
redis_connector_index.reset()
@shared_task(
name=OnyxCeleryTask.MONITOR_VESPA_SYNC,
ignore_result=True,
soft_time_limit=300,
bind=True,
)
@shared_task(name=OnyxCeleryTask.MONITOR_VESPA_SYNC, soft_time_limit=300, bind=True)
def monitor_vespa_sync(self: Task, tenant_id: str | None) -> bool | None:
"""This is a celery beat task that monitors and finalizes various long running tasks.
@@ -902,17 +895,6 @@ def monitor_vespa_sync(self: Task, tenant_id: str | None) -> bool | None:
r = get_redis_client(tenant_id=tenant_id)
# Replica usage notes
#
# False negatives are OK (i.e. failing to see a key that exists on the master).
# We simply skip the monitoring work and it will be caught on the next pass.
#
# False positives are not OK, and are possible if we clear a fence on the master and
# then read from the replica. In this case, monitoring work could be done on a fence
# that no longer exists. To avoid this, we scan from the replica, but double check
# the result on the master.
r_replica = get_redis_replica_client(tenant_id=tenant_id)
lock_beat: RedisLock = r.lock(
OnyxRedisLocks.MONITOR_VESPA_SYNC_BEAT_LOCK,
timeout=CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT,
@@ -972,19 +954,17 @@ def monitor_vespa_sync(self: Task, tenant_id: str | None) -> bool | None:
# scan and monitor activity to completion
phase_start = time.monotonic()
lock_beat.reacquire()
if r_replica.exists(RedisConnectorCredentialPair.get_fence_key()):
if r.exists(RedisConnectorCredentialPair.get_fence_key()):
monitor_connector_taskset(r)
if r.exists(RedisConnectorCredentialPair.get_fence_key()):
monitor_connector_taskset(r)
timings["connector"] = time.monotonic() - phase_start
timings["connector_ttl"] = r.ttl(OnyxRedisLocks.MONITOR_VESPA_SYNC_BEAT_LOCK)
phase_start = time.monotonic()
lock_beat.reacquire()
for key_bytes in r_replica.scan_iter(
for key_bytes in r.scan_iter(
RedisConnectorDelete.FENCE_PREFIX + "*", count=SCAN_ITER_COUNT_DEFAULT
):
if r.exists(key_bytes):
monitor_connector_deletion_taskset(tenant_id, key_bytes, r)
monitor_connector_deletion_taskset(tenant_id, key_bytes, r)
lock_beat.reacquire()
timings["connector_deletion"] = time.monotonic() - phase_start
@@ -994,74 +974,66 @@ def monitor_vespa_sync(self: Task, tenant_id: str | None) -> bool | None:
phase_start = time.monotonic()
lock_beat.reacquire()
for key_bytes in r_replica.scan_iter(
for key_bytes in r.scan_iter(
RedisDocumentSet.FENCE_PREFIX + "*", count=SCAN_ITER_COUNT_DEFAULT
):
if r.exists(key_bytes):
with get_session_with_tenant(tenant_id) as db_session:
monitor_document_set_taskset(tenant_id, key_bytes, r, db_session)
with get_session_with_tenant(tenant_id) as db_session:
monitor_document_set_taskset(tenant_id, key_bytes, r, db_session)
lock_beat.reacquire()
timings["documentset"] = time.monotonic() - phase_start
timings["documentset_ttl"] = r.ttl(OnyxRedisLocks.MONITOR_VESPA_SYNC_BEAT_LOCK)
phase_start = time.monotonic()
lock_beat.reacquire()
for key_bytes in r_replica.scan_iter(
for key_bytes in r.scan_iter(
RedisUserGroup.FENCE_PREFIX + "*", count=SCAN_ITER_COUNT_DEFAULT
):
if r.exists(key_bytes):
monitor_usergroup_taskset = (
fetch_versioned_implementation_with_fallback(
"onyx.background.celery.tasks.vespa.tasks",
"monitor_usergroup_taskset",
noop_fallback,
)
)
with get_session_with_tenant(tenant_id) as db_session:
monitor_usergroup_taskset(tenant_id, key_bytes, r, db_session)
monitor_usergroup_taskset = fetch_versioned_implementation_with_fallback(
"onyx.background.celery.tasks.vespa.tasks",
"monitor_usergroup_taskset",
noop_fallback,
)
with get_session_with_tenant(tenant_id) as db_session:
monitor_usergroup_taskset(tenant_id, key_bytes, r, db_session)
lock_beat.reacquire()
timings["usergroup"] = time.monotonic() - phase_start
timings["usergroup_ttl"] = r.ttl(OnyxRedisLocks.MONITOR_VESPA_SYNC_BEAT_LOCK)
phase_start = time.monotonic()
lock_beat.reacquire()
for key_bytes in r_replica.scan_iter(
for key_bytes in r.scan_iter(
RedisConnectorPrune.FENCE_PREFIX + "*", count=SCAN_ITER_COUNT_DEFAULT
):
if r.exists(key_bytes):
with get_session_with_tenant(tenant_id) as db_session:
monitor_ccpair_pruning_taskset(tenant_id, key_bytes, r, db_session)
with get_session_with_tenant(tenant_id) as db_session:
monitor_ccpair_pruning_taskset(tenant_id, key_bytes, r, db_session)
lock_beat.reacquire()
timings["pruning"] = time.monotonic() - phase_start
timings["pruning_ttl"] = r.ttl(OnyxRedisLocks.MONITOR_VESPA_SYNC_BEAT_LOCK)
phase_start = time.monotonic()
lock_beat.reacquire()
for key_bytes in r_replica.scan_iter(
for key_bytes in r.scan_iter(
RedisConnectorIndex.FENCE_PREFIX + "*", count=SCAN_ITER_COUNT_DEFAULT
):
if r.exists(key_bytes):
with get_session_with_tenant(tenant_id) as db_session:
monitor_ccpair_indexing_taskset(tenant_id, key_bytes, r, db_session)
with get_session_with_tenant(tenant_id) as db_session:
monitor_ccpair_indexing_taskset(tenant_id, key_bytes, r, db_session)
lock_beat.reacquire()
timings["indexing"] = time.monotonic() - phase_start
timings["indexing_ttl"] = r.ttl(OnyxRedisLocks.MONITOR_VESPA_SYNC_BEAT_LOCK)
phase_start = time.monotonic()
lock_beat.reacquire()
for key_bytes in r_replica.scan_iter(
for key_bytes in r.scan_iter(
RedisConnectorPermissionSync.FENCE_PREFIX + "*",
count=SCAN_ITER_COUNT_DEFAULT,
):
if r.exists(key_bytes):
with get_session_with_tenant(tenant_id) as db_session:
monitor_ccpair_permissions_taskset(
tenant_id, key_bytes, r, db_session
)
with get_session_with_tenant(tenant_id) as db_session:
monitor_ccpair_permissions_taskset(tenant_id, key_bytes, r, db_session)
lock_beat.reacquire()
timings["permissions"] = time.monotonic() - phase_start
timings["permissions_ttl"] = r.ttl(OnyxRedisLocks.MONITOR_VESPA_SYNC_BEAT_LOCK)
except SoftTimeLimitExceeded:
task_logger.info(
"Soft time limit exceeded, task is being terminated gracefully."

View File

@@ -200,8 +200,6 @@ REDIS_HOST = os.environ.get("REDIS_HOST") or "localhost"
REDIS_PORT = int(os.environ.get("REDIS_PORT", 6379))
REDIS_PASSWORD = os.environ.get("REDIS_PASSWORD") or ""
# this assumes that other redis settings remain the same as the primary
REDIS_REPLICA_HOST = os.environ.get("REDIS_REPLICA_HOST") or REDIS_HOST
REDIS_AUTH_KEY_PREFIX = "fastapi_users_token:"

View File

@@ -232,29 +232,20 @@ class ConfluenceConnector(LoadConnector, PollConnector, SlimConnector):
}
# Get labels
label_dicts = (
confluence_object.get("metadata", {}).get("labels", {}).get("results", [])
)
page_labels = [label.get("name") for label in label_dicts if label.get("name")]
label_dicts = confluence_object["metadata"]["labels"]["results"]
page_labels = [label["name"] for label in label_dicts]
if page_labels:
doc_metadata["labels"] = page_labels
# Get last modified and author email
version_dict = confluence_object.get("version", {})
last_modified = (
datetime_from_string(version_dict.get("when"))
if version_dict.get("when")
else None
)
author_email = version_dict.get("by", {}).get("email")
title = confluence_object.get("title", "Untitled Document")
last_modified = datetime_from_string(confluence_object["version"]["when"])
author_email = confluence_object["version"].get("by", {}).get("email")
return Document(
id=object_url,
sections=[Section(link=object_url, text=object_text)],
source=DocumentSource.CONFLUENCE,
semantic_identifier=title,
semantic_identifier=confluence_object["title"],
doc_updated_at=last_modified,
primary_owners=(
[BasicExpertInfo(email=author_email)] if author_email else None

View File

@@ -6,7 +6,6 @@ from datetime import datetime
from datetime import timezone
from typing import Any
from typing import Optional
from urllib.parse import unquote
import msal # type: ignore
from office365.graph_client import GraphClient # type: ignore
@@ -83,13 +82,8 @@ class SharepointConnector(LoadConnector, PollConnector):
sites_index = parts.index("sites")
site_url = "/".join(parts[: sites_index + 2])
folder = (
"/".join(unquote(part) for part in parts[sites_index + 2 :])
if len(parts) > sites_index + 2
else None
parts[sites_index + 2] if len(parts) > sites_index + 2 else None
)
# Handling for new URL structure
if folder and folder.startswith("Shared Documents/"):
folder = folder[len("Shared Documents/") :]
site_data_list.append(
SiteData(url=site_url, folder=folder, sites=[], driveitems=[])
)
@@ -117,19 +111,11 @@ class SharepointConnector(LoadConnector, PollConnector):
query = query.filter(filter_str)
driveitems = query.execute_query()
if element.folder:
expected_path = f"/root:/{element.folder}"
filtered_driveitems = [
item
for item in driveitems
if item.parent_reference.path.endswith(expected_path)
if element.folder in item.parent_reference.path
]
if len(filtered_driveitems) == 0:
all_paths = [
item.parent_reference.path for item in driveitems
]
logger.warning(
f"Nothing found for folder '{expected_path}' in any of valid paths: {all_paths}"
)
element.driveitems.extend(filtered_driveitems)
else:
element.driveitems.extend(driveitems)

View File

@@ -205,11 +205,6 @@ class User(SQLAlchemyBaseUserTableUUID, Base):
primaryjoin="User.id == foreign(ConnectorCredentialPair.creator_id)",
)
folders: Mapped[list["UserFolder"]] = relationship(
"UserFolder", back_populates="user"
)
files: Mapped[list["UserFile"]] = relationship("UserFile", back_populates="user")
class AccessToken(SQLAlchemyBaseAccessTokenTableUUID, Base):
pass
@@ -1563,12 +1558,6 @@ class Persona(Base):
secondary="persona__user_group",
viewonly=True,
)
# Relationship to UserFile
user_files: Mapped[list["UserFile"]] = relationship(
"UserFile",
secondary="persona__user_file",
back_populates="assistants",
)
labels: Mapped[list["PersonaLabel"]] = relationship(
"PersonaLabel",
secondary=Persona__PersonaLabel.__table__,
@@ -1585,15 +1574,6 @@ class Persona(Base):
)
# Association table between personas and user files.
# Both columns are foreign keys and both are flagged primary_key=True, so
# together they form the composite primary key of "persona__user_file".
class Persona__UserFile(Base):
__tablename__ = "persona__user_file"
# References persona.id; one half of the composite primary key.
persona_id: Mapped[int] = mapped_column(ForeignKey("persona.id"), primary_key=True)
# References user_file.id; the other half of the composite primary key.
user_file_id: Mapped[int] = mapped_column(
ForeignKey("user_file.id"), primary_key=True
)
class PersonaLabel(Base):
__tablename__ = "persona_label"
@@ -2053,51 +2033,6 @@ class InputPrompt__User(Base):
disabled: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)
# ORM model for the "user_folder" table: a named folder owned by a user that
# groups UserFile rows (see the `files` relationship below).
#
# NOTE(review): the "add user files" migration declares user_folder.user_id as
# sa.UUID() with nullable=True, and name/description as nullable=True, while
# this model annotates user_id as int and marks all three nullable=False.
# The migration also creates a display_priority column that is not mapped
# here. Confirm which side is authoritative.
class UserFolder(Base):
__tablename__ = "user_folder"
# Auto-incrementing integer primary key.
id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
# Owning user (foreign key to user.id).
user_id: Mapped[int] = mapped_column(ForeignKey("user.id"), nullable=False)
name: Mapped[str] = mapped_column(nullable=False)
description: Mapped[str] = mapped_column(nullable=False)
# Default is the datetime.datetime.utcnow callable, which yields a naive
# (timezone-unaware) timestamp.
created_at: Mapped[datetime.datetime] = mapped_column(
default=datetime.datetime.utcnow
)
# Paired with User.folders via back_populates.
user: Mapped["User"] = relationship(back_populates="folders")
# Paired with UserFile.folder via back_populates.
files: Mapped[list["UserFile"]] = relationship(back_populates="folder")
# String-valued enum (subclasses both str and Enum) with three members.
# NOTE(review): no usage is visible in this part of the file; presumably it
# tags where a user document came from - confirm against callers.
class UserDocument(str, Enum):
CHAT = "chat"
RECENT = "recent"
FILE = "file"
# ORM model for the "user_file" table: a file uploaded by a user, optionally
# placed in a UserFolder and linkable to personas through the
# persona__user_file association table.
#
# NOTE(review): user_id is annotated `int | None` but the column is declared
# nullable=False, which contradict each other. The "add user files" migration
# declares user_file.user_id as sa.UUID() with nullable=True and also creates
# a file_type column that is not mapped here. Confirm which side is
# authoritative.
class UserFile(Base):
__tablename__ = "user_file"
# Auto-incrementing integer primary key.
id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
# Owning user (foreign key to user.id).
user_id: Mapped[int | None] = mapped_column(ForeignKey("user.id"), nullable=False)
# Many-to-many link to Persona through the persona__user_file table;
# paired with Persona.user_files via back_populates.
assistants: Mapped[list["Persona"]] = relationship(
"Persona",
secondary=Persona__UserFile.__table__,
back_populates="user_files",
)
# Optional containing folder (foreign key to user_folder.id).
folder_id: Mapped[int | None] = mapped_column(
ForeignKey("user_folder.id"), nullable=True
)
file_id: Mapped[str] = mapped_column(nullable=False)
document_id: Mapped[str] = mapped_column(nullable=False)
name: Mapped[str] = mapped_column(nullable=False)
# Default is the datetime.datetime.utcnow callable, which yields a naive
# (timezone-unaware) timestamp.
created_at: Mapped[datetime.datetime] = mapped_column(
default=datetime.datetime.utcnow
)
# Paired with User.files via back_populates.
user: Mapped["User"] = relationship(back_populates="files")
# Paired with UserFolder.files via back_populates.
folder: Mapped["UserFolder"] = relationship(back_populates="files")
"""
Multi-tenancy related tables
"""

View File

@@ -8,64 +8,20 @@ from sqlalchemy.orm import Session
from onyx.db.enums import SyncStatus
from onyx.db.enums import SyncType
from onyx.db.models import SyncRecord
from onyx.setup import setup_logger
logger = setup_logger()
def insert_sync_record(
db_session: Session,
entity_id: int,
entity_id: int | None,
sync_type: SyncType,
) -> SyncRecord:
"""Insert a new sync record into the database, cancelling any existing in-progress records.
"""Insert a new sync record into the database.
Args:
db_session: The database session to use
entity_id: The ID of the entity being synced (document set ID, user group ID, etc.)
sync_type: The type of sync operation
"""
# If an existing in-progress sync record exists, mark as cancelled
existing_in_progress_sync_record = fetch_latest_sync_record(
db_session, entity_id, sync_type, sync_status=SyncStatus.IN_PROGRESS
)
if existing_in_progress_sync_record is not None:
logger.info(
f"Cancelling existing in-progress sync record {existing_in_progress_sync_record.id} "
f"for entity_id={entity_id} sync_type={sync_type}"
)
mark_sync_records_as_cancelled(db_session, entity_id, sync_type)
return _create_sync_record(db_session, entity_id, sync_type)
def mark_sync_records_as_cancelled(
    db_session: Session,
    entity_id: int | None,
    sync_type: SyncType,
) -> None:
    """Flip every in-progress sync record for an entity/type to CANCELED.

    Args:
        db_session: Session used to run the UPDATE; it is committed before
            this function returns.
        entity_id: ID of the entity whose records are targeted.
        sync_type: Only records of this sync type are targeted.
    """
    # Only rows that are still IN_PROGRESS for this entity and type are touched.
    in_progress_for_entity = and_(
        SyncRecord.entity_id == entity_id,
        SyncRecord.sync_type == sync_type,
        SyncRecord.sync_status == SyncStatus.IN_PROGRESS,
    )
    cancel_stmt = update(SyncRecord).where(in_progress_for_entity)
    cancel_stmt = cancel_stmt.values(sync_status=SyncStatus.CANCELED)

    db_session.execute(cancel_stmt)
    db_session.commit()
def _create_sync_record(
db_session: Session,
entity_id: int | None,
sync_type: SyncType,
) -> SyncRecord:
"""Create and insert a new sync record into the database."""
sync_record = SyncRecord(
entity_id=entity_id,
sync_type=sync_type,
@@ -83,7 +39,6 @@ def fetch_latest_sync_record(
db_session: Session,
entity_id: int,
sync_type: SyncType,
sync_status: SyncStatus | None = None,
) -> SyncRecord | None:
"""Fetch the most recent sync record for a given entity ID and status.
@@ -104,9 +59,6 @@ def fetch_latest_sync_record(
.limit(1)
)
if sync_status is not None:
stmt = stmt.where(SyncRecord.sync_status == sync_status)
result = db_session.execute(stmt)
return result.scalar_one_or_none()

View File

@@ -1,29 +0,0 @@
from typing import List
from fastapi import UploadFile
from sqlalchemy.orm import Session
from onyx.db.models import User
from onyx.db.models import UserFile
from onyx.server.documents.connector import upload_files
from onyx.server.documents.models import FileUploadResponse
def create_user_files(
    files: List[UploadFile],
    folder_id: int | None,
    user: User,
    db_session: Session,
) -> FileUploadResponse:
    """Store uploaded files and record a UserFile row for each one.

    Args:
        files: Uploaded files to store.
        folder_id: Folder to place the files in; ``-1`` and ``None`` both
            mean "no folder".
        user: Owner of the new rows; a falsy value stores no owner.
        db_session: Session used for the upload and the inserts; it is
            committed before returning.

    Returns:
        The response produced by ``upload_files``.
    """
    upload_response = upload_files(files, db_session)

    owner_id = user.id if user else None
    # -1 is treated as a sentinel for "no folder".
    target_folder_id = None if folder_id == -1 else folder_id

    # NOTE(review): zip() stops at the shorter input, so if upload_files ever
    # returns a different number of paths than files passed in, the extras are
    # silently dropped or mispaired - confirm it returns one path per file.
    user_file_rows = [
        UserFile(
            user_id=owner_id,
            folder_id=target_folder_id,
            file_id=stored_path,
            document_id=stored_path,
            name=uploaded.filename,
        )
        for stored_path, uploaded in zip(upload_response.file_paths, files)
    ]
    db_session.add_all(user_file_rows)
    db_session.commit()
    return upload_response

View File

@@ -594,7 +594,6 @@ class VespaIndex(DocumentIndex):
primary_index=index_name == self.index_name,
)
large_chunks_enabled = multipass_config.enable_large_chunks
enriched_doc_infos = VespaIndex.enrich_basic_chunk_info(
index_name=index_name,
http_client=http_client,
@@ -663,7 +662,6 @@ class VespaIndex(DocumentIndex):
tenant_id=tenant_id,
large_chunks_enabled=large_chunks_enabled,
)
for doc_chunk_ids_batch in batch_generator(
chunks_to_delete, BATCH_SIZE
):

View File

@@ -97,7 +97,6 @@ from onyx.server.settings.api import basic_router as settings_router
from onyx.server.token_rate_limits.api import (
router as token_rate_limit_settings_router,
)
from onyx.server.user_documents.api import router as user_documents_router
from onyx.server.utils import BasicAuthenticationError
from onyx.setup import setup_multitenant_onyx
from onyx.setup import setup_onyx
@@ -287,7 +286,6 @@ def get_application() -> FastAPI:
include_router_with_global_prefix_prepended(application, input_prompt_router)
include_router_with_global_prefix_prepended(application, admin_input_prompt_router)
include_router_with_global_prefix_prepended(application, cc_pair_router)
include_router_with_global_prefix_prepended(application, user_documents_router)
include_router_with_global_prefix_prepended(application, folder_router)
include_router_with_global_prefix_prepended(application, document_set_router)
include_router_with_global_prefix_prepended(application, search_settings_router)

View File

@@ -21,7 +21,6 @@ from onyx.configs.app_configs import REDIS_HOST
from onyx.configs.app_configs import REDIS_PASSWORD
from onyx.configs.app_configs import REDIS_POOL_MAX_CONNECTIONS
from onyx.configs.app_configs import REDIS_PORT
from onyx.configs.app_configs import REDIS_REPLICA_HOST
from onyx.configs.app_configs import REDIS_SSL
from onyx.configs.app_configs import REDIS_SSL_CA_CERTS
from onyx.configs.app_configs import REDIS_SSL_CERT_REQS
@@ -133,32 +132,23 @@ class RedisPool:
_instance: Optional["RedisPool"] = None
_lock: threading.Lock = threading.Lock()
_pool: redis.BlockingConnectionPool
_replica_pool: redis.BlockingConnectionPool
def __new__(cls) -> "RedisPool":
if not cls._instance:
with cls._lock:
if not cls._instance:
cls._instance = super(RedisPool, cls).__new__(cls)
cls._instance._init_pools()
cls._instance._init_pool()
return cls._instance
def _init_pools(self) -> None:
def _init_pool(self) -> None:
self._pool = RedisPool.create_pool(ssl=REDIS_SSL)
self._replica_pool = RedisPool.create_pool(
host=REDIS_REPLICA_HOST, ssl=REDIS_SSL
)
def get_client(self, tenant_id: str | None) -> Redis:
    """Return a TenantRedis client for the tenant, backed by ``self._pool``.

    A ``tenant_id`` of ``None`` is mapped to the ``"public"`` tenant.
    """
    effective_tenant = "public" if tenant_id is None else tenant_id
    return TenantRedis(effective_tenant, connection_pool=self._pool)
def get_replica_client(self, tenant_id: str | None) -> Redis:
    """Return a TenantRedis client for the tenant, backed by ``self._replica_pool``.

    A ``tenant_id`` of ``None`` is mapped to the ``"public"`` tenant.
    """
    effective_tenant = "public" if tenant_id is None else tenant_id
    return TenantRedis(effective_tenant, connection_pool=self._replica_pool)
@staticmethod
def create_pool(
host: str = REDIS_HOST,
@@ -222,10 +212,6 @@ def get_redis_client(*, tenant_id: str | None) -> Redis:
return redis_pool.get_client(tenant_id)
def get_redis_replica_client(*, tenant_id: str | None) -> Redis:
    """Return the module-level pool's replica client for ``tenant_id``.

    ``tenant_id`` is keyword-only and is forwarded unchanged to
    ``redis_pool.get_replica_client``.
    """
    replica_client = redis_pool.get_replica_client(tenant_id)
    return replica_client
SSL_CERT_REQS_MAP = {
"none": ssl.CERT_NONE,
"optional": ssl.CERT_OPTIONAL,

View File

@@ -6184,7 +6184,7 @@
"chunk_ind": 0
},
{
"url": "https://docs.onyx.app/more/use_cases/support",
"url": "https://docs.onyx.app/more/use_cases/customer_support",
"title": "Customer Support",
"content": "Help your customer support team instantly answer any question across your entire product.\n\nAI Enabled Support\nCustomer support agents have one of the highest breadth jobs. They field requests that cover the entire surface area of the product and need to help your users find success on extremely short timelines. Because they're not the same people who designed or built the system, they often lack the depth of understanding needed - resulting in delays and escalations to other teams. Modern teams are leveraging AI to help their CS team optimize the speed and quality of these critical customer-facing interactions.\n\nThe Importance of Context\nThere are two critical components of AI copilots for customer support. The first is that the AI system needs to be connected with as much information as possible (not just support tools like Zendesk or Intercom) and that the knowledge needs to be as fresh as possible. Sometimes a fix might even be in places rarely checked by CS such as pull requests in a code repository. The second critical component is the ability of the AI system to break down difficult concepts and convoluted processes into more digestible descriptions and for your team members to be able to chat back and forth with the system to build a better understanding.\n\nOnyx takes care of both of these. The system connects up to over 30+ different applications and the knowledge is pulled in constantly so that the information access is always up to date.",
"title_embedding": [

View File

@@ -24,7 +24,7 @@
"chunk_ind": 0
},
{
"url": "https://docs.onyx.app/more/use_cases/support",
"url": "https://docs.onyx.app/more/use_cases/customer_support",
"title": "Customer Support",
"content": "Help your customer support team instantly answer any question across your entire product.\n\nAI Enabled Support\nCustomer support agents have one of the highest breadth jobs. They field requests that cover the entire surface area of the product and need to help your users find success on extremely short timelines. Because they're not the same people who designed or built the system, they often lack the depth of understanding needed - resulting in delays and escalations to other teams. Modern teams are leveraging AI to help their CS team optimize the speed and quality of these critical customer-facing interactions.\n\nThe Importance of Context\nThere are two critical components of AI copilots for customer support. The first is that the AI system needs to be connected with as much information as possible (not just support tools like Zendesk or Intercom) and that the knowledge needs to be as fresh as possible. Sometimes a fix might even be in places rarely checked by CS such as pull requests in a code repository. The second critical component is the ability of the AI system to break down difficult concepts and convoluted processes into more digestible descriptions and for your team members to be able to chat back and forth with the system to build a better understanding.\n\nOnyx takes care of both of these. The system connects up to over 30+ different applications and the knowledge is pulled in constantly so that the information access is always up to date.",
"chunk_ind": 0

View File

@@ -380,7 +380,12 @@ def check_drive_tokens(
return AuthStatus(authenticated=True)
def upload_files(files: list[UploadFile], db_session: Session) -> FileUploadResponse:
@router.post("/admin/connector/file/upload")
def upload_files(
files: list[UploadFile],
_: User = Depends(current_curator_or_admin_user),
db_session: Session = Depends(get_session),
) -> FileUploadResponse:
for file in files:
if not file.filename:
raise HTTPException(status_code=400, detail="File name cannot be empty")
@@ -441,15 +446,6 @@ def upload_files(files: list[UploadFile], db_session: Session) -> FileUploadResp
return FileUploadResponse(file_paths=deduped_file_paths)
@router.post("/admin/connector/file/upload")
def upload_files_api(
    files: list[UploadFile],
    _: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
) -> FileUploadResponse:
    """HTTP entry point that delegates to ``upload_files``.

    The ``_`` dependency only enforces the curator/admin check; its value is
    not used.
    """
    upload_result = upload_files(files, db_session)
    return upload_result
@router.get("/admin/connector")
def get_connectors_by_credential(
_: User = Depends(current_curator_or_admin_user),
@@ -933,21 +929,81 @@ def connector_run_once(
connector_id = run_info.connector_id
specified_credential_ids = run_info.credential_ids
if not specified_credential_ids:
try:
possible_credential_ids = get_connector_credential_ids(
run_info.connector_id, db_session
)
except ValueError:
raise HTTPException(
status_code=400, detail="No credentials specified for indexing"
status_code=404,
detail=f"Connector by id {connector_id} does not exist.",
)
try:
num_triggers = trigger_indexing_for_cc_pair(
specified_credential_ids,
connector_id,
run_info.from_beginning,
tenant_id,
db_session,
if not specified_credential_ids:
credential_ids = possible_credential_ids
else:
if set(specified_credential_ids).issubset(set(possible_credential_ids)):
credential_ids = specified_credential_ids
else:
raise HTTPException(
status_code=400,
detail="Not all specified credentials are associated with connector",
)
if not credential_ids:
raise HTTPException(
status_code=400,
detail="Connector has no valid credentials, cannot create index attempts.",
)
except ValueError as e:
raise HTTPException(status_code=400, detail=str(e))
# Prevents index attempts for cc pairs that already have an index attempt currently running
skipped_credentials = [
credential_id
for credential_id in credential_ids
if get_index_attempts_for_cc_pair(
cc_pair_identifier=ConnectorCredentialPairIdentifier(
connector_id=run_info.connector_id,
credential_id=credential_id,
),
only_current=True,
db_session=db_session,
disinclude_finished=True,
)
]
connector_credential_pairs = [
get_connector_credential_pair(
db_session=db_session,
connector_id=connector_id,
credential_id=credential_id,
)
for credential_id in credential_ids
if credential_id not in skipped_credentials
]
num_triggers = 0
for cc_pair in connector_credential_pairs:
if cc_pair is not None:
indexing_mode = IndexingMode.UPDATE
if run_info.from_beginning:
indexing_mode = IndexingMode.REINDEX
mark_ccpair_with_indexing_trigger(cc_pair.id, indexing_mode, db_session)
num_triggers += 1
logger.info(
f"connector_run_once - marking cc_pair with indexing trigger: "
f"connector={run_info.connector_id} "
f"cc_pair={cc_pair.id} "
f"indexing_trigger={indexing_mode}"
)
# run the beat task to pick up the triggers immediately
primary_app.send_task(
OnyxCeleryTask.CHECK_FOR_INDEXING,
priority=OnyxCeleryPriority.HIGH,
kwargs={"tenant_id": tenant_id},
)
msg = f"Marked {num_triggers} index attempts with indexing triggers."
return StatusResponse(
@@ -1119,82 +1175,3 @@ def get_basic_connector_indexing_status(
for cc_pair in cc_pairs
if cc_pair.connector.source != DocumentSource.INGESTION_API
]
def trigger_indexing_for_cc_pair(
    specified_credential_ids: list[int],
    connector_id: int,
    from_beginning: bool,
    tenant_id: str,
    db_session: Session,
) -> int:
    """Mark connector/credential pairs for indexing and nudge the indexing task.

    Args:
        specified_credential_ids: Credentials to index with. When empty, every
            credential associated with the connector is used.
        connector_id: Connector whose cc-pairs should be indexed.
        from_beginning: When true the pairs are marked for REINDEX, otherwise
            for UPDATE.
        tenant_id: Passed through to the CHECK_FOR_INDEXING task.
        db_session: Session used for all lookups and for marking the pairs.

    Returns:
        The number of cc-pairs that were marked with an indexing trigger.

    Raises:
        ValueError: If the credential lookup for the connector fails, if a
            specified credential is not associated with the connector, or if
            there are no credentials to index with.
    """
    try:
        possible_credential_ids = get_connector_credential_ids(connector_id, db_session)
    except ValueError as e:
        # Chain the original error so the root cause stays in the traceback.
        raise ValueError(
            f"Connector by id {connector_id} does not exist: {str(e)}"
        ) from e

    if not specified_credential_ids:
        credential_ids = possible_credential_ids
    elif set(specified_credential_ids).issubset(set(possible_credential_ids)):
        credential_ids = specified_credential_ids
    else:
        raise ValueError("Not all specified credentials are associated with connector")

    if not credential_ids:
        raise ValueError(
            "Connector has no valid credentials, cannot create index attempts."
        )

    # Prevents index attempts for cc pairs that already have an index attempt
    # currently running. A set keeps the membership test below O(1).
    skipped_credentials = {
        credential_id
        for credential_id in credential_ids
        if get_index_attempts_for_cc_pair(
            cc_pair_identifier=ConnectorCredentialPairIdentifier(
                connector_id=connector_id,
                credential_id=credential_id,
            ),
            only_current=True,
            db_session=db_session,
            disinclude_finished=True,
        )
    }

    connector_credential_pairs = [
        get_connector_credential_pair(
            db_session=db_session,
            connector_id=connector_id,
            credential_id=credential_id,
        )
        for credential_id in credential_ids
        if credential_id not in skipped_credentials
    ]

    indexing_mode = IndexingMode.REINDEX if from_beginning else IndexingMode.UPDATE

    num_triggers = 0
    for cc_pair in connector_credential_pairs:
        # The pair lookup may return None; such entries are skipped.
        if cc_pair is None:
            continue

        mark_ccpair_with_indexing_trigger(cc_pair.id, indexing_mode, db_session)
        num_triggers += 1
        logger.info(
            f"connector_run_once - marking cc_pair with indexing trigger: "
            f"connector={connector_id} "
            f"cc_pair={cc_pair.id} "
            f"indexing_trigger={indexing_mode}"
        )

    # run the beat task to pick up the triggers immediately
    primary_app.send_task(
        OnyxCeleryTask.CHECK_FOR_INDEXING,
        priority=OnyxCeleryPriority.HIGH,
        kwargs={"tenant_id": tenant_id},
    )
    return num_triggers

View File

@@ -411,7 +411,7 @@ class FileUploadResponse(BaseModel):
class ObjectCreationIdResponse(BaseModel):
id: int
id: int | str
credential: CredentialSnapshot | None = None

View File

@@ -1,269 +0,0 @@
import time
from typing import List
from fastapi import APIRouter
from fastapi import Depends
from fastapi import File
from fastapi import Form
from fastapi import HTTPException
from fastapi import UploadFile
from pydantic import BaseModel
from sqlalchemy.orm import Session
from onyx.auth.users import current_user
from onyx.configs.constants import DocumentSource
from onyx.connectors.models import InputType
from onyx.db.connector import create_connector
from onyx.db.connector_credential_pair import add_credential_to_connector
from onyx.db.credentials import create_credential
from onyx.db.engine import get_session
from onyx.db.enums import AccessType
from onyx.db.models import User
from onyx.db.models import UserFile
from onyx.db.models import UserFolder
from onyx.db.user_documents import create_user_files
from onyx.server.documents.models import ConnectorBase
from onyx.server.documents.models import CredentialBase
from onyx.server.documents.models import FileUploadResponse
from onyx.server.user_documents.models import FileResponse
from onyx.server.user_documents.models import FileSystemResponse
from onyx.server.user_documents.models import FolderDetailResponse
from onyx.server.user_documents.models import FolderResponse
from onyx.server.user_documents.models import MessageResponse
router = APIRouter()
# Request body for POST /user/folder: the name and description of the folder
# to create. Both fields are required (no defaults are declared).
class FolderCreationRequest(BaseModel):
name: str
description: str
@router.post("/user/folder")
def create_folder(
    request: FolderCreationRequest,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> FolderDetailResponse:
    """Create a folder for the requesting user and return its details.

    The folder is committed immediately. The response always carries an empty
    ``files`` list.
    """
    owner_id = None
    if user:
        owner_id = user.id

    folder_row = UserFolder(
        user_id=owner_id,
        name=request.name,
        description=request.description,
    )
    db_session.add(folder_row)
    db_session.commit()

    return FolderDetailResponse(
        id=folder_row.id,
        name=folder_row.name,
        description=folder_row.description,
        files=[],
    )
@router.get("/user/folder")
def get_folders(
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> List[FolderResponse]:
    """List the folders whose ``user_id`` matches the requesting user."""
    owner_id = None
    if user:
        owner_id = user.id

    owned_folders = (
        db_session.query(UserFolder).filter(UserFolder.user_id == owner_id).all()
    )

    responses: List[FolderResponse] = []
    for folder in owned_folders:
        responses.append(FolderResponse.from_model(folder))
    return responses
@router.get("/user/folder/{folder_id}")
def get_folder(
    folder_id: int,
    user: User | None = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> FolderDetailResponse:
    """Return one folder, with its files, if it belongs to the requesting user.

    Raises:
        HTTPException: 404 when no folder with this id exists for the user.
    """
    owner_id = None
    if user:
        owner_id = user.id

    folder_query = db_session.query(UserFolder).filter(
        UserFolder.id == folder_id, UserFolder.user_id == owner_id
    )
    folder = folder_query.first()
    if folder is None:
        raise HTTPException(status_code=404, detail="Folder not found")

    # NOTE(review): unlike create_folder, no description is passed here -
    # confirm FolderDetailResponse treats it as optional.
    file_responses = list(map(FileResponse.from_model, folder.files))
    return FolderDetailResponse(
        id=folder.id,
        name=folder.name,
        files=file_responses,
    )
@router.post("/user/file/upload")
def upload_user_files(
    files: List[UploadFile] = File(...),
    folder_id: int | None = Form(None),
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> FileUploadResponse:
    """Store uploaded files and create a file connector, credential and cc-pair per file.

    Args:
        files: Files sent as multipart form data.
        folder_id: Optional folder to place the files in.
        user: The requesting user; owner of the created rows and credentials.
        db_session: Session used for all database work.

    Returns:
        The stored file paths, one connector/credential/cc-pair having been
        created for each.
    """
    file_upload_response = FileUploadResponse(
        file_paths=create_user_files(files, folder_id, user, db_session).file_paths
    )

    # The timestamp has one-second resolution, so on its own it gives every
    # file in a multi-file upload the same connector/credential/cc-pair name.
    # Appending the per-file index keeps names distinct within a request.
    request_timestamp = int(time.time())

    for index, path in enumerate(file_upload_response.file_paths):
        name_suffix = f"{request_timestamp}-{index}"

        connector_base = ConnectorBase(
            name=f"UserFile-{name_suffix}",
            source=DocumentSource.FILE,
            input_type=InputType.LOAD_STATE,
            connector_specific_config={
                "file_locations": [path],
            },
            refresh_freq=None,
            prune_freq=None,
            indexing_start=None,
        )
        connector = create_connector(
            db_session=db_session,
            connector_data=connector_base,
        )

        credential_info = CredentialBase(
            credential_json={},
            admin_public=True,
            source=DocumentSource.FILE,
            curator_public=True,
            groups=[],
            name=f"UserFileCredential-{name_suffix}",
        )
        credential = create_credential(credential_info, user, db_session)

        add_credential_to_connector(
            db_session=db_session,
            user=user,
            connector_id=connector.id,
            credential_id=credential.id,
            cc_pair_name=f"UserFileCCPair-{name_suffix}",
            access_type=AccessType.PUBLIC,
            auto_sync_options=None,
            groups=[],
        )

    # TODO: functional document indexing
    # trigger_document_indexing(db_session, user.id)
    return file_upload_response
@router.put("/user/folder/{folder_id}")
def update_folder(
    folder_id: int,
    name: str,
    user: User | None = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> FolderDetailResponse:
    """Rename a folder owned by the requesting user and return its details.

    Raises:
        HTTPException: 404 when no folder with this id exists for the user.
    """
    owner_id = None
    if user:
        owner_id = user.id

    folder_query = db_session.query(UserFolder).filter(
        UserFolder.id == folder_id, UserFolder.user_id == owner_id
    )
    folder = folder_query.first()
    if folder is None:
        raise HTTPException(status_code=404, detail="Folder not found")

    folder.name = name
    db_session.commit()

    file_responses = list(map(FileResponse.from_model, folder.files))
    return FolderDetailResponse(
        id=folder.id,
        name=folder.name,
        files=file_responses,
    )
@router.delete("/user/folder/{folder_id}")
def delete_folder(
    folder_id: int,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> MessageResponse:
    """Delete a folder owned by the requesting user.

    Raises:
        HTTPException: 404 when no folder with this id exists for the user.
    """
    owner_id = None
    if user:
        owner_id = user.id

    folder_query = db_session.query(UserFolder).filter(
        UserFolder.id == folder_id, UserFolder.user_id == owner_id
    )
    folder = folder_query.first()
    if folder is None:
        raise HTTPException(status_code=404, detail="Folder not found")

    db_session.delete(folder)
    db_session.commit()
    return MessageResponse(message="Folder deleted successfully")
@router.delete("/user/file/{file_id}")
def delete_file(
    file_id: int,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> MessageResponse:
    """Delete a file record owned by the requesting user.

    Raises:
        HTTPException: 404 when no file with this id exists for the user.
    """
    owner_id = None
    if user:
        owner_id = user.id

    file_query = db_session.query(UserFile).filter(
        UserFile.id == file_id, UserFile.user_id == owner_id
    )
    file = file_query.first()
    if file is None:
        raise HTTPException(status_code=404, detail="File not found")

    db_session.delete(file)
    db_session.commit()
    return MessageResponse(message="File deleted successfully")
# Request body for the file-move endpoint.
class FileMoveRequest(BaseModel):
    # Id of the UserFile row to move.
    file_id: int
    # Destination folder id; None is accepted and is written straight to
    # the file's folder_id by the handler.
    new_folder_id: int | None
@router.put("/user/file/{file_id}/move")
def move_file(
    request: FileMoveRequest,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> FileResponse:
    """Move one of the requesting user's files into another folder.

    Args:
        request: Body carrying the file id and the destination folder id
            (None clears the folder assignment).
        user: The authenticated user.
        db_session: Database session used for lookups and the commit.

    Returns:
        The file as stored after the move.

    Raises:
        HTTPException: 404 when the file, or the destination folder, does
            not exist for this user.
    """
    # NOTE(review): the `{file_id}` path segment is not bound to a parameter;
    # the file is selected by `request.file_id` from the body.
    user_id = user.id if user else None
    file = (
        db_session.query(UserFile)
        .filter(UserFile.id == request.file_id, UserFile.user_id == user_id)
        .first()
    )
    if not file:
        raise HTTPException(status_code=404, detail="File not found")

    # Validate the destination before writing it: without this check a file
    # could be placed in a folder owned by someone else, and an unknown id
    # would only fail at commit time.
    if request.new_folder_id is not None:
        destination = (
            db_session.query(UserFolder)
            .filter(
                UserFolder.id == request.new_folder_id,
                UserFolder.user_id == user_id,
            )
            .first()
        )
        if not destination:
            raise HTTPException(status_code=404, detail="Folder not found")

    file.folder_id = request.new_folder_id
    db_session.commit()
    return FileResponse.from_model(file)
@router.get("/user/file-system")
def get_file_system(
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> FileSystemResponse:
    """Return every folder and every file owned by the requesting user."""
    owner_id = None if not user else user.id

    folder_rows = (
        db_session.query(UserFolder).filter(UserFolder.user_id == owner_id).all()
    )
    file_rows = db_session.query(UserFile).filter(UserFile.user_id == owner_id).all()

    # Convert ORM rows to their API representations.
    folder_payload = [FolderResponse.from_model(row) for row in folder_rows]
    file_payload = [FileResponse.from_model(row) for row in file_rows]
    return FileSystemResponse(folders=folder_payload, files=file_payload)
@router.put("/user/file/{file_id}/rename")
def rename_file(
    file_id: int,
    name: str,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> FileResponse:
    """Rename one of the requesting user's files.

    Raises:
        HTTPException: 404 when the file does not exist for this user.
    """
    owner_id = None if not user else user.id

    owned_files = db_session.query(UserFile).filter(
        UserFile.id == file_id, UserFile.user_id == owner_id
    )
    target_file = owned_files.first()
    if not target_file:
        raise HTTPException(status_code=404, detail="File not found")

    target_file.name = name
    db_session.commit()
    return FileResponse.from_model(target_file)

View File

@@ -1,49 +0,0 @@
from typing import List
from fastapi import APIRouter
from pydantic import BaseModel
from onyx.db.models import UserFile
from onyx.db.models import UserFolder
router = APIRouter()
# API representation of a user folder.
class FolderResponse(BaseModel):
    id: int
    name: str
    # Optional: the migration that creates `user_folder` declares the
    # `description` column nullable, so a required `str` here rejected
    # folders stored without a description.
    # NOTE(review): `name` is nullable in that migration as well; confirm
    # whether folders without a name can actually exist.
    description: str | None = None

    @classmethod
    def from_model(cls, model: UserFolder) -> "FolderResponse":
        """Build the response from a UserFolder ORM object."""
        return cls(id=model.id, name=model.name, description=model.description)
# API representation of a user file.
class FileResponse(BaseModel):
    id: int
    name: str
    document_id: str
    folder_id: int | None = None

    @classmethod
    def from_model(cls, model: UserFile) -> "FileResponse":
        """Build the response from a UserFile ORM object."""
        field_values = {
            "id": model.id,
            "name": model.name,
            "folder_id": model.folder_id,
            "document_id": model.document_id,
        }
        return cls(**field_values)
# A folder plus the files it contains.
class FolderDetailResponse(FolderResponse):
    files: list[FileResponse]
# Generic response carrying a single human-readable status message.
class MessageResponse(BaseModel):
    message: str
# Flat listing of a user's folders and files.
class FileSystemResponse(BaseModel):
    # Folder entries (without their file lists).
    folders: list[FolderResponse]
    # File entries; each one carries its own folder_id.
    files: list[FileResponse]

View File

@@ -123,7 +123,6 @@ def optional_telemetry(
headers={"Content-Type": "application/json"},
json=payload,
)
except Exception:
# This way it silences all thread level logging as well
pass

View File

@@ -197,7 +197,7 @@ ai_platform_doc = SeedPresaveDocument(
)
customer_support_doc = SeedPresaveDocument(
url="https://docs.onyx.app/more/use_cases/support",
url="https://docs.onyx.app/more/use_cases/customer_support",
title=customer_support_title,
content=customer_support,
title_embedding=model.encode(f"search_document: {customer_support_title}"),

View File

@@ -21,144 +21,35 @@ Options:
--doc-id : Document ID
--fields : Fields to update (JSON)
Example:
Example: (gets docs for a given tenant id and connector id)
python vespa_debug_tool.py --action list_docs --tenant-id my_tenant --connector-id 1 --n 5
"""
import argparse
import json
from datetime import datetime
from datetime import timedelta
from datetime import timezone
from typing import Any
from typing import Dict
from typing import List
from typing import Optional
from uuid import UUID
from pydantic import BaseModel
from sqlalchemy import and_
from onyx.configs.constants import INDEX_SEPARATOR
from onyx.context.search.models import IndexFilters
from onyx.context.search.models import SearchRequest
from onyx.db.connector_credential_pair import get_connector_credential_pair_from_id
from onyx.db.engine import get_session_with_tenant
from onyx.db.models import ConnectorCredentialPair
from onyx.db.models import Document
from onyx.db.models import DocumentByConnectorCredentialPair
from onyx.db.search_settings import get_current_search_settings
from onyx.document_index.document_index_utils import get_document_chunk_ids
from onyx.document_index.interfaces import EnrichedDocumentIndexingInfo
from onyx.document_index.vespa.index import VespaIndex
from onyx.document_index.vespa.shared_utils.utils import get_vespa_http_client
from onyx.document_index.vespa_constants import ACCESS_CONTROL_LIST
from onyx.document_index.vespa_constants import DOC_UPDATED_AT
from onyx.document_index.vespa_constants import DOCUMENT_ID_ENDPOINT
from onyx.document_index.vespa_constants import DOCUMENT_SETS
from onyx.document_index.vespa_constants import HIDDEN
from onyx.document_index.vespa_constants import METADATA_LIST
from onyx.document_index.vespa_constants import SEARCH_ENDPOINT
from onyx.document_index.vespa_constants import SOURCE_TYPE
from onyx.document_index.vespa_constants import TENANT_ID
from onyx.document_index.vespa_constants import VESPA_APP_CONTAINER_URL
from onyx.document_index.vespa_constants import VESPA_APPLICATION_ENDPOINT
from onyx.utils.logger import setup_logger
from shared_configs.configs import MULTI_TENANT
from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA
logger = setup_logger()
class DocumentFilter(BaseModel):
    # Document filter for link matching.
    # When `link` is set, get_document_and_chunk_counts keeps only documents
    # whose stored link contains this text (compared case-insensitively).
    link: str | None = None
def build_vespa_filters(
    filters: IndexFilters,
    *,
    include_hidden: bool = False,
    remove_trailing_and: bool = False,
) -> str:
    """Build a combined Vespa filter string from the given IndexFilters.

    Each active filter contributes one clause prefixed with "AND", so the
    result can be appended to a YQL statement that already has a `where`
    condition. An empty string is returned when no filter applies.
    """

    def _build_or_filters(key: str, vals: list[str] | None) -> str:
        # OR together `key contains "<val>"` for every non-empty value.
        if vals is None:
            return ""
        usable = list(filter(None, vals))
        if not (key and usable):
            return ""
        return "(" + " or ".join(f'{key} contains "{item}"' for item in usable) + ")"

    def _build_time_filter(
        cutoff: datetime | None,
        untimed_doc_cutoff: timedelta = timedelta(days=92),
    ) -> str:
        if not cutoff:
            return ""
        # For cutoffs older than `untimed_doc_cutoff` the negated form is
        # emitted instead of the plain `>=` comparison.
        include_untimed = cutoff < datetime.now(timezone.utc) - untimed_doc_cutoff
        cutoff_secs = int(cutoff.timestamp())
        return (
            f"!({DOC_UPDATED_AT} < {cutoff_secs})"
            if include_untimed
            else f"({DOC_UPDATED_AT} >= {cutoff_secs})"
        )

    # Clauses are collected in a fixed order; empty ones are dropped on join.
    clauses: list[str] = []

    if not include_hidden:
        clauses.append(f"!({HIDDEN}=true)")

    if filters.tenant_id and MULTI_TENANT:
        clauses.append(f'({TENANT_ID} contains "{filters.tenant_id}")')

    if filters.access_control_list is not None:
        clauses.append(
            _build_or_filters(ACCESS_CONTROL_LIST, filters.access_control_list)
        )

    source_values = (
        [source.value for source in filters.source_type]
        if filters.source_type
        else None
    )
    clauses.append(_build_or_filters(SOURCE_TYPE, source_values))

    tag_values = (
        [tag.tag_key + INDEX_SEPARATOR + tag.tag_value for tag in filters.tags]
        if filters.tags
        else None
    )
    clauses.append(_build_or_filters(METADATA_LIST, tag_values))

    clauses.append(_build_or_filters(DOCUMENT_SETS, filters.document_set))
    clauses.append(_build_time_filter(filters.time_cutoff))

    filter_str = "".join(f"AND {clause} " for clause in clauses if clause)

    if remove_trailing_and:
        while filter_str.endswith(" and "):
            filter_str = filter_str[:-5]
        while filter_str.endswith("AND "):
            filter_str = filter_str[:-4]

    return filter_str.strip()
# Print Vespa configuration URLs
def print_vespa_config() -> None:
    """Log a short notice, then print the configured Vespa endpoints."""
    logger.info("Printing Vespa configuration.")
    report_lines = (
        f"Vespa Application Endpoint: {VESPA_APPLICATION_ENDPOINT}",
        f"Vespa App Container URL: {VESPA_APP_CONTAINER_URL}",
        f"Vespa Search Endpoint: {SEARCH_ENDPOINT}",
        f"Vespa Document ID Endpoint: {DOCUMENT_ID_ENDPOINT}",
    )
    for report_line in report_lines:
        print(report_line)
# Check connectivity to Vespa endpoints
def check_vespa_connectivity() -> None:
# Check connectivity to Vespa endpoints.
logger.info("Checking Vespa connectivity.")
endpoints = [
f"{VESPA_APPLICATION_ENDPOINT}/ApplicationStatus",
f"{VESPA_APPLICATION_ENDPOINT}/tenant",
@@ -170,21 +61,17 @@ def check_vespa_connectivity() -> None:
try:
with get_vespa_http_client() as client:
response = client.get(endpoint)
logger.info(
f"Connected to Vespa at {endpoint}, status code {response.status_code}"
)
print(f"Successfully connected to Vespa at {endpoint}")
print(f"Status code: {response.status_code}")
print(f"Response: {response.text[:200]}...")
except Exception as e:
logger.error(f"Failed to connect to Vespa at {endpoint}: {str(e)}")
print(f"Failed to connect to Vespa at {endpoint}: {str(e)}")
print("Vespa connectivity check completed.")
# Get info about the default Vespa application
def get_vespa_info() -> Dict[str, Any]:
# Get info about the default Vespa application.
url = f"{VESPA_APPLICATION_ENDPOINT}/tenant/default/application/default"
with get_vespa_http_client() as client:
response = client.get(url)
@@ -192,298 +79,121 @@ def get_vespa_info() -> Dict[str, Any]:
return response.json()
def get_index_name(tenant_id: str) -> str:
# Return the index name for a given tenant.
# Get index name for a tenant and connector pair
def get_index_name(tenant_id: str, connector_id: int) -> str:
with get_session_with_tenant(tenant_id=tenant_id) as db_session:
cc_pair = get_connector_credential_pair_from_id(db_session, connector_id)
if not cc_pair:
raise ValueError(f"No connector found for id {connector_id}")
search_settings = get_current_search_settings(db_session)
if not search_settings:
raise ValueError(f"No search settings found for tenant {tenant_id}")
return search_settings.index_name
return search_settings.index_name if search_settings else "public"
def query_vespa(
yql: str, tenant_id: Optional[str] = None, limit: int = 10
) -> List[Dict[str, Any]]:
# Perform a Vespa query using YQL syntax.
filters = IndexFilters(tenant_id=tenant_id, access_control_list=[])
filter_string = build_vespa_filters(filters, remove_trailing_and=True)
full_yql = yql.strip()
if filter_string:
full_yql = f"{full_yql} {filter_string}"
full_yql = f"{full_yql} limit {limit}"
params = {"yql": full_yql, "timeout": "10s"}
search_request = SearchRequest(query="", limit=limit, offset=0)
params.update(search_request.model_dump())
logger.info(f"Executing Vespa query: {full_yql}")
# Perform a Vespa query using YQL syntax
def query_vespa(yql: str) -> List[Dict[str, Any]]:
params = {
"yql": yql,
"timeout": "10s",
}
with get_vespa_http_client() as client:
response = client.get(SEARCH_ENDPOINT, params=params)
response.raise_for_status()
result = response.json()
documents = result.get("root", {}).get("children", [])
logger.info(f"Found {len(documents)} documents from query.")
return documents
return response.json()["root"]["children"]
# Get first N documents
def get_first_n_documents(n: int = 10) -> List[Dict[str, Any]]:
# Get the first n documents from any source.
yql = "select * from sources * where true"
return query_vespa(yql, limit=n)
yql = f"select * from sources * where true limit {n};"
return query_vespa(yql)
# Pretty-print a list of documents
def print_documents(documents: List[Dict[str, Any]]) -> None:
    """Print each document as indented JSON followed by a separator line."""
    separator = "-" * 80
    for entry in documents:
        rendered = json.dumps(entry, indent=2)
        print(rendered)
        print(separator)
# Get and print documents for a specific tenant and connector
def get_documents_for_tenant_connector(
tenant_id: str, connector_id: int, n: int = 10
) -> None:
# Get and print documents for a specific tenant and connector.
index_name = get_index_name(tenant_id)
logger.info(
f"Fetching documents for tenant={tenant_id}, connector_id={connector_id}"
)
yql = f"select * from sources {index_name} where true"
documents = query_vespa(yql, tenant_id, limit=n)
print(
f"First {len(documents)} documents for tenant {tenant_id}, connector {connector_id}:"
)
get_index_name(tenant_id, connector_id)
documents = get_first_n_documents(n)
print(f"First {n} documents for tenant {tenant_id}, connector {connector_id}:")
print_documents(documents)
# Search documents for a specific tenant and connector
def search_documents(
tenant_id: str, connector_id: int, query: str, n: int = 10
) -> None:
# Search documents for a specific tenant and connector.
index_name = get_index_name(tenant_id)
logger.info(
f"Searching documents for tenant={tenant_id}, connector_id={connector_id}, query='{query}'"
)
yql = f"select * from sources {index_name} where userInput(@query)"
documents = query_vespa(yql, tenant_id, limit=n)
print(f"Search results for query '{query}' in tenant {tenant_id}:")
index_name = get_index_name(tenant_id, connector_id)
yql = f"select * from sources {index_name} where userInput(@query) limit {n};"
documents = query_vespa(yql)
print(f"Search results for query '{query}':")
print_documents(documents)
# Update a specific document
def update_document(
tenant_id: str, connector_id: int, doc_id: str, fields: Dict[str, Any]
) -> None:
# Update a specific document.
index_name = get_index_name(tenant_id)
logger.info(
f"Updating document doc_id={doc_id} in tenant={tenant_id}, connector_id={connector_id}"
)
index_name = get_index_name(tenant_id, connector_id)
url = DOCUMENT_ID_ENDPOINT.format(index_name=index_name) + f"/{doc_id}"
update_request = {"fields": {k: {"assign": v} for k, v in fields.items()}}
with get_vespa_http_client() as client:
response = client.put(url, json=update_request)
response.raise_for_status()
logger.info(f"Document {doc_id} updated successfully.")
print(f"Document {doc_id} updated successfully")
# Delete a specific document
def delete_document(tenant_id: str, connector_id: int, doc_id: str) -> None:
# Delete a specific document.
index_name = get_index_name(tenant_id)
logger.info(
f"Deleting document doc_id={doc_id} in tenant={tenant_id}, connector_id={connector_id}"
)
index_name = get_index_name(tenant_id, connector_id)
url = DOCUMENT_ID_ENDPOINT.format(index_name=index_name) + f"/{doc_id}"
with get_vespa_http_client() as client:
response = client.delete(url)
response.raise_for_status()
logger.info(f"Document {doc_id} deleted successfully.")
print(f"Document {doc_id} deleted successfully")
def list_documents(n: int = 10, tenant_id: Optional[str] = None) -> None:
    """List documents from any source, filtered by tenant if provided.

    Prints the number of hits returned and then up to `n` documents as
    indented JSON.
    """
    logger.info(f"Listing up to {n} documents for tenant={tenant_id or 'ALL'}")

    tenant_clause = f" and tenant_id contains '{tenant_id}'" if tenant_id else ""
    yql = "select * from sources * where true" + tenant_clause

    documents = query_vespa(yql, tenant_id=tenant_id, limit=n)
    found = len(documents)

    print(f"Total documents found: {found}")
    logger.info(f"Total documents found: {found}")
    print(f"First {min(n, found)} documents:")
    for entry in documents[:n]:
        print(json.dumps(entry, indent=2))
# List documents from any source
def list_documents(n: int = 10) -> None:
    """Print the first `n` documents returned by the Vespa search endpoint.

    Failures (connection errors, non-2xx responses, unparsable bodies) are
    reported on stdout rather than raised, because this is a best-effort
    debugging helper.

    Args:
        n: Maximum number of documents to request.
    """
    yql = f"select * from sources * where true limit {n};"
    url = f"{VESPA_APP_CONTAINER_URL}/search/"
    params = {
        "yql": yql,
        "timeout": "10s",
    }
    try:
        with get_vespa_http_client() as client:
            response = client.get(url, params=params)
            response.raise_for_status()
            # A result without hits may omit "children"; treat that as an
            # empty list instead of letting a KeyError be reported as a
            # failure to list documents.
            documents = response.json().get("root", {}).get("children", [])
            print(f"First {n} documents:")
            print_documents(documents)
    except Exception as e:
        print(f"Failed to list documents: {str(e)}")
# Get and print ACLs for documents of a specific tenant and connector
def get_document_acls(tenant_id: str, connector_id: int, n: int = 10) -> None:
    """Query up to `n` documents from the index and print each one's ACL."""
    index_name = get_index_name(tenant_id, connector_id)
    yql = f"select documentid, access_control_list from sources {index_name} where true limit {n};"
    hits = query_vespa(yql)

    print(f"ACLs for {n} documents from tenant {tenant_id}, connector {connector_id}:")
    separator = "-" * 80
    for hit in hits:
        hit_fields = hit["fields"]
        print(f"Document ID: {hit_fields['documentid']}")
        acl_json = json.dumps(hit_fields.get("access_control_list", {}), indent=2)
        print(f"ACL: {acl_json}")
        print(separator)
def get_document_and_chunk_counts(
    tenant_id: str, cc_pair_id: int, filter_doc: DocumentFilter | None = None
) -> Dict[str, int]:
    """Map each document ID of a connector pair to its stored chunk count.

    Documents are optionally narrowed to those whose link contains
    `filter_doc.link` (case-insensitive). Documents whose chunk count is
    NULL are left out of the result.
    """
    with get_session_with_tenant(tenant_id=tenant_id) as session:
        # (document id, link) for every document attached to the cc pair.
        id_link_rows = (
            session.query(DocumentByConnectorCredentialPair.id, Document.link)
            .join(
                ConnectorCredentialPair,
                and_(
                    DocumentByConnectorCredentialPair.connector_id
                    == ConnectorCredentialPair.connector_id,
                    DocumentByConnectorCredentialPair.credential_id
                    == ConnectorCredentialPair.credential_id,
                ),
            )
            .join(Document, DocumentByConnectorCredentialPair.id == Document.id)
            .filter(ConnectorCredentialPair.id == cc_pair_id)
            .distinct()
            .all()
        )

        link_filter_active = bool(filter_doc and filter_doc.link)
        if link_filter_active:
            needle = filter_doc.link.lower()
            selected_ids = [
                row_id
                for row_id, row_link in id_link_rows
                if row_link and needle in row_link.lower()
            ]
        else:
            selected_ids = [row_id for row_id, _ in id_link_rows]

        count_rows = (
            session.query(Document.id, Document.chunk_count)
            .filter(Document.id.in_(selected_ids))
            .all()
        )

        counts_by_doc = {}
        for row_id, row_count in count_rows:
            if row_count is not None:
                counts_by_doc[row_id] = row_count
        return counts_by_doc
def get_chunk_ids_for_connector(
    tenant_id: str,
    cc_pair_id: int,
    index_name: str,
    filter_doc: DocumentFilter | None = None,
) -> List[UUID]:
    """Return chunk IDs for a given connector.

    Args:
        tenant_id: Tenant whose database and index are inspected.
        cc_pair_id: Connector-credential pair whose documents are listed.
        index_name: Vespa index the chunk info is enriched against.
        filter_doc: Optional link filter passed on to the document lookup.

    Returns:
        The chunk IDs computed for all matching documents.

    Raises:
        ValueError: If the chunk-ID helper returns something that is not
            a list.
    """
    doc_id_to_new_chunk_cnt = get_document_and_chunk_counts(
        tenant_id, cc_pair_id, filter_doc
    )

    # Share one HTTP client across all documents and close it afterwards;
    # previously a new client was opened per document and never released.
    # NOTE(review): assumes enrich_basic_chunk_info is done with the client
    # by the time it returns — confirm.
    with get_vespa_http_client() as http_client:
        doc_infos: List[EnrichedDocumentIndexingInfo] = [
            VespaIndex.enrich_basic_chunk_info(
                index_name=index_name,
                http_client=http_client,
                document_id=doc_id,
                previous_chunk_count=chunk_count,
                new_chunk_count=0,
            )
            for doc_id, chunk_count in doc_id_to_new_chunk_cnt.items()
        ]

    chunk_ids = get_document_chunk_ids(
        enriched_document_info_list=doc_infos,
        tenant_id=tenant_id,
        large_chunks_enabled=False,
    )
    if not isinstance(chunk_ids, list):
        raise ValueError(f"Expected list of chunk IDs, got {type(chunk_ids)}")
    return chunk_ids
def get_document_acls(
    tenant_id: str,
    cc_pair_id: int,
    n: int | None = 10,
    filter_doc: DocumentFilter | None = None,
) -> None:
    """Fetch and print document ACLs for the given tenant and connector pair.

    Args:
        tenant_id: Tenant whose index is inspected.
        cc_pair_id: Connector-credential pair whose chunks are looked up.
        n: Maximum number of chunks to show; None shows all of them.
        filter_doc: Optional link filter applied when collecting chunks.
    """
    index_name = get_index_name(tenant_id)
    logger.info(
        f"Fetching document ACLs for tenant={tenant_id}, cc_pair_id={cc_pair_id}"
    )
    chunk_ids: List[UUID] = get_chunk_ids_for_connector(
        tenant_id, cc_pair_id, index_name, filter_doc
    )
    target_ids = chunk_ids if n is None else chunk_ids[:n]
    logger.info(
        f"Found {len(chunk_ids)} chunk IDs, showing ACLs for {len(target_ids)}."
    )

    # Use the client as a context manager so it is closed when the loop
    # finishes; it was previously left open.
    with get_vespa_http_client() as vespa_client:
        for doc_chunk_id in target_ids:
            document_url = (
                f"{DOCUMENT_ID_ENDPOINT.format(index_name=index_name)}/{str(doc_chunk_id)}"
            )
            response = vespa_client.get(document_url)
            if response.status_code == 200:
                fields = response.json().get("fields", {})
                document_id = fields.get("document_id") or fields.get(
                    "documentid", "Unknown"
                )
                acls = fields.get("access_control_list", {})
                title = fields.get("title", "")
                source_type = fields.get("source_type", "")
                source_links_raw = fields.get("source_links", "{}")
                try:
                    source_links = json.loads(source_links_raw)
                except (json.JSONDecodeError, TypeError):
                    # TypeError covers a non-string value (e.g. null) so one
                    # odd document does not abort the whole listing.
                    source_links = {}
                print(f"Document Chunk ID: {doc_chunk_id}")
                print(f"Document ID: {document_id}")
                print(f"ACLs:\n{json.dumps(acls, indent=2)}")
                print(f"Source Links: {source_links}")
                print(f"Title: {title}")
                print(f"Source Type: {source_type}")
                if MULTI_TENANT:
                    print(f"Tenant ID: {fields.get('tenant_id', 'N/A')}")
                print("-" * 80)
            else:
                logger.error(f"Failed to fetch document for chunk ID: {doc_chunk_id}")
                print(f"Failed to fetch document for chunk ID: {doc_chunk_id}")
                print(f"Status Code: {response.status_code}")
                print("-" * 80)
class VespaDebugging:
# Class for managing Vespa debugging actions.
def __init__(self, tenant_id: str | None = None):
self.tenant_id = POSTGRES_DEFAULT_SCHEMA if not tenant_id else tenant_id
def print_config(self) -> None:
# Print Vespa config.
print_vespa_config()
def check_connectivity(self) -> None:
# Check Vespa connectivity.
check_vespa_connectivity()
def list_documents(self, n: int = 10) -> None:
# List documents for a tenant.
list_documents(n, self.tenant_id)
def search_documents(self, connector_id: int, query: str, n: int = 10) -> None:
# Search documents for a tenant and connector.
search_documents(self.tenant_id, connector_id, query, n)
def update_document(
self, connector_id: int, doc_id: str, fields: Dict[str, Any]
) -> None:
# Update a document.
update_document(self.tenant_id, connector_id, doc_id, fields)
def delete_document(self, connector_id: int, doc_id: str) -> None:
# Delete a document.
delete_document(self.tenant_id, connector_id, doc_id)
def acls_by_link(self, cc_pair_id: int, link: str) -> None:
# Get ACLs for a document matching a link.
get_document_acls(
self.tenant_id, cc_pair_id, n=None, filter_doc=DocumentFilter(link=link)
)
def acls(self, cc_pair_id: int, n: int | None = 10) -> None:
# Get ACLs for a connector.
get_document_acls(self.tenant_id, cc_pair_id, n)
def main() -> None:
# Main CLI entry point.
parser = argparse.ArgumentParser(description="Vespa debugging tool")
parser.add_argument(
"--action",
@@ -499,45 +209,60 @@ def main() -> None:
required=True,
help="Action to perform",
)
parser.add_argument("--tenant-id", help="Tenant ID")
parser.add_argument("--connector-id", type=int, help="Connector ID")
parser.add_argument(
"--n", type=int, default=10, help="Number of documents to retrieve"
"--tenant-id", help="Tenant ID (for update, delete, and get_acls actions)"
)
parser.add_argument(
"--connector-id",
type=int,
help="Connector ID (for update, delete, and get_acls actions)",
)
parser.add_argument(
"--n",
type=int,
default=10,
help="Number of documents to retrieve (for list_docs, search, update, and get_acls actions)",
)
parser.add_argument("--query", help="Search query (for search action)")
parser.add_argument("--doc-id", help="Document ID (for update and delete actions)")
parser.add_argument(
"--fields", help="Fields to update, in JSON format (for update)"
"--fields", help="Fields to update, in JSON format (for update action)"
)
args = parser.parse_args()
vespa_debug = VespaDebugging(args.tenant_id)
if args.action == "config":
vespa_debug.print_config()
print_vespa_config()
elif args.action == "connect":
vespa_debug.check_connectivity()
check_vespa_connectivity()
elif args.action == "list_docs":
vespa_debug.list_documents(args.n)
elif args.action == "search":
if not args.query or args.connector_id is None:
parser.error("--query and --connector-id are required for search action")
vespa_debug.search_documents(args.connector_id, args.query, args.n)
elif args.action == "update":
if not args.doc_id or not args.fields or args.connector_id is None:
parser.error(
"--doc-id, --fields, and --connector-id are required for update action"
# If tenant_id and connector_id are provided, list docs for that tenant/connector.
# Otherwise, list documents from any source.
if args.tenant_id and args.connector_id:
get_documents_for_tenant_connector(
args.tenant_id, args.connector_id, args.n
)
else:
list_documents(args.n)
elif args.action == "search":
if not args.query:
parser.error("--query is required for search action")
search_documents(args.tenant_id, args.connector_id, args.query, args.n)
elif args.action == "update":
if not args.doc_id or not args.fields:
parser.error("--doc-id and --fields are required for update action")
fields = json.loads(args.fields)
vespa_debug.update_document(args.connector_id, args.doc_id, fields)
update_document(args.tenant_id, args.connector_id, args.doc_id, fields)
elif args.action == "delete":
if not args.doc_id or args.connector_id is None:
parser.error("--doc-id and --connector-id are required for delete action")
vespa_debug.delete_document(args.connector_id, args.doc_id)
if not args.doc_id:
parser.error("--doc-id is required for delete action")
delete_document(args.tenant_id, args.connector_id, args.doc_id)
elif args.action == "get_acls":
if args.connector_id is None:
parser.error("--connector-id is required for get_acls action")
vespa_debug.acls(args.connector_id, args.n)
if not args.tenant_id or args.connector_id is None:
parser.error(
"--tenant-id and --connector-id are required for get_acls action"
)
get_document_acls(args.tenant_id, args.connector_id, args.n)
if __name__ == "__main__":

0
backend/test Normal file
View File

View File

@@ -18,9 +18,6 @@ FROM base AS builder
RUN apk add --no-cache libc6-compat
WORKDIR /app
# Add NODE_OPTIONS argument
ARG NODE_OPTIONS
# pull in source code / package.json / package-lock.json
COPY . .
@@ -81,8 +78,7 @@ ENV NEXT_PUBLIC_GTM_ENABLED=${NEXT_PUBLIC_GTM_ENABLED}
ARG NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED
ENV NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED=${NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED}
# Use NODE_OPTIONS in the build command
RUN NODE_OPTIONS="${NODE_OPTIONS}" npx next build
RUN NODE_OPTIONS="--max-old-space-size=8192" npx next build
# Step 2. Production image, copy all the files and run next
FROM base AS runner

View File

@@ -86,16 +86,14 @@ const sentryWebpackPluginOptions = {
authToken: process.env.SENTRY_AUTH_TOKEN,
silent: !sentryEnabled, // Silence output when Sentry is disabled
dryRun: !sentryEnabled, // Don't upload source maps when Sentry is disabled
...(sentryEnabled && {
sourceMaps: {
include: ["./.next"],
ignore: ["node_modules"],
urlPrefix: "~/_next",
stripPrefix: ["webpack://_N_E/"],
validate: true,
cleanArtifacts: true,
},
}),
sourceMaps: {
include: ["./.next"],
ignore: ["node_modules"],
urlPrefix: "~/_next",
stripPrefix: ["webpack://_N_E/"],
validate: true,
cleanArtifacts: true,
},
};
// Export the module with conditional Sentry configuration

View File

@@ -80,7 +80,6 @@ import { errorHandlingFetcher } from "@/lib/fetcher";
import { DeleteEntityModal } from "@/components/modals/DeleteEntityModal";
import { DeletePersonaButton } from "./[id]/DeletePersonaButton";
import Title from "@/components/ui/title";
import { FilePickerModal } from "@/app/my-documents/components/FilePicker";
function findSearchTool(tools: ToolSnapshot[]) {
return tools.find((tool) => tool.in_code_tool_id === "SearchTool");
@@ -145,7 +144,6 @@ export function AssistantEditor({
"#6FFFFF",
];
const [filePickerModalOpen, setFilePickerModalOpen] = useState(false);
const [showAdvancedOptions, setShowAdvancedOptions] = useState(false);
// state to persist across formik reformatting
@@ -351,19 +349,6 @@ export function AssistantEditor({
<BackButton />
</div>
)}
{filePickerModalOpen && (
<FilePickerModal
isOpen={filePickerModalOpen}
onClose={() => {
setFilePickerModalOpen(false);
}}
onSave={() => {
setFilePickerModalOpen(false);
}}
title="Add Documents to your Assistant"
buttonContent="Add to Assistant"
/>
)}
{labelToDelete && (
<DeleteEntityModal
entityType="label"
@@ -761,23 +746,6 @@ export function AssistantEditor({
<div className="w-full max-w-4xl">
<div className="flex flex-col">
<Separator />
<div className="flex gap-x-2 py-2 flex justify-start">
<div>
<div className="flex items-center gap-x-2">
<p className="block font-medium text-sm">
My Documents
</p>
<Button
className="!p-.5 text-xs"
type="button"
onClick={() => setFilePickerModalOpen(true)}
>
Attach Files and Folders
</Button>
</div>
</div>
</div>
{searchTool && (
<>
<Separator />

View File

@@ -113,7 +113,6 @@ import {
import AssistantModal from "../assistants/mine/AssistantModal";
import { getSourceMetadata } from "@/lib/sources";
import { UserSettingsModal } from "./modal/UserSettingsModal";
import { FilePickerModal } from "../my-documents/components/FilePicker";
const TEMP_USER_MESSAGE_ID = -1;
const TEMP_ASSISTANT_MESSAGE_ID = -2;
@@ -188,8 +187,6 @@ export function ChatPage({
const settings = useContext(SettingsContext);
const enterpriseSettings = settings?.enterpriseSettings;
const [viewingFilePicker, setViewingFilePicker] = useState(false);
const [toggleDocSelection, setToggleDocSelection] = useState(false);
const [documentSidebarToggled, setDocumentSidebarToggled] = useState(false);
const [userSettingsToggled, setUserSettingsToggled] = useState(false);
@@ -2071,17 +2068,6 @@ export function ChatPage({
}}
/>
)}
{toggleDocSelection && (
<FilePickerModal
buttonContent="Set as Context"
title="User Documents"
isOpen={true}
onClose={() => setToggleDocSelection(false)}
onSave={() => {
setToggleDocSelection(false);
}}
/>
)}
{retrievalEnabled && documentSidebarToggled && settings?.isMobile && (
<div className="md:hidden">
@@ -2613,7 +2599,6 @@ export function ChatPage({
});
return;
}
onSubmit({
messageIdToResend:
previousMessage.messageId,
@@ -2764,9 +2749,6 @@ export function ChatPage({
</div>
)}
<ChatInputBar
toggleDocSelection={() => {
setToggleDocSelection(true);
}}
toggleDocumentSidebar={toggleDocumentSidebar}
availableSources={sources}
availableDocumentSets={documentSets}

View File

@@ -147,6 +147,24 @@ export const DocumentResults = forwardRef<HTMLDivElement, DocumentResultsProps>(
)}
</div>
</div>
<div
className={`sticky bottom-4 w-full left-0 flex justify-center transition-opacity duration-300 ${
hasSelectedDocuments
? "opacity-100"
: "opacity-0 pointer-events-none"
}`}
>
<button
className="text-sm font-medium py-2 px-4 rounded-full transition-colors bg-neutral-900 text-white"
onClick={clearSelectedDocuments}
>
{`Remove ${
delayedSelectedDocumentCount > 0
? delayedSelectedDocumentCount
: ""
} Source${delayedSelectedDocumentCount > 1 ? "s" : ""}`}
</button>
</div>
</div>
</div>
</>

View File

@@ -86,7 +86,6 @@ export const SourceChip = ({
);
interface ChatInputBarProps {
toggleDocSelection: () => void;
removeDocs: () => void;
showConfigureAPIKey: () => void;
selectedDocuments: OnyxDocument[];
@@ -113,7 +112,6 @@ interface ChatInputBarProps {
}
export function ChatInputBar({
toggleDocSelection,
retrievalEnabled,
removeDocs,
toggleDocumentSidebar,
@@ -662,19 +660,18 @@ export function ChatInputBar({
name="File"
Icon={FiPlusCircle}
onClick={() => {
toggleDocSelection();
// const input = document.createElement("input");
// input.type = "file";
// input.multiple = true;
// input.onchange = (event: any) => {
// const files = Array.from(
// event?.target?.files || []
// ) as File[];
// if (files.length > 0) {
// handleFileUpload(files);
// }
// };
// input.click();
const input = document.createElement("input");
input.type = "file";
input.multiple = true;
input.onchange = (event: any) => {
const files = Array.from(
event?.target?.files || []
) as File[];
if (files.length > 0) {
handleFileUpload(files);
}
};
input.click();
}}
tooltipContent={"Upload files"}
/>

View File

@@ -198,7 +198,7 @@ export function SearchSummary({
) : null;
return (
<div className="flex group w-fit items-center">
<div className="flex items-center">
{isEditing ? (
editInput
) : (
@@ -225,7 +225,7 @@ export function SearchSummary({
<Tooltip>
<TooltipTrigger asChild>
<button
className="ml-2 -my-2 mobile:hidden hover:bg-hover p-1 rounded flex-shrink-0 group-hover:opacity-100 opacity-0"
className="ml-2 -my-2 mobile:hidden hover:bg-hover p-1 rounded flex-shrink-0"
onClick={() => {
setIsEditing(true);
}}

View File

@@ -445,7 +445,7 @@ export function PagesTab({
)}
{isHistoryEmpty && (!folders || folders.length === 0) && (
<p className="text-sm max-w-full mt-2 w-[250px]">
<p className="text-sm mt-2 w-[250px]">
Try sending a message! Your chat history will appear here.
</p>
)}

View File

@@ -205,9 +205,9 @@ export default async function RootLayout({
>
<Suspense fallback={null}>
<PostHogPageView />
{process.env.NEXT_PUBLIC_POSTHOG_KEY && <WebVitals />}
{children}
</Suspense>
{children}
{process.env.NEXT_PUBLIC_POSTHOG_KEY && <WebVitals />}
</AppProvider>
);
}

View File

@@ -1,108 +0,0 @@
import React, { useState } from "react";
import { Button } from "@/components/ui/button";
import { Input } from "@/components/ui/input";
import { Plus, Upload, RefreshCw } from "lucide-react";
import {
Popover,
PopoverContent,
PopoverTrigger,
} from "@/components/ui/popover";
interface FolderActionsProps {
onRefresh: () => void;
onCreateFolder: (folderName: string) => void;
onUploadFiles: (files: FileList) => void;
}
/**
 * Toolbar for a folder view: a refresh button, a "new folder" popover and a
 * file-upload button backed by a hidden file input.
 *
 * @param onRefresh - Called when the refresh button is clicked.
 * @param onCreateFolder - Called with the trimmed folder name when "Create"
 *   is clicked and the name is not blank.
 * @param onUploadFiles - Called with the FileList picked in the file dialog.
 */
export function FolderActions({
  onRefresh,
  onCreateFolder,
  onUploadFiles,
}: FolderActionsProps) {
  const [newFolderName, setNewFolderName] = useState("");
  const [isCreatingFolder, setIsCreatingFolder] = useState(false);
  // A ref (instead of a document-wide element id) keeps the upload button
  // wired to *this* instance's input even if the toolbar is rendered twice.
  const fileInputRef = React.useRef<HTMLInputElement>(null);

  const handleCreateFolder = () => {
    const trimmedName = newFolderName.trim();
    if (!trimmedName) {
      return;
    }
    onCreateFolder(trimmedName);
    setNewFolderName("");
    setIsCreatingFolder(false);
  };

  const handleFileUpload = (event: React.ChangeEvent<HTMLInputElement>) => {
    const files = event.target.files;
    // An empty FileList is still truthy, so check its length explicitly.
    if (files && files.length > 0) {
      onUploadFiles(files);
    }
  };

  return (
    <div className="flex items-center space-x-2">
      <Button
        onClick={onRefresh}
        variant="outline"
        size="sm"
        className="border-background-300 hover:bg-background-100"
        aria-label="Refresh"
      >
        <RefreshCw className="h-4 w-4 text-text-600" />
      </Button>
      <Popover open={isCreatingFolder} onOpenChange={setIsCreatingFolder}>
        <PopoverTrigger asChild>
          <Button
            variant="outline"
            size="sm"
            className="border-background-300 hover:bg-background-100"
            aria-label="Create folder"
          >
            <Plus className="h-4 w-4 text-text-600" />
          </Button>
        </PopoverTrigger>
        {isCreatingFolder && (
          <PopoverContent className="w-56 p-3 bg-white shadow-md rounded-md">
            <div className="space-y-2 flex flex-col">
              <Input
                type="text"
                placeholder="New folder name"
                value={newFolderName}
                onChange={(e) => setNewFolderName(e.target.value)}
                className="!w-full p-1 flex text-sm border border-background-300 focus:border-background-500 rounded"
              />
              <div className="flex justify-between space-x-2">
                <Button
                  onClick={handleCreateFolder}
                  size="sm"
                  className="bg-background-800 hover:bg-background-900 text-white text-xs"
                >
                  Create
                </Button>
                <Button
                  onClick={() => setIsCreatingFolder(false)}
                  variant="outline"
                  size="sm"
                  className="border border-background-300 hover:bg-background-100 text-xs"
                >
                  Cancel
                </Button>
              </div>
            </div>
          </PopoverContent>
        )}
      </Popover>
      <Button
        variant="outline"
        size="sm"
        className="border-background-300 hover:bg-background-100"
        aria-label="Upload files"
        onClick={() => fileInputRef.current?.click()}
      >
        <Upload className="h-4 w-4 text-text-600" />
      </Button>
      <input
        ref={fileInputRef}
        type="file"
        multiple
        onChange={handleFileUpload}
        className="hidden"
      />
    </div>
  );
}

View File

@@ -1,42 +0,0 @@
import React from "react";
import { ChevronRight } from "lucide-react";
interface FolderBreadcrumbProps {
parents: { name: string; id: number }[];
currentFolder: { name: string; id: number };
onBreadcrumbClick: (id: number) => void;
}
/**
 * Breadcrumb trail: a clickable "Root" entry, one clickable entry per parent
 * folder, and the current folder as plain (non-clickable) text.
 *
 * @param parents - Ancestor folders, rendered in the order given.
 * @param onBreadcrumbClick - Called with the clicked folder id; "Root" is
 *   reported as -1.
 * @param currentFolder - Folder being viewed; it is not rendered when its id
 *   is -1.
 */
export function FolderBreadcrumb({
  parents,
  onBreadcrumbClick,
  currentFolder,
}: FolderBreadcrumbProps) {
  return (
    <div className="flex items-center space-x-2 text-sm text-text-500 mb-4">
      <span
        className="cursor-pointer hover:text-text-700"
        onClick={() => onBreadcrumbClick(-1)}
      >
        Root
      </span>
      {parents.map((parent) => (
        // Key by folder id rather than array index so React keeps each crumb's
        // identity stable when the trail grows or shrinks.
        <React.Fragment key={parent.id}>
          <ChevronRight className="h-4 w-4" />
          <span
            className="cursor-pointer hover:text-text-700"
            onClick={() => onBreadcrumbClick(parent.id)}
          >
            {parent.name}
          </span>
        </React.Fragment>
      ))}
      {currentFolder && currentFolder.id !== -1 && (
        <>
          <ChevronRight className="h-4 w-4" />
          <span className="text-text-700">{currentFolder.name}</span>
        </>
      )}
    </div>
  );
}

View File

@@ -1,160 +0,0 @@
import React, { useState } from "react";
import { MoveFileModal } from "./MoveFileModal";
import { FileItem, FolderItem } from "./MyDocumenItem";
// Minimal shape shared by folders and files: a numeric id and a display name.
interface FolderType {
id: number;
name: string;
}
// A file entry: a FolderType plus its document id and containing folder id.
interface FileType extends FolderType {
document_id: string;
// NOTE(review): null presumably means "not inside any folder" — confirm with the API.
folder_id: number | null;
}
// Props accepted by the FolderContents component.
interface FolderContentsProps {
// Number of items shown per page.
pageLimit: number;
// Current page; the component treats it as 1-based when slicing the items.
currentPage: number;
// Folders and files to list; folders are placed before files.
contents: {
folders: FolderType[];
files: FileType[];
};
onFolderClick: (folderId: number) => void;
// Id of the folder being viewed; forwarded to the move modal.
currentFolder: number | null;
onDeleteItem: (itemId: number, isFolder: boolean) => void;
onDownloadItem: (documentId: string) => void;
// Called from the move modal and from drag-and-drop onto a folder.
onMoveItem: (
itemId: number,
destinationFolderId: number | null,
isFolder: boolean
) => void;
setPresentingDocument: (
document_id: string,
semantic_identifier: string
) => void;
onRenameItem: (itemId: number, newName: string, isFolder: boolean) => void;
// Accepted by the component but not read anywhere in its body.
folders: FolderType[];
}
/**
 * Lists one page of a folder's contents (folders first, then files) and wires
 * up rename, move (modal or drag-and-drop), delete and download actions.
 *
 * Pagination is done client-side: items `pageLimit * (currentPage - 1)` up to
 * `pageLimit * currentPage` of the combined folders+files list are shown.
 */
export function FolderContents({
  pageLimit,
  currentPage,
  setPresentingDocument,
  contents,
  onFolderClick,
  currentFolder,
  onDeleteItem,
  onDownloadItem,
  onMoveItem,
  onRenameItem,
  folders,
}: FolderContentsProps) {
  const [isMoveModalOpen, setIsMoveModalOpen] = useState(false);
  const [itemToMove, setItemToMove] = useState<{
    id: number;
    name: string;
    isFolder: boolean;
  } | null>(null);
  const [editingItem, setEditingItem] = useState<{
    id: number;
    name: string;
    isFolder: boolean;
  } | null>(null);

  // Confirm handler of the move modal.
  const handleMove = (destinationFolderId: number | null) => {
    if (itemToMove) {
      onMoveItem(itemToMove.id, destinationFolderId, itemToMove.isFolder);
      setIsMoveModalOpen(false);
      setItemToMove(null);
    }
  };

  const handleRename = (itemId: number, newName: string, isFolder: boolean) => {
    onRenameItem(itemId, newName, isFolder);
    setEditingItem(null);
  };

  // Serialises the dragged item so handleDrop can identify it.
  const handleDragStart = (
    e: React.DragEvent<HTMLDivElement>,
    item: { id: number; isFolder: boolean; name: string }
  ) => {
    e.dataTransfer.setData("application/json", JSON.stringify(item));
  };

  const handleDrop = (
    e: React.DragEvent<HTMLDivElement>,
    targetFolderId: number
  ) => {
    e.preventDefault();
    // Drags that did not start on one of our items (e.g. a file dragged in
    // from the desktop) carry no JSON payload; getData then returns "".
    const payload = e.dataTransfer.getData("application/json");
    if (!payload) {
      return;
    }
    let item: unknown;
    try {
      item = JSON.parse(payload);
    } catch {
      // Foreign "application/json" data that is not valid JSON: ignore it.
      return;
    }
    if (!item || typeof item !== "object") {
      return;
    }
    const { id, isFolder } = item as { id?: unknown; isFolder?: unknown };
    if (typeof id !== "number") {
      return;
    }
    // Dropping a folder onto itself would ask to move it into itself.
    if (isFolder && id === targetFolderId) {
      return;
    }
    onMoveItem(id, targetFolderId, Boolean(isFolder));
  };

  const startIndex = pageLimit * (currentPage - 1);
  const endIndex = startIndex + pageLimit;
  const itemsToDisplay = [...contents.folders, ...contents.files].slice(
    startIndex,
    endIndex
  );

  return (
    <div className="flex-grow" onDragOver={(e) => e.preventDefault()}>
      {itemsToDisplay.map((item) => {
        // Only file entries carry a document_id. Keys are prefixed because a
        // folder and a file may share the same numeric id.
        if ("document_id" in item) {
          return (
            <FileItem
              key={`file-${item.id}`}
              file={{
                name: item.name,
                id: item.id,
                document_id: item.document_id as string,
              }}
              setPresentingDocument={setPresentingDocument}
              onDeleteItem={onDeleteItem}
              onDownloadItem={onDownloadItem}
              onMoveItem={(id) => {
                setItemToMove({ id, name: item.name, isFolder: false });
                setIsMoveModalOpen(true);
              }}
              editingItem={editingItem}
              setEditingItem={setEditingItem}
              handleRename={handleRename}
              onDragStart={handleDragStart}
            />
          );
        } else {
          return (
            <FolderItem
              key={`folder-${item.id}`}
              folder={item}
              onFolderClick={onFolderClick}
              onDeleteItem={onDeleteItem}
              onMoveItem={(id) => {
                setItemToMove({ id, name: item.name, isFolder: true });
                setIsMoveModalOpen(true);
              }}
              editingItem={editingItem}
              setEditingItem={setEditingItem}
              handleRename={handleRename}
              onDragStart={handleDragStart}
              onDrop={handleDrop}
            />
          );
        }
      })}
      {itemToMove && (
        <MoveFileModal
          isOpen={isMoveModalOpen}
          onClose={() => setIsMoveModalOpen(false)}
          onMove={handleMove}
          fileName={itemToMove.name}
          currentFolderId={currentFolder}
        />
      )}
    </div>
  );
}

View File

@@ -1,47 +0,0 @@
import React from "react";
import { Folder as FolderIcon } from "lucide-react";
// One node of the folder tree rendered in the sidebar.
interface FolderNode {
id: number;
name: string;
// NOTE(review): presumably null for top-level folders — not read in this file.
parent_id: number | null;
// Nested folders; may be absent or empty for leaves.
children?: FolderNode[];
}
// Props accepted by the FolderTree component.
interface FolderTreeProps {
// Top-level nodes of the tree.
treeData: FolderNode[];
// Called with the id of the folder row that was clicked.
onFolderClick: (folderId: number) => void;
}
/**
 * Recursively renders `nodes` as a nested unordered list. Each row shows a
 * folder icon and name and reports its id through `onFolderClick`.
 */
function renderTree(
  nodes: FolderNode[],
  onFolderClick: (folderId: number) => void
) {
  const rows = nodes.map((node) => {
    const childNodes = node.children;
    // Only descend when the node actually has children.
    const subtree =
      childNodes && childNodes.length > 0
        ? renderTree(childNodes, onFolderClick)
        : null;

    return (
      <li key={node.id} className="my-1">
        <div
          className="flex items-center cursor-pointer hover:text-text-700"
          onClick={() => onFolderClick(node.id)}
        >
          <FolderIcon className="mr-1 h-4 w-4 text-text-600" />
          <span>{node.name}</span>
        </div>
        {subtree}
      </li>
    );
  });

  return <ul className="ml-4 list-none">{rows}</ul>;
}
/**
 * Sidebar panel with a "Folders" heading followed by the folder tree.
 * The panel is hidden below the `lg` breakpoint (`hidden lg:block`).
 */
export function FolderTree(props: FolderTreeProps) {
  const { treeData, onFolderClick } = props;
  const tree = renderTree(treeData, onFolderClick);

  return (
    <div className="w-64 border-r border-background-300 p-2 overflow-y-auto hidden lg:block">
      <h2 className="font-bold text-sm mb-2">Folders</h2>
      {tree}
    </div>
  );
}

View File

@@ -1,113 +0,0 @@
import React, { useState, useEffect } from "react";
import { Folder } from "lucide-react";
interface Folder {
id: number | null;
name: string;
}
// Props accepted by the MoveFileModal component.
interface MoveFileModalProps {
// When false the modal renders nothing; turning true triggers a folder fetch.
isOpen: boolean;
// Called when "Cancel" is clicked.
onClose: () => void;
// Called with the chosen destination id; null is sent for the "Root" entry.
onMove: (destinationFolderId: number | null) => void;
// Name shown in the modal title.
fileName: string;
// Folder tagged "(Current folder)" in the list.
currentFolderId: number | null;
}
/**
 * Modal that lets the user pick a destination folder (or "Root") for an item.
 *
 * Folders are fetched from `/api/user/folder` each time the modal opens.
 * "Move" stays disabled until a destination is selected and then calls
 * `onMove` with the selected id (null for "Root").
 */
export function MoveFileModal({
  isOpen,
  onClose,
  onMove,
  fileName,
  currentFolderId,
}: MoveFileModalProps) {
  const [folders, setFolders] = useState<Folder[]>([]);
  const [selectedFolder, setSelectedFolder] = useState<Folder | null>(null);

  useEffect(() => {
    if (!isOpen) {
      return undefined;
    }
    // Every opening starts without a preselected destination, so a selection
    // made for a previous item cannot be submitted by accident.
    setSelectedFolder(null);

    // Lets the cleanup cancel an in-flight request, so a late response never
    // updates state after the modal was closed or unmounted.
    const controller = new AbortController();
    const loadFolders = async () => {
      try {
        const res = await fetch("/api/user/folder", {
          signal: controller.signal,
        });
        if (!res.ok) {
          throw new Error(`Failed to fetch folders (status ${res.status})`);
        }
        const data = await res.json();
        if (controller.signal.aborted) {
          return;
        }
        // Guard against a non-array body, which would crash `folders.map`.
        setFolders(Array.isArray(data) ? data : []);
      } catch (error) {
        if (controller.signal.aborted) {
          return;
        }
        console.error("Error fetching folders:", error);
      }
    };
    loadFolders();

    return () => controller.abort();
  }, [isOpen]);

  if (!isOpen) return null;

  return (
    <div
      className="fixed inset-0 bg-black bg-opacity-50 flex items-center justify-center z-50"
      role="dialog"
      aria-modal="true"
    >
      <div className="bg-white rounded-lg p-6 w-96">
        <h2 className="text-xl font-semibold mb-4">
          Move &quot;{fileName}&quot;
        </h2>
        <div className="mb-4">
          <span className="font-medium">Choose a folder:</span>
          <div className="max-h-60 overflow-y-auto mt-2 border rounded">
            {folders.map((folder) => (
              <div
                key={folder.id}
                className="flex items-center justify-between py-2 px-3 hover:bg-background-100 cursor-pointer"
                onClick={() => setSelectedFolder(folder)}
              >
                <div className="flex items-center">
                  <Folder className="mr-2 h-5 w-5" />
                  <span>{folder.name}</span>
                  {folder.id === currentFolderId && (
                    <span className="text-sm my-auto ml-2 text-text-500">
                      (Current folder)
                    </span>
                  )}
                </div>
                <div
                  className={`w-4 h-4 rounded-full border ${
                    selectedFolder?.id === folder.id
                      ? "bg-blue-600 border-blue-600"
                      : "border-blue-300 border-2"
                  }`}
                />
              </div>
            ))}
            {/* "Root" is represented by a null folder id. */}
            <div
              className="flex items-center justify-between py-2 px-3 hover:bg-background-100 cursor-pointer"
              onClick={() => setSelectedFolder({ id: null, name: "Root" })}
            >
              <div className="flex items-center">
                <Folder className="mr-2 h-5 w-5" />
                <span>Root</span>
              </div>
              <div
                className={`w-4 h-4 rounded-full border ${
                  selectedFolder?.id === null
                    ? "bg-blue-600 border-blue-600"
                    : "border-blue-300 border-2"
                }`}
              />
            </div>
          </div>
        </div>
        <div className="flex justify-end space-x-2">
          <button
            type="button"
            className="px-4 py-2 cursor-pointer text-text-600 hover:bg-background-100 rounded"
            onClick={onClose}
          >
            Cancel
          </button>
          <button
            type="button"
            className={`px-4 py-2 text-white rounded ${
              selectedFolder
                ? "bg-blue-600 hover:bg-blue-700 cursor-pointer"
                : "bg-blue-400 cursor-not-allowed"
            }`}
            onClick={() => selectedFolder && onMove(selectedFolder.id)}
            disabled={!selectedFolder}
          >
            Move
          </button>
        </div>
      </div>
    </div>
  );
}

View File

@@ -1,343 +0,0 @@
import React, { useEffect, useRef, useState } from "react";
import {
FolderIcon,
FileIcon,
DownloadIcon,
TrashIcon,
PencilIcon,
InfoIcon,
CheckIcon,
XIcon,
} from "lucide-react";
// Props accepted by the FolderItem row component.
interface FolderItemProps {
folder: { name: string; id: number };
// Called with the folder id when the row is clicked while not being renamed.
onFolderClick: (folderId: number) => void;
onDeleteItem: (itemId: number, isFolder: boolean) => void;
// Called from the context menu's "Move" entry.
onMoveItem: (folderId: number) => void;
// Item currently being renamed (shared across rows), or null when none is.
editingItem: { id: number; name: string; isFolder: boolean } | null;
setEditingItem: React.Dispatch<
React.SetStateAction<{ id: number; name: string; isFolder: boolean } | null>
>;
// Called with the new name when the rename is confirmed.
handleRename: (id: number, newName: string, isFolder: boolean) => void;
onDragStart: (
e: React.DragEvent<HTMLDivElement>,
item: { id: number; isFolder: boolean; name: string }
) => void;
// Called when something is dropped onto this folder row.
onDrop: (e: React.DragEvent<HTMLDivElement>, targetFolderId: number) => void;
}
/**
 * A single folder row: click to open, drag to move, drop onto to move another
 * item into it, right-click for a Rename / Move / Delete context menu.
 *
 * While this folder is the `editingItem`, the name is replaced by an inline
 * rename input with confirm and cancel buttons.
 */
export function FolderItem({
  folder,
  onFolderClick,
  onDeleteItem,
  onMoveItem,
  editingItem,
  setEditingItem,
  handleRename,
  onDragStart,
  onDrop,
}: FolderItemProps) {
  // Horizontal offset (px) of the open context menu; undefined means closed.
  const [showMenu, setShowMenu] = useState<undefined | number>(undefined);
  const [newName, setNewName] = useState(folder.name);
  const isEditing =
    editingItem && editingItem.id === folder.id && editingItem.isFolder;
  const folderItemRef = useRef<HTMLDivElement>(null);

  const handleContextMenu = (e: React.MouseEvent) => {
    e.preventDefault();
    // Position the menu relative to the row, shifted 40px left of the cursor
    // and clamped so it never starts left of the row.
    const rowLeft = folderItemRef.current?.getBoundingClientRect().left ?? 0;
    setShowMenu(Math.max(0, e.clientX - rowLeft - 40));
  };

  const startEditing = () => {
    setEditingItem({ id: folder.id, name: folder.name, isFolder: true });
    setNewName(folder.name);
    setShowMenu(undefined);
  };

  const submitRename = (e: React.MouseEvent) => {
    e.stopPropagation();
    handleRename(folder.id, newName, true);
  };

  const cancelEditing = (e: React.MouseEvent) => {
    e.stopPropagation();
    setEditingItem(null);
    setNewName(folder.name);
  };

  useEffect(() => {
    // Nothing to close while the menu is hidden.
    if (showMenu === undefined) {
      return undefined;
    }
    // The very same function reference must be passed to add/remove,
    // otherwise the listener is never detached.
    const closeMenu = () => setShowMenu(undefined);
    document.addEventListener("click", closeMenu);
    return () => {
      document.removeEventListener("click", closeMenu);
    };
  }, [showMenu]);

  return (
    <div
      ref={folderItemRef}
      className="flex items-center justify-between p-2 hover:bg-background-100 cursor-pointer relative"
      onClick={() => !isEditing && onFolderClick(folder.id)}
      onContextMenu={handleContextMenu}
      draggable={!isEditing}
      onDragStart={(e) =>
        onDragStart(e, { id: folder.id, isFolder: true, name: folder.name })
      }
      onDragOver={(e) => e.preventDefault()}
      onDrop={(e) => onDrop(e, folder.id)}
    >
      <div className="flex items-center">
        <FolderIcon className="mr-2" />
        {isEditing ? (
          <div className="flex items-center">
            <input
              onClick={(e) => e.stopPropagation()}
              type="text"
              value={newName}
              onChange={(e) => {
                e.stopPropagation();
                setNewName(e.target.value);
              }}
              className="border rounded px-2 py-1 mr-2"
              autoFocus
            />
            <button
              onClick={submitRename}
              className="text-green-500 hover:text-green-700 mr-2"
            >
              <CheckIcon className="h-4 w-4" />
            </button>
            <button
              onClick={cancelEditing}
              className="text-red-500 hover:text-red-700"
            >
              <XIcon className="h-4 w-4" />
            </button>
          </div>
        ) : (
          <span>{folder.name}</span>
        )}
      </div>
      {/* Compare against undefined: an offset of 0 is a valid open state. */}
      {showMenu !== undefined && !isEditing && (
        <div
          className="absolute bg-white border rounded shadow py-1 right-0 top-full mt-1 z-50"
          style={{ left: showMenu }}
        >
          <button
            className="block w-full text-left px-4 py-2 hover:bg-background-100 text-sm"
            onClick={(e) => {
              e.stopPropagation();
              startEditing();
            }}
          >
            Rename
          </button>
          <button
            className="block w-full text-left px-4 py-2 hover:bg-background-100 text-sm"
            onClick={(e) => {
              e.stopPropagation();
              onMoveItem(folder.id);
              setShowMenu(undefined);
            }}
          >
            Move
          </button>
          <button
            className="block w-full text-left px-4 py-2 hover:bg-background-100 text-sm text-red-600"
            onClick={(e) => {
              e.stopPropagation();
              onDeleteItem(folder.id, true);
              setShowMenu(undefined);
            }}
          >
            Delete
          </button>
        </div>
      )}
    </div>
  );
}
// Props accepted by the FileItem row component.
interface FileItemProps {
file: { name: string; id: number; document_id: string };
onDeleteItem: (itemId: number, isFolder: boolean) => void;
// Called with the file's document_id from the context menu's "Download" entry.
onDownloadItem: (documentId: string) => void;
// Called from the context menu's "Move" entry.
onMoveItem: (fileId: number) => void;
// Item currently being renamed (shared across rows), or null when none is.
editingItem: { id: number; name: string; isFolder: boolean } | null;
setEditingItem: React.Dispatch<
React.SetStateAction<{ id: number; name: string; isFolder: boolean } | null>
>;
// Called with the file's document_id and name when the row's button is clicked.
setPresentingDocument: (
document_id: string,
semantic_identifier: string
) => void;
// Called with the new name when the rename is confirmed.
handleRename: (fileId: number, newName: string, isFolder: boolean) => void;
onDragStart: (
e: React.DragEvent<HTMLDivElement>,
item: { id: number; isFolder: boolean; name: string }
) => void;
}
/**
 * A single file row: click to present the document, drag to move, right-click
 * for a Download / Rename / Move / Delete context menu.
 *
 * While this file is the `editingItem`, the row shows an inline rename input
 * with confirm and cancel buttons instead of the clickable name.
 */
export function FileItem({
  setPresentingDocument,
  file,
  onDeleteItem,
  onDownloadItem,
  onMoveItem,
  editingItem,
  setEditingItem,
  handleRename,
  onDragStart,
}: FileItemProps) {
  // Horizontal offset (px) of the open context menu; undefined means closed.
  const [showMenu, setShowMenu] = useState<undefined | number>();
  const [newFileName, setNewFileName] = useState(file.name);
  const isEditing =
    editingItem && editingItem.id === file.id && !editingItem.isFolder;
  const fileItemRef = useRef<HTMLDivElement>(null);

  const handleContextMenu = (e: React.MouseEvent) => {
    e.preventDefault();
    // Position the menu relative to the row, shifted 40px left of the cursor
    // and clamped so it never starts left of the row.
    const rowLeft = fileItemRef.current?.getBoundingClientRect().left ?? 0;
    setShowMenu(Math.max(0, e.clientX - rowLeft - 40));
  };

  useEffect(() => {
    // Nothing to close while the menu is hidden.
    if (showMenu === undefined) {
      return undefined;
    }
    // Close on any click or right-click outside this row. The same function
    // reference is used for add/remove so the listeners are really detached.
    const closeIfOutside = (e: MouseEvent) => {
      if (fileItemRef.current?.contains(e.target as Node)) {
        return;
      }
      setShowMenu(undefined);
    };
    document.addEventListener("click", closeIfOutside);
    document.addEventListener("contextmenu", closeIfOutside);
    return () => {
      document.removeEventListener("click", closeIfOutside);
      document.removeEventListener("contextmenu", closeIfOutside);
    };
  }, [showMenu]);

  const startEditing = () => {
    setEditingItem({ id: file.id, name: file.name, isFolder: false });
    setNewFileName(file.name);
    setShowMenu(undefined);
  };

  const submitRename = (e: React.MouseEvent) => {
    e.stopPropagation();
    handleRename(file.id, newFileName, false);
  };

  const cancelEditing = (e: React.MouseEvent) => {
    e.stopPropagation();
    setEditingItem(null);
    setNewFileName(file.name);
  };

  return (
    <div
      ref={fileItemRef}
      className="flex items-center w-full justify-between p-2 hover:bg-background-100 cursor-pointer relative"
      onContextMenu={handleContextMenu}
      draggable={!isEditing}
      onDragStart={(e) =>
        onDragStart(e, { id: file.id, isFolder: false, name: file.name })
      }
    >
      {isEditing ? (
        // The rename controls live in a plain container: nesting an input and
        // buttons inside the "present document" button is invalid HTML and
        // made clicks on the editor open the document.
        <div className="flex items-center flex-grow">
          <FileIcon className="mr-2" />
          <div className="flex items-center">
            <input
              onClick={(e) => e.stopPropagation()}
              type="text"
              value={newFileName}
              onChange={(e) => {
                e.stopPropagation();
                setNewFileName(e.target.value);
              }}
              className="border rounded px-2 py-1 mr-2"
              autoFocus
            />
            <button
              onClick={submitRename}
              className="text-green-500 hover:text-green-700 mr-2"
            >
              <CheckIcon className="h-4 w-4" />
            </button>
            <button
              onClick={cancelEditing}
              className="text-red-500 hover:text-red-700"
            >
              <XIcon className="h-4 w-4" />
            </button>
          </div>
        </div>
      ) : (
        <button
          onClick={() => setPresentingDocument(file.document_id, file.name)}
          className="flex items-center flex-grow"
        >
          <FileIcon className="mr-2" />
          <p className="flex text-wrap text-left line-clamp-2">{file.name}</p>
        </button>
      )}
      {/* Compare against undefined: an offset of 0 is a valid open state. */}
      {showMenu !== undefined && !isEditing && (
        <div
          className="absolute bg-white max-w-40 border rounded shadow py-1 right-0 top-full mt-1 z-50"
          style={{ left: showMenu }}
        >
          <button
            className="block w-full text-left px-4 py-2 hover:bg-background-100 text-sm"
            onClick={(e) => {
              e.stopPropagation();
              onDownloadItem(file.document_id);
              setShowMenu(undefined);
            }}
          >
            Download
          </button>
          <button
            className="block w-full text-left px-4 py-2 hover:bg-background-100 text-sm"
            onClick={(e) => {
              e.stopPropagation();
              startEditing();
            }}
          >
            Rename
          </button>
          <button
            className="block w-full text-left px-4 py-2 hover:bg-background-100 text-sm"
            onClick={(e) => {
              e.stopPropagation();
              onMoveItem(file.id);
              setShowMenu(undefined);
            }}
          >
            Move
          </button>
          <button
            className="block w-full text-left px-4 py-2 hover:bg-background-100 text-sm text-red-600"
            onClick={(e) => {
              e.stopPropagation();
              onDeleteItem(file.id, false);
              setShowMenu(undefined);
            }}
          >
            Delete
          </button>
        </div>
      )}
    </div>
  );
}

View File

@@ -1,574 +0,0 @@
"use client";
import React, { useCallback, useEffect, useState } from "react";
import { useRouter, useSearchParams } from "next/navigation";
import { Search, Grid, List, Plus, RefreshCw, Upload } from "lucide-react";
import { Button } from "@/components/ui/button";
import { Input } from "@/components/ui/input";
import { usePopup } from "@/components/admin/connectors/Popup";
import { FolderActions } from "./FolderActions";
import { FolderContents } from "./FolderContents";
import TextView from "@/components/chat_search/TextView";
import { PageSelector } from "@/components/PageSelector";
import { MinimalOnyxDocument } from "@/lib/search/interfaces";
import {
Popover,
PopoverContent,
PopoverTrigger,
} from "@/components/ui/popover";
import { Label } from "@/components/ui/label";
// A folder as stored from the `/api/user/folder` response.
interface FolderResponse {
id: number;
name: string;
description: string;
}
// A file entry as listed in a folder's contents.
interface FileResponse {
id: number;
name: string;
document_id: string;
// NOTE(review): null presumably means the file is not in any folder — confirm with the API.
folder_id: number | null;
}
// Payload stored from the `/api/user/file-system` response.
interface FolderContentsResponse {
folders: FolderResponse[];
files: FileResponse[];
}
/**
 * Square icon-only toggle button.
 *
 * @param icon - Component rendered inside the button.
 * @param onClick - Click handler.
 * @param active - Whether the button is the selected option; controls the
 *   background and is exposed to assistive technology via `aria-pressed`.
 */
const IconButton: React.FC<{
  icon: React.ComponentType;
  onClick: () => void;
  active: boolean;
}> = ({ icon: Icon, onClick, active }) => (
  <button
    // Explicit type so the button never submits an enclosing form.
    type="button"
    aria-pressed={active}
    className={`p-2 flex-none h-10 w-10 flex items-center justify-center rounded ${
      active ? "bg-gray-200" : "hover:bg-gray-100"
    }`}
    onClick={onClick}
  >
    <Icon />
  </button>
);
/**
 * "Create Folder" button that opens a popover form with a required name and
 * an optional description.
 *
 * On submit with a non-blank name, `onCreateFolder` receives the trimmed name
 * and description, the form is cleared and the popover is closed.
 */
const CreateFolderPopover: React.FC<{
  onCreateFolder: (name: string, description: string) => void;
}> = ({ onCreateFolder }) => {
  // Controlled open state so the popover can be dismissed after submitting.
  const [isOpen, setIsOpen] = useState(false);
  const [folderName, setFolderName] = useState("");
  const [folderDescription, setFolderDescription] = useState("");

  const handleSubmit = (e: React.FormEvent) => {
    e.preventDefault();
    const trimmedName = folderName.trim();
    // `required` only rejects an empty input; whitespace-only names are
    // filtered here.
    if (!trimmedName) {
      return;
    }
    onCreateFolder(trimmedName, folderDescription.trim());
    setFolderName("");
    setFolderDescription("");
    setIsOpen(false);
  };

  return (
    <Popover open={isOpen} onOpenChange={setIsOpen}>
      <PopoverTrigger asChild>
        <Button className="inline-flex items-center justify-center relative shrink-0 h-9 px-4 py-2 rounded-lg min-w-[5rem] active:scale-[0.985] whitespace-nowrap pl-2 pr-3 gap-1">
          <Plus className="h-5 w-5" />
          Create Folder
        </Button>
      </PopoverTrigger>
      <PopoverContent className="w-80">
        <form onSubmit={handleSubmit} className="space-y-4">
          <div className="w-full space-y-2">
            <Label htmlFor="folderName">Folder Name</Label>
            <Input
              className="w-full"
              id="folderName"
              value={folderName}
              onChange={(e) => setFolderName(e.target.value)}
              placeholder="Enter folder name"
              required
            />
          </div>
          <div className="w-full space-y-2">
            <Label htmlFor="folderDescription">Description (optional)</Label>
            <Input
              className="w-full"
              id="folderDescription"
              value={folderDescription}
              onChange={(e) => setFolderDescription(e.target.value)}
              placeholder="Enter folder description"
            />
          </div>
          <Button type="submit">Create Folder</Button>
        </form>
      </PopoverContent>
    </Popover>
  );
};
export default function MyDocuments() {
const [currentFolder, setCurrentFolder] = useState<number | null>(null);
const [folderContents, setFolderContents] =
useState<FolderContentsResponse | null>(null);
const [folders, setFolders] = useState<FolderResponse[]>([]);
const [page, setPage] = useState<number>(1);
const pageLimit = 10;
const searchParams = useSearchParams();
const router = useRouter();
const { popup, setPopup } = usePopup();
const [presentingDocument, setPresentingDocument] =
useState<MinimalOnyxDocument | null>(null);
const [view, setView] = useState<"grid" | "list">("grid");
const [searchQuery, setSearchQuery] = useState("");
const folderIdFromParams = parseInt(searchParams.get("folder") || "0", 10);
// Loads the folder list from the API into `folders`; any failure is logged
// and surfaced through an error popup.
const fetchFolders = useCallback(async () => {
  try {
    const res = await fetch("/api/user/folder");
    if (res.ok) {
      const folderList = await res.json();
      setFolders(folderList);
      return;
    }
    throw new Error("Failed to fetch folders");
  } catch (err) {
    console.error("Error fetching folders:", err);
    setPopup({ message: "Failed to fetch folders", type: "error" });
  }
}, []);
// Loads the current page of the given folder's contents into `folderContents`.
// A null (or 0) folder id is sent as an empty `folder_id` query parameter.
// Failures are logged and surfaced through an error popup.
const fetchFolderContents = useCallback(
  async (folderId: number | null) => {
    const url = `/api/user/file-system?page=${page}&folder_id=${folderId || ""}`;
    try {
      const res = await fetch(url);
      if (res.ok) {
        const listing = await res.json();
        setFolderContents(listing);
        return;
      }
      throw new Error("Failed to fetch folder contents");
    } catch (err) {
      console.error("Error fetching folder contents:", err);
      setPopup({ message: "Failed to fetch folder contents", type: "error" });
    }
  },
  [page]
);
useEffect(() => {
fetchFolders();
}, [fetchFolders]);
useEffect(() => {
setCurrentFolder(folderIdFromParams || null);
fetchFolderContents(folderIdFromParams || null);
}, [folderIdFromParams, fetchFolderContents]);
const refreshFolderContents = useCallback(() => {
fetchFolderContents(currentFolder);
}, [fetchFolderContents, currentFolder]);
const handleFolderClick = (id: number) => {
router.push(`/my-documents?folder=${id}`);
setPage(1);
};
// POSTs a new folder, then reloads the folder list and the current contents
// and shows a success popup; any failure shows an error popup instead.
const handleCreateFolder = useCallback(
  async (name: string, description: string) => {
    const request = {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ name, description }),
    };
    try {
      const res = await fetch("/api/user/folder", request);
      if (!res.ok) {
        throw new Error("Failed to create folder");
      }
      fetchFolders();
      refreshFolderContents();
      setPopup({ message: "Folder created successfully", type: "success" });
    } catch (err) {
      console.error("Error creating folder:", err);
      setPopup({ message: "Failed to create folder", type: "error" });
    }
  },
  [fetchFolders, refreshFolderContents, setPopup]
);
// Uploads the files picked in the hidden file input into the current folder
// (an empty `folder_id` is sent when no folder is selected), then refreshes
// the listing and returns to the first page.
const handleUploadFiles = useCallback(
  async (event: React.ChangeEvent<HTMLInputElement>) => {
    const input = event.target;
    // Copy the selection first: clearing the input below also empties its
    // FileList.
    const files = Array.from(input.files ?? []);
    // Reset the input so picking the same file again fires a change event.
    input.value = "";
    if (files.length === 0) {
      // Nothing was selected; do not send an empty upload.
      return;
    }

    const formData = new FormData();
    files.forEach((file) => formData.append("files", file));
    formData.append("folder_id", currentFolder ? currentFolder.toString() : "");

    try {
      const response = await fetch("/api/user/file/upload", {
        method: "POST",
        body: formData,
      });
      if (!response.ok) {
        throw new Error("Failed to upload files");
      }
      refreshFolderContents();
      setPopup({
        message: "Files uploaded successfully",
        type: "success",
      });
    } catch (error) {
      console.error("Error uploading files:", error);
      setPopup({
        message: "Failed to upload files",
        type: "error",
      });
    }
    setPage(1);
  },
  [currentFolder, refreshFolderContents, setPopup, setPage]
);
// Deletes a folder or file through its DELETE endpoint. On success the
// current contents (and, for folders, the folder list) are reloaded and a
// success popup is shown; on failure an error popup is shown.
const handleDeleteItem = async (itemId: number, isFolder: boolean) => {
  const kind = isFolder ? "folder" : "file";
  try {
    const url = isFolder
      ? `/api/user/folder/${itemId}`
      : `/api/user/file/${itemId}`;
    const res = await fetch(url, { method: "DELETE" });
    if (!res.ok) {
      throw new Error(`Failed to delete ${kind}`);
    }
    if (isFolder) {
      fetchFolders();
    }
    refreshFolderContents();
    setPopup({
      message: `${isFolder ? "Folder" : "File"} deleted successfully`,
      type: "success",
    });
  } catch (err) {
    console.error("Error deleting item:", err);
    setPopup({ message: `Failed to delete ${kind}`, type: "error" });
  }
};
// Moves a folder or file to `destinationFolderId` through the matching
// `/move` endpoint. The JSON body carries `new_folder_id` plus the item's own
// id under `folder_id` or `file_id`. Shows a success or error popup.
const handleMoveItem = async (
  itemId: number,
  destinationFolderId: number | null,
  isFolder: boolean
) => {
  const url = isFolder
    ? `/api/user/folder/${itemId}/move`
    : `/api/user/file/${itemId}/move`;
  const idField = isFolder ? "folder_id" : "file_id";
  const payload = JSON.stringify({
    new_folder_id: destinationFolderId,
    [idField]: itemId,
  });
  try {
    const res = await fetch(url, {
      method: "PUT",
      headers: { "Content-Type": "application/json" },
      body: payload,
    });
    if (!res.ok) {
      throw new Error("Failed to move item");
    }
    refreshFolderContents();
    setPopup({
      message: `${isFolder ? "Folder" : "File"} moved successfully`,
      type: "success",
    });
  } catch (err) {
    console.error("Error moving item:", err);
    setPopup({ message: "Failed to move item", type: "error" });
  }
};
// Downloads a document through `/api/chat/file/<id>` by turning the response
// into a blob and clicking a temporary <a download> link. The file name comes
// from the Content-Disposition header, falling back to "document".
const handleDownloadItem = async (documentId: string) => {
  // Extracts the file name from a Content-Disposition header value. Prefers
  // the percent-encoded `filename*=charset'lang'value` form, then the plain
  // `filename=` form with surrounding quotes removed.
  const parseFileName = (header: string | null): string => {
    if (!header) {
      return "document";
    }
    const extended = /filename\*\s*=\s*[^']*'[^']*'([^;]+)/i.exec(header);
    if (extended) {
      try {
        return decodeURIComponent(extended[1].trim()) || "document";
      } catch {
        // Malformed percent-encoding: fall through to the plain form.
      }
    }
    const plain = /filename\s*=\s*(?:"([^"]*)"|([^;]+))/i.exec(header);
    const name = (plain?.[1] ?? plain?.[2] ?? "").trim();
    return name || "document";
  };

  try {
    const response = await fetch(
      `/api/chat/file/${encodeURIComponent(documentId)}`,
      {
        method: "GET",
      }
    );
    if (!response.ok) {
      throw new Error("Failed to fetch file");
    }
    const blob = await response.blob();
    const fileName = parseFileName(
      response.headers.get("Content-Disposition")
    );
    const url = window.URL.createObjectURL(blob);
    const link = document.createElement("a");
    try {
      link.href = url;
      link.download = fileName;
      document.body.appendChild(link);
      link.click();
    } finally {
      // Always detach the link and release the blob URL, even if the click
      // throws.
      link.remove();
      window.URL.revokeObjectURL(url);
    }
  } catch (error) {
    console.error("Error downloading file:", error);
    setPopup({
      message: "Failed to download file",
      type: "error",
    });
  }
};
// Renames a folder or file with a PUT request that carries the URL-encoded
// new name as the `name` query parameter. On success the contents (and, for
// folders, the folder list) are reloaded and a success popup is shown.
const onRenameItem = async (
  itemId: number,
  newName: string,
  isFolder: boolean
) => {
  const encodedName = encodeURIComponent(newName);
  const url = isFolder
    ? `/api/user/folder/${itemId}?name=${encodedName}`
    : `/api/user/file/${itemId}/rename?name=${encodedName}`;
  try {
    const res = await fetch(url, { method: "PUT" });
    if (!res.ok) {
      throw new Error("Failed to rename item");
    }
    if (isFolder) {
      fetchFolders();
    }
    refreshFolderContents();
    setPopup({
      message: `${isFolder ? "Folder" : "File"} renamed successfully`,
      type: "success",
    });
  } catch (err) {
    console.error("Error renaming item:", err);
    setPopup({
      message: `Failed to rename ${isFolder ? "folder" : "file"}`,
      type: "error",
    });
  }
};
return (
<div className="min-h-full w-full min-w-0 flex-1">
<header className="flex bg-background w-full items-center justify-between gap-4 pl-11 pr-3 pt-2 md:pl-8 -translate-y-px">
<h1 className=" flex items-center gap-1.5 text-lg font-medium leading-tight tracking-tight max-md:hidden">
<Grid className="h-5 w-5" />
My Documents
</h1>
<div className="flex items-center gap-2">
<Button
className="inline-flex items-center justify-center relative shrink-0 h-9 px-4 py-2 rounded-lg min-w-[5rem] active:scale-[0.985] whitespace-nowrap pl-2 pr-3 gap-1"
onClick={refreshFolderContents}
>
<RefreshCw className="h-5 w-5" />
Refresh
</Button>
<CreateFolderPopover onCreateFolder={handleCreateFolder} />
<label className="inline-flex items-center justify-center relative shrink-0 h-9 px-4 py-2 rounded-lg min-w-[5rem] active:scale-[0.985] whitespace-nowrap pl-2 pr-3 gap-1 cursor-pointer bg-primary text-primary-foreground hover:bg-primary/90">
<Upload className="h-5 w-5" />
Upload Files
<input
type="file"
multiple
className="hidden"
onChange={handleUploadFiles}
/>
</label>
</div>
</header>
<main className="mx-auto mt-4 w-full max-w-7xl flex-1 px-4 pb-20 md:pl-8 lg:mt-6 md:pr-8 2xl:pr-14">
<div className=" top-3 z-[5] flex gap-4 bg-gradient-to-b via-50% max-lg:flex-col lg:sticky lg:items-center">
<div className="w-full md:max-w-96">
<div className="bg-background-000 border border-border-200 hover:border-border-100 transition-colors placeholder:text-text-500 focus:border-accent-secondary-100 focus-within:!border-accent-secondary-100 focus:ring-0 focus:outline-none disabled:cursor-not-allowed disabled:opacity-50 h-11 px-3 rounded-[0.6rem] w-full inline-flex cursor-text items-stretch gap-2">
<div className="flex items-center">
<Search className="h-4 w-4 text-text-400" />
</div>
<Input
type="text"
placeholder="Search documents..."
value={searchQuery}
onChange={(e) => setSearchQuery(e.target.value)}
className="w-full placeholder:text-text-500 m-0 bg-transparent p-0 focus:outline-none focus:ring-0 disabled:cursor-not-allowed disabled:opacity-50"
/>
</div>
</div>
<div className="flex-1 items-center gap-3 md:flex lg:justify-end">
<div className="flex items-center gap-0.5 max-md:mb-3">
<IconButton
icon={List}
onClick={() => setView("list")}
active={view === "list"}
/>
<IconButton
icon={Grid}
onClick={() => setView("grid")}
active={view === "grid"}
/>
</div>
</div>
</div>
{presentingDocument && (
<TextView
presentingDocument={presentingDocument}
onClose={() => setPresentingDocument(null)}
/>
)}
{popup}
<div className="flex-grow">
{folderContents ? (
folderContents.folders.length > 0 ||
folderContents.files.length > 0 ? (
<div
className={`mt-4 grid gap-3 md:mt-8 ${
view === "grid" ? "md:grid-cols-2" : ""
} md:gap-6`}
>
{folderContents.folders.map((folder) => (
<a
key={folder.id}
className={`from-[#F9F8F4]/80 to-[#F7F6F0] border-0.5 border-border hover:from-[#F9F8F4] hover:to-[#F7F6F0] hover:border-border-200 text-md group relative flex cursor-pointer ${
view === "list" ? "flex-row items-center" : "flex-col"
} overflow-x-hidden text-ellipsis rounded-xl bg-gradient-to-b py-4 pl-5 pr-4 transition-all ease-in-out hover:shadow-sm active:scale-[0.98]`}
href={`/my-documents?folder=${folder.id}`}
onClick={(e) => {
e.preventDefault();
handleFolderClick(folder.id);
}}
>
<div
className={`flex ${
view === "list" ? "flex-row items-center" : "flex-col"
} flex-1`}
>
<div className="font-tiempos flex items-center">
<Grid className="h-5 w-5 mr-2 text-yellow-500" />
<span className="text-truncate inline-block max-w-md">
{folder.name}
</span>
</div>
<div
className={`text-text-400 ${
view === "list" ? "ml-4" : "mt-1"
} line-clamp-2 text-xs`}
>
{folder.description}
</div>
</div>
<div className="text-text-500 mt-3 flex justify-between text-xs">
&nbsp;
<span>
Updated <span data-state="closed">5 months ago</span>
</span>
</div>
</a>
))}
{folderContents.files.map((file) => (
<a
key={file.id}
className={`from-background-100 to-background-100/30 border-0.5 border-border-300 hover:from-background-000 hover:to-background-000/80 hover:border-border-200 text-md group relative flex cursor-pointer ${
view === "list" ? "flex-row items-center" : "flex-col"
} overflow-x-hidden text-ellipsis rounded-xl bg-gradient-to-b py-4 pl-5 pr-4 transition-all ease-in-out hover:shadow-sm active:scale-[0.98]`}
href="#"
onClick={(e) => {
e.preventDefault();
setPresentingDocument({
document_id: file.document_id,
semantic_identifier: file.name,
});
}}
>
<div
className={`flex ${
view === "list" ? "flex-row items-center" : "flex-col"
} flex-1`}
>
<div className="font-tiempos flex items-center">
<List className="h-5 w-5 mr-2 text-blue-500" />
<span className="text-truncate inline-block max-w-md">
{file.name}
</span>
</div>
<div
className={`text-text-300 ${
view === "list" ? "ml-4" : "mt-1"
} line-clamp-2 text-xs`}
>
Document ID: {file.document_id}
</div>
</div>
<div className="text-text-500 mt-3 flex justify-between text-xs">
&nbsp;
<span>
Updated <span data-state="closed">5 months ago</span>
</span>
</div>
</a>
))}
</div>
) : (
<p>No content in this folder</p>
)
) : (
<p>Loading...</p>
)}
<div className="mt-3 flex">
<div className="mx-auto">
<PageSelector
currentPage={page}
totalPages={Math.ceil(
((folderContents?.files?.length || 0) +
(folderContents?.folders?.length || 0)) /
pageLimit
)}
onPageChange={(newPage) => {
setPage(newPage);
window.scrollTo({
top: 0,
left: 0,
behavior: "smooth",
});
}}
/>
</div>
</div>
</div>
</main>
</div>
);
}

View File

@@ -1,19 +0,0 @@
"use client";
import Title from "@/components/ui/title";
import SidebarWrapper from "../assistants/SidebarWrapper";
import MyDocuments from "./MyDocuments";
/**
 * Client-side shell for the user documents page.
 *
 * Renders `MyDocuments` in a centered, width-limited column inside the
 * `SidebarWrapper`, forwarding `initiallyToggled` to the wrapper.
 */
export default function WrappedUserDocuments(props: {
  initiallyToggled: boolean;
}) {
  const { initiallyToggled } = props;

  // Page body, kept separate from the sidebar chrome for readability.
  const pageBody = (
    <div className="mx-auto max-w-4xl w-full">
      <MyDocuments />
    </div>
  );

  return (
    <SidebarWrapper size="lg" initiallyToggled={initiallyToggled}>
      {pageBody}
    </SidebarWrapper>
  );
}

View File

@@ -1,55 +0,0 @@
import React from "react";
import { ChevronRight } from "lucide-react";
import { FolderNode } from "./types";
/** Props accepted by the Breadcrumb component. */
interface BreadcrumbProps {
// Folder whose ancestor chain is rendered; null renders only the "Root" crumb.
currentFolder: FolderNode | null;
// State setter called with the clicked folder when a crumb is selected.
setCurrentFolder: React.Dispatch<React.SetStateAction<FolderNode | null>>;
// Top of the folder tree: target of the "Root" crumb and the tree that is
// searched to resolve each folder's parent_id.
rootFolder: FolderNode;
}
/**
 * Clickable "Root > a > b" trail for the folder currently being viewed.
 *
 * The trail is built by walking `parent_id` links upward from
 * `currentFolder`, resolving each parent inside `rootFolder`. A fixed "Root"
 * crumb is always rendered first and navigates to `rootFolder`.
 */
export const Breadcrumb: React.FC<BreadcrumbProps> = ({
  currentFolder,
  setCurrentFolder,
  rootFolder,
}) => {
  const breadcrumbs: FolderNode[] = [];
  // Ids already placed in the trail; guards against a parent_id cycle, which
  // would otherwise keep this loop running forever.
  const visited = new Set<number>();

  let folder: FolderNode | null = currentFolder;
  // Stop at the root node: it already has the dedicated "Root" crumb below,
  // so including it here would render "Root > Root".
  while (folder && folder.id !== rootFolder.id && !visited.has(folder.id)) {
    visited.add(folder.id);
    breadcrumbs.unshift(folder);
    // Explicit nullish check so a parent id of 0 is still followed.
    folder =
      folder.parent_id != null
        ? findFolderById(rootFolder, folder.parent_id)
        : null;
  }

  return (
    <div className="flex items-center text-sm">
      <span
        className="cursor-pointer hover:underline"
        onClick={() => setCurrentFolder(rootFolder)}
      >
        Root
      </span>
      {breadcrumbs.map((crumb) => (
        <React.Fragment key={crumb.id}>
          <ChevronRight className="mx-1 h-4 w-4 text-gray-400" />
          <span
            className="cursor-pointer hover:underline"
            onClick={() => setCurrentFolder(crumb)}
          >
            {crumb.name}
          </span>
        </React.Fragment>
      ))}
    </div>
  );
};
/**
 * Find the folder with the given id inside the tree rooted at `root`.
 *
 * Nodes are visited in depth-first pre-order (a node before its children,
 * children left to right); the first match is returned, or null when no node
 * carries that id.
 */
function findFolderById(root: FolderNode, id: number): FolderNode | null {
  // Explicit stack instead of recursion; the top of the stack is the next
  // node in pre-order.
  const pending: FolderNode[] = [root];

  while (pending.length > 0) {
    const candidate = pending.pop() as FolderNode;
    if (candidate.id === id) {
      return candidate;
    }
    // Push children right-to-left so the leftmost child is popped first.
    for (let i = candidate.children.length - 1; i >= 0; i--) {
      pending.push(candidate.children[i]);
    }
  }

  return null;
}

View File

@@ -1,53 +0,0 @@
import React from "react";
import { Checkbox } from "@/components/ui/checkbox";
import { File as FileIcon } from "lucide-react";
import { UserFile } from "./types";
/** Props accepted by the FileListItem component. */
interface FileListItemProps {
// File to display; its name is rendered as the row/tile label.
file: UserFile;
// Drives the checked state of the item's checkbox.
isSelected: boolean;
// Called when the item is clicked.
onSelect: () => void;
// Layout variant: "grid" renders a stacked tile, "list" a single row.
view: "grid" | "list";
}
/**
 * One selectable file entry in the picker, shown either as a stacked tile
 * ("grid") or as a single row ("list").
 *
 * Clicking anywhere on the item calls `onSelect`; the checkbox only mirrors
 * `isSelected` and has no change handler of its own.
 */
export const FileListItem: React.FC<FileListItemProps> = ({
  file,
  isSelected,
  onSelect,
  view,
}) => {
  const isGrid = view === "grid";

  return (
    <div
      // The template previously read `p-2 s${...}`, which glued a stray "s"
      // onto the first layout class ("sflex") and disabled the flex layout.
      className={`p-2 ${
        isGrid
          ? "flex flex-col items-center"
          : "flex items-center hover:bg-gray-100 rounded cursor-pointer"
      }`}
      onClick={onSelect}
    >
      <div className={`flex w-full items-center ${isGrid ? "flex-col" : ""}`}>
        <Checkbox
          checked={isSelected}
          className={isGrid ? "ml-4 mb-2" : "mr-2"}
        />
        <FileIcon
          className={`${
            isGrid ? "h-12 w-12 mb-2" : "h-5 w-5 mr-2"
          } text-gray-500`}
        />
        <span
          className={`max-w-full text-sm truncate ${
            isGrid ? "text-center" : ""
          }`}
        >
          {file.name}
        </span>
      </div>
    </div>
  );
};

View File

@@ -1,339 +0,0 @@
import React, { useState, useEffect } from "react";
import { Button } from "@/components/ui/button";
import { Modal } from "@/components/Modal";
import { Grid, List, UploadIcon } from "lucide-react";
import { FolderTreeItem } from "./FolderTreeItem";
import { FileListItem } from "./FileListItem";
import { Breadcrumb } from "./Breadcrumb";
import { SelectedItemsList } from "./SelectedItemsList";
import {
FolderNode,
UserFolder,
UserFile,
FilePickerModalProps,
} from "./types";
import { Separator } from "@/components/ui/separator";
/** Small (h-4 w-4) list glyph used by the view toggle. */
const ListIcon = () => {
  return <List className="h-4 w-4" />;
};
/** Small (h-4 w-4) grid glyph used by the view toggle. */
const GridIcon = () => {
  return <Grid className="h-4 w-4" />;
};
/**
 * Square icon-only toggle button.
 *
 * @param icon    Component rendered as the button's only content.
 * @param onClick Invoked when the button is clicked.
 * @param active  Whether the button represents the currently chosen option;
 *                selects the highlighted background and is exposed to
 *                assistive technology through `aria-pressed`.
 */
const IconButton: React.FC<{
  icon: React.ComponentType;
  onClick: () => void;
  active: boolean;
}> = ({ icon: Icon, onClick, active }) => (
  <button
    // Explicit type: a bare <button> defaults to "submit" and would submit
    // any form it is ever placed in.
    type="button"
    aria-pressed={active}
    className={`p-2 flex-none h-10 w-10 flex items-center justify-center rounded ${
      active ? "bg-gray-200" : "hover:bg-gray-100"
    }`}
    onClick={onClick}
  >
    <Icon />
  </button>
);
/**
 * Assemble flat folder and file lists into a tree under a synthetic root.
 *
 * The returned root has id 0, name "Root" and no parent. A folder or file
 * whose parent reference is null or undefined is attached directly to the
 * root (callers build these lists from an untyped API payload, so the field
 * may simply be absent). An entry whose parent id does not match any folder
 * in `folders` is left out of the tree.
 *
 * @param folders Flat list of folders; `parent_id` links a folder to its parent.
 * @param files   Flat list of files; `parent_folder_id` links a file to its folder.
 * @returns The synthetic root node with `children` and `files` populated.
 */
function buildTree(folders: UserFolder[], files: UserFile[]): FolderNode {
  const rootNode: FolderNode = {
    id: 0,
    name: "Root",
    parent_id: null,
    children: [],
    files: [],
  };

  // One fresh node per folder, indexed by id for the linking passes below.
  const folderMap: { [key: number]: FolderNode } = {};
  for (const folder of folders) {
    folderMap[folder.id] = { ...folder, children: [], files: [] };
  }

  for (const file of files) {
    // `== null` matches both null and undefined.
    if (file.parent_folder_id == null) {
      rootNode.files.push(file);
    } else if (folderMap[file.parent_folder_id]) {
      folderMap[file.parent_folder_id].files.push(file);
    }
  }

  for (const folder of folders) {
    if (folder.parent_id == null) {
      rootNode.children.push(folderMap[folder.id]);
    } else if (folderMap[folder.parent_id]) {
      folderMap[folder.parent_id].children.push(folderMap[folder.id]);
    }
  }

  return rootNode;
}
/**
 * Modal for picking context items: existing user files/folders, newly
 * uploaded files, and links.
 *
 * When `isOpen` becomes true the user's file system is fetched from
 * `/api/user/file-system` and assembled into a tree with `buildTree`. Nothing
 * is rendered until that tree is available.
 *
 * Pressing the save button calls `onSave` with the selected file and folder
 * ids, then `onClose`. Uploaded files and links are held in local state only
 * and are not part of the `onSave` payload.
 */
export const FilePickerModal: React.FC<FilePickerModalProps> = ({
  isOpen,
  onClose,
  onSave,
  title,
  buttonContent,
}) => {
  const [allFolders, setAllFolders] = useState<UserFolder[]>([]);
  const [allFiles, setAllFiles] = useState<UserFile[]>([]);
  const [fileSystem, setFileSystem] = useState<FolderNode | null>(null);
  const [currentFolder, setCurrentFolder] = useState<FolderNode | null>(null);
  const [uploadedFiles, setUploadedFiles] = useState<File[]>([]);
  const [links, setLinks] = useState<string[]>([]);
  const [selectedItems, setSelectedItems] = useState<{
    files: number[];
    folders: number[];
  }>({ files: [], folders: [] });
  const [view, setView] = useState<"grid" | "list">("list");

  useEffect(() => {
    // Set by the cleanup below so a response that arrives after the modal
    // was closed/unmounted (or after a newer request started) is ignored.
    let cancelled = false;

    const loadFileSystem = async () => {
      try {
        const res = await fetch("/api/user/file-system");
        if (!res.ok) {
          throw new Error(
            `Failed to load user file system (status ${res.status})`
          );
        }
        const data = await res.json();
        const folders = data.folders.map((f: any) => ({
          id: f.id,
          name: f.name,
          parent_id: f.parent_id,
        }));
        const files = data.files.map((f: any) => ({
          id: f.id,
          name: f.name,
          parent_folder_id: f.parent_folder_id,
        }));
        if (cancelled) {
          return;
        }
        setAllFolders(folders);
        setAllFiles(files);
        const tree = buildTree(folders, files);
        setFileSystem(tree);
        setCurrentFolder(tree);
      } catch (err) {
        // Network failure, non-2xx status or malformed payload: report it
        // instead of leaving an unhandled promise rejection. State is left
        // untouched, so the modal stays unrendered as before.
        if (!cancelled) {
          console.error("Failed to load user file system", err);
        }
      }
    };

    if (isOpen) {
      loadFileSystem();
    }

    return () => {
      cancelled = true;
    };
  }, [isOpen]);

  const handleSave = () => {
    onSave(selectedItems);
    onClose();
  };

  const handleRemoveSelectedItem = (type: "file" | "folder", id: number) => {
    setSelectedItems((prev) => ({
      ...prev,
      [type === "file" ? "files" : "folders"]: prev[
        type === "file" ? "files" : "folders"
      ].filter((itemId) => itemId !== id),
    }));
  };

  // Removes every pending upload with this name (uploads are matched by
  // name, not by identity).
  const handleRemoveUploadedFile = (name: string) => {
    setUploadedFiles((prev) => prev.filter((file) => file.name !== name));
  };

  const handleFolderClick = (folder: FolderNode) => {
    setCurrentFolder(folder);
  };

  // Toggles the file's id in the selection.
  const handleFileSelect = (fileId: number) => {
    setSelectedItems((prev) => ({
      ...prev,
      files: prev.files.includes(fileId)
        ? prev.files.filter((id) => id !== fileId)
        : [...prev.files, fileId],
    }));
  };

  const handleFileUpload = (e: React.ChangeEvent<HTMLInputElement>) => {
    const files = e.target.files;
    if (files) {
      setUploadedFiles((prev) => [...prev, ...Array.from(files)]);
    }
  };

  const calculateTokens = () => {
    // This is a placeholder calculation. Replace with actual token calculation logic.
    return selectedItems.files.length * 10 + selectedItems.folders.length * 50;
  };

  // All hooks have run by this point, so returning early is safe.
  if (!fileSystem || !currentFolder) return null;

  return (
    <Modal
      hideDividerForTitle
      onOutsideClick={onClose}
      className="max-w-4xl flex flex-col w-full !overflow-hidden h-[70vh]"
      title={title}
    >
      <div className="flex w-full items-center flex-col h-full">
        <div className="grid h-full grid-cols-2 overflow-y-hidden w-full">
          <div className="w-full pb-4 border-r overflow-y-auto">
            <div className="mb-4 flex gap-x-2 w-full">
              <div className="w-full relative">
                {/* NOTE(review): this search box is not wired to any state or filtering yet. */}
                <input
                  type="text"
                  placeholder="Search files and folders..."
                  className="w-full pl-10 pr-4 py-2 border border-gray-300 rounded-md focus:border-transparent"
                />
                <div className="absolute inset-y-0 left-0 pl-3 flex items-center pointer-events-none">
                  <svg
                    className="h-5 w-5 text-text-dark"
                    fill="none"
                    viewBox="0 0 24 24"
                    stroke="currentColor"
                  >
                    <path
                      strokeLinecap="round"
                      strokeLinejoin="round"
                      strokeWidth={2}
                      d="M21 21l-6-6m2-5a7 7 0 11-14 0 7 7 0 0114 0z"
                    />
                  </svg>
                </div>
              </div>
              <div className="px-2 flex space-x-2">
                <IconButton
                  icon={ListIcon}
                  onClick={() => setView("list")}
                  active={view === "list"}
                />
                <IconButton
                  icon={GridIcon}
                  onClick={() => setView("grid")}
                  active={view === "grid"}
                />
              </div>
            </div>
            <div className="flex-grow overflow-y-auto">
              <div
                className={`${view === "grid" ? "grid grid-cols-4 gap-4" : ""}`}
              >
                {currentFolder.children.map((folder) => (
                  <div
                    key={folder.id}
                    className={` ${
                      view === "grid"
                        ? "flex flex-col items-center"
                        : "flex items-center"
                    }`}
                    onClick={() => handleFolderClick(folder)}
                  >
                    <FolderTreeItem
                      node={folder}
                      selectedItems={selectedItems}
                      setSelectedItems={setSelectedItems}
                      setCurrentFolder={setCurrentFolder}
                      depth={0}
                      view={view}
                    />
                  </div>
                ))}
                {currentFolder.files.map((file) => (
                  <FileListItem
                    key={file.id}
                    file={file}
                    isSelected={selectedItems.files.includes(file.id)}
                    onSelect={() => handleFileSelect(file.id)}
                    view={view}
                  />
                ))}
              </div>
            </div>
          </div>
          {/* NOTE: update */}
          <div className="w-full px-4 pb-4 m-2 flex flex-col h-[450px] ">
            <div className="shrink flex h-full overflow-y-auto mb-1 ">
              <SelectedItemsList
                links={links}
                selectedItems={selectedItems}
                allFolders={allFolders}
                allFiles={allFiles}
                uploadedFiles={uploadedFiles}
                onRemove={handleRemoveSelectedItem}
                onRemoveUploadedFile={handleRemoveUploadedFile}
              />
            </div>
            <div className="flex flex-col">
              <div className="p-4 flex-none border rounded-lg bg-neutral-50">
                <label
                  htmlFor="file-upload"
                  className="cursor-pointer flex items-center justify-center space-x-2"
                >
                  <UploadIcon className="w-5 h-5 text-gray-600" />
                  <span className="text-sm font-medium text-gray-700">
                    Upload files
                  </span>
                </label>
                <input
                  id="file-upload"
                  type="file"
                  multiple
                  className="hidden"
                  onChange={handleFileUpload}
                />
              </div>
              <Separator className="my-2" />
              <div className="flex flex-col">
                <div className="flex flex-col gap-y-2">
                  <p className="text-sm text-text-subtle">
                    Add links to the context
                  </p>
                </div>
                <form
                  className="flex gap-x-4 mt-2"
                  onSubmit={(e) => e.preventDefault()}
                >
                  <div className="w-full gap-x-2 flex">
                    {/* Uncontrolled input: the Add button reads its value directly. */}
                    <input
                      type="url"
                      placeholder="Enter URL"
                      className="w-full px-3 py-2 border border-gray-300 rounded-md text-sm"
                    />
                    <Button
                      type="button"
                      onClick={(e) => {
                        const input = e.currentTarget.form?.querySelector(
                          'input[type="url"]'
                        ) as HTMLInputElement;
                        if (input && input.value) {
                          // Capture the value now; the input is cleared
                          // before the state updater runs.
                          const url = input.value;
                          setLinks((prevLinks) => [...prevLinks, url]);
                          input.value = "";
                        }
                      }}
                    >
                      Add
                    </Button>
                  </div>
                </form>
              </div>
            </div>
          </div>
        </div>
        <div className="pt-4 flex-col w-full flex border-t mt-auto items-center justify-between">
          <div className="mb-4 font-medium text-lg text-text-dark">
            Total tokens: {calculateTokens()}
          </div>
          <div className="flex justify-center">
            <Button
              className="text-lg"
              size="lg"
              onClick={handleSave}
              variant="default"
            >
              {buttonContent}
            </Button>
          </div>
        </div>
      </div>
    </Modal>
  );
};

View File

@@ -1,99 +0,0 @@
import React from "react";
import { Checkbox } from "@/components/ui/checkbox";
import { Folder as FolderIcon } from "lucide-react";
import { FolderNode } from "./types";
/** Props accepted by the FolderTreeItem component. */
interface FolderTreeItemProps {
// Folder rendered by this item.
node: FolderNode;
// Current selection; used to tell whether this folder's id is selected.
selectedItems: { files: number[]; folders: number[] };
// State setter for the selection, used to toggle this folder's id.
setSelectedItems: React.Dispatch<
React.SetStateAction<{ files: number[]; folders: number[] }>
>;
// State setter called with `node` when the item is clicked.
setCurrentFolder: React.Dispatch<React.SetStateAction<FolderNode | null>>;
// NOTE(review): presumably the nesting level of the item; not shown to be
// read by the component in this file — confirm before relying on it.
depth: number;
// Layout variant requested by the parent ("grid" or "list").
view: "grid" | "list";
}
/**
 * Card for a single folder in the picker; clicking it makes `node` the
 * current folder.
 *
 * NOTE(review): the description and "Updated ..." texts are hard-coded
 * placeholders, and `depth`, `view` and the selection toggle are not used by
 * the rendered markup.
 */
export const FolderTreeItem: React.FC<FolderTreeItemProps> = ({
  node,
  selectedItems,
  setSelectedItems,
  setCurrentFolder,
  depth,
  view,
}) => {
  const isFolderSelected = selectedItems.folders.includes(node.id);

  // Adds or removes this folder's id from the selection without letting the
  // click reach the card's own onClick. Not attached to any element in the
  // markup below.
  const handleFolderSelect = (e: React.MouseEvent) => {
    e.stopPropagation();
    setSelectedItems((prev) => {
      const nextFolders = isFolderSelected
        ? prev.folders.filter((id) => id !== node.id)
        : [...prev.folders, node.id];
      return { ...prev, folders: nextFolders };
    });
  };

  const openFolder = () => setCurrentFolder(node);

  const cardClassName =
    "from-[#F9F8F2] border border-border w-full to-[#F9F8F2]/30 border-0.5 border-border-300 hover:from-[#F9F8F2] hover:to-[#F9F8F2]/80 hover:border-border-200 text-md group relative flex cursor-pointer flex-col overflow-x-hidden text-ellipsis rounded-xl bg-gradient-to-b py-3 pl-5 pr-4 transition-all ease-in-out hover:shadow-sm ";

  return (
    <a className={cardClassName} onClick={openFolder}>
      <div className="flex flex-1 flex-col">
        <div className="flex">
          <span className="text-truncate text-text-dark inline-block max-w-md">
            {node.name}
          </span>
        </div>
        <div className="text-text-500 mt-1 line-clamp-2 text-xs">
          This folder contains 1000 files and describes the state of the company
          {/* Add folder description or other details here */}
        </div>
      </div>
      <div className="text-text-500 mt-1 flex justify-between text-xs">
        &nbsp;
        <span>
          Updated <span data-state="closed">47 minutes ago</span>
        </span>
      </div>
    </a>
  );
};
{
/* Original implementation commented out
<div
className={` p-2 w-full ${
view === "grid"
? "flex flex-col rounded items-center"
: "flex items-center hover:bg-gray-100 rounded-gl cursor-pointer"
}`}
onClick={() => setCurrentFolder(node)}
>
<div
className={`flex overflow-hidden w-full items-center ${
view === "grid" ? "flex-col" : ""
}`}
>
<Checkbox
checked={isFolderSelected}
onCheckedChange={() => {}}
onClick={handleFolderSelect}
className={view === "grid" ? "my-1" : "mr-2"}
/>
<FolderIcon
className={`${
view === "grid" ? "h-12 w-12 mb-2" : "h-5 w-5 mr-2"
} text-blue-500`}
/>
<span
className={`max-w-full text-sm truncate ${
view === "grid" ? "text-center" : ""
}`}
>
{node.name}
</span>
</div>
</div>
*/
}

View File

@@ -1,102 +0,0 @@
import React from "react";
import { Button } from "@/components/ui/button";
import { X } from "lucide-react";
import { UserFolder, UserFile } from "./types";
/** Props accepted by the SelectedItemsList component. */
interface SelectedItemsListProps {
// Browser File objects listed with an "(uploaded)" suffix.
uploadedFiles: File[];
// Ids of the chosen files and folders.
selectedItems: { files: number[]; folders: number[] };
// Full folder list; entries whose id is selected are displayed by name.
allFolders: UserFolder[];
// Full file list; entries whose id is selected are displayed by name.
allFiles: UserFile[];
// Called with the item kind and id when a selected file/folder is removed.
onRemove: (type: "file" | "folder", id: number) => void;
// Called with the file name when an uploaded file is removed.
onRemoveUploadedFile: (name: string) => void;
// Link strings displayed at the top of the list.
links: string[];
}
/**
 * Summary list of everything currently chosen: links, uploaded files,
 * selected folders and selected files, in that order, each with a remove
 * button.
 *
 * Selected folders/files are resolved by filtering `allFolders` / `allFiles`
 * against the ids in `selectedItems`, so they appear in the order of those
 * lists rather than in selection order.
 */
export const SelectedItemsList: React.FC<SelectedItemsListProps> = ({
  links,
  uploadedFiles,
  selectedItems,
  allFolders,
  allFiles,
  onRemove,
  onRemoveUploadedFile,
}) => {
  // Sets give constant-time membership checks while filtering.
  const selectedFolderIds = new Set(selectedItems.folders);
  const selectedFileIds = new Set(selectedItems.files);

  const selectedFolders = allFolders.filter((folder) =>
    selectedFolderIds.has(folder.id)
  );
  const selectedFiles = allFiles.filter((file) =>
    selectedFileIds.has(file.id)
  );

  return (
    <div className="h-full w-full flex flex-col">
      <h3 className="font-semibold mb-2">Selected Items</h3>
      <div className="w-full overflow-y-auto border-t border-t-text-subtle flex-grow">
        <div className="space-y-2">
          {links.map((link: string, index: number) => (
            <div
              // The same URL can be added twice, so the link text alone is
              // not a unique React key; the position disambiguates it.
              key={`link-${index}-${link}`}
              className="flex w-full items-center justify-between bg-gray-100 p-1.5 rounded"
            >
              <span className="text-sm">{link}</span>
              {/* NOTE(review): no removal callback exists for links, so this button currently does nothing. */}
              <Button variant="ghost" size="sm">
                <X className="h-4 w-4" />
              </Button>
            </div>
          ))}
          {uploadedFiles.map((file, index) => (
            <div
              // Two uploads may share a file name; include the position so
              // keys stay unique.
              key={`upload-${index}-${file.name}`}
              className="flex w-full items-center justify-between bg-gray-100 p-1.5 rounded"
            >
              <span className="text-sm">
                {file.name}{" "}
                <span className="text-xs w-full truncate text-gray-500">
                  (uploaded)
                </span>
              </span>
              <Button
                variant="ghost"
                size="sm"
                onClick={() => onRemoveUploadedFile(file.name)}
              >
                <X className="h-4 w-4" />
              </Button>
            </div>
          ))}
          {selectedFolders.map((folder) => (
            <div
              key={folder.id}
              className="flex items-center justify-between bg-gray-100 p-2 rounded"
            >
              <span className="text-sm">{folder.name}</span>
              <Button
                variant="ghost"
                size="sm"
                onClick={() => onRemove("folder", folder.id)}
              >
                <X className="h-4 w-4" />
              </Button>
            </div>
          ))}
          {selectedFiles.map((file) => (
            <div
              key={file.id}
              className="flex items-center justify-between bg-gray-100 p-2 rounded"
            >
              <span className="w-full truncate text-sm">{file.name}</span>
              <Button
                variant="ghost"
                size="sm"
                onClick={() => onRemove("file", file.id)}
              >
                <X className="h-4 w-4" />
              </Button>
            </div>
          ))}
        </div>
      </div>
    </div>
  );
};

View File

@@ -1,24 +0,0 @@
/** A user-owned folder as used by the file picker. */
export interface UserFolder {
// Numeric folder identifier.
id: number;
// Display name of the folder.
name: string;
// Id of the containing folder, or null when the folder has no parent.
parent_id: number | null;
}
/** A user-owned file as used by the file picker. */
export interface UserFile {
// Numeric file identifier.
id: number;
// Display name of the file.
name: string;
// Id of the folder holding the file, or null when it is not in a folder.
// NOTE(review): confirm the API actually returns a field with this name.
parent_folder_id: number | null;
}
/** A folder enriched with its direct contents, forming a tree. */
export interface FolderNode extends UserFolder {
// Direct sub-folders of this folder.
children: FolderNode[];
// Files located directly in this folder.
files: UserFile[];
}
/** Props accepted by the FilePickerModal component. */
export interface FilePickerModalProps {
// Whether the picker is open.
isOpen: boolean;
// Called to close the picker.
onClose: () => void;
// Receives the ids of the files and folders the user selected.
onSave: (selectedItems: { files: number[]; folders: number[] }) => void;
// Title text for the modal.
title: string;
// Content of the save button.
buttonContent: string;
}

View File

@@ -1,53 +0,0 @@
import { fetchChatData } from "@/lib/chat/fetchChatData";
import WrappedDocuments from "./WrappedDocuments";
import { redirect } from "next/navigation";
import { ChatProvider } from "@/components/context/ChatContext";
/**
 * Server component for the documents page.
 *
 * Loads chat data for the request's search params, follows the redirect if
 * `fetchChatData` asks for one, and otherwise renders `WrappedDocuments`
 * inside a `ChatProvider` populated from that data.
 */
export default async function GalleryPage(props: {
  searchParams: Promise<{ [key: string]: string }>;
}) {
  const resolvedSearchParams = await props.searchParams;
  const data = await fetchChatData(resolvedSearchParams);

  if ("redirect" in data) {
    redirect(data.redirect);
  }

  const {
    chatSessions,
    toggleSidebar,
    shouldShowWelcomeModal,
    availableSources,
    ccPairs,
    documentSets,
    tags,
    llmProviders,
    defaultAssistantId,
    folders,
    inputPrompts,
    openedFolders,
  } = data;

  // Context value for the provider. `documentSets` and `tags` are exposed
  // under both their plain and "available*" names; `toggledSidebar` is fixed
  // to false while the fetched `toggleSidebar` goes to the page wrapper.
  const chatContextValue = {
    chatSessions,
    availableSources,
    ccPairs,
    documentSets,
    tags,
    availableDocumentSets: documentSets,
    availableTags: tags,
    llmProviders,
    shouldShowWelcomeModal,
    defaultAssistantId,
    folders,
    toggledSidebar: false,
    inputPrompts,
    openedFolders,
  };

  return (
    <ChatProvider value={chatContextValue}>
      <WrappedDocuments initiallyToggled={toggleSidebar} />
    </ChatProvider>
  );
}

View File

@@ -1,64 +0,0 @@
import { useState, useEffect, useCallback } from "react";
// API functions
/**
 * GET the document list from `/api/manage/admin/documents`.
 *
 * @returns The parsed JSON body of the response.
 * @throws Error("Failed to fetch documents") when the response is not ok.
 */
const fetchDocuments = async (): Promise<Document[]> => {
  const response = await fetch("/api/manage/admin/documents");
  if (response.ok) {
    return response.json();
  }
  throw new Error("Failed to fetch documents");
};
/**
 * Send a DELETE request for one document to
 * `/api/manage/admin/documents/<documentId>`.
 *
 * @param documentId Numeric id placed in the request path.
 * @throws Error("Failed to delete document") when the response is not ok.
 */
const deleteDocument = async (documentId: number): Promise<void> => {
  const endpoint = `/api/manage/admin/documents/${documentId}`;
  const { ok } = await fetch(endpoint, { method: "DELETE" });
  if (!ok) {
    throw new Error("Failed to delete document");
  }
};
/**
 * A document record as handled by the useDocuments hook.
 *
 * NOTE(review): the name coincides with the DOM's global `Document` type, so
 * inside this module (and any module importing it) `Document` refers to this
 * interface instead.
 */
export interface Document {
// Numeric id; this is the value passed when deleting a document.
id: number;
// String identifier of the document.
document_id: string;
}
// Custom hook
/**
 * React hook that loads the document list on mount and exposes reload and
 * delete helpers.
 *
 * @returns
 *   - `documents`: the most recently fetched list (initially empty).
 *   - `isLoading`: true while a load is in progress (initially true).
 *   - `error`: message from the last failed load/delete; reset to null at
 *     the start of every load.
 *   - `loadDocuments`: re-fetches the list; it handles its own failures by
 *     setting `error`.
 *   - `handleDeleteDocument`: deletes one document, then reloads the list.
 */
export const useDocuments = () => {
  const [documents, setDocuments] = useState<Document[]>([]);
  const [isLoading, setIsLoading] = useState(true);
  const [error, setError] = useState<string | null>(null);

  const loadDocuments = useCallback(async () => {
    setIsLoading(true);
    setError(null);
    try {
      setDocuments(await fetchDocuments());
    } catch (loadError) {
      setError("Failed to load documents err: " + loadError);
    } finally {
      setIsLoading(false);
    }
  }, []);

  // `loadDocuments` handles its own errors, so this catch only fires when
  // the delete request itself fails.
  async function handleDeleteDocument(documentId: number) {
    try {
      await deleteDocument(documentId);
      await loadDocuments();
    } catch {
      setError("Failed to delete document");
    }
  }

  // Initial load; `loadDocuments` is stable, so this runs once per mount.
  useEffect(() => {
    loadDocuments();
  }, [loadDocuments]);

  return {
    documents,
    isLoading,
    error,
    loadDocuments,
    handleDeleteDocument,
  };
};

View File

@@ -1,5 +1,5 @@
import { redirect } from "next/navigation";
export default async function Page() {
redirect("/chat");
redirect("/auth/login");
}

View File

@@ -116,7 +116,7 @@ export function Modal({
{icon && icon({ size: 30 })}
</h2>
</div>
{!hideDividerForTitle ? <Separator /> : <div className="my-4" />}
{!hideDividerForTitle && <Separator />}
</>
)}
</div>

View File

@@ -77,9 +77,7 @@ export function LabelWithTooltip({
}
export function SubLabel({ children }: { children: string | JSX.Element }) {
return (
<div className="text-xs text-subtle whitespace-pre-line">{children}</div>
);
return <div className="text-xs text-subtle">{children}</div>;
}
export function ManualErrorMessage({ children }: { children: string }) {

View File

@@ -9,11 +9,11 @@ import {
DialogTitle,
} from "@/components/ui/dialog";
import { Download, XIcon, ZoomIn, ZoomOut } from "lucide-react";
import { MinimalOnyxDocument } from "@/lib/search/interfaces";
import { OnyxDocument } from "@/lib/search/interfaces";
import { MinimalMarkdown } from "./MinimalMarkdown";
interface TextViewProps {
presentingDocument: MinimalOnyxDocument;
presentingDocument: OnyxDocument;
onClose: () => void;
}

View File

@@ -130,6 +130,13 @@ const SelectItem = React.forwardRef<
)}
{...props}
>
{!hideCheck && (
<span className="absolute left-2 flex h-3.5 w-3.5 items-center justify-center">
<SelectPrimitive.ItemIndicator>
<Check className="h-4 w-4" />
</SelectPrimitive.ItemIndicator>
</span>
)}
{!selected && Icon && (
<span className="absolute left-2 flex h-3.5 w-3.5 items-center justify-center">
<Icon className="h-4 w-4" />

View File

@@ -481,9 +481,7 @@ Hint: Use the singular form of the object name (e.g., 'Opportunity' instead of '
name: "sites",
optional: true,
description: `• If no sites are specified, all sites in your organization will be indexed (Sites.Read.All permission required).
• Specifying 'https://onyxai.sharepoint.com/sites/support' for example will only index documents within this site.
• Specifying 'https://onyxai.sharepoint.com/sites/support/subfolder' for example will only index documents within this folder.
`,
},

View File

@@ -44,15 +44,13 @@ export interface Quote {
export interface QuotesInfoPacket {
quotes: Quote[];
}
export interface MinimalOnyxDocument {
document_id: string;
semantic_identifier: string | null;
}
export interface OnyxDocument extends MinimalOnyxDocument {
export interface OnyxDocument {
document_id: string;
link: string;
source_type: ValidSources;
blurb: string;
semantic_identifier: string | null;
boost: number;
hidden: boolean;
score: number;