mirror of
https://github.com/onyx-dot-app/onyx.git
synced 2026-02-28 21:25:44 +00:00
Compare commits
3 Commits
embed_imag
...
experiment
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c4ffe3afcf | ||
|
|
9139a54926 | ||
|
|
cdfde4ca26 |
@@ -677,7 +677,6 @@ def connector_document_extraction(
|
||||
logger.debug(f"Indexing batch of documents: {batch_description}")
|
||||
memory_tracer.increment_and_maybe_trace()
|
||||
|
||||
# cc4a
|
||||
if processing_mode == ProcessingMode.FILE_SYSTEM:
|
||||
# File system only - write directly to persistent storage,
|
||||
# skip chunking/embedding/Vespa but still track documents in DB
|
||||
|
||||
@@ -228,6 +228,9 @@ class DocumentSource(str, Enum):
|
||||
MOCK_CONNECTOR = "mock_connector"
|
||||
# Special case for user files
|
||||
USER_FILE = "user_file"
|
||||
# Raw files for Craft sandbox access (xlsx, pptx, docx, etc.)
|
||||
# Uses RAW_BINARY processing mode - no text extraction
|
||||
CRAFT_FILE = "craft_file"
|
||||
|
||||
|
||||
class FederatedConnectorSource(str, Enum):
|
||||
|
||||
@@ -6,6 +6,8 @@ from collections.abc import Sequence
|
||||
from datetime import datetime
|
||||
from datetime import timedelta
|
||||
from datetime import timezone
|
||||
from typing import Any
|
||||
from uuid import UUID
|
||||
|
||||
from sqlalchemy import and_
|
||||
from sqlalchemy import delete
|
||||
@@ -226,6 +228,50 @@ def get_documents_by_ids(
|
||||
return list(documents)
|
||||
|
||||
|
||||
def get_documents_by_source(
    db_session: Session,
    source: DocumentSource,
    creator_id: UUID | None = None,
) -> list[DbDocument]:
    """Fetch every document indexed by connectors of the given source type.

    Walks Document -> DocumentByConnectorCredentialPair ->
    ConnectorCredentialPair -> Connector and filters on the connector's
    source.

    Args:
        db_session: Database session
        source: The document source type to filter by
        creator_id: If provided, only return documents from connectors
            created by this user. Filters via ConnectorCredentialPair.
    """
    # Join condition linking a document's cc-pair mapping row to the
    # ConnectorCredentialPair itself (composite key: connector + credential).
    cc_pair_join = and_(
        DocumentByConnectorCredentialPair.connector_id
        == ConnectorCredentialPair.connector_id,
        DocumentByConnectorCredentialPair.credential_id
        == ConnectorCredentialPair.credential_id,
    )

    stmt = (
        select(DbDocument)
        .join(
            DocumentByConnectorCredentialPair,
            DbDocument.id == DocumentByConnectorCredentialPair.id,
        )
        .join(ConnectorCredentialPair, cc_pair_join)
        .join(Connector, ConnectorCredentialPair.connector_id == Connector.id)
        .where(Connector.source == source)
    )

    if creator_id is not None:
        stmt = stmt.where(ConnectorCredentialPair.creator_id == creator_id)

    # A document may be reachable through several cc-pairs; dedupe rows.
    rows = db_session.execute(stmt.distinct()).scalars().all()
    return list(rows)
|
||||
|
||||
|
||||
def _apply_last_updated_cursor_filter(
|
||||
stmt: Select,
|
||||
cursor_last_modified: datetime | None,
|
||||
@@ -1527,3 +1573,40 @@ def get_document_kg_entities_and_relationships(
|
||||
def get_num_chunks_for_document(db_session: Session, document_id: str) -> int:
    """Return the stored chunk count for a document, or 0 when missing/None."""
    chunk_count = db_session.execute(
        select(DbDocument.chunk_count).where(DbDocument.id == document_id)
    ).scalar_one_or_none()
    # chunk_count may be None (no row, or NULL column) — normalize to 0.
    return chunk_count or 0
|
||||
|
||||
|
||||
def update_document_metadata__no_commit(
    db_session: Session,
    document_id: str,
    doc_metadata: dict[str, Any],
) -> None:
    """Overwrite the doc_metadata field of a single document.

    Note: Does not commit. Caller is responsible for committing.

    Args:
        db_session: Database session
        document_id: The ID of the document to update
        doc_metadata: The new metadata dictionary to set (replaces the old
            value wholesale; no merging is performed)
    """
    db_session.execute(
        update(DbDocument)
        .where(DbDocument.id == document_id)
        .values(doc_metadata=doc_metadata)
    )
|
||||
|
||||
|
||||
def delete_document_by_id__no_commit(
    db_session: Session,
    document_id: str,
) -> None:
    """Delete one document and its connector credential pair relationships.

    Note: Does not commit. Caller is responsible for committing.

    Delegates to delete_documents_complete__no_commit, which cleans up every
    foreign-key relationship (KG entities, relationships, chunk stats,
    cc pair associations, feedback, tags) before removing the document row.
    """
    delete_documents_complete__no_commit(db_session, [document_id])
|
||||
|
||||
@@ -60,7 +60,8 @@ class ProcessingMode(str, PyEnum):
|
||||
"""Determines how documents are processed after fetching."""
|
||||
|
||||
REGULAR = "REGULAR" # Full pipeline: chunk → embed → Vespa
|
||||
FILE_SYSTEM = "FILE_SYSTEM" # Write to file system only
|
||||
FILE_SYSTEM = "FILE_SYSTEM" # Write to file system only (JSON documents)
|
||||
RAW_BINARY = "RAW_BINARY" # Write raw binary to S3 (no text extraction)
|
||||
|
||||
|
||||
class SyncType(str, PyEnum):
|
||||
|
||||
@@ -27,6 +27,7 @@ from onyx.server.features.build.api.models import BuildConnectorStatus
|
||||
from onyx.server.features.build.api.models import RateLimitResponse
|
||||
from onyx.server.features.build.api.rate_limit import get_user_rate_limit_status
|
||||
from onyx.server.features.build.api.sessions_api import router as sessions_router
|
||||
from onyx.server.features.build.api.user_library import router as user_library_router
|
||||
from onyx.server.features.build.db.sandbox import get_sandbox_by_user_id
|
||||
from onyx.server.features.build.sandbox import get_sandbox_manager
|
||||
from onyx.server.features.build.session.manager import SessionManager
|
||||
@@ -51,9 +52,10 @@ def require_onyx_craft_enabled(user: User = Depends(current_user)) -> User:
|
||||
|
||||
router = APIRouter(prefix="/build", dependencies=[Depends(require_onyx_craft_enabled)])
|
||||
|
||||
# Include sub-routers for sessions and messages
|
||||
# Include sub-routers for sessions, messages, and user library
|
||||
router.include_router(sessions_router, tags=["build"])
|
||||
router.include_router(messages_router, tags=["build"])
|
||||
router.include_router(user_library_router, tags=["build"])
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
@@ -86,14 +88,24 @@ def get_build_connectors(
|
||||
On the build configure page, all users (including admins) only see connectors
|
||||
they own/created. Users can create new connectors if they don't have one of a type.
|
||||
"""
|
||||
cc_pairs = get_connector_credential_pairs_for_user(
|
||||
# Fetch both FILE_SYSTEM (standard connectors) and RAW_BINARY (User Library) connectors
|
||||
file_system_cc_pairs = get_connector_credential_pairs_for_user(
|
||||
db_session=db_session,
|
||||
user=user,
|
||||
get_editable=False,
|
||||
eager_load_connector=True,
|
||||
eager_load_credential=True,
|
||||
processing_mode=ProcessingMode.FILE_SYSTEM, # Only show FILE_SYSTEM connectors
|
||||
processing_mode=ProcessingMode.FILE_SYSTEM,
|
||||
)
|
||||
raw_binary_cc_pairs = get_connector_credential_pairs_for_user(
|
||||
db_session=db_session,
|
||||
user=user,
|
||||
get_editable=False,
|
||||
eager_load_connector=True,
|
||||
eager_load_credential=True,
|
||||
processing_mode=ProcessingMode.RAW_BINARY,
|
||||
)
|
||||
cc_pairs = file_system_cc_pairs + raw_binary_cc_pairs
|
||||
|
||||
# Filter to only show connectors created by the current user
|
||||
# All users (including admins) must create their own connectors on the build configure page
|
||||
|
||||
871
backend/onyx/server/features/build/api/user_library.py
Normal file
871
backend/onyx/server/features/build/api/user_library.py
Normal file
@@ -0,0 +1,871 @@
|
||||
"""API endpoints for User Library file management in Craft.
|
||||
|
||||
This module provides endpoints for uploading and managing raw binary files
|
||||
(xlsx, pptx, docx, csv, etc.) that are stored directly in S3 for sandbox access.
|
||||
|
||||
Files are stored at:
|
||||
s3://{bucket}/{tenant_id}/knowledge/{user_id}/user_library/{path}
|
||||
|
||||
And synced to sandbox at:
|
||||
/workspace/files/user_library/{path}
|
||||
|
||||
Known Issues / TODOs:
|
||||
- Memory: Upload endpoints read entire file content into memory (up to 500MB).
|
||||
Should be refactored to stream uploads directly to S3 via multipart upload
|
||||
for better memory efficiency under concurrent load.
|
||||
- Transaction safety: Multi-file uploads are not atomic. If the endpoint fails
|
||||
mid-batch (e.g., file 3 of 5 exceeds storage quota), files 1-2 are already
|
||||
persisted to S3 and DB. A partial upload is not catastrophic but the response
|
||||
implies atomicity that doesn't exist.
|
||||
"""
|
||||
|
||||
import hashlib
|
||||
import mimetypes
|
||||
import re
|
||||
import zipfile
|
||||
from datetime import datetime
|
||||
from datetime import timezone
|
||||
from io import BytesIO
|
||||
from typing import Any
|
||||
from uuid import UUID
|
||||
|
||||
from fastapi import APIRouter
|
||||
from fastapi import Depends
|
||||
from fastapi import File
|
||||
from fastapi import Form
|
||||
from fastapi import HTTPException
|
||||
from fastapi import Query
|
||||
from fastapi import UploadFile
|
||||
from pydantic import BaseModel
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from onyx.auth.users import current_user
|
||||
from onyx.background.celery.versioned_apps.client import app as celery_app
|
||||
from onyx.configs.constants import DocumentSource
|
||||
from onyx.configs.constants import OnyxCeleryQueues
|
||||
from onyx.configs.constants import OnyxCeleryTask
|
||||
from onyx.db.connector_credential_pair import update_connector_credential_pair
|
||||
from onyx.db.document import upsert_document_by_connector_credential_pair
|
||||
from onyx.db.document import upsert_documents
|
||||
from onyx.db.engine.sql_engine import get_session
|
||||
from onyx.db.enums import ConnectorCredentialPairStatus
|
||||
from onyx.db.models import User
|
||||
from onyx.document_index.interfaces import DocumentMetadata
|
||||
from onyx.server.features.build.configs import USER_LIBRARY_MAX_FILE_SIZE_BYTES
|
||||
from onyx.server.features.build.configs import USER_LIBRARY_MAX_FILES_PER_UPLOAD
|
||||
from onyx.server.features.build.configs import USER_LIBRARY_MAX_TOTAL_SIZE_BYTES
|
||||
from onyx.server.features.build.indexing.persistent_document_writer import (
|
||||
get_persistent_document_writer,
|
||||
)
|
||||
from onyx.server.features.build.indexing.persistent_document_writer import (
|
||||
S3PersistentDocumentWriter,
|
||||
)
|
||||
from onyx.server.features.build.utils import sanitize_filename as api_sanitize_filename
|
||||
from onyx.utils.logger import setup_logger
|
||||
from shared_configs.contextvars import get_current_tenant_id
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
router = APIRouter(prefix="/user-library")
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Pydantic Models
|
||||
# =============================================================================
|
||||
|
||||
|
||||
class LibraryEntryResponse(BaseModel):
    """Response for a single library entry (file or directory)."""

    id: str  # document_id
    name: str  # basename (last path segment) of the entry
    path: str  # full path of the entry within the user library
    is_directory: bool  # True for virtual directories, False for files
    file_size: int | None  # size in bytes; None for directories
    mime_type: str | None  # None for directories or unknown content types
    sync_enabled: bool  # whether the entry is synced into the sandbox
    created_at: datetime
    # Optional nested entries; None when the caller returns a flat listing.
    children: list["LibraryEntryResponse"] | None = None
|
||||
|
||||
|
||||
class CreateDirectoryRequest(BaseModel):
    """Request to create a virtual directory."""

    name: str  # directory name; sanitized server-side before use
    parent_path: str = "/"  # library path under which to create the directory
|
||||
|
||||
|
||||
class UploadResponse(BaseModel):
    """Response after successful file upload."""

    entries: list[LibraryEntryResponse]  # one entry per stored file
    total_uploaded: int  # number of files persisted in this request
    total_size_bytes: int  # cumulative size of the uploaded files
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Helper Functions
|
||||
# =============================================================================
|
||||
|
||||
|
||||
def _sanitize_path(path: str) -> str:
|
||||
"""Sanitize a file path, removing traversal attempts and normalizing.
|
||||
|
||||
Removes '..' and '.' segments and ensures the path starts with '/'.
|
||||
Only allows alphanumeric characters, hyphens, underscores, dots, spaces,
|
||||
and forward slashes. All other characters are stripped.
|
||||
"""
|
||||
parts = path.split("/")
|
||||
sanitized_parts: list[str] = []
|
||||
for p in parts:
|
||||
if not p or p == ".." or p == ".":
|
||||
continue
|
||||
# Strip any character not in the whitelist
|
||||
cleaned = re.sub(r"[^a-zA-Z0-9\-_. ]", "", p)
|
||||
if cleaned:
|
||||
sanitized_parts.append(cleaned)
|
||||
return "/" + "/".join(sanitized_parts)
|
||||
|
||||
|
||||
def _build_document_id(user_id: str, path: str) -> str:
|
||||
"""Build a document ID for a craft file.
|
||||
|
||||
Deterministic: re-uploading the same file to the same path will produce the
|
||||
same document ID, allowing upsert to overwrite the previous record.
|
||||
|
||||
Uses a hash of the path to avoid collisions from separator replacement
|
||||
(e.g., "/a/b_c" vs "/a_b/c" would collide with naive slash-to-underscore).
|
||||
"""
|
||||
path_hash = hashlib.sha256(path.encode()).hexdigest()[:16]
|
||||
return f"CRAFT_FILE__{user_id}__{path_hash}"
|
||||
|
||||
|
||||
def _trigger_sandbox_sync(
    user_id: str, tenant_id: str, source: str | None = None
) -> None:
    """Trigger sandbox file sync task.

    Fire-and-forget: enqueues a Celery task on the sandbox queue and returns
    immediately; it does not wait for the sync to complete.

    Args:
        user_id: The user ID whose sandbox should be synced
        tenant_id: The tenant ID for S3 path construction
        source: Optional source type (e.g., "user_library"). If specified,
            only syncs that source's directory with --delete flag.
    """
    celery_app.send_task(
        OnyxCeleryTask.SANDBOX_FILE_SYNC,
        kwargs={"user_id": user_id, "tenant_id": tenant_id, "source": source},
        queue=OnyxCeleryQueues.SANDBOX,
    )
|
||||
|
||||
|
||||
def _get_user_storage_bytes(db_session: Session, user_id: UUID) -> int:
    """Get total storage usage for a user's library files.

    Uses SQL aggregation to sum file_size from doc_metadata JSONB for all
    CRAFT_FILE documents owned by this user, avoiding loading all documents
    into Python memory.

    Returns:
        Total bytes used; 0 when the user has no files.
    """
    # Imported locally — presumably to avoid an import cycle with the
    # db layer; TODO confirm before hoisting to module level.
    from sqlalchemy import and_
    from sqlalchemy import cast
    from sqlalchemy import func
    from sqlalchemy import Integer
    from sqlalchemy import select

    from onyx.db.models import Connector
    from onyx.db.models import ConnectorCredentialPair
    from onyx.db.models import Document as DbDocument
    from onyx.db.models import DocumentByConnectorCredentialPair

    stmt = (
        select(
            # JSONB value is extracted as text, cast to Integer, summed;
            # COALESCE maps the empty-set SUM (NULL) to 0.
            func.coalesce(
                func.sum(
                    cast(
                        DbDocument.doc_metadata["file_size"].as_string(),
                        Integer,
                    )
                ),
                0,
            )
        )
        # Document -> cc-pair mapping row.
        .join(
            DocumentByConnectorCredentialPair,
            DbDocument.id == DocumentByConnectorCredentialPair.id,
        )
        # Mapping row -> ConnectorCredentialPair (composite key).
        .join(
            ConnectorCredentialPair,
            and_(
                DocumentByConnectorCredentialPair.connector_id
                == ConnectorCredentialPair.connector_id,
                DocumentByConnectorCredentialPair.credential_id
                == ConnectorCredentialPair.credential_id,
            ),
        )
        # cc-pair -> Connector, so we can filter on source.
        .join(
            Connector,
            ConnectorCredentialPair.connector_id == Connector.id,
        )
        .where(Connector.source == DocumentSource.CRAFT_FILE)
        .where(ConnectorCredentialPair.creator_id == user_id)
        # Exclude virtual directories; IS NOT TRUE also keeps rows where
        # is_directory is absent/NULL (regular files).
        .where(DbDocument.doc_metadata["is_directory"].as_boolean().is_not(True))
    )
    result = db_session.execute(stmt).scalar()
    return int(result or 0)
|
||||
|
||||
|
||||
def _get_or_create_craft_connector(db_session: Session, user: User) -> tuple[int, int]:
    """Get or create the CRAFT_FILE connector for a user.

    Returns:
        Tuple of (connector_id, credential_id)

    Note: We need to create a credential even though CRAFT_FILE doesn't require
    authentication. This is because Onyx's connector-credential pair system
    requires a credential for all connectors. The credential is empty ({}).

    This function handles recovery from partial creation failures by detecting
    orphaned connectors (connectors without cc_pairs) and completing their setup.

    Commits the session before returning when it creates/links anything.
    """
    # Imported locally — presumably to avoid import cycles; TODO confirm.
    from onyx.connectors.models import InputType
    from onyx.db.connector import create_connector
    from onyx.db.connector import fetch_connectors
    from onyx.db.connector_credential_pair import add_credential_to_connector
    from onyx.db.connector_credential_pair import (
        get_connector_credential_pairs_for_user,
    )
    from onyx.db.credentials import create_credential
    from onyx.db.credentials import fetch_credentials_for_user
    from onyx.db.enums import AccessType
    from onyx.db.enums import ProcessingMode
    from onyx.server.documents.models import ConnectorBase
    from onyx.server.documents.models import CredentialBase

    # Fast path: user already has a complete CRAFT_FILE cc_pair.
    cc_pairs = get_connector_credential_pairs_for_user(
        db_session=db_session,
        user=user,
        get_editable=False,
        eager_load_connector=True,
        eager_load_credential=True,
        processing_mode=ProcessingMode.RAW_BINARY,
    )

    for cc_pair in cc_pairs:
        if cc_pair.connector.source == DocumentSource.CRAFT_FILE:
            return cc_pair.connector.id, cc_pair.credential.id

    # Check for orphaned connector (created but cc_pair creation failed
    # previously). An orphaned connector has no credentials linked.
    # NOTE(review): despite the comment below, this loop only checks
    # `conn.credentials` being empty — it does not actually verify the
    # connector belongs to this user; confirm this is safe for multi-user
    # tenants before relying on it.
    existing_connectors = fetch_connectors(
        db_session, sources=[DocumentSource.CRAFT_FILE]
    )
    orphaned_connector = None
    for conn in existing_connectors:
        if conn.name != "User Library":
            continue
        # Verify this connector has no cc_pairs (i.e., is actually orphaned)
        # and that a matching credential exists for this user
        if not conn.credentials:
            orphaned_connector = conn
            break

    if orphaned_connector:
        # Reuse the half-created connector instead of making a duplicate.
        connector_id = orphaned_connector.id
        logger.info(
            f"Found orphaned User Library connector {connector_id}, completing setup"
        )
    else:
        # Create new connector
        connector_data = ConnectorBase(
            name="User Library",
            source=DocumentSource.CRAFT_FILE,
            input_type=InputType.LOAD_STATE,
            connector_specific_config={"disabled_paths": []},
            refresh_freq=None,  # never auto-refreshed; uploads drive content
            prune_freq=None,
        )
        connector_response = create_connector(
            db_session=db_session,
            connector_data=connector_data,
        )
        connector_id = connector_response.id

    # Try to reuse an existing User Library credential for this user
    existing_credentials = fetch_credentials_for_user(
        db_session=db_session,
        user=user,
    )
    credential = None
    for cred in existing_credentials:
        if (
            cred.source == DocumentSource.CRAFT_FILE
            and cred.name == "User Library Credential"
        ):
            credential = cred
            break

    if credential is None:
        # Create credential (empty - no auth needed, but required by the system)
        credential_data = CredentialBase(
            credential_json={},
            admin_public=False,
            source=DocumentSource.CRAFT_FILE,
            name="User Library Credential",
        )
        credential = create_credential(
            credential_data=credential_data,
            user=user,
            db_session=db_session,
        )

    # Link them with RAW_BINARY processing mode
    add_credential_to_connector(
        db_session=db_session,
        connector_id=connector_id,
        credential_id=credential.id,
        user=user,
        cc_pair_name="User Library",
        access_type=AccessType.PRIVATE,  # library files are never shared
        groups=None,
        processing_mode=ProcessingMode.RAW_BINARY,
    )

    db_session.commit()
    return connector_id, credential.id
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# API Endpoints
|
||||
# =============================================================================
|
||||
|
||||
|
||||
@router.get("/tree")
def get_library_tree(
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> list[LibraryEntryResponse]:
    """List the user's uploaded files and directories.

    Returns all CRAFT_FILE documents for the user. NOTE(review): despite
    the endpoint name, this returns a flat list — `children` is never
    populated here; presumably the client assembles the hierarchy from
    `path`. Confirm against the frontend consumer.
    """
    from onyx.db.document import get_documents_by_source

    # Get CRAFT_FILE documents for this user (filtered at SQL level)
    user_docs = get_documents_by_source(
        db_session=db_session,
        source=DocumentSource.CRAFT_FILE,
        creator_id=user.id,
    )

    # Map each document row onto the API response shape. File attributes
    # (size, mime type, directory flag) live in the JSONB doc_metadata column.
    entries: list[LibraryEntryResponse] = []
    now = datetime.now(timezone.utc)
    for doc in user_docs:
        doc_metadata = doc.doc_metadata or {}
        entries.append(
            LibraryEntryResponse(
                id=doc.id,
                # semantic_id holds the library path; its last segment is the
                # display name.
                name=doc.semantic_id.split("/")[-1] if doc.semantic_id else "unknown",
                path=doc.semantic_id or "",
                is_directory=doc_metadata.get("is_directory", False),
                file_size=doc_metadata.get("file_size"),
                mime_type=doc_metadata.get("mime_type"),
                sync_enabled=not doc_metadata.get("sync_disabled", False),
                # last_modified doubles as creation time; fall back to "now"
                # when unset.
                created_at=doc.last_modified or now,
            )
        )

    return entries
|
||||
|
||||
|
||||
@router.post("/upload")
async def upload_files(
    files: list[UploadFile] = File(...),
    path: str = Form("/"),
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> UploadResponse:
    """Upload files directly to S3 and track in PostgreSQL.

    Files are stored as raw binary (no text extraction) for access by
    the sandbox agent using Python libraries like openpyxl, python-pptx, etc.

    Raises:
        HTTPException(500): no tenant ID in context.
        HTTPException(400): too many files, an over-size file, or exceeding
            the user's total storage quota.

    NOTE(review): the batch is not atomic — files persisted before a failing
    one remain in S3/DB (see module docstring). Also, no explicit commit is
    issued here after the per-file upserts; presumably a callee or the
    session dependency commits — verify.
    """
    tenant_id = get_current_tenant_id()
    if tenant_id is None:
        raise HTTPException(status_code=500, detail="Tenant ID not found")

    # Validate file count
    if len(files) > USER_LIBRARY_MAX_FILES_PER_UPLOAD:
        raise HTTPException(
            status_code=400,
            detail=f"Too many files. Maximum is {USER_LIBRARY_MAX_FILES_PER_UPLOAD} per upload.",
        )

    # Check cumulative storage usage
    existing_usage = _get_user_storage_bytes(db_session, user.id)

    # Get or create connector (commits the session internally)
    connector_id, credential_id = _get_or_create_craft_connector(db_session, user)

    # Get the persistent document writer
    writer = get_persistent_document_writer(
        user_id=str(user.id),
        tenant_id=tenant_id,
    )

    uploaded_entries: list[LibraryEntryResponse] = []
    total_size = 0
    now = datetime.now(timezone.utc)

    # Sanitize the base path (strips traversal and illegal characters)
    base_path = _sanitize_path(path)

    for file in files:
        # TODO: Stream directly to S3 via multipart upload instead of reading
        # entire file into memory. With 500MB max file size, this can OOM under
        # concurrent uploads.
        content = await file.read()
        file_size = len(content)

        # Validate individual file size
        if file_size > USER_LIBRARY_MAX_FILE_SIZE_BYTES:
            raise HTTPException(
                status_code=400,
                detail=f"File '{file.filename}' exceeds maximum size of {USER_LIBRARY_MAX_FILE_SIZE_BYTES // (1024*1024)}MB",
            )

        # Validate cumulative storage (existing + this upload batch)
        total_size += file_size
        if existing_usage + total_size > USER_LIBRARY_MAX_TOTAL_SIZE_BYTES:
            raise HTTPException(
                status_code=400,
                detail=f"Total storage would exceed maximum of {USER_LIBRARY_MAX_TOTAL_SIZE_BYTES // (1024*1024*1024)}GB",
            )

        # Sanitize filename and build the library path for this file
        safe_filename = api_sanitize_filename(file.filename or "unnamed")
        file_path = f"{base_path}/{safe_filename}".replace("//", "/")

        # Write raw binary to storage
        storage_key = writer.write_raw_file(
            path=file_path,
            content=content,
            content_type=file.content_type,
        )

        # Track in document table; ID is deterministic per (user, path) so
        # re-uploads upsert instead of duplicating.
        doc_id = _build_document_id(str(user.id), file_path)
        doc_metadata = DocumentMetadata(
            connector_id=connector_id,
            credential_id=credential_id,
            document_id=doc_id,
            semantic_identifier=f"user_library{file_path}",
            first_link=storage_key,
            doc_metadata={
                "storage_key": storage_key,
                "file_path": file_path,
                "file_size": file_size,
                "mime_type": file.content_type,
                "is_directory": False,
            },
        )
        upsert_documents(db_session, [doc_metadata])
        upsert_document_by_connector_credential_pair(
            db_session, connector_id, credential_id, [doc_id]
        )

        uploaded_entries.append(
            LibraryEntryResponse(
                id=doc_id,
                name=safe_filename,
                path=file_path,
                is_directory=False,
                file_size=file_size,
                mime_type=file.content_type,
                sync_enabled=True,
                created_at=now,
            )
        )

    # Mark connector as having succeeded (sets last_successful_index_time)
    # This allows the demo data toggle to be disabled
    update_connector_credential_pair(
        db_session=db_session,
        connector_id=connector_id,
        credential_id=credential_id,
        status=ConnectorCredentialPairStatus.ACTIVE,
        net_docs=len(uploaded_entries),
        run_dt=now,
    )

    # Trigger sandbox sync for user_library source only (fire-and-forget)
    _trigger_sandbox_sync(str(user.id), tenant_id, source="user_library")

    logger.info(
        f"Uploaded {len(uploaded_entries)} files ({total_size} bytes) for user {user.id}"
    )

    return UploadResponse(
        entries=uploaded_entries,
        total_uploaded=len(uploaded_entries),
        total_size_bytes=total_size,
    )
|
||||
|
||||
|
||||
@router.post("/upload-zip")
async def upload_zip(
    file: UploadFile = File(...),
    path: str = Form("/"),
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> UploadResponse:
    """Upload and extract a zip file, storing each extracted file to S3.

    Preserves the directory structure from the zip file. Hidden files,
    __MACOSX entries, and over-size members are skipped (over-size members
    are skipped with a warning rather than failing the whole upload).

    Raises:
        HTTPException(500): no tenant ID in context.
        HTTPException(400): invalid zip, too many members, or the
            (declared or actual) decompressed size exceeds the quota.

    NOTE(review): the declared-size zip-bomb check trusts the zip's own
    headers; the per-member cumulative check below is the real enforcement
    since header sizes can lie.
    """
    tenant_id = get_current_tenant_id()
    if tenant_id is None:
        raise HTTPException(status_code=500, detail="Tenant ID not found")

    # Read zip content (entire archive buffered in memory — see module TODOs)
    content = await file.read()
    if len(content) > USER_LIBRARY_MAX_TOTAL_SIZE_BYTES:
        raise HTTPException(
            status_code=400,
            detail=f"Zip file exceeds maximum size of {USER_LIBRARY_MAX_TOTAL_SIZE_BYTES // (1024*1024*1024)}GB",
        )

    # Check cumulative storage usage
    existing_usage = _get_user_storage_bytes(db_session, user.id)

    # Get or create connector (commits the session internally)
    connector_id, credential_id = _get_or_create_craft_connector(db_session, user)

    # Get the persistent document writer
    writer = get_persistent_document_writer(
        user_id=str(user.id),
        tenant_id=tenant_id,
    )

    uploaded_entries: list[LibraryEntryResponse] = []
    total_size = 0
    base_path = _sanitize_path(path)
    now = datetime.now(timezone.utc)

    try:
        with zipfile.ZipFile(BytesIO(content), "r") as zip_file:
            # Check file count
            if len(zip_file.namelist()) > USER_LIBRARY_MAX_FILES_PER_UPLOAD:
                raise HTTPException(
                    status_code=400,
                    detail=f"Zip contains too many files. Maximum is {USER_LIBRARY_MAX_FILES_PER_UPLOAD}.",
                )

            # Zip bomb protection: check total decompressed size before extracting
            declared_total = sum(
                info.file_size for info in zip_file.infolist() if not info.is_dir()
            )
            max_decompressed = USER_LIBRARY_MAX_TOTAL_SIZE_BYTES
            if existing_usage + declared_total > max_decompressed:
                raise HTTPException(
                    status_code=400,
                    detail=(
                        f"Zip decompressed size ({declared_total // (1024*1024)}MB) "
                        f"would exceed storage limit."
                    ),
                )

            for zip_info in zip_file.infolist():
                # Skip directories
                if zip_info.is_dir():
                    continue

                # Skip hidden files and __MACOSX
                if (
                    zip_info.filename.startswith("__MACOSX")
                    or "/." in zip_info.filename
                ):
                    continue

                # Read file content (decompresses this member fully in memory)
                file_content = zip_file.read(zip_info.filename)
                file_size = len(file_content)

                # Validate individual file size — skip, not fail, so one big
                # member doesn't abort the rest of the archive
                if file_size > USER_LIBRARY_MAX_FILE_SIZE_BYTES:
                    logger.warning(f"Skipping '{zip_info.filename}' - exceeds max size")
                    continue

                total_size += file_size

                # Validate cumulative storage (actual decompressed bytes)
                if existing_usage + total_size > USER_LIBRARY_MAX_TOTAL_SIZE_BYTES:
                    raise HTTPException(
                        status_code=400,
                        detail=f"Total storage would exceed maximum of {USER_LIBRARY_MAX_TOTAL_SIZE_BYTES // (1024*1024*1024)}GB",
                    )

                # Build path preserving zip structure; sanitization also
                # defuses zip-slip ("../") entry names
                sanitized_zip_path = _sanitize_path(zip_info.filename)
                file_path = f"{base_path}{sanitized_zip_path}".replace("//", "/")
                file_name = file_path.split("/")[-1]

                # Guess content type from the filename extension
                content_type, _ = mimetypes.guess_type(file_name)

                # Write raw binary to storage
                storage_key = writer.write_raw_file(
                    path=file_path,
                    content=file_content,
                    content_type=content_type,
                )

                # Track in document table (deterministic ID -> upsert)
                doc_id = _build_document_id(str(user.id), file_path)
                doc_metadata = DocumentMetadata(
                    connector_id=connector_id,
                    credential_id=credential_id,
                    document_id=doc_id,
                    semantic_identifier=f"user_library{file_path}",
                    first_link=storage_key,
                    doc_metadata={
                        "storage_key": storage_key,
                        "file_path": file_path,
                        "file_size": file_size,
                        "mime_type": content_type,
                        "is_directory": False,
                    },
                )
                upsert_documents(db_session, [doc_metadata])
                upsert_document_by_connector_credential_pair(
                    db_session, connector_id, credential_id, [doc_id]
                )

                uploaded_entries.append(
                    LibraryEntryResponse(
                        id=doc_id,
                        name=file_name,
                        path=file_path,
                        is_directory=False,
                        file_size=file_size,
                        mime_type=content_type,
                        sync_enabled=True,
                        created_at=now,
                    )
                )

    except zipfile.BadZipFile:
        raise HTTPException(status_code=400, detail="Invalid zip file")

    # Mark connector as having succeeded (sets last_successful_index_time)
    # This allows the demo data toggle to be disabled
    update_connector_credential_pair(
        db_session=db_session,
        connector_id=connector_id,
        credential_id=credential_id,
        status=ConnectorCredentialPairStatus.ACTIVE,
        net_docs=len(uploaded_entries),
        run_dt=now,
    )

    # Trigger sandbox sync for user_library source only (fire-and-forget)
    _trigger_sandbox_sync(str(user.id), tenant_id, source="user_library")

    logger.info(
        f"Extracted {len(uploaded_entries)} files ({total_size} bytes) from zip for user {user.id}"
    )

    return UploadResponse(
        entries=uploaded_entries,
        total_uploaded=len(uploaded_entries),
        total_size_bytes=total_size,
    )
|
||||
|
||||
|
||||
@router.post("/directories")
def create_directory(
    request: CreateDirectoryRequest,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> LibraryEntryResponse:
    """Create a virtual directory in the user's library.

    Directories exist only as document rows with is_directory=True in their
    metadata; no object is written to storage (S3 has no real directories).
    """
    # Ensure the per-user craft connector/credential pair exists.
    connector_id, credential_id = _get_or_create_craft_connector(db_session, user)

    # Sanitize both path components, then join and collapse double slashes.
    sanitized_parent = _sanitize_path(request.parent_path)
    sanitized_name = api_sanitize_filename(request.name)
    dir_path = f"{sanitized_parent}/{sanitized_name}".replace("//", "/")

    # Record the directory in the document table.
    doc_id = _build_document_id(str(user.id), dir_path)
    directory_doc = DocumentMetadata(
        connector_id=connector_id,
        credential_id=credential_id,
        document_id=doc_id,
        semantic_identifier=f"user_library{dir_path}",
        first_link="",
        doc_metadata={
            "is_directory": True,
        },
    )
    upsert_documents(db_session, [directory_doc])
    upsert_document_by_connector_credential_pair(
        db_session, connector_id, credential_id, [doc_id]
    )
    db_session.commit()

    return LibraryEntryResponse(
        id=doc_id,
        name=sanitized_name,
        path=dir_path,
        is_directory=True,
        file_size=None,
        mime_type=None,
        sync_enabled=True,
        created_at=datetime.now(timezone.utc),
    )
|
||||
|
||||
|
||||
@router.patch("/files/{document_id}/toggle")
def toggle_file_sync(
    document_id: str,
    enabled: bool = Query(...),
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> dict[str, Any]:
    """Enable or disable syncing of a library entry into sandboxes.

    Disabling sets sync_disabled=True in the document's metadata; the sandbox
    sync machinery excludes such files when building session workspaces.
    Toggling a directory applies the same state to every descendant.
    """
    from onyx.db.document import get_document
    from onyx.db.document import get_documents_by_source
    from onyx.db.document import update_document_metadata__no_commit

    tenant_id = get_current_tenant_id()
    if tenant_id is None:
        raise HTTPException(status_code=500, detail="Tenant ID not found")

    # Ownership check: the user id is embedded in the document id prefix.
    if not document_id.startswith(f"CRAFT_FILE__{user.id}__"):
        raise HTTPException(
            status_code=403, detail="Not authorized to modify this file"
        )

    doc = get_document(document_id, db_session)
    if doc is None:
        raise HTTPException(status_code=404, detail="File not found")

    sync_disabled = not enabled

    # Flip the flag on the targeted document.
    updated_metadata = dict(doc.doc_metadata or {})
    updated_metadata["sync_disabled"] = sync_disabled
    update_document_metadata__no_commit(db_session, document_id, updated_metadata)

    # Directories cascade the toggle to everything underneath them.
    current_metadata = doc.doc_metadata or {}
    if current_metadata.get("is_directory"):
        folder_path = doc.semantic_id
        if folder_path:
            child_prefix = folder_path + "/"
            # CRAFT_FILE documents for this user (filtered at SQL level).
            candidates = get_documents_by_source(
                db_session=db_session,
                source=DocumentSource.CRAFT_FILE,
                creator_id=user.id,
            )
            for child in candidates:
                if not (child.semantic_id and child.semantic_id.startswith(child_prefix)):
                    continue
                child_metadata = dict(child.doc_metadata or {})
                child_metadata["sync_disabled"] = sync_disabled
                update_document_metadata__no_commit(
                    db_session, child.id, child_metadata
                )

    db_session.commit()

    return {"success": True, "sync_enabled": enabled}
|
||||
|
||||
|
||||
@router.delete("/files/{document_id}")
def delete_file(
    document_id: str,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> dict[str, Any]:
    """Delete a library entry from storage (if it is a file) and the document table."""
    from onyx.db.document import delete_document_by_id__no_commit
    from onyx.db.document import get_document

    tenant_id = get_current_tenant_id()
    if tenant_id is None:
        raise HTTPException(status_code=500, detail="Tenant ID not found")

    # Ownership check: the user id is embedded in the document id prefix.
    if not document_id.startswith(f"CRAFT_FILE__{user.id}__"):
        raise HTTPException(
            status_code=403, detail="Not authorized to delete this file"
        )

    doc = get_document(document_id, db_session)
    if doc is None:
        raise HTTPException(status_code=404, detail="File not found")

    # Directories have no backing storage object; only files need deletion.
    metadata = doc.doc_metadata or {}
    if not metadata.get("is_directory"):
        file_path = metadata.get("file_path")
        if file_path:
            writer = get_persistent_document_writer(
                user_id=str(user.id),
                tenant_id=tenant_id,
            )
            # Best-effort: a failed storage delete should not block DB cleanup.
            try:
                if isinstance(writer, S3PersistentDocumentWriter):
                    writer.delete_raw_file_by_path(file_path)
                else:
                    writer.delete_raw_file(file_path)
            except Exception as e:
                logger.warning(f"Failed to delete file at path {file_path}: {e}")
        else:
            # Fallback for documents created before file_path was stored.
            storage_key = metadata.get("storage_key") or metadata.get("s3_key")
            if storage_key:
                writer = get_persistent_document_writer(
                    user_id=str(user.id),
                    tenant_id=tenant_id,
                )
                try:
                    if isinstance(writer, S3PersistentDocumentWriter):
                        writer.delete_raw_file(storage_key)
                    else:
                        # Local writer needs a relative path, not a storage key.
                        logger.warning(
                            f"Cannot delete file in local mode without file_path: {document_id}"
                        )
                except Exception as e:
                    logger.warning(
                        f"Failed to delete storage object {storage_key}: {e}"
                    )

    # Remove the document row itself, then propagate the change to sandboxes.
    delete_document_by_id__no_commit(db_session, document_id)
    db_session.commit()

    _trigger_sandbox_sync(str(user.id), tenant_id, source="user_library")

    return {"success": True, "deleted": document_id}
|
||||
@@ -132,3 +132,25 @@ ACP_MESSAGE_TIMEOUT = float(os.environ.get("ACP_MESSAGE_TIMEOUT", "900.0"))
|
||||
# Free users always get 5 messages total (not configurable)
|
||||
# Per-user overrides are managed via PostHog feature flag "craft-has-usage-limits"
|
||||
CRAFT_PAID_USER_RATE_LIMIT = int(os.environ.get("CRAFT_PAID_USER_RATE_LIMIT", "25"))
|
||||
|
||||
# ============================================================================
|
||||
# User Library Configuration
|
||||
# For user-uploaded raw files (xlsx, pptx, docx, etc.) in Craft
|
||||
# ============================================================================
|
||||
|
||||
# Maximum size per file in MB (default 500MB)
|
||||
USER_LIBRARY_MAX_FILE_SIZE_MB = int(
|
||||
os.environ.get("USER_LIBRARY_MAX_FILE_SIZE_MB", "500")
|
||||
)
|
||||
USER_LIBRARY_MAX_FILE_SIZE_BYTES = USER_LIBRARY_MAX_FILE_SIZE_MB * 1024 * 1024
|
||||
|
||||
# Maximum total storage per user in GB (default 10GB)
|
||||
USER_LIBRARY_MAX_TOTAL_SIZE_GB = int(
|
||||
os.environ.get("USER_LIBRARY_MAX_TOTAL_SIZE_GB", "10")
|
||||
)
|
||||
USER_LIBRARY_MAX_TOTAL_SIZE_BYTES = USER_LIBRARY_MAX_TOTAL_SIZE_GB * 1024 * 1024 * 1024
|
||||
|
||||
# Maximum files per single upload request (default 100)
|
||||
USER_LIBRARY_MAX_FILES_PER_UPLOAD = int(
|
||||
os.environ.get("USER_LIBRARY_MAX_FILES_PER_UPLOAD", "100")
|
||||
)
|
||||
|
||||
@@ -272,6 +272,70 @@ class PersistentDocumentWriter:
|
||||
|
||||
logger.debug(f"Wrote document to {path}")
|
||||
|
||||
def write_raw_file(
    self,
    path: str,
    content: bytes,
    content_type: str | None = None,  # noqa: ARG002
) -> str:
    """Write a raw binary file to local filesystem (for User Library).

    Unlike write_documents which serializes Document objects to JSON, this method
    writes raw binary content directly. Used for user-uploaded files like xlsx, pptx.

    Args:
        path: Relative path within user's library (e.g., "/project-data/financials.xlsx")
        content: Raw binary content to write
        content_type: MIME type of the file (stored as metadata, unused locally)

    Returns:
        Full filesystem path where file was written

    Raises:
        ValueError: If the path resolves outside the user's library root
            (e.g. via ".." components).
    """
    # Build full path: {base_path}/{tenant}/knowledge/{user}/user_library/{path}
    # Normalize path - ensure it starts with / and doesn't have double slashes
    normalized_path = "/" + path.lstrip("/")
    library_root = (
        self.base_path
        / self.tenant_id
        / "knowledge"
        / self.user_id
        / "user_library"
    )
    full_path = library_root / normalized_path.lstrip("/")

    # Defense-in-depth: lstrip("/") does not neutralize ".." components, so
    # reject any path that resolves outside the user's library root. The API
    # layer sanitizes paths already; this enforces the invariant locally too.
    if not full_path.resolve().is_relative_to(library_root.resolve()):
        raise ValueError(f"Path escapes user library root: {path}")

    # Create parent directories if they don't exist
    full_path.parent.mkdir(parents=True, exist_ok=True)

    # Write the raw binary content
    with open(full_path, "wb") as f:
        f.write(content)

    logger.debug(f"Wrote raw file to {full_path}")
    return str(full_path)
|
||||
|
||||
def delete_raw_file(self, path: str) -> None:
    """Delete a raw file from local filesystem.

    Args:
        path: Relative path within user's library (e.g., "/project-data/financials.xlsx")

    Raises:
        ValueError: If the path resolves outside the user's library root
            (e.g. via ".." components).
    """
    # Build full path
    normalized_path = "/" + path.lstrip("/")
    library_root = (
        self.base_path
        / self.tenant_id
        / "knowledge"
        / self.user_id
        / "user_library"
    )
    full_path = library_root / normalized_path.lstrip("/")

    # Defense-in-depth: lstrip("/") does not neutralize ".." components, so
    # refuse to delete anything that resolves outside the library root.
    if not full_path.resolve().is_relative_to(library_root.resolve()):
        raise ValueError(f"Path escapes user library root: {path}")

    if full_path.exists():
        full_path.unlink()
        logger.debug(f"Deleted raw file at {full_path}")
    else:
        # Not fatal: the storage object may already be gone.
        logger.warning(f"File not found for deletion: {full_path}")
|
||||
|
||||
|
||||
class S3PersistentDocumentWriter:
|
||||
"""Writes indexed documents to S3 with hierarchical structure.
|
||||
@@ -375,6 +439,73 @@ class S3PersistentDocumentWriter:
|
||||
logger.error(f"Failed to write to S3: {e}")
|
||||
raise
|
||||
|
||||
def write_raw_file(
    self,
    path: str,
    content: bytes,
    content_type: str | None = None,
) -> str:
    """Upload raw binary content to S3 under the user's library prefix.

    In contrast to write_documents (which serializes Document objects to
    JSON), this stores the bytes verbatim - used for user uploads such as
    xlsx/pptx files.

    Args:
        path: Relative path within user's library (e.g., "/project-data/financials.xlsx")
        content: Raw binary content to write
        content_type: MIME type of the file

    Returns:
        S3 key where file was written
    """
    # Key layout: {tenant}/knowledge/{user}/user_library/{path}
    relative = path.lstrip("/")
    s3_key = (
        f"{self.tenant_id}/knowledge/{self.user_id}/user_library/{relative}"
    )

    client = self._get_s3_client()
    try:
        client.put_object(
            Bucket=self.bucket,
            Key=s3_key,
            Body=content,
            # Default to a generic binary type when the caller gave none.
            ContentType=content_type or "application/octet-stream",
        )
    except ClientError as e:
        logger.error(f"Failed to write raw file to S3: {e}")
        raise

    logger.debug(f"Wrote raw file to s3://{self.bucket}/{s3_key}")
    return s3_key
|
||||
|
||||
def delete_raw_file(self, s3_key: str) -> None:
    """Remove a raw file object from S3.

    Args:
        s3_key: Full S3 key of the file to delete
    """
    client = self._get_s3_client()
    try:
        client.delete_object(Bucket=self.bucket, Key=s3_key)
    except ClientError as e:
        logger.error(f"Failed to delete raw file from S3: {e}")
        raise
    logger.debug(f"Deleted raw file at s3://{self.bucket}/{s3_key}")
|
||||
|
||||
def delete_raw_file_by_path(self, path: str) -> None:
    """Remove a raw file from S3 given its library-relative path.

    Args:
        path: Relative path within user's library (e.g., "/project-data/financials.xlsx")
    """
    # Reconstruct the full key, then delegate to the key-based delete.
    relative = path.lstrip("/")
    self.delete_raw_file(
        f"{self.tenant_id}/knowledge/{self.user_id}/user_library/{relative}"
    )
|
||||
|
||||
|
||||
def get_persistent_document_writer(
|
||||
user_id: str,
|
||||
|
||||
@@ -125,6 +125,7 @@ class SandboxManager(ABC):
|
||||
user_work_area: str | None = None,
|
||||
user_level: str | None = None,
|
||||
use_demo_data: bool = False,
|
||||
excluded_user_library_paths: list[str] | None = None,
|
||||
) -> None:
|
||||
"""Set up a session workspace within an existing sandbox.
|
||||
|
||||
@@ -149,6 +150,9 @@ class SandboxManager(ABC):
|
||||
user_work_area: User's work area for demo persona (e.g., "engineering")
|
||||
user_level: User's level for demo persona (e.g., "ic", "manager")
|
||||
use_demo_data: If True, symlink files/ to demo data; else to user files
|
||||
excluded_user_library_paths: List of paths within user_library to exclude
|
||||
from the sandbox (e.g., ["/data/file.xlsx"]). Only applies when
|
||||
use_demo_data=False. Files at these paths won't be accessible.
|
||||
|
||||
Raises:
|
||||
RuntimeError: If workspace setup fails
|
||||
@@ -423,7 +427,9 @@ class SandboxManager(ABC):
|
||||
For Kubernetes backend: Executes `s5cmd sync` in the file-sync sidecar container.
|
||||
For Local backend: No-op since files are directly accessible via symlink.
|
||||
|
||||
This is idempotent - only downloads changed files.
|
||||
This is idempotent - only downloads changed files. File visibility in
|
||||
sessions is controlled via filtered symlinks in setup_session_workspace(),
|
||||
not at the sync level.
|
||||
|
||||
Args:
|
||||
sandbox_id: The sandbox UUID
|
||||
|
||||
@@ -61,10 +61,10 @@ CONNECTOR_INFO: dict[str, ConnectorInfoEntry] = {
|
||||
"file_pattern": "`PAGE_TITLE.json`",
|
||||
"scan_depth": 1,
|
||||
},
|
||||
"org_info": {
|
||||
"summary": "Organizational structure and user identity",
|
||||
"file_pattern": "Various JSON files",
|
||||
"scan_depth": 0,
|
||||
"user_library": {
|
||||
"summary": "User-uploaded files (spreadsheets, documents, presentations, etc.)",
|
||||
"file_pattern": "Any file format",
|
||||
"scan_depth": 1,
|
||||
},
|
||||
}
|
||||
DEFAULT_SCAN_DEPTH = 1
|
||||
|
||||
@@ -1087,6 +1087,7 @@ done
|
||||
user_work_area: str | None = None,
|
||||
user_level: str | None = None,
|
||||
use_demo_data: bool = False,
|
||||
excluded_user_library_paths: list[str] | None = None,
|
||||
) -> None:
|
||||
"""Set up a session workspace within an existing sandbox pod.
|
||||
|
||||
@@ -1115,6 +1116,8 @@ done
|
||||
user_level: User's level for demo persona (e.g., "ic", "manager")
|
||||
use_demo_data: If True, symlink files/ to /workspace/demo_data;
|
||||
else to /workspace/files (S3-synced user files)
|
||||
excluded_user_library_paths: List of paths within user_library/ to exclude
|
||||
(e.g., ["/data/file.xlsx"]). These files won't be accessible in the session.
|
||||
|
||||
Raises:
|
||||
RuntimeError: If workspace setup fails
|
||||
@@ -1194,6 +1197,91 @@ printf '%s' '{org_structure_escaped}' > {session_path}/org_info/organization_str
|
||||
# Create files symlink to demo data (baked into image)
|
||||
echo "Creating files symlink to demo data: {symlink_target}"
|
||||
ln -sf {symlink_target} {session_path}/files
|
||||
"""
|
||||
elif excluded_user_library_paths:
|
||||
# User files with exclusions: create filtered symlink structure
|
||||
# Instead of symlinking to /workspace/files directly, create a directory
|
||||
# with symlinks to each top-level item, then filter user_library contents
|
||||
#
|
||||
# Use newline-delimited exclusion list written via heredoc to avoid
|
||||
# shell injection from path names. Paths are also pre-sanitized by
|
||||
# _sanitize_path() which enforces an alphanumeric whitelist.
|
||||
# The heredoc delimiter is randomized to prevent a filename from
|
||||
# accidentally terminating the heredoc early.
|
||||
excluded_paths_lines = "\n".join(
|
||||
p.lstrip("/") for p in excluded_user_library_paths
|
||||
)
|
||||
heredoc_delim = f"_EXCL_{uuid4().hex[:12]}_"
|
||||
files_symlink_setup = f"""
|
||||
# Create filtered files directory with exclusions
|
||||
mkdir -p {session_path}/files
|
||||
|
||||
# Symlink all top-level directories except user_library
|
||||
for item in /workspace/files/*; do
|
||||
[ -e "$item" ] || continue
|
||||
name=$(basename "$item")
|
||||
if [ "$name" != "user_library" ]; then
|
||||
ln -sf "$item" {session_path}/files/"$name"
|
||||
fi
|
||||
done
|
||||
|
||||
# Write excluded paths to a temp file (one per line, via heredoc for safety)
|
||||
EXCL_FILE=$(mktemp)
|
||||
cat > "$EXCL_FILE" << '{heredoc_delim}'
|
||||
{excluded_paths_lines}
|
||||
{heredoc_delim}
|
||||
|
||||
# Check if a relative path is excluded (exact match or child of excluded dir)
|
||||
is_excluded() {{
|
||||
local rel_path="$1"
|
||||
while IFS= read -r excl || [ -n "$excl" ]; do
|
||||
[ -z "$excl" ] && continue
|
||||
if [ "$rel_path" = "$excl" ]; then
|
||||
return 0
|
||||
fi
|
||||
case "$rel_path" in
|
||||
"$excl"/*) return 0 ;;
|
||||
esac
|
||||
done < "$EXCL_FILE"
|
||||
return 1
|
||||
}}
|
||||
|
||||
# Recursively create symlinks for non-excluded files
|
||||
create_filtered_symlinks() {{
|
||||
src_dir="$1"
|
||||
dst_dir="$2"
|
||||
rel_base="$3"
|
||||
|
||||
for item in "$src_dir"/*; do
|
||||
[ -e "$item" ] || continue
|
||||
name=$(basename "$item")
|
||||
if [ -n "$rel_base" ]; then
|
||||
rel_path="$rel_base/$name"
|
||||
else
|
||||
rel_path="$name"
|
||||
fi
|
||||
|
||||
if is_excluded "$rel_path"; then
|
||||
continue
|
||||
fi
|
||||
|
||||
if [ -d "$item" ]; then
|
||||
mkdir -p "$dst_dir/$name"
|
||||
create_filtered_symlinks "$item" "$dst_dir/$name" "$rel_path"
|
||||
rmdir "$dst_dir/$name" 2>/dev/null || true
|
||||
else
|
||||
ln -sf "$item" "$dst_dir/$name"
|
||||
fi
|
||||
done
|
||||
}}
|
||||
|
||||
if [ -d "/workspace/files/user_library" ]; then
|
||||
mkdir -p {session_path}/files/user_library
|
||||
create_filtered_symlinks /workspace/files/user_library {session_path}/files/user_library ""
|
||||
rmdir {session_path}/files/user_library 2>/dev/null || true
|
||||
fi
|
||||
|
||||
rm -f "$EXCL_FILE"
|
||||
"""
|
||||
else:
|
||||
# Normal mode: symlink to user's S3-synced knowledge files
|
||||
@@ -2008,6 +2096,10 @@ echo "Session config regeneration complete"
|
||||
|
||||
This is safe to call multiple times - s5cmd sync is idempotent.
|
||||
|
||||
Note: For user_library source, --delete is NOT used since deletions
|
||||
are handled explicitly by the delete_file API endpoint. File visibility
|
||||
in sessions is controlled via filtered symlinks in setup_session_workspace().
|
||||
|
||||
Args:
|
||||
sandbox_id: The sandbox UUID
|
||||
user_id: The user ID (for S3 path construction)
|
||||
@@ -2033,9 +2125,20 @@ echo "Session config regeneration complete"
|
||||
|
||||
# s5cmd sync: high-performance parallel S3 sync
|
||||
# --delete: mirror S3 to local (remove files that no longer exist in source)
|
||||
# Use --delete for external connectors (gmail, google_drive) where
|
||||
# files can be removed from the source.
|
||||
# Do NOT use --delete for user_library - files are only added by user
|
||||
# uploads, and deletions are handled explicitly by the delete_file endpoint.
|
||||
# timeout: prevent zombie processes from kubectl exec disconnections
|
||||
# trap: kill child processes on exit/disconnect
|
||||
source_info = f" (source={source})" if source else ""
|
||||
|
||||
# Only use --delete for external connectors where the source of truth is external.
|
||||
# For user_library, we only add files - deletions are handled explicitly.
|
||||
# When source is None (sync all), we also skip --delete to be safe.
|
||||
use_delete = source is not None and source != "user_library"
|
||||
delete_flag = " --delete" if use_delete else ""
|
||||
|
||||
sync_script = f"""
|
||||
# Kill child processes on exit/disconnect to prevent zombie s5cmd workers
|
||||
cleanup() {{ pkill -P $$ 2>/dev/null || true; }}
|
||||
@@ -2052,7 +2155,7 @@ mkdir -p "{local_path}"
|
||||
# Exit codes: 0=success, 1=success with warnings, 124=timeout
|
||||
sync_exit_code=0
|
||||
timeout --signal=TERM --kill-after=10s 5m \
|
||||
/s5cmd --stat sync --delete "{s3_path}" "{local_path}" 2>&1 || sync_exit_code=$?
|
||||
/s5cmd --stat sync{delete_flag} "{s3_path}" "{local_path}" 2>&1 || sync_exit_code=$?
|
||||
|
||||
echo "=== Sync finished (exit code: $sync_exit_code) ==="
|
||||
|
||||
|
||||
@@ -152,6 +152,117 @@ class LocalSandboxManager(SandboxManager):
|
||||
"""
|
||||
return self._get_sandbox_path(sandbox_id) / "sessions" / str(session_id)
|
||||
|
||||
def _setup_filtered_files(
    self,
    session_path: Path,
    source_path: Path,
    excluded_paths: list[str],
) -> None:
    """Build the session's files/ directory with exclusion-aware symlinks.

    Rather than one symlink to the whole source directory, this creates a
    files/ directory where every top-level item except user_library is
    symlinked directly, while user_library/ is materialized as a real
    directory tree containing symlinks only to non-excluded files.

    Args:
        session_path: Path to the session directory
        source_path: Path to the user's knowledge files (e.g., /storage/tenant/knowledge/user/)
        excluded_paths: List of paths within user_library to exclude
            (e.g., ["/data/file.xlsx", "/reports/old.pdf"])
    """
    files_dir = session_path / "files"
    files_dir.mkdir(parents=True, exist_ok=True)

    # Exclusions are compared without a leading slash.
    excluded = {p.lstrip("/") for p in excluded_paths}

    if not source_path.exists():
        logger.warning(f"Source path does not exist: {source_path}")
        return

    for entry in source_path.iterdir():
        link = files_dir / entry.name
        if entry.name == "user_library":
            # user_library gets the filtered, recursive treatment.
            self._setup_filtered_user_library(
                target_dir=link,
                source_dir=entry,
                excluded_set=excluded,
                base_path="",
            )
        elif not link.exists():
            # Everything else is linked wholesale.
            link.symlink_to(entry, target_is_directory=entry.is_dir())
|
||||
def _setup_filtered_user_library(
    self,
    target_dir: Path,
    source_dir: Path,
    excluded_set: set[str],
    base_path: str,
) -> bool:
    """Recursively mirror user_library as symlinks, skipping excluded paths.

    Directories are created lazily: a directory only appears in the target
    tree once at least one non-excluded file inside it is linked.

    Args:
        target_dir: Where to create the filtered structure
        source_dir: Source user_library directory
        excluded_set: Set of excluded relative paths (e.g., {"data/file.xlsx"})
        base_path: Current path relative to user_library root (for recursion)

    Returns:
        True if any content was created (files or non-empty subdirectories)
    """
    if not source_dir.exists():
        return False

    created_anything = False

    for entry in source_dir.iterdir():
        # Relative path of this entry, used for the exclusion check.
        rel_path = f"{base_path}/{entry.name}".lstrip("/") if base_path else entry.name
        link = target_dir / entry.name

        if entry.is_dir():
            if rel_path in excluded_set:
                # Whole subtree is excluded.
                logger.debug(f"Excluding directory: user_library/{rel_path}")
                continue
            # Descend; the subtree counts only if it produced content.
            if self._setup_filtered_user_library(
                target_dir=link,
                source_dir=entry,
                excluded_set=excluded_set,
                base_path=rel_path,
            ):
                created_anything = True
            continue

        # File entry.
        if rel_path in excluded_set:
            logger.debug(f"Excluding file: user_library/{rel_path}")
            continue

        # Lazily materialize the containing directory on first kept file.
        if not target_dir.exists():
            target_dir.mkdir(parents=True, exist_ok=True)

        if not link.exists():
            link.symlink_to(entry)
        created_anything = True

    return created_anything
|
||||
|
||||
def provision(
|
||||
self,
|
||||
sandbox_id: UUID,
|
||||
@@ -271,6 +382,7 @@ class LocalSandboxManager(SandboxManager):
|
||||
user_work_area: str | None = None,
|
||||
user_level: str | None = None,
|
||||
use_demo_data: bool = False,
|
||||
excluded_user_library_paths: list[str] | None = None,
|
||||
) -> None:
|
||||
"""Set up a session workspace within an existing sandbox.
|
||||
|
||||
@@ -280,7 +392,7 @@ class LocalSandboxManager(SandboxManager):
|
||||
3. .venv/ (from template)
|
||||
4. AGENTS.md
|
||||
5. .agent/skills/
|
||||
6. files/ (symlink to demo data OR user's file_system_path)
|
||||
6. files/ (symlink to demo data OR filtered user files)
|
||||
7. opencode.json
|
||||
8. org_info/ (if demo_data is enabled, the org structure and user identity for the user's demo persona)
|
||||
9. attachments/
|
||||
@@ -297,6 +409,8 @@ class LocalSandboxManager(SandboxManager):
|
||||
user_work_area: User's work area for demo persona (e.g., "engineering")
|
||||
user_level: User's level for demo persona (e.g., "ic", "manager")
|
||||
use_demo_data: If True, symlink files/ to demo data; else to user files
|
||||
excluded_user_library_paths: List of paths within user_library/ to exclude
|
||||
(e.g., ["/data/file.xlsx"]). These files won't be linked in the sandbox.
|
||||
|
||||
Raises:
|
||||
RuntimeError: If workspace setup fails
|
||||
@@ -320,7 +434,7 @@ class LocalSandboxManager(SandboxManager):
|
||||
logger.debug(f"Session directory created at {session_path}")
|
||||
|
||||
try:
|
||||
# Setup files symlink - choose between demo data or user files
|
||||
# Setup files access - choose between demo data or user files
|
||||
if use_demo_data:
|
||||
# Demo mode: symlink to demo data directory
|
||||
symlink_target = Path(DEMO_DATA_PATH)
|
||||
@@ -329,17 +443,33 @@ class LocalSandboxManager(SandboxManager):
|
||||
f"Demo data directory does not exist: {symlink_target}"
|
||||
)
|
||||
logger.info(f"Setting up files symlink to demo data: {symlink_target}")
|
||||
elif file_system_path:
|
||||
# Normal mode: symlink to user's knowledge files
|
||||
symlink_target = Path(file_system_path)
|
||||
logger.debug(
|
||||
f"Setting up files symlink to user files: {symlink_target}"
|
||||
self._directory_manager.setup_files_symlink(
|
||||
session_path, symlink_target
|
||||
)
|
||||
elif file_system_path:
|
||||
source_path = Path(file_system_path)
|
||||
# Check if we have exclusions for user_library
|
||||
if excluded_user_library_paths:
|
||||
# Create filtered file structure with symlinks to enabled files only
|
||||
logger.debug(
|
||||
f"Setting up filtered files with {len(excluded_user_library_paths)} exclusions"
|
||||
)
|
||||
self._setup_filtered_files(
|
||||
session_path=session_path,
|
||||
source_path=source_path,
|
||||
excluded_paths=excluded_user_library_paths,
|
||||
)
|
||||
else:
|
||||
# No exclusions: simple symlink to entire directory
|
||||
logger.debug(
|
||||
f"Setting up files symlink to user files: {source_path}"
|
||||
)
|
||||
self._directory_manager.setup_files_symlink(
|
||||
session_path, source_path
|
||||
)
|
||||
else:
|
||||
raise ValueError("No files symlink target provided")
|
||||
|
||||
self._directory_manager.setup_files_symlink(session_path, symlink_target)
|
||||
logger.debug("Files symlink ready")
|
||||
logger.debug("Files ready")
|
||||
|
||||
# Setup org_info directory with user identity (at session root)
|
||||
if user_work_area:
|
||||
@@ -1061,7 +1191,8 @@ class LocalSandboxManager(SandboxManager):
|
||||
"""No-op for local mode - files are directly accessible via symlink.
|
||||
|
||||
In local mode, the sandbox's files/ directory is a symlink to the
|
||||
local persistent document storage, so no sync is needed.
|
||||
local persistent document storage, so no sync is needed. File visibility
|
||||
in sessions is controlled via filtered symlinks in setup_session_workspace().
|
||||
|
||||
Args:
|
||||
sandbox_id: The sandbox UUID (unused)
|
||||
|
||||
@@ -2,12 +2,16 @@
|
||||
|
||||
from collections.abc import Iterator
|
||||
from contextlib import contextmanager
|
||||
from typing import TYPE_CHECKING
|
||||
from uuid import UUID
|
||||
|
||||
from celery import shared_task
|
||||
from celery import Task
|
||||
from redis.lock import Lock as RedisLock
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from onyx.background.celery.apps.app_base import task_logger
|
||||
from onyx.configs.constants import CELERY_SANDBOX_FILE_SYNC_LOCK_TIMEOUT
|
||||
from onyx.configs.constants import OnyxCeleryTask
|
||||
@@ -263,6 +267,52 @@ def _acquire_sandbox_file_sync_lock(lock: RedisLock) -> Iterator[bool]:
|
||||
lock.release()
|
||||
|
||||
|
||||
def _get_disabled_user_library_paths(db_session: "Session", user_id: str) -> list[str]:
    """Collect relative user_library paths whose sync has been disabled.

    Looks up the user's CRAFT_FILE documents and returns the library-relative
    path of every one flagged with sync_disabled=True in its metadata.

    Args:
        db_session: Database session
        user_id: The user ID to filter documents

    Returns:
        List of relative file paths to exclude (e.g., ["/data/file.xlsx", "/old/report.pdf"])
    """
    from uuid import UUID

    from onyx.configs.constants import DocumentSource
    from onyx.db.document import get_documents_by_source

    # CRAFT_FILE documents for this user (filtered at SQL level).
    documents = get_documents_by_source(
        db_session=db_session,
        source=DocumentSource.CRAFT_FILE,
        creator_id=UUID(user_id),
    )

    excluded: list[str] = []
    for doc in documents:
        if not (doc.doc_metadata or {}).get("sync_disabled"):
            continue

        # semantic_id format: "user_library/path/to/file.xlsx".
        # Both files AND directories are included - the session-setup shell
        # script handles directory exclusion by treating children of an
        # excluded directory as excluded too.
        semantic_id = doc.semantic_id or ""
        if not semantic_id.startswith("user_library"):
            continue
        relative = semantic_id.removeprefix("user_library")
        if relative:
            excluded.append(relative)

    return excluded
|
||||
|
||||
|
||||
@shared_task(
|
||||
name=OnyxCeleryTask.SANDBOX_FILE_SYNC,
|
||||
soft_time_limit=TIMEOUT_SECONDS,
|
||||
@@ -285,10 +335,14 @@ def sync_sandbox_files(
|
||||
Per-user locking ensures only one sync runs at a time for a given user.
|
||||
If a sync is already in progress, this task will wait until it completes.
|
||||
|
||||
Note: File visibility in sessions is controlled via filtered symlinks in
|
||||
setup_session_workspace(), not at the sync level. The sync mirrors S3
|
||||
faithfully; disabled files are excluded only when creating new sessions.
|
||||
|
||||
Args:
|
||||
user_id: The user ID whose sandbox should be synced
|
||||
tenant_id: The tenant ID for S3 path construction
|
||||
source: Optional source type (e.g., "gmail", "google_drive").
|
||||
source: Optional source type (e.g., "gmail", "google_drive", "user_library").
|
||||
If None, syncs all sources.
|
||||
|
||||
Returns:
|
||||
|
||||
@@ -73,10 +73,10 @@ CONNECTOR_INFO: dict[str, ConnectorInfoEntry] = {
|
||||
"file_pattern": "`PAGE_TITLE.json`",
|
||||
"scan_depth": 1,
|
||||
},
|
||||
"org_info": {
|
||||
"summary": "Organizational structure and user identity",
|
||||
"file_pattern": "Various JSON files",
|
||||
"scan_depth": 0,
|
||||
"user_library": {
|
||||
"summary": "User-uploaded files (spreadsheets, documents, presentations, etc.)",
|
||||
"file_pattern": "Any file format",
|
||||
"scan_depth": 1,
|
||||
},
|
||||
}
|
||||
DEFAULT_SCAN_DEPTH = 1
|
||||
|
||||
@@ -75,6 +75,9 @@ from onyx.server.features.build.sandbox.kubernetes.internal.acp_exec_client impo
|
||||
SSEKeepalive,
|
||||
)
|
||||
from onyx.server.features.build.sandbox.models import LLMProviderConfig
|
||||
from onyx.server.features.build.sandbox.tasks.tasks import (
|
||||
_get_disabled_user_library_paths,
|
||||
)
|
||||
from onyx.server.features.build.session.prompts import BUILD_NAMING_SYSTEM_PROMPT
|
||||
from onyx.server.features.build.session.prompts import BUILD_NAMING_USER_PROMPT
|
||||
from onyx.server.features.build.session.prompts import (
|
||||
@@ -563,6 +566,18 @@ class SessionManager:
|
||||
user_name = user.personal_name if user else None
|
||||
user_role = user.personal_role if user else None
|
||||
|
||||
# Get excluded user library paths (files with sync_disabled=True)
|
||||
# Only query if not using demo data (user library only applies to user files)
|
||||
excluded_user_library_paths: list[str] | None = None
|
||||
if not demo_data_enabled:
|
||||
excluded_user_library_paths = _get_disabled_user_library_paths(
|
||||
self._db_session, str(user_id)
|
||||
)
|
||||
if excluded_user_library_paths:
|
||||
logger.debug(
|
||||
f"Excluding {len(excluded_user_library_paths)} disabled user library paths"
|
||||
)
|
||||
|
||||
self._sandbox_manager.setup_session_workspace(
|
||||
sandbox_id=sandbox.id,
|
||||
session_id=build_session.id,
|
||||
@@ -575,7 +590,9 @@ class SessionManager:
|
||||
user_work_area=user_work_area,
|
||||
user_level=user_level,
|
||||
use_demo_data=demo_data_enabled,
|
||||
excluded_user_library_paths=excluded_user_library_paths,
|
||||
)
|
||||
|
||||
sandbox_id = sandbox.id
|
||||
logger.info(
|
||||
f"Successfully created session {session_id} with workspace in sandbox {sandbox.id}"
|
||||
|
||||
1
backend/tests/integration/tests/build/__init__.py
Normal file
1
backend/tests/integration/tests/build/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
# Integration tests for Craft/Build features
|
||||
207
backend/tests/integration/tests/build/test_user_library.py
Normal file
207
backend/tests/integration/tests/build/test_user_library.py
Normal file
@@ -0,0 +1,207 @@
|
||||
"""
|
||||
Integration tests for the User Library API.
|
||||
|
||||
Tests the CRUD operations for user-uploaded raw files in Craft.
|
||||
"""
|
||||
|
||||
import io
|
||||
import zipfile
|
||||
|
||||
import requests
|
||||
|
||||
from tests.integration.common_utils.test_models import DATestUser
|
||||
|
||||
API_SERVER_URL = "http://localhost:3000"
|
||||
|
||||
|
||||
def _get_user_library_tree(user: DATestUser) -> list[dict]:
|
||||
"""Get the user's library tree (returns a flat list of entries)."""
|
||||
response = requests.get(
|
||||
f"{API_SERVER_URL}/api/build/user-library/tree",
|
||||
headers=user.headers,
|
||||
)
|
||||
response.raise_for_status()
|
||||
return response.json()
|
||||
|
||||
|
||||
def _upload_files(user: DATestUser, path: str, files: list[tuple[str, bytes]]) -> dict:
|
||||
"""Upload files to the user library."""
|
||||
multipart_files = [("files", (name, content)) for name, content in files]
|
||||
response = requests.post(
|
||||
f"{API_SERVER_URL}/api/build/user-library/upload",
|
||||
headers=user.headers,
|
||||
data={"path": path},
|
||||
files=multipart_files,
|
||||
)
|
||||
response.raise_for_status()
|
||||
return response.json()
|
||||
|
||||
|
||||
def _upload_zip(user: DATestUser, path: str, zip_data: bytes) -> dict:
|
||||
"""Upload and extract a zip file."""
|
||||
response = requests.post(
|
||||
f"{API_SERVER_URL}/api/build/user-library/upload-zip",
|
||||
headers=user.headers,
|
||||
data={"path": path},
|
||||
files={"file": ("archive.zip", zip_data)},
|
||||
)
|
||||
response.raise_for_status()
|
||||
return response.json()
|
||||
|
||||
|
||||
def _create_directory(user: DATestUser, name: str, parent_path: str = "/") -> dict:
|
||||
"""Create a directory in the user library."""
|
||||
response = requests.post(
|
||||
f"{API_SERVER_URL}/api/build/user-library/directories",
|
||||
headers=user.headers,
|
||||
json={"name": name, "parent_path": parent_path},
|
||||
)
|
||||
response.raise_for_status()
|
||||
return response.json()
|
||||
|
||||
|
||||
def _toggle_sync(user: DATestUser, document_id: str, enabled: bool) -> None:
|
||||
"""Toggle sync status for a file."""
|
||||
response = requests.patch(
|
||||
f"{API_SERVER_URL}/api/build/user-library/files/{document_id}/toggle",
|
||||
headers=user.headers,
|
||||
params={"enabled": str(enabled).lower()},
|
||||
)
|
||||
response.raise_for_status()
|
||||
|
||||
|
||||
def _delete_file(user: DATestUser, document_id: str) -> None:
|
||||
"""Delete a file from the user library."""
|
||||
response = requests.delete(
|
||||
f"{API_SERVER_URL}/api/build/user-library/files/{document_id}",
|
||||
headers=user.headers,
|
||||
)
|
||||
response.raise_for_status()
|
||||
|
||||
|
||||
def test_user_library_upload_file(
|
||||
reset: None, admin_user: DATestUser # noqa: ARG001
|
||||
) -> None:
|
||||
"""Test uploading a single file to the user library."""
|
||||
# Upload a simple CSV file
|
||||
csv_content = b"name,value\nfoo,1\nbar,2"
|
||||
result = _upload_files(admin_user, "/", [("data.csv", csv_content)])
|
||||
|
||||
assert "entries" in result
|
||||
assert len(result["entries"]) == 1
|
||||
assert result["entries"][0]["name"] == "data.csv"
|
||||
assert result["entries"][0]["sync_enabled"] is True
|
||||
|
||||
# Verify it appears in the tree
|
||||
tree = _get_user_library_tree(admin_user)
|
||||
assert any(entry["name"] == "data.csv" for entry in tree)
|
||||
|
||||
|
||||
def test_user_library_upload_to_directory(
|
||||
reset: None, admin_user: DATestUser # noqa: ARG001
|
||||
) -> None:
|
||||
"""Test uploading files to a specific directory path."""
|
||||
# Create a directory first
|
||||
_create_directory(admin_user, "reports")
|
||||
|
||||
# Upload to that directory
|
||||
xlsx_content = b"fake xlsx content"
|
||||
result = _upload_files(admin_user, "/reports", [("quarterly.xlsx", xlsx_content)])
|
||||
|
||||
assert len(result["entries"]) == 1
|
||||
assert result["entries"][0]["path"] == "/reports/quarterly.xlsx"
|
||||
|
||||
|
||||
def test_user_library_upload_zip(
|
||||
reset: None, admin_user: DATestUser # noqa: ARG001
|
||||
) -> None:
|
||||
"""Test uploading and extracting a zip file."""
|
||||
# Create an in-memory zip file
|
||||
zip_buffer = io.BytesIO()
|
||||
with zipfile.ZipFile(zip_buffer, "w", zipfile.ZIP_DEFLATED) as zf:
|
||||
zf.writestr("folder/file1.txt", "content1")
|
||||
zf.writestr("folder/file2.txt", "content2")
|
||||
zf.writestr("root.txt", "root content")
|
||||
zip_buffer.seek(0)
|
||||
|
||||
result = _upload_zip(admin_user, "/", zip_buffer.read())
|
||||
|
||||
# Should have 3 files extracted
|
||||
assert "entries" in result
|
||||
assert len(result["entries"]) >= 3
|
||||
|
||||
|
||||
def test_user_library_create_directory(
|
||||
reset: None, admin_user: DATestUser # noqa: ARG001
|
||||
) -> None:
|
||||
"""Test creating a directory."""
|
||||
result = _create_directory(admin_user, "my-data")
|
||||
|
||||
assert result["is_directory"] is True
|
||||
assert result["name"] == "my-data"
|
||||
|
||||
# Verify in tree
|
||||
tree = _get_user_library_tree(admin_user)
|
||||
assert any(entry["name"] == "my-data" for entry in tree)
|
||||
|
||||
|
||||
def test_user_library_toggle_sync(
|
||||
reset: None, admin_user: DATestUser # noqa: ARG001
|
||||
) -> None:
|
||||
"""Test toggling sync status for a file."""
|
||||
# Upload a file
|
||||
result = _upload_files(admin_user, "/", [("test.txt", b"hello")])
|
||||
document_id = result["entries"][0]["id"]
|
||||
|
||||
# Initially sync is enabled
|
||||
assert result["entries"][0]["sync_enabled"] is True
|
||||
|
||||
# Disable sync
|
||||
_toggle_sync(admin_user, document_id, False)
|
||||
|
||||
# Verify sync is disabled
|
||||
tree = _get_user_library_tree(admin_user)
|
||||
entry = next((e for e in tree if e["id"] == document_id), None)
|
||||
assert entry is not None
|
||||
assert entry["sync_enabled"] is False
|
||||
|
||||
|
||||
def test_user_library_delete_file(
|
||||
reset: None, admin_user: DATestUser # noqa: ARG001
|
||||
) -> None:
|
||||
"""Test deleting a file."""
|
||||
# Upload a file
|
||||
result = _upload_files(admin_user, "/", [("delete-me.txt", b"temp")])
|
||||
document_id = result["entries"][0]["id"]
|
||||
|
||||
# Delete it
|
||||
_delete_file(admin_user, document_id)
|
||||
|
||||
# Verify it's gone
|
||||
tree = _get_user_library_tree(admin_user)
|
||||
assert not any(entry["id"] == document_id for entry in tree)
|
||||
|
||||
|
||||
def test_user_library_isolation_between_users(
|
||||
reset: None, admin_user: DATestUser # noqa: ARG001
|
||||
) -> None:
|
||||
"""Test that users can only see their own files."""
|
||||
from tests.integration.common_utils.managers.user import UserManager
|
||||
|
||||
other_user: DATestUser = UserManager.create(name="other_user")
|
||||
|
||||
# Admin uploads a file
|
||||
_upload_files(admin_user, "/", [("admin-file.txt", b"admin data")])
|
||||
|
||||
# Other user uploads a file
|
||||
_upload_files(other_user, "/", [("other-file.txt", b"other data")])
|
||||
|
||||
# Admin should only see their file
|
||||
admin_tree = _get_user_library_tree(admin_user)
|
||||
assert any(e["name"] == "admin-file.txt" for e in admin_tree)
|
||||
assert not any(e["name"] == "other-file.txt" for e in admin_tree)
|
||||
|
||||
# Other user should only see their file
|
||||
other_tree = _get_user_library_tree(other_user)
|
||||
assert any(e["name"] == "other-file.txt" for e in other_tree)
|
||||
assert not any(e["name"] == "admin-file.txt" for e in other_tree)
|
||||
@@ -263,7 +263,7 @@ function BuildSessionButton({
|
||||
? "Deleted"
|
||||
: deleteError
|
||||
? "Delete Failed"
|
||||
: "Delete Build"
|
||||
: "Delete Craft"
|
||||
}
|
||||
icon={deleteSuccess ? SvgCheckCircle : SvgTrash}
|
||||
onClose={isDeleting || deleteSuccess ? undefined : closeModal}
|
||||
|
||||
@@ -623,3 +623,141 @@ export async function deleteConnector(
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// User Library API
|
||||
// =============================================================================
|
||||
|
||||
import {
|
||||
LibraryEntry,
|
||||
CreateDirectoryRequest,
|
||||
UploadResponse,
|
||||
} from "@/app/craft/types/user-library";
|
||||
|
||||
const USER_LIBRARY_BASE = `${API_BASE}/user-library`;
|
||||
|
||||
/**
|
||||
* Fetch the user's library tree (uploaded files).
|
||||
*/
|
||||
export async function fetchLibraryTree(): Promise<LibraryEntry[]> {
|
||||
const res = await fetch(`${USER_LIBRARY_BASE}/tree`);
|
||||
|
||||
if (!res.ok) {
|
||||
throw new Error(`Failed to fetch library tree: ${res.status}`);
|
||||
}
|
||||
|
||||
return res.json();
|
||||
}
|
||||
|
||||
/**
|
||||
* Upload files to the user library.
|
||||
*/
|
||||
export async function uploadLibraryFiles(
|
||||
path: string,
|
||||
files: File[]
|
||||
): Promise<UploadResponse> {
|
||||
const formData = new FormData();
|
||||
formData.append("path", path);
|
||||
for (const file of files) {
|
||||
formData.append("files", file);
|
||||
}
|
||||
|
||||
const res = await fetch(`${USER_LIBRARY_BASE}/upload`, {
|
||||
method: "POST",
|
||||
body: formData,
|
||||
});
|
||||
|
||||
if (!res.ok) {
|
||||
const errorData = await res.json().catch(() => ({}));
|
||||
throw new Error(
|
||||
errorData.detail || `Failed to upload files: ${res.status}`
|
||||
);
|
||||
}
|
||||
|
||||
return res.json();
|
||||
}
|
||||
|
||||
/**
|
||||
* Upload and extract a zip file to the user library.
|
||||
*/
|
||||
export async function uploadLibraryZip(
|
||||
path: string,
|
||||
file: File
|
||||
): Promise<UploadResponse> {
|
||||
const formData = new FormData();
|
||||
formData.append("path", path);
|
||||
formData.append("file", file);
|
||||
|
||||
const res = await fetch(`${USER_LIBRARY_BASE}/upload-zip`, {
|
||||
method: "POST",
|
||||
body: formData,
|
||||
});
|
||||
|
||||
if (!res.ok) {
|
||||
const errorData = await res.json().catch(() => ({}));
|
||||
throw new Error(errorData.detail || `Failed to upload zip: ${res.status}`);
|
||||
}
|
||||
|
||||
return res.json();
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a directory in the user library.
|
||||
*/
|
||||
export async function createLibraryDirectory(
|
||||
request: CreateDirectoryRequest
|
||||
): Promise<LibraryEntry> {
|
||||
const res = await fetch(`${USER_LIBRARY_BASE}/directories`, {
|
||||
method: "POST",
|
||||
headers: { "Content-Type": "application/json" },
|
||||
body: JSON.stringify(request),
|
||||
});
|
||||
|
||||
if (!res.ok) {
|
||||
const errorData = await res.json().catch(() => ({}));
|
||||
throw new Error(
|
||||
errorData.detail || `Failed to create directory: ${res.status}`
|
||||
);
|
||||
}
|
||||
|
||||
return res.json();
|
||||
}
|
||||
|
||||
/**
|
||||
* Toggle sync status for a file/directory in the user library.
|
||||
*/
|
||||
export async function toggleLibraryFileSync(
|
||||
documentId: string,
|
||||
enabled: boolean
|
||||
): Promise<void> {
|
||||
const res = await fetch(
|
||||
`${USER_LIBRARY_BASE}/files/${encodeURIComponent(
|
||||
documentId
|
||||
)}/toggle?enabled=${enabled}`,
|
||||
{
|
||||
method: "PATCH",
|
||||
}
|
||||
);
|
||||
|
||||
if (!res.ok) {
|
||||
const errorData = await res.json().catch(() => ({}));
|
||||
throw new Error(errorData.detail || `Failed to toggle sync: ${res.status}`);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Delete a file/directory from the user library.
|
||||
*/
|
||||
export async function deleteLibraryFile(documentId: string): Promise<void> {
|
||||
const res = await fetch(
|
||||
`${USER_LIBRARY_BASE}/files/${encodeURIComponent(documentId)}`,
|
||||
{
|
||||
method: "DELETE",
|
||||
}
|
||||
);
|
||||
|
||||
if (!res.ok) {
|
||||
const errorData = await res.json().catch(() => ({}));
|
||||
throw new Error(errorData.detail || `Failed to delete file: ${res.status}`);
|
||||
}
|
||||
}
|
||||
|
||||
26
web/src/app/craft/types/user-library.ts
Normal file
26
web/src/app/craft/types/user-library.ts
Normal file
@@ -0,0 +1,26 @@
|
||||
/**
|
||||
* Types for User Library - raw binary file uploads in Craft.
|
||||
*/
|
||||
|
||||
export interface LibraryEntry {
|
||||
id: string; // document_id
|
||||
name: string;
|
||||
path: string;
|
||||
is_directory: boolean;
|
||||
file_size: number | null;
|
||||
mime_type: string | null;
|
||||
sync_enabled: boolean;
|
||||
created_at: string;
|
||||
children?: LibraryEntry[];
|
||||
}
|
||||
|
||||
export interface CreateDirectoryRequest {
|
||||
name: string;
|
||||
parent_path: string;
|
||||
}
|
||||
|
||||
export interface UploadResponse {
|
||||
entries: LibraryEntry[];
|
||||
total_uploaded: number;
|
||||
total_size_bytes: number;
|
||||
}
|
||||
@@ -131,10 +131,6 @@ export default function ConfigureConnectorModal({
|
||||
setStep("credential");
|
||||
};
|
||||
|
||||
const handleConnectorSuccess = () => {
|
||||
onSuccess();
|
||||
};
|
||||
|
||||
// Dynamic title and description based on flow type
|
||||
const getStepTitle = () => {
|
||||
if (isSingleStep) {
|
||||
@@ -192,14 +188,14 @@ export default function ConfigureConnectorModal({
|
||||
onOAuthRedirect={handleOAuthRedirect}
|
||||
refresh={refreshCredentials}
|
||||
isSingleStep={isSingleStep}
|
||||
onConnectorSuccess={handleConnectorSuccess}
|
||||
onConnectorSuccess={onSuccess}
|
||||
setPopup={setPopup}
|
||||
/>
|
||||
) : selectedCredential ? (
|
||||
<ConnectorConfigStep
|
||||
connectorType={connectorType}
|
||||
credential={selectedCredential}
|
||||
onSuccess={handleConnectorSuccess}
|
||||
onSuccess={onSuccess}
|
||||
onBack={handleBack}
|
||||
setPopup={setPopup}
|
||||
/>
|
||||
|
||||
@@ -29,10 +29,14 @@ export function ConnectorInfoOverlay({ visible }: ConnectorInfoOverlayProps) {
|
||||
|
||||
interface ReprovisionWarningOverlayProps {
|
||||
visible: boolean;
|
||||
onUpdate?: () => void;
|
||||
isUpdating?: boolean;
|
||||
}
|
||||
|
||||
export function ReprovisionWarningOverlay({
|
||||
visible,
|
||||
onUpdate,
|
||||
isUpdating,
|
||||
}: ReprovisionWarningOverlayProps) {
|
||||
return (
|
||||
<div
|
||||
@@ -48,6 +52,8 @@ export function ReprovisionWarningOverlay({
|
||||
text="Click Update to apply your changes"
|
||||
description="Your sandbox will be recreated with your new settings. Previously running sessions will not be affected by your changes."
|
||||
close={false}
|
||||
actions={isUpdating ? "Updating..." : "Update"}
|
||||
onAction={isUpdating ? undefined : onUpdate}
|
||||
/>
|
||||
</div>
|
||||
);
|
||||
|
||||
@@ -105,13 +105,21 @@ export default function ConnectorCard({
|
||||
const sourceMetadata = getSourceMetadata(connectorType);
|
||||
const status: ConnectorStatus = config?.status || "not_connected";
|
||||
const isConnected = status !== "not_connected" && status !== "deleting";
|
||||
|
||||
const isDeleting = status === "deleting";
|
||||
|
||||
// Check if this connector type is always available (doesn't need connection setup)
|
||||
const isAlwaysConnected = sourceMetadata.alwaysConnected ?? false;
|
||||
const customDescription = sourceMetadata.customDescription;
|
||||
|
||||
const handleCardClick = () => {
|
||||
if (isDeleting) {
|
||||
return; // No action while deleting
|
||||
}
|
||||
// Always-connected connectors always go to onConfigure
|
||||
if (isAlwaysConnected) {
|
||||
onConfigure();
|
||||
return;
|
||||
}
|
||||
if (isConnected) {
|
||||
setPopoverOpen(true);
|
||||
} else {
|
||||
@@ -119,7 +127,11 @@ export default function ConnectorCard({
|
||||
}
|
||||
};
|
||||
|
||||
const rightContent = isDeleting ? null : isConnected ? (
|
||||
// Always-connected connectors show a settings icon
|
||||
// Regular connectors show popover menu when connected, plug icon when not
|
||||
const rightContent = isDeleting ? null : isAlwaysConnected ? (
|
||||
<IconButton icon={SvgSettings} internal />
|
||||
) : isConnected ? (
|
||||
<Popover open={popoverOpen} onOpenChange={setPopoverOpen}>
|
||||
<Popover.Trigger asChild>
|
||||
<IconButton
|
||||
@@ -164,21 +176,32 @@ export default function ConnectorCard({
|
||||
<IconButton icon={SvgPlug} internal />
|
||||
);
|
||||
|
||||
// Always-connected connectors show as "primary" variant
|
||||
const cardVariant =
|
||||
isAlwaysConnected || isConnected ? "primary" : "secondary";
|
||||
|
||||
// Use custom description if provided, otherwise show status
|
||||
const descriptionContent = customDescription ? (
|
||||
<Text secondaryBody text03>
|
||||
{customDescription}
|
||||
</Text>
|
||||
) : (
|
||||
<StatusDescription
|
||||
status={status}
|
||||
docsIndexed={config?.docs_indexed || 0}
|
||||
/>
|
||||
);
|
||||
|
||||
return (
|
||||
<div
|
||||
className={cn(!isDeleting && "cursor-pointer")}
|
||||
onClick={handleCardClick}
|
||||
>
|
||||
<Card variant={isConnected ? "primary" : "secondary"}>
|
||||
<Card variant={cardVariant}>
|
||||
<LineItemLayout
|
||||
icon={sourceMetadata.icon}
|
||||
title={sourceMetadata.displayName}
|
||||
description={
|
||||
<StatusDescription
|
||||
status={status}
|
||||
docsIndexed={config?.docs_indexed || 0}
|
||||
/>
|
||||
}
|
||||
description={descriptionContent}
|
||||
rightChildren={rightContent}
|
||||
center
|
||||
/>
|
||||
|
||||
529
web/src/app/craft/v1/configure/components/UserLibraryModal.tsx
Normal file
529
web/src/app/craft/v1/configure/components/UserLibraryModal.tsx
Normal file
@@ -0,0 +1,529 @@
|
||||
"use client";
|
||||
|
||||
import { useState, useCallback, useRef, useMemo } from "react";
|
||||
import useSWR from "swr";
|
||||
import {
|
||||
fetchLibraryTree,
|
||||
uploadLibraryFiles,
|
||||
uploadLibraryZip,
|
||||
createLibraryDirectory,
|
||||
toggleLibraryFileSync,
|
||||
deleteLibraryFile,
|
||||
} from "@/app/craft/services/apiServices";
|
||||
import { LibraryEntry } from "@/app/craft/types/user-library";
|
||||
import { cn } from "@/lib/utils";
|
||||
import Text from "@/refresh-components/texts/Text";
|
||||
import Button from "@/refresh-components/buttons/Button";
|
||||
import Modal from "@/refresh-components/Modal";
|
||||
import { Section } from "@/layouts/general-layouts";
|
||||
import {
|
||||
SvgFolder,
|
||||
SvgFolderOpen,
|
||||
SvgChevronRight,
|
||||
SvgUploadCloud,
|
||||
SvgPlus,
|
||||
SvgTrash,
|
||||
SvgFileText,
|
||||
} from "@opal/icons";
|
||||
import Switch from "@/refresh-components/inputs/Switch";
|
||||
import InputTypeIn from "@/refresh-components/inputs/InputTypeIn";
|
||||
import SimpleTooltip from "@/refresh-components/SimpleTooltip";
|
||||
import { ConfirmEntityModal } from "@/components/modals/ConfirmEntityModal";
|
||||
|
||||
/**
|
||||
* Build a hierarchical tree from a flat list of library entries.
|
||||
* Entries have paths like "user_library/test" or "user_library/test/file.pdf"
|
||||
*/
|
||||
function buildTreeFromFlatList(flatList: LibraryEntry[]): LibraryEntry[] {
|
||||
// Create a map of path -> entry (with children array initialized)
|
||||
const pathToEntry = new Map<string, LibraryEntry>();
|
||||
|
||||
// First pass: create entries with empty children arrays
|
||||
for (const entry of flatList) {
|
||||
pathToEntry.set(entry.path, { ...entry, children: [] });
|
||||
}
|
||||
|
||||
// Second pass: build parent-child relationships
|
||||
const rootEntries: LibraryEntry[] = [];
|
||||
|
||||
for (const entry of flatList) {
|
||||
const entryWithChildren = pathToEntry.get(entry.path)!;
|
||||
|
||||
// Find parent path by removing the last segment
|
||||
const pathParts = entry.path.split("/");
|
||||
pathParts.pop(); // Remove last segment (filename or folder name)
|
||||
const parentPath = pathParts.join("/");
|
||||
|
||||
const parent = pathToEntry.get(parentPath);
|
||||
if (parent && parent.children) {
|
||||
parent.children.push(entryWithChildren);
|
||||
} else {
|
||||
// No parent found, this is a root-level entry
|
||||
rootEntries.push(entryWithChildren);
|
||||
}
|
||||
}
|
||||
|
||||
return rootEntries;
|
||||
}
|
||||
|
||||
interface UserLibraryModalProps {
|
||||
open: boolean;
|
||||
onClose: () => void;
|
||||
onChanges?: () => void; // Called when files are uploaded, deleted, or sync toggled
|
||||
}
|
||||
|
||||
export default function UserLibraryModal({
|
||||
open,
|
||||
onClose,
|
||||
onChanges,
|
||||
}: UserLibraryModalProps) {
|
||||
const [expandedPaths, setExpandedPaths] = useState<Set<string>>(new Set());
|
||||
const [isUploading, setIsUploading] = useState(false);
|
||||
const [uploadError, setUploadError] = useState<string | null>(null);
|
||||
const [entryToDelete, setEntryToDelete] = useState<LibraryEntry | null>(null);
|
||||
const [showNewFolderModal, setShowNewFolderModal] = useState(false);
|
||||
const [newFolderName, setNewFolderName] = useState("");
|
||||
const fileInputRef = useRef<HTMLInputElement>(null);
|
||||
const uploadTargetPathRef = useRef<string>("/");
|
||||
|
||||
// Fetch library tree
|
||||
const {
|
||||
data: tree,
|
||||
error,
|
||||
isLoading,
|
||||
mutate,
|
||||
} = useSWR(open ? "/api/build/user-library/tree" : null, fetchLibraryTree, {
|
||||
revalidateOnFocus: false,
|
||||
});
|
||||
|
||||
// Build hierarchical tree from flat list
|
||||
const hierarchicalTree = useMemo(() => {
|
||||
if (!tree) return [];
|
||||
return buildTreeFromFlatList(tree);
|
||||
}, [tree]);
|
||||
|
||||
const toggleFolder = useCallback((path: string) => {
|
||||
setExpandedPaths((prev) => {
|
||||
const newSet = new Set(prev);
|
||||
if (newSet.has(path)) {
|
||||
newSet.delete(path);
|
||||
} else {
|
||||
newSet.add(path);
|
||||
}
|
||||
return newSet;
|
||||
});
|
||||
}, []);
|
||||
|
||||
const handleFileUpload = useCallback(
|
||||
async (event: React.ChangeEvent<HTMLInputElement>) => {
|
||||
const files = event.target.files;
|
||||
if (!files || files.length === 0) return;
|
||||
|
||||
setIsUploading(true);
|
||||
setUploadError(null);
|
||||
|
||||
const targetPath = uploadTargetPathRef.current;
|
||||
|
||||
try {
|
||||
const fileArray = Array.from(files);
|
||||
// Check if it's a single zip file
|
||||
const firstFile = fileArray[0];
|
||||
if (
|
||||
fileArray.length === 1 &&
|
||||
firstFile &&
|
||||
firstFile.name.endsWith(".zip")
|
||||
) {
|
||||
await uploadLibraryZip(targetPath, firstFile);
|
||||
} else {
|
||||
await uploadLibraryFiles(targetPath, fileArray);
|
||||
}
|
||||
mutate();
|
||||
onChanges?.(); // Notify parent that changes were made
|
||||
} catch (err) {
|
||||
setUploadError(err instanceof Error ? err.message : "Upload failed");
|
||||
} finally {
|
||||
setIsUploading(false);
|
||||
uploadTargetPathRef.current = "/";
|
||||
// Reset input
|
||||
event.target.value = "";
|
||||
}
|
||||
},
|
||||
[mutate, onChanges]
|
||||
);
|
||||
|
||||
const handleUploadToFolder = useCallback((folderPath: string) => {
|
||||
uploadTargetPathRef.current = folderPath;
|
||||
fileInputRef.current?.click();
|
||||
}, []);
|
||||
|
||||
const handleToggleSync = useCallback(
|
||||
async (entry: LibraryEntry, enabled: boolean) => {
|
||||
try {
|
||||
await toggleLibraryFileSync(entry.id, enabled);
|
||||
mutate();
|
||||
onChanges?.(); // Notify parent that changes were made
|
||||
} catch (err) {
|
||||
console.error("Failed to toggle sync:", err);
|
||||
}
|
||||
},
|
||||
[mutate, onChanges]
|
||||
);
|
||||
|
||||
const handleDeleteConfirm = useCallback(async () => {
|
||||
if (!entryToDelete) return;
|
||||
|
||||
try {
|
||||
await deleteLibraryFile(entryToDelete.id);
|
||||
mutate();
|
||||
onChanges?.(); // Notify parent that changes were made
|
||||
} catch (err) {
|
||||
console.error("Failed to delete:", err);
|
||||
} finally {
|
||||
setEntryToDelete(null);
|
||||
}
|
||||
}, [entryToDelete, mutate, onChanges]);
|
||||
|
||||
const handleCreateDirectory = useCallback(async () => {
|
||||
const name = newFolderName.trim();
|
||||
if (!name) return;
|
||||
|
||||
try {
|
||||
await createLibraryDirectory({ name, parent_path: "/" });
|
||||
mutate();
|
||||
} catch (err) {
|
||||
console.error("Failed to create directory:", err);
|
||||
setUploadError(
|
||||
err instanceof Error ? err.message : "Failed to create folder"
|
||||
);
|
||||
} finally {
|
||||
setShowNewFolderModal(false);
|
||||
setNewFolderName("");
|
||||
}
|
||||
}, [mutate, newFolderName]);
|
||||
|
||||
const formatFileSize = (bytes: number | null): string => {
|
||||
if (bytes === null) return "";
|
||||
if (bytes < 1024) return `${bytes} B`;
|
||||
if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(1)} KB`;
|
||||
return `${(bytes / (1024 * 1024)).toFixed(1)} MB`;
|
||||
};
|
||||
|
||||
const fileCount = hierarchicalTree.length;
|
||||
|
||||
return (
|
||||
<>
|
||||
<Modal open={open} onOpenChange={(isOpen) => !isOpen && onClose()}>
|
||||
<Modal.Content width="md" height="fit">
|
||||
<Modal.Header
|
||||
icon={SvgFileText}
|
||||
title="Your Files"
|
||||
description="Upload files for your agent to read (Excel, Word, PowerPoint, etc.)"
|
||||
onClose={onClose}
|
||||
/>
|
||||
<Modal.Body>
|
||||
<Section flexDirection="column" gap={1} alignItems="stretch">
|
||||
{/* Action buttons */}
|
||||
<Section flexDirection="row" justifyContent="end" gap={0.5}>
|
||||
<Button
|
||||
secondary
|
||||
onClick={() => setShowNewFolderModal(true)}
|
||||
leftIcon={SvgPlus}
|
||||
>
|
||||
New Folder
|
||||
</Button>
|
||||
<input
|
||||
ref={fileInputRef}
|
||||
type="file"
|
||||
multiple
|
||||
className="hidden"
|
||||
onChange={handleFileUpload}
|
||||
disabled={isUploading}
|
||||
accept=".xlsx,.xls,.docx,.doc,.pptx,.ppt,.csv,.json,.txt,.pdf,.zip"
|
||||
/>
|
||||
<Button
|
||||
secondary
|
||||
onClick={() => handleUploadToFolder("/")}
|
||||
leftIcon={SvgUploadCloud}
|
||||
disabled={isUploading}
|
||||
>
|
||||
{isUploading ? "Uploading..." : "Upload"}
|
||||
</Button>
|
||||
</Section>
|
||||
|
||||
{/* Upload error */}
|
||||
{uploadError && (
|
||||
<div className="p-2 bg-status-error-01 rounded-08">
|
||||
<Text secondaryBody>{uploadError}</Text>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* File list */}
|
||||
<div className="w-full border border-border-01 rounded-08 min-h-[200px] max-h-[400px] overflow-auto">
|
||||
{isLoading ? (
|
||||
<div className="flex flex-col items-center justify-center p-8">
|
||||
<Text secondaryBody text03>
|
||||
Loading files...
|
||||
</Text>
|
||||
</div>
|
||||
) : error ? (
|
||||
<div className="flex flex-col items-center justify-center p-8">
|
||||
<Text secondaryBody text03>
|
||||
Failed to load files
|
||||
</Text>
|
||||
</div>
|
||||
) : fileCount === 0 ? (
|
||||
<div className="flex flex-col items-center justify-center p-8">
|
||||
<SvgFileText size={32} className="mb-3 stroke-text-02" />
|
||||
<Text secondaryBody text03>
|
||||
No files uploaded yet
|
||||
</Text>
|
||||
<Text secondaryBody text02 className="mt-1 text-center">
|
||||
Upload Excel, Word, PowerPoint, or other files for your
|
||||
agent to work with
|
||||
</Text>
|
||||
</div>
|
||||
) : (
|
||||
<div className="w-full p-2">
|
||||
<LibraryTreeView
|
||||
entries={hierarchicalTree}
|
||||
expandedPaths={expandedPaths}
|
||||
onToggleFolder={toggleFolder}
|
||||
onToggleSync={handleToggleSync}
|
||||
onDelete={setEntryToDelete}
|
||||
onUploadToFolder={handleUploadToFolder}
|
||||
formatFileSize={formatFileSize}
|
||||
/>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</Section>
|
||||
</Modal.Body>
|
||||
|
||||
<Modal.Footer>
|
||||
<Button onClick={onClose}>Done</Button>
|
||||
</Modal.Footer>
|
||||
</Modal.Content>
|
||||
</Modal>
|
||||
|
||||
{/* Delete confirmation modal */}
|
||||
{entryToDelete && (
|
||||
<ConfirmEntityModal
|
||||
danger
|
||||
entityType={entryToDelete.is_directory ? "folder" : "file"}
|
||||
entityName={entryToDelete.name}
|
||||
action="delete"
|
||||
actionButtonText="Delete"
|
||||
additionalDetails={
|
||||
entryToDelete.is_directory
|
||||
? "This will delete the folder and all its contents."
|
||||
: "This file will be removed from your library."
|
||||
}
|
||||
onClose={() => setEntryToDelete(null)}
|
||||
onSubmit={handleDeleteConfirm}
|
||||
/>
|
||||
)}
|
||||
|
||||
{/* New folder modal */}
|
||||
<Modal
|
||||
open={showNewFolderModal}
|
||||
onOpenChange={(isOpen) => {
|
||||
if (!isOpen) {
|
||||
setShowNewFolderModal(false);
|
||||
setNewFolderName("");
|
||||
}
|
||||
}}
|
||||
>
|
||||
<Modal.Content width="sm" height="fit">
|
||||
<Modal.Header
|
||||
icon={SvgFolder}
|
||||
title="New Folder"
|
||||
onClose={() => {
|
||||
setShowNewFolderModal(false);
|
||||
setNewFolderName("");
|
||||
}}
|
||||
/>
|
||||
<Modal.Body>
|
||||
<Section flexDirection="column" gap={0.5} alignItems="stretch">
|
||||
<Text secondaryBody text03>
|
||||
Folder name
|
||||
</Text>
|
||||
<InputTypeIn
|
||||
value={newFolderName}
|
||||
onChange={(e) => setNewFolderName(e.target.value)}
|
||||
placeholder="Enter folder name"
|
||||
onKeyDown={(e) => {
|
||||
if (e.key === "Enter" && newFolderName.trim()) {
|
||||
handleCreateDirectory();
|
||||
}
|
||||
}}
|
||||
autoFocus
|
||||
/>
|
||||
</Section>
|
||||
</Modal.Body>
|
||||
<Modal.Footer>
|
||||
<Button
|
||||
secondary
|
||||
onClick={() => {
|
||||
setShowNewFolderModal(false);
|
||||
setNewFolderName("");
|
||||
}}
|
||||
>
|
||||
Cancel
|
||||
</Button>
|
||||
<Button
|
||||
onClick={handleCreateDirectory}
|
||||
disabled={!newFolderName.trim()}
|
||||
>
|
||||
Create
|
||||
</Button>
|
||||
</Modal.Footer>
|
||||
</Modal.Content>
|
||||
</Modal>
|
||||
</>
|
||||
);
|
||||
}
|
||||
|
||||
interface LibraryTreeViewProps {
|
||||
entries: LibraryEntry[];
|
||||
expandedPaths: Set<string>;
|
||||
onToggleFolder: (path: string) => void;
|
||||
onToggleSync: (entry: LibraryEntry, enabled: boolean) => void;
|
||||
onDelete: (entry: LibraryEntry) => void;
|
||||
onUploadToFolder: (folderPath: string) => void;
|
||||
formatFileSize: (bytes: number | null) => string;
|
||||
depth?: number;
|
||||
}
|
||||
|
||||
function LibraryTreeView({
|
||||
entries,
|
||||
expandedPaths,
|
||||
onToggleFolder,
|
||||
onToggleSync,
|
||||
onDelete,
|
||||
onUploadToFolder,
|
||||
formatFileSize,
|
||||
depth = 0,
|
||||
}: LibraryTreeViewProps) {
|
||||
// Sort entries: directories first, then alphabetically
|
||||
const sortedEntries = [...entries].sort((a, b) => {
|
||||
if (a.is_directory && !b.is_directory) return -1;
|
||||
if (!a.is_directory && b.is_directory) return 1;
|
||||
return a.name.localeCompare(b.name);
|
||||
});
|
||||
|
||||
return (
|
||||
<>
|
||||
{sortedEntries.map((entry) => {
|
||||
const isExpanded = expandedPaths.has(entry.path);
|
||||
|
||||
return (
|
||||
<div key={entry.id}>
|
||||
<div
|
||||
className={cn(
|
||||
"flex items-center py-2 px-3 hover:bg-background-tint-02 rounded-08 transition-colors",
|
||||
"group"
|
||||
)}
|
||||
style={{ paddingLeft: `${depth * 20 + 12}px` }}
|
||||
>
|
||||
{/* Expand/collapse for directories */}
|
||||
{entry.is_directory ? (
|
||||
<button
|
||||
onClick={() => onToggleFolder(entry.path)}
|
||||
className="p-0.5 rounded hover:bg-background-tint-03"
|
||||
>
|
||||
<SvgChevronRight
|
||||
size={14}
|
||||
className={cn(
|
||||
"stroke-text-03 transition-transform",
|
||||
isExpanded && "rotate-90"
|
||||
)}
|
||||
/>
|
||||
</button>
|
||||
) : (
|
||||
<span className="w-5" />
|
||||
)}
|
||||
|
||||
{/* Icon */}
|
||||
{entry.is_directory ? (
|
||||
isExpanded ? (
|
||||
<SvgFolderOpen size={16} className="stroke-text-03 mx-1.5" />
|
||||
) : (
|
||||
<SvgFolder size={16} className="stroke-text-03 mx-1.5" />
|
||||
)
|
||||
) : (
|
||||
<SvgFileText size={16} className="stroke-text-03 mx-1.5" />
|
||||
)}
|
||||
|
||||
{/* Name */}
|
||||
<Text secondaryBody text04 className="flex-1 truncate">
|
||||
{entry.name}
|
||||
</Text>
|
||||
|
||||
{/* File size */}
|
||||
{!entry.is_directory && entry.file_size !== null && (
|
||||
<Text secondaryBody text02 className="mx-2 flex-shrink-0">
|
||||
{formatFileSize(entry.file_size)}
|
||||
</Text>
|
||||
)}
|
||||
|
||||
{/* Actions - show on hover */}
|
||||
<div className="flex items-center gap-2 opacity-0 group-hover:opacity-100 transition-opacity">
|
||||
{/* Upload to folder button - only for directories */}
|
||||
{entry.is_directory && (
|
||||
<SimpleTooltip tooltip="Upload to this folder">
|
||||
<button
|
||||
onClick={(e) => {
|
||||
e.stopPropagation();
|
||||
// Strip "user_library" prefix from path for upload API
|
||||
const uploadPath =
|
||||
entry.path.replace(/^user_library/, "") || "/";
|
||||
onUploadToFolder(uploadPath);
|
||||
}}
|
||||
className="p-1 rounded hover:bg-background-tint-03 transition-colors"
|
||||
>
|
||||
<SvgUploadCloud size={14} className="stroke-text-03" />
|
||||
</button>
|
||||
</SimpleTooltip>
|
||||
)}
|
||||
<button
|
||||
onClick={() => onDelete(entry)}
|
||||
className="p-1 rounded hover:bg-status-error-01 transition-colors"
|
||||
>
|
||||
<SvgTrash size={14} className="stroke-text-03" />
|
||||
</button>
|
||||
</div>
|
||||
|
||||
{/* Sync toggle - always visible, rightmost */}
|
||||
<SimpleTooltip
|
||||
tooltip={
|
||||
entry.sync_enabled
|
||||
? "Synced to sandbox - click to disable"
|
||||
: "Not synced - click to enable"
|
||||
}
|
||||
>
|
||||
<Switch
|
||||
checked={entry.sync_enabled}
|
||||
onCheckedChange={(checked) => onToggleSync(entry, checked)}
|
||||
/>
|
||||
</SimpleTooltip>
|
||||
</div>
|
||||
|
||||
{/* Children */}
|
||||
{entry.is_directory && isExpanded && entry.children && (
|
||||
<LibraryTreeView
|
||||
entries={entry.children}
|
||||
expandedPaths={expandedPaths}
|
||||
onToggleFolder={onToggleFolder}
|
||||
onToggleSync={onToggleSync}
|
||||
onDelete={onDelete}
|
||||
onUploadToFolder={onUploadToFolder}
|
||||
formatFileSize={formatFileSize}
|
||||
depth={depth + 1}
|
||||
/>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
})}
|
||||
</>
|
||||
);
|
||||
}
|
||||
@@ -14,8 +14,12 @@ import { useBuildConnectors } from "@/app/craft/hooks/useBuildConnectors";
|
||||
import { BuildLLMPopover } from "@/app/craft/components/BuildLLMPopover";
|
||||
import Text from "@/refresh-components/texts/Text";
|
||||
import Card from "@/refresh-components/cards/Card";
|
||||
import { SvgPlug, SvgSettings, SvgChevronDown } from "@opal/icons";
|
||||
import { FiInfo } from "react-icons/fi";
|
||||
import {
|
||||
SvgPlug,
|
||||
SvgSettings,
|
||||
SvgChevronDown,
|
||||
SvgInfoSmall,
|
||||
} from "@opal/icons";
|
||||
import { ValidSources } from "@/lib/types";
|
||||
import ConnectorCard, {
|
||||
BuildConnectorConfig,
|
||||
@@ -23,6 +27,7 @@ import ConnectorCard, {
|
||||
import ConfigureConnectorModal from "@/app/craft/v1/configure/components/ConfigureConnectorModal";
|
||||
import ComingSoonConnectors from "@/app/craft/v1/configure/components/ComingSoonConnectors";
|
||||
import DemoDataConfirmModal from "@/app/craft/v1/configure/components/DemoDataConfirmModal";
|
||||
import UserLibraryModal from "@/app/craft/v1/configure/components/UserLibraryModal";
|
||||
import {
|
||||
ConnectorInfoOverlay,
|
||||
ReprovisionWarningOverlay,
|
||||
@@ -63,6 +68,7 @@ const BUILD_CONNECTORS: ValidSources[] = [
|
||||
ValidSources.Linear,
|
||||
ValidSources.Fireflies,
|
||||
ValidSources.Hubspot,
|
||||
ValidSources.CraftFile, // User's uploaded files
|
||||
];
|
||||
|
||||
interface SelectedConnectorState {
|
||||
@@ -87,6 +93,7 @@ export default function BuildConfigPage() {
|
||||
const [showNotAllowedModal, setShowNotAllowedModal] = useState(false);
|
||||
const [showDemoDataConfirmModal, setShowDemoDataConfirmModal] =
|
||||
useState(false);
|
||||
const [showUserLibraryModal, setShowUserLibraryModal] = useState(false);
|
||||
const [pendingDemoDataEnabled, setPendingDemoDataEnabled] = useState<
|
||||
boolean | null
|
||||
>(null);
|
||||
@@ -95,6 +102,7 @@ export default function BuildConfigPage() {
|
||||
const [pendingLlmSelection, setPendingLlmSelection] =
|
||||
useState<BuildLlmSelection | null>(null);
|
||||
const [pendingDemoData, setPendingDemoData] = useState<boolean | null>(null);
|
||||
const [userLibraryChanged, setUserLibraryChanged] = useState(false);
|
||||
const [isUpdating, setIsUpdating] = useState(false);
|
||||
|
||||
// Track original values (set on mount and after Update)
|
||||
@@ -152,12 +160,13 @@ export default function BuildConfigPage() {
|
||||
originalDemoData !== null &&
|
||||
pendingDemoData !== originalDemoData;
|
||||
|
||||
return llmChanged || demoDataChanged;
|
||||
return llmChanged || demoDataChanged || userLibraryChanged;
|
||||
}, [
|
||||
pendingLlmSelection,
|
||||
pendingDemoData,
|
||||
originalLlmSelection,
|
||||
originalDemoData,
|
||||
userLibraryChanged,
|
||||
]);
|
||||
|
||||
// Compute display name for the pending LLM selection
|
||||
@@ -208,9 +217,12 @@ export default function BuildConfigPage() {
|
||||
}, [pendingDemoDataEnabled]);
|
||||
|
||||
// Restore changes - revert pending state to original values
|
||||
// Note: User Library changes cannot be reverted (files already uploaded/deleted/toggled)
|
||||
// so we just reset the flag - user needs to manually undo file changes if desired
|
||||
const handleRestoreChanges = useCallback(() => {
|
||||
setPendingLlmSelection(originalLlmSelection);
|
||||
setPendingDemoData(originalDemoData);
|
||||
setUserLibraryChanged(false);
|
||||
}, [originalLlmSelection, originalDemoData]);
|
||||
|
||||
// Update - apply pending changes and re-provision sandbox
|
||||
@@ -236,6 +248,9 @@ export default function BuildConfigPage() {
|
||||
|
||||
// 3. Start provisioning a new session with updated settings
|
||||
ensurePreProvisionedSession();
|
||||
|
||||
// 4. Reset User Library change flag (sandbox now has the updated files)
|
||||
setUserLibraryChanged(false);
|
||||
} catch (error) {
|
||||
console.error("Failed to update settings:", error);
|
||||
} finally {
|
||||
@@ -518,7 +533,10 @@ export default function BuildConfigPage() {
|
||||
<div className="flex items-center gap-2">
|
||||
<SimpleTooltip tooltip="The demo dataset contains 1000 files across various connectors">
|
||||
<span className="inline-flex items-center cursor-help">
|
||||
<FiInfo size={16} className="text-text-03" />
|
||||
<SvgInfoSmall
|
||||
size={16}
|
||||
className="text-text-03"
|
||||
/>
|
||||
</span>
|
||||
</SimpleTooltip>
|
||||
<Text mainUiAction>Use Demo Dataset</Text>
|
||||
@@ -541,24 +559,32 @@ export default function BuildConfigPage() {
|
||||
</div>
|
||||
</div>
|
||||
<div className="w-full grid grid-cols-1 md:grid-cols-2 gap-2 pt-2">
|
||||
{connectorStates.map(({ type, config }) => (
|
||||
<ConnectorCard
|
||||
key={type}
|
||||
connectorType={type}
|
||||
config={config}
|
||||
onConfigure={() => {
|
||||
// Only open modal for unconfigured connectors
|
||||
if (!config) {
|
||||
if (isBasicUser) {
|
||||
setShowNotAllowedModal(true);
|
||||
} else {
|
||||
setSelectedConnector({ type, config });
|
||||
{connectorStates.map(({ type, config }) => {
|
||||
const metadata = getSourceMetadata(type);
|
||||
return (
|
||||
<ConnectorCard
|
||||
key={type}
|
||||
connectorType={type}
|
||||
config={config}
|
||||
onConfigure={() => {
|
||||
// Connectors marked as alwaysConnected open their custom modal
|
||||
if (metadata.alwaysConnected) {
|
||||
setShowUserLibraryModal(true);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}}
|
||||
onDelete={() => config && setConnectorToDelete(config)}
|
||||
/>
|
||||
))}
|
||||
// Only open modal for unconfigured connectors
|
||||
if (!config) {
|
||||
if (isBasicUser) {
|
||||
setShowNotAllowedModal(true);
|
||||
} else {
|
||||
setSelectedConnector({ type, config });
|
||||
}
|
||||
}
|
||||
}}
|
||||
onDelete={() => config && setConnectorToDelete(config)}
|
||||
/>
|
||||
);
|
||||
})}
|
||||
</div>
|
||||
<ComingSoonConnectors />
|
||||
</Section>
|
||||
@@ -567,7 +593,11 @@ export default function BuildConfigPage() {
|
||||
|
||||
{/* Sticky overlay for reprovision warning */}
|
||||
<div className="sticky z-toast bottom-10 w-fit mx-auto">
|
||||
<ReprovisionWarningOverlay visible={hasChanges && !isLoading} />
|
||||
<ReprovisionWarningOverlay
|
||||
visible={hasChanges && !isLoading}
|
||||
onUpdate={handleUpdate}
|
||||
isUpdating={isUpdating || isPreProvisioning}
|
||||
/>
|
||||
</div>
|
||||
|
||||
{/* Fixed overlay for connector info - centered on screen like the modal */}
|
||||
@@ -615,6 +645,12 @@ export default function BuildConfigPage() {
|
||||
pendingDemoDataEnabled={pendingDemoDataEnabled}
|
||||
onConfirm={handleDemoDataConfirm}
|
||||
/>
|
||||
|
||||
<UserLibraryModal
|
||||
open={showUserLibraryModal}
|
||||
onClose={() => setShowUserLibraryModal(false)}
|
||||
onChanges={() => setUserLibraryChanged(true)}
|
||||
/>
|
||||
</SettingsLayouts.Root>
|
||||
</div>
|
||||
);
|
||||
|
||||
@@ -459,6 +459,7 @@ export const credentialTemplates: Record<ValidSources, any> = {
|
||||
google_sites: null,
|
||||
file: null,
|
||||
user_file: null,
|
||||
craft_file: null, // User Library - managed through dedicated UI
|
||||
wikipedia: null,
|
||||
mediawiki: null,
|
||||
web: null,
|
||||
|
||||
@@ -142,6 +142,11 @@ export interface SourceMetadata {
|
||||
uniqueKey?: string;
|
||||
// For federated connectors, this stores the base source type for the icon
|
||||
baseSourceType?: ValidSources;
|
||||
// For connectors that are always available (don't need connection setup)
|
||||
// e.g., User Library (CraftFile) where users just upload files
|
||||
alwaysConnected?: boolean;
|
||||
// Custom description to show instead of status (e.g., "Manage your uploaded files")
|
||||
customDescription?: string;
|
||||
}
|
||||
|
||||
export interface SearchDefaultOverrides {
|
||||
|
||||
@@ -67,6 +67,11 @@ interface PartialSourceMetadata {
|
||||
// federated connectors store the base source type if it's a source
|
||||
// that has both indexed connectors and federated connectors
|
||||
baseSourceType?: ValidSources;
|
||||
// For connectors that are always available (don't need connection setup)
|
||||
// e.g., User Library (CraftFile) where users just upload files
|
||||
alwaysConnected?: boolean;
|
||||
// Custom description to show instead of status (e.g., "Manage your uploaded files")
|
||||
customDescription?: string;
|
||||
}
|
||||
|
||||
type SourceMap = {
|
||||
@@ -422,6 +427,16 @@ export const SOURCE_METADATA_MAP: SourceMap = {
|
||||
category: SourceCategory.Other,
|
||||
},
|
||||
|
||||
// Craft-specific sources
|
||||
craft_file: {
|
||||
icon: SvgFileText,
|
||||
displayName: "Your Files",
|
||||
category: SourceCategory.Other,
|
||||
isPopular: false, // Hidden from standard Add Connector page
|
||||
alwaysConnected: true, // No setup required, just upload files
|
||||
customDescription: "Manage your uploaded files",
|
||||
},
|
||||
|
||||
// Placeholder (non-null default)
|
||||
not_applicable: {
|
||||
icon: SvgGlobe,
|
||||
|
||||
@@ -514,6 +514,9 @@ export enum ValidSources {
|
||||
Bitbucket = "bitbucket",
|
||||
TestRail = "testrail",
|
||||
|
||||
// Craft-specific sources
|
||||
CraftFile = "craft_file",
|
||||
|
||||
// Federated Connectors
|
||||
FederatedSlack = "federated_slack",
|
||||
}
|
||||
@@ -548,6 +551,7 @@ export type ConfigurableSources = Exclude<
|
||||
| ValidSources.IngestionApi
|
||||
| ValidSources.FederatedSlack // is part of ValiedSources.Slack
|
||||
| ValidSources.UserFile
|
||||
| ValidSources.CraftFile // User Library - managed through dedicated UI
|
||||
>;
|
||||
|
||||
export const oauthSupportedSources: ConfigurableSources[] = [
|
||||
|
||||
@@ -331,7 +331,7 @@ function MessageInner(
|
||||
|
||||
{/* Actions */}
|
||||
{actions && (
|
||||
<div className="flex items-start justify-end shrink-0">
|
||||
<div className="flex items-center justify-end shrink-0 self-center pr-2">
|
||||
<Button
|
||||
secondary
|
||||
onClick={onAction}
|
||||
|
||||
Reference in New Issue
Block a user