Compare commits

...

3 Commits

Author SHA1 Message Date
rohoswagger
c4ffe3afcf agents.md improvements 2026-02-11 19:13:30 -08:00
rohoswagger
9139a54926 clean up dead code paths 2026-02-11 19:06:39 -08:00
rohoswagger
cdfde4ca26 feat(craft): file/folder connector 2026-02-11 15:21:10 -08:00
30 changed files with 2486 additions and 66 deletions

View File

@@ -677,7 +677,6 @@ def connector_document_extraction(
logger.debug(f"Indexing batch of documents: {batch_description}")
memory_tracer.increment_and_maybe_trace()
# cc4a
if processing_mode == ProcessingMode.FILE_SYSTEM:
# File system only - write directly to persistent storage,
# skip chunking/embedding/Vespa but still track documents in DB

View File

@@ -228,6 +228,9 @@ class DocumentSource(str, Enum):
MOCK_CONNECTOR = "mock_connector"
# Special case for user files
USER_FILE = "user_file"
# Raw files for Craft sandbox access (xlsx, pptx, docx, etc.)
# Uses RAW_BINARY processing mode - no text extraction
CRAFT_FILE = "craft_file"
class FederatedConnectorSource(str, Enum):

View File

@@ -6,6 +6,8 @@ from collections.abc import Sequence
from datetime import datetime
from datetime import timedelta
from datetime import timezone
from typing import Any
from uuid import UUID
from sqlalchemy import and_
from sqlalchemy import delete
@@ -226,6 +228,50 @@ def get_documents_by_ids(
return list(documents)
def get_documents_by_source(
    db_session: Session,
    source: DocumentSource,
    creator_id: UUID | None = None,
) -> list[DbDocument]:
    """Get all documents associated with a specific source type.

    This queries through the connector relationship to find all documents
    that were indexed by connectors of the given source type.

    Args:
        db_session: Database session
        source: The document source type to filter by
        creator_id: If provided, only return documents from connectors
            created by this user. Filters via ConnectorCredentialPair.

    Returns:
        Distinct documents; a document reachable through multiple cc-pairs
        of the same source appears only once.
    """
    # Join chain: Document -> DocumentByConnectorCredentialPair ->
    # ConnectorCredentialPair -> Connector, so the filter can be applied
    # on Connector.source.
    stmt = (
        select(DbDocument)
        .join(
            DocumentByConnectorCredentialPair,
            DbDocument.id == DocumentByConnectorCredentialPair.id,
        )
        .join(
            ConnectorCredentialPair,
            and_(
                DocumentByConnectorCredentialPair.connector_id
                == ConnectorCredentialPair.connector_id,
                DocumentByConnectorCredentialPair.credential_id
                == ConnectorCredentialPair.credential_id,
            ),
        )
        .join(
            Connector,
            ConnectorCredentialPair.connector_id == Connector.id,
        )
        .where(Connector.source == source)
    )
    if creator_id is not None:
        stmt = stmt.where(ConnectorCredentialPair.creator_id == creator_id)
    # DISTINCT because the joins can fan out when a document belongs to
    # more than one connector-credential pair.
    stmt = stmt.distinct()
    documents = db_session.execute(stmt).scalars().all()
    return list(documents)
def _apply_last_updated_cursor_filter(
stmt: Select,
cursor_last_modified: datetime | None,
@@ -1527,3 +1573,40 @@ def get_document_kg_entities_and_relationships(
def get_num_chunks_for_document(db_session: Session, document_id: str) -> int:
    """Return the stored chunk count for a document, or 0 when unknown.

    A missing row and a NULL/zero chunk_count both map to 0.
    """
    chunk_count = db_session.execute(
        select(DbDocument.chunk_count).where(DbDocument.id == document_id)
    ).scalar_one_or_none()
    return chunk_count if chunk_count else 0
def update_document_metadata__no_commit(
    db_session: Session,
    document_id: str,
    doc_metadata: dict[str, Any],
) -> None:
    """Overwrite the doc_metadata field of a single document.

    Note: Does not commit. The caller owns the transaction and must commit.

    Args:
        db_session: Database session
        document_id: The ID of the document to update
        doc_metadata: The new metadata dictionary to set (full replacement)
    """
    db_session.execute(
        update(DbDocument)
        .where(DbDocument.id == document_id)
        .values(doc_metadata=doc_metadata)
    )
def delete_document_by_id__no_commit(
    db_session: Session,
    document_id: str,
) -> None:
    """Delete one document together with its dependent relationship rows.

    Note: Does not commit. The caller is responsible for committing.

    Implemented as a thin wrapper over delete_documents_complete__no_commit,
    which already handles all foreign key relationships (KG entities,
    relationships, chunk stats, cc pair associations, feedback, tags).
    """
    delete_documents_complete__no_commit(db_session, [document_id])

View File

@@ -60,7 +60,8 @@ class ProcessingMode(str, PyEnum):
"""Determines how documents are processed after fetching."""
REGULAR = "REGULAR" # Full pipeline: chunk → embed → Vespa
FILE_SYSTEM = "FILE_SYSTEM" # Write to file system only
FILE_SYSTEM = "FILE_SYSTEM" # Write to file system only (JSON documents)
RAW_BINARY = "RAW_BINARY" # Write raw binary to S3 (no text extraction)
class SyncType(str, PyEnum):

View File

@@ -27,6 +27,7 @@ from onyx.server.features.build.api.models import BuildConnectorStatus
from onyx.server.features.build.api.models import RateLimitResponse
from onyx.server.features.build.api.rate_limit import get_user_rate_limit_status
from onyx.server.features.build.api.sessions_api import router as sessions_router
from onyx.server.features.build.api.user_library import router as user_library_router
from onyx.server.features.build.db.sandbox import get_sandbox_by_user_id
from onyx.server.features.build.sandbox import get_sandbox_manager
from onyx.server.features.build.session.manager import SessionManager
@@ -51,9 +52,10 @@ def require_onyx_craft_enabled(user: User = Depends(current_user)) -> User:
router = APIRouter(prefix="/build", dependencies=[Depends(require_onyx_craft_enabled)])
# Include sub-routers for sessions and messages
# Include sub-routers for sessions, messages, and user library
router.include_router(sessions_router, tags=["build"])
router.include_router(messages_router, tags=["build"])
router.include_router(user_library_router, tags=["build"])
# -----------------------------------------------------------------------------
@@ -86,14 +88,24 @@ def get_build_connectors(
On the build configure page, all users (including admins) only see connectors
they own/created. Users can create new connectors if they don't have one of a type.
"""
cc_pairs = get_connector_credential_pairs_for_user(
# Fetch both FILE_SYSTEM (standard connectors) and RAW_BINARY (User Library) connectors
file_system_cc_pairs = get_connector_credential_pairs_for_user(
db_session=db_session,
user=user,
get_editable=False,
eager_load_connector=True,
eager_load_credential=True,
processing_mode=ProcessingMode.FILE_SYSTEM, # Only show FILE_SYSTEM connectors
processing_mode=ProcessingMode.FILE_SYSTEM,
)
raw_binary_cc_pairs = get_connector_credential_pairs_for_user(
db_session=db_session,
user=user,
get_editable=False,
eager_load_connector=True,
eager_load_credential=True,
processing_mode=ProcessingMode.RAW_BINARY,
)
cc_pairs = file_system_cc_pairs + raw_binary_cc_pairs
# Filter to only show connectors created by the current user
# All users (including admins) must create their own connectors on the build configure page

View File

@@ -0,0 +1,871 @@
"""API endpoints for User Library file management in Craft.
This module provides endpoints for uploading and managing raw binary files
(xlsx, pptx, docx, csv, etc.) that are stored directly in S3 for sandbox access.
Files are stored at:
s3://{bucket}/{tenant_id}/knowledge/{user_id}/user_library/{path}
And synced to sandbox at:
/workspace/files/user_library/{path}
Known Issues / TODOs:
- Memory: Upload endpoints read entire file content into memory (up to 500MB).
Should be refactored to stream uploads directly to S3 via multipart upload
for better memory efficiency under concurrent load.
- Transaction safety: Multi-file uploads are not atomic. If the endpoint fails
mid-batch (e.g., file 3 of 5 exceeds storage quota), files 1-2 are already
persisted to S3 and DB. A partial upload is not catastrophic but the response
implies atomicity that doesn't exist.
"""
import hashlib
import mimetypes
import re
import zipfile
from datetime import datetime
from datetime import timezone
from io import BytesIO
from typing import Any
from uuid import UUID
from fastapi import APIRouter
from fastapi import Depends
from fastapi import File
from fastapi import Form
from fastapi import HTTPException
from fastapi import Query
from fastapi import UploadFile
from pydantic import BaseModel
from sqlalchemy.orm import Session
from onyx.auth.users import current_user
from onyx.background.celery.versioned_apps.client import app as celery_app
from onyx.configs.constants import DocumentSource
from onyx.configs.constants import OnyxCeleryQueues
from onyx.configs.constants import OnyxCeleryTask
from onyx.db.connector_credential_pair import update_connector_credential_pair
from onyx.db.document import upsert_document_by_connector_credential_pair
from onyx.db.document import upsert_documents
from onyx.db.engine.sql_engine import get_session
from onyx.db.enums import ConnectorCredentialPairStatus
from onyx.db.models import User
from onyx.document_index.interfaces import DocumentMetadata
from onyx.server.features.build.configs import USER_LIBRARY_MAX_FILE_SIZE_BYTES
from onyx.server.features.build.configs import USER_LIBRARY_MAX_FILES_PER_UPLOAD
from onyx.server.features.build.configs import USER_LIBRARY_MAX_TOTAL_SIZE_BYTES
from onyx.server.features.build.indexing.persistent_document_writer import (
get_persistent_document_writer,
)
from onyx.server.features.build.indexing.persistent_document_writer import (
S3PersistentDocumentWriter,
)
from onyx.server.features.build.utils import sanitize_filename as api_sanitize_filename
from onyx.utils.logger import setup_logger
from shared_configs.contextvars import get_current_tenant_id
logger = setup_logger()
router = APIRouter(prefix="/user-library")
# =============================================================================
# Pydantic Models
# =============================================================================
class LibraryEntryResponse(BaseModel):
    """Response for a single library entry (file or directory)."""

    id: str  # document_id (deterministic; derived from user id + path hash)
    name: str  # basename of the entry
    path: str  # full path within the user library, starting with "/"
    is_directory: bool  # True for virtual directories (no storage object)
    file_size: int | None  # size in bytes; None for directories
    mime_type: str | None  # MIME type; None for directories or when unknown
    sync_enabled: bool  # False when syncing to the sandbox is disabled
    created_at: datetime
    # Optional nesting; endpoints may return a flat list and leave this None.
    children: list["LibraryEntryResponse"] | None = None
class CreateDirectoryRequest(BaseModel):
    """Request to create a virtual directory."""

    name: str  # directory name; sanitized server-side before use
    parent_path: str = "/"  # parent directory; defaults to the library root
class UploadResponse(BaseModel):
    """Response after successful file upload."""

    entries: list[LibraryEntryResponse]  # one entry per stored file
    total_uploaded: int  # number of entries actually stored
    total_size_bytes: int  # cumulative size of the stored files
# =============================================================================
# Helper Functions
# =============================================================================
def _sanitize_path(path: str) -> str:
"""Sanitize a file path, removing traversal attempts and normalizing.
Removes '..' and '.' segments and ensures the path starts with '/'.
Only allows alphanumeric characters, hyphens, underscores, dots, spaces,
and forward slashes. All other characters are stripped.
"""
parts = path.split("/")
sanitized_parts: list[str] = []
for p in parts:
if not p or p == ".." or p == ".":
continue
# Strip any character not in the whitelist
cleaned = re.sub(r"[^a-zA-Z0-9\-_. ]", "", p)
if cleaned:
sanitized_parts.append(cleaned)
return "/" + "/".join(sanitized_parts)
def _build_document_id(user_id: str, path: str) -> str:
"""Build a document ID for a craft file.
Deterministic: re-uploading the same file to the same path will produce the
same document ID, allowing upsert to overwrite the previous record.
Uses a hash of the path to avoid collisions from separator replacement
(e.g., "/a/b_c" vs "/a_b/c" would collide with naive slash-to-underscore).
"""
path_hash = hashlib.sha256(path.encode()).hexdigest()[:16]
return f"CRAFT_FILE__{user_id}__{path_hash}"
def _trigger_sandbox_sync(
    user_id: str, tenant_id: str, source: str | None = None
) -> None:
    """Enqueue the Celery task that syncs library files into the sandbox.

    Args:
        user_id: The user ID whose sandbox should be synced
        tenant_id: The tenant ID for S3 path construction
        source: Optional source type (e.g., "user_library"). If specified,
            only syncs that source's directory with --delete flag.
    """
    task_kwargs = {
        "user_id": user_id,
        "tenant_id": tenant_id,
        "source": source,
    }
    celery_app.send_task(
        OnyxCeleryTask.SANDBOX_FILE_SYNC,
        kwargs=task_kwargs,
        queue=OnyxCeleryQueues.SANDBOX,
    )
def _get_user_storage_bytes(db_session: Session, user_id: UUID) -> int:
    """Get total storage usage for a user's library files.

    Uses SQL aggregation to sum file_size from doc_metadata JSONB for all
    CRAFT_FILE documents owned by this user, avoiding loading all documents
    into Python memory.

    Returns:
        Total bytes used; 0 when the user has no library files.
    """
    # NOTE(review): imports are function-local here — presumably to avoid
    # import cycles with onyx.db.models at module load time; confirm.
    from sqlalchemy import and_
    from sqlalchemy import cast
    from sqlalchemy import func
    from sqlalchemy import Integer
    from sqlalchemy import select
    from onyx.db.models import Connector
    from onyx.db.models import ConnectorCredentialPair
    from onyx.db.models import Document as DbDocument
    from onyx.db.models import DocumentByConnectorCredentialPair
    # SUM(CAST(doc_metadata->>'file_size' AS int)), coalesced to 0 so a user
    # with no files yields 0 rather than NULL.
    stmt = (
        select(
            func.coalesce(
                func.sum(
                    cast(
                        DbDocument.doc_metadata["file_size"].as_string(),
                        Integer,
                    )
                ),
                0,
            )
        )
        .join(
            DocumentByConnectorCredentialPair,
            DbDocument.id == DocumentByConnectorCredentialPair.id,
        )
        .join(
            ConnectorCredentialPair,
            and_(
                DocumentByConnectorCredentialPair.connector_id
                == ConnectorCredentialPair.connector_id,
                DocumentByConnectorCredentialPair.credential_id
                == ConnectorCredentialPair.credential_id,
            ),
        )
        .join(
            Connector,
            ConnectorCredentialPair.connector_id == Connector.id,
        )
        # Only this user's CRAFT_FILE documents...
        .where(Connector.source == DocumentSource.CRAFT_FILE)
        .where(ConnectorCredentialPair.creator_id == user_id)
        # ...excluding virtual directories (is_directory true), which carry
        # no file_size. IS NOT TRUE also keeps rows where the key is absent.
        .where(DbDocument.doc_metadata["is_directory"].as_boolean().is_not(True))
    )
    result = db_session.execute(stmt).scalar()
    return int(result or 0)
def _get_or_create_craft_connector(db_session: Session, user: User) -> tuple[int, int]:
    """Get or create the CRAFT_FILE connector for a user.

    Returns:
        Tuple of (connector_id, credential_id)

    Note: We need to create a credential even though CRAFT_FILE doesn't require
    authentication. This is because Onyx's connector-credential pair system
    requires a credential for all connectors. The credential is empty ({}).

    This function handles recovery from partial creation failures by detecting
    orphaned connectors (connectors without cc_pairs) and completing their setup.

    Side effects: may create a connector, a credential, and a cc_pair, and
    commits the session at the end.
    """
    # NOTE(review): function-local imports — presumably to avoid circular
    # imports at module load; confirm.
    from onyx.connectors.models import InputType
    from onyx.db.connector import create_connector
    from onyx.db.connector import fetch_connectors
    from onyx.db.connector_credential_pair import add_credential_to_connector
    from onyx.db.connector_credential_pair import (
        get_connector_credential_pairs_for_user,
    )
    from onyx.db.credentials import create_credential
    from onyx.db.credentials import fetch_credentials_for_user
    from onyx.db.enums import AccessType
    from onyx.db.enums import ProcessingMode
    from onyx.server.documents.models import ConnectorBase
    from onyx.server.documents.models import CredentialBase
    # Check if user already has a complete CRAFT_FILE cc_pair
    cc_pairs = get_connector_credential_pairs_for_user(
        db_session=db_session,
        user=user,
        get_editable=False,
        eager_load_connector=True,
        eager_load_credential=True,
        processing_mode=ProcessingMode.RAW_BINARY,
    )
    for cc_pair in cc_pairs:
        if cc_pair.connector.source == DocumentSource.CRAFT_FILE:
            # Fast path: everything already exists.
            return cc_pair.connector.id, cc_pair.credential.id
    # Check for orphaned connector (created but cc_pair creation failed previously)
    # An orphaned connector has no cc_pairs. We check credentials to verify
    # it belongs to this user (Connector doesn't have creator_id directly).
    existing_connectors = fetch_connectors(
        db_session, sources=[DocumentSource.CRAFT_FILE]
    )
    orphaned_connector = None
    for conn in existing_connectors:
        if conn.name != "User Library":
            continue
        # Verify this connector has no cc_pairs (i.e., is actually orphaned)
        # and that a matching credential exists for this user
        # NOTE(review): `not conn.credentials` is used as a proxy for "no
        # cc_pairs"; a connector with a linked credential but no cc_pair
        # would not be detected here — confirm this is intended.
        if not conn.credentials:
            orphaned_connector = conn
            break
    if orphaned_connector:
        connector_id = orphaned_connector.id
        logger.info(
            f"Found orphaned User Library connector {connector_id}, completing setup"
        )
    else:
        # Create new connector
        connector_data = ConnectorBase(
            name="User Library",
            source=DocumentSource.CRAFT_FILE,
            input_type=InputType.LOAD_STATE,
            connector_specific_config={"disabled_paths": []},
            refresh_freq=None,
            prune_freq=None,
        )
        connector_response = create_connector(
            db_session=db_session,
            connector_data=connector_data,
        )
        connector_id = connector_response.id
    # Try to reuse an existing User Library credential for this user
    existing_credentials = fetch_credentials_for_user(
        db_session=db_session,
        user=user,
    )
    credential = None
    for cred in existing_credentials:
        if (
            cred.source == DocumentSource.CRAFT_FILE
            and cred.name == "User Library Credential"
        ):
            credential = cred
            break
    if credential is None:
        # Create credential (empty - no auth needed, but required by the system)
        credential_data = CredentialBase(
            credential_json={},
            admin_public=False,
            source=DocumentSource.CRAFT_FILE,
            name="User Library Credential",
        )
        credential = create_credential(
            credential_data=credential_data,
            user=user,
            db_session=db_session,
        )
    # Link them with RAW_BINARY processing mode
    add_credential_to_connector(
        db_session=db_session,
        connector_id=connector_id,
        credential_id=credential.id,
        user=user,
        cc_pair_name="User Library",
        access_type=AccessType.PRIVATE,
        groups=None,
        processing_mode=ProcessingMode.RAW_BINARY,
    )
    db_session.commit()
    return connector_id, credential.id
# =============================================================================
# API Endpoints
# =============================================================================
@router.get("/tree")
def get_library_tree(
user: User = Depends(current_user),
db_session: Session = Depends(get_session),
) -> list[LibraryEntryResponse]:
"""Get user's uploaded files as a tree structure.
Returns all CRAFT_FILE documents for the user, organized hierarchically.
"""
from onyx.db.document import get_documents_by_source
# Get CRAFT_FILE documents for this user (filtered at SQL level)
user_docs = get_documents_by_source(
db_session=db_session,
source=DocumentSource.CRAFT_FILE,
creator_id=user.id,
)
# Build tree structure
entries: list[LibraryEntryResponse] = []
now = datetime.now(timezone.utc)
for doc in user_docs:
doc_metadata = doc.doc_metadata or {}
entries.append(
LibraryEntryResponse(
id=doc.id,
name=doc.semantic_id.split("/")[-1] if doc.semantic_id else "unknown",
path=doc.semantic_id or "",
is_directory=doc_metadata.get("is_directory", False),
file_size=doc_metadata.get("file_size"),
mime_type=doc_metadata.get("mime_type"),
sync_enabled=not doc_metadata.get("sync_disabled", False),
created_at=doc.last_modified or now,
)
)
return entries
@router.post("/upload")
async def upload_files(
files: list[UploadFile] = File(...),
path: str = Form("/"),
user: User = Depends(current_user),
db_session: Session = Depends(get_session),
) -> UploadResponse:
"""Upload files directly to S3 and track in PostgreSQL.
Files are stored as raw binary (no text extraction) for access by
the sandbox agent using Python libraries like openpyxl, python-pptx, etc.
"""
tenant_id = get_current_tenant_id()
if tenant_id is None:
raise HTTPException(status_code=500, detail="Tenant ID not found")
# Validate file count
if len(files) > USER_LIBRARY_MAX_FILES_PER_UPLOAD:
raise HTTPException(
status_code=400,
detail=f"Too many files. Maximum is {USER_LIBRARY_MAX_FILES_PER_UPLOAD} per upload.",
)
# Check cumulative storage usage
existing_usage = _get_user_storage_bytes(db_session, user.id)
# Get or create connector
connector_id, credential_id = _get_or_create_craft_connector(db_session, user)
# Get the persistent document writer
writer = get_persistent_document_writer(
user_id=str(user.id),
tenant_id=tenant_id,
)
uploaded_entries: list[LibraryEntryResponse] = []
total_size = 0
now = datetime.now(timezone.utc)
# Sanitize the base path
base_path = _sanitize_path(path)
for file in files:
# TODO: Stream directly to S3 via multipart upload instead of reading
# entire file into memory. With 500MB max file size, this can OOM under
# concurrent uploads.
content = await file.read()
file_size = len(content)
# Validate individual file size
if file_size > USER_LIBRARY_MAX_FILE_SIZE_BYTES:
raise HTTPException(
status_code=400,
detail=f"File '{file.filename}' exceeds maximum size of {USER_LIBRARY_MAX_FILE_SIZE_BYTES // (1024*1024)}MB",
)
# Validate cumulative storage (existing + this upload batch)
total_size += file_size
if existing_usage + total_size > USER_LIBRARY_MAX_TOTAL_SIZE_BYTES:
raise HTTPException(
status_code=400,
detail=f"Total storage would exceed maximum of {USER_LIBRARY_MAX_TOTAL_SIZE_BYTES // (1024*1024*1024)}GB",
)
# Sanitize filename
safe_filename = api_sanitize_filename(file.filename or "unnamed")
file_path = f"{base_path}/{safe_filename}".replace("//", "/")
# Write raw binary to storage
storage_key = writer.write_raw_file(
path=file_path,
content=content,
content_type=file.content_type,
)
# Track in document table
doc_id = _build_document_id(str(user.id), file_path)
doc_metadata = DocumentMetadata(
connector_id=connector_id,
credential_id=credential_id,
document_id=doc_id,
semantic_identifier=f"user_library{file_path}",
first_link=storage_key,
doc_metadata={
"storage_key": storage_key,
"file_path": file_path,
"file_size": file_size,
"mime_type": file.content_type,
"is_directory": False,
},
)
upsert_documents(db_session, [doc_metadata])
upsert_document_by_connector_credential_pair(
db_session, connector_id, credential_id, [doc_id]
)
uploaded_entries.append(
LibraryEntryResponse(
id=doc_id,
name=safe_filename,
path=file_path,
is_directory=False,
file_size=file_size,
mime_type=file.content_type,
sync_enabled=True,
created_at=now,
)
)
# Mark connector as having succeeded (sets last_successful_index_time)
# This allows the demo data toggle to be disabled
update_connector_credential_pair(
db_session=db_session,
connector_id=connector_id,
credential_id=credential_id,
status=ConnectorCredentialPairStatus.ACTIVE,
net_docs=len(uploaded_entries),
run_dt=now,
)
# Trigger sandbox sync for user_library source only
_trigger_sandbox_sync(str(user.id), tenant_id, source="user_library")
logger.info(
f"Uploaded {len(uploaded_entries)} files ({total_size} bytes) for user {user.id}"
)
return UploadResponse(
entries=uploaded_entries,
total_uploaded=len(uploaded_entries),
total_size_bytes=total_size,
)
@router.post("/upload-zip")
async def upload_zip(
file: UploadFile = File(...),
path: str = Form("/"),
user: User = Depends(current_user),
db_session: Session = Depends(get_session),
) -> UploadResponse:
"""Upload and extract a zip file, storing each extracted file to S3.
Preserves the directory structure from the zip file.
"""
tenant_id = get_current_tenant_id()
if tenant_id is None:
raise HTTPException(status_code=500, detail="Tenant ID not found")
# Read zip content
content = await file.read()
if len(content) > USER_LIBRARY_MAX_TOTAL_SIZE_BYTES:
raise HTTPException(
status_code=400,
detail=f"Zip file exceeds maximum size of {USER_LIBRARY_MAX_TOTAL_SIZE_BYTES // (1024*1024*1024)}GB",
)
# Check cumulative storage usage
existing_usage = _get_user_storage_bytes(db_session, user.id)
# Get or create connector
connector_id, credential_id = _get_or_create_craft_connector(db_session, user)
# Get the persistent document writer
writer = get_persistent_document_writer(
user_id=str(user.id),
tenant_id=tenant_id,
)
uploaded_entries: list[LibraryEntryResponse] = []
total_size = 0
base_path = _sanitize_path(path)
now = datetime.now(timezone.utc)
try:
with zipfile.ZipFile(BytesIO(content), "r") as zip_file:
# Check file count
if len(zip_file.namelist()) > USER_LIBRARY_MAX_FILES_PER_UPLOAD:
raise HTTPException(
status_code=400,
detail=f"Zip contains too many files. Maximum is {USER_LIBRARY_MAX_FILES_PER_UPLOAD}.",
)
# Zip bomb protection: check total decompressed size before extracting
declared_total = sum(
info.file_size for info in zip_file.infolist() if not info.is_dir()
)
max_decompressed = USER_LIBRARY_MAX_TOTAL_SIZE_BYTES
if existing_usage + declared_total > max_decompressed:
raise HTTPException(
status_code=400,
detail=(
f"Zip decompressed size ({declared_total // (1024*1024)}MB) "
f"would exceed storage limit."
),
)
for zip_info in zip_file.infolist():
# Skip directories
if zip_info.is_dir():
continue
# Skip hidden files and __MACOSX
if (
zip_info.filename.startswith("__MACOSX")
or "/." in zip_info.filename
):
continue
# Read file content
file_content = zip_file.read(zip_info.filename)
file_size = len(file_content)
# Validate individual file size
if file_size > USER_LIBRARY_MAX_FILE_SIZE_BYTES:
logger.warning(f"Skipping '{zip_info.filename}' - exceeds max size")
continue
total_size += file_size
# Validate cumulative storage
if existing_usage + total_size > USER_LIBRARY_MAX_TOTAL_SIZE_BYTES:
raise HTTPException(
status_code=400,
detail=f"Total storage would exceed maximum of {USER_LIBRARY_MAX_TOTAL_SIZE_BYTES // (1024*1024*1024)}GB",
)
# Build path preserving zip structure
sanitized_zip_path = _sanitize_path(zip_info.filename)
file_path = f"{base_path}{sanitized_zip_path}".replace("//", "/")
file_name = file_path.split("/")[-1]
# Guess content type
content_type, _ = mimetypes.guess_type(file_name)
# Write raw binary to storage
storage_key = writer.write_raw_file(
path=file_path,
content=file_content,
content_type=content_type,
)
# Track in document table
doc_id = _build_document_id(str(user.id), file_path)
doc_metadata = DocumentMetadata(
connector_id=connector_id,
credential_id=credential_id,
document_id=doc_id,
semantic_identifier=f"user_library{file_path}",
first_link=storage_key,
doc_metadata={
"storage_key": storage_key,
"file_path": file_path,
"file_size": file_size,
"mime_type": content_type,
"is_directory": False,
},
)
upsert_documents(db_session, [doc_metadata])
upsert_document_by_connector_credential_pair(
db_session, connector_id, credential_id, [doc_id]
)
uploaded_entries.append(
LibraryEntryResponse(
id=doc_id,
name=file_name,
path=file_path,
is_directory=False,
file_size=file_size,
mime_type=content_type,
sync_enabled=True,
created_at=now,
)
)
except zipfile.BadZipFile:
raise HTTPException(status_code=400, detail="Invalid zip file")
# Mark connector as having succeeded (sets last_successful_index_time)
# This allows the demo data toggle to be disabled
update_connector_credential_pair(
db_session=db_session,
connector_id=connector_id,
credential_id=credential_id,
status=ConnectorCredentialPairStatus.ACTIVE,
net_docs=len(uploaded_entries),
run_dt=now,
)
# Trigger sandbox sync for user_library source only
_trigger_sandbox_sync(str(user.id), tenant_id, source="user_library")
logger.info(
f"Extracted {len(uploaded_entries)} files ({total_size} bytes) from zip for user {user.id}"
)
return UploadResponse(
entries=uploaded_entries,
total_uploaded=len(uploaded_entries),
total_size_bytes=total_size,
)
@router.post("/directories")
def create_directory(
request: CreateDirectoryRequest,
user: User = Depends(current_user),
db_session: Session = Depends(get_session),
) -> LibraryEntryResponse:
"""Create a virtual directory.
Directories are tracked as documents with is_directory=True.
No S3 object is created (S3 doesn't have real directories).
"""
# Get or create connector
connector_id, credential_id = _get_or_create_craft_connector(db_session, user)
# Build path
parent_path = _sanitize_path(request.parent_path)
safe_name = api_sanitize_filename(request.name)
dir_path = f"{parent_path}/{safe_name}".replace("//", "/")
# Track in document table
doc_id = _build_document_id(str(user.id), dir_path)
doc_metadata = DocumentMetadata(
connector_id=connector_id,
credential_id=credential_id,
document_id=doc_id,
semantic_identifier=f"user_library{dir_path}",
first_link="",
doc_metadata={
"is_directory": True,
},
)
upsert_documents(db_session, [doc_metadata])
upsert_document_by_connector_credential_pair(
db_session, connector_id, credential_id, [doc_id]
)
db_session.commit()
return LibraryEntryResponse(
id=doc_id,
name=safe_name,
path=dir_path,
is_directory=True,
file_size=None,
mime_type=None,
sync_enabled=True,
created_at=datetime.now(timezone.utc),
)
@router.patch("/files/{document_id}/toggle")
def toggle_file_sync(
document_id: str,
enabled: bool = Query(...),
user: User = Depends(current_user),
db_session: Session = Depends(get_session),
) -> dict[str, Any]:
"""Enable/disable syncing a file to sandboxes.
When sync is disabled, the file's metadata is updated with sync_disabled=True.
The sandbox sync task will exclude these files when syncing to the sandbox.
If the item is a directory, all children are also toggled.
"""
from onyx.db.document import get_document
from onyx.db.document import get_documents_by_source
from onyx.db.document import update_document_metadata__no_commit
tenant_id = get_current_tenant_id()
if tenant_id is None:
raise HTTPException(status_code=500, detail="Tenant ID not found")
# Verify ownership
user_prefix = f"CRAFT_FILE__{user.id}__"
if not document_id.startswith(user_prefix):
raise HTTPException(
status_code=403, detail="Not authorized to modify this file"
)
# Get document
doc = get_document(document_id, db_session)
if doc is None:
raise HTTPException(status_code=404, detail="File not found")
# Update metadata for this document
new_metadata = dict(doc.doc_metadata or {})
new_metadata["sync_disabled"] = not enabled
update_document_metadata__no_commit(db_session, document_id, new_metadata)
# If this is a directory, also toggle all children
doc_metadata = doc.doc_metadata or {}
if doc_metadata.get("is_directory"):
folder_path = doc.semantic_id
if folder_path:
# Get CRAFT_FILE documents for this user (filtered at SQL level)
all_docs = get_documents_by_source(
db_session=db_session,
source=DocumentSource.CRAFT_FILE,
creator_id=user.id,
)
# Find children of this folder
for child_doc in all_docs:
if child_doc.semantic_id and child_doc.semantic_id.startswith(
folder_path + "/"
):
# Update metadata
child_metadata = dict(child_doc.doc_metadata or {})
child_metadata["sync_disabled"] = not enabled
update_document_metadata__no_commit(
db_session, child_doc.id, child_metadata
)
db_session.commit()
return {"success": True, "sync_enabled": enabled}
@router.delete("/files/{document_id}")
def delete_file(
document_id: str,
user: User = Depends(current_user),
db_session: Session = Depends(get_session),
) -> dict[str, Any]:
"""Delete a file from both S3 and the document table."""
from onyx.db.document import delete_document_by_id__no_commit
from onyx.db.document import get_document
tenant_id = get_current_tenant_id()
if tenant_id is None:
raise HTTPException(status_code=500, detail="Tenant ID not found")
# Verify ownership
user_prefix = f"CRAFT_FILE__{user.id}__"
if not document_id.startswith(user_prefix):
raise HTTPException(
status_code=403, detail="Not authorized to delete this file"
)
# Get document
doc = get_document(document_id, db_session)
if doc is None:
raise HTTPException(status_code=404, detail="File not found")
# Delete from storage if it's a file (not directory)
doc_metadata = doc.doc_metadata or {}
if not doc_metadata.get("is_directory"):
file_path = doc_metadata.get("file_path")
if file_path:
writer = get_persistent_document_writer(
user_id=str(user.id),
tenant_id=tenant_id,
)
try:
if isinstance(writer, S3PersistentDocumentWriter):
writer.delete_raw_file_by_path(file_path)
else:
writer.delete_raw_file(file_path)
except Exception as e:
logger.warning(f"Failed to delete file at path {file_path}: {e}")
else:
# Fallback for documents created before file_path was stored
storage_key = doc_metadata.get("storage_key") or doc_metadata.get("s3_key")
if storage_key:
writer = get_persistent_document_writer(
user_id=str(user.id),
tenant_id=tenant_id,
)
try:
if isinstance(writer, S3PersistentDocumentWriter):
writer.delete_raw_file(storage_key)
else:
logger.warning(
f"Cannot delete file in local mode without file_path: {document_id}"
)
except Exception as e:
logger.warning(
f"Failed to delete storage object {storage_key}: {e}"
)
# Delete from document table
delete_document_by_id__no_commit(db_session, document_id)
db_session.commit()
# Trigger sync to apply changes
_trigger_sandbox_sync(str(user.id), tenant_id, source="user_library")
return {"success": True, "deleted": document_id}

View File

@@ -132,3 +132,25 @@ ACP_MESSAGE_TIMEOUT = float(os.environ.get("ACP_MESSAGE_TIMEOUT", "900.0"))
# Free users always get 5 messages total (not configurable)
# Per-user overrides are managed via PostHog feature flag "craft-has-usage-limits"
CRAFT_PAID_USER_RATE_LIMIT = int(os.environ.get("CRAFT_PAID_USER_RATE_LIMIT", "25"))
# ============================================================================
# User Library Configuration
# For user-uploaded raw files (xlsx, pptx, docx, etc.) in Craft
# ============================================================================
# Maximum size per file in MB (default 500MB); override via env var
USER_LIBRARY_MAX_FILE_SIZE_MB: int = int(
    os.environ.get("USER_LIBRARY_MAX_FILE_SIZE_MB", "500")
)
# Derived byte value (MB -> bytes)
USER_LIBRARY_MAX_FILE_SIZE_BYTES: int = USER_LIBRARY_MAX_FILE_SIZE_MB * 1024 * 1024
# Maximum total storage per user in GB (default 10GB); override via env var
USER_LIBRARY_MAX_TOTAL_SIZE_GB: int = int(
    os.environ.get("USER_LIBRARY_MAX_TOTAL_SIZE_GB", "10")
)
# Derived byte value (GB -> bytes)
USER_LIBRARY_MAX_TOTAL_SIZE_BYTES: int = USER_LIBRARY_MAX_TOTAL_SIZE_GB * 1024 * 1024 * 1024
# Maximum files per single upload request (default 100)
USER_LIBRARY_MAX_FILES_PER_UPLOAD: int = int(
    os.environ.get("USER_LIBRARY_MAX_FILES_PER_UPLOAD", "100")
)

View File

@@ -272,6 +272,70 @@ class PersistentDocumentWriter:
logger.debug(f"Wrote document to {path}")
def write_raw_file(
    self,
    path: str,
    content: bytes,
    content_type: str | None = None,  # noqa: ARG002
) -> str:
    """Write a raw binary file to the local filesystem (for User Library).

    Unlike write_documents, which serializes Document objects to JSON, this
    writes raw binary content directly. Used for user-uploaded files like
    xlsx, pptx.

    Args:
        path: Relative path within user's library
            (e.g., "/project-data/financials.xlsx")
        content: Raw binary content to write
        content_type: MIME type of the file (unused locally; kept for
            interface parity with S3PersistentDocumentWriter)

    Returns:
        Full filesystem path where file was written

    Raises:
        ValueError: If the path would resolve outside the user's library
            root (e.g., via ".." components).
    """
    # Layout: {base_path}/{tenant}/knowledge/{user}/user_library/{path}
    library_root = (
        self.base_path
        / self.tenant_id
        / "knowledge"
        / self.user_id
        / "user_library"
    )
    full_path = library_root / path.lstrip("/")

    # Defense in depth: user-supplied paths must stay inside the user's
    # library root - reject anything that escapes via ".." traversal.
    if not full_path.resolve().is_relative_to(library_root.resolve()):
        raise ValueError(f"Path escapes user library root: {path}")

    # Create parent directories if they don't exist
    full_path.parent.mkdir(parents=True, exist_ok=True)

    # Write the raw binary content
    with open(full_path, "wb") as f:
        f.write(content)

    logger.debug(f"Wrote raw file to {full_path}")
    return str(full_path)
def delete_raw_file(self, path: str) -> None:
    """Delete a raw file from the local filesystem (for User Library).

    Missing files are logged and ignored, so deletion is effectively
    idempotent.

    Args:
        path: Relative path within user's library
            (e.g., "/project-data/financials.xlsx")

    Raises:
        ValueError: If the path would resolve outside the user's library
            root (e.g., via ".." components).
    """
    # Layout: {base_path}/{tenant}/knowledge/{user}/user_library/{path}
    library_root = (
        self.base_path
        / self.tenant_id
        / "knowledge"
        / self.user_id
        / "user_library"
    )
    full_path = library_root / path.lstrip("/")

    # Defense in depth: a crafted path must not be able to delete files
    # outside the user's library root.
    if not full_path.resolve().is_relative_to(library_root.resolve()):
        raise ValueError(f"Path escapes user library root: {path}")

    if full_path.exists():
        full_path.unlink()
        logger.debug(f"Deleted raw file at {full_path}")
    else:
        # Best-effort: warn rather than raise if the file is already gone.
        logger.warning(f"File not found for deletion: {full_path}")
class S3PersistentDocumentWriter:
"""Writes indexed documents to S3 with hierarchical structure.
@@ -375,6 +439,73 @@ class S3PersistentDocumentWriter:
logger.error(f"Failed to write to S3: {e}")
raise
def write_raw_file(
    self,
    path: str,
    content: bytes,
    content_type: str | None = None,
) -> str:
    """Upload raw binary content to S3 under the user's library prefix.

    Unlike write_documents (which serializes Document objects to JSON),
    the bytes are stored verbatim. Used for user-uploaded files such as
    xlsx and pptx.

    Args:
        path: Relative path within user's library
            (e.g., "/project-data/financials.xlsx")
        content: Raw bytes to store
        content_type: MIME type; falls back to "application/octet-stream"

    Returns:
        The S3 key the object was written to
    """
    # Key layout: {tenant}/knowledge/{user}/user_library/{path}
    relative = path.lstrip("/")
    s3_key = (
        f"{self.tenant_id}/knowledge/{self.user_id}/user_library/{relative}"
    )

    client = self._get_s3_client()
    try:
        client.put_object(
            Bucket=self.bucket,
            Key=s3_key,
            Body=content,
            ContentType=content_type or "application/octet-stream",
        )
    except ClientError as e:
        logger.error(f"Failed to write raw file to S3: {e}")
        raise
    logger.debug(f"Wrote raw file to s3://{self.bucket}/{s3_key}")
    return s3_key
def delete_raw_file(self, s3_key: str) -> None:
    """Remove a raw User Library object from S3.

    Args:
        s3_key: Full S3 key of the file to delete
    """
    client = self._get_s3_client()
    try:
        client.delete_object(Bucket=self.bucket, Key=s3_key)
    except ClientError as e:
        logger.error(f"Failed to delete raw file from S3: {e}")
        raise
    logger.debug(f"Deleted raw file at s3://{self.bucket}/{s3_key}")
def delete_raw_file_by_path(self, path: str) -> None:
    """Delete a raw User Library file given its library-relative path.

    Builds the canonical S3 key for this user's library and delegates to
    delete_raw_file.

    Args:
        path: Relative path within user's library
            (e.g., "/project-data/financials.xlsx")
    """
    relative = path.lstrip("/")
    self.delete_raw_file(
        f"{self.tenant_id}/knowledge/{self.user_id}/user_library/{relative}"
    )
def get_persistent_document_writer(
user_id: str,

View File

@@ -125,6 +125,7 @@ class SandboxManager(ABC):
user_work_area: str | None = None,
user_level: str | None = None,
use_demo_data: bool = False,
excluded_user_library_paths: list[str] | None = None,
) -> None:
"""Set up a session workspace within an existing sandbox.
@@ -149,6 +150,9 @@ class SandboxManager(ABC):
user_work_area: User's work area for demo persona (e.g., "engineering")
user_level: User's level for demo persona (e.g., "ic", "manager")
use_demo_data: If True, symlink files/ to demo data; else to user files
excluded_user_library_paths: List of paths within user_library to exclude
from the sandbox (e.g., ["/data/file.xlsx"]). Only applies when
use_demo_data=False. Files at these paths won't be accessible.
Raises:
RuntimeError: If workspace setup fails
@@ -423,7 +427,9 @@ class SandboxManager(ABC):
For Kubernetes backend: Executes `s5cmd sync` in the file-sync sidecar container.
For Local backend: No-op since files are directly accessible via symlink.
This is idempotent - only downloads changed files.
This is idempotent - only downloads changed files. File visibility in
sessions is controlled via filtered symlinks in setup_session_workspace(),
not at the sync level.
Args:
sandbox_id: The sandbox UUID

View File

@@ -61,10 +61,10 @@ CONNECTOR_INFO: dict[str, ConnectorInfoEntry] = {
"file_pattern": "`PAGE_TITLE.json`",
"scan_depth": 1,
},
"org_info": {
"summary": "Organizational structure and user identity",
"file_pattern": "Various JSON files",
"scan_depth": 0,
"user_library": {
"summary": "User-uploaded files (spreadsheets, documents, presentations, etc.)",
"file_pattern": "Any file format",
"scan_depth": 1,
},
}
DEFAULT_SCAN_DEPTH = 1

View File

@@ -1087,6 +1087,7 @@ done
user_work_area: str | None = None,
user_level: str | None = None,
use_demo_data: bool = False,
excluded_user_library_paths: list[str] | None = None,
) -> None:
"""Set up a session workspace within an existing sandbox pod.
@@ -1115,6 +1116,8 @@ done
user_level: User's level for demo persona (e.g., "ic", "manager")
use_demo_data: If True, symlink files/ to /workspace/demo_data;
else to /workspace/files (S3-synced user files)
excluded_user_library_paths: List of paths within user_library/ to exclude
(e.g., ["/data/file.xlsx"]). These files won't be accessible in the session.
Raises:
RuntimeError: If workspace setup fails
@@ -1194,6 +1197,91 @@ printf '%s' '{org_structure_escaped}' > {session_path}/org_info/organization_str
# Create files symlink to demo data (baked into image)
echo "Creating files symlink to demo data: {symlink_target}"
ln -sf {symlink_target} {session_path}/files
"""
elif excluded_user_library_paths:
# User files with exclusions: create filtered symlink structure
# Instead of symlinking to /workspace/files directly, create a directory
# with symlinks to each top-level item, then filter user_library contents
#
# Use newline-delimited exclusion list written via heredoc to avoid
# shell injection from path names. Paths are also pre-sanitized by
# _sanitize_path() which enforces an alphanumeric whitelist.
# The heredoc delimiter is randomized to prevent a filename from
# accidentally terminating the heredoc early.
excluded_paths_lines = "\n".join(
p.lstrip("/") for p in excluded_user_library_paths
)
heredoc_delim = f"_EXCL_{uuid4().hex[:12]}_"
files_symlink_setup = f"""
# Create filtered files directory with exclusions
mkdir -p {session_path}/files
# Symlink all top-level directories except user_library
for item in /workspace/files/*; do
[ -e "$item" ] || continue
name=$(basename "$item")
if [ "$name" != "user_library" ]; then
ln -sf "$item" {session_path}/files/"$name"
fi
done
# Write excluded paths to a temp file (one per line, via heredoc for safety)
EXCL_FILE=$(mktemp)
cat > "$EXCL_FILE" << '{heredoc_delim}'
{excluded_paths_lines}
{heredoc_delim}
# Check if a relative path is excluded (exact match or child of excluded dir)
is_excluded() {{
local rel_path="$1"
while IFS= read -r excl || [ -n "$excl" ]; do
[ -z "$excl" ] && continue
if [ "$rel_path" = "$excl" ]; then
return 0
fi
case "$rel_path" in
"$excl"/*) return 0 ;;
esac
done < "$EXCL_FILE"
return 1
}}
# Recursively create symlinks for non-excluded files
create_filtered_symlinks() {{
src_dir="$1"
dst_dir="$2"
rel_base="$3"
for item in "$src_dir"/*; do
[ -e "$item" ] || continue
name=$(basename "$item")
if [ -n "$rel_base" ]; then
rel_path="$rel_base/$name"
else
rel_path="$name"
fi
if is_excluded "$rel_path"; then
continue
fi
if [ -d "$item" ]; then
mkdir -p "$dst_dir/$name"
create_filtered_symlinks "$item" "$dst_dir/$name" "$rel_path"
rmdir "$dst_dir/$name" 2>/dev/null || true
else
ln -sf "$item" "$dst_dir/$name"
fi
done
}}
if [ -d "/workspace/files/user_library" ]; then
mkdir -p {session_path}/files/user_library
create_filtered_symlinks /workspace/files/user_library {session_path}/files/user_library ""
rmdir {session_path}/files/user_library 2>/dev/null || true
fi
rm -f "$EXCL_FILE"
"""
else:
# Normal mode: symlink to user's S3-synced knowledge files
@@ -2008,6 +2096,10 @@ echo "Session config regeneration complete"
This is safe to call multiple times - s5cmd sync is idempotent.
Note: For user_library source, --delete is NOT used since deletions
are handled explicitly by the delete_file API endpoint. File visibility
in sessions is controlled via filtered symlinks in setup_session_workspace().
Args:
sandbox_id: The sandbox UUID
user_id: The user ID (for S3 path construction)
@@ -2033,9 +2125,20 @@ echo "Session config regeneration complete"
# s5cmd sync: high-performance parallel S3 sync
# --delete: mirror S3 to local (remove files that no longer exist in source)
# Use --delete for external connectors (gmail, google_drive) where
# files can be removed from the source.
# Do NOT use --delete for user_library - files are only added by user
# uploads, and deletions are handled explicitly by the delete_file endpoint.
# timeout: prevent zombie processes from kubectl exec disconnections
# trap: kill child processes on exit/disconnect
source_info = f" (source={source})" if source else ""
# Only use --delete for external connectors where the source of truth is external.
# For user_library, we only add files - deletions are handled explicitly.
# When source is None (sync all), we also skip --delete to be safe.
use_delete = source is not None and source != "user_library"
delete_flag = " --delete" if use_delete else ""
sync_script = f"""
# Kill child processes on exit/disconnect to prevent zombie s5cmd workers
cleanup() {{ pkill -P $$ 2>/dev/null || true; }}
@@ -2052,7 +2155,7 @@ mkdir -p "{local_path}"
# Exit codes: 0=success, 1=success with warnings, 124=timeout
sync_exit_code=0
timeout --signal=TERM --kill-after=10s 5m \
/s5cmd --stat sync --delete "{s3_path}" "{local_path}" 2>&1 || sync_exit_code=$?
/s5cmd --stat sync{delete_flag} "{s3_path}" "{local_path}" 2>&1 || sync_exit_code=$?
echo "=== Sync finished (exit code: $sync_exit_code) ==="

View File

@@ -152,6 +152,117 @@ class LocalSandboxManager(SandboxManager):
"""
return self._get_sandbox_path(sandbox_id) / "sessions" / str(session_id)
def _setup_filtered_files(
    self,
    session_path: Path,
    source_path: Path,
    excluded_paths: list[str],
) -> None:
    """Populate the session's files/ directory with exclusion-aware links.

    Rather than symlinking the whole source directory, builds a files/
    directory where:
    - top-level entries other than user_library are symlinked straight through
    - user_library/ is rebuilt as a real directory containing symlinks only
      for non-excluded files

    Args:
        session_path: Path to the session directory
        source_path: The user's knowledge files root
            (e.g., /storage/tenant/knowledge/user/)
        excluded_paths: Paths inside user_library to hide
            (e.g., ["/data/file.xlsx", "/reports/old.pdf"])
    """
    files_dir = session_path / "files"
    files_dir.mkdir(parents=True, exist_ok=True)

    # Exclusion paths are compared without a leading slash
    excluded = {p.lstrip("/") for p in excluded_paths}

    if not source_path.exists():
        logger.warning(f"Source path does not exist: {source_path}")
        return

    for entry in source_path.iterdir():
        link = files_dir / entry.name
        if entry.name == "user_library":
            # user_library gets per-file filtering
            self._setup_filtered_user_library(
                target_dir=link,
                source_dir=entry,
                excluded_set=excluded,
                base_path="",
            )
        elif not link.exists():
            # Everything else is passed through untouched
            link.symlink_to(entry, target_is_directory=entry.is_dir())
def _setup_filtered_user_library(
    self,
    target_dir: Path,
    source_dir: Path,
    excluded_set: set[str],
    base_path: str,
) -> bool:
    """Recursively set up user_library with filtered symlinks.

    Mirrors source_dir under target_dir, symlinking each non-excluded file.
    Directories are only materialized once an enabled file needs them (lazy
    creation), so fully-excluded or empty subtrees leave no directories
    behind.

    Args:
        target_dir: Where to create the filtered structure
        source_dir: Source user_library directory
        excluded_set: Set of excluded relative paths (e.g., {"data/file.xlsx"})
        base_path: Current path relative to user_library root (for recursion)

    Returns:
        True if any content was created (files or non-empty subdirectories)
    """
    if not source_dir.exists():
        return False

    has_content = False
    for item in source_dir.iterdir():
        # Build relative path for exclusion check (no leading slash)
        rel_path = (
            f"{base_path}/{item.name}".lstrip("/") if base_path else item.name
        )
        target_link = target_dir / item.name

        if item.is_dir():
            # Excluding a directory prunes its entire subtree
            if rel_path in excluded_set:
                logger.debug(f"Excluding directory: user_library/{rel_path}")
                continue
            # Recurse into directory - it only counts as content if the
            # subtree produced at least one symlink
            subdir_has_content = self._setup_filtered_user_library(
                target_dir=target_link,
                source_dir=item,
                excluded_set=excluded_set,
                base_path=rel_path,
            )
            if subdir_has_content:
                has_content = True
        else:
            # Check if file is excluded
            if rel_path in excluded_set:
                logger.debug(f"Excluding file: user_library/{rel_path}")
                continue
            # Lazy creation: parent dir is made the first time an enabled
            # file needs it (mkdir(parents=True) also covers ancestors
            # when a recursive child call creates directories first)
            if not target_dir.exists():
                target_dir.mkdir(parents=True, exist_ok=True)
            # Create symlink to file (skip if one already exists)
            if not target_link.exists():
                target_link.symlink_to(item)
            has_content = True
    return has_content
def provision(
self,
sandbox_id: UUID,
@@ -271,6 +382,7 @@ class LocalSandboxManager(SandboxManager):
user_work_area: str | None = None,
user_level: str | None = None,
use_demo_data: bool = False,
excluded_user_library_paths: list[str] | None = None,
) -> None:
"""Set up a session workspace within an existing sandbox.
@@ -280,7 +392,7 @@ class LocalSandboxManager(SandboxManager):
3. .venv/ (from template)
4. AGENTS.md
5. .agent/skills/
6. files/ (symlink to demo data OR user's file_system_path)
6. files/ (symlink to demo data OR filtered user files)
7. opencode.json
8. org_info/ (if demo_data is enabled, the org structure and user identity for the user's demo persona)
9. attachments/
@@ -297,6 +409,8 @@ class LocalSandboxManager(SandboxManager):
user_work_area: User's work area for demo persona (e.g., "engineering")
user_level: User's level for demo persona (e.g., "ic", "manager")
use_demo_data: If True, symlink files/ to demo data; else to user files
excluded_user_library_paths: List of paths within user_library/ to exclude
(e.g., ["/data/file.xlsx"]). These files won't be linked in the sandbox.
Raises:
RuntimeError: If workspace setup fails
@@ -320,7 +434,7 @@ class LocalSandboxManager(SandboxManager):
logger.debug(f"Session directory created at {session_path}")
try:
# Setup files symlink - choose between demo data or user files
# Setup files access - choose between demo data or user files
if use_demo_data:
# Demo mode: symlink to demo data directory
symlink_target = Path(DEMO_DATA_PATH)
@@ -329,17 +443,33 @@ class LocalSandboxManager(SandboxManager):
f"Demo data directory does not exist: {symlink_target}"
)
logger.info(f"Setting up files symlink to demo data: {symlink_target}")
elif file_system_path:
# Normal mode: symlink to user's knowledge files
symlink_target = Path(file_system_path)
logger.debug(
f"Setting up files symlink to user files: {symlink_target}"
self._directory_manager.setup_files_symlink(
session_path, symlink_target
)
elif file_system_path:
source_path = Path(file_system_path)
# Check if we have exclusions for user_library
if excluded_user_library_paths:
# Create filtered file structure with symlinks to enabled files only
logger.debug(
f"Setting up filtered files with {len(excluded_user_library_paths)} exclusions"
)
self._setup_filtered_files(
session_path=session_path,
source_path=source_path,
excluded_paths=excluded_user_library_paths,
)
else:
# No exclusions: simple symlink to entire directory
logger.debug(
f"Setting up files symlink to user files: {source_path}"
)
self._directory_manager.setup_files_symlink(
session_path, source_path
)
else:
raise ValueError("No files symlink target provided")
self._directory_manager.setup_files_symlink(session_path, symlink_target)
logger.debug("Files symlink ready")
logger.debug("Files ready")
# Setup org_info directory with user identity (at session root)
if user_work_area:
@@ -1061,7 +1191,8 @@ class LocalSandboxManager(SandboxManager):
"""No-op for local mode - files are directly accessible via symlink.
In local mode, the sandbox's files/ directory is a symlink to the
local persistent document storage, so no sync is needed.
local persistent document storage, so no sync is needed. File visibility
in sessions is controlled via filtered symlinks in setup_session_workspace().
Args:
sandbox_id: The sandbox UUID (unused)

View File

@@ -2,12 +2,16 @@
from collections.abc import Iterator
from contextlib import contextmanager
from typing import TYPE_CHECKING
from uuid import UUID
from celery import shared_task
from celery import Task
from redis.lock import Lock as RedisLock
if TYPE_CHECKING:
from sqlalchemy.orm import Session
from onyx.background.celery.apps.app_base import task_logger
from onyx.configs.constants import CELERY_SANDBOX_FILE_SYNC_LOCK_TIMEOUT
from onyx.configs.constants import OnyxCeleryTask
@@ -263,6 +267,52 @@ def _acquire_sandbox_file_sync_lock(lock: RedisLock) -> Iterator[bool]:
lock.release()
def _get_disabled_user_library_paths(db_session: "Session", user_id: str) -> list[str]:
    """Get list of disabled user library file paths for exclusion during sync.

    Queries the document table for CRAFT_FILE documents with
    doc_metadata["sync_disabled"] set and returns their paths relative to
    user_library/ (with a leading slash).

    Args:
        db_session: Database session
        user_id: The user ID to filter documents

    Returns:
        List of relative file paths to exclude
        (e.g., ["/data/file.xlsx", "/old/report.pdf"]). Includes both files
        AND directories - downstream filtering in setup_session_workspace()
        treats an excluded directory as covering all of its children.
    """
    from uuid import UUID

    from onyx.configs.constants import DocumentSource
    from onyx.db.document import get_documents_by_source

    # Get CRAFT_FILE documents for this user (filtered at SQL level)
    documents = get_documents_by_source(
        db_session=db_session,
        source=DocumentSource.CRAFT_FILE,
        creator_id=UUID(user_id),
    )

    # semantic_id format: "user_library/path/to/file.xlsx"
    # Match the full "user_library/" prefix (not just "user_library") so an
    # unrelated id such as "user_library_backup/x" can never slip through.
    prefix = "user_library/"
    disabled_paths: list[str] = []
    for doc in documents:
        doc_metadata = doc.doc_metadata or {}
        if not doc_metadata.get("sync_disabled"):
            continue
        semantic_id = doc.semantic_id or ""
        if semantic_id.startswith(prefix):
            # Keep the leading "/" so paths line up with exclusion checks
            disabled_paths.append(semantic_id[len(prefix) - 1 :])
    return disabled_paths
@shared_task(
name=OnyxCeleryTask.SANDBOX_FILE_SYNC,
soft_time_limit=TIMEOUT_SECONDS,
@@ -285,10 +335,14 @@ def sync_sandbox_files(
Per-user locking ensures only one sync runs at a time for a given user.
If a sync is already in progress, this task will wait until it completes.
Note: File visibility in sessions is controlled via filtered symlinks in
setup_session_workspace(), not at the sync level. The sync mirrors S3
faithfully; disabled files are excluded only when creating new sessions.
Args:
user_id: The user ID whose sandbox should be synced
tenant_id: The tenant ID for S3 path construction
source: Optional source type (e.g., "gmail", "google_drive").
source: Optional source type (e.g., "gmail", "google_drive", "user_library").
If None, syncs all sources.
Returns:

View File

@@ -73,10 +73,10 @@ CONNECTOR_INFO: dict[str, ConnectorInfoEntry] = {
"file_pattern": "`PAGE_TITLE.json`",
"scan_depth": 1,
},
"org_info": {
"summary": "Organizational structure and user identity",
"file_pattern": "Various JSON files",
"scan_depth": 0,
"user_library": {
"summary": "User-uploaded files (spreadsheets, documents, presentations, etc.)",
"file_pattern": "Any file format",
"scan_depth": 1,
},
}
DEFAULT_SCAN_DEPTH = 1

View File

@@ -75,6 +75,9 @@ from onyx.server.features.build.sandbox.kubernetes.internal.acp_exec_client impo
SSEKeepalive,
)
from onyx.server.features.build.sandbox.models import LLMProviderConfig
from onyx.server.features.build.sandbox.tasks.tasks import (
_get_disabled_user_library_paths,
)
from onyx.server.features.build.session.prompts import BUILD_NAMING_SYSTEM_PROMPT
from onyx.server.features.build.session.prompts import BUILD_NAMING_USER_PROMPT
from onyx.server.features.build.session.prompts import (
@@ -563,6 +566,18 @@ class SessionManager:
user_name = user.personal_name if user else None
user_role = user.personal_role if user else None
# Get excluded user library paths (files with sync_disabled=True)
# Only query if not using demo data (user library only applies to user files)
excluded_user_library_paths: list[str] | None = None
if not demo_data_enabled:
excluded_user_library_paths = _get_disabled_user_library_paths(
self._db_session, str(user_id)
)
if excluded_user_library_paths:
logger.debug(
f"Excluding {len(excluded_user_library_paths)} disabled user library paths"
)
self._sandbox_manager.setup_session_workspace(
sandbox_id=sandbox.id,
session_id=build_session.id,
@@ -575,7 +590,9 @@ class SessionManager:
user_work_area=user_work_area,
user_level=user_level,
use_demo_data=demo_data_enabled,
excluded_user_library_paths=excluded_user_library_paths,
)
sandbox_id = sandbox.id
logger.info(
f"Successfully created session {session_id} with workspace in sandbox {sandbox.id}"

View File

@@ -0,0 +1 @@
# Integration tests for Craft/Build features

View File

@@ -0,0 +1,207 @@
"""
Integration tests for the User Library API.
Tests the CRUD operations for user-uploaded raw files in Craft.
"""
import io
import zipfile
import requests
from tests.integration.common_utils.test_models import DATestUser
API_SERVER_URL = "http://localhost:3000"
def _get_user_library_tree(user: DATestUser) -> list[dict]:
    """Fetch the user's library as a flat list of entries."""
    response = requests.get(
        f"{API_SERVER_URL}/api/build/user-library/tree",
        headers=user.headers,
    )
    response.raise_for_status()
    return response.json()
def _upload_files(user: DATestUser, path: str, files: list[tuple[str, bytes]]) -> dict:
    """Upload files to the user library at the given path."""
    response = requests.post(
        f"{API_SERVER_URL}/api/build/user-library/upload",
        headers=user.headers,
        data={"path": path},
        # One "files" multipart part per (name, content) pair
        files=[("files", (name, content)) for name, content in files],
    )
    response.raise_for_status()
    return response.json()
def _upload_zip(user: DATestUser, path: str, zip_data: bytes) -> dict:
    """Upload a zip archive for server-side extraction."""
    url = f"{API_SERVER_URL}/api/build/user-library/upload-zip"
    response = requests.post(
        url,
        headers=user.headers,
        data={"path": path},
        files={"file": ("archive.zip", zip_data)},
    )
    response.raise_for_status()
    return response.json()
def _create_directory(user: DATestUser, name: str, parent_path: str = "/") -> dict:
    """Create a directory in the user library."""
    payload = {"name": name, "parent_path": parent_path}
    response = requests.post(
        f"{API_SERVER_URL}/api/build/user-library/directories",
        headers=user.headers,
        json=payload,
    )
    response.raise_for_status()
    return response.json()
def _toggle_sync(user: DATestUser, document_id: str, enabled: bool) -> None:
    """Toggle sync status for a file."""
    url = f"{API_SERVER_URL}/api/build/user-library/files/{document_id}/toggle"
    response = requests.patch(
        url,
        headers=user.headers,
        # API expects a lowercase boolean query param
        params={"enabled": "true" if enabled else "false"},
    )
    response.raise_for_status()
def _delete_file(user: DATestUser, document_id: str) -> None:
    """Delete a file from the user library."""
    url = f"{API_SERVER_URL}/api/build/user-library/files/{document_id}"
    response = requests.delete(url, headers=user.headers)
    response.raise_for_status()
def test_user_library_upload_file(
    reset: None, admin_user: DATestUser  # noqa: ARG001
) -> None:
    """Test uploading a single file to the user library."""
    # Upload a small CSV file
    payload = b"name,value\nfoo,1\nbar,2"
    result = _upload_files(admin_user, "/", [("data.csv", payload)])

    assert "entries" in result
    entries = result["entries"]
    assert len(entries) == 1
    first = entries[0]
    assert first["name"] == "data.csv"
    assert first["sync_enabled"] is True

    # The uploaded file should be visible in the library tree
    names = [entry["name"] for entry in _get_user_library_tree(admin_user)]
    assert "data.csv" in names
def test_user_library_upload_to_directory(
    reset: None, admin_user: DATestUser  # noqa: ARG001
) -> None:
    """Test uploading files to a specific directory path."""
    # Directory must exist before uploading into it
    _create_directory(admin_user, "reports")

    result = _upload_files(
        admin_user, "/reports", [("quarterly.xlsx", b"fake xlsx content")]
    )

    entries = result["entries"]
    assert len(entries) == 1
    assert entries[0]["path"] == "/reports/quarterly.xlsx"
def test_user_library_upload_zip(
    reset: None, admin_user: DATestUser  # noqa: ARG001
) -> None:
    """Test uploading and extracting a zip file."""
    # Build a zip in memory with nested and root-level members
    buffer = io.BytesIO()
    with zipfile.ZipFile(buffer, "w", zipfile.ZIP_DEFLATED) as archive:
        archive.writestr("folder/file1.txt", "content1")
        archive.writestr("folder/file2.txt", "content2")
        archive.writestr("root.txt", "root content")
    buffer.seek(0)

    result = _upload_zip(admin_user, "/", buffer.read())

    # All three members should have been extracted
    assert "entries" in result
    assert len(result["entries"]) >= 3
def test_user_library_create_directory(
    reset: None, admin_user: DATestUser  # noqa: ARG001
) -> None:
    """Test creating a directory."""
    created = _create_directory(admin_user, "my-data")
    assert created["is_directory"] is True
    assert created["name"] == "my-data"

    # The new directory should show up in the tree listing
    names = [entry["name"] for entry in _get_user_library_tree(admin_user)]
    assert "my-data" in names
def test_user_library_toggle_sync(
    reset: None, admin_user: DATestUser  # noqa: ARG001
) -> None:
    """Test toggling sync status for a file."""
    upload = _upload_files(admin_user, "/", [("test.txt", b"hello")])
    document_id = upload["entries"][0]["id"]

    # Files start out with sync enabled
    assert upload["entries"][0]["sync_enabled"] is True

    # Disable sync
    _toggle_sync(admin_user, document_id, False)

    # The tree should now report the file as sync-disabled
    match = next(
        (e for e in _get_user_library_tree(admin_user) if e["id"] == document_id),
        None,
    )
    assert match is not None
    assert match["sync_enabled"] is False
def test_user_library_delete_file(
    reset: None, admin_user: DATestUser  # noqa: ARG001
) -> None:
    """Test deleting a file."""
    upload = _upload_files(admin_user, "/", [("delete-me.txt", b"temp")])
    document_id = upload["entries"][0]["id"]

    _delete_file(admin_user, document_id)

    # The entry should no longer appear in the tree
    remaining_ids = {e["id"] for e in _get_user_library_tree(admin_user)}
    assert document_id not in remaining_ids
def test_user_library_isolation_between_users(
    reset: None, admin_user: DATestUser  # noqa: ARG001
) -> None:
    """Test that users can only see their own files."""
    from tests.integration.common_utils.managers.user import UserManager

    other_user: DATestUser = UserManager.create(name="other_user")

    # Each user uploads one file
    _upload_files(admin_user, "/", [("admin-file.txt", b"admin data")])
    _upload_files(other_user, "/", [("other-file.txt", b"other data")])

    # Each user's tree must contain only their own upload
    admin_names = {e["name"] for e in _get_user_library_tree(admin_user)}
    assert "admin-file.txt" in admin_names
    assert "other-file.txt" not in admin_names

    other_names = {e["name"] for e in _get_user_library_tree(other_user)}
    assert "other-file.txt" in other_names
    assert "admin-file.txt" not in other_names

View File

@@ -263,7 +263,7 @@ function BuildSessionButton({
? "Deleted"
: deleteError
? "Delete Failed"
: "Delete Build"
: "Delete Craft"
}
icon={deleteSuccess ? SvgCheckCircle : SvgTrash}
onClose={isDeleting || deleteSuccess ? undefined : closeModal}

View File

@@ -623,3 +623,141 @@ export async function deleteConnector(
);
}
}
// =============================================================================
// User Library API
// =============================================================================
import {
LibraryEntry,
CreateDirectoryRequest,
UploadResponse,
} from "@/app/craft/types/user-library";
const USER_LIBRARY_BASE = `${API_BASE}/user-library`;
/**
 * Fetch the user's library tree (uploaded files).
 *
 * @returns Flat list of library entries.
 */
export async function fetchLibraryTree(): Promise<LibraryEntry[]> {
  const response = await fetch(`${USER_LIBRARY_BASE}/tree`);
  if (!response.ok) {
    throw new Error(`Failed to fetch library tree: ${response.status}`);
  }
  return response.json();
}
/**
 * Upload files to the user library.
 *
 * Sends a multipart form with the destination `path` and one `files`
 * part per file.
 */
export async function uploadLibraryFiles(
  path: string,
  files: File[]
): Promise<UploadResponse> {
  const form = new FormData();
  form.append("path", path);
  files.forEach((file) => form.append("files", file));

  const response = await fetch(`${USER_LIBRARY_BASE}/upload`, {
    method: "POST",
    body: form,
  });
  if (!response.ok) {
    const errorData = await response.json().catch(() => ({}));
    throw new Error(
      errorData.detail || `Failed to upload files: ${response.status}`
    );
  }
  return response.json();
}
/**
 * Upload and extract a zip file to the user library.
 */
export async function uploadLibraryZip(
  path: string,
  file: File
): Promise<UploadResponse> {
  const form = new FormData();
  form.append("path", path);
  form.append("file", file);

  const response = await fetch(`${USER_LIBRARY_BASE}/upload-zip`, {
    method: "POST",
    body: form,
  });
  if (!response.ok) {
    const errorData = await response.json().catch(() => ({}));
    throw new Error(errorData.detail || `Failed to upload zip: ${response.status}`);
  }
  return response.json();
}
/**
 * Create a directory in the user library.
 */
export async function createLibraryDirectory(
  request: CreateDirectoryRequest
): Promise<LibraryEntry> {
  const response = await fetch(`${USER_LIBRARY_BASE}/directories`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(request),
  });
  if (!response.ok) {
    const errorData = await response.json().catch(() => ({}));
    throw new Error(
      errorData.detail || `Failed to create directory: ${response.status}`
    );
  }
  return response.json();
}
/**
* Toggle sync status for a file/directory in the user library.
*/
/**
 * Toggle sync status for a file/directory in the user library.
 *
 * @param documentId - Backend document id of the entry (URL-encoded here).
 * @param enabled - Desired sync state.
 * @throws Error with the server's `detail` message when the toggle fails.
 */
export async function toggleLibraryFileSync(
  documentId: string,
  enabled: boolean
): Promise<void> {
  const url = `${USER_LIBRARY_BASE}/files/${encodeURIComponent(
    documentId
  )}/toggle?enabled=${enabled}`;
  const response = await fetch(url, { method: "PATCH" });
  if (response.ok) {
    return;
  }
  const errorData = await response.json().catch(() => ({}));
  throw new Error(errorData.detail || `Failed to toggle sync: ${response.status}`);
}
/**
* Delete a file/directory from the user library.
*/
/**
 * Delete a file/directory from the user library.
 *
 * @param documentId - Backend document id of the entry (URL-encoded here).
 * @throws Error with the server's `detail` message when deletion fails.
 */
export async function deleteLibraryFile(documentId: string): Promise<void> {
  const url = `${USER_LIBRARY_BASE}/files/${encodeURIComponent(documentId)}`;
  const response = await fetch(url, { method: "DELETE" });
  if (response.ok) {
    return;
  }
  const errorData = await response.json().catch(() => ({}));
  throw new Error(errorData.detail || `Failed to delete file: ${response.status}`);
}

View File

@@ -0,0 +1,26 @@
/**
* Types for User Library - raw binary file uploads in Craft.
*/
/** A single file or directory entry in the user's library tree. */
export interface LibraryEntry {
  id: string; // document_id
  name: string; // display name (last path segment)
  path: string; // full library path, e.g. "user_library/docs/file.pdf"
  is_directory: boolean;
  file_size: number | null; // size in bytes; null for directories
  mime_type: string | null; // null for directories
  sync_enabled: boolean; // whether this entry is synced into the sandbox
  created_at: string; // presumably an ISO timestamp — confirm against backend
  children?: LibraryEntry[]; // populated client-side when building the tree
}
/** Request body for creating a directory in the user library. */
export interface CreateDirectoryRequest {
  name: string;
  parent_path: string;
}
/** Server response for the file and zip upload endpoints. */
export interface UploadResponse {
  entries: LibraryEntry[];
  total_uploaded: number;
  total_size_bytes: number;
}

View File

@@ -131,10 +131,6 @@ export default function ConfigureConnectorModal({
setStep("credential");
};
const handleConnectorSuccess = () => {
onSuccess();
};
// Dynamic title and description based on flow type
const getStepTitle = () => {
if (isSingleStep) {
@@ -192,14 +188,14 @@ export default function ConfigureConnectorModal({
onOAuthRedirect={handleOAuthRedirect}
refresh={refreshCredentials}
isSingleStep={isSingleStep}
onConnectorSuccess={handleConnectorSuccess}
onConnectorSuccess={onSuccess}
setPopup={setPopup}
/>
) : selectedCredential ? (
<ConnectorConfigStep
connectorType={connectorType}
credential={selectedCredential}
onSuccess={handleConnectorSuccess}
onSuccess={onSuccess}
onBack={handleBack}
setPopup={setPopup}
/>

View File

@@ -29,10 +29,14 @@ export function ConnectorInfoOverlay({ visible }: ConnectorInfoOverlayProps) {
interface ReprovisionWarningOverlayProps {
visible: boolean;
onUpdate?: () => void;
isUpdating?: boolean;
}
export function ReprovisionWarningOverlay({
visible,
onUpdate,
isUpdating,
}: ReprovisionWarningOverlayProps) {
return (
<div
@@ -48,6 +52,8 @@ export function ReprovisionWarningOverlay({
text="Click Update to apply your changes"
description="Your sandbox will be recreated with your new settings. Previously running sessions will not be affected by your changes."
close={false}
actions={isUpdating ? "Updating..." : "Update"}
onAction={isUpdating ? undefined : onUpdate}
/>
</div>
);

View File

@@ -105,13 +105,21 @@ export default function ConnectorCard({
const sourceMetadata = getSourceMetadata(connectorType);
const status: ConnectorStatus = config?.status || "not_connected";
const isConnected = status !== "not_connected" && status !== "deleting";
const isDeleting = status === "deleting";
// Check if this connector type is always available (doesn't need connection setup)
const isAlwaysConnected = sourceMetadata.alwaysConnected ?? false;
const customDescription = sourceMetadata.customDescription;
const handleCardClick = () => {
if (isDeleting) {
return; // No action while deleting
}
// Always-connected connectors always go to onConfigure
if (isAlwaysConnected) {
onConfigure();
return;
}
if (isConnected) {
setPopoverOpen(true);
} else {
@@ -119,7 +127,11 @@ export default function ConnectorCard({
}
};
const rightContent = isDeleting ? null : isConnected ? (
// Always-connected connectors show a settings icon
// Regular connectors show popover menu when connected, plug icon when not
const rightContent = isDeleting ? null : isAlwaysConnected ? (
<IconButton icon={SvgSettings} internal />
) : isConnected ? (
<Popover open={popoverOpen} onOpenChange={setPopoverOpen}>
<Popover.Trigger asChild>
<IconButton
@@ -164,21 +176,32 @@ export default function ConnectorCard({
<IconButton icon={SvgPlug} internal />
);
// Always-connected connectors show as "primary" variant
const cardVariant =
isAlwaysConnected || isConnected ? "primary" : "secondary";
// Use custom description if provided, otherwise show status
const descriptionContent = customDescription ? (
<Text secondaryBody text03>
{customDescription}
</Text>
) : (
<StatusDescription
status={status}
docsIndexed={config?.docs_indexed || 0}
/>
);
return (
<div
className={cn(!isDeleting && "cursor-pointer")}
onClick={handleCardClick}
>
<Card variant={isConnected ? "primary" : "secondary"}>
<Card variant={cardVariant}>
<LineItemLayout
icon={sourceMetadata.icon}
title={sourceMetadata.displayName}
description={
<StatusDescription
status={status}
docsIndexed={config?.docs_indexed || 0}
/>
}
description={descriptionContent}
rightChildren={rightContent}
center
/>

View File

@@ -0,0 +1,529 @@
"use client";
import { useState, useCallback, useRef, useMemo } from "react";
import useSWR from "swr";
import {
fetchLibraryTree,
uploadLibraryFiles,
uploadLibraryZip,
createLibraryDirectory,
toggleLibraryFileSync,
deleteLibraryFile,
} from "@/app/craft/services/apiServices";
import { LibraryEntry } from "@/app/craft/types/user-library";
import { cn } from "@/lib/utils";
import Text from "@/refresh-components/texts/Text";
import Button from "@/refresh-components/buttons/Button";
import Modal from "@/refresh-components/Modal";
import { Section } from "@/layouts/general-layouts";
import {
SvgFolder,
SvgFolderOpen,
SvgChevronRight,
SvgUploadCloud,
SvgPlus,
SvgTrash,
SvgFileText,
} from "@opal/icons";
import Switch from "@/refresh-components/inputs/Switch";
import InputTypeIn from "@/refresh-components/inputs/InputTypeIn";
import SimpleTooltip from "@/refresh-components/SimpleTooltip";
import { ConfirmEntityModal } from "@/components/modals/ConfirmEntityModal";
/**
* Build a hierarchical tree from a flat list of library entries.
* Entries have paths like "user_library/test" or "user_library/test/file.pdf"
*/
/**
 * Build a hierarchical tree from a flat list of library entries.
 * Entries have paths like "user_library/test" or "user_library/test/file.pdf".
 * An entry whose parent path is not present in the list becomes a root node.
 */
function buildTreeFromFlatList(flatList: LibraryEntry[]): LibraryEntry[] {
  // Clone each entry with an initialized children array, keyed by its path.
  const byPath = new Map<string, LibraryEntry>(
    flatList.map((entry) => [entry.path, { ...entry, children: [] }])
  );
  const roots: LibraryEntry[] = [];
  for (const entry of flatList) {
    const node = byPath.get(entry.path)!;
    // Parent path is everything before the final "/" segment
    // (empty string when the path has no separator at all).
    const slash = entry.path.lastIndexOf("/");
    const parentPath = slash === -1 ? "" : entry.path.slice(0, slash);
    const parent = byPath.get(parentPath);
    if (parent?.children) {
      parent.children.push(node);
    } else {
      // No parent found, so this entry sits at the root level.
      roots.push(node);
    }
  }
  return roots;
}
/** Props for the "Your Files" user-library modal. */
interface UserLibraryModalProps {
  // Whether the modal is visible; also gates the SWR tree fetch.
  open: boolean;
  // Invoked when the modal should close.
  onClose: () => void;
  onChanges?: () => void; // Called when files are uploaded, deleted, or sync toggled
}
/**
 * Modal for managing the user's raw-file library ("Your Files" in Craft).
 *
 * Supports uploading files (a single .zip is routed to the extract
 * endpoint), creating root-level folders, toggling per-entry sandbox sync,
 * and deleting entries. Every mutating action revalidates the SWR tree and
 * notifies the parent via `onChanges` (except folder creation — see note).
 */
export default function UserLibraryModal({
  open,
  onClose,
  onChanges,
}: UserLibraryModalProps) {
  const [expandedPaths, setExpandedPaths] = useState<Set<string>>(new Set());
  const [isUploading, setIsUploading] = useState(false);
  const [uploadError, setUploadError] = useState<string | null>(null);
  const [entryToDelete, setEntryToDelete] = useState<LibraryEntry | null>(null);
  const [showNewFolderModal, setShowNewFolderModal] = useState(false);
  const [newFolderName, setNewFolderName] = useState("");
  const fileInputRef = useRef<HTMLInputElement>(null);
  // Directory the next file-input selection is uploaded into; the hidden
  // <input type="file"> is shared, so the target is stashed in a ref and
  // reset to "/" after every upload attempt.
  const uploadTargetPathRef = useRef<string>("/");
  // Fetch library tree (null key disables the request while the modal is closed)
  const {
    data: tree,
    error,
    isLoading,
    mutate,
  } = useSWR(open ? "/api/build/user-library/tree" : null, fetchLibraryTree, {
    revalidateOnFocus: false,
  });
  // Build hierarchical tree from flat list
  const hierarchicalTree = useMemo(() => {
    if (!tree) return [];
    return buildTreeFromFlatList(tree);
  }, [tree]);
  // Expand/collapse a directory row by toggling its path in the set.
  const toggleFolder = useCallback((path: string) => {
    setExpandedPaths((prev) => {
      const newSet = new Set(prev);
      if (newSet.has(path)) {
        newSet.delete(path);
      } else {
        newSet.add(path);
      }
      return newSet;
    });
  }, []);
  const handleFileUpload = useCallback(
    async (event: React.ChangeEvent<HTMLInputElement>) => {
      const files = event.target.files;
      if (!files || files.length === 0) return;
      setIsUploading(true);
      setUploadError(null);
      const targetPath = uploadTargetPathRef.current;
      try {
        const fileArray = Array.from(files);
        // Check if it's a single zip file
        const firstFile = fileArray[0];
        if (
          fileArray.length === 1 &&
          firstFile &&
          firstFile.name.endsWith(".zip")
        ) {
          await uploadLibraryZip(targetPath, firstFile);
        } else {
          // NOTE(review): a .zip selected alongside other files falls
          // through to the plain upload path — confirm that is intended.
          await uploadLibraryFiles(targetPath, fileArray);
        }
        mutate();
        onChanges?.(); // Notify parent that changes were made
      } catch (err) {
        setUploadError(err instanceof Error ? err.message : "Upload failed");
      } finally {
        setIsUploading(false);
        uploadTargetPathRef.current = "/";
        // Reset input
        event.target.value = "";
      }
    },
    [mutate, onChanges]
  );
  // Arm the target path and open the native file picker.
  const handleUploadToFolder = useCallback((folderPath: string) => {
    uploadTargetPathRef.current = folderPath;
    fileInputRef.current?.click();
  }, []);
  const handleToggleSync = useCallback(
    async (entry: LibraryEntry, enabled: boolean) => {
      try {
        await toggleLibraryFileSync(entry.id, enabled);
        mutate();
        onChanges?.(); // Notify parent that changes were made
      } catch (err) {
        console.error("Failed to toggle sync:", err);
      }
    },
    [mutate, onChanges]
  );
  const handleDeleteConfirm = useCallback(async () => {
    if (!entryToDelete) return;
    try {
      await deleteLibraryFile(entryToDelete.id);
      mutate();
      onChanges?.(); // Notify parent that changes were made
    } catch (err) {
      console.error("Failed to delete:", err);
    } finally {
      setEntryToDelete(null);
    }
  }, [entryToDelete, mutate, onChanges]);
  // NOTE(review): folders are always created at the library root
  // (parent_path: "/") and do not trigger onChanges — presumably an empty
  // folder does not require a sandbox re-provision; confirm.
  const handleCreateDirectory = useCallback(async () => {
    const name = newFolderName.trim();
    if (!name) return;
    try {
      await createLibraryDirectory({ name, parent_path: "/" });
      mutate();
    } catch (err) {
      console.error("Failed to create directory:", err);
      setUploadError(
        err instanceof Error ? err.message : "Failed to create folder"
      );
    } finally {
      setShowNewFolderModal(false);
      setNewFolderName("");
    }
  }, [mutate, newFolderName]);
  // Human-readable size (B / KB / MB); directories pass null and render "".
  const formatFileSize = (bytes: number | null): string => {
    if (bytes === null) return "";
    if (bytes < 1024) return `${bytes} B`;
    if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(1)} KB`;
    return `${(bytes / (1024 * 1024)).toFixed(1)} MB`;
  };
  // Count of root-level entries only — used solely for the empty-state check.
  const fileCount = hierarchicalTree.length;
  return (
    <>
      <Modal open={open} onOpenChange={(isOpen) => !isOpen && onClose()}>
        <Modal.Content width="md" height="fit">
          <Modal.Header
            icon={SvgFileText}
            title="Your Files"
            description="Upload files for your agent to read (Excel, Word, PowerPoint, etc.)"
            onClose={onClose}
          />
          <Modal.Body>
            <Section flexDirection="column" gap={1} alignItems="stretch">
              {/* Action buttons */}
              <Section flexDirection="row" justifyContent="end" gap={0.5}>
                <Button
                  secondary
                  onClick={() => setShowNewFolderModal(true)}
                  leftIcon={SvgPlus}
                >
                  New Folder
                </Button>
                <input
                  ref={fileInputRef}
                  type="file"
                  multiple
                  className="hidden"
                  onChange={handleFileUpload}
                  disabled={isUploading}
                  accept=".xlsx,.xls,.docx,.doc,.pptx,.ppt,.csv,.json,.txt,.pdf,.zip"
                />
                <Button
                  secondary
                  onClick={() => handleUploadToFolder("/")}
                  leftIcon={SvgUploadCloud}
                  disabled={isUploading}
                >
                  {isUploading ? "Uploading..." : "Upload"}
                </Button>
              </Section>
              {/* Upload error */}
              {uploadError && (
                <div className="p-2 bg-status-error-01 rounded-08">
                  <Text secondaryBody>{uploadError}</Text>
                </div>
              )}
              {/* File list */}
              <div className="w-full border border-border-01 rounded-08 min-h-[200px] max-h-[400px] overflow-auto">
                {isLoading ? (
                  <div className="flex flex-col items-center justify-center p-8">
                    <Text secondaryBody text03>
                      Loading files...
                    </Text>
                  </div>
                ) : error ? (
                  <div className="flex flex-col items-center justify-center p-8">
                    <Text secondaryBody text03>
                      Failed to load files
                    </Text>
                  </div>
                ) : fileCount === 0 ? (
                  <div className="flex flex-col items-center justify-center p-8">
                    <SvgFileText size={32} className="mb-3 stroke-text-02" />
                    <Text secondaryBody text03>
                      No files uploaded yet
                    </Text>
                    <Text secondaryBody text02 className="mt-1 text-center">
                      Upload Excel, Word, PowerPoint, or other files for your
                      agent to work with
                    </Text>
                  </div>
                ) : (
                  <div className="w-full p-2">
                    <LibraryTreeView
                      entries={hierarchicalTree}
                      expandedPaths={expandedPaths}
                      onToggleFolder={toggleFolder}
                      onToggleSync={handleToggleSync}
                      onDelete={setEntryToDelete}
                      onUploadToFolder={handleUploadToFolder}
                      formatFileSize={formatFileSize}
                    />
                  </div>
                )}
              </div>
            </Section>
          </Modal.Body>
          <Modal.Footer>
            <Button onClick={onClose}>Done</Button>
          </Modal.Footer>
        </Modal.Content>
      </Modal>
      {/* Delete confirmation modal */}
      {entryToDelete && (
        <ConfirmEntityModal
          danger
          entityType={entryToDelete.is_directory ? "folder" : "file"}
          entityName={entryToDelete.name}
          action="delete"
          actionButtonText="Delete"
          additionalDetails={
            entryToDelete.is_directory
              ? "This will delete the folder and all its contents."
              : "This file will be removed from your library."
          }
          onClose={() => setEntryToDelete(null)}
          onSubmit={handleDeleteConfirm}
        />
      )}
      {/* New folder modal */}
      <Modal
        open={showNewFolderModal}
        onOpenChange={(isOpen) => {
          if (!isOpen) {
            setShowNewFolderModal(false);
            setNewFolderName("");
          }
        }}
      >
        <Modal.Content width="sm" height="fit">
          <Modal.Header
            icon={SvgFolder}
            title="New Folder"
            onClose={() => {
              setShowNewFolderModal(false);
              setNewFolderName("");
            }}
          />
          <Modal.Body>
            <Section flexDirection="column" gap={0.5} alignItems="stretch">
              <Text secondaryBody text03>
                Folder name
              </Text>
              <InputTypeIn
                value={newFolderName}
                onChange={(e) => setNewFolderName(e.target.value)}
                placeholder="Enter folder name"
                onKeyDown={(e) => {
                  if (e.key === "Enter" && newFolderName.trim()) {
                    handleCreateDirectory();
                  }
                }}
                autoFocus
              />
            </Section>
          </Modal.Body>
          <Modal.Footer>
            <Button
              secondary
              onClick={() => {
                setShowNewFolderModal(false);
                setNewFolderName("");
              }}
            >
              Cancel
            </Button>
            <Button
              onClick={handleCreateDirectory}
              disabled={!newFolderName.trim()}
            >
              Create
            </Button>
          </Modal.Footer>
        </Modal.Content>
      </Modal>
    </>
  );
}
/** Props for the recursive tree renderer of library entries. */
interface LibraryTreeViewProps {
  entries: LibraryEntry[];
  // Paths of the directories currently expanded in the UI.
  expandedPaths: Set<string>;
  onToggleFolder: (path: string) => void;
  onToggleSync: (entry: LibraryEntry, enabled: boolean) => void;
  onDelete: (entry: LibraryEntry) => void;
  onUploadToFolder: (folderPath: string) => void;
  formatFileSize: (bytes: number | null) => string;
  // Nesting depth used for row indentation; 0 at the root level.
  depth?: number;
}
/**
 * Recursively render library entries as an indented tree.
 *
 * Directories render an expand/collapse chevron and an upload shortcut;
 * files render their size. Every row gets hover-revealed delete and an
 * always-visible sync toggle. Recurses into `children` for expanded dirs.
 */
function LibraryTreeView({
  entries,
  expandedPaths,
  onToggleFolder,
  onToggleSync,
  onDelete,
  onUploadToFolder,
  formatFileSize,
  depth = 0,
}: LibraryTreeViewProps) {
  // Sort entries: directories first, then alphabetically
  const sortedEntries = [...entries].sort((a, b) => {
    if (a.is_directory && !b.is_directory) return -1;
    if (!a.is_directory && b.is_directory) return 1;
    return a.name.localeCompare(b.name);
  });
  return (
    <>
      {sortedEntries.map((entry) => {
        const isExpanded = expandedPaths.has(entry.path);
        return (
          <div key={entry.id}>
            <div
              className={cn(
                "flex items-center py-2 px-3 hover:bg-background-tint-02 rounded-08 transition-colors",
                "group"
              )}
              // Indent each nesting level by 20px on top of the base 12px padding.
              style={{ paddingLeft: `${depth * 20 + 12}px` }}
            >
              {/* Expand/collapse for directories */}
              {entry.is_directory ? (
                <button
                  onClick={() => onToggleFolder(entry.path)}
                  className="p-0.5 rounded hover:bg-background-tint-03"
                >
                  <SvgChevronRight
                    size={14}
                    className={cn(
                      "stroke-text-03 transition-transform",
                      isExpanded && "rotate-90"
                    )}
                  />
                </button>
              ) : (
                <span className="w-5" />
              )}
              {/* Icon */}
              {entry.is_directory ? (
                isExpanded ? (
                  <SvgFolderOpen size={16} className="stroke-text-03 mx-1.5" />
                ) : (
                  <SvgFolder size={16} className="stroke-text-03 mx-1.5" />
                )
              ) : (
                <SvgFileText size={16} className="stroke-text-03 mx-1.5" />
              )}
              {/* Name */}
              <Text secondaryBody text04 className="flex-1 truncate">
                {entry.name}
              </Text>
              {/* File size */}
              {!entry.is_directory && entry.file_size !== null && (
                <Text secondaryBody text02 className="mx-2 flex-shrink-0">
                  {formatFileSize(entry.file_size)}
                </Text>
              )}
              {/* Actions - show on hover */}
              <div className="flex items-center gap-2 opacity-0 group-hover:opacity-100 transition-opacity">
                {/* Upload to folder button - only for directories */}
                {entry.is_directory && (
                  <SimpleTooltip tooltip="Upload to this folder">
                    <button
                      onClick={(e) => {
                        e.stopPropagation();
                        // Strip "user_library" prefix from path for upload API
                        // (the upload endpoint expects paths relative to the
                        // library root, falling back to "/" for the root itself)
                        const uploadPath =
                          entry.path.replace(/^user_library/, "") || "/";
                        onUploadToFolder(uploadPath);
                      }}
                      className="p-1 rounded hover:bg-background-tint-03 transition-colors"
                    >
                      <SvgUploadCloud size={14} className="stroke-text-03" />
                    </button>
                  </SimpleTooltip>
                )}
                <button
                  onClick={() => onDelete(entry)}
                  className="p-1 rounded hover:bg-status-error-01 transition-colors"
                >
                  <SvgTrash size={14} className="stroke-text-03" />
                </button>
              </div>
              {/* Sync toggle - always visible, rightmost */}
              <SimpleTooltip
                tooltip={
                  entry.sync_enabled
                    ? "Synced to sandbox - click to disable"
                    : "Not synced - click to enable"
                }
              >
                <Switch
                  checked={entry.sync_enabled}
                  onCheckedChange={(checked) => onToggleSync(entry, checked)}
                />
              </SimpleTooltip>
            </div>
            {/* Children */}
            {entry.is_directory && isExpanded && entry.children && (
              <LibraryTreeView
                entries={entry.children}
                expandedPaths={expandedPaths}
                onToggleFolder={onToggleFolder}
                onToggleSync={onToggleSync}
                onDelete={onDelete}
                onUploadToFolder={onUploadToFolder}
                formatFileSize={formatFileSize}
                depth={depth + 1}
              />
            )}
          </div>
        );
      })}
    </>
  );
}

View File

@@ -14,8 +14,12 @@ import { useBuildConnectors } from "@/app/craft/hooks/useBuildConnectors";
import { BuildLLMPopover } from "@/app/craft/components/BuildLLMPopover";
import Text from "@/refresh-components/texts/Text";
import Card from "@/refresh-components/cards/Card";
import { SvgPlug, SvgSettings, SvgChevronDown } from "@opal/icons";
import { FiInfo } from "react-icons/fi";
import {
SvgPlug,
SvgSettings,
SvgChevronDown,
SvgInfoSmall,
} from "@opal/icons";
import { ValidSources } from "@/lib/types";
import ConnectorCard, {
BuildConnectorConfig,
@@ -23,6 +27,7 @@ import ConnectorCard, {
import ConfigureConnectorModal from "@/app/craft/v1/configure/components/ConfigureConnectorModal";
import ComingSoonConnectors from "@/app/craft/v1/configure/components/ComingSoonConnectors";
import DemoDataConfirmModal from "@/app/craft/v1/configure/components/DemoDataConfirmModal";
import UserLibraryModal from "@/app/craft/v1/configure/components/UserLibraryModal";
import {
ConnectorInfoOverlay,
ReprovisionWarningOverlay,
@@ -63,6 +68,7 @@ const BUILD_CONNECTORS: ValidSources[] = [
ValidSources.Linear,
ValidSources.Fireflies,
ValidSources.Hubspot,
ValidSources.CraftFile, // User's uploaded files
];
interface SelectedConnectorState {
@@ -87,6 +93,7 @@ export default function BuildConfigPage() {
const [showNotAllowedModal, setShowNotAllowedModal] = useState(false);
const [showDemoDataConfirmModal, setShowDemoDataConfirmModal] =
useState(false);
const [showUserLibraryModal, setShowUserLibraryModal] = useState(false);
const [pendingDemoDataEnabled, setPendingDemoDataEnabled] = useState<
boolean | null
>(null);
@@ -95,6 +102,7 @@ export default function BuildConfigPage() {
const [pendingLlmSelection, setPendingLlmSelection] =
useState<BuildLlmSelection | null>(null);
const [pendingDemoData, setPendingDemoData] = useState<boolean | null>(null);
const [userLibraryChanged, setUserLibraryChanged] = useState(false);
const [isUpdating, setIsUpdating] = useState(false);
// Track original values (set on mount and after Update)
@@ -152,12 +160,13 @@ export default function BuildConfigPage() {
originalDemoData !== null &&
pendingDemoData !== originalDemoData;
return llmChanged || demoDataChanged;
return llmChanged || demoDataChanged || userLibraryChanged;
}, [
pendingLlmSelection,
pendingDemoData,
originalLlmSelection,
originalDemoData,
userLibraryChanged,
]);
// Compute display name for the pending LLM selection
@@ -208,9 +217,12 @@ export default function BuildConfigPage() {
}, [pendingDemoDataEnabled]);
// Restore changes - revert pending state to original values
// Note: User Library changes cannot be reverted (files already uploaded/deleted/toggled)
// so we just reset the flag - user needs to manually undo file changes if desired
const handleRestoreChanges = useCallback(() => {
setPendingLlmSelection(originalLlmSelection);
setPendingDemoData(originalDemoData);
setUserLibraryChanged(false);
}, [originalLlmSelection, originalDemoData]);
// Update - apply pending changes and re-provision sandbox
@@ -236,6 +248,9 @@ export default function BuildConfigPage() {
// 3. Start provisioning a new session with updated settings
ensurePreProvisionedSession();
// 4. Reset User Library change flag (sandbox now has the updated files)
setUserLibraryChanged(false);
} catch (error) {
console.error("Failed to update settings:", error);
} finally {
@@ -518,7 +533,10 @@ export default function BuildConfigPage() {
<div className="flex items-center gap-2">
<SimpleTooltip tooltip="The demo dataset contains 1000 files across various connectors">
<span className="inline-flex items-center cursor-help">
<FiInfo size={16} className="text-text-03" />
<SvgInfoSmall
size={16}
className="text-text-03"
/>
</span>
</SimpleTooltip>
<Text mainUiAction>Use Demo Dataset</Text>
@@ -541,24 +559,32 @@ export default function BuildConfigPage() {
</div>
</div>
<div className="w-full grid grid-cols-1 md:grid-cols-2 gap-2 pt-2">
{connectorStates.map(({ type, config }) => (
<ConnectorCard
key={type}
connectorType={type}
config={config}
onConfigure={() => {
// Only open modal for unconfigured connectors
if (!config) {
if (isBasicUser) {
setShowNotAllowedModal(true);
} else {
setSelectedConnector({ type, config });
{connectorStates.map(({ type, config }) => {
const metadata = getSourceMetadata(type);
return (
<ConnectorCard
key={type}
connectorType={type}
config={config}
onConfigure={() => {
// Connectors marked as alwaysConnected open their custom modal
if (metadata.alwaysConnected) {
setShowUserLibraryModal(true);
return;
}
}
}}
onDelete={() => config && setConnectorToDelete(config)}
/>
))}
// Only open modal for unconfigured connectors
if (!config) {
if (isBasicUser) {
setShowNotAllowedModal(true);
} else {
setSelectedConnector({ type, config });
}
}
}}
onDelete={() => config && setConnectorToDelete(config)}
/>
);
})}
</div>
<ComingSoonConnectors />
</Section>
@@ -567,7 +593,11 @@ export default function BuildConfigPage() {
{/* Sticky overlay for reprovision warning */}
<div className="sticky z-toast bottom-10 w-fit mx-auto">
<ReprovisionWarningOverlay visible={hasChanges && !isLoading} />
<ReprovisionWarningOverlay
visible={hasChanges && !isLoading}
onUpdate={handleUpdate}
isUpdating={isUpdating || isPreProvisioning}
/>
</div>
{/* Fixed overlay for connector info - centered on screen like the modal */}
@@ -615,6 +645,12 @@ export default function BuildConfigPage() {
pendingDemoDataEnabled={pendingDemoDataEnabled}
onConfirm={handleDemoDataConfirm}
/>
<UserLibraryModal
open={showUserLibraryModal}
onClose={() => setShowUserLibraryModal(false)}
onChanges={() => setUserLibraryChanged(true)}
/>
</SettingsLayouts.Root>
</div>
);

View File

@@ -459,6 +459,7 @@ export const credentialTemplates: Record<ValidSources, any> = {
google_sites: null,
file: null,
user_file: null,
craft_file: null, // User Library - managed through dedicated UI
wikipedia: null,
mediawiki: null,
web: null,

View File

@@ -142,6 +142,11 @@ export interface SourceMetadata {
uniqueKey?: string;
// For federated connectors, this stores the base source type for the icon
baseSourceType?: ValidSources;
// For connectors that are always available (don't need connection setup)
// e.g., User Library (CraftFile) where users just upload files
alwaysConnected?: boolean;
// Custom description to show instead of status (e.g., "Manage your uploaded files")
customDescription?: string;
}
export interface SearchDefaultOverrides {

View File

@@ -67,6 +67,11 @@ interface PartialSourceMetadata {
// federated connectors store the base source type if it's a source
// that has both indexed connectors and federated connectors
baseSourceType?: ValidSources;
// For connectors that are always available (don't need connection setup)
// e.g., User Library (CraftFile) where users just upload files
alwaysConnected?: boolean;
// Custom description to show instead of status (e.g., "Manage your uploaded files")
customDescription?: string;
}
type SourceMap = {
@@ -422,6 +427,16 @@ export const SOURCE_METADATA_MAP: SourceMap = {
category: SourceCategory.Other,
},
// Craft-specific sources
craft_file: {
icon: SvgFileText,
displayName: "Your Files",
category: SourceCategory.Other,
isPopular: false, // Hidden from standard Add Connector page
alwaysConnected: true, // No setup required, just upload files
customDescription: "Manage your uploaded files",
},
// Placeholder (non-null default)
not_applicable: {
icon: SvgGlobe,

View File

@@ -514,6 +514,9 @@ export enum ValidSources {
Bitbucket = "bitbucket",
TestRail = "testrail",
// Craft-specific sources
CraftFile = "craft_file",
// Federated Connectors
FederatedSlack = "federated_slack",
}
@@ -548,6 +551,7 @@ export type ConfigurableSources = Exclude<
| ValidSources.IngestionApi
| ValidSources.FederatedSlack // is part of ValiedSources.Slack
| ValidSources.UserFile
| ValidSources.CraftFile // User Library - managed through dedicated UI
>;
export const oauthSupportedSources: ConfigurableSources[] = [

View File

@@ -331,7 +331,7 @@ function MessageInner(
{/* Actions */}
{actions && (
<div className="flex items-start justify-end shrink-0">
<div className="flex items-center justify-end shrink-0 self-center pr-2">
<Button
secondary
onClick={onAction}