Compare commits

...

46 Commits

Author SHA1 Message Date
roshan
7063fa354b thorough packet audit (#7643) 2026-01-21 14:10:08 -08:00
Wenxi
d3f19294db fix: race condition? (#7645) 2026-01-21 14:08:53 -08:00
Wenxi
3df1cb0759 feat: pre-provision sandboxes (#7633) 2026-01-21 13:39:50 -08:00
Chris Weaver
06c561b3fd . (#7641) 2026-01-21 13:22:23 -08:00
Wenxi
7358bb4bc2 mypy (#7635) 2026-01-21 11:58:45 -08:00
roshan
a16e65d5cb video background (#7631) 2026-01-21 10:42:21 -08:00
Wenxi
0cd4ad921e feat: file uplod (#7630) 2026-01-21 10:30:37 -08:00
roshan
28194a58b5 very very crude fe implementation (#7617) 2026-01-20 19:58:20 -08:00
Chris Weaver
ed6e134b28 Sandbox cleanup (#7615) 2026-01-20 19:11:38 -08:00
roshan
2f2d65a950 chore: be fixes + improvements (#7613)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2026-01-20 19:01:08 -08:00
Chris Weaver
2d436fb207 Better connector permissioning (#7612) 2026-01-20 18:50:49 -08:00
Chris Weaver
c0d9c17312 Fix web template (#7610) 2026-01-20 18:12:57 -08:00
Chris Weaver
5462460ae7 Update connectors to include sandbox only mode (#7599) 2026-01-20 17:28:31 -08:00
roshan
fa44774ef3 chore: big back be cleanup (#7602) 2026-01-20 17:04:58 -08:00
Wenxi
e2bd32e405 feat: build admin panel (#7596) 2026-01-20 15:21:21 -08:00
rohoswagger
74431ea0a8 port over send message 2026-01-20 14:13:42 -08:00
rohoswagger
9ed9c95007 rate limit send message endpoint 2026-01-20 14:13:02 -08:00
roshan
b06cf041d7 feat: rate limiting for onyx build (#7556) 2026-01-20 14:11:05 -08:00
Chris Weaver
fe2cc230b5 improve template set up (#7591) 2026-01-20 14:06:26 -08:00
Weves
544bcf8e7f Merge branch 'main' into claude-code-for-all 2026-01-20 14:00:05 -08:00
Chris Weaver
b818709b7d Fix ports (#7580) 2026-01-20 11:26:40 -08:00
Wenxi
3ccf71c5ee whuang/cc4a fe cont (#7554) 2026-01-19 20:10:01 -08:00
roshan
e8cac79b96 messages apis + migrations (#7553) 2026-01-19 20:08:21 -08:00
Wenxi
81967d3507 mock router (#7552) 2026-01-19 19:16:56 -08:00
Wenxi
6c5197cb74 mock backend (#7551) 2026-01-19 19:14:07 -08:00
Wenxi
6d3e4809e3 feat: fe draft (#7549) 2026-01-19 19:10:03 -08:00
Chris Weaver
f61a275aa5 feat: basic sandbox implementation (#7523)
Co-authored-by: Wenxi <wenxi@onyx.app>
2026-01-19 18:42:20 -08:00
roshan
3715d1ed75 feat(cc4a): session + sandbox endpoints (#7545)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2026-01-19 17:16:41 -08:00
roshan
6e3fa6fbac basic build models + migrations (#7542)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2026-01-19 16:11:19 -08:00
Chris Weaver
537bdf75a9 Improve plan (#7522) 2026-01-19 15:34:37 -08:00
Wenxi
f706a411f1 tiny reorg of fe files (#7516) 2026-01-19 15:34:37 -08:00
Wenxi
e54c7c377a feat: dramatic intro animation + framer dep (#7512) 2026-01-19 15:34:37 -08:00
Weves
9371cbecb4 High level overview plan 2026-01-19 15:34:37 -08:00
Wenxi
1ae7e4feee feat: init configs file and move simple_cli_client configs (#7507) 2026-01-19 15:34:37 -08:00
Weves
2649876f64 . 2026-01-19 15:34:37 -08:00
Weves
4628b711be . 2026-01-19 15:34:37 -08:00
Weves
1dd8e07310 Add file viewer 2026-01-19 15:34:37 -08:00
Weves
295de1268e Add slide autogeneration 2026-01-19 15:34:37 -08:00
Weves
f8ef7d5ea5 more 2026-01-19 15:34:37 -08:00
Weves
343cd5cb1b improvements 2026-01-19 15:34:37 -08:00
Weves
08228e1a10 improve 2026-01-19 15:34:37 -08:00
Weves
95df31aa01 Basic implementation 2026-01-19 15:34:37 -08:00
Weves
ba45f9bc77 . 2026-01-19 15:34:37 -08:00
Weves
be088a0964 . 2026-01-19 15:34:37 -08:00
Weves
67bb013063 add more connectors + initial /build endpoints 2026-01-19 15:34:37 -08:00
Weves
9140c4c449 Open Cowork 2026-01-19 15:34:37 -08:00
140 changed files with 23633 additions and 84 deletions

17
.vscode/launch.json vendored
View File

@@ -577,6 +577,23 @@
"group": "3"
}
},
{
"name": "Build Sandbox Templates",
"type": "debugpy",
"request": "launch",
"module": "onyx.server.features.build.sandbox.build_templates",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {
"PYTHONUNBUFFERED": "1",
"PYTHONPATH": "."
},
"console": "integratedTerminal",
"presentation": {
"group": "3"
},
"consoleTitle": "Build Sandbox Templates"
},
{
// script to generate the openapi schema
"name": "Onyx OpenAPI Schema Generator",

View File

@@ -0,0 +1,149 @@
FROM python:3.11.7-slim-bookworm
LABEL com.danswer.maintainer="founders@onyx.app"
# NOTE(review): the description previously said "web/frontend container", but this
# image installs the Python backend requirements, alembic, and supervisord — it is
# the backend image. Description corrected accordingly.
LABEL com.danswer.description="This image is the backend container of Onyx which \
contains code for both the Community and Enterprise editions of Onyx. If you do not \
have a contract or agreement with DanswerAI, you are not permitted to use the Enterprise \
Edition features outside of personal development or testing purposes. Please reach out to \
founders@onyx.app for more information. Please visit https://github.com/onyx-dot-app/onyx"
# DO_NOT_TRACK is used to disable telemetry for Unstructured
ENV DANSWER_RUNNING_IN_DOCKER="true" \
DO_NOT_TRACK="true" \
PLAYWRIGHT_BROWSERS_PATH="/app/.cache/ms-playwright"
# Create non-root user for security best practices
RUN groupadd -g 1001 onyx && \
useradd -u 1001 -g onyx -m -s /bin/bash onyx && \
mkdir -p /var/log/onyx && \
chmod 755 /var/log/onyx && \
chown onyx:onyx /var/log/onyx
COPY --from=ghcr.io/astral-sh/uv:0.9.9 /uv /uvx /bin/
# Install system dependencies
# cmake needed for psycopg (postgres)
# libpq-dev needed for psycopg (postgres)
# NOTE(review): libpq-dev is mentioned above but not in the install list below —
# confirm whether it is still required.
# curl included just for users' convenience
# zip for Vespa step further down
# ca-certificates for HTTPS
# nodejs and npm needed for building Next.js template
RUN apt-get update && \
apt-get install -y \
cmake \
curl \
zip \
ca-certificates \
libgnutls30 \
libblkid1 \
libmount1 \
libsmartcols1 \
libuuid1 \
libxmlsec1-dev \
pkg-config \
gcc \
nano \
vim \
nodejs \
npm && \
rm -rf /var/lib/apt/lists/* && \
apt-get clean
# Install Python dependencies
# Remove py which is pulled in by retry, py is not needed and is a CVE
COPY ./requirements/default.txt /tmp/requirements.txt
COPY ./requirements/ee.txt /tmp/ee-requirements.txt
RUN uv pip install --system --no-cache-dir --upgrade \
-r /tmp/requirements.txt \
-r /tmp/ee-requirements.txt && \
pip uninstall -y py && \
playwright install chromium && \
playwright install-deps chromium && \
chown -R onyx:onyx /app && \
ln -s /usr/local/bin/supervisord /usr/bin/supervisord && \
# Cleanup for CVEs and size reduction
# https://github.com/tornadoweb/tornado/issues/3107
# xserver-common and xvfb included by playwright installation but not needed after
# perl-base is part of the base Python Debian image but not needed for Onyx functionality
# perl-base could only be removed with --allow-remove-essential
apt-get update && \
apt-get remove -y --allow-remove-essential \
perl-base \
xserver-common \
xvfb \
cmake \
libldap-2.5-0 \
libxmlsec1-dev \
pkg-config \
gcc && \
# Install here to avoid some packages being cleaned up above
apt-get install -y \
libxmlsec1-openssl \
# Install postgresql-client for easy manual tests
postgresql-client && \
apt-get autoremove -y && \
rm -rf /var/lib/apt/lists/* && \
rm -rf ~/.cache/uv /tmp/*.txt && \
rm -f /usr/local/lib/python3.11/site-packages/tornado/test/test.key
# Build sandbox templates
# This must happen after Python dependencies are installed but before cleanup of build tools
# Templates are built once per image and copied for each sandbox (much faster than generating)
# Note: We copy just the files we need here since the full onyx directory isn't copied yet
COPY --chown=onyx:onyx ./onyx/server/features/build/initial-requirements.txt /tmp/sandbox-requirements.txt
COPY --chown=onyx:onyx ./onyx/server/features/build/sandbox/build_templates.py /tmp/build_templates.py
RUN mkdir -p /templates && \
python /tmp/build_templates.py --requirements /tmp/sandbox-requirements.txt && \
chown -R onyx:onyx /templates && \
rm -f /tmp/build_templates.py /tmp/sandbox-requirements.txt
# Pre-downloading models for setups with limited egress
RUN python -c "from tokenizers import Tokenizer; \
Tokenizer.from_pretrained('nomic-ai/nomic-embed-text-v1')"
# Pre-downloading NLTK for setups with limited egress
RUN python -c "import nltk; \
nltk.download('stopwords', quiet=True); \
nltk.download('punkt_tab', quiet=True);"
# nltk.download('wordnet', quiet=True); introduce this back if lemmatization is needed
# Pre-downloading tiktoken for setups with limited egress
RUN python -c "import tiktoken; \
tiktoken.get_encoding('cl100k_base')"
# Set up application files
WORKDIR /app
# Enterprise Version Files
COPY --chown=onyx:onyx ./ee /app/ee
COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf
# Set up application files
COPY --chown=onyx:onyx ./onyx /app/onyx
COPY --chown=onyx:onyx ./shared_configs /app/shared_configs
COPY --chown=onyx:onyx ./alembic /app/alembic
COPY --chown=onyx:onyx ./alembic_tenants /app/alembic_tenants
COPY --chown=onyx:onyx ./alembic.ini /app/alembic.ini
# NOTE(review): supervisord.conf is copied both to /etc/supervisor/conf.d/ (above)
# and /usr/etc/ (here) — confirm both paths are still read at runtime.
COPY supervisord.conf /usr/etc/supervisord.conf
COPY --chown=onyx:onyx ./static /app/static
# Escape hatch scripts
COPY --chown=onyx:onyx ./scripts/debugging /app/scripts/debugging
COPY --chown=onyx:onyx ./scripts/force_delete_connector_by_id.py /app/scripts/force_delete_connector_by_id.py
COPY --chown=onyx:onyx ./scripts/supervisord_entrypoint.sh /app/scripts/supervisord_entrypoint.sh
RUN chmod +x /app/scripts/supervisord_entrypoint.sh
# Put logo in assets
COPY --chown=onyx:onyx ./assets /app/assets
ENV PYTHONPATH=/app
# Default ONYX_VERSION, typically overridden during builds by GitHub Actions.
ARG ONYX_VERSION=0.0.0-dev
ENV ONYX_VERSION=${ONYX_VERSION}
# Default command which does nothing
# This container is used by api server and background which specify their own CMD
CMD ["tail", "-f", "/dev/null"]

View File

@@ -0,0 +1,33 @@
"""add_processing_mode_to_connector_credential_pair
Revision ID: 0ab5805121ef
Revises: 7cd906f37fc6
Create Date: 2026-01-20 15:49:44.136116
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "0ab5805121ef"
down_revision = "7cd906f37fc6"
branch_labels = None
depends_on = None
def upgrade() -> None:
    """Add processing_mode to connector_credential_pair.

    The server default of "REGULAR" backfills existing rows, which lets the
    column be NOT NULL without a separate data migration.
    """
    processing_mode_column = sa.Column(
        "processing_mode",
        sa.String(),
        nullable=False,
        server_default="REGULAR",
    )
    op.add_column("connector_credential_pair", processing_mode_column)
def downgrade() -> None:
    """Remove the processing_mode column from connector_credential_pair."""
    op.drop_column("connector_credential_pair", "processing_mode")

View File

@@ -0,0 +1,83 @@
"""create_build_message_table
Revision ID: 26b589bf8be7
Revises: df6cbd9a37cc
Create Date: 2026-01-19 17:51:08.289325
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = "26b589bf8be7"
down_revision = "df6cbd9a37cc"
branch_labels = None
depends_on = None
def upgrade() -> None:
    """Create the build_message table and its session/created_at index."""
    # Reuse existing messagetype enum from chat_message table
    # Build messages only use: USER, ASSISTANT, SYSTEM
    # Note: The existing enum has uppercase values but MessageType in code uses lowercase
    # This works because SQLAlchemy handles the conversion when native_enum=False
    # Create build_message table
    op.create_table(
        "build_message",
        # primary_key=True here plus the explicit PrimaryKeyConstraint below is
        # redundant but harmless.
        sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
        sa.Column(
            "session_id",
            postgresql.UUID(as_uuid=True),
            # Deleting a build_session cascades to its messages
            sa.ForeignKey("build_session.id", ondelete="CASCADE"),
            nullable=False,
        ),
        sa.Column(
            "type",
            sa.Enum(
                "SYSTEM",
                "USER",
                "ASSISTANT",
                "DANSWER",
                name="messagetype",
                create_type=False,
                native_enum=False,
            ),
            nullable=False,
        ),
        sa.Column(
            "content",
            sa.Text(),
            nullable=False,
        ),
        sa.Column(
            "message_metadata",
            postgresql.JSONB(),
            nullable=True,
        ),
        sa.Column(
            "created_at",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.PrimaryKeyConstraint("id"),
    )
    # Create index for build_message
    # DESC ordering matches the "latest messages in a session" access pattern
    op.create_index(
        "ix_build_message_session_created",
        "build_message",
        ["session_id", sa.text("created_at DESC")],
        unique=False,
    )
def downgrade() -> None:
    """Drop the build_message table and its index."""
    # Drop index
    op.drop_index("ix_build_message_session_created", table_name="build_message")
    # Drop table
    op.drop_table("build_message")

View File

@@ -0,0 +1,84 @@
"""create_sandbox_table
Revision ID: 484b9fa1ac89
Revises: 96086064c5db
Create Date: 2026-01-19 14:47:52.829749
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = "484b9fa1ac89"
down_revision = "96086064c5db"
branch_labels = None
depends_on = None
def upgrade() -> None:
    """Create the sandbox table (one per build_session) and its indexes."""
    # Create sandbox status enum
    # native_enum=False: stored as a string column, no standalone DB enum type
    sandbox_status_enum = sa.Enum(
        "provisioning",
        "running",
        "idle",
        "terminated",
        name="sandboxstatus",
        native_enum=False,
    )
    # Create sandbox table
    op.create_table(
        "sandbox",
        sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
        sa.Column(
            "session_id",
            postgresql.UUID(as_uuid=True),
            # Deleting a build_session cascades to its sandbox
            sa.ForeignKey("build_session.id", ondelete="CASCADE"),
            nullable=False,
            # unique=True enforces at most one sandbox per session
            unique=True,
        ),
        # Nullable: no container exists until provisioning completes
        sa.Column("container_id", sa.String(), nullable=True),
        sa.Column(
            "status",
            sandbox_status_enum,
            nullable=False,
            server_default="provisioning",
        ),
        sa.Column(
            "created_at",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.Column("last_heartbeat", sa.DateTime(timezone=True), nullable=True),
        sa.PrimaryKeyConstraint("id"),
    )
    # Create indexes for sandbox
    op.create_index(
        "ix_sandbox_status",
        "sandbox",
        ["status"],
        unique=False,
    )
    op.create_index(
        "ix_sandbox_container_id",
        "sandbox",
        ["container_id"],
        unique=False,
    )
def downgrade() -> None:
    """Drop the sandbox table, its indexes, and the sandboxstatus enum."""
    # Drop indexes
    op.drop_index("ix_sandbox_container_id", table_name="sandbox")
    op.drop_index("ix_sandbox_status", table_name="sandbox")
    # Drop table
    op.drop_table("sandbox")
    # Drop enum
    # With native_enum=False no standalone DB type was created, so
    # checkfirst=True makes this a safe no-op where the type does not exist.
    sa.Enum(name="sandboxstatus").drop(op.get_bind(), checkfirst=True)

View File

@@ -0,0 +1,27 @@
"""add nextjs_port to sandbox
Revision ID: 7cd906f37fc6
Revises: 26b589bf8be7
Create Date: 2026-01-20
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision: str = "7cd906f37fc6"
down_revision: Union[str, None] = "26b589bf8be7"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Add a nullable nextjs_port column to the sandbox table."""
    nextjs_port_column = sa.Column("nextjs_port", sa.Integer(), nullable=True)
    op.add_column("sandbox", nextjs_port_column)
def downgrade() -> None:
    """Remove the nextjs_port column from sandbox."""
    op.drop_column("sandbox", "nextjs_port")

View File

@@ -0,0 +1,86 @@
"""create_build_session_table
Revision ID: 96086064c5db
Revises: 8b5ce697290e
Create Date: 2026-01-19 14:47:38.156803
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = "96086064c5db"
down_revision = "8b5ce697290e"
branch_labels = None
depends_on = None
def upgrade() -> None:
    """Create the build_session table and its indexes."""
    # Create build_session status enum
    # native_enum=False: stored as a string column, no standalone DB enum type
    build_session_status_enum = sa.Enum(
        "active",
        "idle",
        name="buildsessionstatus",
        native_enum=False,
    )
    # Create build_session table
    op.create_table(
        "build_session",
        sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
        sa.Column(
            "user_id",
            postgresql.UUID(as_uuid=True),
            # Deleting a user cascades to their build sessions
            sa.ForeignKey("user.id", ondelete="CASCADE"),
            # Nullable user_id — presumably for sessions without an owning
            # user (e.g. auth disabled); confirm against application code.
            nullable=True,
        ),
        sa.Column("name", sa.String(), nullable=True),
        sa.Column(
            "status",
            build_session_status_enum,
            nullable=False,
            server_default="active",
        ),
        sa.Column(
            "created_at",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.Column(
            "last_activity_at",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.PrimaryKeyConstraint("id"),
    )
    # Create indexes for build_session
    # DESC ordering matches the "latest sessions for a user" access pattern
    op.create_index(
        "ix_build_session_user_created",
        "build_session",
        ["user_id", sa.text("created_at DESC")],
        unique=False,
    )
    op.create_index(
        "ix_build_session_status",
        "build_session",
        ["status"],
        unique=False,
    )
def downgrade() -> None:
    """Drop the build_session table, its indexes, and the status enum."""
    # Drop indexes
    op.drop_index("ix_build_session_status", table_name="build_session")
    op.drop_index("ix_build_session_user_created", table_name="build_session")
    # Drop table
    op.drop_table("build_session")
    # Drop enum
    # With native_enum=False no standalone DB type was created, so
    # checkfirst=True makes this a safe no-op where the type does not exist.
    sa.Enum(name="buildsessionstatus").drop(op.get_bind(), checkfirst=True)

View File

@@ -0,0 +1,86 @@
"""create_artifact_table
Revision ID: a441232d9c5a
Revises: 484b9fa1ac89
Create Date: 2026-01-19 14:47:57.226496
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = "a441232d9c5a"
down_revision = "484b9fa1ac89"
branch_labels = None
depends_on = None
def upgrade() -> None:
    """Create the artifact table and its indexes."""
    # Create artifact type enum
    # native_enum=False: stored as a string column, no standalone DB enum type
    artifact_type_enum = sa.Enum(
        "web_app",
        "pptx",
        "docx",
        "markdown",
        "excel",
        "image",
        name="artifacttype",
        native_enum=False,
    )
    # Create artifact table
    op.create_table(
        "artifact",
        sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
        sa.Column(
            "session_id",
            postgresql.UUID(as_uuid=True),
            # Deleting a build_session cascades to its artifacts
            sa.ForeignKey("build_session.id", ondelete="CASCADE"),
            nullable=False,
        ),
        sa.Column("type", artifact_type_enum, nullable=False),
        sa.Column("path", sa.String(), nullable=False),
        sa.Column("name", sa.String(), nullable=False),
        sa.Column(
            "created_at",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        # NOTE(review): updated_at only has a server_default; no ON UPDATE
        # trigger is created here, so refreshes must happen in application code.
        sa.Column(
            "updated_at",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.PrimaryKeyConstraint("id"),
    )
    # Create indexes for artifact
    op.create_index(
        "ix_artifact_session_created",
        "artifact",
        ["session_id", sa.text("created_at DESC")],
        unique=False,
    )
    op.create_index(
        "ix_artifact_type",
        "artifact",
        ["type"],
        unique=False,
    )
def downgrade() -> None:
    """Drop the artifact table, its indexes, and the artifacttype enum."""
    # Drop indexes
    op.drop_index("ix_artifact_type", table_name="artifact")
    op.drop_index("ix_artifact_session_created", table_name="artifact")
    # Drop table
    op.drop_table("artifact")
    # Drop enum
    # With native_enum=False no standalone DB type was created, so
    # checkfirst=True makes this a safe no-op where the type does not exist.
    sa.Enum(name="artifacttype").drop(op.get_bind(), checkfirst=True)

View File

@@ -0,0 +1,57 @@
"""create_snapshot_table
Revision ID: df6cbd9a37cc
Revises: a441232d9c5a
Create Date: 2026-01-19 14:48:00.757530
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = "df6cbd9a37cc"
down_revision = "a441232d9c5a"
branch_labels = None
depends_on = None
def upgrade() -> None:
    """Create the snapshot table and its session/created_at index."""
    # Create snapshot table (no enum needed)
    op.create_table(
        "snapshot",
        sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
        sa.Column(
            "session_id",
            postgresql.UUID(as_uuid=True),
            # Deleting a build_session cascades to its snapshots
            sa.ForeignKey("build_session.id", ondelete="CASCADE"),
            nullable=False,
        ),
        sa.Column("storage_path", sa.String(), nullable=False),
        # Defaults to 0 so existing code can treat the size as always present
        sa.Column("size_bytes", sa.BigInteger(), nullable=False, server_default="0"),
        sa.Column(
            "created_at",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.PrimaryKeyConstraint("id"),
    )
    # Create index for snapshot
    # DESC ordering matches the "latest snapshots for a session" access pattern
    op.create_index(
        "ix_snapshot_session_created",
        "snapshot",
        ["session_id", sa.text("created_at DESC")],
        unique=False,
    )
def downgrade() -> None:
    """Drop the snapshot table and its index."""
    # Drop index
    op.drop_index("ix_snapshot_session_created", table_name="snapshot")
    # Drop table
    op.drop_table("snapshot")

View File

@@ -134,5 +134,7 @@ celery_app.autodiscover_tasks(
"onyx.background.celery.tasks.docprocessing",
# Docfetching worker tasks
"onyx.background.celery.tasks.docfetching",
# Sandbox cleanup tasks (isolated in build feature)
"onyx.server.features.build.sandbox.tasks",
]
)

View File

@@ -116,5 +116,7 @@ celery_app.autodiscover_tasks(
"onyx.background.celery.tasks.connector_deletion",
"onyx.background.celery.tasks.doc_permission_syncing",
"onyx.background.celery.tasks.docprocessing",
# Sandbox cleanup tasks (isolated in build feature)
"onyx.server.features.build.sandbox.tasks",
]
)

View File

@@ -139,6 +139,27 @@ beat_task_templates: list[dict] = [
"queue": OnyxCeleryQueues.MONITORING,
},
},
# Sandbox cleanup tasks
{
"name": "cleanup-idle-sandboxes",
"task": OnyxCeleryTask.CLEANUP_IDLE_SANDBOXES,
"schedule": timedelta(minutes=1),
"options": {
"priority": OnyxCeleryPriority.LOW,
"expires": BEAT_EXPIRES_DEFAULT,
"queue": OnyxCeleryQueues.SANDBOX,
},
},
{
"name": "cleanup-old-snapshots",
"task": OnyxCeleryTask.CLEANUP_OLD_SNAPSHOTS,
"schedule": timedelta(hours=24),
"options": {
"priority": OnyxCeleryPriority.LOW,
"expires": BEAT_EXPIRES_DEFAULT,
"queue": OnyxCeleryQueues.SANDBOX,
},
},
]
if ENTERPRISE_EDITION_ENABLED:

View File

@@ -31,17 +31,20 @@ from onyx.connectors.interfaces import CheckpointedConnector
from onyx.connectors.models import ConnectorFailure
from onyx.connectors.models import ConnectorStopSignal
from onyx.connectors.models import Document
from onyx.connectors.models import IndexAttemptMetadata
from onyx.connectors.models import TextSection
from onyx.db.connector import mark_ccpair_with_indexing_trigger
from onyx.db.connector_credential_pair import get_connector_credential_pair_from_id
from onyx.db.connector_credential_pair import get_last_successful_attempt_poll_range_end
from onyx.db.connector_credential_pair import update_connector_credential_pair
from onyx.db.constants import CONNECTOR_VALIDATION_ERROR_MESSAGE_PREFIX
from onyx.db.document import mark_document_as_indexed_for_cc_pair__no_commit
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.enums import AccessType
from onyx.db.enums import ConnectorCredentialPairStatus
from onyx.db.enums import IndexingStatus
from onyx.db.enums import IndexModelStatus
from onyx.db.enums import ProcessingMode
from onyx.db.index_attempt import create_index_attempt_error
from onyx.db.index_attempt import get_index_attempt
from onyx.db.index_attempt import get_recent_completed_attempts_for_cc_pair
@@ -53,7 +56,10 @@ from onyx.db.models import IndexAttempt
from onyx.file_store.document_batch_storage import DocumentBatchStorage
from onyx.file_store.document_batch_storage import get_document_batch_storage
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
from onyx.indexing.indexing_pipeline import index_doc_batch_prepare
from onyx.indexing.persistent_document_writer import get_persistent_document_writer
from onyx.utils.logger import setup_logger
from onyx.utils.middleware import make_randomized_onyx_request_id
from onyx.utils.variable_functionality import global_version
from shared_configs.configs import MULTI_TENANT
from shared_configs.contextvars import INDEX_ATTEMPT_INFO_CONTEXTVAR
@@ -367,6 +373,7 @@ def connector_document_extraction(
db_connector = index_attempt.connector_credential_pair.connector
db_credential = index_attempt.connector_credential_pair.credential
processing_mode = index_attempt.connector_credential_pair.processing_mode
is_primary = index_attempt.search_settings.status == IndexModelStatus.PRESENT
from_beginning = index_attempt.from_beginning
@@ -600,34 +607,100 @@ def connector_document_extraction(
logger.debug(f"Indexing batch of documents: {batch_description}")
memory_tracer.increment_and_maybe_trace()
# Store documents in storage
batch_storage.store_batch(batch_num, doc_batch_cleaned)
# cc4a
if processing_mode == ProcessingMode.FILE_SYSTEM:
# File system only - write directly to persistent storage,
# skip chunking/embedding/Vespa but still track documents in DB
# Create processing task data
processing_batch_data = {
"index_attempt_id": index_attempt_id,
"cc_pair_id": cc_pair_id,
"tenant_id": tenant_id,
"batch_num": batch_num, # 0-indexed
}
with get_session_with_current_tenant() as db_session:
# Create metadata for the batch
index_attempt_metadata = IndexAttemptMetadata(
attempt_id=index_attempt_id,
connector_id=db_connector.id,
credential_id=db_credential.id,
request_id=make_randomized_onyx_request_id("FSI"),
structured_id=f"{tenant_id}:{cc_pair_id}:{index_attempt_id}:{batch_num}",
batch_num=batch_num,
)
# Queue document processing task
app.send_task(
OnyxCeleryTask.DOCPROCESSING_TASK,
kwargs=processing_batch_data,
queue=OnyxCeleryQueues.DOCPROCESSING,
priority=docprocessing_priority,
)
# Upsert documents to PostgreSQL (document table + cc_pair relationship)
# This is a subset of what docprocessing does - just DB tracking, no chunking/embedding
index_doc_batch_prepare(
documents=doc_batch_cleaned,
index_attempt_metadata=index_attempt_metadata,
db_session=db_session,
ignore_time_skip=True, # Documents already filtered during extraction
)
batch_num += 1
total_doc_batches_queued += 1
# Mark documents as indexed for the CC pair
mark_document_as_indexed_for_cc_pair__no_commit(
connector_id=db_connector.id,
credential_id=db_credential.id,
document_ids=[doc.id for doc in doc_batch_cleaned],
db_session=db_session,
)
db_session.commit()
logger.info(
f"Queued document processing batch: "
f"batch_num={batch_num} "
f"docs={len(doc_batch_cleaned)} "
f"attempt={index_attempt_id}"
)
# Write documents to persistent file system
# Use creator_id for user-segregated storage paths (sandbox isolation)
creator_id = index_attempt.connector_credential_pair.creator_id
if creator_id is None:
raise ValueError(
f"ConnectorCredentialPair {index_attempt.connector_credential_pair.id} "
"must have a creator_id for persistent document storage"
)
user_id_str: str = str(creator_id)
writer = get_persistent_document_writer(user_id=user_id_str)
written_paths = writer.write_documents(doc_batch_cleaned)
# Update coordination directly (no docprocessing task)
with get_session_with_current_tenant() as db_session:
IndexingCoordination.update_batch_completion_and_docs(
db_session=db_session,
index_attempt_id=index_attempt_id,
total_docs_indexed=len(doc_batch_cleaned),
new_docs_indexed=len(doc_batch_cleaned),
total_chunks=0, # No chunks for file system mode
)
batch_num += 1
total_doc_batches_queued += 1
logger.info(
f"Wrote documents to file system: "
f"batch_num={batch_num} "
f"docs={len(written_paths)} "
f"attempt={index_attempt_id}"
)
else:
# REGULAR mode (default): Full pipeline - store and queue docprocessing
batch_storage.store_batch(batch_num, doc_batch_cleaned)
# Create processing task data
processing_batch_data = {
"index_attempt_id": index_attempt_id,
"cc_pair_id": cc_pair_id,
"tenant_id": tenant_id,
"batch_num": batch_num, # 0-indexed
}
# Queue document processing task
app.send_task(
OnyxCeleryTask.DOCPROCESSING_TASK,
kwargs=processing_batch_data,
queue=OnyxCeleryQueues.DOCPROCESSING,
priority=docprocessing_priority,
)
batch_num += 1
total_doc_batches_queued += 1
logger.info(
f"Queued document processing batch: "
f"batch_num={batch_num} "
f"docs={len(doc_batch_cleaned)} "
f"attempt={index_attempt_id}"
)
# Check checkpoint size periodically
CHECKPOINT_SIZE_CHECK_INTERVAL = 100

View File

@@ -1027,3 +1027,14 @@ INSTANCE_TYPE = (
## Discord Bot Configuration
DISCORD_BOT_TOKEN = os.environ.get("DISCORD_BOT_TOKEN")
DISCORD_BOT_INVOKE_CHAR = os.environ.get("DISCORD_BOT_INVOKE_CHAR", "!")
# Persistent Document Storage Configuration
# When enabled, indexed documents are written to local filesystem with hierarchical structure
PERSISTENT_DOCUMENT_STORAGE_ENABLED = (
os.environ.get("PERSISTENT_DOCUMENT_STORAGE_ENABLED", "").lower() == "true"
)
# Base directory path for persistent document storage (local filesystem)
# Example: /var/onyx/indexed-docs or /app/indexed-docs
PERSISTENT_DOCUMENT_STORAGE_PATH = os.environ.get(
"PERSISTENT_DOCUMENT_STORAGE_PATH", "/app/indexed-docs"
)

View File

@@ -241,6 +241,7 @@ class NotificationType(str, Enum):
TRIAL_ENDS_TWO_DAYS = "two_day_trial_ending" # 2 days left in trial
RELEASE_NOTES = "release_notes"
ASSISTANT_FILES_READY = "assistant_files_ready"
FEATURE_ANNOUNCEMENT = "feature_announcement"
class BlobType(str, Enum):
@@ -327,6 +328,7 @@ class FileOrigin(str, Enum):
PLAINTEXT_CACHE = "plaintext_cache"
OTHER = "other"
QUERY_HISTORY_CSV = "query_history_csv"
SANDBOX_SNAPSHOT = "sandbox_snapshot"
USER_FILE = "user_file"
@@ -383,6 +385,9 @@ class OnyxCeleryQueues:
# KG processing queue
KG_PROCESSING = "kg_processing"
# Sandbox processing queue
SANDBOX = "sandbox"
class OnyxRedisLocks:
PRIMARY_WORKER = "da_lock:primary_worker"
@@ -431,6 +436,10 @@ class OnyxRedisLocks:
# Release notes
RELEASE_NOTES_FETCH_LOCK = "da_lock:release_notes_fetch"
# Sandbox cleanup
CLEANUP_IDLE_SANDBOXES_BEAT_LOCK = "da_lock:cleanup_idle_sandboxes_beat"
CLEANUP_OLD_SNAPSHOTS_BEAT_LOCK = "da_lock:cleanup_old_snapshots_beat"
class OnyxRedisSignals:
BLOCK_VALIDATE_INDEXING_FENCES = "signal:block_validate_indexing_fences"
@@ -556,6 +565,10 @@ class OnyxCeleryTask:
CHECK_KG_PROCESSING_CLUSTERING_ONLY = "check_kg_processing_clustering_only"
KG_RESET_SOURCE_INDEX = "kg_reset_source_index"
# Sandbox cleanup
CLEANUP_IDLE_SANDBOXES = "cleanup_idle_sandboxes"
CLEANUP_OLD_SNAPSHOTS = "cleanup_old_snapshots"
# this needs to correspond to the matching entry in supervisord
ONYX_CELERY_BEAT_HEARTBEAT_KEY = "onyx:celery:beat:heartbeat"

View File

@@ -89,6 +89,9 @@ def _create_doc_from_transcript(transcript: dict) -> Document | None:
meeting_date_unix = transcript["date"]
meeting_date = datetime.fromtimestamp(meeting_date_unix / 1000, tz=timezone.utc)
# Build hierarchy based on meeting date (year-month)
year_month = meeting_date.strftime("%Y-%m")
meeting_organizer_email = transcript["organizer_email"]
organizer_email_user_info = [BasicExpertInfo(email=meeting_organizer_email)]
@@ -102,6 +105,14 @@ def _create_doc_from_transcript(transcript: dict) -> Document | None:
sections=cast(list[TextSection | ImageSection], sections),
source=DocumentSource.FIREFLIES,
semantic_identifier=meeting_title,
doc_metadata={
"hierarchy": {
"source_path": [year_month],
"year_month": year_month,
"meeting_title": meeting_title,
"organizer_email": meeting_organizer_email,
}
},
metadata={
k: str(v)
for k, v in {

View File

@@ -240,8 +240,21 @@ def _get_userinfo(user: NamedUser) -> dict[str, str]:
def _convert_pr_to_document(
pull_request: PullRequest, repo_external_access: ExternalAccess | None
) -> Document:
repo_name = pull_request.base.repo.full_name if pull_request.base else ""
doc_metadata = DocMetadata(repo=repo_name)
repo_full_name = pull_request.base.repo.full_name if pull_request.base else ""
# Split full_name (e.g., "owner/repo") into owner and repo
parts = repo_full_name.split("/", 1)
owner_name = parts[0] if parts else ""
repo_name = parts[1] if len(parts) > 1 else repo_full_name
doc_metadata = {
"repo": repo_full_name,
"hierarchy": {
"source_path": [owner_name, repo_name, "pull_requests"],
"owner": owner_name,
"repo": repo_name,
"object_type": "pull_request",
},
}
return Document(
id=pull_request.html_url,
sections=[
@@ -259,7 +272,7 @@ def _convert_pr_to_document(
else None
),
# this metadata is used in perm sync
doc_metadata=doc_metadata.model_dump(),
doc_metadata=doc_metadata,
metadata={
k: [str(vi) for vi in v] if isinstance(v, list) else str(v)
for k, v in {
@@ -316,8 +329,21 @@ def _fetch_issue_comments(issue: Issue) -> str:
def _convert_issue_to_document(
issue: Issue, repo_external_access: ExternalAccess | None
) -> Document:
repo_name = issue.repository.full_name if issue.repository else ""
doc_metadata = DocMetadata(repo=repo_name)
repo_full_name = issue.repository.full_name if issue.repository else ""
# Split full_name (e.g., "owner/repo") into owner and repo
parts = repo_full_name.split("/", 1)
owner_name = parts[0] if parts else ""
repo_name = parts[1] if len(parts) > 1 else repo_full_name
doc_metadata = {
"repo": repo_full_name,
"hierarchy": {
"source_path": [owner_name, repo_name, "issues"],
"owner": owner_name,
"repo": repo_name,
"object_type": "issue",
},
}
return Document(
id=issue.html_url,
sections=[TextSection(link=issue.html_url, text=issue.body or "")],
@@ -327,7 +353,7 @@ def _convert_issue_to_document(
# updated_at is UTC time but is timezone unaware
doc_updated_at=issue.updated_at.replace(tzinfo=timezone.utc),
# this metadata is used in perm sync
doc_metadata=doc_metadata.model_dump(),
doc_metadata=doc_metadata,
metadata={
k: [str(vi) for vi in v] if isinstance(v, list) else str(v)
for k, v in {

View File

@@ -46,6 +46,103 @@ from onyx.utils.variable_functionality import noop_fallback
logger = setup_logger()
# Cache for folder path lookups to avoid redundant API calls
# Maps folder_id -> (folder_name, parent_id)
_folder_cache: dict[str, tuple[str, str | None]] = {}
def _get_folder_info(
    service: GoogleDriveService, folder_id: str
) -> tuple[str, str | None]:
    """Look up a folder's display name and its first parent ID.

    Results are memoized in the module-level _folder_cache so each folder is
    fetched from the Drive API at most once; API failures are cached as
    ("Unknown", None) so a flaky folder is not retried on every document.
    """
    cached = _folder_cache.get(folder_id)
    if cached is not None:
        return cached

    try:
        request = service.files().get(
            fileId=folder_id,
            fields="name, parents",
            supportsAllDrives=True,
        )
        folder = request.execute()
    except HttpError as e:
        logger.warning(f"Failed to get folder info for {folder_id}: {e}")
        _folder_cache[folder_id] = ("Unknown", None)
        return "Unknown", None

    name = folder.get("name", "Unknown")
    parent_ids = folder.get("parents", [])
    result = (name, parent_ids[0] if parent_ids else None)
    _folder_cache[folder_id] = result
    return result
def _get_drive_name(service: GoogleDriveService, drive_id: str) -> str:
    """Resolve a shared drive's display name, memoized in _folder_cache.

    Cache entries use a "drive_" prefix so drive IDs cannot collide with
    folder IDs stored by _get_folder_info. On API errors a synthetic
    "Shared Drive <id>" label is cached and returned.
    """
    cache_key = f"drive_{drive_id}"
    cached = _folder_cache.get(cache_key)
    if cached is not None:
        return cached[0]

    fallback = f"Shared Drive {drive_id}"
    try:
        drive = service.drives().get(driveId=drive_id).execute()
    except HttpError as e:
        logger.warning(f"Failed to get drive name for {drive_id}: {e}")
        _folder_cache[cache_key] = (fallback, None)
        return fallback

    drive_name = drive.get("name", fallback)
    _folder_cache[cache_key] = (drive_name, None)
    return drive_name
def build_folder_path(
    file: GoogleDriveFileType,
    service: GoogleDriveService,
    drive_id: str | None = None,
) -> list[str]:
    """
    Build the full folder path for a file by walking up the parent chain.
    Returns a list of folder names from root to immediate parent.

    The root element is the shared-drive name when drive_id is given,
    otherwise the literal label "My Drive". Lookups go through the cached
    helpers, so repeated files in the same folder tree reuse API results.
    """
    path_parts: list[str] = []
    # Get the file's parent folder ID
    parents = file.get("parents", [])
    if not parents:
        # File is at root level — no folders to walk; return just the root label.
        if drive_id:
            return [_get_drive_name(service, drive_id)]
        return ["My Drive"]
    parent_id: str | None = parents[0]
    # Walk up the folder hierarchy (limit to 50 levels to prevent infinite loops)
    # `visited` also guards against parent cycles the depth cap would take
    # 50 API calls to escape.
    visited: set[str] = set()
    for _ in range(50):
        if not parent_id or parent_id in visited:
            break
        visited.add(parent_id)
        folder_name, next_parent = _get_folder_info(service, parent_id)
        # Check if we've reached the root (parent is the drive itself or no parent)
        if next_parent is None:
            # The parentless folder IS the root, so its own name is replaced by
            # the drive name / "My Drive" label rather than appended.
            if drive_id:
                path_parts.insert(0, _get_drive_name(service, drive_id))
            else:
                # For My Drive, the root folder name is usually the user's name
                # We'll use "My Drive" as a consistent label
                path_parts.insert(0, "My Drive")
            break
        else:
            # Prepend so the final list reads root -> ... -> immediate parent.
            path_parts.insert(0, folder_name)
            parent_id = next_parent
    # Fallback label if the walk produced nothing (e.g. first lookup failed).
    return path_parts if path_parts else ["My Drive"]
# This is not a standard valid unicode char, it is used by the docs advanced API to
# represent smart chips (elements like dates and doc links).
SMART_CHIP_CHAR = "\ue907"
@@ -526,12 +623,30 @@ def _convert_drive_item_to_document(
else None
)
# Build doc_metadata with hierarchy information
file_name = file.get("name", "")
mime_type = file.get("mimeType", "")
drive_id = file.get("driveId")
# Build full folder path by walking up the parent chain
source_path = build_folder_path(file, _get_drive_service(), drive_id)
doc_metadata = {
"hierarchy": {
"source_path": source_path,
"drive_id": drive_id,
"file_name": file_name,
"mime_type": mime_type,
}
}
# Create the document
return Document(
id=doc_id,
sections=sections,
source=DocumentSource.GOOGLE_DRIVE,
semantic_identifier=file.get("name", ""),
semantic_identifier=file_name,
doc_metadata=doc_metadata,
metadata={
"owner_names": ", ".join(
owner.get("displayName", "") for owner in file.get("owners", [])

View File

@@ -490,6 +490,13 @@ class HubSpotConnector(LoadConnector, PollConnector):
semantic_identifier=title,
doc_updated_at=ticket.updated_at.replace(tzinfo=timezone.utc),
metadata=metadata,
doc_metadata={
"hierarchy": {
"source_path": ["Tickets"],
"object_type": "ticket",
"object_id": ticket.id,
}
},
)
)
@@ -615,6 +622,13 @@ class HubSpotConnector(LoadConnector, PollConnector):
semantic_identifier=title,
doc_updated_at=company.updated_at.replace(tzinfo=timezone.utc),
metadata=metadata,
doc_metadata={
"hierarchy": {
"source_path": ["Companies"],
"object_type": "company",
"object_id": company.id,
}
},
)
)
@@ -738,6 +752,13 @@ class HubSpotConnector(LoadConnector, PollConnector):
semantic_identifier=title,
doc_updated_at=deal.updated_at.replace(tzinfo=timezone.utc),
metadata=metadata,
doc_metadata={
"hierarchy": {
"source_path": ["Deals"],
"object_type": "deal",
"object_id": deal.id,
}
},
)
)
@@ -881,6 +902,13 @@ class HubSpotConnector(LoadConnector, PollConnector):
semantic_identifier=title,
doc_updated_at=contact.updated_at.replace(tzinfo=timezone.utc),
metadata=metadata,
doc_metadata={
"hierarchy": {
"source_path": ["Contacts"],
"object_type": "contact",
"object_id": contact.id,
}
},
)
)

View File

@@ -274,6 +274,10 @@ class LinearConnector(LoadConnector, PollConnector, OAuthConnector):
# Cast the sections list to the expected type
typed_sections = cast(list[TextSection | ImageSection], sections)
# Extract team name for hierarchy
team_name = (node.get("team") or {}).get("name") or "Unknown Team"
identifier = node.get("identifier", node["id"])
documents.append(
Document(
id=node["id"],
@@ -282,6 +286,13 @@ class LinearConnector(LoadConnector, PollConnector, OAuthConnector):
semantic_identifier=f"[{node['identifier']}] {node['title']}",
title=node["title"],
doc_updated_at=time_str_to_utc(node["updatedAt"]),
doc_metadata={
"hierarchy": {
"source_path": [team_name],
"team_name": team_name,
"identifier": identifier,
}
},
metadata={
k: str(v)
for k, v in {

View File

@@ -234,6 +234,8 @@ def thread_to_doc(
"\n", " "
)
channel_name = channel["name"]
return Document(
id=_build_doc_id(channel_id=channel_id, thread_ts=thread[0]["ts"]),
sections=[
@@ -247,7 +249,14 @@ def thread_to_doc(
semantic_identifier=doc_sem_id,
doc_updated_at=get_latest_message_time(thread),
primary_owners=valid_experts,
metadata={"Channel": channel["name"]},
doc_metadata={
"hierarchy": {
"source_path": [channel_name],
"channel_name": channel_name,
"channel_id": channel_id,
}
},
metadata={"Channel": channel_name},
external_access=channel_access,
)

View File

@@ -22,6 +22,7 @@ from onyx.db.credentials import fetch_credential_by_id_for_user
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.enums import AccessType
from onyx.db.enums import ConnectorCredentialPairStatus
from onyx.db.enums import ProcessingMode
from onyx.db.models import Connector
from onyx.db.models import ConnectorCredentialPair
from onyx.db.models import Credential
@@ -116,7 +117,14 @@ def get_connector_credential_pairs_for_user(
eager_load_user: bool = False,
order_by_desc: bool = False,
source: DocumentSource | None = None,
processing_mode: ProcessingMode | None = ProcessingMode.REGULAR,
) -> list[ConnectorCredentialPair]:
"""Get connector credential pairs for a user.
Args:
processing_mode: Filter by processing mode. Defaults to REGULAR to hide
FILE_SYSTEM connectors from standard admin UI. Pass None to get all.
"""
if eager_load_user:
assert (
eager_load_credential
@@ -142,6 +150,9 @@ def get_connector_credential_pairs_for_user(
if ids:
stmt = stmt.where(ConnectorCredentialPair.id.in_(ids))
if processing_mode is not None:
stmt = stmt.where(ConnectorCredentialPair.processing_mode == processing_mode)
if order_by_desc:
stmt = stmt.order_by(desc(ConnectorCredentialPair.id))
@@ -160,6 +171,7 @@ def get_connector_credential_pairs_for_user_parallel(
eager_load_user: bool = False,
order_by_desc: bool = False,
source: DocumentSource | None = None,
processing_mode: ProcessingMode | None = ProcessingMode.REGULAR,
) -> list[ConnectorCredentialPair]:
with get_session_with_current_tenant() as db_session:
return get_connector_credential_pairs_for_user(
@@ -172,6 +184,7 @@ def get_connector_credential_pairs_for_user_parallel(
eager_load_user=eager_load_user,
order_by_desc=order_by_desc,
source=source,
processing_mode=processing_mode,
)
@@ -501,6 +514,7 @@ def add_credential_to_connector(
initial_status: ConnectorCredentialPairStatus = ConnectorCredentialPairStatus.SCHEDULED,
last_successful_index_time: datetime | None = None,
seeding_flow: bool = False,
processing_mode: ProcessingMode = ProcessingMode.REGULAR,
) -> StatusResponse:
connector = fetch_connector_by_id(connector_id, db_session)
@@ -566,6 +580,7 @@ def add_credential_to_connector(
access_type=access_type,
auto_sync_options=auto_sync_options,
last_successful_index_time=last_successful_index_time,
processing_mode=processing_mode,
)
db_session.add(association)
db_session.flush() # make sure the association has an id

View File

@@ -56,6 +56,13 @@ class IndexingMode(str, PyEnum):
REINDEX = "reindex"
class ProcessingMode(str, PyEnum):
    """Determines how documents are processed after fetching."""

    # Full pipeline: chunk → embed → Vespa
    REGULAR = "regular"
    # Write fetched documents to the file system only (no chunking/embedding/indexing)
    FILE_SYSTEM = "file_system"
class SyncType(str, PyEnum):
DOCUMENT_SET = "document_set"
USER_GROUP = "user_group"
@@ -194,3 +201,34 @@ class SwitchoverType(str, PyEnum):
REINDEX = "reindex"
ACTIVE_ONLY = "active_only"
INSTANT = "instant"
# Onyx Build Mode Enums
class BuildSessionStatus(str, PyEnum):
    """Lifecycle state of a CLI-agent build session (Onyx Build Mode)."""

    ACTIVE = "active"
    IDLE = "idle"
class SandboxStatus(str, PyEnum):
    """Lifecycle state of a build-session sandbox container."""

    PROVISIONING = "provisioning"
    RUNNING = "running"
    IDLE = "idle"
    TERMINATED = "terminated"
    FAILED = "failed"

    def is_active(self) -> bool:
        """Check if sandbox is in an active state (running or idle)."""
        return self in {SandboxStatus.IDLE, SandboxStatus.RUNNING}

    def is_terminal(self) -> bool:
        """Check if sandbox is in a terminal state."""
        return self in {SandboxStatus.FAILED, SandboxStatus.TERMINATED}
class ArtifactType(str, PyEnum):
    """Kind of output artifact a build session can produce."""

    WEB_APP = "web_app"
    PPTX = "pptx"
    DOCX = "docx"
    IMAGE = "image"
    MARKDOWN = "markdown"
    EXCEL = "excel"

View File

@@ -11,6 +11,7 @@ from typing_extensions import TypedDict # noreorder
from uuid import UUID
from pydantic import ValidationError
from sqlalchemy.dialects.postgresql import JSONB as PGJSONB
from sqlalchemy.dialects.postgresql import UUID as PGUUID
from fastapi_users_db_sqlalchemy import SQLAlchemyBaseOAuthAccountTableUUID
@@ -55,8 +56,12 @@ from onyx.configs.constants import FileOrigin
from onyx.configs.constants import MessageType
from onyx.db.enums import (
AccessType,
ArtifactType,
BuildSessionStatus,
EmbeddingPrecision,
IndexingMode,
ProcessingMode,
SandboxStatus,
SyncType,
SyncStatus,
MCPAuthenticationType,
@@ -608,6 +613,16 @@ class ConnectorCredentialPair(Base):
Enum(IndexingMode, native_enum=False), nullable=True
)
# Determines how documents are processed after fetching:
# REGULAR: Full pipeline (chunk → embed → Vespa)
# FILE_SYSTEM: Write to file system only (for CLI agent sandbox)
processing_mode: Mapped[ProcessingMode] = mapped_column(
Enum(ProcessingMode, native_enum=False),
nullable=False,
default=ProcessingMode.REGULAR,
server_default="regular",
)
connector: Mapped["Connector"] = relationship(
"Connector", back_populates="credentials"
)
@@ -4129,3 +4144,204 @@ class TenantUsage(Base):
# Ensure only one row per window start (tenant_id is in the schema name)
UniqueConstraint("window_start", name="uq_tenant_usage_window"),
)
"""Tables related to Build Mode (CLI Agent Platform)"""
class BuildSession(Base):
    """Stores metadata about CLI agent build sessions.

    A session owns at most one Sandbox (one-to-one) plus any number of
    artifacts, snapshots, and messages; all children are cascade-deleted
    with the session.
    """

    __tablename__ = "build_session"

    id: Mapped[UUID] = mapped_column(
        PGUUID(as_uuid=True), primary_key=True, default=uuid4
    )
    # Owning user; DB rows are removed when the user row is deleted (CASCADE).
    # Nullable — presumably to allow ownerless/system sessions; confirm with callers.
    user_id: Mapped[UUID | None] = mapped_column(
        PGUUID(as_uuid=True), ForeignKey("user.id", ondelete="CASCADE"), nullable=True
    )
    # Optional human-readable session name.
    name: Mapped[str | None] = mapped_column(String, nullable=True)
    status: Mapped[BuildSessionStatus] = mapped_column(
        Enum(BuildSessionStatus, native_enum=False, name="buildsessionstatus"),
        nullable=False,
        default=BuildSessionStatus.ACTIVE,
    )
    created_at: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now(), nullable=False
    )
    # Refreshed automatically on every row update (onupdate=func.now()).
    last_activity_at: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
        onupdate=func.now(),
        nullable=False,
    )

    # Relationships
    user: Mapped[User | None] = relationship("User", foreign_keys=[user_id])
    # One-to-one: uselist=False here pairs with the unique session_id on Sandbox.
    sandbox: Mapped["Sandbox | None"] = relationship(
        "Sandbox", back_populates="session", uselist=False, cascade="all, delete-orphan"
    )
    artifacts: Mapped[list["Artifact"]] = relationship(
        "Artifact", back_populates="session", cascade="all, delete-orphan"
    )
    snapshots: Mapped[list["Snapshot"]] = relationship(
        "Snapshot", back_populates="session", cascade="all, delete-orphan"
    )
    messages: Mapped[list["BuildMessage"]] = relationship(
        "BuildMessage", back_populates="session", cascade="all, delete-orphan"
    )

    __table_args__ = (
        # Supports "recent sessions for a user" listings.
        Index("ix_build_session_user_created", "user_id", desc("created_at")),
        Index("ix_build_session_status", "status"),
    )
class Sandbox(Base):
    """Stores sandbox container metadata for build sessions."""

    __tablename__ = "sandbox"

    id: Mapped[UUID] = mapped_column(
        PGUUID(as_uuid=True), primary_key=True, default=uuid4
    )
    # unique=True enforces at most one sandbox per build session (one-to-one).
    session_id: Mapped[UUID] = mapped_column(
        PGUUID(as_uuid=True),
        ForeignKey("build_session.id", ondelete="CASCADE"),
        nullable=False,
        unique=True,
    )
    # Backing container identifier; nullable — presumably unset until
    # provisioning completes. TODO confirm with the provisioning flow.
    container_id: Mapped[str | None] = mapped_column(String, nullable=True)
    status: Mapped[SandboxStatus] = mapped_column(
        Enum(SandboxStatus, native_enum=False, name="sandboxstatus"),
        nullable=False,
        default=SandboxStatus.PROVISIONING,
    )
    created_at: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now(), nullable=False
    )
    # Last liveness signal; NOTE(review): presumably used to reap stale
    # sandboxes — verify against the cleanup job.
    last_heartbeat: Mapped[datetime.datetime | None] = mapped_column(
        DateTime(timezone=True), nullable=True
    )
    # Port where the sandbox's Next.js server is exposed, if running.
    nextjs_port: Mapped[int | None] = mapped_column(Integer, nullable=True)

    # Relationships
    session: Mapped[BuildSession] = relationship(
        "BuildSession", back_populates="sandbox"
    )

    __table_args__ = (
        Index("ix_sandbox_status", "status"),
        Index("ix_sandbox_container_id", "container_id"),
    )
class Artifact(Base):
    """Stores metadata about artifacts generated by CLI agents."""

    __tablename__ = "artifact"

    id: Mapped[UUID] = mapped_column(
        PGUUID(as_uuid=True), primary_key=True, default=uuid4
    )
    session_id: Mapped[UUID] = mapped_column(
        PGUUID(as_uuid=True),
        ForeignKey("build_session.id", ondelete="CASCADE"),
        nullable=False,
    )
    type: Mapped[ArtifactType] = mapped_column(
        Enum(ArtifactType, native_enum=False, name="artifacttype"), nullable=False
    )
    # path of artifact in sandbox relative to outputs/
    path: Mapped[str] = mapped_column(String, nullable=False)
    # Display name of the artifact.
    name: Mapped[str] = mapped_column(String, nullable=False)
    created_at: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now(), nullable=False
    )
    # Refreshed automatically on every row update (onupdate=func.now()).
    updated_at: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
        onupdate=func.now(),
        nullable=False,
    )

    # Relationships
    session: Mapped[BuildSession] = relationship(
        "BuildSession", back_populates="artifacts"
    )

    __table_args__ = (
        # Supports "recent artifacts for a session" listings.
        Index("ix_artifact_session_created", "session_id", desc("created_at")),
        Index("ix_artifact_type", "type"),
    )
class Snapshot(Base):
    """Stores metadata about sandbox volume snapshots."""

    __tablename__ = "snapshot"

    id: Mapped[UUID] = mapped_column(
        PGUUID(as_uuid=True), primary_key=True, default=uuid4
    )
    session_id: Mapped[UUID] = mapped_column(
        PGUUID(as_uuid=True),
        ForeignKey("build_session.id", ondelete="CASCADE"),
        nullable=False,
    )
    # Location of the snapshot data; the schema does not constrain the format
    # (file path vs. object-store key) — see the writer for the convention.
    storage_path: Mapped[str] = mapped_column(String, nullable=False)
    # Snapshot size in bytes; defaults to 0.
    size_bytes: Mapped[int] = mapped_column(BigInteger, nullable=False, default=0)
    created_at: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now(), nullable=False
    )

    # Relationships
    session: Mapped[BuildSession] = relationship(
        "BuildSession", back_populates="snapshots"
    )

    __table_args__ = (
        # Supports "latest snapshot for a session" lookups.
        Index("ix_snapshot_session_created", "session_id", desc("created_at")),
    )
class BuildMessage(Base):
    """Stores messages exchanged in build sessions.

    The metadata field stores structured ACP event data:
    - tool_call_start: {type: "tool_call_start", tool_call_id, kind, title, raw_input, ...}
    - tool_call_progress: {type: "tool_call_progress", tool_call_id, status, raw_output, ...}
    - agent_thought_chunk: {type: "agent_thought_chunk", content: {...}}
    - agent_plan_update: {type: "agent_plan_update", entries: [...]}
    - agent_message_chunk: content is stored in content field, metadata is null
    """

    __tablename__ = "build_message"

    id: Mapped[UUID] = mapped_column(
        PGUUID(as_uuid=True), primary_key=True, default=uuid4
    )
    session_id: Mapped[UUID] = mapped_column(
        PGUUID(as_uuid=True),
        ForeignKey("build_session.id", ondelete="CASCADE"),
        nullable=False,
    )
    # Reuses the chat MessageType enum (user/assistant/etc. — see MessageType).
    type: Mapped[MessageType] = mapped_column(
        Enum(MessageType, native_enum=False, name="messagetype"), nullable=False
    )
    # Plain-text message content; structured ACP data goes in message_metadata.
    content: Mapped[str] = mapped_column(Text, nullable=False)
    # JSONB blob holding the ACP event payloads described in the class docstring.
    message_metadata: Mapped[dict[str, Any] | None] = mapped_column(
        PGJSONB, nullable=True
    )
    created_at: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now(), nullable=False
    )

    # Relationships
    session: Mapped[BuildSession] = relationship(
        "BuildSession", back_populates="messages"
    )

    __table_args__ = (
        # Supports chronological message retrieval per session.
        Index("ix_build_message_session_created", "session_id", desc("created_at")),
    )

View File

@@ -1,3 +1,4 @@
from onyx.configs.app_configs import DEV_MODE
from onyx.feature_flags.interface import FeatureFlagProvider
from onyx.feature_flags.interface import NoOpFeatureFlagProvider
from onyx.utils.variable_functionality import (
@@ -19,7 +20,7 @@ def get_default_feature_flag_provider() -> FeatureFlagProvider:
Returns:
FeatureFlagProvider: The configured feature flag provider instance
"""
if MULTI_TENANT:
if MULTI_TENANT or DEV_MODE:
return fetch_versioned_implementation_with_fallback(
module="onyx.feature_flags.factory",
attribute="get_posthog_feature_flag_provider",

View File

@@ -0,0 +1,168 @@
"""
Persistent Document Writer for writing indexed documents to local filesystem with
hierarchical directory structure that mirrors the source organization.
"""
import hashlib
import json
from pathlib import Path
from onyx.connectors.models import Document
from onyx.server.features.build.configs import PERSISTENT_DOCUMENT_STORAGE_PATH
from onyx.utils.logger import setup_logger
logger = setup_logger()
class PersistentDocumentWriter:
"""Writes indexed documents to local filesystem with hierarchical structure.
Documents are stored in user-segregated paths:
{base_path}/{user_id}/{source}/{hierarchy}/document.json
This enables per-user isolation for sandbox access control.
"""
def __init__(
self,
base_path: str,
user_id: str,
):
self.base_path = Path(base_path)
self.user_id = user_id
def write_documents(self, documents: list[Document]) -> list[str]:
"""Write documents to local filesystem, returns written file paths"""
written_paths = []
# Build a map of base filenames to detect duplicates
# Key: (directory_path, base_filename) -> list of docs with that name
filename_map: dict[tuple[Path, str], list[Document]] = {}
for doc in documents:
dir_path = self._build_directory_path(doc)
base_filename = self._get_base_filename(doc)
key = (dir_path, base_filename)
if key not in filename_map:
filename_map[key] = []
filename_map[key].append(doc)
# Now write documents, appending ID if there are duplicates
for (dir_path, base_filename), docs in filename_map.items():
has_duplicates = len(docs) > 1
for doc in docs:
try:
if has_duplicates:
# Append sanitized ID to disambiguate
id_suffix = self._sanitize_path_component(doc.id)
if len(id_suffix) > 50:
id_suffix = hashlib.sha256(doc.id.encode()).hexdigest()[:16]
filename = f"{base_filename}_{id_suffix}.json"
else:
filename = f"{base_filename}.json"
path = dir_path / filename
self._write_document(doc, path)
written_paths.append(str(path))
except Exception as e:
logger.warning(
f"Failed to write document {doc.id} to persistent storage: {e}"
)
return written_paths
def _build_directory_path(self, doc: Document) -> Path:
"""Build directory path from document metadata.
Documents are stored under user-segregated paths:
{base_path}/{user_id}/{source}/{hierarchy}/
This enables per-user isolation for sandbox access control.
"""
parts: list[str] = []
# Add user_id as the first path component for user segregation
parts.append(self.user_id)
parts.append(doc.source.value)
# Get hierarchy from doc_metadata
hierarchy = doc.doc_metadata.get("hierarchy", {}) if doc.doc_metadata else {}
source_path = hierarchy.get("source_path", [])
if source_path:
parts.extend([self._sanitize_path_component(p) for p in source_path])
return self.base_path / "/".join(parts)
def _get_base_filename(self, doc: Document) -> str:
"""Get base filename from semantic identifier, falling back to ID"""
# Prefer semantic_identifier, fall back to title, then ID
name = doc.semantic_identifier or doc.title or doc.id
return self._sanitize_filename(name)
def _sanitize_path_component(self, component: str) -> str:
"""Sanitize a path component for file system safety"""
# Replace spaces with underscores
sanitized = component.replace(" ", "_")
# Replace other problematic characters
sanitized = sanitized.replace("/", "_").replace("\\", "_").replace(":", "_")
sanitized = sanitized.replace("<", "_").replace(">", "_").replace("|", "_")
sanitized = sanitized.replace('"', "_").replace("?", "_").replace("*", "_")
# Also handle null bytes and other control characters
sanitized = "".join(c for c in sanitized if ord(c) >= 32)
return sanitized.strip() or "unnamed"
def _sanitize_filename(self, name: str) -> str:
"""Sanitize name for use as filename"""
sanitized = self._sanitize_path_component(name)
if len(sanitized) > 200:
# Keep first 150 chars + hash suffix for uniqueness
hash_suffix = hashlib.sha256(name.encode()).hexdigest()[:16]
return f"{sanitized[:150]}_{hash_suffix}"
return sanitized
def _write_document(self, doc: Document, path: Path) -> None:
"""Serialize and write document to filesystem"""
content = {
"id": doc.id,
"semantic_identifier": doc.semantic_identifier,
"title": doc.title,
"source": doc.source.value,
"doc_updated_at": (
doc.doc_updated_at.isoformat() if doc.doc_updated_at else None
),
"metadata": doc.metadata,
"doc_metadata": doc.doc_metadata,
"sections": [
{"text": s.text if hasattr(s, "text") else None, "link": s.link}
for s in doc.sections
],
"primary_owners": [o.model_dump() for o in (doc.primary_owners or [])],
"secondary_owners": [o.model_dump() for o in (doc.secondary_owners or [])],
}
# Create parent directories if they don't exist
path.parent.mkdir(parents=True, exist_ok=True)
# Write the JSON file
with open(path, "w", encoding="utf-8") as f:
json.dump(content, f, indent=2, default=str)
logger.debug(f"Wrote document to {path}")
def get_persistent_document_writer(
    user_id: str,
) -> PersistentDocumentWriter:
    """Create a PersistentDocumentWriter rooted at the configured storage path.

    Args:
        user_id: User ID for user-segregated storage paths; documents are
            written under {base_path}/{user_id}/... for sandbox access
            control isolation.
    """
    return PersistentDocumentWriter(
        user_id=user_id,
        base_path=PERSISTENT_DOCUMENT_STORAGE_PATH,
    )

View File

@@ -738,7 +738,7 @@ def model_is_reasoning_model(model_name: str, model_provider: str) -> bool:
# Fallback: try using litellm.supports_reasoning() for newer models
try:
logger.debug("Falling back to `litellm.supports_reasoning`")
# logger.debug("Falling back to `litellm.supports_reasoning`")
full_model_name = (
f"{model_provider}/{model_name}"
if model_provider not in model_name

View File

@@ -63,6 +63,9 @@ from onyx.server.documents.connector import router as connector_router
from onyx.server.documents.credential import router as credential_router
from onyx.server.documents.document import router as document_router
from onyx.server.documents.standard_oauth import router as standard_oauth_router
from onyx.server.features.build.api.api import nextjs_assets_router
from onyx.server.features.build.api.api import router as build_router
from onyx.server.features.build.v1_api import v1_router as build_v1_router
from onyx.server.features.default_assistant.api import (
router as default_assistant_router,
)
@@ -376,6 +379,9 @@ def get_application(lifespan_override: Lifespan | None = None) -> FastAPI:
include_router_with_global_prefix_prepended(application, admin_input_prompt_router)
include_router_with_global_prefix_prepended(application, cc_pair_router)
include_router_with_global_prefix_prepended(application, projects_router)
include_router_with_global_prefix_prepended(application, build_router)
include_router_with_global_prefix_prepended(application, build_v1_router)
include_router_with_global_prefix_prepended(application, nextjs_assets_router)
include_router_with_global_prefix_prepended(application, document_set_router)
include_router_with_global_prefix_prepended(application, search_settings_router)
include_router_with_global_prefix_prepended(

View File

@@ -564,6 +564,7 @@ def associate_credential_to_connector(
access_type=metadata.access_type,
auto_sync_options=metadata.auto_sync_options,
groups=metadata.groups,
processing_mode=metadata.processing_mode,
)
# trigger indexing immediately

View File

@@ -18,6 +18,7 @@ from onyx.connectors.models import InputType
from onyx.db.enums import AccessType
from onyx.db.enums import ConnectorCredentialPairStatus
from onyx.db.enums import PermissionSyncStatus
from onyx.db.enums import ProcessingMode
from onyx.db.models import Connector
from onyx.db.models import ConnectorCredentialPair
from onyx.db.models import Credential
@@ -483,6 +484,7 @@ class ConnectorCredentialPairMetadata(BaseModel):
access_type: AccessType
auto_sync_options: dict[str, Any] | None = None
groups: list[int] = Field(default_factory=list)
processing_mode: ProcessingMode = ProcessingMode.REGULAR
class CCStatusUpdateRequest(BaseModel):

View File

@@ -0,0 +1,303 @@
# ACP Data Capture - Full Field Documentation
## Overview
The backend now captures **ALL** fields from ACP (Agent Client Protocol) events and streams them directly to the frontend. This ensures complete transparency and allows the frontend to access any ACP data it needs.
## Backend Changes
### 1. Full Field Serialization
**File:** `messages_api.py:138-153`
Changed from `exclude_none=True` to `exclude_none=False` to capture ALL fields:
```python
def _serialize_acp_event(event: Any, event_type: str) -> str:
"""Serialize an ACP event to SSE format, preserving ALL ACP data."""
if hasattr(event, "model_dump"):
data = event.model_dump(mode="json", by_alias=True, exclude_none=False)
else:
data = {"raw": str(event)}
data["type"] = event_type
data["timestamp"] = datetime.now(tz=timezone.utc).isoformat()
return f"event: message\ndata: {json.dumps(data)}\n\n"
```
### 2. Timestamp Addition
All ACP events now include a `timestamp` field for frontend tracking.
## ACP Events and Their Fields
### agent_message_chunk (AgentMessageChunk)
Agent's text/content output chunks during streaming.
**Fields:**
- `content`: ContentBlock (text, image, audio, resource, etc.)
- `field_meta`: Optional metadata dictionary (_meta in ACP)
- `session_update`: "agent_message_chunk"
- `timestamp`: ISO 8601 timestamp (added by Onyx)
**Example:**
```json
{
"type": "agent_message_chunk",
"content": {
"type": "text",
"text": "I'll create a React app for you..."
},
"field_meta": null,
"session_update": "agent_message_chunk",
"timestamp": "2026-01-20T20:56:34.123Z"
}
```
### agent_thought_chunk (AgentThoughtChunk)
Agent's internal reasoning/thinking process.
**Fields:**
- `content`: ContentBlock
- `field_meta`: Optional metadata
- `session_update`: "agent_thought_chunk"
- `timestamp`: ISO 8601 timestamp
**Example:**
```json
{
"type": "agent_thought_chunk",
"content": {
"type": "text",
"text": "Let me analyze the requirements..."
},
"field_meta": null,
"session_update": "agent_thought_chunk",
"timestamp": "2026-01-20T20:56:34.456Z"
}
```
### tool_call_start (ToolCallStart)
Indicates the agent is starting to use a tool.
**Fields:**
- `tool_call_id`: Unique ID for this tool invocation
- `kind`: Tool category (e.g., "edit", "execute", "other")
- `title`: Human-readable description of what the tool does
- `content`: ContentBlock with tool description/info
- `locations`: Array of file paths/locations affected
- `raw_input`: Original input parameters to the tool
- `raw_output`: Output (usually null at start)
- `status`: Tool status (usually null/pending at start)
- `field_meta`: Optional metadata
- `session_update`: Tool update type
- `timestamp`: ISO 8601 timestamp
**Example:**
```json
{
"type": "tool_call_start",
"tool_call_id": "call_abc123",
"kind": "edit",
"title": "Write file /app/page.tsx",
"content": {
"type": "text",
"text": "Creating React component..."
},
"locations": ["/app/page.tsx"],
"raw_input": {
"path": "/app/page.tsx",
"content": "..."
},
"raw_output": null,
"status": null,
"field_meta": null,
"session_update": "tool_call_start",
"timestamp": "2026-01-20T20:56:35.789Z"
}
```
### tool_call_progress (ToolCallProgress)
Progress update or completion of a tool call.
**Fields:**
- `tool_call_id`: ID matching the tool_call_start
- `kind`: Tool category
- `title`: Tool title
- `content`: ContentBlock with progress/result info
- `locations`: File paths affected
- `raw_input`: Original input
- `raw_output`: Tool execution result
- `status`: "in_progress", "completed", "failed", etc.
- `field_meta`: Optional metadata
- `session_update`: Tool update type
- `timestamp`: ISO 8601 timestamp
**Example:**
```json
{
"type": "tool_call_progress",
"tool_call_id": "call_abc123",
"kind": "edit",
"title": "Write file /app/page.tsx",
"content": {
"type": "text",
"text": "File written successfully"
},
"locations": ["/app/page.tsx"],
"raw_input": {...},
"raw_output": {
"success": true,
"bytes_written": 1234
},
"status": "completed",
"field_meta": null,
"session_update": "tool_call_progress",
"timestamp": "2026-01-20T20:56:36.012Z"
}
```
### agent_plan_update (AgentPlanUpdate)
Agent's execution plan with structured task list.
**Fields:**
- `entries`: Array of plan entries, each with:
- `id`: Task ID
- `description`: Task description
- `status`: "pending", "in_progress", "completed", "cancelled"
- `priority`: String ("high", "medium", "low") or number
- `field_meta`: Optional metadata
- `session_update`: "agent_plan_update"
- `timestamp`: ISO 8601 timestamp
**Example:**
```json
{
"type": "agent_plan_update",
"entries": [
{
"id": "task_1",
"description": "Set up Next.js project structure",
"status": "completed",
"priority": "high"
},
{
"id": "task_2",
"description": "Create React components",
"status": "in_progress",
"priority": "medium"
}
],
"field_meta": null,
"session_update": "agent_plan_update",
"timestamp": "2026-01-20T20:56:37.345Z"
}
```
### current_mode_update (CurrentModeUpdate)
Agent switched to a different mode (e.g., coding mode, planning mode).
**Fields:**
- `current_mode_id`: New mode identifier
- `field_meta`: Optional metadata
- `session_update`: "current_mode_update"
- `timestamp`: ISO 8601 timestamp
**Example:**
```json
{
"type": "current_mode_update",
"current_mode_id": "coding",
"field_meta": null,
"session_update": "current_mode_update",
"timestamp": "2026-01-20T20:56:38.678Z"
}
```
### prompt_response (PromptResponse)
Agent finished processing the user's request.
**Fields:**
- `stop_reason`: Why the agent stopped ("end_turn", "max_tokens", "refusal", etc.)
- `field_meta`: Optional metadata
- `timestamp`: ISO 8601 timestamp
**Example:**
```json
{
"type": "prompt_response",
"stop_reason": "end_turn",
"field_meta": null,
"timestamp": "2026-01-20T20:56:39.901Z"
}
```
### error (ACPError)
An error occurred during agent execution.
**Fields:**
- `code`: Error code (string or null)
- `message`: Human-readable error message
- `data`: Additional error context/data
- `timestamp`: ISO 8601 timestamp
**Example:**
```json
{
"type": "error",
"code": "TOOL_EXECUTION_FAILED",
"message": "Failed to write file: permission denied",
"data": {
"path": "/protected/file.txt",
"errno": "EACCES"
},
"timestamp": "2026-01-20T20:56:40.234Z"
}
```
## Frontend TypeScript Types
All ACP packet types are now properly typed in `buildStreamingModels.ts`:
```typescript
// Raw ACP packets with ALL fields
export type StreamPacket =
| AgentMessageChunkPacket
| AgentThoughtChunkPacket
| ToolCallStartPacket
| ToolCallProgressPacket
| AgentPlanUpdatePacket
| CurrentModeUpdatePacket
| PromptResponsePacket
| ACPErrorPacket
| ... // Custom Onyx packets
```
## Key Benefits
1. **Complete Transparency**: All ACP data is available to the frontend
2. **Future-Proof**: New ACP fields automatically flow through
3. **Debugging**: Full event data logged on backend for troubleshooting
4. **Extensibility**: `field_meta` allows custom metadata without protocol changes
5. **Type Safety**: Full TypeScript types for all ACP events
## Logging
All ACP events are logged with their complete structure:
```python
logger.warning(
f"[STREAM] Event #{event_count}: {event_type} = {json.dumps(event_data, default=str)[:500]}"
)
```
This helps with debugging and understanding what data is flowing through the system.
## Custom Onyx Packets
In addition to raw ACP events, Onyx sends custom packets:
- `artifact_created`: New artifact generated (web app, file, etc.)
- `file_write`: File written to sandbox
- `error`: Onyx-specific errors (e.g., session not found)
These use the same SSE format and include timestamps.

View File

@@ -0,0 +1,114 @@
# AGENTS.md
This file provides guidance for AI agents when working in this sandbox.
## Structure
The `files` directory contains all of the knowledge from Chris' company, Onyx. This knowledge comes from Google Drive, Linear, Slack, Github, Fireflies, and HubSpot.
Each source has its own directory - `Google_Drive`, `Linear`, `Slack`, `Github`, `Fireflies`, and `HubSpot`. Within each directory, the structure of the source is built out as a folder structure:
- Google Drive is copied over directly as is. End files are stored as `FILE_NAME.json`.
- Linear has each project as a folder, and then within each project, each individual ticket is stored as a file: `[TICKET_ID]_TICKET_NAME.json`.
- Slack has each channel as a folder titled `[CHANNEL_NAME]` in the root directory. Within each channel, each thread is represented as a single file called `[INITIAL_AUTHOR]_in_[CHANNEL]__[FIRST_MESSAGE].json`.
- Github has each organization as a folder titled `[ORG_NAME]`. Within each organization, there is
a folder for each repository titled `[REPO_NAME]`. Within each repository there are up to two folders: `pull_requests` and `issues`. Each pull request / issue is then represented as a single file
within the appropriate folder. Pull requests are structured as `[PR_ID]__[PR_NAME].json` and issues
are structured as `[ISSUE_ID]__[ISSUE_NAME].json`.
- Fireflies has all calls in the root, each as a single file titled `CALL_TITLE.json`.
- HubSpot has four folders in the root: `Tickets`, `Companies`, `Deals`, and `Contacts`. Each object is stored as a file named after its title/name (e.g., `[TICKET_SUBJECT].json`, `[COMPANY_NAME].json`, `[DEAL_NAME].json`, `[CONTACT_NAME].json`).
Across all names, spaces are replaced by `_`.
Each JSON is structured like:
```
{
"id": "afbec183-b0c5-46bf-b768-1ce88d003729",
"semantic_identifier": "[CS-17] [Betclic] Update system prompt doesn't work",
"title": "[Betclic] Update system prompt doesn't work",
"source": "linear",
"doc_updated_at": "2025-11-10T16:31:07.735000+00:00",
"metadata": {
"team": "Customer Success",
"creator": "{'name': 'Chris Weaver', 'email': 'chris@danswer.ai'}",
"state": "Backlog",
"priority": "3",
"created_at": "2025-11-10T16:30:10.718Z"
},
"doc_metadata": {
"hierarchy": {
"source_path": [
"Customer Success"
],
"team_name": "Customer Success",
"identifier": "CS-17"
}
},
"sections": [
{
"text": "Happens \\~15% of the time.",
"link": "https://linear.app/onyx-app/issue/CS-17/betclic-update-system-prompt-doesnt-work"
}
],
"primary_owners": [],
"secondary_owners": []
}
```
Do NOT write any files to these directories. Do NOT edit any files in these directories.
There is a special folder called `outputs`. Any and all python scripts, javascript apps, generated documents, slides, etc. should go here.
Feel free to write/edit anything you find in here.
## Outputs
There should be four main types of outputs:
1. Web Applications / Dashboards
2. Slides
3. Markdown Documents
4. Graphs/Charts
Generally, you should use the output type that best matches the user's request.
### Web Applications / Dashboards
Web applications and dashboards should be written as a Next.js app. Within the `outputs` directory,
there is a folder called `web` that has the skeleton of a basic Next.js app in it. Use this. We do NOT use a `src` directory.
Use NextJS 16.1.1, React v19, Tailwindcss, and recharts.
The Next.js app is already running. Do not run `npm run dev` yourself.
If the app needs any pre-computation, then create a bash script called `prepare.sh` at the root of the `web` directory.
### Slides
Slides should be created using the nano-banana MCP.
The outputs should be placed within the `outputs/slides` directory, named `[SLIDE_NUMBER].png`.
Before creating slides, create a `SLIDE_OUTLINE.md` file describing the overall message as well as the content and structure of each slide.
### Markdown Documents
Markdown documents should be placed within the `outputs/document` directory.
If you want to have a single "Document" that has multiple distinct pages, then create a folder within
the `outputs/document` directory, and name each page `1.MD`, `2.MD`, ...
### Graphs/Charts
Graphs and charts should be placed in the `outputs/charts` directory.
Graphs and charts should be created with a python script. You have access to libraries like numpy, pandas, scipy, matplotlib, and PIL.
## Your Environment
You are in an ephemeral virtual machine.
You currently have Python 3.11.13 and Node v22.21.1.
**Python Virtual Environment**: A Python virtual environment is pre-configured at `.venv/` with common data science and visualization packages already installed (numpy, pandas, matplotlib, scipy, PIL, etc.). The environment should be automatically activated, but if you run into issues with missing packages, you can explicitly use `.venv/bin/python` or `.venv/bin/pip`.
If you need additional packages, install them with `pip install <package>` (or `.venv/bin/pip install <package>` if the venv isn't active). For javascript packages, use `npm` from within the `outputs/web` directory.

View File

@@ -0,0 +1,114 @@
# CLAUDE.md
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
## Structure
The `files` directory contains all of the knowledge from Chris' company, Onyx. This knowledge comes from Google Drive, Linear, Slack, Github, Fireflies, and HubSpot.
Each source has its own directory - `Google_Drive`, `Linear`, `Slack`, `Github`, `Fireflies`, and `HubSpot`. Within each directory, the structure of the source is built out as a folder structure:
- Google Drive is copied over directly as is. End files are stored as `FILE_NAME.json`.
- Linear has each project as a folder, and then within each project, each individual ticket is stored as a file: `[TICKET_ID]_TICKET_NAME.json`.
- Slack has each channel as a folder titled `[CHANNEL_NAME]` in the root directory. Within each channel, each thread is represented as a single file called `[INITIAL_AUTHOR]_in_[CHANNEL]__[FIRST_MESSAGE].json`.
- Github has each organization as a folder titled `[ORG_NAME]`. Within each organization, there is
a folder for each repository titled `[REPO_NAME]`. Within each repository there are up to two folders: `pull_requests` and `issues`. Each pull request / issue is then represented as a single file
within the appropriate folder. Pull requests are structured as `[PR_ID]__[PR_NAME].json` and issues
are structured as `[ISSUE_ID]__[ISSUE_NAME].json`.
- Fireflies has all calls in the root, each as a single file titled `CALL_TITLE.json`.
- HubSpot has four folders in the root: `Tickets`, `Companies`, `Deals`, and `Contacts`. Each object is stored as a file named after its title/name (e.g., `[TICKET_SUBJECT].json`, `[COMPANY_NAME].json`, `[DEAL_NAME].json`, `[CONTACT_NAME].json`).
Across all names, spaces are replaced by `_`.
Each JSON is structured like:
```
{
"id": "afbec183-b0c5-46bf-b768-1ce88d003729",
"semantic_identifier": "[CS-17] [Betclic] Update system prompt doesn't work",
"title": "[Betclic] Update system prompt doesn't work",
"source": "linear",
"doc_updated_at": "2025-11-10T16:31:07.735000+00:00",
"metadata": {
"team": "Customer Success",
"creator": "{'name': 'Chris Weaver', 'email': 'chris@danswer.ai'}",
"state": "Backlog",
"priority": "3",
"created_at": "2025-11-10T16:30:10.718Z"
},
"doc_metadata": {
"hierarchy": {
"source_path": [
"Customer Success"
],
"team_name": "Customer Success",
"identifier": "CS-17"
}
},
"sections": [
{
"text": "Happens \\~15% of the time.",
"link": "https://linear.app/onyx-app/issue/CS-17/betclic-update-system-prompt-doesnt-work"
}
],
"primary_owners": [],
"secondary_owners": []
}
```
Do NOT write any files to these directories. Do NOT edit any files in these directories.
There is a special folder called `outputs`. Any and all python scripts, javascript apps, generated documents, slides, etc. should go here.
Feel free to write/edit anything you find in here.
## Outputs
There should be four main types of outputs:
1. Web Applications / Dashboards
2. Slides
3. Markdown Documents
4. Graphs/Charts
Generally, you should use the output type that best matches the user's request.
### Web Applications / Dashboards
Web applications and dashboards should be written as a Next.js app. Within the `outputs` directory,
there is a folder called `web` that has the skeleton of a basic Next.js app in it. Use this.
Use NextJS 16.1.1, React v19, Tailwindcss, and recharts.
The Next.js app is already running and accessible at http://localhost:3002. Do not run `npm run dev` yourself.
If the app needs any pre-computation, then create a bash script called `prepare.sh` at the root of the `web` directory.
### Slides
Slides should be created using the nano-banana MCP.
The outputs should be placed within the `outputs/slides` directory, named `[SLIDE_NUMBER].png`.
Before creating slides, create a `SLIDE_OUTLINE.md` file describing the overall message as well as the content and structure of each slide.
### Markdown Documents
Markdown documents should be placed within the `outputs/document` directory.
If you want to have a single "Document" that has multiple distinct pages, then create a folder within
the `outputs/document` directory, and name each page `1.MD`, `2.MD`, ...
### Graphs/Charts
Graphs and charts should be placed in the `outputs/charts` directory.
Graphs and charts should be created with a python script. You have access to libraries like numpy, pandas, scipy, matplotlib, and PIL.
## Your Environment
You are in an ephemeral virtual machine.
You currently have Python 3.11.13 and Node v22.21.1.
**Python Virtual Environment**: A Python virtual environment is pre-configured at `.venv/` with common data science and visualization packages already installed (numpy, pandas, matplotlib, scipy, PIL, etc.). The environment should be automatically activated, but if you run into issues with missing packages, you can explicitly use `.venv/bin/python` or `.venv/bin/pip`.
If you need additional packages, install them with `pip install <package>` (or `.venv/bin/pip install <package>` if the venv isn't active). For javascript packages, use `npm` from within the `outputs/web` directory.

View File

@@ -0,0 +1,453 @@
# Build Mode Packet Types
This document describes the packet types used for streaming agent responses in Onyx Build Mode.
## Overview
The Build Mode streaming API uses Server-Sent Events (SSE) to stream agent responses to the frontend. All packets are sent as `event: message` with a JSON payload containing a `type` field to distinguish packet types.
The packet system is based on:
- **Agent Client Protocol (ACP)**: https://agentclientprotocol.com
- **OpenCode ACP Implementation**: https://github.com/agentclientprotocol/python-sdk
## Packet Categories
### 1. Step/Thinking Packets
Track the agent's internal reasoning process.
#### `step_start`
Begin a logical step in agent processing.
```json
{
"type": "step_start",
"step_id": "planning",
"step_name": "Planning Implementation",
"timestamp": "2025-01-20T12:00:00.000Z"
}
```
#### `step_delta`
Progress within a step (agent's internal reasoning).
```json
{
"type": "step_delta",
"step_id": "thinking",
"content": "I need to first understand the codebase structure...",
"timestamp": "2025-01-20T12:00:00.000Z"
}
```
**Source:** `AgentThoughtChunk` from ACP
#### `step_end`
Finish a step.
```json
{
"type": "step_end",
"step_id": "planning",
"status": "completed",
"timestamp": "2025-01-20T12:00:00.000Z"
}
```
### 2. Tool Call Packets
Track tool invocations and their results.
#### `tool_start`
Agent invoking a tool.
```json
{
"type": "tool_start",
"tool_call_id": "tc_123",
"tool_name": "Read",
"tool_input": {
"file_path": "/path/to/file.py"
},
"title": "Reading file.py",
"timestamp": "2025-01-20T12:00:00.000Z"
}
```
**Source:** `ToolCallStart` from ACP
#### `tool_progress`
Tool execution progress update.
```json
{
"type": "tool_progress",
"tool_call_id": "tc_123",
"tool_name": "Bash",
"status": "in_progress",
"progress": 0.5,
"message": "Running tests...",
"timestamp": "2025-01-20T12:00:00.000Z"
}
```
#### `tool_end`
Tool execution finished.
```json
{
"type": "tool_end",
"tool_call_id": "tc_123",
"tool_name": "Read",
"status": "success",
"result": "File contents here...",
"timestamp": "2025-01-20T12:00:00.000Z"
}
```
**Source:** `ToolCallProgress` from ACP
### 3. Agent Output Packets
Track the agent's text responses.
#### `output_start`
Begin agent's text output.
```json
{
"type": "output_start",
"timestamp": "2025-01-20T12:00:00.000Z"
}
```
#### `output_delta`
Incremental agent text output.
```json
{
"type": "output_delta",
"content": "I've updated the file to include...",
"timestamp": "2025-01-20T12:00:00.000Z"
}
```
**Source:** `AgentMessageChunk` from ACP
#### `output_end`
Agent's text output finished.
```json
{
"type": "output_end",
"timestamp": "2025-01-20T12:00:00.000Z"
}
```
### 4. Plan Packets
Track the agent's execution plan.
#### `plan`
Agent's execution plan.
```json
{
"type": "plan",
"plan": "1. Read the file\n2. Make changes\n3. Run tests",
"entries": [
{
"id": "1",
"description": "Read the file",
"status": "pending",
"priority": 1
}
],
"timestamp": "2025-01-20T12:00:00.000Z"
}
```
**Source:** `AgentPlanUpdate` from ACP
### 5. Mode Update Packets
Track agent mode changes (e.g., planning, implementing, debugging).
#### `mode_update`
Agent mode change.
```json
{
"type": "mode_update",
"mode": "implement",
"description": "Starting implementation",
"timestamp": "2025-01-20T12:00:00.000Z"
}
```
**Source:** `CurrentModeUpdate` from ACP
### 6. Completion Packets
Signal task completion.
#### `done`
Signal completion with summary.
```json
{
"type": "done",
"summary": "Task completed successfully",
"stop_reason": "end_turn",
"usage": {
"input_tokens": 1000,
"output_tokens": 500
},
"timestamp": "2025-01-20T12:00:00.000Z"
}
```
**Stop Reasons:**
- `end_turn`: Agent completed normally
- `max_tokens`: Hit token limit
- `max_turn_requests`: Hit max tool calls
- `refusal`: Agent refused the request
- `cancelled`: User cancelled
**Source:** `PromptResponse` from ACP
### 7. Error Packets
Report errors.
#### `error`
An error occurred.
```json
{
"type": "error",
"message": "Failed to read file: File not found",
"code": -1,
"timestamp": "2025-01-20T12:00:00.000Z"
}
```
**Source:** `Error` from ACP
### 8. Custom Onyx Packets
Onyx-specific packets not part of ACP.
#### `file_write`
File written to sandbox.
```json
{
"type": "file_write",
"path": "outputs/file.py",
"size_bytes": 1024,
"operation": "create",
"timestamp": "2025-01-20T12:00:00.000Z"
}
```
#### `artifact_created`
New artifact generated.
```json
{
"type": "artifact_created",
"artifact": {
"id": "550e8400-e29b-41d4-a716-446655440000",
"type": "web_app",
"name": "Web Application",
"path": "outputs/web/",
"preview_url": "/api/build/sessions/{session_id}/preview",
"download_url": "/api/build/sessions/{session_id}/artifacts/outputs/web/",
"mime_type": "text/html",
"size_bytes": 4096
},
"timestamp": "2025-01-20T12:00:00.000Z"
}
```
**Artifact Types:**
- `web_app`: Web application
- `markdown`: Markdown document
- `image`: Image file
- `csv`: CSV/Excel file (displayed as CSV in UI)
- `excel`: Excel spreadsheet
- `pptx`: PowerPoint presentation
- `docx`: Word document
- `pdf`: PDF document
- `code`: Code file
- `other`: Other file type
### 9. Permission Packets
Request and respond to user permissions (future use).
#### `permission_request`
Request user permission for an operation.
```json
{
"type": "permission_request",
"request_id": "pr_123",
"operation": "delete_file",
"description": "Delete test.py?",
"auto_approve": false,
"timestamp": "2025-01-20T12:00:00.000Z"
}
```
#### `permission_response`
Response to a permission request.
```json
{
"type": "permission_response",
"request_id": "pr_123",
"approved": true,
"reason": "User approved",
"timestamp": "2025-01-20T12:00:00.000Z"
}
```
## Content Block Types
All content in ACP can be sent as structured content blocks:
### Text Content
```json
{
"type": "text",
"text": "Hello world"
}
```
### Image Content
```json
{
"type": "image",
"data": "base64-encoded-image-data",
"mimeType": "image/png"
}
```
### Audio Content
```json
{
"type": "audio",
"data": "base64-encoded-audio-data",
"mimeType": "audio/wav"
}
```
### Embedded Resource
```json
{
"type": "embedded_resource",
"uri": "file:///path/to/file",
"text": "Resource contents...",
"mimeType": "text/plain"
}
```
### Resource Link
```json
{
"type": "resource_link",
"uri": "file:///path/to/file",
"name": "file.txt",
"mimeType": "text/plain",
"size": 1024
}
```
## Usage in Code
### Converting ACP Events to Packets
Use the conversion utilities in `build_packet_types.py`:
```python
from onyx.server.features.build.build_packet_types import (
convert_acp_thought_to_step_delta,
convert_acp_tool_start_to_tool_start,
convert_acp_tool_progress_to_tool_end,
convert_acp_message_chunk_to_output_delta,
convert_acp_plan_to_plan,
convert_acp_mode_update_to_mode_update,
convert_acp_prompt_response_to_done,
convert_acp_error_to_error,
)
# Convert ACP event to packet
if isinstance(acp_event, AgentThoughtChunk):
packet = convert_acp_thought_to_step_delta(acp_event)
yield packet
```
### Creating Artifacts
```python
from onyx.server.features.build.build_packet_types import (
create_artifact_from_file,
ArtifactType,
ArtifactCreatedPacket,
)
artifact = create_artifact_from_file(
session_id=session_id,
file_path="outputs/web/",
artifact_type=ArtifactType.WEB_APP,
name="Web Application",
)
packet = ArtifactCreatedPacket(artifact=artifact)
yield packet
```
### Formatting Packets for SSE
```python
def _format_packet_event(packet: BuildPacket) -> str:
"""Format a packet as SSE (all events use event: message)."""
return f"event: message\ndata: {packet.model_dump_json(by_alias=True)}\n\n"
# Use in streaming
yield _format_packet_event(packet)
```
## Type Safety
All packet types are Pydantic models with full type safety:
```python
from onyx.server.features.build.build_packet_types import (
BuildPacket,
StepDeltaPacket,
ToolStartPacket,
OutputDeltaPacket,
)
# Type-safe packet creation
packet: StepDeltaPacket = StepDeltaPacket(
step_id="thinking",
content="Analyzing the code..."
)
# Union type for all packets
def process_packet(packet: BuildPacket) -> None:
if isinstance(packet, StepDeltaPacket):
print(f"Thinking: {packet.content}")
elif isinstance(packet, ToolStartPacket):
print(f"Using tool: {packet.tool_name}")
```
## References
- Agent Client Protocol: https://agentclientprotocol.com
- ACP Prompt Turn: https://agentclientprotocol.com/protocol/prompt-turn
- ACP Content Blocks: https://agentclientprotocol.com/protocol/content
- OpenCode Python SDK: https://github.com/agentclientprotocol/python-sdk

View File

@@ -0,0 +1 @@
# Build feature module

View File

@@ -0,0 +1,219 @@
import re
from collections.abc import Iterator
from uuid import UUID

import httpx
from fastapi import APIRouter
from fastapi import Depends
from fastapi import HTTPException
from fastapi import Request
from fastapi import Response
from fastapi.responses import StreamingResponse
from sqlalchemy.orm import Session

from onyx.auth.users import current_user
from onyx.db.engine.sql_engine import get_session
from onyx.db.models import User
from onyx.server.features.build.api.messages_api import router as messages_router
from onyx.server.features.build.api.sessions_api import router as sessions_router
from onyx.server.features.build.db.sandbox import get_sandbox_by_session_id
from onyx.utils.logger import setup_logger
# Module-level logger for the build proxy endpoints.
logger = setup_logger()
# Parent router for all build-mode endpoints.
router = APIRouter(prefix="/build")
# Include sub-routers for sessions and messages
router.include_router(sessions_router, tags=["build"])
router.include_router(messages_router, tags=["build"])
# Headers to skip when proxying (hop-by-hop headers).
# content-length / content-encoding / transfer-encoding are dropped because
# the body may be rewritten by _rewrite_asset_paths, which would make the
# original values wrong; "connection" is connection-specific and must not be
# forwarded.
EXCLUDED_HEADERS = {
    "content-encoding",
    "content-length",
    "transfer-encoding",
    "connection",
}
def _stream_response(response: httpx.Response) -> Iterator[bytes]:
"""Stream the response content in chunks."""
for chunk in response.iter_bytes(chunk_size=8192):
yield chunk
def _rewrite_asset_paths(content: bytes, session_id: str) -> bytes:
"""Rewrite Next.js asset paths to go through the proxy."""
import re
# Base path includes session_id for routing
webapp_base_path = f"/api/build/sessions/{session_id}/webapp"
text = content.decode("utf-8")
# Rewrite /_next/ paths to go through our proxy
text = text.replace("/_next/", f"{webapp_base_path}/_next/")
# Rewrite root-level JSON data file fetch paths (e.g., /data.json, /pr_data.json)
# Only matches paths like "/filename.json" (no subdirectories)
text = re.sub(r'"(/[a-zA-Z0-9_-]+\.json)"', f'"{webapp_base_path}\\1"', text)
text = re.sub(r"'(/[a-zA-Z0-9_-]+\.json)'", f"'{webapp_base_path}\\1'", text)
# Rewrite favicon
text = text.replace('"/favicon.ico', f'"{webapp_base_path}/favicon.ico')
return text.encode("utf-8")
# Content types that may contain asset path references that need rewriting.
# These are matched by substring against the response's Content-Type header
# (so "text/html; charset=utf-8" also matches).
REWRITABLE_CONTENT_TYPES = {
    "text/html",
    "text/css",
    "application/javascript",
    "text/javascript",
    "application/x-javascript",
}
def _get_sandbox_url(session_id: UUID, db_session: Session) -> str:
    """Resolve the localhost URL of a sandbox's Next.js server.

    Args:
        session_id: The build session ID.
        db_session: Database session used for the sandbox lookup.

    Returns:
        A URL of the form "http://localhost:<port>".

    Raises:
        HTTPException: 404 if no sandbox exists for the session, 503 if the
            sandbox has no Next.js port allocated yet.
    """
    sandbox = get_sandbox_by_session_id(db_session, session_id)
    if not sandbox:
        raise HTTPException(status_code=404, detail="Sandbox not found")
    port = sandbox.nextjs_port
    if port is None:
        raise HTTPException(status_code=503, detail="Sandbox port not allocated")
    return f"http://localhost:{port}"
def _proxy_request(
    path: str, request: Request, session_id: UUID, db_session: Session
) -> StreamingResponse | Response:
    """Proxy a GET request to the sandbox's Next.js server.

    Args:
        path: Path relative to the sandbox server root (leading slashes are
            stripped before joining).
        request: Incoming request; its query params and headers are forwarded.
        session_id: Build session whose sandbox should serve the request.
        db_session: Database session for the sandbox lookup.

    Returns:
        A Response with rewritten asset paths for HTML/CSS/JS bodies, or a
        StreamingResponse passing any other content through.

    Raises:
        HTTPException: 404/503 from the sandbox lookup, 504 on upstream
            timeout, 502 on any other upstream request error.
    """
    base_url = _get_sandbox_url(session_id, db_session)
    # Build the target URL
    target_url = f"{base_url}/{path.lstrip('/')}"
    # Include query params if present
    if request.query_params:
        target_url = f"{target_url}?{request.query_params}"
    logger.debug(f"Proxying request to: {target_url}")
    try:
        # Make the request to the target URL. client.get reads the full body
        # eagerly (httpx default, no streaming request), so the response stays
        # usable even after the client context manager exits.
        with httpx.Client(timeout=30.0, follow_redirects=True) as client:
            response = client.get(
                target_url,
                # Forward the caller's headers, minus ones the proxy's own
                # HTTP client must set itself.
                headers={
                    key: value
                    for key, value in request.headers.items()
                    if key.lower() not in ("host", "content-length")
                },
            )
            # Build response headers, excluding hop-by-hop headers
            response_headers = {
                key: value
                for key, value in response.headers.items()
                if key.lower() not in EXCLUDED_HEADERS
            }
            content_type = response.headers.get("content-type", "")
            # For HTML/CSS/JS responses, rewrite asset paths so the browser
            # routes follow-up fetches back through this proxy.
            if any(ct in content_type for ct in REWRITABLE_CONTENT_TYPES):
                content = _rewrite_asset_paths(response.content, str(session_id))
                return Response(
                    content=content,
                    status_code=response.status_code,
                    headers=response_headers,
                    media_type=content_type,
                )
            # Everything else (images, fonts, etc.) is passed through in
            # 8 KiB chunks from the buffered response.
            return StreamingResponse(
                content=_stream_response(response),
                status_code=response.status_code,
                headers=response_headers,
                media_type=content_type or None,
            )
    except httpx.TimeoutException:
        logger.error(f"Timeout while proxying request to {target_url}")
        raise HTTPException(status_code=504, detail="Gateway timeout")
    except httpx.RequestError as e:
        logger.error(f"Error proxying request to {target_url}: {e}")
        raise HTTPException(status_code=502, detail="Bad gateway")
@router.get("/sessions/{session_id}/webapp", response_model=None)
def get_webapp_root(
session_id: UUID,
request: Request,
_: User = Depends(current_user),
db_session: Session = Depends(get_session),
) -> StreamingResponse | Response:
"""Proxy the root path of the webapp for a specific session."""
return _proxy_request("", request, session_id, db_session)
@router.get("/sessions/{session_id}/webapp/{path:path}", response_model=None)
def get_webapp_path(
session_id: UUID,
path: str,
request: Request,
_: User = Depends(current_user),
db_session: Session = Depends(get_session),
) -> StreamingResponse | Response:
"""Proxy any subpath of the webapp (static assets, etc.) for a specific session."""
return _proxy_request(path, request, session_id, db_session)
# Separate router for Next.js static assets at /_next/*
# This is needed because Next.js apps may reference assets with root-relative paths
# that don't get rewritten. The session_id is extracted from the Referer header.
# NOTE(review): presumably this router is mounted without the /build prefix so
# it can catch root-level /_next/ requests — verify where it is registered.
nextjs_assets_router = APIRouter()
def _extract_session_from_referer(request: Request) -> UUID | None:
"""Extract session_id from the Referer header.
Expects Referer to contain /api/build/sessions/{session_id}/webapp
"""
import re
referer = request.headers.get("referer", "")
match = re.search(r"/api/build/sessions/([a-f0-9-]+)/webapp", referer)
if match:
try:
return UUID(match.group(1))
except ValueError:
return None
return None
@nextjs_assets_router.get("/_next/{path:path}", response_model=None)
def get_nextjs_assets(
path: str,
request: Request,
_: User = Depends(current_user),
db_session: Session = Depends(get_session),
) -> StreamingResponse | Response:
"""Proxy Next.js static assets requested at root /_next/ path.
The session_id is extracted from the Referer header since these requests
come from within the iframe context.
"""
session_id = _extract_session_from_referer(request)
if not session_id:
raise HTTPException(
status_code=400,
detail="Could not determine session from request context",
)
return _proxy_request(f"_next/{path}", request, session_id, db_session)

View File

@@ -0,0 +1,101 @@
"""API endpoints for Build Mode message management."""
from uuid import UUID
from fastapi import APIRouter
from fastapi import Depends
from fastapi import HTTPException
from fastapi.responses import StreamingResponse
from sqlalchemy.orm import Session
from onyx.auth.users import current_user
from onyx.configs.constants import PUBLIC_API_TAGS
from onyx.db.engine.sql_engine import get_session
from onyx.db.models import User
from onyx.server.features.build.api.models import MessageListResponse
from onyx.server.features.build.api.models import MessageRequest
from onyx.server.features.build.api.models import MessageResponse
from onyx.server.features.build.session.manager import RateLimitError
from onyx.server.features.build.session.manager import SessionManager
from onyx.utils.logger import setup_logger
# Module-level logger for build message endpoints.
logger = setup_logger()
# Sub-router; included by the parent build router under its /build prefix.
router = APIRouter()
def check_build_rate_limits(
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> None:
    """
    Dependency to check build mode rate limits before processing the request.

    Raises HTTPException(429) if the rate limit is exceeded.
    Follows the same pattern as chat's check_token_rate_limits.
    """
    session_manager = SessionManager(db_session)
    try:
        session_manager.check_rate_limit(user)
    except RateLimitError as e:
        # Chain the original exception so the rate-limit details are kept in
        # tracebacks/logs (PEP 3134 / flake8 B904).
        raise HTTPException(
            status_code=429,
            detail=str(e),
        ) from e
@router.get("/sessions/{session_id}/messages", tags=PUBLIC_API_TAGS)
def list_messages(
session_id: UUID,
user: User = Depends(current_user),
db_session: Session = Depends(get_session),
) -> MessageListResponse:
"""Get all messages for a build session."""
if user is None:
raise HTTPException(status_code=401, detail="Authentication required")
session_manager = SessionManager(db_session)
messages = session_manager.list_messages(session_id, user.id)
if messages is None:
raise HTTPException(status_code=404, detail="Session not found")
return MessageListResponse(
messages=[MessageResponse.from_model(msg) for msg in messages]
)
@router.post("/sessions/{session_id}/send-message", tags=PUBLIC_API_TAGS)
async def send_message(
session_id: UUID,
request: MessageRequest,
user: User = Depends(current_user),
db_session: Session = Depends(get_session),
_rate_limit_check: None = Depends(check_build_rate_limits),
):
"""
Send a message to the CLI agent and stream the response.
Enforces rate limiting before executing the agent (via dependency).
Returns a Server-Sent Events (SSE) stream with the agent's response.
Follows the same pattern as /chat/send-message for consistency.
"""
if user is None:
raise HTTPException(status_code=401, detail="Authentication required")
session_manager = SessionManager(db_session)
# Stream the CLI agent's response
return StreamingResponse(
session_manager.send_message(session_id, user.id, request.content),
media_type="text/event-stream",
headers={
"Cache-Control": "no-cache",
"Connection": "keep-alive",
"X-Accel-Buffering": "no", # Disable nginx buffering
},
)

View File

@@ -0,0 +1,208 @@
from datetime import datetime
from typing import Any
from pydantic import BaseModel
from onyx.configs.constants import MessageType
from onyx.db.enums import ArtifactType
from onyx.db.enums import BuildSessionStatus
from onyx.db.enums import SandboxStatus
# ===== Session Models =====
class SessionCreateRequest(BaseModel):
    """Request to create a new build session."""
    name: str | None = None  # Optional session name; may be generated later
class SessionUpdateRequest(BaseModel):
    """Request to update a build session.

    If name is None, the session name will be auto-generated using LLM.
    """
    name: str | None = None
class SessionNameGenerateResponse(BaseModel):
    """Response containing a generated session name."""
    name: str  # The LLM-generated session name
class SandboxResponse(BaseModel):
    """Sandbox metadata in session response."""
    id: str  # Sandbox UUID, serialized as a string
    status: SandboxStatus
    container_id: str | None  # Backing container ID, when available
    created_at: datetime
    last_heartbeat: datetime | None  # None when no heartbeat has been recorded
    nextjs_port: int | None  # Host port of the Next.js server, once allocated
    @classmethod
    def from_model(cls, sandbox: Any) -> "SandboxResponse":
        """Convert Sandbox ORM model to response."""
        return cls(
            id=str(sandbox.id),
            status=sandbox.status,
            container_id=sandbox.container_id,
            created_at=sandbox.created_at,
            last_heartbeat=sandbox.last_heartbeat,
            nextjs_port=sandbox.nextjs_port,
        )
class ArtifactResponse(BaseModel):
    """Artifact metadata in session response."""
    id: str  # Artifact UUID, serialized as a string
    session_id: str  # Owning build session UUID, as a string
    type: ArtifactType
    name: str
    path: str  # Path of the artifact within the sandbox
    preview_url: str | None
    created_at: datetime
    updated_at: datetime
    @classmethod
    def from_model(cls, artifact: Any) -> "ArtifactResponse":
        """Convert Artifact ORM model to response.

        preview_url is read via getattr so Artifact models without that
        attribute still convert cleanly (it defaults to None).
        """
        return cls(
            id=str(artifact.id),
            session_id=str(artifact.session_id),
            type=artifact.type,
            name=artifact.name,
            path=artifact.path,
            preview_url=getattr(artifact, "preview_url", None),
            created_at=artifact.created_at,
            updated_at=artifact.updated_at,
        )
class SessionResponse(BaseModel):
    """Response containing session details."""
    id: str  # Session UUID, serialized as a string
    user_id: str | None  # Owning user UUID, or None for ownerless sessions
    name: str | None
    status: BuildSessionStatus
    created_at: datetime
    last_activity_at: datetime
    sandbox: SandboxResponse | None  # None when no sandbox is attached
    artifacts: list[ArtifactResponse]
    @classmethod
    def from_model(cls, session: Any) -> "SessionResponse":
        """Convert BuildSession ORM model (with sandbox/artifacts) to response."""
        return cls(
            id=str(session.id),
            user_id=str(session.user_id) if session.user_id else None,
            name=session.name,
            status=session.status,
            created_at=session.created_at,
            last_activity_at=session.last_activity_at,
            sandbox=(
                SandboxResponse.from_model(session.sandbox) if session.sandbox else None
            ),
            artifacts=[ArtifactResponse.from_model(a) for a in session.artifacts],
        )
class SessionListResponse(BaseModel):
    """Response containing list of sessions."""

    sessions: list[SessionResponse]


# ===== Message Models =====


class MessageRequest(BaseModel):
    """Request to send a message to the CLI agent."""

    content: str  # Raw user message text


class MessageResponse(BaseModel):
    """Response containing message details."""

    id: str
    session_id: str
    type: MessageType
    content: str
    # Optional structured event data carried alongside the text content
    message_metadata: dict[str, Any] | None = None
    created_at: datetime

    @classmethod
    def from_model(cls, message) -> "MessageResponse":
        """Convert BuildMessage ORM model to response."""
        return cls(
            id=str(message.id),
            session_id=str(message.session_id),
            type=message.type,
            content=message.content,
            message_metadata=message.message_metadata,
            created_at=message.created_at,
        )


class MessageListResponse(BaseModel):
    """Response containing list of messages."""

    messages: list[MessageResponse]
# ===== Legacy Models (for compatibility with other code) =====


class CreateSessionRequest(BaseModel):
    """Legacy request to create a session around an initial task."""

    task: str
    available_sources: list[str] | None = None


class CreateSessionResponse(BaseModel):
    """Legacy response carrying only the new session's ID."""

    session_id: str


class ExecuteRequest(BaseModel):
    """Legacy request to execute a task, optionally with extra context."""

    task: str
    context: str | None = None


class ArtifactInfo(BaseModel):
    """Legacy artifact descriptor."""

    artifact_type: str  # "webapp", "file", "markdown", "image"
    path: str
    filename: str
    mime_type: str | None = None


class SessionStatus(BaseModel):
    """Legacy session status summary."""

    session_id: str
    status: str  # "idle", "running", "completed", "failed"
    webapp_url: str | None = None


class FileSystemEntry(BaseModel):
    """A single file or directory entry in a sandbox directory listing."""

    name: str  # File/folder name
    path: str  # Relative path from sandbox root
    is_directory: bool  # True for folders
    size: int | None = None  # File size in bytes
    mime_type: str | None = None  # MIME type for files


class DirectoryListing(BaseModel):
    """Contents of one sandbox directory."""

    path: str  # Current directory path
    entries: list[FileSystemEntry]  # Contents


class WebappInfo(BaseModel):
    """Webapp availability information for a session."""

    has_webapp: bool  # Whether a webapp exists in outputs/web
    webapp_url: str | None  # URL to access the webapp (e.g., http://localhost:3015)
    status: str  # Sandbox status (running, terminated, etc.)


# ===== File Upload Models =====


class UploadResponse(BaseModel):
    """Response after successful file upload."""

    filename: str  # Sanitized filename
    path: str  # Relative path in sandbox (e.g., "user_uploaded_files/doc.pdf")
    size_bytes: int  # File size in bytes

View File

@@ -0,0 +1,100 @@
"""Simple packet logger for build mode debugging.
Logs the raw JSON of every packet emitted during build mode.
Log output: backend/onyx/server/features/build/packets.log
"""
import json
import logging
import os
from pathlib import Path
from typing import Any
class PacketLogger:
"""Simple packet logger - outputs raw JSON for each packet."""
_instance: "PacketLogger | None" = None
def __new__(cls) -> "PacketLogger":
if cls._instance is None:
cls._instance = super().__new__(cls)
cls._instance._initialized = False
return cls._instance
def __init__(self) -> None:
if self._initialized:
return
self._initialized = True
self._enabled = os.getenv("LOG_LEVEL", "").upper() == "DEBUG"
self._logger: logging.Logger | None = None
if self._enabled:
self._setup_logger()
def _setup_logger(self) -> None:
"""Set up the file handler for packet logging."""
# Log to backend/onyx/server/features/build/packets.log
build_dir = Path(__file__).parents[1]
log_file = build_dir / "packets.log"
self._logger = logging.getLogger("build.packets")
self._logger.setLevel(logging.DEBUG)
self._logger.propagate = False
self._logger.handlers.clear()
handler = logging.FileHandler(log_file, mode="a", encoding="utf-8")
handler.setLevel(logging.DEBUG)
handler.setFormatter(logging.Formatter("%(message)s"))
self._logger.addHandler(handler)
def log(self, packet_type: str, payload: dict[str, Any] | None = None) -> None:
"""Log a packet as JSON.
Args:
packet_type: The type of packet
payload: The packet payload
"""
if not self._enabled or not self._logger:
return
try:
output = json.dumps(payload, indent=2, default=str) if payload else "{}"
self._logger.debug(f"\n=== {packet_type} ===\n{output}")
except Exception:
self._logger.debug(f"\n=== {packet_type} ===\n{payload}")
def log_raw(self, label: str, data: Any) -> None:
"""Log raw data with a label.
Args:
label: A label for this log entry
data: Any data to log
"""
if not self._enabled or not self._logger:
return
try:
if isinstance(data, (dict, list)):
output = json.dumps(data, indent=2, default=str)
else:
output = str(data)
self._logger.debug(f"\n=== {label} ===\n{output}")
except Exception:
self._logger.debug(f"\n=== {label} ===\n{data}")
# Singleton instance
_packet_logger: PacketLogger | None = None


def get_packet_logger() -> PacketLogger:
    """Return the process-wide PacketLogger, creating it on first use."""
    global _packet_logger
    if _packet_logger is None:
        _packet_logger = PacketLogger()
    return _packet_logger

View File

@@ -0,0 +1,68 @@
"""Build Mode packet types for streaming agent responses.
This module defines CUSTOM Onyx packet types that extend ACP (Agent Client Protocol).
ACP events are passed through directly from the agent - this module only contains
Onyx-specific extensions like artifacts and file operations.
All packets use SSE (Server-Sent Events) format with `event: message` and include
a `type` field to distinguish packet types.
ACP events (passed through directly from acp.schema):
- agent_message_chunk: Text/image content from agent
- agent_thought_chunk: Agent's internal reasoning
- tool_call_start: Tool invocation started
- tool_call_progress: Tool execution progress/result
- agent_plan_update: Agent's execution plan
- current_mode_update: Agent mode change
- prompt_response: Agent finished processing
- error: An error occurred on the agent side (distinct from the Onyx-specific error packet below)
Custom Onyx packets (defined here):
- error: Onyx-specific errors (e.g., session not found)
Based on:
- Agent Client Protocol (ACP): https://agentclientprotocol.com
"""
from datetime import datetime
from datetime import timezone
from typing import Any
from typing import Literal
from pydantic import BaseModel
from pydantic import Field
# =============================================================================
# Base Packet Type
# =============================================================================
class BasePacket(BaseModel):
    """Base packet with common fields for all custom Onyx packet types."""

    # Discriminator; subclasses narrow this to a Literal value.
    type: str
    # ISO-8601 UTC timestamp assigned when the packet instance is created.
    timestamp: str = Field(
        default_factory=lambda: datetime.now(tz=timezone.utc).isoformat()
    )
# =============================================================================
# Custom Onyx Packets
# =============================================================================
class ErrorPacket(BasePacket):
    """An Onyx-specific error occurred (e.g., session not found, sandbox not running)."""

    type: Literal["error"] = "error"
    message: str  # Human-readable error description
    code: int | None = None  # Optional numeric error code
    details: dict[str, Any] | None = None  # Optional structured error context


# =============================================================================
# Union Type for Custom Onyx Packets
# =============================================================================
# Currently a single variant; becomes a Union as more custom packets are added.
BuildPacket = ErrorPacket

View File

@@ -0,0 +1,90 @@
"""Rate limiting logic for Build Mode."""
from datetime import datetime
from datetime import timedelta
from datetime import timezone
from typing import Literal
from sqlalchemy.orm import Session
from onyx.db.models import User
from onyx.server.features.build.api.subscription_check import is_user_subscribed
from onyx.server.features.build.db.rate_limit import count_user_messages_in_window
from onyx.server.features.build.db.rate_limit import count_user_messages_total
from onyx.server.features.build.db.rate_limit import get_oldest_message_timestamp
from onyx.server.features.build.v1_api import RateLimitResponse
from shared_configs.configs import MULTI_TENANT
def get_user_rate_limit_status(
    user: User | None,
    db_session: Session,
) -> RateLimitResponse:
    """
    Get the rate limit status for a user.

    Rate limits:
    - Cloud (MULTI_TENANT=true):
        - Subscribed users: 50 messages per week (rolling 7-day window)
        - Non-subscribed users: 5 messages (lifetime total)
    - Self-hosted (MULTI_TENANT=false):
        - Unlimited (no rate limiting)

    Args:
        user: The user object (None for unauthenticated users)
        db_session: Database session

    Returns:
        RateLimitResponse with current limit status
    """
    # Self-hosted deployments have no rate limits
    if not MULTI_TENANT:
        return RateLimitResponse(
            is_limited=False,
            limit_type="weekly",
            messages_used=0,
            limit=0,  # 0 indicates unlimited
            reset_timestamp=None,
        )
    # Determine subscription status
    is_subscribed = is_user_subscribed(user, db_session)
    # Set limits based on subscription
    limit = 50 if is_subscribed else 5
    limit_type: Literal["weekly", "total"] = "weekly" if is_subscribed else "total"
    # Count messages
    user_id = user.id if user else None
    if user_id is None:
        # Unauthenticated users have no usage
        messages_used = 0
        reset_timestamp = None
    elif limit_type == "weekly":
        # Subscribed: rolling 7-day window
        cutoff_time = datetime.now(tz=timezone.utc) - timedelta(days=7)
        messages_used = count_user_messages_in_window(user_id, cutoff_time, db_session)
        # Calculate reset timestamp (when oldest message ages out)
        # Only show reset time if user is at or over the limit
        if messages_used >= limit:
            oldest_msg = get_oldest_message_timestamp(user_id, cutoff_time, db_session)
            if oldest_msg:
                # A slot frees up exactly 7 days after the oldest counted message.
                reset_time = oldest_msg + timedelta(days=7)
                reset_timestamp = reset_time.isoformat()
            else:
                reset_timestamp = None
        else:
            reset_timestamp = None
    else:
        # Non-subscribed: lifetime total (never resets)
        messages_used = count_user_messages_total(user_id, db_session)
        reset_timestamp = None
    return RateLimitResponse(
        is_limited=messages_used >= limit,
        limit_type=limit_type,
        messages_used=messages_used,
        limit=limit,
        reset_timestamp=reset_timestamp,
    )

View File

@@ -0,0 +1,387 @@
"""API endpoints for Build Mode session management."""
from uuid import UUID
from fastapi import APIRouter
from fastapi import Depends
from fastapi import File
from fastapi import HTTPException
from fastapi import Response
from fastapi import UploadFile
from sqlalchemy.orm import Session
from onyx.auth.users import current_user
from onyx.db.engine.sql_engine import get_session
from onyx.db.models import User
from onyx.server.features.build.api.models import ArtifactResponse
from onyx.server.features.build.api.models import DirectoryListing
from onyx.server.features.build.api.models import SessionCreateRequest
from onyx.server.features.build.api.models import SessionListResponse
from onyx.server.features.build.api.models import SessionNameGenerateResponse
from onyx.server.features.build.api.models import SessionResponse
from onyx.server.features.build.api.models import SessionUpdateRequest
from onyx.server.features.build.api.models import UploadResponse
from onyx.server.features.build.api.models import WebappInfo
from onyx.server.features.build.session.manager import SessionManager
from onyx.server.features.build.utils import sanitize_filename
from onyx.server.features.build.utils import validate_file
from onyx.utils.logger import setup_logger
logger = setup_logger()
router = APIRouter(prefix="/sessions")
# =============================================================================
# Session Management Endpoints
# =============================================================================
@router.get("", response_model=SessionListResponse)
def list_sessions(
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> SessionListResponse:
    """List all build sessions for the current user."""
    manager = SessionManager(db_session)
    user_sessions = manager.list_sessions(user.id)
    response_items = [SessionResponse.from_model(s) for s in user_sessions]
    return SessionListResponse(sessions=response_items)
@router.post("", response_model=SessionResponse)
def create_session(
    request: SessionCreateRequest,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> SessionResponse:
    """
    Create or get an existing empty build session.

    Returns an existing empty (no messages) session if one exists and is recent,
    otherwise creates a new one with a provisioned sandbox.
    This supports pre-provisioning by reusing recent empty sessions.

    Raises:
        HTTPException: 429 when the max concurrent sandbox limit is reached,
            500 when sandbox provisioning fails.
    """
    session_manager = SessionManager(db_session)
    try:
        build_session = session_manager.get_or_create_empty_session(user.id)
    except ValueError as e:
        # Max concurrent sandboxes reached.
        # Fix: chain with `from e` (B904) so the original cause is preserved.
        raise HTTPException(status_code=429, detail=str(e)) from e
    except RuntimeError as e:
        logger.error(f"Failed to provision sandbox: {e}")
        # Intentionally generic detail: do not leak provisioning internals.
        raise HTTPException(status_code=500, detail="Failed to create session") from e
    return SessionResponse.from_model(build_session)
@router.get("/{session_id}", response_model=SessionResponse)
def get_session_details(
    session_id: UUID,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> SessionResponse:
    """
    Get details of a specific build session.

    If the sandbox is terminated, this will restore it synchronously.
    """
    manager = SessionManager(db_session)
    build_session = manager.get_session(session_id, user.id)
    if build_session is None:
        raise HTTPException(status_code=404, detail="Session not found")
    return SessionResponse.from_model(build_session)
@router.post("/{session_id}/generate-name", response_model=SessionNameGenerateResponse)
def generate_session_name(
    session_id: UUID,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> SessionNameGenerateResponse:
    """Generate a session name using LLM based on the first user message."""
    manager = SessionManager(db_session)
    name = manager.generate_session_name(session_id, user.id)
    if name is None:
        # None signals the session does not exist for this user.
        raise HTTPException(status_code=404, detail="Session not found")
    return SessionNameGenerateResponse(name=name)
@router.put("/{session_id}/name", response_model=SessionResponse)
def update_session_name(
    session_id: UUID,
    request: SessionUpdateRequest,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> SessionResponse:
    """Update the name of a build session."""
    manager = SessionManager(db_session)
    updated = manager.update_session_name(session_id, user.id, request.name)
    if updated is None:
        raise HTTPException(status_code=404, detail="Session not found")
    return SessionResponse.from_model(updated)
@router.delete("/{session_id}", response_model=None)
def delete_session(
    session_id: UUID,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> Response:
    """Delete a build session and all associated data."""
    manager = SessionManager(db_session)
    if not manager.delete_session(session_id, user.id):
        raise HTTPException(status_code=404, detail="Session not found")
    # 204: deletion succeeded, nothing to return.
    return Response(status_code=204)
# =============================================================================
# Artifact Endpoints
# =============================================================================
@router.get(
    "/{session_id}/artifacts",
    response_model=list[ArtifactResponse],
)
def list_artifacts(
    session_id: UUID,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> list[dict]:
    """List artifacts generated in the session."""
    # NOTE(review): return annotation is list[dict] while response_model is
    # list[ArtifactResponse] — presumably SessionManager.list_artifacts returns
    # dicts that FastAPI validates against the response model; confirm.
    user_id: UUID = user.id
    session_manager = SessionManager(db_session)
    artifacts = session_manager.list_artifacts(session_id, user_id)
    if artifacts is None:
        # None (as opposed to an empty list) means the session was not found.
        raise HTTPException(status_code=404, detail="Session not found")
    return artifacts
@router.get("/{session_id}/files", response_model=DirectoryListing)
def list_directory(
    session_id: UUID,
    path: str = "",
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> DirectoryListing:
    """
    List files and directories in the sandbox.

    Args:
        session_id: The session ID
        path: Relative path from sandbox root (empty string for root)

    Returns:
        DirectoryListing with sorted entries (directories first, then files)

    Raises:
        HTTPException: 403 on path traversal, 404 for missing session or
            directory, 400 for non-directory paths or other validation errors.
    """
    user_id: UUID = user.id
    session_manager = SessionManager(db_session)
    try:
        listing = session_manager.list_directory(session_id, user_id, path)
    except ValueError as e:
        # Map manager-side validation failures onto HTTP status codes by
        # inspecting the error message text.
        error_message = str(e)
        if "path traversal" in error_message.lower():
            raise HTTPException(status_code=403, detail="Access denied")
        elif "not found" in error_message.lower():
            raise HTTPException(status_code=404, detail="Directory not found")
        elif "not a directory" in error_message.lower():
            raise HTTPException(status_code=400, detail="Path is not a directory")
        raise HTTPException(status_code=400, detail=error_message)
    if listing is None:
        # None means the session itself was not found for this user.
        raise HTTPException(status_code=404, detail="Session not found")
    return listing
@router.get("/{session_id}/artifacts/{path:path}")
def download_artifact(
    session_id: UUID,
    path: str,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> Response:
    """Download a specific artifact file.

    Raises:
        HTTPException: 403 on path traversal / access denied, 400 when the
            target is a directory, 404 when the artifact is not found.
    """
    user_id: UUID = user.id
    session_manager = SessionManager(db_session)
    try:
        result = session_manager.download_artifact(session_id, user_id, path)
    except ValueError as e:
        error_message = str(e)
        if (
            "path traversal" in error_message.lower()
            or "access denied" in error_message.lower()
        ):
            raise HTTPException(status_code=403, detail="Access denied")
        elif "directory" in error_message.lower():
            raise HTTPException(status_code=400, detail="Cannot download directory")
        raise HTTPException(status_code=400, detail=error_message)
    if result is None:
        raise HTTPException(status_code=404, detail="Artifact not found")
    content, mime_type, filename = result
    # Bug fix: the Content-Disposition header previously contained a hard-coded
    # placeholder instead of the unpacked filename. Strip quotes/CR/LF so the
    # header stays single-line and well-formed.
    safe_name = filename.replace('"', "").replace("\r", "").replace("\n", "")
    return Response(
        content=content,
        media_type=mime_type,
        headers={
            "Content-Disposition": f'attachment; filename="{safe_name}"',
        },
    )
@router.get("/{session_id}/webapp", response_model=WebappInfo)
def get_webapp_info(
    session_id: UUID,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> WebappInfo:
    """
    Get webapp information for a session.

    Returns whether a webapp exists, its URL, and the sandbox status.
    """
    manager = SessionManager(db_session)
    info = manager.get_webapp_info(session_id, user.id)
    if info is None:
        raise HTTPException(status_code=404, detail="Session not found")
    # The manager returns a plain dict matching the WebappInfo fields.
    return WebappInfo(**info)
@router.get("/{session_id}/webapp/download")
def download_webapp(
    session_id: UUID,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> Response:
    """
    Download the webapp directory as a zip file.

    Returns the entire outputs/web directory as a zip archive.

    Raises:
        HTTPException: 404 when the session has no webapp to download.
    """
    user_id: UUID = user.id
    session_manager = SessionManager(db_session)
    result = session_manager.download_webapp_zip(session_id, user_id)
    if result is None:
        raise HTTPException(status_code=404, detail="Webapp not found")
    zip_bytes, filename = result
    # Bug fix: the Content-Disposition header previously contained a hard-coded
    # placeholder instead of the generated zip's filename. Sanitize to keep the
    # header single-line and well-formed.
    safe_name = filename.replace('"', "").replace("\r", "").replace("\n", "")
    return Response(
        content=zip_bytes,
        media_type="application/zip",
        headers={
            "Content-Disposition": f'attachment; filename="{safe_name}"',
        },
    )
@router.post("/{session_id}/upload", response_model=UploadResponse)
async def upload_file_endpoint(
    session_id: UUID,
    file: UploadFile = File(...),
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> UploadResponse:
    """Upload a file to the session's sandbox.

    The file will be placed in the sandbox's user_uploaded_files directory.

    Raises:
        HTTPException: 400 for missing filename or failed validation,
            404 when the session is not found.
    """
    user_id: UUID = user.id
    session_manager = SessionManager(db_session)
    if not file.filename:
        raise HTTPException(status_code=400, detail="File has no filename")
    # Read file content
    # NOTE(review): reads the whole upload into memory before size validation;
    # acceptable for modest limits, revisit for very large uploads.
    content = await file.read()
    # Validate file (extension, mime type, size)
    is_valid, error = validate_file(file.filename, file.content_type, len(content))
    if not is_valid:
        raise HTTPException(status_code=400, detail=error)
    # Sanitize filename
    safe_filename = sanitize_filename(file.filename)
    try:
        relative_path, _ = session_manager.upload_file(
            session_id=session_id,
            user_id=user_id,
            filename=safe_filename,
            content=content,
        )
    except ValueError as e:
        error_message = str(e)
        if "not found" in error_message.lower():
            raise HTTPException(status_code=404, detail=error_message)
        raise HTTPException(status_code=400, detail=error_message)
    return UploadResponse(
        filename=safe_filename,
        path=relative_path,
        size_bytes=len(content),
    )
@router.delete("/{session_id}/files/{path:path}", response_model=None)
def delete_file_endpoint(
    session_id: UUID,
    path: str,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> Response:
    """Delete a file from the session's sandbox.

    Args:
        session_id: The session ID
        path: Relative path to the file (e.g., "user_uploaded_files/doc.pdf")

    Raises:
        HTTPException: 403 on path traversal, 404 for missing session/file,
            400 when the target is a directory or validation fails.
    """
    user_id: UUID = user.id
    session_manager = SessionManager(db_session)
    try:
        deleted = session_manager.delete_file(session_id, user_id, path)
    except ValueError as e:
        # Map manager-side validation errors onto HTTP status codes.
        error_message = str(e)
        if "path traversal" in error_message.lower():
            raise HTTPException(status_code=403, detail="Access denied")
        elif "not found" in error_message.lower():
            raise HTTPException(status_code=404, detail=error_message)
        elif "directory" in error_message.lower():
            raise HTTPException(status_code=400, detail="Cannot delete directory")
        raise HTTPException(status_code=400, detail=error_message)
    if not deleted:
        raise HTTPException(status_code=404, detail="File not found")
    return Response(status_code=204)

View File

@@ -0,0 +1,52 @@
"""Subscription detection for Build Mode rate limiting."""
from sqlalchemy.orm import Session
from onyx.configs.app_configs import DEV_MODE
from onyx.db.models import User
from onyx.server.usage_limits import is_tenant_on_trial_fn
from onyx.utils.logger import setup_logger
from shared_configs.configs import MULTI_TENANT
from shared_configs.contextvars import get_current_tenant_id
logger = setup_logger()
def is_user_subscribed(user: User | None, db_session: Session) -> bool:
    """
    Check if a user has an active subscription.

    For cloud (MULTI_TENANT=true):
    - Checks Stripe billing via control plane
    - Returns True if tenant is NOT on trial (subscribed = NOT on trial)
    For self-hosted (MULTI_TENANT=false):
    - Always returns True (self-hosted deployments are treated as subscribed;
      no license check is performed here)

    Args:
        user: The user object (None for unauthenticated users)
        db_session: Database session (currently unused in this function;
            kept for interface stability)

    Returns:
        True if user has active subscription, False otherwise
    """
    # DEV_MODE short-circuits every check so local development is never limited.
    if DEV_MODE:
        return True
    if user is None:
        return False
    if MULTI_TENANT:
        # Cloud: check Stripe billing via control plane
        tenant_id = get_current_tenant_id()
        try:
            on_trial = is_tenant_on_trial_fn(tenant_id)
            # Subscribed = NOT on trial
            return not on_trial
        except Exception as e:
            logger.warning(f"Subscription check failed for tenant {tenant_id}: {e}")
            # Default to non-subscribed (safer/more restrictive)
            return False
    # Self-hosted: always treated as subscribed.
    return True

View File

@@ -0,0 +1,71 @@
import os
from enum import Enum
class SandboxBackend(str, Enum):
    """Backend mode for sandbox operations.

    LOCAL: Development mode - no snapshots, no automatic cleanup
    KUBERNETES: Production mode - full snapshots and cleanup
    """

    # str values match the accepted SANDBOX_BACKEND environment variable values.
    LOCAL = "local"
    KUBERNETES = "kubernetes"
# Sandbox backend mode (controls snapshot and cleanup behavior)
# "local" = no snapshots, no cleanup (for development)
# "kubernetes" = full snapshots and cleanup (for production)
# NOTE: an unrecognized SANDBOX_BACKEND value raises ValueError at import time
# (fail fast on misconfiguration).
SANDBOX_BACKEND = SandboxBackend(os.environ.get("SANDBOX_BACKEND", "local"))
# Persistent Document Storage Configuration
# When enabled, indexed documents are written to local filesystem with hierarchical structure
PERSISTENT_DOCUMENT_STORAGE_ENABLED = (
    os.environ.get("PERSISTENT_DOCUMENT_STORAGE_ENABLED", "").lower() == "true"
)
# Base directory path for persistent document storage (local filesystem)
# Example: /var/onyx/indexed-docs or /app/indexed-docs
PERSISTENT_DOCUMENT_STORAGE_PATH = os.environ.get(
    "PERSISTENT_DOCUMENT_STORAGE_PATH", ""
)
# Sandbox filesystem paths
SANDBOX_BASE_PATH = os.environ.get("SANDBOX_BASE_PATH", "/tmp/onyx-sandboxes")
OUTPUTS_TEMPLATE_PATH = os.environ.get("OUTPUTS_TEMPLATE_PATH", "/templates/outputs")
VENV_TEMPLATE_PATH = os.environ.get("VENV_TEMPLATE_PATH", "/templates/venv")
# Sandbox agent configuration
# .split() lets the env var carry arguments, e.g. "opencode --some-flag".
SANDBOX_AGENT_COMMAND = os.environ.get("SANDBOX_AGENT_COMMAND", "opencode").split()
# OpenCode disabled tools (comma-separated list)
# Available tools: bash, edit, write, read, grep, glob, list, lsp, patch,
# skill, todowrite, todoread, webfetch, question
# Example: "question,webfetch" to disable user questions and web fetching
_disabled_tools_str = os.environ.get("OPENCODE_DISABLED_TOOLS", "question")
OPENCODE_DISABLED_TOOLS: list[str] = [
    t.strip() for t in _disabled_tools_str.split(",") if t.strip()
]
# Sandbox lifecycle configuration
# Idle sandboxes time out after this many seconds (default 900 = 15 minutes).
SANDBOX_IDLE_TIMEOUT_SECONDS = int(
    os.environ.get("SANDBOX_IDLE_TIMEOUT_SECONDS", "900")
)
SANDBOX_MAX_CONCURRENT_PER_ORG = int(
    os.environ.get("SANDBOX_MAX_CONCURRENT_PER_ORG", "10")
)
# Sandbox snapshot storage
SANDBOX_SNAPSHOTS_BUCKET = os.environ.get(
    "SANDBOX_SNAPSHOTS_BUCKET", "sandbox-snapshots"
)
# Next.js preview server port range
SANDBOX_NEXTJS_PORT_START = int(os.environ.get("SANDBOX_NEXTJS_PORT_START", "3010"))
SANDBOX_NEXTJS_PORT_END = int(os.environ.get("SANDBOX_NEXTJS_PORT_END", "3100"))
# File upload configuration
MAX_UPLOAD_FILE_SIZE_MB = int(os.environ.get("BUILD_MAX_UPLOAD_FILE_SIZE_MB", "50"))
MAX_UPLOAD_FILE_SIZE_BYTES = MAX_UPLOAD_FILE_SIZE_MB * 1024 * 1024
# Directory name inside each sandbox where user uploads are placed.
USER_UPLOADS_DIRECTORY = "user_uploaded_files"

View File

@@ -0,0 +1 @@
# Database operations for the build feature

View File

@@ -0,0 +1,354 @@
"""Database operations for Build Mode sessions."""
from datetime import datetime
from datetime import timedelta
from typing import Any
from uuid import UUID
from sqlalchemy import desc
from sqlalchemy import exists
from sqlalchemy.orm import Session
from onyx.configs.constants import MessageType
from onyx.db.enums import BuildSessionStatus
from onyx.db.enums import SandboxStatus
from onyx.db.models import Artifact
from onyx.db.models import BuildMessage
from onyx.db.models import BuildSession
from onyx.db.models import Sandbox
from onyx.db.models import Snapshot
from onyx.utils.logger import setup_logger
logger = setup_logger()
def create_build_session(
    user_id: UUID,
    db_session: Session,
    name: str | None = None,
) -> BuildSession:
    """Create a new build session for the given user.

    The session starts in ACTIVE status; commits immediately and refreshes so
    server-populated fields (id, timestamps) are available on return.
    """
    session = BuildSession(
        user_id=user_id,
        name=name,
        status=BuildSessionStatus.ACTIVE,
    )
    db_session.add(session)
    db_session.commit()
    db_session.refresh(session)
    logger.info(f"Created build session {session.id} for user {user_id}")
    return session


def get_build_session(
    session_id: UUID,
    user_id: UUID,
    db_session: Session,
) -> BuildSession | None:
    """Get a build session by ID, ensuring it belongs to the user.

    Returns None when the session does not exist or is owned by another user.
    """
    return (
        db_session.query(BuildSession)
        .filter(
            BuildSession.id == session_id,
            BuildSession.user_id == user_id,
        )
        .one_or_none()
    )
def get_user_build_sessions(
    user_id: UUID,
    db_session: Session,
    limit: int = 100,
) -> list[BuildSession]:
    """Get all build sessions for a user that have at least 1 message.

    Excludes empty (pre-provisioned) sessions from the listing.
    Results are ordered newest-first and capped at `limit`.
    """
    return (
        db_session.query(BuildSession)
        .join(BuildMessage)  # Inner join excludes empty sessions
        .filter(BuildSession.user_id == user_id)
        # Collapse the one-row-per-message join back to one row per session
        .group_by(BuildSession.id)
        .order_by(desc(BuildSession.created_at))
        .limit(limit)
        .all()
    )


def get_empty_session_for_user(
    user_id: UUID,
    db_session: Session,
    max_age_minutes: int = 30,
) -> BuildSession | None:
    """Get the user's empty session (0 messages) if one exists and is recent."""
    # NOTE(review): utcnow() is naive — assumes created_at stores naive UTC;
    # confirm against the column definition (rate limiting elsewhere uses
    # timezone-aware datetimes).
    cutoff = datetime.utcnow() - timedelta(minutes=max_age_minutes)
    return (
        db_session.query(BuildSession)
        .filter(
            BuildSession.user_id == user_id,
            BuildSession.created_at > cutoff,
            # Anti-join: only sessions with no messages at all
            ~exists().where(BuildMessage.session_id == BuildSession.id),
        )
        # No explicit ordering — if several empty sessions exist, which one is
        # returned is backend-dependent.
        .first()
    )
def update_session_activity(
    session_id: UUID,
    db_session: Session,
) -> None:
    """Update the last activity timestamp for a session.

    No-op if the session does not exist.
    """
    session = (
        db_session.query(BuildSession)
        .filter(BuildSession.id == session_id)
        .one_or_none()
    )
    if session:
        # NOTE(review): naive utcnow() — assumes last_activity_at stores naive
        # UTC; confirm against the column definition.
        session.last_activity_at = datetime.utcnow()
        db_session.commit()


def update_session_status(
    session_id: UUID,
    status: BuildSessionStatus,
    db_session: Session,
) -> None:
    """Update the status of a build session.

    No-op if the session does not exist.
    """
    session = (
        db_session.query(BuildSession)
        .filter(BuildSession.id == session_id)
        .one_or_none()
    )
    if session:
        session.status = status
        db_session.commit()
        logger.info(f"Updated build session {session_id} status to {status}")


def delete_build_session(
    session_id: UUID,
    user_id: UUID,
    db_session: Session,
) -> bool:
    """Delete a build session and all related data.

    Returns True on success, False when the session was not found for this
    user. Related rows are presumably removed via ORM cascades configured on
    BuildSession — confirm the model's relationship settings.
    """
    session = get_build_session(session_id, user_id, db_session)
    if not session:
        return False
    db_session.delete(session)
    db_session.commit()
    logger.info(f"Deleted build session {session_id}")
    return True
# Sandbox operations
def create_sandbox(
    session_id: UUID,
    db_session: Session,
) -> Sandbox:
    """Create a new sandbox for a build session, starting in PROVISIONING."""
    sandbox = Sandbox(
        session_id=session_id,
        status=SandboxStatus.PROVISIONING,
    )
    db_session.add(sandbox)
    db_session.commit()
    db_session.refresh(sandbox)
    logger.info(f"Created sandbox {sandbox.id} for session {session_id}")
    return sandbox


def get_sandbox_by_session(
    session_id: UUID,
    db_session: Session,
) -> Sandbox | None:
    """Get the sandbox for a given session (None if none exists)."""
    return (
        db_session.query(Sandbox).filter(Sandbox.session_id == session_id).one_or_none()
    )


def update_sandbox_status(
    sandbox_id: UUID,
    status: SandboxStatus,
    db_session: Session,
    container_id: str | None = None,
) -> None:
    """Update the status of a sandbox.

    Also refreshes the heartbeat and, when provided, records the container ID.
    No-op if the sandbox does not exist.
    """
    sandbox = db_session.query(Sandbox).filter(Sandbox.id == sandbox_id).one_or_none()
    if sandbox:
        sandbox.status = status
        if container_id is not None:
            sandbox.container_id = container_id
        # Any status change also counts as liveness.
        sandbox.last_heartbeat = datetime.utcnow()
        db_session.commit()
        logger.info(f"Updated sandbox {sandbox_id} status to {status}")


def update_sandbox_heartbeat(
    sandbox_id: UUID,
    db_session: Session,
) -> None:
    """Update the heartbeat timestamp for a sandbox (no-op if missing)."""
    sandbox = db_session.query(Sandbox).filter(Sandbox.id == sandbox_id).one_or_none()
    if sandbox:
        sandbox.last_heartbeat = datetime.utcnow()
        db_session.commit()
# Artifact operations
def create_artifact(
    session_id: UUID,
    artifact_type: str,
    path: str,
    name: str,
    db_session: Session,
) -> Artifact:
    """Create a new artifact record for a session."""
    artifact = Artifact(
        session_id=session_id,
        type=artifact_type,
        path=path,
        name=name,
    )
    db_session.add(artifact)
    db_session.commit()
    db_session.refresh(artifact)
    logger.info(f"Created artifact {artifact.id} for session {session_id}")
    return artifact


def get_session_artifacts(
    session_id: UUID,
    db_session: Session,
) -> list[Artifact]:
    """Get all artifacts for a session, newest first."""
    return (
        db_session.query(Artifact)
        .filter(Artifact.session_id == session_id)
        .order_by(desc(Artifact.created_at))
        .all()
    )


def update_artifact(
    artifact_id: UUID,
    db_session: Session,
    path: str | None = None,
    name: str | None = None,
) -> None:
    """Update artifact metadata.

    Only fields passed as non-None are changed; updated_at is always
    refreshed. No-op if the artifact does not exist.
    """
    artifact = (
        db_session.query(Artifact).filter(Artifact.id == artifact_id).one_or_none()
    )
    if artifact:
        if path is not None:
            artifact.path = path
        if name is not None:
            artifact.name = name
        # NOTE(review): naive utcnow() — assumes updated_at stores naive UTC.
        artifact.updated_at = datetime.utcnow()
        db_session.commit()
        logger.info(f"Updated artifact {artifact_id}")
# Snapshot operations
def create_snapshot(
    session_id: UUID,
    storage_path: str,
    size_bytes: int,
    db_session: Session,
) -> Snapshot:
    """Create a new snapshot record pointing at stored sandbox state."""
    snapshot = Snapshot(
        session_id=session_id,
        storage_path=storage_path,
        size_bytes=size_bytes,
    )
    db_session.add(snapshot)
    db_session.commit()
    db_session.refresh(snapshot)
    logger.info(f"Created snapshot {snapshot.id} for session {session_id}")
    return snapshot


def get_latest_snapshot(
    session_id: UUID,
    db_session: Session,
) -> Snapshot | None:
    """Get the most recent snapshot for a session (None if none exist)."""
    return (
        db_session.query(Snapshot)
        .filter(Snapshot.session_id == session_id)
        .order_by(desc(Snapshot.created_at))
        .first()
    )


def get_session_snapshots(
    session_id: UUID,
    db_session: Session,
) -> list[Snapshot]:
    """Get all snapshots for a session, newest first."""
    return (
        db_session.query(Snapshot)
        .filter(Snapshot.session_id == session_id)
        .order_by(desc(Snapshot.created_at))
        .all()
    )
# Message operations
def create_message(
    session_id: UUID,
    message_type: MessageType,
    content: str,
    db_session: Session,
    message_metadata: dict[str, Any] | None = None,
) -> BuildMessage:
    """Create a new message in a build session.

    Args:
        session_id: Session UUID
        message_type: Type of message (USER, ASSISTANT, SYSTEM)
        content: Text content (empty string for structured events)
        db_session: Database session
        message_metadata: Optional structured ACP event data (tool calls, thinking, plans, etc.)

    Returns:
        The persisted, refreshed BuildMessage row.
    """
    new_message = BuildMessage(
        session_id=session_id,
        type=message_type,
        content=content,
        message_metadata=message_metadata,
    )
    db_session.add(new_message)
    db_session.commit()
    db_session.refresh(new_message)
    # Only mention the metadata type in the log when metadata was supplied.
    metadata_note = (
        f" with metadata type={message_metadata.get('type')}"
        if message_metadata
        else ""
    )
    logger.info(
        f"Created {message_type.value} message {new_message.id} for session {session_id}"
        + metadata_note
    )
    return new_message
def get_session_messages(
    session_id: UUID,
    db_session: Session,
) -> list[BuildMessage]:
    """Fetch all messages for a session, oldest first (chronological order)."""
    query = db_session.query(BuildMessage).filter(
        BuildMessage.session_id == session_id
    )
    return query.order_by(BuildMessage.created_at).all()

View File

@@ -0,0 +1,96 @@
"""Database queries for Build Mode rate limiting."""
from datetime import datetime
from uuid import UUID
from sqlalchemy import func
from sqlalchemy.orm import Session
from onyx.configs.constants import MessageType
from onyx.db.models import BuildMessage
from onyx.db.models import BuildSession
def count_user_messages_in_window(
    user_id: UUID,
    cutoff_time: datetime,
    db_session: Session,
) -> int:
    """
    Count USER messages for a user since cutoff_time.

    Args:
        user_id: The user's UUID
        cutoff_time: Only count messages created at or after this time
        db_session: Database session

    Returns:
        Number of USER messages in the time window
    """
    count = (
        db_session.query(func.count(BuildMessage.id))
        .join(BuildSession, BuildMessage.session_id == BuildSession.id)
        .filter(
            BuildSession.user_id == user_id,
            BuildMessage.type == MessageType.USER,
            BuildMessage.created_at >= cutoff_time,
        )
        .scalar()
    )
    # scalar() can return None on an empty result; normalize to 0.
    return count or 0
def count_user_messages_total(user_id: UUID, db_session: Session) -> int:
    """
    Count all USER messages for a user (lifetime total).

    Args:
        user_id: The user's UUID
        db_session: Database session

    Returns:
        Total number of USER messages
    """
    total = (
        db_session.query(func.count(BuildMessage.id))
        .join(BuildSession, BuildMessage.session_id == BuildSession.id)
        .filter(
            BuildSession.user_id == user_id,
            BuildMessage.type == MessageType.USER,
        )
        .scalar()
    )
    # scalar() can return None on an empty result; normalize to 0.
    return total or 0
def get_oldest_message_timestamp(
    user_id: UUID,
    cutoff_time: datetime,
    db_session: Session,
) -> datetime | None:
    """
    Get the timestamp of the oldest USER message in the time window.

    Used to calculate when the rate limit will reset (when the oldest
    message ages out of the rolling window).

    Args:
        user_id: The user's UUID
        cutoff_time: Only consider messages created at or after this time
        db_session: Database session

    Returns:
        Timestamp of oldest message in window, or None if no messages
    """
    query = (
        db_session.query(BuildMessage.created_at)
        .join(BuildSession, BuildMessage.session_id == BuildSession.id)
        .filter(
            BuildSession.user_id == user_id,
            BuildMessage.type == MessageType.USER,
            BuildMessage.created_at >= cutoff_time,
        )
        .order_by(BuildMessage.created_at.asc())
        .limit(1)
    )
    return query.scalar()

View File

@@ -0,0 +1,249 @@
"""Database operations for CLI agent sandbox management."""
import datetime
from uuid import UUID
from sqlalchemy import func
from sqlalchemy import select
from sqlalchemy.orm import Session
from onyx.db.enums import SandboxStatus
from onyx.db.models import Sandbox
from onyx.db.models import Snapshot
from onyx.server.features.build.configs import SANDBOX_NEXTJS_PORT_END
from onyx.server.features.build.configs import SANDBOX_NEXTJS_PORT_START
from onyx.utils.logger import setup_logger
logger = setup_logger()
def create_sandbox(
    db_session: Session,
    session_id: UUID,
    nextjs_port: int | None = None,
) -> Sandbox:
    """Insert a sandbox row in PROVISIONING state and return it.

    Args:
        db_session: Database session
        session_id: Owning build session UUID
        nextjs_port: Optional pre-allocated Next.js dev-server port
    """
    new_sandbox = Sandbox(
        session_id=session_id,
        status=SandboxStatus.PROVISIONING,
        nextjs_port=nextjs_port,
    )
    db_session.add(new_sandbox)
    db_session.commit()
    return new_sandbox
def get_sandbox_by_session_id(db_session: Session, session_id: UUID) -> Sandbox | None:
    """Look up the sandbox attached to a build session, or None if absent."""
    query = select(Sandbox).where(Sandbox.session_id == session_id)
    return db_session.execute(query).scalar_one_or_none()
def get_sandbox_by_id(db_session: Session, sandbox_id: UUID) -> Sandbox | None:
    """Look up a sandbox by its primary key, or None if absent."""
    query = select(Sandbox).where(Sandbox.id == sandbox_id)
    return db_session.execute(query).scalar_one_or_none()
def update_sandbox_status(
    db_session: Session, sandbox_id: UUID, status: SandboxStatus
) -> Sandbox:
    """Set a sandbox's status and commit.

    Raises:
        ValueError: If no sandbox exists with the given id.
    """
    sandbox = get_sandbox_by_id(db_session, sandbox_id)
    if sandbox is None:
        raise ValueError(f"Sandbox {sandbox_id} not found")
    sandbox.status = status
    db_session.commit()
    return sandbox
def update_sandbox_heartbeat(db_session: Session, sandbox_id: UUID) -> Sandbox:
    """Stamp a sandbox's last_heartbeat with the current UTC time and commit.

    Raises:
        ValueError: If no sandbox exists with the given id.
    """
    sandbox = get_sandbox_by_id(db_session, sandbox_id)
    if sandbox is None:
        raise ValueError(f"Sandbox {sandbox_id} not found")
    sandbox.last_heartbeat = datetime.datetime.now(datetime.timezone.utc)
    db_session.commit()
    return sandbox
def get_idle_sandboxes(
    db_session: Session, idle_threshold_seconds: int
) -> list[Sandbox]:
    """Fetch RUNNING/IDLE sandboxes whose last heartbeat is older than the threshold."""
    cutoff = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(
        seconds=idle_threshold_seconds
    )
    query = select(Sandbox).where(
        Sandbox.status.in_([SandboxStatus.RUNNING, SandboxStatus.IDLE]),
        Sandbox.last_heartbeat < cutoff,
    )
    return list(db_session.execute(query).scalars().all())
def get_running_sandbox_count_by_tenant(db_session: Session, tenant_id: str) -> int:
    """Get count of running sandboxes for a tenant (for limit enforcement).

    Note: tenant_id parameter is kept for API compatibility but is not used
    since Sandbox model no longer has tenant_id. This function returns
    the count of all running sandboxes.
    """
    count_query = select(func.count(Sandbox.id)).where(
        Sandbox.status.in_([SandboxStatus.RUNNING, SandboxStatus.IDLE])
    )
    count = db_session.execute(count_query).scalar()
    # scalar() may be None on an empty result; normalize to 0.
    return count if count is not None else 0
def create_snapshot(
    db_session: Session,
    session_id: UUID,
    storage_path: str,
    size_bytes: int,
) -> Snapshot:
    """Persist a snapshot row for a session and return it."""
    record = Snapshot(
        session_id=session_id,
        storage_path=storage_path,
        size_bytes=size_bytes,
    )
    db_session.add(record)
    db_session.commit()
    return record
def get_latest_snapshot_for_session(
    db_session: Session, session_id: UUID
) -> Snapshot | None:
    """Return the most recent snapshot for a session, or None if none exist."""
    query = (
        select(Snapshot)
        .where(Snapshot.session_id == session_id)
        .order_by(Snapshot.created_at.desc())
        .limit(1)
    )
    return db_session.execute(query).scalar_one_or_none()
def get_snapshots_for_session(db_session: Session, session_id: UUID) -> list[Snapshot]:
    """Return all snapshots for a session, newest first."""
    query = (
        select(Snapshot)
        .where(Snapshot.session_id == session_id)
        .order_by(Snapshot.created_at.desc())
    )
    return list(db_session.execute(query).scalars().all())
def delete_old_snapshots(
    db_session: Session, tenant_id: str, retention_days: int
) -> int:
    """Delete snapshots older than retention period, return count deleted.

    Note: tenant_id parameter is kept for API compatibility but is not used
    since Snapshot model no longer has tenant_id. This function deletes
    all snapshots older than the retention period.

    Args:
        db_session: Database session
        tenant_id: Unused; retained for API compatibility
        retention_days: Snapshots created more than this many days ago are removed

    Returns:
        Number of snapshots deleted
    """
    cutoff_time = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(
        days=retention_days
    )
    stmt = select(Snapshot).where(Snapshot.created_at < cutoff_time)
    old_snapshots = db_session.execute(stmt).scalars().all()
    for snapshot in old_snapshots:
        db_session.delete(snapshot)
    # Only commit when something was actually deleted, avoiding a no-op commit.
    if old_snapshots:
        db_session.commit()
    # len() replaces the manual counter loop from the original implementation.
    return len(old_snapshots)
def delete_snapshot(db_session: Session, snapshot_id: UUID) -> bool:
    """Delete a specific snapshot by ID. Returns True if deleted, False if not found."""
    snapshot = db_session.execute(
        select(Snapshot).where(Snapshot.id == snapshot_id)
    ).scalar_one_or_none()
    if snapshot is None:
        return False
    db_session.delete(snapshot)
    db_session.commit()
    return True
def _is_port_available(port: int) -> bool:
    """Check if a port is available by attempting to bind to it.

    Checks both IPv4 and IPv6 wildcard addresses to properly detect
    if anything is listening on the port, regardless of address family.
    """
    import socket

    logger.debug(f"Checking if port {port} is available")

    def _try_bind(family: int, address: str, label: str) -> bool:
        # Attempt a test bind; success means nothing is listening for this family.
        try:
            with socket.socket(family, socket.SOCK_STREAM) as sock:
                sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
                if family == socket.AF_INET6:
                    # IPV6_V6ONLY must be False to allow dual-stack behavior
                    sock.setsockopt(socket.IPPROTO_IPV6, socket.IPV6_V6ONLY, 0)
                sock.bind((address, port))
            logger.debug(f"Port {port} {label} bind successful")
            return True
        except OSError as e:
            logger.debug(f"Port {port} {label} not available: {e}")
            return False

    # Check IPv4 wildcard (0.0.0.0) - this will detect any IPv4 listener
    if not _try_bind(socket.AF_INET, "0.0.0.0", "IPv4 wildcard"):
        return False
    # Check IPv6 wildcard (::) - this will detect any IPv6 listener
    if not _try_bind(socket.AF_INET6, "::", "IPv6 wildcard"):
        return False
    logger.debug(f"Port {port} is available")
    return True
def allocate_nextjs_port(db_session: Session) -> int:
    """Allocate an available port for a new sandbox.

    Finds the first available port in the configured range by checking
    both database allocations and system-level port availability.

    Args:
        db_session: Database session for querying allocated ports

    Returns:
        An available port number

    Raises:
        RuntimeError: If no ports are available in the configured range
    """
    # Collect ports already claimed by existing sandbox rows in a single pass
    # (the original built an intermediate set of Row tuples first).
    allocated_ports = {
        row[0]
        for row in db_session.query(Sandbox.nextjs_port)
        .filter(Sandbox.nextjs_port.isnot(None))
        .all()
        if row[0] is not None
    }
    # Find first port that's not in DB and not currently bound.
    # NOTE(review): this check is inherently racy (TOCTOU) — another process
    # can bind the port between this check and actual use; callers should
    # tolerate a bind failure and retry.
    for port in range(SANDBOX_NEXTJS_PORT_START, SANDBOX_NEXTJS_PORT_END):
        if port not in allocated_ports and _is_port_available(port):
            return port
    raise RuntimeError(
        f"No available ports in range [{SANDBOX_NEXTJS_PORT_START}, {SANDBOX_NEXTJS_PORT_END})"
    )

View File

@@ -0,0 +1,17 @@
google-genai>=1.0.0
matplotlib==3.9.1
matplotlib-inline>=0.1.7
matplotlib-venn>=1.1.2
numpy==1.26.4
opencv-python>=4.11.0.86
openpyxl>=3.1.5
pandas==2.2.2
pdfplumber>=0.11.7
Pillow>=10.0.0
pydantic>=2.11.9
python-pptx>=1.0.2
scikit-image>=0.25.2
scikit-learn>=1.7.2
scipy>=1.16.2
seaborn>=0.13.2
xgboost>=3.0.5

View File

@@ -0,0 +1,35 @@
"""
Sandbox module for CLI agent filesystem-based isolation.
This module provides lightweight sandbox management for CLI-based AI agent sessions.
Each sandbox is a directory on the local filesystem rather than a Docker container.
Usage:
from onyx.server.features.build.sandbox import get_sandbox_manager
# Get the appropriate sandbox manager based on SANDBOX_BACKEND config
sandbox_manager = get_sandbox_manager()
# Use the sandbox manager
sandbox_info = sandbox_manager.provision(...)
"""
from onyx.server.features.build.sandbox.manager import get_sandbox_manager
from onyx.server.features.build.sandbox.manager import LocalSandboxManager
from onyx.server.features.build.sandbox.manager import SandboxManager
from onyx.server.features.build.sandbox.models import FilesystemEntry
from onyx.server.features.build.sandbox.models import SandboxInfo
from onyx.server.features.build.sandbox.models import SnapshotInfo
__all__ = [
# Factory function (preferred)
"get_sandbox_manager",
# Interface
"SandboxManager",
# Implementations
"LocalSandboxManager",
# Models
"SandboxInfo",
"SnapshotInfo",
"FilesystemEntry",
]

View File

@@ -0,0 +1,283 @@
#!/usr/bin/env python3
"""Build sandbox templates for Next.js app and Python venv."""
import argparse
import json
import subprocess
import sys
from pathlib import Path
from typing import Any
try:
from onyx.server.features.build.configs import (
OUTPUTS_TEMPLATE_PATH,
VENV_TEMPLATE_PATH,
)
except ImportError:
# Fallback if running as standalone script
import os
OUTPUTS_TEMPLATE_PATH = os.environ.get(
"OUTPUTS_TEMPLATE_PATH", "/templates/outputs"
)
VENV_TEMPLATE_PATH = os.environ.get("VENV_TEMPLATE_PATH", "/templates/venv")
def build_nextjs_template(output_path: Path) -> None:
    """Build Next.js app template.

    Creates a minimal Next.js 16.1.1 app with React 19, Tailwind CSS v4, and recharts.

    Args:
        output_path: Path where the outputs template should be created (will create web/ subdirectory)

    Raises:
        RuntimeError: If npm dependency installation fails
    """
    web_dir = output_path / "web"
    web_dir.mkdir(parents=True, exist_ok=True)
    # package.json
    package_json: dict[str, Any] = {
        "name": "onyx-sandbox-app",
        "version": "0.1.0",
        "private": True,
        "scripts": {
            "dev": "next dev",
            "build": "next build",
            "start": "next start",
            "lint": "next lint",
        },
        "dependencies": {
            "next": "16.1.1",
            "react": "^19.0.0",
            "react-dom": "^19.0.0",
            "recharts": "^2.12.0",
        },
        "devDependencies": {
            "@tailwindcss/postcss": "^4.1.18",
            "@types/node": "^20",
            "@types/react": "^19",
            "@types/react-dom": "^19",
            "autoprefixer": "^10.4.20",
            "postcss": "^8.4.47",
            "tailwindcss": "^4.1.18",
            "typescript": "^5",
        },
    }
    (web_dir / "package.json").write_text(json.dumps(package_json, indent=2))
    # next.config.js
    next_config = """/** @type {import('next').NextConfig} */
const nextConfig = {
  reactStrictMode: true,
}
module.exports = nextConfig
"""
    (web_dir / "next.config.js").write_text(next_config)
    # tailwind.config.js (optional in Tailwind v4, kept for content globbing)
    tailwind_config = """/** @type {import('tailwindcss').Config} */
module.exports = {
  content: [
    './src/pages/**/*.{js,ts,jsx,tsx,mdx}',
    './src/components/**/*.{js,ts,jsx,tsx,mdx}',
    './src/app/**/*.{js,ts,jsx,tsx,mdx}',
  ],
  theme: {
    extend: {},
  },
  plugins: [],
}
"""
    (web_dir / "tailwind.config.js").write_text(tailwind_config)
    # postcss.config.js
    postcss_config = """module.exports = {
  plugins: {
    "@tailwindcss/postcss": {},
    autoprefixer: {},
  },
}
"""
    (web_dir / "postcss.config.js").write_text(postcss_config)
    # tsconfig.json
    tsconfig: dict[str, Any] = {
        "compilerOptions": {
            "target": "ES2020",
            "lib": ["dom", "dom.iterable", "esnext"],
            "allowJs": True,
            "skipLibCheck": True,
            "strict": True,
            "noEmit": True,
            "esModuleInterop": True,
            "module": "esnext",
            "moduleResolution": "bundler",
            "resolveJsonModule": True,
            "isolatedModules": True,
            "jsx": "preserve",
            "incremental": True,
            "plugins": [{"name": "next"}],
            "paths": {"@/*": ["./src/*"]},
        },
        "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
        "exclude": ["node_modules"],
    }
    (web_dir / "tsconfig.json").write_text(json.dumps(tsconfig, indent=2))
    # src/app/layout.tsx
    # FIX: import globals.css here — without this import the stylesheet was
    # never loaded, so Tailwind styles were dead code in the generated app.
    layout_tsx = """import './globals.css'

export const metadata = {
  title: 'Onyx Sandbox App',
  description: 'Generated by Onyx',
}

export default function RootLayout({
  children,
}: {
  children: React.ReactNode
}) {
  return (
    <html lang="en">
      <body>{children}</body>
    </html>
  )
}
"""
    (web_dir / "src" / "app").mkdir(parents=True, exist_ok=True)
    (web_dir / "src" / "app" / "layout.tsx").write_text(layout_tsx)
    # src/app/page.tsx
    page_tsx = """export default function Home() {
  return (
    <main>
      <h1>Onyx Sandbox App</h1>
      <p>Welcome to your generated Next.js app</p>
    </main>
  )
}
"""
    (web_dir / "src" / "app" / "page.tsx").write_text(page_tsx)
    # src/app/globals.css
    # FIX: Tailwind v4 replaced the `@tailwind base/components/utilities`
    # directives with a single CSS import; the old v3 directives error under
    # @tailwindcss/postcss 4.x (pinned above in devDependencies).
    globals_css = """@import "tailwindcss";
"""
    (web_dir / "src" / "app" / "globals.css").write_text(globals_css)
    # public directory
    (web_dir / "public").mkdir(exist_ok=True)
    # Install npm dependencies
    print(" Installing npm dependencies...")
    install_result = subprocess.run(
        ["npm", "install"],
        cwd=web_dir,
        capture_output=True,
        text=True,
    )
    if install_result.returncode != 0:
        raise RuntimeError(
            f"Failed to install npm dependencies: {install_result.stderr}"
        )
def build_python_venv_template(target_path: Path, requirements_path: Path) -> None:
    """Build Python venv template with required packages.

    Creates a Python virtual environment and installs packages from requirements file.

    Args:
        target_path: Path where the venv should be created
        requirements_path: Path to requirements.txt file

    Raises:
        FileNotFoundError: If the requirements file does not exist
        RuntimeError: If venv creation or package installation fails
    """
    if not requirements_path.exists():
        raise FileNotFoundError(f"Requirements file not found: {requirements_path}")
    # Create the venv with the same interpreter running this script.
    print(" Creating virtual environment...")
    venv_proc = subprocess.run(
        [sys.executable, "-m", "venv", str(target_path)],
        capture_output=True,
        text=True,
    )
    if venv_proc.returncode != 0:
        raise RuntimeError(f"Failed to create virtual environment: {venv_proc.stderr}")
    # venv layout differs by platform: Scripts/ on Windows, bin/ elsewhere.
    bin_dir = "Scripts" if sys.platform == "win32" else "bin"
    pip_path = target_path / bin_dir / "pip"
    # Install everything listed in the requirements file into the venv.
    print(f" Installing packages from {requirements_path.name}...")
    pip_proc = subprocess.run(
        [str(pip_path), "install", "-r", str(requirements_path)],
        capture_output=True,
        text=True,
    )
    if pip_proc.returncode != 0:
        raise RuntimeError(f"Failed to install packages: {pip_proc.stderr}")
def main() -> None:
    """Build both templates."""
    parser = argparse.ArgumentParser(
        description="Build sandbox templates for Next.js app and Python venv"
    )
    parser.add_argument(
        "--outputs-dir",
        type=str,
        default=OUTPUTS_TEMPLATE_PATH,
        help=f"Output directory for Next.js template (default: {OUTPUTS_TEMPLATE_PATH})",
    )
    parser.add_argument(
        "--venv-dir",
        type=str,
        default=VENV_TEMPLATE_PATH,
        help=f"Output directory for Python venv template (default: {VENV_TEMPLATE_PATH})",
    )
    parser.add_argument(
        "--requirements",
        type=str,
        default=None,
        help="Path to requirements.txt (default: auto-detect)",
    )
    args = parser.parse_args()
    outputs_dir = Path(args.outputs_dir)
    venv_dir = Path(args.venv_dir)
    # Resolve the requirements file: an explicit flag wins; otherwise look
    # for initial-requirements.txt next to this script's parent directory.
    if args.requirements:
        requirements_file = Path(args.requirements)
    else:
        requirements_file = Path(__file__).parent.parent / "initial-requirements.txt"
    if not requirements_file.exists():
        raise FileNotFoundError(
            "Could not find requirements file. "
            f"Expected at {requirements_file} or specify with --requirements"
        )
    # Build Next.js template
    print(f"Building Next.js template to {outputs_dir}...")
    build_nextjs_template(outputs_dir)
    print("✅ Next.js template built successfully")
    # Build Python venv template
    print(f"\nBuilding Python venv template to {venv_dir}...")
    print(" (This may take 30-60 seconds)")
    build_python_venv_template(venv_dir, requirements_file)
    print("✅ Python venv template built successfully")
    print("\nTemplates ready!")


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,15 @@
"""Internal implementation details for sandbox management."""
from onyx.server.features.build.sandbox.internal.agent_client import ACPAgentClient
from onyx.server.features.build.sandbox.internal.directory_manager import (
DirectoryManager,
)
from onyx.server.features.build.sandbox.internal.process_manager import ProcessManager
from onyx.server.features.build.sandbox.internal.snapshot_manager import SnapshotManager
__all__ = [
"ACPAgentClient",
"DirectoryManager",
"ProcessManager",
"SnapshotManager",
]

View File

@@ -0,0 +1,712 @@
"""Communication with CLI agent subprocess using ACP (Agent Client Protocol).
ACP is a JSON-RPC 2.0 based protocol for communicating with coding agents.
See: https://agentclientprotocol.com
Usage:
# Simple usage with context manager
with ACPAgentClient(cwd="/path/to/project") as client:
for packet in client.send_message("What files are here?"):
print(packet)
# Manual lifecycle management
client = ACPAgentClient()
client.start(cwd="/path/to/project")
for packet in client.send_message("Hello"):
print(packet)
client.stop()
"""
import json
import os
import select
import shutil
import subprocess
import threading
from collections.abc import Generator
from dataclasses import dataclass
from dataclasses import field
from pathlib import Path
from typing import Any
from acp.schema import AgentMessageChunk
from acp.schema import AgentPlanUpdate
from acp.schema import AgentThoughtChunk
from acp.schema import CurrentModeUpdate
from acp.schema import Error
from acp.schema import PromptResponse
from acp.schema import ToolCallProgress
from acp.schema import ToolCallStart
from pydantic import ValidationError
# ACP Protocol version
ACP_PROTOCOL_VERSION = 1
# Default client info
DEFAULT_CLIENT_INFO = {
"name": "onyx-sandbox",
"title": "Onyx Sandbox Agent Client",
"version": "1.0.0",
}
# =============================================================================
# Response Event Types (from acp.schema + custom completion/error types)
# =============================================================================
# Union type for all possible events from send_message
# Uses ACP schema types for session updates, plus our completion type
ACPEvent = (
AgentMessageChunk # Text/image content from agent
| AgentThoughtChunk # Agent's internal reasoning
| ToolCallStart # Tool invocation started
| ToolCallProgress # Tool execution progress/result
| AgentPlanUpdate # Agent's execution plan
| CurrentModeUpdate # Agent mode change
| PromptResponse # Agent finished (contains stop_reason)
| Error # An error occurred
)
# =============================================================================
# Internal State Types
# =============================================================================
@dataclass
class ACPSession:
    """Represents an active ACP session."""

    # Agent-assigned identifier returned by the session/new request
    session_id: str
    # Working directory the session was created with
    cwd: str
@dataclass
class ACPClientState:
    """Internal state for the ACP client."""

    # True once the `initialize` handshake has completed successfully
    initialized: bool = False
    # Session created via session/new, or None before start()
    current_session: ACPSession | None = None
    # Monotonically increasing JSON-RPC request id counter
    next_request_id: int = 0
    # Capabilities reported by the agent during initialize
    agent_capabilities: dict[str, Any] = field(default_factory=dict)
    # Identification info (name/version) reported by the agent
    agent_info: dict[str, Any] = field(default_factory=dict)
def _find_opencode_binary() -> str | None:
"""Find the opencode binary path.
Returns:
Path to opencode binary, or None if not found
"""
# Check PATH first
opencode_path = shutil.which("opencode")
if opencode_path:
return opencode_path
# Try common installation paths
common_paths = [
Path.home() / ".opencode" / "bin" / "opencode",
Path("/usr/local/bin/opencode"),
]
for path in common_paths:
if path.exists():
return str(path)
return None
class ACPAgentClient:
"""ACP (Agent Client Protocol) client for communication with CLI agents.
Implements JSON-RPC 2.0 over stdin/stdout as specified by ACP.
Manages the agent subprocess lifecycle internally.
Usage:
# With context manager (recommended)
with ACPAgentClient(cwd="/path/to/project") as client:
for packet in client.send_message("Hello"):
print(packet)
# Manual lifecycle
client = ACPAgentClient()
client.start(cwd="/path/to/project")
try:
for packet in client.send_message("Hello"):
print(packet)
finally:
client.stop()
"""
def __init__(
self,
cwd: str | None = None,
opencode_path: str | None = None,
client_info: dict[str, Any] | None = None,
client_capabilities: dict[str, Any] | None = None,
auto_start: bool = True,
) -> None:
"""Initialize the ACP client.
Args:
cwd: Working directory for the agent. If provided and auto_start=True,
the agent will be started immediately.
opencode_path: Path to opencode binary. Auto-detected if not provided.
client_info: Client identification info (name, title, version)
client_capabilities: Client capabilities to advertise
auto_start: If True and cwd is provided, start the agent immediately
"""
self._opencode_path = opencode_path or _find_opencode_binary()
self._client_info = client_info or DEFAULT_CLIENT_INFO
self._client_capabilities = client_capabilities or {
"fs": {
"readTextFile": True,
"writeTextFile": True,
},
"terminal": True,
}
self._state = ACPClientState()
self._process: subprocess.Popen[str] | None = None
self._read_lock = threading.Lock()
self._cwd: str | None = None
# Auto-start if cwd provided
if cwd and auto_start:
self.start(cwd=cwd)
    def __enter__(self) -> "ACPAgentClient":
        """Context manager entry; returns the client itself."""
        return self
    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        """Context manager exit - ensures cleanup.

        Always stops the agent subprocess; exceptions are not suppressed
        (returning None propagates them).
        """
        self.stop()
    def start(
        self,
        cwd: str | None = None,
        mcp_servers: list[dict[str, Any]] | None = None,
        timeout: float = 30.0,
    ) -> str:
        """Start the agent process and initialize a session.

        This method:
        1. Starts the opencode acp subprocess
        2. Sends the initialize handshake
        3. Creates a new session

        Args:
            cwd: Working directory for the agent (defaults to current directory)
            mcp_servers: Optional MCP server configurations
            timeout: Timeout for initialization and session creation

        Returns:
            The session ID

        Raises:
            RuntimeError: If opencode is not found or startup fails
        """
        if self._process is not None:
            raise RuntimeError("Agent already started. Call stop() first.")
        if not self._opencode_path:
            raise RuntimeError(
                "opencode binary not found. Install opencode or provide opencode_path."
            )
        self._cwd = cwd or os.getcwd()
        # Start the opencode acp process. text=True gives str pipes for the
        # line-delimited JSON-RPC framing used by _send_request/_read_message.
        self._process = subprocess.Popen(
            [self._opencode_path, "acp", "--cwd", self._cwd],
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
        )
        try:
            # Initialize the ACP connection (protocol/capability handshake).
            self._initialize(timeout=timeout)
            # Create a session
            session_id = self._create_session(
                cwd=self._cwd,
                mcp_servers=mcp_servers,
                timeout=timeout,
            )
            return session_id
        except Exception:
            # Clean up on failure so a subsequent start() can be retried.
            self.stop()
            raise
def stop(self) -> None:
"""Stop the agent process and clean up resources."""
if self._process is not None:
if self._process.poll() is None:
self._process.terminate()
try:
self._process.wait(timeout=5)
except subprocess.TimeoutExpired:
self._process.kill()
self._process = None
# Reset state
self._state = ACPClientState()
def _get_next_id(self) -> int:
"""Get the next request ID."""
request_id = self._state.next_request_id
self._state.next_request_id += 1
return request_id
def _ensure_running(self) -> subprocess.Popen[str]:
"""Ensure the process is running and return it.
Raises:
RuntimeError: If process is not running
"""
if self._process is None:
raise RuntimeError("Agent not started. Call start() first.")
if self._process.poll() is not None:
raise RuntimeError(
f"Agent process has terminated with code {self._process.returncode}"
)
return self._process
def _send_request(
self,
method: str,
params: dict[str, Any] | None = None,
) -> int:
"""Send a JSON-RPC request to the agent.
Args:
method: The RPC method name
params: Optional parameters for the method
Returns:
The request ID
Raises:
RuntimeError: If the process has terminated or pipe is broken
"""
process = self._ensure_running()
if process.stdin is None:
raise RuntimeError("Process stdin is not available")
request_id = self._get_next_id()
request: dict[str, Any] = {
"jsonrpc": "2.0",
"id": request_id,
"method": method,
}
if params is not None:
request["params"] = params
try:
process.stdin.write(json.dumps(request) + "\n")
process.stdin.flush()
except BrokenPipeError:
raise RuntimeError("Agent process stdin pipe is broken")
return request_id
def _send_notification(
self,
method: str,
params: dict[str, Any] | None = None,
) -> None:
"""Send a JSON-RPC notification (no response expected).
Args:
method: The notification method name
params: Optional parameters
Raises:
RuntimeError: If the process has terminated or pipe is broken
"""
process = self._ensure_running()
if process.stdin is None:
raise RuntimeError("Process stdin is not available")
notification: dict[str, Any] = {
"jsonrpc": "2.0",
"method": method,
}
if params is not None:
notification["params"] = params
try:
process.stdin.write(json.dumps(notification) + "\n")
process.stdin.flush()
except BrokenPipeError:
raise RuntimeError("Agent process stdin pipe is broken")
    def _read_message(
        self,
        timeout: float | None = None,
    ) -> dict[str, Any] | None:
        """Read a single JSON-RPC message from the agent.

        Args:
            timeout: Optional timeout in seconds

        Returns:
            The parsed JSON message, or None if timeout/EOF

        Raises:
            RuntimeError: If process stdout is not available
        """
        process = self._ensure_running()
        if process.stdout is None:
            raise RuntimeError("Process stdout is not available")
        # Serialize reads: concurrent readers would interleave partial lines.
        with self._read_lock:
            if timeout is not None:
                # Use select() so readline() below cannot block past `timeout`.
                # NOTE(review): select() on a pipe fd is POSIX-only — confirm
                # this code never runs on Windows.
                stdout_fd = process.stdout.fileno()
                readable, _, _ = select.select([stdout_fd], [], [], timeout)
                if not readable:
                    return None
            line = process.stdout.readline()
            if not line:
                # EOF: agent closed stdout (usually process exit).
                return None
            line = line.strip()
            if not line:
                # Blank line; treated the same as a timeout by callers.
                return None
            try:
                return json.loads(line)
            except json.JSONDecodeError:
                # Surface malformed output as a JSON-RPC parse-error object
                # instead of raising, so callers can keep consuming the stream.
                return {
                    "jsonrpc": "2.0",
                    "error": {
                        "code": -32700,
                        "message": f"Parse error: {line[:100]}",
                    },
                }
def _wait_for_response(
self,
request_id: int,
timeout: float = 30.0,
) -> dict[str, Any]:
"""Wait for a response to a specific request.
Args:
request_id: The request ID to wait for
timeout: Maximum time to wait
Returns:
The response result
Raises:
RuntimeError: If timeout, error response, or process dies
"""
import time
start_time = time.time()
while True:
remaining = timeout - (time.time() - start_time)
if remaining <= 0:
raise RuntimeError(
f"Timeout waiting for response to request {request_id}"
)
message = self._read_message(timeout=min(remaining, 1.0))
if message is None:
process = self._ensure_running()
if process.poll() is not None:
raise RuntimeError(
f"Agent process terminated with code {process.returncode}"
)
continue
# Check if this is the response we're waiting for
if message.get("id") == request_id:
if "error" in message:
error = message["error"]
raise RuntimeError(
f"ACP error {error.get('code')}: {error.get('message')}"
)
return message.get("result", {})
def _initialize(self, timeout: float = 30.0) -> dict[str, Any]:
"""Initialize the ACP connection (internal).
Args:
timeout: Maximum time to wait for response
Returns:
The agent's capabilities and info
"""
params = {
"protocolVersion": ACP_PROTOCOL_VERSION,
"clientCapabilities": self._client_capabilities,
"clientInfo": self._client_info,
}
request_id = self._send_request("initialize", params)
result = self._wait_for_response(request_id, timeout)
self._state.initialized = True
self._state.agent_capabilities = result.get("agentCapabilities", {})
self._state.agent_info = result.get("agentInfo", {})
return result
def _create_session(
self,
cwd: str,
mcp_servers: list[dict[str, Any]] | None = None,
timeout: float = 30.0,
) -> str:
"""Create a new ACP session (internal).
Args:
cwd: Working directory for the session
mcp_servers: Optional MCP server configurations
timeout: Maximum time to wait for response
Returns:
The session ID
"""
# Note: opencode requires cwd and mcpServers
params: dict[str, Any] = {
"cwd": cwd,
"mcpServers": mcp_servers or [],
}
request_id = self._send_request("session/new", params)
result = self._wait_for_response(request_id, timeout)
session_id = result.get("sessionId")
if not session_id:
raise RuntimeError("No session ID returned from session/new")
self._state.current_session = ACPSession(
session_id=session_id,
cwd=cwd,
)
return session_id
    def send_message(
        self,
        message: str,
        timeout: float = 300.0,
    ) -> Generator[ACPEvent, None, None]:
        """Send a message and stream response events.

        Args:
            message: The message content to send
            timeout: Maximum time to wait for complete response

        Yields:
            Typed ACP schema event objects (ACPEvent union):
            - AgentMessageChunk: Text/image content from the agent
            - AgentThoughtChunk: Agent's internal reasoning
            - ToolCallStart: Tool invocation started
            - ToolCallProgress: Tool execution progress/result
            - AgentPlanUpdate: Agent's execution plan
            - CurrentModeUpdate: Agent mode change
            - PromptResponse: Agent finished (has stop_reason)
            - Error: An error occurred

        Raises:
            RuntimeError: If no session or prompt fails
        """
        import time

        if self._state.current_session is None:
            raise RuntimeError("No active session. Call start() first.")
        session_id = self._state.current_session.session_id
        process = self._ensure_running()
        # Build prompt content blocks (ACP prompts are lists of typed blocks).
        prompt_content = [{"type": "text", "text": message}]
        params = {
            "sessionId": session_id,
            "prompt": prompt_content,
        }
        request_id = self._send_request("session/prompt", params)
        start_time = time.time()
        while True:
            remaining = timeout - (time.time() - start_time)
            if remaining <= 0:
                # Deadline exceeded: surface as a terminal Error event rather
                # than raising, so stream consumers always see a final event.
                yield Error(code=-1, message="Timeout waiting for response")
                break
            # Short poll slices so process death is noticed within ~1s.
            message_data = self._read_message(timeout=min(remaining, 1.0))
            if message_data is None:
                if process.poll() is not None:
                    yield Error(
                        code=-1,
                        message=f"Agent process terminated with code {process.returncode}",
                    )
                    break
                continue
            # Check for response to our prompt request — this terminates the
            # stream (either an Error or the final PromptResponse).
            if message_data.get("id") == request_id:
                if "error" in message_data:
                    error_data = message_data["error"]
                    yield Error(
                        code=error_data.get("code", -1),
                        message=error_data.get("message", "Unknown error"),
                    )
                else:
                    result = message_data.get("result", {})
                    yield PromptResponse.model_validate(result)
                break
            # Handle notifications (session/update) — streamed mid-turn events.
            if message_data.get("method") == "session/update":
                params_data = message_data.get("params", {})
                update = params_data.get("update", {})
                for event in self._process_session_update(update):
                    yield event
            # Handle requests from agent (e.g., fs/readTextFile): reject with
            # a JSON-RPC "method not found" error so the agent is not left
            # waiting on a reply.
            elif "method" in message_data and "id" in message_data:
                self._send_error_response(
                    message_data["id"],
                    -32601,
                    f"Method not supported: {message_data['method']}",
                )
def _process_session_update(
    self, update: dict[str, Any]
) -> Generator[ACPEvent, None, None]:
    """Process a session/update notification and yield typed ACP schema objects.

    Validates the raw update dict against the schema model matching its
    "sessionUpdate" discriminator and yields the resulting object.
    Updates that fail validation are silently skipped, as are update
    types that carry no information for consumers and unknown types.
    """
    # Discriminator value -> schema model used to validate it.
    # Types deliberately absent from this table are dropped:
    #   - "user_message_chunk": echo of the user's own input
    #   - "available_commands_update": not relevant for consumers
    #   - "session_info_update": internal bookkeeping
    #   - anything unrecognized
    model_by_type: dict[str, Any] = {
        "agent_message_chunk": AgentMessageChunk,
        "agent_thought_chunk": AgentThoughtChunk,
        "tool_call": ToolCallStart,
        "tool_call_update": ToolCallProgress,
        "plan": AgentPlanUpdate,
        "current_mode_update": CurrentModeUpdate,
    }
    model = model_by_type.get(update.get("sessionUpdate", ""))
    if model is None:
        return
    try:
        yield model.model_validate(update)
    except ValidationError:
        pass  # Skip updates that do not match the expected schema
def _send_error_response(
self,
request_id: int,
code: int,
message: str,
) -> None:
"""Send an error response to an agent request."""
process = self._process
if process is None or process.stdin is None:
return
response = {
"jsonrpc": "2.0",
"id": request_id,
"error": {
"code": code,
"message": message,
},
}
try:
process.stdin.write(json.dumps(response) + "\n")
process.stdin.flush()
except BrokenPipeError:
pass
def cancel(self) -> None:
    """Request cancellation of the in-flight operation via session/cancel.

    No-op when there is no active session.
    """
    session = self._state.current_session
    if session is not None:
        self._send_notification(
            "session/cancel",
            {"sessionId": session.session_id},
        )
@property
def is_running(self) -> bool:
    """True while the agent subprocess exists and has not exited."""
    process = self._process
    return process is not None and process.poll() is None
@property
def session_id(self) -> str | None:
    """The active session's ID, or None when no session has been created."""
    session = self._state.current_session
    return session.session_id if session else None
@property
def agent_info(self) -> dict[str, Any]:
    """Get the agent's info from initialization.

    Populated from the "agentInfo" field of the initialize response;
    empty until initialization has completed.
    """
    return self._state.agent_info
@property
def agent_capabilities(self) -> dict[str, Any]:
    """Get the agent's capabilities from initialization.

    Populated from the "agentCapabilities" field of the initialize
    response; empty until initialization has completed.
    """
    return self._state.agent_capabilities

View File

@@ -0,0 +1,268 @@
"""Directory management for sandbox lifecycle."""
import json
import shutil
from pathlib import Path
from typing import Any
class DirectoryManager:
"""Manages sandbox directory creation and cleanup.
Responsible for:
- Creating sandbox directory structure
- Setting up symlinks to knowledge files
- Copying templates (outputs, venv, skills, AGENTS.md)
- Cleaning up sandbox directories on termination
"""
def __init__(
self,
base_path: Path,
outputs_template_path: Path,
venv_template_path: Path,
skills_path: Path,
agent_instructions_template_path: Path,
) -> None:
"""Initialize DirectoryManager with template paths.
Args:
base_path: Root directory for all sandboxes
outputs_template_path: Path to outputs template directory
venv_template_path: Path to Python virtual environment template
skills_path: Path to agent skills directory
agent_instructions_template_path: Path to AGENTS.md template file
"""
self._base_path = base_path
self._outputs_template_path = outputs_template_path
self._venv_template_path = venv_template_path
self._skills_path = skills_path
self._agent_instructions_template_path = agent_instructions_template_path
def create_sandbox_directory(self, session_id: str) -> Path:
"""Create sandbox directory structure.
Creates the base directory for a sandbox session:
{base_path}/{session_id}/
├── files/ # Symlink to knowledge/source files
├── user_uploaded_files/ # User-uploaded files
├── outputs/ # Working directory from template
│ ├── web/ # Next.js app
│ ├── slides/
│ ├── markdown/
│ └── graphs/
├── .venv/ # Python virtual environment
├── AGENTS.md # Agent instructions
└── .agent/
└── skills/ # Agent skills
Args:
session_id: Unique identifier for the session
Returns:
Path to the created sandbox directory
"""
sandbox_path = self._base_path / session_id
sandbox_path.mkdir(parents=True, exist_ok=True)
return sandbox_path
def setup_files_symlink(
self,
sandbox_path: Path,
file_system_path: Path,
) -> None:
"""Create symlink to knowledge/source files.
Args:
sandbox_path: Path to the sandbox directory
file_system_path: Path to the source files to link
"""
files_link = sandbox_path / "files"
if not files_link.exists():
files_link.symlink_to(file_system_path, target_is_directory=True)
def setup_outputs_directory(self, sandbox_path: Path) -> None:
"""Copy outputs template and create additional directories.
Copies the Next.js template and creates additional output
directories for generated content (slides, markdown, graphs).
Args:
sandbox_path: Path to the sandbox directory
"""
output_dir = sandbox_path / "outputs"
if not output_dir.exists():
if self._outputs_template_path.exists():
shutil.copytree(self._outputs_template_path, output_dir, symlinks=True)
else:
output_dir.mkdir(parents=True)
# Create additional output directories for generated content
(output_dir / "slides").mkdir(parents=True, exist_ok=True)
(output_dir / "markdown").mkdir(parents=True, exist_ok=True)
(output_dir / "graphs").mkdir(parents=True, exist_ok=True)
def setup_venv(self, sandbox_path: Path) -> Path:
"""Copy virtual environment template.
Args:
sandbox_path: Path to the sandbox directory
Returns:
Path to the virtual environment directory
"""
venv_path = sandbox_path / ".venv"
if not venv_path.exists() and self._venv_template_path.exists():
shutil.copytree(self._venv_template_path, venv_path, symlinks=True)
return venv_path
def setup_agent_instructions(self, sandbox_path: Path) -> None:
"""Copy AGENTS.md instructions template.
Args:
sandbox_path: Path to the sandbox directory
"""
agent_md_path = sandbox_path / "AGENTS.md"
if (
not agent_md_path.exists()
and self._agent_instructions_template_path.exists()
):
shutil.copy(self._agent_instructions_template_path, agent_md_path)
def setup_skills(self, sandbox_path: Path) -> None:
"""Copy skills directory to .agent/skills.
Args:
sandbox_path: Path to the sandbox directory
"""
skills_dest = sandbox_path / ".agent" / "skills"
if self._skills_path.exists() and not skills_dest.exists():
skills_dest.parent.mkdir(parents=True, exist_ok=True)
shutil.copytree(self._skills_path, skills_dest)
def setup_opencode_config(
self,
sandbox_path: Path,
provider: str,
model_name: str,
api_key: str | None = None,
api_base: str | None = None,
disabled_tools: list[str] | None = None,
) -> None:
"""Create opencode.json configuration file for the agent.
Configures the opencode CLI agent with the LLM provider settings
from Onyx's configured LLM provider.
Args:
sandbox_path: Path to the sandbox directory
provider: LLM provider type (e.g., "openai", "anthropic")
model_name: Model name (e.g., "claude-sonnet-4-5", "gpt-4o")
api_key: Optional API key for the provider
api_base: Optional custom API base URL
disabled_tools: Optional list of tools to disable (e.g., ["question", "webfetch"])
"""
config_path = sandbox_path / "opencode.json"
if config_path.exists():
return
# Build opencode model string: provider/model-name
opencode_model = f"{provider}/{model_name}"
# Build configuration
config: dict[str, Any] = {
"model": opencode_model,
}
# Add provider-specific configuration if API key provided
if api_key:
provider_config: dict[str, Any] = {"options": {"apiKey": api_key}}
if api_base:
provider_config["api"] = api_base
config["provider"] = {provider: provider_config}
# Disable specified tools via permissions
if disabled_tools:
config["permission"] = {tool: "deny" for tool in disabled_tools}
config_path.write_text(json.dumps(config, indent=2))
def cleanup_sandbox_directory(self, sandbox_path: Path) -> None:
"""Remove sandbox directory and all contents.
Args:
sandbox_path: Path to the sandbox directory to remove
"""
if sandbox_path.exists():
shutil.rmtree(sandbox_path)
def get_outputs_path(self, sandbox_path: Path) -> Path:
"""Return path to outputs directory.
Args:
sandbox_path: Path to the sandbox directory
Returns:
Path to the outputs directory
"""
return sandbox_path / "outputs"
def get_web_path(self, sandbox_path: Path) -> Path:
"""Return path to Next.js web directory.
Args:
sandbox_path: Path to the sandbox directory
Returns:
Path to the web directory
"""
return sandbox_path / "outputs" / "web"
def get_venv_path(self, sandbox_path: Path) -> Path:
"""Return path to virtual environment.
Args:
sandbox_path: Path to the sandbox directory
Returns:
Path to the .venv directory
"""
return sandbox_path / ".venv"
def directory_exists(self, sandbox_path: Path) -> bool:
"""Check if sandbox directory exists.
Args:
sandbox_path: Path to check
Returns:
True if directory exists and is a directory
"""
return sandbox_path.exists() and sandbox_path.is_dir()
def setup_user_uploads_directory(self, sandbox_path: Path) -> Path:
"""Create user uploads directory at user_uploaded_files.
This directory is used to store files uploaded by the user
through the chat interface.
Args:
sandbox_path: Path to the sandbox directory
Returns:
Path to the user uploads directory
"""
uploads_path = sandbox_path / "user_uploaded_files"
uploads_path.mkdir(parents=True, exist_ok=True)
return uploads_path
def get_user_uploads_path(self, sandbox_path: Path) -> Path:
"""Return path to user uploads directory.
Args:
sandbox_path: Path to the sandbox directory
Returns:
Path to the user_uploaded_files directory
"""
return sandbox_path / "user_uploaded_files"

View File

@@ -0,0 +1,338 @@
"""Process management for CLI agent and Next.js server subprocesses."""
import os
import shutil
import signal
import subprocess
import time
import urllib.error
import urllib.request
from pathlib import Path
from onyx.utils.logger import setup_logger
logger = setup_logger()
class ProcessManager:
"""Manages CLI agent and Next.js server subprocess lifecycle.
Responsible for:
- Building virtual environment activation settings
- Starting agent processes with proper environment
- Starting Next.js dev servers
- Checking process status
- Gracefully terminating processes
"""
def build_venv_env(self, venv_path: Path) -> dict[str, str]:
"""Build environment variables dict with the virtual environment activated.
Args:
venv_path: Path to the virtual environment directory
Returns:
Environment variables dictionary with venv activated
"""
env = os.environ.copy()
venv_bin = str(venv_path / "bin")
env["PATH"] = f"{venv_bin}:{env.get('PATH', '')}"
env["VIRTUAL_ENV"] = str(venv_path)
# Unset PYTHONHOME if set (can interfere with venv)
env.pop("PYTHONHOME", None)
return env
def start_agent_process(
self,
sandbox_path: Path,
agent_command: list[str],
venv_path: Path | None = None,
env_vars: dict[str, str] | None = None,
) -> subprocess.Popen[str]:
"""Start CLI agent as subprocess.
Working directory is set to sandbox root.
Virtual environment is activated if provided.
Args:
sandbox_path: Path to the sandbox directory (working dir)
agent_command: Command and arguments to start the agent
venv_path: Optional path to virtual environment
env_vars: Optional additional environment variables
Returns:
The subprocess.Popen object for the agent process
"""
env = self.build_venv_env(venv_path) if venv_path else os.environ.copy()
if env_vars:
env.update(env_vars)
process = subprocess.Popen(
agent_command,
cwd=sandbox_path,
env=env,
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True,
)
return process
def start_nextjs_server(
self,
web_dir: Path,
port: int,
timeout: float = 180.0,
) -> subprocess.Popen[bytes]:
"""Start Next.js dev server.
1. Clear .next cache to avoid stale paths from template
2. Start npm run dev on specified port
3. Wait for server to be ready
Args:
web_dir: Path to the Next.js web directory
port: Port number to run the server on
timeout: Maximum time to wait for server to start
Returns:
The subprocess.Popen object for the Next.js server
Raises:
RuntimeError: If server fails to start within timeout
"""
logger.info(f"Starting Next.js server in {web_dir} on port {port}")
# Clear Next.js cache to avoid stale paths from template
next_cache = web_dir / ".next"
if next_cache.exists():
logger.debug(f"Clearing Next.js cache at {next_cache}")
shutil.rmtree(next_cache)
# Verify web_dir exists and has package.json
if not web_dir.exists():
logger.error(f"Web directory does not exist: {web_dir}")
raise RuntimeError(f"Web directory does not exist: {web_dir}")
package_json = web_dir / "package.json"
if not package_json.exists():
logger.error(f"package.json not found in {web_dir}")
raise RuntimeError(f"package.json not found in {web_dir}")
logger.debug(f"Starting npm run dev command in {web_dir}")
process = subprocess.Popen(
["npm", "run", "dev", "--", "-p", str(port)],
cwd=web_dir,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
)
logger.info(f"Next.js process started with PID {process.pid}")
# Wait for server to be ready
server_url = f"http://localhost:{port}"
logger.info(f"Waiting for Next.js server at {server_url} (timeout: {timeout}s)")
if not self._wait_for_server(server_url, timeout=timeout, process=process):
# Check if process died
if process.poll() is not None:
# Capture stdout/stderr for debugging
stdout_data = b""
stderr_data = b""
try:
# Read available output (non-blocking since process is dead)
if process.stdout:
stdout_data = process.stdout.read()
if process.stderr:
stderr_data = process.stderr.read()
except Exception as e:
logger.warning(f"Failed to read process output: {e}")
stdout_str = stdout_data.decode("utf-8", errors="replace")
stderr_str = stderr_data.decode("utf-8", errors="replace")
logger.error(
f"Next.js server process died with code {process.returncode}"
)
if stdout_str.strip():
logger.error(f"Next.js stdout:\n{stdout_str}")
if stderr_str.strip():
logger.error(f"Next.js stderr:\n{stderr_str}")
raise RuntimeError(
f"Next.js server process died with code {process.returncode}. "
f"stderr: {stderr_str[:500]}"
)
# Process still running but server not responding
logger.error(
f"Next.js server failed to respond within {timeout} seconds "
f"(process still running with PID {process.pid})"
)
# Try to get any available output
try:
if process.stdout:
stdout_data = process.stdout.read1(4096) # type: ignore
if stdout_data:
logger.error(
f"Partial stdout: {stdout_data.decode('utf-8', errors='replace')}"
)
except Exception:
pass
raise RuntimeError(
f"Next.js server failed to start within {timeout} seconds"
)
logger.info(f"Next.js server is ready at {server_url}")
return process
def _wait_for_server(
self,
url: str,
timeout: float = 30.0,
poll_interval: float = 0.5,
process: subprocess.Popen[bytes] | None = None,
) -> bool:
"""Wait for a server to become available by polling.
Args:
url: URL to poll
timeout: Maximum time to wait in seconds
poll_interval: Time between poll attempts in seconds
process: Optional process to check if it's still running
Returns:
True if server became available, False if timeout reached
"""
start_time = time.time()
attempt_count = 0
last_log_time = start_time
while time.time() - start_time < timeout:
attempt_count += 1
elapsed = time.time() - start_time
# Check if process died early
if process is not None and process.poll() is not None:
logger.warning(
f"Process died during wait (exit code: {process.returncode}) "
f"after {elapsed:.1f}s and {attempt_count} attempts"
)
return False
try:
with urllib.request.urlopen(url, timeout=2) as response:
if response.status == 200:
logger.debug(
f"Server ready after {elapsed:.1f}s and {attempt_count} attempts"
)
return True
except urllib.error.HTTPError as e:
# Log HTTP errors (server responding but with error)
if time.time() - last_log_time >= 10:
logger.debug(
f"HTTP error {e.code} from {url} after {elapsed:.1f}s "
f"({attempt_count} attempts)"
)
last_log_time = time.time()
except (urllib.error.URLError, TimeoutError) as e:
# Log connection errors periodically (every 10 seconds)
if time.time() - last_log_time >= 10:
logger.debug(
f"Still waiting for {url} after {elapsed:.1f}s "
f"({attempt_count} attempts): {type(e).__name__}"
)
last_log_time = time.time()
time.sleep(poll_interval)
logger.warning(
f"Server at {url} did not become available within {timeout}s "
f"({attempt_count} attempts)"
)
return False
def is_process_running(self, pid: int) -> bool:
"""Check if process with given PID is still running.
Args:
pid: Process ID to check
Returns:
True if process is running, False otherwise
"""
try:
os.kill(pid, 0) # Signal 0 just checks if process exists
return True
except ProcessLookupError:
return False
except PermissionError:
return True # Process exists but we can't signal it
def terminate_process(self, pid: int, timeout: float = 5.0) -> bool:
"""Gracefully terminate process.
1. Send SIGTERM
2. Wait up to timeout seconds
3. If still running, send SIGKILL
Args:
pid: Process ID to terminate
timeout: Maximum time to wait for graceful shutdown
Returns:
True if process was terminated, False if it wasn't running
"""
if not self.is_process_running(pid):
return False
try:
os.kill(pid, signal.SIGTERM)
except ProcessLookupError:
return False
# Wait for graceful shutdown
deadline = time.time() + timeout
while time.time() < deadline:
if not self.is_process_running(pid):
return True
time.sleep(0.1)
# Force kill if still running
try:
os.kill(pid, signal.SIGKILL)
except ProcessLookupError:
pass
return True
def get_process_info(self, pid: int) -> dict[str, str | int | float] | None:
"""Get information about a running process.
Uses psutil if available, otherwise returns basic info.
Args:
pid: Process ID to get info for
Returns:
Dictionary with process info, or None if process not running
"""
if not self.is_process_running(pid):
return None
try:
import psutil
proc = psutil.Process(pid)
return {
"pid": pid,
"status": proc.status(),
"cpu_percent": proc.cpu_percent(),
"memory_mb": proc.memory_info().rss / 1024 / 1024,
"create_time": proc.create_time(),
}
except ImportError:
# psutil not available, return basic info
return {"pid": pid, "status": "unknown"}
except Exception:
return {"pid": pid, "status": "unknown"}

View File

@@ -0,0 +1,227 @@
"""Snapshot management for sandbox state persistence."""
import tarfile
import tempfile
from pathlib import Path
from uuid import uuid4
from onyx.configs.constants import FileOrigin
from onyx.file_store.file_store import FileStore
from onyx.utils.logger import setup_logger
logger = setup_logger()
# File type for snapshot archives
SNAPSHOT_FILE_TYPE = "application/gzip"
class SnapshotManager:
    """Manages sandbox snapshot creation and restoration.

    Snapshots are tar.gz archives of the sandbox's outputs directory,
    stored using the file store abstraction (S3-compatible storage).

    Responsible for:
    - Creating snapshots of outputs directories
    - Restoring snapshots to target directories
    - Deleting snapshots from storage
    """

    def __init__(self, file_store: FileStore) -> None:
        """Initialize SnapshotManager with a file store.

        Args:
            file_store: The file store to use for snapshot storage
        """
        self._file_store = file_store

    def create_snapshot(
        self,
        sandbox_path: Path,
        session_id: str,
        tenant_id: str,
    ) -> tuple[str, str, int]:
        """Create a snapshot of the outputs directory.

        Creates a tar.gz archive of the sandbox's outputs directory
        and uploads it to the file store under
        ``sandbox-snapshots/{tenant_id}/{session_id}/{snapshot_id}.tar.gz``.

        Args:
            sandbox_path: Path to the sandbox directory
            session_id: Session identifier for the sandbox
            tenant_id: Tenant identifier for multi-tenant isolation

        Returns:
            Tuple of (snapshot_id, storage_path, size_bytes)

        Raises:
            FileNotFoundError: If outputs directory doesn't exist
            RuntimeError: If snapshot creation fails (any archive or upload
                error is wrapped and re-raised as RuntimeError)
        """
        snapshot_id = str(uuid4())
        outputs_path = sandbox_path / "outputs"
        if not outputs_path.exists():
            raise FileNotFoundError(f"Outputs directory not found: {outputs_path}")

        # Create tar.gz in temp location; delete=False so the path outlives
        # the context manager and can be re-opened for archiving/upload.
        # The temp file is removed in the finally block below.
        tmp_path: str | None = None
        try:
            with tempfile.NamedTemporaryFile(
                suffix=".tar.gz", delete=False
            ) as tmp_file:
                tmp_path = tmp_file.name
            # Create the tar archive (the whole outputs tree, rooted at
            # "outputs" inside the archive so restore recreates outputs/)
            with tarfile.open(tmp_path, "w:gz") as tar:
                tar.add(outputs_path, arcname="outputs")
            # Get size
            size_bytes = Path(tmp_path).stat().st_size
            # Generate storage path for file store
            # Format: sandbox-snapshots/{tenant_id}/{session_id}/{snapshot_id}.tar.gz
            storage_path = (
                f"sandbox-snapshots/{tenant_id}/{session_id}/{snapshot_id}.tar.gz"
            )
            display_name = f"sandbox-snapshot-{session_id}-{snapshot_id}.tar.gz"
            # Upload to file store
            with open(tmp_path, "rb") as f:
                self._file_store.save_file(
                    content=f,
                    display_name=display_name,
                    file_origin=FileOrigin.SANDBOX_SNAPSHOT,
                    file_type=SNAPSHOT_FILE_TYPE,
                    file_id=storage_path,
                    file_metadata={
                        "session_id": session_id,
                        "tenant_id": tenant_id,
                        "snapshot_id": snapshot_id,
                    },
                )
            logger.info(
                f"Created snapshot {snapshot_id} for session {session_id}, "
                f"size: {size_bytes} bytes"
            )
            return snapshot_id, storage_path, size_bytes
        except Exception as e:
            logger.error(f"Failed to create snapshot for session {session_id}: {e}")
            raise RuntimeError(f"Failed to create snapshot: {e}") from e
        finally:
            # Cleanup temp file
            if tmp_path:
                try:
                    Path(tmp_path).unlink(missing_ok=True)
                except Exception as cleanup_error:
                    logger.warning(
                        f"Failed to cleanup temp file {tmp_path}: {cleanup_error}"
                    )

    def restore_snapshot(
        self,
        storage_path: str,
        target_path: Path,
    ) -> None:
        """Restore a snapshot to target directory.

        Downloads the snapshot from file store and extracts the outputs/
        directory to the target path.

        Args:
            storage_path: The file store path of the snapshot
            target_path: Directory to extract the snapshot into

        Raises:
            FileNotFoundError: If snapshot doesn't exist in file store
            RuntimeError: If restoration fails
        """
        tmp_path: str | None = None
        file_io = None
        try:
            # Download from file store
            file_io = self._file_store.read_file(storage_path, use_tempfile=True)
            # Write to temp file for tarfile extraction (tarfile needs a
            # seekable file on disk, not the store's IO object)
            with tempfile.NamedTemporaryFile(
                suffix=".tar.gz", delete=False
            ) as tmp_file:
                tmp_path = tmp_file.name
                # Read from the IO object and write to temp file
                # NOTE(review): reads the whole archive into memory at once;
                # confirm snapshot sizes stay small enough for this
                content = file_io.read()
                tmp_file.write(content)
            # Ensure target path exists
            target_path.mkdir(parents=True, exist_ok=True)
            # Extract with security filter
            with tarfile.open(tmp_path, "r:gz") as tar:
                # Use data filter for safe extraction (prevents path traversal)
                # Available in Python 3.11.4+
                try:
                    tar.extractall(target_path, filter="data")
                except TypeError:
                    # Fallback for older Python versions without filter support
                    # Manually validate paths for security
                    for member in tar.getmembers():
                        # Check for path traversal attempts (member escaping
                        # target_path via ".." or absolute names)
                        member_path = Path(target_path) / member.name
                        try:
                            member_path.resolve().relative_to(target_path.resolve())
                        except ValueError:
                            raise RuntimeError(
                                f"Path traversal attempt detected: {member.name}"
                            )
                    tar.extractall(target_path)
            logger.info(f"Restored snapshot from {storage_path} to {target_path}")
        except Exception as e:
            logger.error(f"Failed to restore snapshot {storage_path}: {e}")
            raise RuntimeError(f"Failed to restore snapshot: {e}") from e
        finally:
            # Cleanup temp file
            if tmp_path:
                try:
                    Path(tmp_path).unlink(missing_ok=True)
                except Exception as cleanup_error:
                    logger.warning(
                        f"Failed to cleanup temp file {tmp_path}: {cleanup_error}"
                    )
            # Close the file IO if it's still open
            try:
                if file_io:
                    file_io.close()
            except Exception:
                pass

    def delete_snapshot(self, storage_path: str) -> None:
        """Delete snapshot from file store.

        Args:
            storage_path: The file store path of the snapshot to delete

        Raises:
            RuntimeError: If deletion fails for ANY reason — a missing
                snapshot also surfaces here. Callers that treat deletion
                as best-effort must catch this themselves.
        """
        try:
            self._file_store.delete_file(storage_path)
            logger.info(f"Deleted snapshot: {storage_path}")
        except Exception as e:
            # NOTE(review): despite the warning-level log, every failure —
            # including "snapshot does not exist" — is re-raised below;
            # confirm callers expect delete to be strict
            logger.warning(f"Failed to delete snapshot {storage_path}: {e}")
            raise RuntimeError(f"Failed to delete snapshot: {e}") from e

    def get_snapshot_size(self, storage_path: str) -> int | None:
        """Get the size of a snapshot in bytes.

        Args:
            storage_path: The file store path of the snapshot

        Returns:
            Size in bytes, or None if not available
        """
        return self._file_store.get_file_size(storage_path)

View File

@@ -0,0 +1,177 @@
#!/usr/bin/env python3
"""Test script for ACPAgentClient with opencode CLI.
Usage:
# From backend directory:
PYTHONPATH=. python onyx/server/features/build/sandbox/internal/test_agent_client.py
# Or with specific message:
PYTHONPATH=. python onyx/server/features/build/sandbox/internal/test_agent_client.py "What files are in this directory?"
# With specific working directory:
PYTHONPATH=. python onyx/server/features/build/sandbox/internal/test_agent_client.py --dir /path/to/project "List files"
"""
import argparse
import shutil
import tempfile
from pathlib import Path
from acp.schema import AgentMessageChunk
from acp.schema import AgentPlanUpdate
from acp.schema import AgentThoughtChunk
from acp.schema import CurrentModeUpdate
from acp.schema import Error
from acp.schema import PromptResponse
from acp.schema import ToolCallProgress
from acp.schema import ToolCallStart
try:
from onyx.server.features.build.sandbox.internal.agent_client import ACPAgentClient
except ImportError:
from agent_client import ACPAgentClient # type: ignore
def test_with_opencode_acp(message: str, working_dir: str | None = None) -> None:
    """Exercise ACPAgentClient end-to-end against the opencode CLI (ACP protocol)."""
    banner = "=" * 60
    divider = "-" * 60
    print(banner)
    print("Testing ACPAgentClient with opencode acp")
    print(banner)

    # Resolve the workspace: caller-supplied directory, or a throwaway temp dir
    using_temp_dir = working_dir is None
    if working_dir is not None:
        workspace = Path(working_dir)
        if not workspace.exists():
            print(f"Working directory does not exist: {working_dir}")
            return
    else:
        workspace = Path(tempfile.mkdtemp(prefix="opencode-test-"))
        print(f"Created temp working directory: {workspace}")

    try:
        print(f"\nStarting ACPAgentClient in: {workspace}")
        # Context manager handles agent start/stop automatically
        with ACPAgentClient(cwd=str(workspace)) as client:
            print(
                f"Agent: {client.agent_info.get('name', 'unknown')} "
                f"v{client.agent_info.get('version', '?')}"
            )
            print(f"Session ID: {client.session_id}")
            print(f"\nSending message: {message}")
            print(divider)

            streamed_text = ""
            num_events = 0
            for evt in client.send_message(message, timeout=120.0):
                num_events += 1
                if isinstance(evt, AgentMessageChunk):
                    chunk = evt.content
                    if chunk.type == "text":
                        streamed_text += chunk.text
                        print(chunk.text, end="", flush=True)
                elif isinstance(evt, AgentThoughtChunk):
                    chunk = evt.content
                    if chunk.type == "text":
                        print(f"\n[Thought: {chunk.text[:100]}...]", flush=True)
                elif isinstance(evt, ToolCallStart):
                    print(
                        f"\n[Tool Call: {evt.title} ({evt.kind}) "
                        f"- {evt.tool_call_id}]",
                        flush=True,
                    )
                elif isinstance(evt, ToolCallProgress):
                    title_prefix = f"{evt.title} " if evt.title else ""
                    print(
                        f"\n[Tool Result: {title_prefix}{evt.status} "
                        f"- {evt.tool_call_id}]",
                        flush=True,
                    )
                elif isinstance(evt, AgentPlanUpdate):
                    entries = evt.plan.entries if evt.plan else []
                    print(f"\n[Plan: {len(entries)} steps]", flush=True)
                elif isinstance(evt, CurrentModeUpdate):
                    print(f"\n[Mode: {evt.current_mode_id}]", flush=True)
                elif isinstance(evt, PromptResponse):
                    print(f"\n\n[Done - stop_reason: {evt.stop_reason}]")
                elif isinstance(evt, Error):
                    print(f"\n[Error: {evt.message}]")
                else:
                    print(f"\n[Unknown event]: {evt}", flush=True)

            print(divider)
            print(f"\nReceived {num_events} events total")
            if streamed_text:
                print(f"Total text length: {len(streamed_text)} chars")
    except RuntimeError as e:
        print(f"\nError: {e}")
    except Exception as e:
        print(f"\nUnexpected error: {e}")
        import traceback

        traceback.print_exc()
    finally:
        if using_temp_dir:
            shutil.rmtree(workspace, ignore_errors=True)
            print(f"\nCleaned up temp directory: {workspace}")
def main() -> None:
    """Main entry point.

    Parses CLI arguments (an optional positional message plus an optional
    --dir working directory) and runs the opencode ACP smoke test.
    """
    parser = argparse.ArgumentParser(
        description="Test ACPAgentClient with opencode CLI",
        # Raw formatter so the epilog's example block is printed verbatim
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
# Test with opencode CLI (default message)
python test_agent_client.py
# Test with specific message
python test_agent_client.py "What is 2+2?"
# Test with specific working directory
python test_agent_client.py "List files" --dir /path/to/project
""",
    )
    # Positional message is optional; a short arithmetic prompt is the default
    parser.add_argument(
        "message",
        type=str,
        nargs="?",
        default="What is 2+2? Reply briefly with just the number.",
        help="Message to send to opencode",
    )
    # When --dir is omitted, the test creates (and cleans up) a temp dir
    parser.add_argument(
        "--dir",
        type=str,
        metavar="PATH",
        help="Working directory for opencode (default: temp dir)",
    )
    args = parser.parse_args()
    print("\nACP Agent Client Test Suite")
    print("===========================\n")
    test_with_opencode_acp(args.message, args.dir)
    print("\n\nDone!")


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,735 @@
"""Public interface for sandbox operations.
SandboxManager is the abstract interface for sandbox lifecycle management.
LocalSandboxManager is the filesystem-based implementation for local/dev environments.
Use get_sandbox_manager() to get the appropriate implementation based on SANDBOX_BACKEND.
"""
import threading
from abc import ABC
from abc import abstractmethod
from collections.abc import Generator
from datetime import datetime
from pathlib import Path
from uuid import UUID
from sqlalchemy.orm import Session
from onyx.db.enums import SandboxStatus
from onyx.db.llm import fetch_default_provider
from onyx.file_store.file_store import get_default_file_store
from onyx.server.features.build.configs import OPENCODE_DISABLED_TOOLS
from onyx.server.features.build.configs import OUTPUTS_TEMPLATE_PATH
from onyx.server.features.build.configs import SANDBOX_BACKEND
from onyx.server.features.build.configs import SANDBOX_BASE_PATH
from onyx.server.features.build.configs import SANDBOX_MAX_CONCURRENT_PER_ORG
from onyx.server.features.build.configs import SandboxBackend
from onyx.server.features.build.configs import VENV_TEMPLATE_PATH
from onyx.server.features.build.db.sandbox import allocate_nextjs_port
from onyx.server.features.build.db.sandbox import create_sandbox as db_create_sandbox
from onyx.server.features.build.db.sandbox import create_snapshot as db_create_snapshot
from onyx.server.features.build.db.sandbox import get_latest_snapshot_for_session
from onyx.server.features.build.db.sandbox import get_running_sandbox_count_by_tenant
from onyx.server.features.build.db.sandbox import get_sandbox_by_id
from onyx.server.features.build.db.sandbox import update_sandbox_heartbeat
from onyx.server.features.build.db.sandbox import update_sandbox_status
from onyx.server.features.build.sandbox.internal.agent_client import ACPAgentClient
from onyx.server.features.build.sandbox.internal.agent_client import ACPEvent
from onyx.server.features.build.sandbox.internal.directory_manager import (
DirectoryManager,
)
from onyx.server.features.build.sandbox.internal.process_manager import ProcessManager
from onyx.server.features.build.sandbox.internal.snapshot_manager import SnapshotManager
from onyx.server.features.build.sandbox.models import FilesystemEntry
from onyx.server.features.build.sandbox.models import SandboxInfo
from onyx.server.features.build.sandbox.models import SnapshotInfo
from onyx.utils.logger import setup_logger
from shared_configs.configs import MULTI_TENANT
from shared_configs.contextvars import get_current_tenant_id
logger = setup_logger()
class SandboxManager(ABC):
    """Abstract interface for sandbox operations.

    Defines the contract for sandbox lifecycle management including:
    - Provisioning and termination
    - Snapshot creation
    - Health checks
    - Agent communication
    - Filesystem operations

    Use get_sandbox_manager() to get the appropriate implementation.
    """

    @abstractmethod
    def provision(
        self,
        session_id: str,
        tenant_id: str,
        file_system_path: str,
        db_session: Session,
        snapshot_id: str | None = None,
    ) -> SandboxInfo:
        """Provision a new sandbox for a session.

        Args:
            session_id: Unique identifier for the session
            tenant_id: Tenant identifier for multi-tenant isolation
            file_system_path: Path to the knowledge/source files to link
            db_session: Database session
            snapshot_id: Optional snapshot ID to restore from

        Returns:
            SandboxInfo with the provisioned sandbox details

        Raises:
            ValueError: If max concurrent sandboxes reached
            RuntimeError: If provisioning fails
        """
        ...

    @abstractmethod
    def terminate(self, sandbox_id: str, db_session: Session) -> None:
        """Terminate a sandbox and release its resources.

        Args:
            sandbox_id: The sandbox ID to terminate
            db_session: Database session
        """
        ...

    @abstractmethod
    def create_snapshot(
        self, sandbox_id: str, db_session: Session
    ) -> SnapshotInfo | None:
        """Create a snapshot of the sandbox's outputs directory.

        Args:
            sandbox_id: The sandbox ID to snapshot
            db_session: Database session

        Returns:
            SnapshotInfo with the created snapshot details, or None if
            snapshots are disabled

        Raises:
            ValueError: If sandbox not found
            RuntimeError: If snapshot creation fails
        """
        ...

    @abstractmethod
    def health_check(self, sandbox_id: str, db_session: Session) -> bool:
        """Check if the sandbox is healthy.

        Args:
            sandbox_id: The sandbox ID to check
            db_session: Database session

        Returns:
            True if sandbox is healthy, False otherwise
        """
        ...

    @abstractmethod
    def send_message(
        self,
        sandbox_id: str,
        message: str,
        db_session: Session,
    ) -> Generator[ACPEvent, None, None]:
        """Send a message to the CLI agent and stream typed ACP events.

        Args:
            sandbox_id: The sandbox ID to send message to
            message: The message content to send
            db_session: Database session

        Yields:
            Typed ACP schema event objects

        Raises:
            ValueError: If sandbox not found
            RuntimeError: If agent communication fails
        """
        ...

    @abstractmethod
    def list_directory(
        self, sandbox_id: str, path: str, db_session: Session
    ) -> list[FilesystemEntry]:
        """List contents of a directory in the sandbox's outputs directory.

        Args:
            sandbox_id: The sandbox ID
            path: Relative path within the outputs directory
            db_session: Database session

        Returns:
            List of FilesystemEntry objects sorted by directory first, then name

        Raises:
            ValueError: If sandbox not found, path traversal attempted,
                or path is not a directory
        """
        ...

    @abstractmethod
    def read_file(self, sandbox_id: str, path: str, db_session: Session) -> bytes:
        """Read a file from the sandbox's outputs directory.

        Args:
            sandbox_id: The sandbox ID
            path: Relative path within the outputs directory
            db_session: Database session

        Returns:
            File contents as bytes

        Raises:
            ValueError: If sandbox not found, path traversal attempted,
                or path is not a file
        """
        ...

    @abstractmethod
    def get_sandbox_info(
        self, sandbox_id: str, db_session: Session
    ) -> SandboxInfo | None:
        """Get information about a sandbox.

        Args:
            sandbox_id: The sandbox ID
            db_session: Database session

        Returns:
            SandboxInfo or None if not found
        """
        ...

    @abstractmethod
    def cancel_agent(self, sandbox_id: str) -> None:
        """Cancel the current agent operation, if one is in flight.

        Args:
            sandbox_id: The sandbox ID
        """
        ...
class LocalSandboxManager(SandboxManager):
    """Filesystem-based sandbox manager for local/dev environments.

    Manages sandboxes as directories on the local filesystem.
    Suitable for development, testing, and single-node deployments.

    Key characteristics:
    - Sandboxes are directories under SANDBOX_BASE_PATH
    - No container isolation (process-level only)
    - Snapshots disabled by default (SANDBOX_BACKEND=local)
    - No automatic cleanup of idle sandboxes

    This is a singleton class - use get_sandbox_manager() to get the instance.
    """

    # Singleton storage; _lock guards first-time construction only.
    _instance: "LocalSandboxManager | None" = None
    _lock = threading.Lock()

    def __new__(cls) -> "LocalSandboxManager":
        # Double-checked locking: only the first caller takes the lock and
        # runs _initialize(); subsequent callers get the cached instance.
        if cls._instance is None:
            with cls._lock:
                if cls._instance is None:
                    cls._instance = super().__new__(cls)
                    cls._instance._initialize()
        return cls._instance

    def _initialize(self) -> None:
        """Initialize managers (runs exactly once, from __new__)."""
        # Paths for templates
        build_dir = Path(__file__).parent.parent  # /onyx/server/features/build/
        skills_path = build_dir / "skills"
        agent_instructions_template_path = build_dir / "AGENTS.template.md"
        self._directory_manager = DirectoryManager(
            base_path=Path(SANDBOX_BASE_PATH),
            outputs_template_path=Path(OUTPUTS_TEMPLATE_PATH),
            venv_template_path=Path(VENV_TEMPLATE_PATH),
            skills_path=skills_path,
            agent_instructions_template_path=agent_instructions_template_path,
        )
        self._process_manager = ProcessManager()
        self._snapshot_manager = SnapshotManager(get_default_file_store())
        # Track ACP clients in memory
        # NOTE(review): this dict is not lock-protected; two concurrent
        # send_message() calls for the same sandbox could each create a
        # client, leaking one agent subprocess - confirm callers serialize.
        self._acp_clients: dict[str, ACPAgentClient] = (
            {}
        )  # sandbox_id -> ACPAgentClient
        # Validate templates exist (raises RuntimeError if missing)
        self._validate_templates()

    def _validate_templates(self) -> None:
        """Validate that sandbox templates exist.

        Raises RuntimeError if templates are missing.
        Templates are required for sandbox functionality.

        Raises:
            RuntimeError: If outputs or venv templates are missing
        """
        outputs_path = Path(OUTPUTS_TEMPLATE_PATH)
        venv_path = Path(VENV_TEMPLATE_PATH)
        # Collect all missing templates so the error reports both at once.
        missing_templates: list[str] = []
        if not outputs_path.exists():
            missing_templates.append(f"Outputs template not found at {outputs_path}")
        if not venv_path.exists():
            missing_templates.append(f"Venv template not found at {venv_path}")
        if missing_templates:
            error_msg = (
                "Sandbox templates are missing. "
                "Please build templates using:\n"
                "  python -m onyx.server.features.build.sandbox.build_templates\n"
                "Or use Docker image built with Dockerfile.sandbox-templates.\n\n"
                "Missing templates:\n"
            )
            error_msg += "\n".join(f"  - {template}" for template in missing_templates)
            raise RuntimeError(error_msg)
        logger.debug(f"Outputs template found at {outputs_path}")
        logger.debug(f"Venv template found at {venv_path}")

    def _get_sandbox_path(self, session_id: str | UUID) -> Path:
        """Get the filesystem path for a sandbox based on session_id.

        Args:
            session_id: The session ID (can be string or UUID)

        Returns:
            Path to the sandbox directory (SANDBOX_BASE_PATH / session_id)
        """
        return Path(SANDBOX_BASE_PATH) / str(session_id)

    def provision(
        self,
        session_id: str,
        tenant_id: str,
        file_system_path: str,
        db_session: Session,
        snapshot_id: str | None = None,
    ) -> SandboxInfo:
        """Provision a new sandbox for a session.

        1. Check concurrent sandbox limit for tenant
        2. Create sandbox directory structure
        3. Setup files symlink, outputs, venv, AGENTS.md, and skills
        4. If snapshot_id provided and kubernetes backend, restore outputs from snapshot
        5. Start Next.js dev server
        6. Store sandbox record in DB
        7. Return sandbox info (agent not started until first message)

        Raises:
            ValueError: If the per-tenant concurrent sandbox limit is reached
            RuntimeError: If no default LLM provider is configured
        """
        logger.info(
            f"Starting sandbox provisioning for session {session_id}, "
            f"tenant {tenant_id}"
        )
        # Raises ValueError if session_id is not a valid UUID string.
        session_uuid = UUID(session_id)
        # Check limit (only enforce on cloud deployments)
        if MULTI_TENANT:
            logger.debug(f"Checking concurrent sandbox limit for tenant {tenant_id}")
            running_count = get_running_sandbox_count_by_tenant(db_session, tenant_id)
            logger.debug(
                f"Current running sandboxes: {running_count}, "
                f"max: {SANDBOX_MAX_CONCURRENT_PER_ORG}"
            )
            if running_count >= SANDBOX_MAX_CONCURRENT_PER_ORG:
                raise ValueError(
                    f"Maximum concurrent sandboxes ({SANDBOX_MAX_CONCURRENT_PER_ORG}) "
                    f"reached for tenant"
                )
        else:
            logger.debug(
                f"Skipping sandbox limit check for tenant {tenant_id} "
                "(self-hosted deployment)"
            )
        # Create directory structure
        logger.info(f"Creating sandbox directory structure for session {session_id}")
        sandbox_path = self._directory_manager.create_sandbox_directory(session_id)
        logger.debug(f"Sandbox directory created at {sandbox_path}")
        try:
            # Setup files symlink
            logger.debug(f"Setting up files symlink to {file_system_path}")
            self._directory_manager.setup_files_symlink(
                sandbox_path, Path(file_system_path)
            )
            logger.debug("Files symlink created")
            # Setup outputs (from snapshot or template)
            # NOTE: Snapshot restore is only supported in kubernetes backend
            if snapshot_id and SANDBOX_BACKEND == SandboxBackend.KUBERNETES:
                logger.debug(f"Restoring from snapshot {snapshot_id}")
                # NOTE(review): this restores the *latest* snapshot for the
                # session rather than looking up the passed snapshot_id -
                # confirm whether restoring an older snapshot by ID is needed.
                snapshot = get_latest_snapshot_for_session(db_session, session_uuid)
                if snapshot:
                    self._snapshot_manager.restore_snapshot(
                        snapshot.storage_path, sandbox_path
                    )
                    logger.debug("Snapshot restored")
                else:
                    logger.warning(f"Snapshot {snapshot_id} not found, using template")
                    self._directory_manager.setup_outputs_directory(sandbox_path)
            else:
                if snapshot_id and SANDBOX_BACKEND == SandboxBackend.LOCAL:
                    logger.debug(
                        f"Ignoring snapshot {snapshot_id} (local backend - "
                        "snapshots disabled)"
                    )
                logger.debug("Setting up outputs directory from template")
                self._directory_manager.setup_outputs_directory(sandbox_path)
            logger.debug("Outputs directory ready")
            # Setup venv, AGENTS.md, and skills
            logger.debug("Setting up virtual environment")
            self._directory_manager.setup_venv(sandbox_path)
            logger.debug("Virtual environment ready")
            logger.debug("Setting up agent instructions (AGENTS.md)")
            self._directory_manager.setup_agent_instructions(sandbox_path)
            logger.debug("Agent instructions ready")
            logger.debug("Setting up skills")
            self._directory_manager.setup_skills(sandbox_path)
            logger.debug("Skills ready")
            # Setup user uploads directory
            logger.debug("Setting up user uploads directory")
            self._directory_manager.setup_user_uploads_directory(sandbox_path)
            logger.debug("User uploads directory ready")
            # Setup opencode.json with LLM provider configuration
            logger.debug("Fetching default LLM provider")
            llm_provider = fetch_default_provider(db_session)
            if not llm_provider:
                logger.error("No default LLM provider configured")
                raise RuntimeError(
                    "No default LLM provider configured. "
                    "Please configure an LLM provider in admin settings."
                )
            logger.debug(
                f"Setting up opencode config with provider: {llm_provider.provider}, "
                f"model: {llm_provider.default_model_name}"
            )
            self._directory_manager.setup_opencode_config(
                sandbox_path=sandbox_path,
                provider=llm_provider.provider,
                model_name=llm_provider.default_model_name,
                api_key=llm_provider.api_key,
                api_base=llm_provider.api_base,
                disabled_tools=OPENCODE_DISABLED_TOOLS,
            )
            logger.debug("Opencode config ready")
            # Allocate Next.js port and start server
            nextjs_port = allocate_nextjs_port(db_session)
            web_dir = self._directory_manager.get_web_path(sandbox_path)
            logger.info(f"Starting Next.js server at {web_dir} on port {nextjs_port}")
            self._process_manager.start_nextjs_server(web_dir, nextjs_port)
            logger.info("Next.js server started successfully")
            # Create DB record
            logger.debug("Creating sandbox database record")
            sandbox = db_create_sandbox(
                db_session=db_session,
                session_id=session_uuid,
                nextjs_port=nextjs_port,
            )
            update_sandbox_status(db_session, sandbox.id, SandboxStatus.RUNNING)
            logger.debug(f"Sandbox record created with ID {sandbox.id}")
            logger.info(
                f"Provisioned sandbox {sandbox.id} for session {session_id} "
                f"at {sandbox_path}, Next.js on port {nextjs_port}"
            )
            return SandboxInfo(
                id=str(sandbox.id),
                session_id=session_id,
                directory_path=str(self._get_sandbox_path(session_id)),
                status=SandboxStatus.RUNNING,
                created_at=sandbox.created_at,
                last_heartbeat=None,
            )
        except Exception as e:
            # Cleanup on failure so a half-built sandbox dir is not left behind.
            logger.error(
                f"Sandbox provisioning failed for session {session_id}: {e}",
                exc_info=True,
            )
            logger.info(f"Cleaning up sandbox directory at {sandbox_path}")
            self._directory_manager.cleanup_sandbox_directory(sandbox_path)
            raise

    def terminate(self, sandbox_id: str, db_session: Session) -> None:
        """Terminate a sandbox.

        1. Stop ACP client (terminates agent subprocess)
        2. Cleanup sandbox directory (this will handle Next.js process cleanup)
        3. Update DB status to TERMINATED

        Missing sandboxes are logged and ignored (idempotent).
        """
        sandbox = get_sandbox_by_id(db_session, UUID(sandbox_id))
        if not sandbox:
            logger.warning(f"Sandbox {sandbox_id} not found for termination")
            return
        # Stop ACP client (this terminates the opencode subprocess).
        # pop() also removes the cached client so a later send_message
        # starts a fresh one.
        client = self._acp_clients.pop(sandbox_id, None)
        if client:
            try:
                client.stop()
            except Exception as e:
                # Best-effort: a stuck agent must not block directory cleanup.
                logger.warning(
                    f"Error stopping ACP client for sandbox {sandbox_id}: {e}"
                )
        # Cleanup directory (this will handle Next.js process cleanup)
        sandbox_path = self._get_sandbox_path(sandbox.session_id)
        self._directory_manager.cleanup_sandbox_directory(sandbox_path)
        # Update status
        update_sandbox_status(db_session, UUID(sandbox_id), SandboxStatus.TERMINATED)
        logger.info(f"Terminated sandbox {sandbox_id}")

    def create_snapshot(
        self, sandbox_id: str, db_session: Session
    ) -> SnapshotInfo | None:
        """Create a snapshot of the sandbox's outputs directory.

        Returns None if snapshots are disabled (local backend).

        Raises:
            ValueError: If the sandbox is not found
        """
        # Snapshots are disabled for local backend
        if SANDBOX_BACKEND == SandboxBackend.LOCAL:
            logger.debug(
                f"Skipping snapshot creation for sandbox {sandbox_id} "
                "(local backend - snapshots disabled)"
            )
            return None
        sandbox = get_sandbox_by_id(db_session, UUID(sandbox_id))
        if not sandbox:
            raise ValueError(f"Sandbox {sandbox_id} not found")
        sandbox_path = self._get_sandbox_path(sandbox.session_id)
        tenant_id = get_current_tenant_id()
        # NOTE: snapshot_id returned by the snapshot manager is unused here;
        # the DB row created below is the canonical snapshot record.
        snapshot_id, storage_path, size_bytes = self._snapshot_manager.create_snapshot(
            sandbox_path,
            str(sandbox.session_id),
            tenant_id,
        )
        snapshot = db_create_snapshot(
            db_session=db_session,
            session_id=sandbox.session_id,
            storage_path=storage_path,
            size_bytes=size_bytes,
        )
        logger.info(
            f"Created snapshot {snapshot.id} for sandbox {sandbox_id}, "
            f"size: {size_bytes} bytes"
        )
        return SnapshotInfo(
            id=str(snapshot.id),
            session_id=str(sandbox.session_id),
            storage_path=storage_path,
            created_at=snapshot.created_at,
            size_bytes=size_bytes,
        )

    def health_check(self, sandbox_id: str, db_session: Session) -> bool:
        """Check if the sandbox is healthy (Next.js server running).

        A successful check also refreshes the sandbox heartbeat.
        """
        sandbox = get_sandbox_by_id(db_session, UUID(sandbox_id))
        if not sandbox:
            return False
        # Cannot check health if port is not known
        if sandbox.nextjs_port is None:
            return False
        # Check Next.js server is responsive on the sandbox's allocated port.
        # NOTE(review): reaches into ProcessManager's private _wait_for_server;
        # consider exposing a public probe method on ProcessManager.
        if self._process_manager._wait_for_server(
            f"http://localhost:{sandbox.nextjs_port}",
            timeout=5.0,
        ):
            update_sandbox_heartbeat(db_session, UUID(sandbox_id))
            return True
        return False

    def send_message(
        self,
        sandbox_id: str,
        message: str,
        db_session: Session,
    ) -> Generator[ACPEvent, None, None]:
        """Send a message to the CLI agent and stream typed ACP events.

        Yields ACPEvent objects:
        - AgentMessageChunk: Text/image content from agent
        - AgentThoughtChunk: Agent's internal reasoning
        - ToolCallStart: Tool invocation started
        - ToolCallProgress: Tool execution progress/result
        - AgentPlanUpdate: Agent's execution plan
        - CurrentModeUpdate: Agent mode change
        - PromptResponse: Agent finished (has stop_reason)
        - Error: An error occurred

        Raises:
            ValueError: If sandbox not found
        """
        sandbox = get_sandbox_by_id(db_session, UUID(sandbox_id))
        if not sandbox:
            raise ValueError(f"Sandbox {sandbox_id} not found")
        # Get or create ACP client for this sandbox; a dead client is
        # replaced lazily on the next message.
        client = self._acp_clients.get(sandbox_id)
        if client is None or not client.is_running:
            sandbox_path = self._get_sandbox_path(sandbox.session_id)
            # Create and start ACP client
            client = ACPAgentClient(cwd=str(sandbox_path))
            self._acp_clients[sandbox_id] = client
        # Update heartbeat on message send
        update_sandbox_heartbeat(db_session, UUID(sandbox_id))
        for event in client.send_message(message):
            yield event
            # Update heartbeat on activity
            # NOTE(review): this writes to the DB once per streamed event -
            # confirm that is acceptable for long agent responses.
            update_sandbox_heartbeat(db_session, UUID(sandbox_id))

    def list_directory(
        self, sandbox_id: str, path: str, db_session: Session
    ) -> list[FilesystemEntry]:
        """List contents of a directory in the sandbox's outputs directory.

        Raises:
            ValueError: If sandbox not found, path escapes the outputs
                directory, or path is not a directory
        """
        sandbox = get_sandbox_by_id(db_session, UUID(sandbox_id))
        if not sandbox:
            raise ValueError(f"Sandbox {sandbox_id} not found")
        sandbox_path = self._get_sandbox_path(sandbox.session_id)
        outputs_path = sandbox_path / "outputs"
        # lstrip("/") so an absolute-looking path stays relative to outputs.
        target_path = outputs_path / path.lstrip("/")
        # Security: ensure path is within outputs directory
        # (resolve() expands symlinks and "..", then relative_to raises
        # ValueError if the result lies outside outputs).
        try:
            target_path.resolve().relative_to(outputs_path.resolve())
        except ValueError:
            raise ValueError("Path traversal not allowed")
        if not target_path.is_dir():
            raise ValueError(f"Not a directory: {path}")
        entries = []
        for item in target_path.iterdir():
            stat = item.stat()
            entries.append(
                FilesystemEntry(
                    name=item.name,
                    path=str(item.relative_to(outputs_path)),
                    is_directory=item.is_dir(),
                    size_bytes=stat.st_size if item.is_file() else None,
                    modified_at=datetime.fromtimestamp(stat.st_mtime),
                )
            )
        # Directories first, then case-insensitive by name.
        return sorted(entries, key=lambda e: (not e.is_directory, e.name.lower()))

    def read_file(self, sandbox_id: str, path: str, db_session: Session) -> bytes:
        """Read a file from the sandbox's outputs directory.

        Raises:
            ValueError: If sandbox not found, path escapes the outputs
                directory, or path is not a file
        """
        sandbox = get_sandbox_by_id(db_session, UUID(sandbox_id))
        if not sandbox:
            raise ValueError(f"Sandbox {sandbox_id} not found")
        sandbox_path = self._get_sandbox_path(sandbox.session_id)
        outputs_path = sandbox_path / "outputs"
        target_path = outputs_path / path.lstrip("/")
        # Security: ensure path is within outputs directory
        try:
            target_path.resolve().relative_to(outputs_path.resolve())
        except ValueError:
            raise ValueError("Path traversal not allowed")
        if not target_path.is_file():
            raise ValueError(f"Not a file: {path}")
        return target_path.read_bytes()

    def get_sandbox_info(
        self, sandbox_id: str, db_session: Session
    ) -> SandboxInfo | None:
        """Get information about a sandbox, or None if it does not exist."""
        sandbox = get_sandbox_by_id(db_session, UUID(sandbox_id))
        if not sandbox:
            return None
        return SandboxInfo(
            id=str(sandbox.id),
            session_id=str(sandbox.session_id),
            directory_path=str(self._get_sandbox_path(sandbox.session_id)),
            status=sandbox.status,
            created_at=sandbox.created_at,
            last_heartbeat=sandbox.last_heartbeat,
        )

    def cancel_agent(self, sandbox_id: str) -> None:
        """Cancel the current agent operation (no-op if no client exists)."""
        client = self._acp_clients.get(sandbox_id)
        if client:
            client.cancel()
# Module-level singleton cache shared by all callers of get_sandbox_manager().
_sandbox_manager_instance: SandboxManager | None = None
_sandbox_manager_lock = threading.Lock()


def get_sandbox_manager() -> SandboxManager:
    """Return the process-wide SandboxManager for the configured backend.

    Returns:
        SandboxManager instance (LocalSandboxManager for local backend,
        future implementations for kubernetes backend)

    Raises:
        ValueError: If SANDBOX_BACKEND is not a recognized backend.

    Note:
        Currently only LocalSandboxManager is implemented. When kubernetes
        backend is needed, add KubernetesSandboxManager and update this factory.
    """
    global _sandbox_manager_instance

    # Fast path: instance already built, no lock needed.
    if _sandbox_manager_instance is not None:
        return _sandbox_manager_instance

    with _sandbox_manager_lock:
        # Re-check under the lock in case another thread won the race.
        if _sandbox_manager_instance is None:
            if SANDBOX_BACKEND == SandboxBackend.KUBERNETES:
                # Kubernetes isolation is not implemented yet; fall back to
                # the local manager so the feature keeps working meanwhile.
                logger.warning(
                    "Kubernetes sandbox backend not yet implemented, "
                    "falling back to LocalSandboxManager"
                )
                _sandbox_manager_instance = LocalSandboxManager()
            elif SANDBOX_BACKEND == SandboxBackend.LOCAL:
                _sandbox_manager_instance = LocalSandboxManager()
            else:
                raise ValueError(f"Unknown sandbox backend: {SANDBOX_BACKEND}")
    return _sandbox_manager_instance

View File

@@ -0,0 +1,47 @@
"""Pydantic models for sandbox module communication."""
from datetime import datetime
from pydantic import BaseModel
from onyx.db.enums import SandboxStatus
class SandboxInfo(BaseModel):
    """Information about a sandbox instance.

    Returned by SandboxManager.provision() and other methods.
    """

    # Sandbox UUID, serialized as a string.
    id: str
    # Owning build-session UUID, serialized as a string.
    session_id: str
    # Filesystem path of the sandbox directory.
    directory_path: str
    # Current lifecycle state (see SandboxStatus enum).
    status: SandboxStatus
    created_at: datetime
    # Time of the last recorded heartbeat; None if no heartbeat yet.
    last_heartbeat: datetime | None
class SnapshotInfo(BaseModel):
    """Information about a sandbox snapshot.

    Returned by SandboxManager.create_snapshot().
    """

    # Snapshot UUID, serialized as a string.
    id: str
    # Build-session UUID the snapshot belongs to, serialized as a string.
    session_id: str
    # Location of the archived snapshot in the file store.
    storage_path: str
    created_at: datetime
    # Size of the stored snapshot archive in bytes.
    size_bytes: int
class FilesystemEntry(BaseModel):
    """Represents a file or directory entry in the sandbox filesystem.

    Used for directory listing operations.
    """

    # Base name of the file or directory.
    name: str
    # Path relative to the sandbox outputs directory.
    path: str
    is_directory: bool
    # File size in bytes; None for directories.
    size_bytes: int | None
    # Last modification time; None when unavailable.
    modified_at: datetime | None

View File

@@ -0,0 +1 @@
"""Celery tasks for sandbox management."""

View File

@@ -0,0 +1,175 @@
"""Celery tasks for sandbox cleanup operations."""
from celery import shared_task
from celery import Task
from redis.lock import Lock as RedisLock
from onyx.background.celery.apps.app_base import task_logger
from onyx.configs.constants import CELERY_GENERIC_BEAT_LOCK_TIMEOUT
from onyx.configs.constants import OnyxCeleryTask
from onyx.configs.constants import OnyxRedisLocks
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.redis.redis_pool import get_redis_client
from onyx.server.features.build.configs import SANDBOX_BACKEND
from onyx.server.features.build.configs import SANDBOX_IDLE_TIMEOUT_SECONDS
from onyx.server.features.build.configs import SandboxBackend
# Snapshot retention period in days; snapshots older than this are deleted
# by cleanup_old_snapshots_task.
SNAPSHOT_RETENTION_DAYS = 30
@shared_task(
    name=OnyxCeleryTask.CLEANUP_IDLE_SANDBOXES,
    soft_time_limit=300,
    bind=True,
    ignore_result=True,
)
def cleanup_idle_sandboxes_task(self: Task, *, tenant_id: str) -> None:
    """Clean up sandboxes that have been idle for longer than the timeout.

    This task:
    1. Finds sandboxes that have been idle longer than SANDBOX_IDLE_TIMEOUT_SECONDS
    2. Creates a snapshot of each idle sandbox (to preserve work) - kubernetes only
    3. Terminates the sandbox and cleans up resources

    NOTE: This task is a no-op for local backend - sandboxes persist until
    manually terminated or server restart.

    Args:
        tenant_id: The tenant ID for multi-tenant isolation
    """
    # Skip cleanup for local backend - sandboxes persist until manual termination
    if SANDBOX_BACKEND == SandboxBackend.LOCAL:
        task_logger.debug(
            "cleanup_idle_sandboxes_task skipped (local backend - cleanup disabled)"
        )
        return
    task_logger.info(f"cleanup_idle_sandboxes_task starting for tenant {tenant_id}")
    redis_client = get_redis_client(tenant_id=tenant_id)
    lock: RedisLock = redis_client.lock(
        OnyxRedisLocks.CLEANUP_IDLE_SANDBOXES_BEAT_LOCK,
        timeout=CELERY_GENERIC_BEAT_LOCK_TIMEOUT,
    )
    # Prevent overlapping runs of this task (non-blocking: if another worker
    # holds the lock, this run simply skips).
    if not lock.acquire(blocking=False):
        task_logger.debug("cleanup_idle_sandboxes_task - lock not acquired, skipping")
        return
    try:
        # Import here to avoid circular imports
        from onyx.server.features.build.db.sandbox import get_idle_sandboxes
        from onyx.server.features.build.sandbox.manager import get_sandbox_manager

        sandbox_manager = get_sandbox_manager()
        with get_session_with_current_tenant() as db_session:
            idle_sandboxes = get_idle_sandboxes(
                db_session, SANDBOX_IDLE_TIMEOUT_SECONDS
            )
            if not idle_sandboxes:
                task_logger.debug("No idle sandboxes found")
                return
            task_logger.info(f"Found {len(idle_sandboxes)} idle sandboxes to clean up")
            # Per-sandbox error isolation: one failing sandbox must not stop
            # the cleanup of the others.
            for sandbox in idle_sandboxes:
                sandbox_id = str(sandbox.id)
                task_logger.info(f"Cleaning up idle sandbox {sandbox_id}")
                try:
                    # Create snapshot before terminating to preserve work
                    task_logger.debug(f"Creating snapshot for sandbox {sandbox_id}")
                    sandbox_manager.create_snapshot(sandbox_id, db_session)
                    task_logger.debug(f"Snapshot created for sandbox {sandbox_id}")
                except Exception as e:
                    task_logger.warning(
                        f"Failed to create snapshot for sandbox {sandbox_id}: {e}"
                    )
                    # Continue with termination even if snapshot fails
                try:
                    sandbox_manager.terminate(sandbox_id, db_session)
                    task_logger.info(f"Terminated idle sandbox {sandbox_id}")
                except Exception as e:
                    task_logger.error(
                        f"Failed to terminate sandbox {sandbox_id}: {e}",
                        exc_info=True,
                    )
    except Exception:
        task_logger.exception("Error in cleanup_idle_sandboxes_task")
        raise
    finally:
        # Only release a lock this run actually holds.
        if lock.owned():
            lock.release()
    task_logger.info("cleanup_idle_sandboxes_task completed")
@shared_task(
    name=OnyxCeleryTask.CLEANUP_OLD_SNAPSHOTS,
    soft_time_limit=300,
    bind=True,
    ignore_result=True,
)
def cleanup_old_snapshots_task(self: Task, *, tenant_id: str) -> None:
    """Delete snapshots older than the retention period.

    This task cleans up old snapshots to manage storage usage.
    Snapshots older than SNAPSHOT_RETENTION_DAYS are deleted.

    NOTE: This task is a no-op for local backend since snapshots are disabled.

    Args:
        tenant_id: The tenant ID for multi-tenant isolation
    """
    # Skip for local backend - no snapshots to clean up
    if SANDBOX_BACKEND == SandboxBackend.LOCAL:
        task_logger.debug(
            "cleanup_old_snapshots_task skipped (local backend - snapshots disabled)"
        )
        return
    task_logger.info(f"cleanup_old_snapshots_task starting for tenant {tenant_id}")
    redis_client = get_redis_client(tenant_id=tenant_id)
    lock: RedisLock = redis_client.lock(
        OnyxRedisLocks.CLEANUP_OLD_SNAPSHOTS_BEAT_LOCK,
        timeout=CELERY_GENERIC_BEAT_LOCK_TIMEOUT,
    )
    # Prevent overlapping runs of this task (non-blocking acquire).
    if not lock.acquire(blocking=False):
        task_logger.debug("cleanup_old_snapshots_task - lock not acquired, skipping")
        return
    try:
        # Imported lazily to avoid circular imports at module load time.
        from onyx.server.features.build.db.sandbox import delete_old_snapshots

        with get_session_with_current_tenant() as db_session:
            deleted_count = delete_old_snapshots(
                db_session, tenant_id, SNAPSHOT_RETENTION_DAYS
            )
            if deleted_count > 0:
                task_logger.info(
                    f"Deleted {deleted_count} old snapshots for tenant {tenant_id}"
                )
            else:
                task_logger.debug("No old snapshots to delete")
    except Exception:
        task_logger.exception("Error in cleanup_old_snapshots_task")
        raise
    finally:
        # Only release a lock this run actually holds.
        if lock.owned():
            lock.release()
    task_logger.info("cleanup_old_snapshots_task completed")

View File

@@ -0,0 +1,404 @@
"""Tests for SandboxManager public interface.
These are external dependency unit tests that use real DB sessions and filesystem.
Each test covers a single happy path case for the corresponding public function.
Tests for provision are not included as they require the full sandbox environment
with Next.js servers.
"""
import shutil
import tempfile
from collections.abc import Generator
from pathlib import Path
from uuid import uuid4
import pytest
from acp.schema import PromptResponse
from acp.schema import ToolCallStart
from sqlalchemy.orm import Session
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.engine.sql_engine import SqlEngine
from onyx.db.enums import BuildSessionStatus
from onyx.db.enums import SandboxStatus
from onyx.db.models import BuildSession
from onyx.db.models import Sandbox
from onyx.file_store.file_store import get_default_file_store
from onyx.server.features.build.configs import SANDBOX_BASE_PATH
from onyx.server.features.build.sandbox.internal.agent_client import ACPEvent
from onyx.server.features.build.sandbox.manager import get_sandbox_manager
from onyx.server.features.build.sandbox.manager import LocalSandboxManager
from onyx.server.features.build.sandbox.models import FilesystemEntry
from onyx.server.features.build.sandbox.models import SandboxInfo
from onyx.server.features.build.sandbox.models import SnapshotInfo
from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
TEST_TENANT_ID = "public"
@pytest.fixture(scope="function")
def db_session() -> Generator[Session, None, None]:
    """Create a database session for testing.

    Yields a tenant-scoped session; the context manager handles teardown.
    """
    # NOTE(review): init_engine runs once per test - assumes repeated
    # initialization is safe/idempotent; confirm against SqlEngine.
    SqlEngine.init_engine(pool_size=10, max_overflow=5)
    with get_session_with_current_tenant() as session:
        yield session
@pytest.fixture(scope="function")
def tenant_context() -> Generator[None, None, None]:
    """Set up tenant context for testing.

    Sets CURRENT_TENANT_ID_CONTEXTVAR to TEST_TENANT_ID for the duration of
    the test and always resets it afterwards, even on test failure.
    """
    token = CURRENT_TENANT_ID_CONTEXTVAR.set(TEST_TENANT_ID)
    try:
        yield
    finally:
        CURRENT_TENANT_ID_CONTEXTVAR.reset(token)
@pytest.fixture
def sandbox_manager() -> LocalSandboxManager:
    """Get the SandboxManager instance via factory function.

    Asserts the factory returned the local implementation so tests can rely
    on LocalSandboxManager behavior (filesystem-backed sandboxes).
    """
    manager = get_sandbox_manager()
    assert isinstance(manager, LocalSandboxManager)
    return manager
@pytest.fixture
def temp_sandbox_dir() -> Generator[Path, None, None]:
    """Yield a throwaway sandbox-shaped directory (with an `outputs/` child).

    The whole tree is removed after the test, ignoring removal errors.
    """
    root = Path(tempfile.mkdtemp(prefix="sandbox_test_"))
    (root / "outputs").mkdir()
    yield root
    shutil.rmtree(root, ignore_errors=True)
@pytest.fixture
def actual_sandbox_path(sandbox_record: Sandbox) -> Path:
    """Get the actual sandbox path where the manager expects it.

    Mirrors LocalSandboxManager._get_sandbox_path: SANDBOX_BASE_PATH / session_id.
    """
    return Path(SANDBOX_BASE_PATH) / str(sandbox_record.session_id)
@pytest.fixture
def sandbox_record(
    db_session: Session, tenant_context: None
) -> Generator[Sandbox, None, None]:
    """Create a real Sandbox record in the database and set up sandbox directory.

    Creates a BuildSession (FK parent), a RUNNING Sandbox row, and the
    on-disk directory layout the manager expects; tears all of it down
    afterwards (directory first, then DB rows).
    """
    # Create BuildSession first (required foreign key)
    build_session = BuildSession(
        id=uuid4(),
        status=BuildSessionStatus.ACTIVE,
    )
    db_session.add(build_session)
    db_session.flush()  # Flush to get the ID without committing
    # Create Sandbox with reference to BuildSession
    sandbox = Sandbox(
        id=uuid4(),
        session_id=build_session.id,
        status=SandboxStatus.RUNNING,
    )
    db_session.add(sandbox)
    db_session.commit()
    db_session.refresh(sandbox)
    # Create sandbox directory at the expected location
    # The manager uses _get_sandbox_path() which returns SANDBOX_BASE_PATH / session_id
    expected_sandbox_path = Path(SANDBOX_BASE_PATH) / str(sandbox.session_id)
    expected_sandbox_path.mkdir(parents=True, exist_ok=True)
    # Ensure outputs directory exists at the expected path
    expected_outputs = expected_sandbox_path / "outputs"
    expected_outputs.mkdir(parents=True, exist_ok=True)
    yield sandbox
    # Cleanup sandbox directory
    if expected_sandbox_path.exists():
        shutil.rmtree(expected_sandbox_path, ignore_errors=True)
    # Cleanup - re-fetch in case it was deleted
    existing = db_session.get(Sandbox, sandbox.id)
    if existing:
        db_session.delete(existing)
        db_session.commit()
    # Cleanup BuildSession (cascade should handle it, but be explicit)
    existing_session = db_session.get(BuildSession, build_session.id)
    if existing_session:
        db_session.delete(existing_session)
        db_session.commit()
@pytest.fixture
def file_store_initialized() -> Generator[None, None, None]:
    """Initialize file store for snapshot tests.

    No teardown: initialization is left in place after the test.
    """
    get_default_file_store().initialize()
    yield
class TestTerminate:
    """Tests for SandboxManager.terminate()."""

    def test_terminate_updates_status(
        self,
        sandbox_manager: LocalSandboxManager,
        db_session: Session,
        sandbox_record: Sandbox,
        temp_sandbox_dir: Path,
        tenant_context: None,
    ) -> None:
        """Terminating a sandbox must flip its DB status to TERMINATED."""
        target_id = str(sandbox_record.id)

        sandbox_manager.terminate(target_id, db_session)

        # Re-read the row to observe the status written by terminate().
        db_session.refresh(sandbox_record)
        assert sandbox_record.status == SandboxStatus.TERMINATED
class TestCreateSnapshot:
    """Tests for SandboxManager.create_snapshot()."""

    def test_create_snapshot_archives_outputs(
        self,
        sandbox_manager: LocalSandboxManager,
        db_session: Session,
        sandbox_record: Sandbox,
        actual_sandbox_path: Path,
        tenant_context: None,
        file_store_initialized: None,
    ) -> None:
        """Test that create_snapshot archives the outputs directory.

        NOTE(review): create_snapshot returns None for the local backend;
        this test assumes SANDBOX_BACKEND is non-local in the test env -
        confirm the test configuration.
        """
        # Seed the outputs directory so the archive has non-zero size.
        outputs_dir = actual_sandbox_path / "outputs"
        (outputs_dir / "app.py").write_text("print('hello')")
        result = sandbox_manager.create_snapshot(str(sandbox_record.id), db_session)
        assert isinstance(result, SnapshotInfo)
        assert result.session_id == str(sandbox_record.session_id)
        assert result.size_bytes > 0
class TestHealthCheck:
    """Tests for SandboxManager.health_check()."""

    def test_health_check_returns_false_when_no_processes(
        self,
        sandbox_manager: LocalSandboxManager,
        db_session: Session,
        sandbox_record: Sandbox,
        tenant_context: None,
    ) -> None:
        """Test that health_check returns False when no processes are running.

        The fixture sandbox has no Next.js server (and no port), so the
        health probe cannot succeed.
        """
        result = sandbox_manager.health_check(str(sandbox_record.id), db_session)
        assert result is False
class TestListDirectory:
    """Tests for SandboxManager.list_directory()."""

    def test_list_directory_returns_entries(
        self,
        sandbox_manager: LocalSandboxManager,
        db_session: Session,
        sandbox_record: Sandbox,
        actual_sandbox_path: Path,
        tenant_context: None,
    ) -> None:
        """list_directory should return one entry per filesystem item."""
        outputs_dir = actual_sandbox_path / "outputs"
        (outputs_dir / "file.txt").write_text("content")
        (outputs_dir / "subdir").mkdir()

        entries = sandbox_manager.list_directory(
            str(sandbox_record.id), "/", db_session
        )

        assert len(entries) == 2
        assert all(isinstance(entry, FilesystemEntry) for entry in entries)
        first, second = entries
        # Directories are ordered ahead of files.
        assert (first.name, first.is_directory) == ("subdir", True)
        assert (second.name, second.is_directory) == ("file.txt", False)
class TestReadFile:
    """Tests for SandboxManager.read_file()."""

    def test_read_file_returns_contents(
        self,
        sandbox_manager: LocalSandboxManager,
        db_session: Session,
        sandbox_record: Sandbox,
        actual_sandbox_path: Path,
        tenant_context: None,
    ) -> None:
        """read_file should hand back the raw bytes written to outputs."""
        payload = b"Hello, World!"
        (actual_sandbox_path / "outputs" / "test.txt").write_bytes(payload)

        contents = sandbox_manager.read_file(
            str(sandbox_record.id), "test.txt", db_session
        )

        assert contents == payload
class TestGetSandboxInfo:
    """Tests for SandboxManager.get_sandbox_info()."""

    def test_get_sandbox_info_returns_info(
        self,
        sandbox_manager: LocalSandboxManager,
        db_session: Session,
        sandbox_record: Sandbox,
        tenant_context: None,
    ) -> None:
        """get_sandbox_info should return a populated SandboxInfo for a live record."""
        info = sandbox_manager.get_sandbox_info(str(sandbox_record.id), db_session)

        assert info is not None
        assert isinstance(info, SandboxInfo)
        # The info mirrors the DB record's identity and status.
        assert info.id == str(sandbox_record.id)
        assert info.session_id == str(sandbox_record.session_id)
        assert info.status == SandboxStatus.RUNNING
class TestCancelAgent:
    """Tests for SandboxManager.cancel_agent()."""

    def test_cancel_agent_no_client_is_noop(
        self,
        sandbox_manager: LocalSandboxManager,
    ) -> None:
        """Cancelling an unknown sandbox id must not raise."""
        unknown_id = str(uuid4())
        # Make sure no stale client is registered for this id.
        sandbox_manager._acp_clients.pop(unknown_id, None)

        # Succeeds by simply not raising.
        sandbox_manager.cancel_agent(unknown_id)
class TestSendMessage:
    """Tests for SandboxManager.send_message()."""

    @staticmethod
    def _shutdown_acp_client(
        sandbox_manager: LocalSandboxManager, sandbox_id: str
    ) -> None:
        """Cancel the agent and stop/remove its ACP client.

        Safe to call even when no client was ever registered. Extracted so
        every test can run it in a ``finally`` block — previously cleanup ran
        only after the assertions, so any assertion failure leaked the ACP
        client (and its underlying process).
        """
        sandbox_manager.cancel_agent(sandbox_id)
        client = sandbox_manager._acp_clients.pop(sandbox_id, None)
        if client:
            client.stop()

    def test_send_message_streams_events(
        self,
        sandbox_manager: LocalSandboxManager,
        db_session: Session,
        sandbox_record: Sandbox,
        temp_sandbox_dir: Path,
        tenant_context: None,
    ) -> None:
        """Test that send_message streams ACPEvent objects and ends with PromptResponse."""
        sandbox_id = str(sandbox_record.id)
        try:
            events: list[ACPEvent] = list(
                sandbox_manager.send_message(sandbox_id, "What is 2 + 2?", db_session)
            )

            # Should have received at least one event.
            assert len(events) > 0
            # Last event should be PromptResponse (success).
            assert isinstance(events[-1], PromptResponse)

            # Verify heartbeat was updated as a side effect of the exchange.
            db_session.refresh(sandbox_record)
            assert sandbox_record.last_heartbeat is not None
        finally:
            # Always release the ACP client, even if an assertion failed.
            self._shutdown_acp_client(sandbox_manager, sandbox_id)

    def test_send_message_write_file(
        self,
        sandbox_manager: LocalSandboxManager,
        db_session: Session,
        sandbox_record: Sandbox,
        actual_sandbox_path: Path,
        tenant_context: None,
    ) -> None:
        """Test that send_message can write files and emits edit tool calls."""
        sandbox_id = str(sandbox_record.id)
        try:
            events: list[ACPEvent] = list(
                sandbox_manager.send_message(
                    sandbox_id,
                    "Create a file called hello.txt with the content 'Hello, World!'",
                    db_session,
                )
            )

            # Should have at least one ToolCallStart with kind='edit'.
            tool_calls = [e for e in events if isinstance(e, ToolCallStart)]
            edit_tool_calls = [tc for tc in tool_calls if tc.kind == "edit"]
            assert len(edit_tool_calls) >= 1, (
                f"Expected at least one edit tool call, got {len(edit_tool_calls)}. "
                f"Tool calls: {[(tc.title, tc.kind) for tc in tool_calls]}"
            )

            # Last event should be PromptResponse.
            assert isinstance(events[-1], PromptResponse)

            # Verify the file was actually created (agent writes relative to sandbox root).
            created_file = actual_sandbox_path / "hello.txt"
            assert created_file.exists(), f"Expected file {created_file} to be created"
            assert "Hello" in created_file.read_text()
        finally:
            self._shutdown_acp_client(sandbox_manager, sandbox_id)

    def test_send_message_read_file(
        self,
        sandbox_manager: LocalSandboxManager,
        db_session: Session,
        sandbox_record: Sandbox,
        actual_sandbox_path: Path,
        tenant_context: None,
    ) -> None:
        """Test that send_message can read files and emits read tool calls."""
        sandbox_id = str(sandbox_record.id)
        # Create a file for the agent to read (at sandbox root, where agent has access).
        (actual_sandbox_path / "secret.txt").write_text("The secret code is 12345")
        try:
            events: list[ACPEvent] = list(
                sandbox_manager.send_message(
                    sandbox_id,
                    "Read the file secret.txt and tell me what the secret code is",
                    db_session,
                )
            )

            # Should have at least one ToolCallStart with kind='read'.
            tool_calls = [e for e in events if isinstance(e, ToolCallStart)]
            read_tool_calls = [tc for tc in tool_calls if tc.kind == "read"]
            assert len(read_tool_calls) >= 1, (
                f"Expected at least one read tool call, got {len(read_tool_calls)}. "
                f"Tool calls: {[(tc.title, tc.kind) for tc in tool_calls]}"
            )

            # Last event should be PromptResponse.
            assert isinstance(events[-1], PromptResponse)
        finally:
            self._shutdown_acp_client(sandbox_manager, sandbox_id)

View File

@@ -0,0 +1,6 @@
"""Session management for Build Mode."""
from onyx.server.features.build.session.manager import RateLimitError
from onyx.server.features.build.session.manager import SessionManager
__all__ = ["SessionManager", "RateLimitError"]

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,99 @@
---
name: image-generation
description: Generate images using nano banana.
---
# Image Generation Skill
Generate images using Nano Banana (Google Gemini Image API). Supports text-to-image and image-to-image generation with configurable options.
## Setup
### Dependencies
```bash
pip install google-genai Pillow
```
### Environment Variable
Set your API key:
```bash
export GEMINI_API_KEY="your_api_key_here"
```
## Usage
### Basic Text-to-Image
```bash
python scripts/generate.py --prompt "A futuristic city at sunset with neon lights" --output city.png
```
### With Aspect Ratio
```bash
python scripts/generate.py \
--prompt "Mountain landscape with a lake" \
--output landscape.png \
--aspect-ratio 16:9
```
### Image-to-Image Mode
Use a reference image to guide generation:
```bash
python scripts/generate.py \
--prompt "Make it look like a watercolor painting" \
--input-image original.png \
--output watercolor.png
```
### Generate Multiple Images
```bash
python scripts/generate.py \
--prompt "Abstract colorful art" \
--output art.png \
--num-images 3
```
## Arguments
| Argument | Short | Required | Default | Description |
|----------|-------|----------|---------|-------------|
| `--prompt` | `-p` | Yes | — | Text prompt describing the desired image |
| `--output` | `-o` | No | `output.png` | Output path for the generated image |
| `--model` | `-m` | No | `gemini-3-pro-image-preview` | Model to use for generation |
| `--input-image` | `-i` | No | — | Reference image for image-to-image mode |
| `--aspect-ratio` | `-a` | No | — | Aspect ratio: `1:1`, `16:9`, `9:16`, `4:3`, `3:4` |
| `--num-images` | `-n` | No | `1` | Number of images to generate |
## Available Models
- `gemini-3-pro-image-preview` - Default; Nano Banana image generation model
- `gemini-2.0-flash-preview-image-generation` - Fast, optimized for speed and lower latency
- `imagen-3.0-generate-002` - High quality image generation
## Programmatic Usage
Import the function directly in Python:
```python
from scripts.generate import generate_image
paths = generate_image(
prompt="A serene mountain lake under moonlight",
output_path="./outputs/lake.png",
aspect_ratio="16:9",
num_images=2,
)
```
## Tips
- **Detailed prompts work better**: Instead of "a cat", try "a fluffy orange tabby cat sitting on a windowsill, soft morning light, photorealistic"
- **Specify style**: Include style keywords like "digital art", "oil painting", "photorealistic", "anime style"
- **Use aspect ratios**: Match the aspect ratio to your intended use (16:9 for landscapes, 9:16 for portraits/mobile)
- **Image-to-image**: Great for style transfer, variations, or guided modifications of existing images

View File

@@ -0,0 +1,230 @@
#!/usr/bin/env python3
"""
Image generation script using Nano Banana (Google Gemini Image API).
Supports text-to-image and image-to-image generation with configurable options.
"""
import argparse
import base64
import os
import sys
from io import BytesIO
from pathlib import Path
from PIL import Image
def load_image_as_base64(image_path: str) -> tuple[str, str]:
    """Read an image from disk and return (base64 data, mime type).

    The mime type is inferred from the file extension; unknown extensions
    fall back to ``image/png``.

    Raises:
        FileNotFoundError: if ``image_path`` does not exist.
    """
    source = Path(image_path)
    if not source.exists():
        raise FileNotFoundError(f"Image not found: {image_path}")

    # Known extension -> mime type mapping; anything else is treated as PNG.
    extension_to_mime = {
        ".png": "image/png",
        ".jpg": "image/jpeg",
        ".jpeg": "image/jpeg",
        ".gif": "image/gif",
        ".webp": "image/webp",
    }
    mime_type = extension_to_mime.get(source.suffix.lower(), "image/png")

    encoded = base64.b64encode(source.read_bytes()).decode("utf-8")
    return encoded, mime_type
def generate_image(
    prompt: str,
    output_path: str,
    model: str = "gemini-3-pro-image-preview",
    input_image: str | None = None,
    aspect_ratio: str | None = None,
    num_images: int = 1,
) -> list[str]:
    """
    Generate image(s) using Google Gemini / Nano Banana API.

    Args:
        prompt: Text description for image generation.
        output_path: Path to save the generated image(s).
        model: Model ID to use for generation.
        input_image: Optional path to reference image for image-to-image mode.
        aspect_ratio: Aspect ratio (e.g., "1:1", "16:9", "9:16", "4:3", "3:4").
        num_images: Number of images to generate.

    Returns:
        List of paths to saved images.

    Raises:
        ValueError: if no API key is configured or the API returns no content.
    """
    api_key = os.environ.get("GEMINI_API_KEY") or os.environ.get("GENAI_API_KEY")
    if not api_key:
        raise ValueError(
            "API key not found. Set GEMINI_API_KEY or GENAI_API_KEY environment variable."
        )

    # lazy importing since very heavy libs
    from google import genai
    from google.genai import types

    client = genai.Client(api_key=api_key)

    # Build content parts
    parts: list[types.Part] = []

    # Add reference image if provided (image-to-image mode)
    if input_image:
        img_data, mime_type = load_image_as_base64(input_image)
        parts.append(
            types.Part.from_bytes(
                data=base64.b64decode(img_data),
                mime_type=mime_type,
            )
        )

    # Add text prompt
    parts.append(types.Part.from_text(text=prompt))

    # Build generation config.
    # FIX: aspect_ratio was previously accepted (and exposed on the CLI) but
    # never passed to the API; forward it via the image generation config.
    config_kwargs: dict = {"response_modalities": ["TEXT", "IMAGE"]}
    if aspect_ratio:
        config_kwargs["image_config"] = types.ImageConfig(aspect_ratio=aspect_ratio)
    generate_config = types.GenerateContentConfig(**config_kwargs)

    saved_paths: list[str] = []
    output_dir = Path(output_path).parent
    output_dir.mkdir(parents=True, exist_ok=True)
    base_name = Path(output_path).stem
    extension = Path(output_path).suffix or ".png"

    # One request per requested image; each response may itself contain
    # multiple inline images, hence the nested naming scheme below.
    for i in range(num_images):
        response = client.models.generate_content(
            model=model,
            contents=types.Content(parts=parts),
            config=generate_config,
        )

        # Validate response
        if not response.candidates:
            raise ValueError("No candidates returned from the API")
        candidate = response.candidates[0]
        if not candidate.content or not candidate.content.parts:
            raise ValueError("No content parts returned from the API")

        # Process response parts
        image_count = 0
        for part in candidate.content.parts:
            if part.inline_data is not None and part.inline_data.data is not None:
                # Extract and save the image
                image_data = part.inline_data.data
                image = Image.open(BytesIO(image_data))
                # Generate output filename: the exact requested path for a
                # single image, otherwise suffix with request/image indices.
                if num_images == 1 and image_count == 0:
                    save_path = output_path
                else:
                    save_path = str(
                        output_dir / f"{base_name}_{i + 1}_{image_count + 1}{extension}"
                    )
                image.save(save_path)
                saved_paths.append(save_path)
                print(f"Saved: {save_path}")
                image_count += 1
            elif part.text:
                # Print any text response from the model
                print(f"Model response: {part.text}")

    return saved_paths
def main() -> None:
    """CLI entry point: parse arguments, generate image(s), report results."""
    arg_parser = argparse.ArgumentParser(
        description="Generate images using Nano Banana (Google Gemini Image API).",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Basic text-to-image generation
  python generate.py --prompt "A futuristic city at sunset" --output city.png

  # Generate with specific aspect ratio
  python generate.py --prompt "Mountain landscape" --output landscape.png --aspect-ratio 16:9

  # Image-to-image mode (use reference image)
  python generate.py --prompt "Make it more colorful" --input-image ref.png --output colorful.png

  # Generate multiple images
  python generate.py --prompt "Abstract art" --output art.png --num-images 3
""",
    )
    arg_parser.add_argument(
        "--prompt",
        "-p",
        type=str,
        required=True,
        help="Text prompt describing the desired image.",
    )
    arg_parser.add_argument(
        "--output",
        "-o",
        type=str,
        default="output.png",
        help="Output path for the generated image (default: output.png).",
    )
    arg_parser.add_argument(
        "--model",
        "-m",
        type=str,
        default="gemini-3-pro-image-preview",
        help="Model to use (default: gemini-3-pro-image-preview).",
    )
    arg_parser.add_argument(
        "--input-image",
        "-i",
        type=str,
        help="Path to reference image for image-to-image generation.",
    )
    arg_parser.add_argument(
        "--aspect-ratio",
        "-a",
        type=str,
        choices=["1:1", "16:9", "9:16", "4:3", "3:4"],
        help="Aspect ratio for the generated image.",
    )
    arg_parser.add_argument(
        "--num-images",
        "-n",
        type=int,
        default=1,
        help="Number of images to generate (default: 1).",
    )
    cli_args = arg_parser.parse_args()

    try:
        results = generate_image(
            prompt=cli_args.prompt,
            output_path=cli_args.output,
            model=cli_args.model,
            input_image=cli_args.input_image,
            aspect_ratio=cli_args.aspect_ratio,
            num_images=cli_args.num_images,
        )
    except Exception as e:
        # Report the failure on stderr and exit non-zero for shell pipelines.
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)

    print(f"\nSuccessfully generated {len(results)} image(s):")
    for path in results:
        print(f"  - {path}")


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,327 @@
"""Utility functions for Build Mode feature announcements and file validation."""
import re
from pathlib import Path
from sqlalchemy.orm import Session
from onyx.auth.schemas import UserRole
from onyx.configs.constants import NotificationType
from onyx.db.models import User
from onyx.db.notification import create_notification
from onyx.feature_flags.factory import get_default_feature_flag_provider
from onyx.feature_flags.interface import NoOpFeatureFlagProvider
from onyx.file_processing.file_types import OnyxFileExtensions
from onyx.file_processing.file_types import OnyxMimeTypes
from onyx.server.features.build.configs import MAX_UPLOAD_FILE_SIZE_BYTES
from onyx.utils.logger import setup_logger
logger = setup_logger()
# =============================================================================
# File Upload Validation
# =============================================================================

# Additional extensions for code files (safe to read, not execute)
CODE_FILE_EXTENSIONS: set[str] = {
    ".py",
    ".js",
    ".ts",
    ".tsx",
    ".jsx",
    ".css",
    ".scss",
    ".less",
    ".java",
    ".go",
    ".rs",
    ".cpp",
    ".c",
    ".h",
    ".hpp",
    ".cs",
    ".rb",
    ".php",
    ".swift",
    ".kt",
    ".scala",
    # NOTE(review): shell scripts (.sh/.bash/.zsh) are allowed here while the
    # Windows script types (.bat/.cmd/.ps1) are blocked below — confirm this
    # asymmetry is intentional.
    ".sh",
    ".bash",
    ".zsh",
    ".env",
    ".ini",
    ".toml",
    ".cfg",
    ".properties",
}

# Additional MIME types for code files
CODE_MIME_TYPES: set[str] = {
    "text/x-python",
    "text/x-java",
    "text/x-c",
    "text/x-c++",
    "text/x-go",
    "text/x-rust",
    "text/x-shellscript",
    "text/css",
    "text/javascript",
    "application/javascript",
    "application/typescript",
    "application/octet-stream",  # Generic (for code files with unknown type)
}

# Combine base Onyx extensions with code file extensions
ALLOWED_EXTENSIONS: set[str] = (
    OnyxFileExtensions.ALL_ALLOWED_EXTENSIONS | CODE_FILE_EXTENSIONS
)

# Combine base Onyx MIME types with code MIME types
ALLOWED_MIME_TYPES: set[str] = OnyxMimeTypes.ALLOWED_MIME_TYPES | CODE_MIME_TYPES

# Blocked extensions (executable/dangerous files). Checked BEFORE the
# allowlist in validate_file_extension, so an entry here always wins.
BLOCKED_EXTENSIONS: set[str] = {
    # Windows executables
    ".exe",
    ".dll",
    ".msi",
    ".scr",
    ".com",
    ".bat",
    ".cmd",
    ".ps1",
    # macOS
    ".app",
    ".dmg",
    ".pkg",
    # Linux
    ".deb",
    ".rpm",
    ".so",
    # Cross-platform
    ".jar",
    ".war",
    ".ear",
    # Other potentially dangerous
    ".vbs",
    ".vbe",
    ".wsf",
    ".wsh",
    ".hta",
    ".cpl",
    ".reg",
    ".lnk",
    ".pif",
}

# Regex for sanitizing filenames (allow alphanumeric, dash, underscore, period)
SAFE_FILENAME_PATTERN = re.compile(r"[^a-zA-Z0-9._-]")
def validate_file_extension(filename: str) -> tuple[bool, str | None]:
    """Check a filename's extension against the block list and allowlist.

    Args:
        filename: The filename to validate.

    Returns:
        (is_valid, error_message); error_message is None when valid.
    """
    extension = Path(filename).suffix.lower()

    if not extension:
        return False, "File must have an extension"
    # The block list takes precedence over the allowlist.
    if extension in BLOCKED_EXTENSIONS:
        return False, f"File type '{extension}' is not allowed for security reasons"
    if extension in ALLOWED_EXTENSIONS:
        return True, None
    return False, f"File type '{extension}' is not supported"
def validate_mime_type(content_type: str | None) -> bool:
    """Return True when the Content-Type header is allowed (or absent).

    A missing header is accepted deliberately — extension validation still
    applies in that case.
    """
    if not content_type:
        return True
    # Drop parameters such as "; charset=utf-8" before the lookup.
    base_type = content_type.split(";")[0].strip().lower()
    return base_type in ALLOWED_MIME_TYPES
def validate_file_size(size: int) -> bool:
    """Return True when size is positive and within the configured limit.

    Args:
        size: File size in bytes.
    """
    return 0 < size <= MAX_UPLOAD_FILE_SIZE_BYTES
def sanitize_filename(filename: str) -> str:
    """Make a user-supplied filename safe for filesystem use.

    Strips directory components (blocking path traversal), null bytes, and
    characters outside [a-zA-Z0-9._-]; guarantees a non-empty, non-hidden
    name of at most 255 characters with its extension preserved.
    """
    # Drop any directory components so "../../x" cannot escape the target dir,
    # then remove embedded null bytes.
    safe = Path(filename).name.replace("\x00", "")
    # Replace every disallowed character with an underscore.
    safe = re.sub(r"[^a-zA-Z0-9._-]", "_", safe)
    # Trim leading/trailing dots and spaces.
    safe = safe.strip(". ")

    if not safe:
        return "unnamed_file"

    # Avoid accidentally producing a hidden (dot-prefixed) file.
    if safe.startswith("."):
        safe = "_" + safe[1:]

    # Truncate over-long names while keeping the extension intact.
    max_length = 255
    if len(safe) > max_length:
        suffix = Path(safe).suffix
        safe = Path(safe).stem[: max_length - len(suffix)] + suffix

    return safe
def validate_file(
    filename: str,
    content_type: str | None,
    size: int,
) -> tuple[bool, str | None]:
    """Run every upload check (extension, MIME type, size) on one file.

    Args:
        filename: The filename to validate.
        content_type: The Content-Type header value.
        size: File size in bytes.

    Returns:
        (is_valid, error_message); error_message is None when the file passes.
    """
    # All checks are cheap and side-effect free; evaluate them up front and
    # report the first failure in the same order as before.
    checks: list[tuple[bool, str | None]] = [
        validate_file_extension(filename),
        (
            validate_mime_type(content_type),
            f"MIME type '{content_type}' is not supported",
        ),
        (
            validate_file_size(size),
            f"File size exceeds maximum allowed size of {MAX_UPLOAD_FILE_SIZE_BYTES} bytes",
        ),
    ]
    for passed, error in checks:
        if not passed:
            return False, error
    return True, None
# =============================================================================
# Build Mode Feature Announcements
# =============================================================================

# PostHog feature flag key (inverted: True = disabled, so "not found" defaults to enabled)
BUILD_MODE_INTRO_DISABLED_FLAG = "build-mode-intro-disabled"
# Feature identifier stored under additional_data["feature"] on the notification
BUILD_MODE_FEATURE_ID = "build_mode"
def is_build_mode_intro_enabled(user: User) -> bool:
    """Decide whether the Build Mode intro should be shown to this user.

    The PostHog flag is inverted ("build-mode-intro-disabled"):
    - Flag = True -> disabled (don't show)
    - Flag = False or not found -> enabled (show)
    This keeps "not found" defaulting to enabled, since PostHog returns False
    for missing flags.
    """
    # NOTE: This is where we should invert the logic to globally disable the intro notification
    provider = get_default_feature_flag_provider()

    # Without a real flag provider (NoOp, i.e. no PostHog configured),
    # default to showing the intro.
    if isinstance(provider, NoOpFeatureFlagProvider):
        return True

    if provider.feature_enabled(BUILD_MODE_INTRO_DISABLED_FLAG, user.id):
        logger.debug("Build Mode intro disabled via PostHog feature flag")
        return False
    return True
def ensure_build_mode_intro_notification(user: User, db_session: Session) -> None:
    """Create the Build Mode intro notification for a user when appropriate.

    Called from the /api/notifications endpoint. Relies on notification
    deduplication so each user receives at most one intro notification.
    """
    # Respect the PostHog kill switch.
    if not is_build_mode_intro_enabled(user):
        return

    # Admins only (since only admins can create connectors).
    if user.role != UserRole.ADMIN:
        return

    # Deduplication inside create_notification skips this if one already exists.
    create_notification(
        user_id=user.id,
        notif_type=NotificationType.FEATURE_ANNOUNCEMENT,
        db_session=db_session,
        title="Introducing Onyx Build Mode",
        description="Unleash AI agents to create slides, dashboards, documents, and more.",
        additional_data={"feature": BUILD_MODE_FEATURE_ID},
    )

View File

@@ -0,0 +1,885 @@
"""
CC4A Build API v1 - Returns dummy data for frontend development.
Based on the specification in cc4a-overview.md.
"""
import json
import uuid
from collections.abc import Generator
from datetime import datetime
from enum import Enum
from typing import Literal
from fastapi import APIRouter
from fastapi import Depends
from fastapi import File
from fastapi import HTTPException
from fastapi import Query
from fastapi import UploadFile
from fastapi.responses import Response
from fastapi.responses import StreamingResponse
from pydantic import BaseModel
from sqlalchemy.orm import Session
from onyx.auth.users import current_user
from onyx.configs.constants import DocumentSource
from onyx.db.connector_credential_pair import get_connector_credential_pairs_for_user
from onyx.db.engine.sql_engine import get_session
from onyx.db.enums import ConnectorCredentialPairStatus
from onyx.db.enums import IndexingStatus
from onyx.db.enums import ProcessingMode
from onyx.db.index_attempt import get_latest_index_attempt_for_cc_pair_id
from onyx.db.models import User
# =============================================================================
# Build Connector Models
# =============================================================================
class BuildConnectorStatus(str, Enum):
    """Status of a build connector, as surfaced in the build admin panel."""

    NOT_CONNECTED = "not_connected"
    CONNECTED = "connected"
    INDEXING = "indexing"
    ERROR = "error"
    DELETING = "deleting"
class BuildConnectorInfo(BaseModel):
    """Simplified connector info for the build admin panel."""

    cc_pair_id: int  # connector-credential pair id
    connector_id: int
    credential_id: int
    source: str
    name: str
    status: BuildConnectorStatus
    docs_indexed: int
    last_indexed: datetime | None  # optional; may be None
    error_message: str | None = None  # optional; defaults to None
class BuildConnectorListResponse(BaseModel):
    """Response wrapper holding a list of build connectors."""

    connectors: list[BuildConnectorInfo]
# =============================================================================
# In-Memory Session Store (for dummy data persistence)
# =============================================================================
# NOTE: module-level dicts — data is process-local, lost on restart, and not
# shared across workers; acceptable for this dummy/frontend-dev router.

# Stores sessions by ID
_sessions_store: dict[str, "SessionResponse"] = {}
# Stores messages by session_id
_messages_store: dict[str, list["MessageResponse"]] = {}
# Stores artifacts by session_id
_artifacts_store: dict[str, list["ArtifactMetadataResponse"]] = {}
# =============================================================================
# Streaming Protocol Models
# =============================================================================
class StreamingType(str, Enum):
    """Enum defining all streaming packet types emitted in SSE data payloads."""

    # Terminal packets
    DONE = "done"
    ERROR = "error"
    # Step lifecycle
    STEP_START = "step_start"
    STEP_DELTA = "step_delta"
    STEP_END = "step_end"
    # Final output streaming
    OUTPUT_START = "output_start"
    OUTPUT_DELTA = "output_delta"
    # Artifact lifecycle
    ARTIFACT_CREATED = "artifact_created"
    ARTIFACT_UPDATED = "artifact_updated"
    # Tool-call lifecycle
    TOOL_START = "tool_start"
    TOOL_OUTPUT = "tool_output"
    TOOL_END = "tool_end"
class ArtifactType(str, Enum):
    """Kind of artifact produced by a build session."""

    NEXTJS_APP = "nextjs_app"
    PPTX = "pptx"
    MARKDOWN = "markdown"
    CHART = "chart"
    CSV = "csv"
    IMAGE = "image"
class SessionStatusEnum(str, Enum):
    """Lifecycle status of a build session."""

    ACTIVE = "active"
    IDLE = "idle"
    ARCHIVED = "archived"
# =============================================================================
# Request/Response Models
# =============================================================================
class CreateSessionRequest(BaseModel):
    """Request to create a new build session."""

    name: str | None = None  # optional; defaults to None
    description: str | None = None  # optional; defaults to None
class SessionResponse(BaseModel):
    """Session details returned by the session endpoints."""

    id: str
    org_id: str
    user_id: str
    sandbox_id: str | None = None  # optional; None when no sandbox attached
    name: str | None = None
    status: SessionStatusEnum
    created_at: datetime
    last_activity_at: datetime  # used for most-recent-first sorting in list_sessions
class SessionListResponse(BaseModel):
    """List of sessions plus the total count after filtering."""

    sessions: list[SessionResponse]
    total: int
class SendMessageRequest(BaseModel):
    """Request to send a message to the agent."""

    content: str
    context: str | None = None  # optional extra context; defaults to None
class MessageResponse(BaseModel):
    """A single message in the conversation."""

    id: str
    session_id: str
    role: Literal["user", "assistant"]  # who authored the message
    content: str
    created_at: datetime
class MessageHistoryResponse(BaseModel):
    """Full message history for a session."""

    messages: list[MessageResponse]
class ArtifactMetadataResponse(BaseModel):
    """Metadata describing one artifact produced in a session."""

    id: str
    session_id: str
    type: ArtifactType
    path: str  # path within the session's output area
    name: str
    created_at: datetime
    updated_at: datetime
    preview_url: str | None = None  # optional; set when a preview is available
class ArtifactListResponse(BaseModel):
    """Response wrapper holding a session's artifacts."""

    artifacts: list[ArtifactMetadataResponse]
class FileSystemEntry(BaseModel):
    """A file or directory entry in a directory listing."""

    name: str
    path: str
    is_directory: bool
    size: int | None = None  # bytes; None for directories
    mime_type: str | None = None  # None when unknown or for directories
    modified_at: datetime | None = None
class DirectoryListingResponse(BaseModel):
    """Directory listing for one path."""

    path: str  # the path that was listed
    entries: list[FileSystemEntry]
class RenameSessionRequest(BaseModel):
    """Request to rename a session."""

    name: str | None = None  # If None, triggers auto-naming
class RateLimitResponse(BaseModel):
    """Rate limit information for the current user."""

    is_limited: bool
    limit_type: Literal["weekly", "total"]  # which limit applies
    messages_used: int
    limit: int
    reset_timestamp: str | None = None  # optional; None when no reset is scheduled
class UploadResponse(BaseModel):
    """Response returned after a successful file upload."""

    path: str  # stored path of the uploaded file
    size: int  # bytes
    name: str
# =============================================================================
# Dummy Data Generators
# =============================================================================
def _generate_dummy_directory_listing(path: str) -> list[FileSystemEntry]:
    """Generate a dummy directory listing for the given path.

    Returns a fixed tree: the root/outputs level, a fake web project level,
    and an empty list for anything else.
    """

    def _dir_entry(name: str, entry_path: str) -> FileSystemEntry:
        # Directory entries carry no size or mime type.
        return FileSystemEntry(
            name=name,
            path=entry_path,
            is_directory=True,
            modified_at=datetime.utcnow(),
        )

    def _json_entry(name: str, entry_path: str, size: int) -> FileSystemEntry:
        return FileSystemEntry(
            name=name,
            path=entry_path,
            is_directory=False,
            size=size,
            mime_type="application/json",
            modified_at=datetime.utcnow(),
        )

    if path in ("", "/outputs"):
        return [
            _dir_entry("web", "web/"),
            _dir_entry("documents", "documents/"),
            _dir_entry("presentations", "presentations/"),
            _json_entry("manifest.json", "manifest.json", 256),
        ]
    if "web" in path:
        return [
            _dir_entry("src", f"{path}/src"),
            _json_entry("package.json", f"{path}/package.json", 1024),
        ]
    return []
def _format_sse_event(event_type: str, data: dict) -> str:
"""Format an event as SSE."""
return f"event: message\ndata: {json.dumps(data)}\n\n"
def _generate_streaming_response(session_id: str) -> Generator[str, None, None]:
    """Generate a dummy streaming response simulating agent activity.

    Yields a fixed script of SSE-formatted packets — step lifecycle, a tool
    call, an artifact, streamed output deltas, then "done" — with short
    sleeps between packets to mimic real agent latency. As a side effect,
    appends the created artifact and the final assistant message to the
    in-memory stores when `session_id` is known to them.
    """
    # Local import: only needed for pacing the dummy stream.
    import time

    step_id = str(uuid.uuid4())
    # Step 1: Start
    yield _format_sse_event(
        "message",
        {
            "type": "step_start",
            "step_id": step_id,
            "title": "Analyzing requirements",
            "timestamp": datetime.utcnow().isoformat(),
        },
    )
    time.sleep(0.3)
    # Step 1: Delta
    yield _format_sse_event(
        "message",
        {
            "type": "step_delta",
            "step_id": step_id,
            "content": "Reading your requirements and planning the implementation...",
            "timestamp": datetime.utcnow().isoformat(),
        },
    )
    time.sleep(0.3)
    # Step 1: End
    yield _format_sse_event(
        "message",
        {
            "type": "step_end",
            "step_id": step_id,
            "status": "success",
            "timestamp": datetime.utcnow().isoformat(),
        },
    )
    # Tool usage: simulated write_file start/end pair
    yield _format_sse_event(
        "message",
        {
            "type": "tool_start",
            "tool_name": "write_file",
            "tool_input": {"path": "/outputs/web/src/App.tsx"},
            "timestamp": datetime.utcnow().isoformat(),
        },
    )
    time.sleep(0.2)
    yield _format_sse_event(
        "message",
        {
            "type": "tool_end",
            "tool_name": "write_file",
            "status": "success",
            "timestamp": datetime.utcnow().isoformat(),
        },
    )
    # Artifact created - store it so later GET /artifacts calls can see it
    artifact_id = str(uuid.uuid4())
    artifact = ArtifactMetadataResponse(
        id=artifact_id,
        session_id=session_id,
        type=ArtifactType.NEXTJS_APP,
        path="web/",
        name="Dashboard",
        created_at=datetime.utcnow(),
        updated_at=datetime.utcnow(),
        preview_url=f"/api/build/v1/sessions/{session_id}/preview",
    )
    # Only persist for sessions this process created (store key exists).
    if session_id in _artifacts_store:
        _artifacts_store[session_id].append(artifact)
    yield _format_sse_event(
        "message",
        {
            "type": "artifact_created",
            "artifact": {
                "id": artifact_id,
                "type": "nextjs_app",
                "name": "Dashboard",
                "path": "web/",
                "preview_url": f"/api/build/v1/sessions/{session_id}/preview",
            },
            "timestamp": datetime.utcnow().isoformat(),
        },
    )
    # Output start
    yield _format_sse_event(
        "message",
        {
            "type": "output_start",
            "timestamp": datetime.utcnow().isoformat(),
        },
    )
    # Output delta (streaming the response in small chunks)
    response_parts = [
        "I've built your dashboard ",
        "with the following features:\n\n",
        "- Interactive charts using Recharts\n",
        "- Responsive layout\n",
        "- Dark mode support\n\n",
        "You can preview it in the artifact panel.",
    ]
    full_response = ""
    for part in response_parts:
        # Accumulate the full text so the stored message matches the stream.
        full_response += part
        yield _format_sse_event(
            "message",
            {
                "type": "output_delta",
                "content": part,
                "timestamp": datetime.utcnow().isoformat(),
            },
        )
        time.sleep(0.1)
    # Store assistant message for later message-history reads
    assistant_message = MessageResponse(
        id=str(uuid.uuid4()),
        session_id=session_id,
        role="assistant",
        content=full_response,
        created_at=datetime.utcnow(),
    )
    if session_id in _messages_store:
        _messages_store[session_id].append(assistant_message)
    # Done: terminal packet closing the stream
    yield _format_sse_event(
        "message",
        {
            "type": "done",
            "summary": "Created a Next.js dashboard with charts",
            "timestamp": datetime.utcnow().isoformat(),
        },
    )
# =============================================================================
# API Router
# =============================================================================
# All routes below are mounted under the /build/v1 prefix.
v1_router = APIRouter(prefix="/build/v1")
# -----------------------------------------------------------------------------
# Sessions
# -----------------------------------------------------------------------------
@v1_router.post("/sessions", response_model=SessionResponse)
def create_session(
    request: CreateSessionRequest,
    user: User | None = Depends(current_user),
) -> SessionResponse:
    """Create a new build session and register it in the in-memory stores."""
    new_session_id = str(uuid.uuid4())

    session = SessionResponse(
        id=new_session_id,
        # Would come from user context in real impl
        org_id=str(uuid.uuid4()),
        # Fall back to a random id when the request is unauthenticated.
        user_id=str(user.id) if user else str(uuid.uuid4()),
        sandbox_id=str(uuid.uuid4()),
        status=SessionStatusEnum.ACTIVE,
        created_at=datetime.utcnow(),
        last_activity_at=datetime.utcnow(),
    )

    # Initialize all three stores so later lookups never KeyError.
    _sessions_store[new_session_id] = session
    _messages_store[new_session_id] = []
    _artifacts_store[new_session_id] = []
    return session
@v1_router.put("/sessions/{session_id}", response_model=SessionResponse)
def get_and_wake_session(
    session_id: str,
    user: User | None = Depends(current_user),
) -> SessionResponse:
    """Fetch a session, transitioning it from IDLE back to ACTIVE."""
    session = _sessions_store.get(session_id)
    if session is None:
        raise HTTPException(status_code=404, detail="Session not found")
    if session.status == SessionStatusEnum.IDLE:
        # Waking the session counts as activity.
        session.status = SessionStatusEnum.ACTIVE
        session.last_activity_at = datetime.utcnow()
    return session
@v1_router.get("/sessions", response_model=SessionListResponse)
def list_sessions(
    status: SessionStatusEnum | None = Query(None),
    user: User | None = Depends(current_user),
) -> SessionListResponse:
    """List sessions, optionally filtered by status and scoped to the caller."""
    results = [
        s for s in _sessions_store.values() if status is None or s.status == status
    ]
    # Authenticated callers only see their own sessions.
    if user is not None:
        caller_id = str(user.id)
        results = [s for s in results if s.user_id == caller_id]
    # Most recently active first.
    results = sorted(results, key=lambda s: s.last_activity_at, reverse=True)
    return SessionListResponse(sessions=results, total=len(results))
@v1_router.delete("/sessions/{session_id}")
def delete_session(
    session_id: str,
    user: User | None = Depends(current_user),
) -> dict:
    """End a session and drop all of its associated in-memory state."""
    if _sessions_store.pop(session_id, None) is None:
        raise HTTPException(status_code=404, detail="Session not found")
    for store in (_messages_store, _artifacts_store):
        store.pop(session_id, None)
    return {"status": "deleted", "session_id": session_id}
@v1_router.put("/sessions/{session_id}/name", response_model=SessionResponse)
def rename_session(
    session_id: str,
    request: RenameSessionRequest,
    user: User | None = Depends(current_user),
) -> SessionResponse:
    """Rename a session. If name is None, auto-generate from first message."""
    session = _sessions_store.get(session_id)
    if session is None:
        raise HTTPException(status_code=404, detail="Session not found")
    if request.name is not None:
        # Explicit rename from the caller wins.
        session.name = request.name
    else:
        # Derive a name from the first user message (stand-in for LLM naming).
        first_user_msg = next(
            (m for m in _messages_store.get(session_id, []) if m.role == "user"),
            None,
        )
        if first_user_msg is None:
            session.name = f"Build Session {session_id[:8]}"
        else:
            content = first_user_msg.content
            suffix = "..." if len(content) > 40 else ""
            session.name = content[:40].strip() + suffix
    session.last_activity_at = datetime.utcnow()
    return session
# -----------------------------------------------------------------------------
# Messages
# -----------------------------------------------------------------------------
@v1_router.post("/sessions/{session_id}/messages")
def send_message(
    session_id: str,
    request: SendMessageRequest,
    user: User | None = Depends(current_user),
) -> StreamingResponse:
    """Send a message to the agent and receive a streaming (SSE) response.

    Stores the user message, bumps the session's activity timestamp, then
    returns a text/event-stream that replays the mock agent response.

    Raises:
        HTTPException: 404 if the session does not exist.
    """
    if session_id not in _sessions_store:
        raise HTTPException(status_code=404, detail="Session not found")
    # Store the user message
    user_message = MessageResponse(
        id=str(uuid.uuid4()),
        session_id=session_id,
        role="user",
        content=request.content,
        created_at=datetime.utcnow(),
    )
    # setdefault guards against a session record existing without a message
    # list (stores could desync); sibling read endpoints already use .get().
    _messages_store.setdefault(session_id, []).append(user_message)
    # Sending a message counts as session activity.
    _sessions_store[session_id].last_activity_at = datetime.utcnow()
    return StreamingResponse(
        _generate_streaming_response(session_id),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
        },
    )
@v1_router.get("/sessions/{session_id}/messages", response_model=MessageHistoryResponse)
def get_message_history(
    session_id: str,
    user: User | None = Depends(current_user),
) -> MessageHistoryResponse:
    """Return the full (unpaginated) message history for a session."""
    if session_id not in _sessions_store:
        raise HTTPException(status_code=404, detail="Session not found")
    return MessageHistoryResponse(messages=_messages_store.get(session_id, []))
# -----------------------------------------------------------------------------
# Artifacts
# -----------------------------------------------------------------------------
@v1_router.get("/sessions/{session_id}/artifacts", response_model=ArtifactListResponse)
def list_artifacts(
    session_id: str,
    user: User | None = Depends(current_user),
) -> ArtifactListResponse:
    """Return every artifact recorded for the given session."""
    if session_id not in _sessions_store:
        raise HTTPException(status_code=404, detail="Session not found")
    return ArtifactListResponse(artifacts=_artifacts_store.get(session_id, []))
@v1_router.get(
    "/sessions/{session_id}/artifacts/{artifact_id}",
    response_model=ArtifactMetadataResponse,
)
def get_artifact_metadata(
    session_id: str,
    artifact_id: str,
    user: User | None = Depends(current_user),
) -> ArtifactMetadataResponse:
    """Get metadata for a specific artifact within a session."""
    if session_id not in _sessions_store:
        raise HTTPException(status_code=404, detail="Session not found")
    # First artifact with a matching id, or None.
    match = next(
        (a for a in _artifacts_store.get(session_id, []) if a.id == artifact_id),
        None,
    )
    if match is None:
        raise HTTPException(status_code=404, detail="Artifact not found")
    return match
@v1_router.get("/sessions/{session_id}/artifacts/{artifact_id}/content")
def get_artifact_content(
    session_id: str,
    artifact_id: str,
    user: User | None = Depends(current_user),
) -> Response:
    """Download/stream artifact content (mocked).

    Raises:
        HTTPException: 404 if the session or the artifact does not exist.
    """
    if session_id not in _sessions_store:
        raise HTTPException(status_code=404, detail="Session not found")
    # Previously artifact_id was accepted but never checked, so unknown ids
    # returned dummy content; mirror get_artifact_metadata and return 404.
    if not any(a.id == artifact_id for a in _artifacts_store.get(session_id, [])):
        raise HTTPException(status_code=404, detail="Artifact not found")
    # Return dummy content; the real implementation would read the sandbox fs.
    dummy_content = b"// Dummy artifact content\nexport default function App() {\n return <div>Hello World</div>;\n}\n"
    return Response(
        content=dummy_content,
        media_type="text/plain",
        headers={"Content-Disposition": 'attachment; filename="App.tsx"'},
    )
# -----------------------------------------------------------------------------
# Filesystem (VM Explorer)
# -----------------------------------------------------------------------------
@v1_router.post("/sessions/{session_id}/fs/upload", response_model=UploadResponse)
async def upload_file(
    session_id: str,
    file: UploadFile = File(...),
    user: User | None = Depends(current_user),
) -> UploadResponse:
    """Upload a file to the sandbox's /user-input directory (mocked).

    Raises:
        HTTPException: 404 if the session does not exist.
    """
    if session_id not in _sessions_store:
        raise HTTPException(status_code=404, detail="Session not found")
    content = await file.read()
    # UploadFile.filename may be None; use one fallback for both path and name
    # (previously a missing filename produced the literal path "/user-input/None").
    filename = file.filename or "unknown"
    return UploadResponse(
        path=f"/user-input/{filename}",
        size=len(content),
        name=filename,
    )
@v1_router.get("/sessions/{session_id}/fs", response_model=DirectoryListingResponse)
def list_directory(
    session_id: str,
    path: str = Query("/outputs", description="Path to list"),
    user: User | None = Depends(current_user),
) -> DirectoryListingResponse:
    """List (mocked) directory contents of the sandbox at the given path."""
    if session_id not in _sessions_store:
        raise HTTPException(status_code=404, detail="Session not found")
    entries = _generate_dummy_directory_listing(path)
    return DirectoryListingResponse(path=path, entries=entries)
@v1_router.get("/sessions/{session_id}/fs/read")
def read_file(
    session_id: str,
    path: str = Query(..., description="Path to read"),
    user: User | None = Depends(current_user),
) -> Response:
    """Read file content from the sandbox (mocked per file extension)."""
    if session_id not in _sessions_store:
        raise HTTPException(status_code=404, detail="Session not found")
    # Dummy payloads keyed by filename suffix; first matching entry wins.
    dummy_files: list[tuple[tuple[str, ...], bytes, str]] = [
        (
            (".json",),
            b'{\n "name": "dashboard",\n "version": "1.0.0"\n}',
            "application/json",
        ),
        (
            (".md",),
            b"# Dashboard\n\nThis is a generated dashboard application.",
            "text/markdown",
        ),
        (
            (".tsx", ".ts"),
            b"export default function Component() {\n return <div>Hello</div>;\n}",
            "text/typescript",
        ),
    ]
    # Generic fallback for unrecognized extensions.
    content, media_type = b"Dummy file content", "text/plain"
    for suffixes, body, mtype in dummy_files:
        if path.endswith(suffixes):
            content, media_type = body, mtype
            break
    return Response(content=content, media_type=media_type)
# -----------------------------------------------------------------------------
# Rate Limiting
# -----------------------------------------------------------------------------
@v1_router.get("/limit", response_model=RateLimitResponse)
def get_rate_limit(
    user: User | None = Depends(current_user),
) -> RateLimitResponse:
    """Get rate limit information for the current user."""
    # Mock: any authenticated user is treated as paid.
    is_paid = user is not None
    # Total user-authored messages across every session.
    total_messages = sum(
        sum(1 for m in msgs if m.role == "user")
        for msgs in _messages_store.values()
    )
    limit = 50 if is_paid else 10
    return RateLimitResponse(
        is_limited=total_messages >= limit,
        limit_type="weekly" if is_paid else "total",
        messages_used=total_messages,
        limit=limit,
        reset_timestamp=datetime.utcnow().isoformat() if is_paid else None,
    )
# -----------------------------------------------------------------------------
# Build Connectors
# -----------------------------------------------------------------------------
@v1_router.get("/connectors", response_model=BuildConnectorListResponse)
def get_build_connectors(
    user: User | None = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> BuildConnectorListResponse:
    """Get all connectors for the build admin panel.

    Returns all FILE_SYSTEM connector-credential pairs visible to the user,
    each mapped to a simplified display status (CONNECTED / INDEXING / ERROR /
    DELETING) plus an optional error message.
    """
    cc_pairs = get_connector_credential_pairs_for_user(
        db_session=db_session,
        user=user,
        get_editable=False,
        eager_load_connector=True,
        eager_load_credential=True,
        processing_mode=ProcessingMode.FILE_SYSTEM,  # Only show FILE_SYSTEM connectors
    )
    connectors: list[BuildConnectorInfo] = []
    for cc_pair in cc_pairs:
        # Skip ingestion API connectors and default pairs
        if cc_pair.connector.source == DocumentSource.INGESTION_API:
            continue
        if cc_pair.name == "DefaultCCPair":
            continue
        # Determine status. Branch order matters: pair-level states (deleting,
        # invalid) take precedence over states derived from index attempts.
        error_message: str | None = None
        if cc_pair.status == ConnectorCredentialPairStatus.DELETING:
            status = BuildConnectorStatus.DELETING
        elif cc_pair.status == ConnectorCredentialPairStatus.INVALID:
            status = BuildConnectorStatus.ERROR
            error_message = "Connector credentials are invalid"
        else:
            # Check the latest *finished* index attempt for errors
            latest_attempt = get_latest_index_attempt_for_cc_pair_id(
                db_session=db_session,
                connector_credential_pair_id=cc_pair.id,
                secondary_index=False,
                only_finished=True,
            )
            if latest_attempt and latest_attempt.status == IndexingStatus.FAILED:
                status = BuildConnectorStatus.ERROR
                error_message = latest_attempt.error_msg
            elif (
                latest_attempt
                and latest_attempt.status == IndexingStatus.COMPLETED_WITH_ERRORS
            ):
                status = BuildConnectorStatus.ERROR
                error_message = "Indexing completed with errors"
            elif cc_pair.status == ConnectorCredentialPairStatus.PAUSED:
                # Paused pairs with no recorded failure still show as connected.
                status = BuildConnectorStatus.CONNECTED
            elif cc_pair.last_successful_index_time is None:
                # Never successfully indexed - check if currently indexing
                # First check cc_pair status for scheduled/initial indexing
                if cc_pair.status in (
                    ConnectorCredentialPairStatus.SCHEDULED,
                    ConnectorCredentialPairStatus.INITIAL_INDEXING,
                ):
                    status = BuildConnectorStatus.INDEXING
                else:
                    # Re-query including unfinished attempts to catch in-flight
                    # or queued indexing runs.
                    in_progress_attempt = get_latest_index_attempt_for_cc_pair_id(
                        db_session=db_session,
                        connector_credential_pair_id=cc_pair.id,
                        secondary_index=False,
                        only_finished=False,
                    )
                    if (
                        in_progress_attempt
                        and in_progress_attempt.status == IndexingStatus.IN_PROGRESS
                    ):
                        status = BuildConnectorStatus.INDEXING
                    elif (
                        in_progress_attempt
                        and in_progress_attempt.status == IndexingStatus.NOT_STARTED
                    ):
                        status = BuildConnectorStatus.INDEXING
                    else:
                        # Has a finished attempt but never succeeded - likely error.
                        # latest_attempt here is the finished attempt fetched above.
                        status = BuildConnectorStatus.ERROR
                        error_message = (
                            latest_attempt.error_msg
                            if latest_attempt
                            else "Initial indexing failed"
                        )
            else:
                # Indexed successfully at some point with no newer failure.
                status = BuildConnectorStatus.CONNECTED
        connectors.append(
            BuildConnectorInfo(
                cc_pair_id=cc_pair.id,
                connector_id=cc_pair.connector.id,
                credential_id=cc_pair.credential.id,
                source=cc_pair.connector.source.value,
                name=cc_pair.name or cc_pair.connector.name or "Unnamed",
                status=status,
                docs_indexed=0,  # Would need to query for this
                last_indexed=cc_pair.last_successful_index_time,
                error_message=error_message,
            )
        )
    return BuildConnectorListResponse(connectors=connectors)

View File

@@ -9,6 +9,7 @@ from onyx.db.models import User
from onyx.db.notification import dismiss_notification
from onyx.db.notification import get_notification_by_id
from onyx.db.notification import get_notifications
from onyx.server.features.build.utils import ensure_build_mode_intro_notification
from onyx.server.features.release_notes.utils import (
ensure_release_notes_fresh_and_notify,
)
@@ -34,12 +35,17 @@ def get_notifications_api(
- Checking for misconfigurations due to version changes
- Explicitly announcing breaking changes
"""
# If more background checks are added, this should be moved to a helper function
# Background checks that create notifications
try:
ensure_build_mode_intro_notification(user, db_session)
except Exception:
logger.exception(
"Failed to check for build mode intro in notifications endpoint"
)
try:
ensure_release_notes_fresh_and_notify(db_session)
except Exception:
# Log exception but don't fail the entire endpoint
# Users can still see their existing notifications
logger.exception("Failed to check for release notes in notifications endpoint")
notifications = [

View File

@@ -1,5 +1,7 @@
# This file was autogenerated by uv via the following command:
# uv export --no-emit-project --no-default-groups --no-hashes --extra backend -o backend/requirements/default.txt
agent-client-protocol==0.7.1
# via onyx
aioboto3==15.1.0
# via onyx
aiobotocore==2.24.0
@@ -31,6 +33,7 @@ annotated-types==0.7.0
# via pydantic
anyio==4.11.0
# via
# claude-agent-sdk
# google-genai
# httpx
# mcp
@@ -137,6 +140,8 @@ chevron==0.14.0
# via braintrust
chonkie==1.0.10
# via onyx
claude-agent-sdk==0.1.19
# via onyx
click==8.3.1
# via
# celery
@@ -569,6 +574,7 @@ matrix-client==0.3.2
# via zulip
mcp==1.25.0
# via
# claude-agent-sdk
# fastmcp
# onyx
mdurl==0.1.2
@@ -806,6 +812,7 @@ pycryptodome==3.19.1
# via onyx
pydantic==2.11.7
# via
# agent-client-protocol
# cohere
# exa-py
# fastapi

View File

@@ -1,5 +1,7 @@
# This file was autogenerated by uv via the following command:
# uv export --no-emit-project --no-default-groups --no-hashes --extra dev -o backend/requirements/dev.txt
agent-client-protocol==0.7.1
# via onyx
aioboto3==15.1.0
# via onyx
aiobotocore==2.24.0
@@ -26,9 +28,12 @@ annotated-types==0.7.0
# via pydantic
anyio==4.11.0
# via
# claude-agent-sdk
# google-genai
# httpx
# mcp
# openai
# sse-starlette
# starlette
appnope==0.1.4 ; sys_platform == 'darwin'
# via ipykernel
@@ -62,14 +67,18 @@ certifi==2025.11.12
# httpx
# requests
# sentry-sdk
cffi==2.0.0 ; implementation_name == 'pypy'
# via pyzmq
cffi==2.0.0 ; implementation_name == 'pypy' or platform_python_implementation != 'PyPy'
# via
# cryptography
# pyzmq
cfgv==3.4.0
# via pre-commit
charset-normalizer==3.4.4
# via requests
classify-imports==4.2.0
# via reorder-python-imports-black
claude-agent-sdk==0.1.19
# via onyx
click==8.3.1
# via
# black
@@ -87,6 +96,8 @@ comm==0.2.3
# via ipykernel
contourpy==1.3.3
# via matplotlib
cryptography==46.0.3
# via pyjwt
cycler==0.12.1
# via matplotlib
debugpy==1.8.17
@@ -213,9 +224,12 @@ httpx==0.28.1
# cohere
# google-genai
# litellm
# mcp
# openai
httpx-sse==0.4.3
# via cohere
# via
# cohere
# mcp
huggingface-hub==0.35.3
# via tokenizers
identify==2.6.15
@@ -248,7 +262,9 @@ jmespath==1.0.1
# boto3
# botocore
jsonschema==4.25.1
# via litellm
# via
# litellm
# mcp
jsonschema-specifications==2025.9.1
# via jsonschema
jupyter-client==8.6.3
@@ -275,6 +291,8 @@ matplotlib-inline==0.2.1
# via
# ipykernel
# ipython
mcp==1.25.0
# via claude-agent-sdk
multidict==6.7.0
# via
# aiobotocore
@@ -392,23 +410,30 @@ pyasn1==0.6.2
# rsa
pyasn1-modules==0.4.2
# via google-auth
pycparser==2.23 ; implementation_name == 'pypy'
pycparser==2.23 ; (implementation_name != 'PyPy' and platform_python_implementation != 'PyPy') or (implementation_name == 'pypy' and platform_python_implementation == 'PyPy')
# via cffi
pydantic==2.11.7
# via
# agent-client-protocol
# cohere
# fastapi
# google-cloud-aiplatform
# google-genai
# litellm
# mcp
# onyx
# openai
# pydantic-settings
pydantic-core==2.33.2
# via pydantic
pydantic-settings==2.12.0
# via mcp
pygments==2.19.2
# via
# ipython
# ipython-pygments-lexers
pyjwt==2.10.1
# via mcp
pyparsing==3.2.5
# via matplotlib
pytest==8.3.5
@@ -436,7 +461,12 @@ python-dateutil==2.8.2
python-dotenv==1.1.1
# via
# litellm
# pydantic-settings
# pytest-dotenv
python-multipart==0.0.20
# via mcp
pywin32==311 ; sys_platform == 'win32'
# via mcp
pyyaml==6.0.3
# via
# huggingface-hub
@@ -491,11 +521,14 @@ sqlalchemy==2.0.15
# via
# alembic
# pytest-alembic
sse-starlette==3.0.3
# via mcp
stack-data==0.6.3
# via ipython
starlette==0.47.2
# via
# fastapi
# mcp
# prometheus-fastapi-instrumentator
tenacity==9.1.2
# via
@@ -571,6 +604,7 @@ typing-extensions==4.15.0
# grpcio
# huggingface-hub
# ipython
# mcp
# mypy
# openai
# pydantic
@@ -581,7 +615,10 @@ typing-extensions==4.15.0
# starlette
# typing-inspection
typing-inspection==0.4.2
# via pydantic
# via
# mcp
# pydantic
# pydantic-settings
tzdata==2025.2
# via faker
urllib3==2.6.3
@@ -591,7 +628,9 @@ urllib3==2.6.3
# sentry-sdk
# types-requests
uvicorn==0.35.0
# via onyx
# via
# mcp
# onyx
virtualenv==20.36.1
# via pre-commit
voyageai==0.2.3

View File

@@ -1,5 +1,7 @@
# This file was autogenerated by uv via the following command:
# uv export --no-emit-project --no-default-groups --no-hashes --extra ee -o backend/requirements/ee.txt
agent-client-protocol==0.7.1
# via onyx
aioboto3==15.1.0
# via onyx
aiobotocore==2.24.0
@@ -24,9 +26,12 @@ annotated-types==0.7.0
# via pydantic
anyio==4.11.0
# via
# claude-agent-sdk
# google-genai
# httpx
# mcp
# openai
# sse-starlette
# starlette
attrs==25.4.0
# via
@@ -54,8 +59,12 @@ certifi==2025.11.12
# httpx
# requests
# sentry-sdk
cffi==2.0.0 ; platform_python_implementation != 'PyPy'
# via cryptography
charset-normalizer==3.4.4
# via requests
claude-agent-sdk==0.1.19
# via onyx
click==8.3.1
# via
# litellm
@@ -66,6 +75,8 @@ colorama==0.4.6 ; sys_platform == 'win32'
# via
# click
# tqdm
cryptography==46.0.3
# via pyjwt
decorator==5.2.1
# via retry
discord-py==2.4.0
@@ -168,9 +179,12 @@ httpx==0.28.1
# cohere
# google-genai
# litellm
# mcp
# openai
httpx-sse==0.4.3
# via cohere
# via
# cohere
# mcp
huggingface-hub==0.35.3
# via tokenizers
idna==3.11
@@ -191,13 +205,17 @@ jmespath==1.0.1
# boto3
# botocore
jsonschema==4.25.1
# via litellm
# via
# litellm
# mcp
jsonschema-specifications==2025.9.1
# via jsonschema
litellm==1.80.11
# via onyx
markupsafe==3.0.3
# via jinja2
mcp==1.25.0
# via claude-agent-sdk
monotonic==1.6
# via posthog
multidict==6.7.0
@@ -263,17 +281,26 @@ pyasn1==0.6.2
# rsa
pyasn1-modules==0.4.2
# via google-auth
pycparser==2.23 ; implementation_name != 'PyPy' and platform_python_implementation != 'PyPy'
# via cffi
pydantic==2.11.7
# via
# agent-client-protocol
# cohere
# fastapi
# google-cloud-aiplatform
# google-genai
# litellm
# mcp
# onyx
# openai
# pydantic-settings
pydantic-core==2.33.2
# via pydantic
pydantic-settings==2.12.0
# via mcp
pyjwt==2.10.1
# via mcp
python-dateutil==2.8.2
# via
# aiobotocore
@@ -281,7 +308,13 @@ python-dateutil==2.8.2
# google-cloud-bigquery
# posthog
python-dotenv==1.1.1
# via litellm
# via
# litellm
# pydantic-settings
python-multipart==0.0.20
# via mcp
pywin32==311 ; sys_platform == 'win32'
# via mcp
pyyaml==6.0.3
# via huggingface-hub
referencing==0.36.2
@@ -323,9 +356,12 @@ sniffio==1.3.1
# via
# anyio
# openai
sse-starlette==3.0.3
# via mcp
starlette==0.47.2
# via
# fastapi
# mcp
# prometheus-fastapi-instrumentator
tenacity==9.1.2
# via
@@ -353,6 +389,7 @@ typing-extensions==4.15.0
# google-genai
# grpcio
# huggingface-hub
# mcp
# openai
# pydantic
# pydantic-core
@@ -360,7 +397,10 @@ typing-extensions==4.15.0
# starlette
# typing-inspection
typing-inspection==0.4.2
# via pydantic
# via
# mcp
# pydantic
# pydantic-settings
urllib3==2.6.3
# via
# botocore
@@ -368,7 +408,9 @@ urllib3==2.6.3
# sentry-sdk
# types-requests
uvicorn==0.35.0
# via onyx
# via
# mcp
# onyx
voyageai==0.2.3
# via onyx
websockets==15.0.1

View File

@@ -2,6 +2,8 @@
# uv export --no-emit-project --no-default-groups --no-hashes --extra model_server -o backend/requirements/model_server.txt
accelerate==1.6.0
# via onyx
agent-client-protocol==0.7.1
# via onyx
aioboto3==15.1.0
# via onyx
aiobotocore==2.24.0
@@ -28,9 +30,12 @@ annotated-types==0.7.0
# via pydantic
anyio==4.11.0
# via
# claude-agent-sdk
# google-genai
# httpx
# mcp
# openai
# sse-starlette
# starlette
attrs==25.4.0
# via
@@ -60,8 +65,12 @@ certifi==2025.11.12
# httpx
# requests
# sentry-sdk
cffi==2.0.0 ; platform_python_implementation != 'PyPy'
# via cryptography
charset-normalizer==3.4.4
# via requests
claude-agent-sdk==0.1.19
# via onyx
click==8.3.1
# via
# celery
@@ -82,6 +91,8 @@ colorama==0.4.6 ; sys_platform == 'win32'
# via
# click
# tqdm
cryptography==46.0.3
# via pyjwt
decorator==5.2.1
# via retry
discord-py==2.4.0
@@ -193,9 +204,12 @@ httpx==0.28.1
# cohere
# google-genai
# litellm
# mcp
# openai
httpx-sse==0.4.3
# via cohere
# via
# cohere
# mcp
huggingface-hub==0.35.3
# via
# accelerate
@@ -224,7 +238,9 @@ jmespath==1.0.1
joblib==1.5.2
# via scikit-learn
jsonschema==4.25.1
# via litellm
# via
# litellm
# mcp
jsonschema-specifications==2025.9.1
# via jsonschema
kombu==5.5.4
@@ -233,6 +249,8 @@ litellm==1.80.11
# via onyx
markupsafe==3.0.3
# via jinja2
mcp==1.25.0
# via claude-agent-sdk
mpmath==1.3.0
# via sympy
multidict==6.7.0
@@ -351,17 +369,26 @@ pyasn1==0.6.2
# rsa
pyasn1-modules==0.4.2
# via google-auth
pycparser==2.23 ; implementation_name != 'PyPy' and platform_python_implementation != 'PyPy'
# via cffi
pydantic==2.11.7
# via
# agent-client-protocol
# cohere
# fastapi
# google-cloud-aiplatform
# google-genai
# litellm
# mcp
# onyx
# openai
# pydantic-settings
pydantic-core==2.33.2
# via pydantic
pydantic-settings==2.12.0
# via mcp
pyjwt==2.10.1
# via mcp
python-dateutil==2.8.2
# via
# aiobotocore
@@ -369,7 +396,13 @@ python-dateutil==2.8.2
# celery
# google-cloud-bigquery
python-dotenv==1.1.1
# via litellm
# via
# litellm
# pydantic-settings
python-multipart==0.0.20
# via mcp
pywin32==311 ; sys_platform == 'win32'
# via mcp
pyyaml==6.0.3
# via
# accelerate
@@ -429,9 +462,12 @@ sniffio==1.3.1
# via
# anyio
# openai
sse-starlette==3.0.3
# via mcp
starlette==0.47.2
# via
# fastapi
# mcp
# prometheus-fastapi-instrumentator
# sentry-sdk
sympy==1.14.0
@@ -478,6 +514,7 @@ typing-extensions==4.15.0
# google-genai
# grpcio
# huggingface-hub
# mcp
# openai
# pydantic
# pydantic-core
@@ -487,7 +524,10 @@ typing-extensions==4.15.0
# torch
# typing-inspection
typing-inspection==0.4.2
# via pydantic
# via
# mcp
# pydantic
# pydantic-settings
tzdata==2025.2
# via kombu
urllib3==2.6.3
@@ -497,7 +537,9 @@ urllib3==2.6.3
# sentry-sdk
# types-requests
uvicorn==0.35.0
# via onyx
# via
# mcp
# onyx
vine==5.1.0
# via
# amqp

814
cc4a-plans/cc4a-overview.md Normal file
View File

@@ -0,0 +1,814 @@
# CLI Agent Platform Architecture
## Overview
A platform enabling users to interact with CLI-based AI agents running in isolated containers through a chat interface. Agents can generate artifacts (web apps, PowerPoints, Word docs, markdown, Excel sheets, images) that are viewable and explorable within the UI.
---
## Core Services
### 1. Frontend (Next.js)
- Chat interface for user interaction
- Slide-out panel for VM/filesystem exploration
- Real-time artifact rendering and preview
- Session management UI
### 2. Backend (FastAPI)
- Session lifecycle management
- Artifact tracking and retrieval
- Request proxying to CLI agents
- Streaming response handling
- **Sandbox Manager** (`sandbox_manager.py` - synchronous operations):
- `provision_sandbox()` - Create and start containers
- `restore_sandbox()` - Restore from snapshots
- `terminate_sandbox()` - Stop containers and snapshot
- All operations are SYNCHRONOUS (no Celery)
- **Background Jobs** (Celery - ONLY for idle timeout):
- `check_build_sandbox_idle` - Periodic task to terminate idle sandboxes
### 3. PostgreSQL
- Session metadata
- Artifact registry
- Sandbox state tracking
- Organization/user data
---
## Data Models
### Session
```
- id: UUID
- user_id: UUID (nullable - supports anonymous sessions)
- status: enum (active, idle)
- created_at: timestamp
- last_activity_at: timestamp
- sandbox: Sandbox (one-to-one relationship)
- artifacts: list[Artifact] (one-to-many relationship)
- snapshots: list[Snapshot] (one-to-many relationship)
```
### Artifact
```
- id: UUID
- session_id: UUID
- type: enum (web_app, pptx, docx, image, markdown, excel)
- path: string (relative to outputs/)
- name: string
- created_at: timestamp
- updated_at: timestamp
```
### Sandbox
```
- id: UUID
- session_id: UUID (unique - one-to-one with session)
- container_id: string (nullable)
- status: enum (provisioning, running, idle, terminated)
- created_at: timestamp
- last_heartbeat: timestamp (nullable)
```
### Snapshot
```
- id: UUID
- session_id: UUID
- storage_path: string
- created_at: timestamp
- size_bytes: bigint
```
---
## Volume Architecture
Each sandbox container mounts three volumes:
### 1. Knowledge Volume (Read-Only)
- **Source**: Organization's indexed file store
- **Mount**: `/knowledge`
- **Purpose**: Agent can reference org docs, code, data
- **Details**: See persistant-file-store-indexing.md
### 2. Outputs Volume (Read-Write)
- **Source**: Pre-built template OR restored snapshot
- **Mount**: `/outputs`
- **Contents**:
```
/outputs
├── web/ # Next.js skeleton app
│ ├── package.json
│ ├── src/
│ └── ...
├── documents/ # Markdown outputs
├── presentations/ # .pptx files
├── charts/ # Generated visualizations
│ └── venv/ # Python environment
└── manifest.json # Artifact registry
```
### 3. Instructions Volume (Read-Only, Dynamic)
- **Source**: Generated per-session
- **Mount**: `/instructions`
- **Contents**:
```
/instructions
└── INSTRUCTIONS.md # Agent system prompt + context
```
---
## Sequence Diagram: Standard User Interaction
```
┌──────────┐ ┌──────────┐ ┌─────────────────────────┐ ┌──────────┐
│ User │ │ Frontend │ │ Backend (FastAPI) │ │ CLI │
│ Browser │ │ (Next.js)│ │ + Sandbox Module │ │ Agent │
└────┬─────┘ └────┬─────┘ └───────────┬─────────────┘ └────┬─────┘
│ │ │ │
│ 1. Start Chat │ │ │
│───────────────>│ │ │
│ │ │ │
│ │ 2. POST /sessions │ │
│ │──────────────────────>│ │
│ │ │ │
│ │<──────────────────────│ Session Created │
│<───────────────│ Show Chat UI │ (status: initializing) │
│ │ │ │
│ │ │ 3. Provision Sandbox │
│ │ │ (async, in background)
│ │ │ - Mount knowledge vol
│ │ │ - Mount outputs vol │
│ │ │ - Mount instructions│
│ │ │ - Start container │
│ │ │───────────────────────>│
│ │ │ │
│ 4. Send │ │ (provisioning...) │
│ "Build me a │ │ │
│ dashboard" │ │ │
│───────────────>│ │ │
│ │ │ │
│ │ 5. POST /sessions/{id}/messages │
│ │──────────────────────>│ │
│ │ │ │
│ │ 6. Open SSE Stream │ │
│ │<─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─│ │
│ │ │ │
│ Show │ SSE: initializing │ │
│ "Initializing │<──────────────────────│ │
│ sandbox..." │ │ │
│<───────────────│ │ │
│ │ │ │
│ │ │<─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┤
│ │ │ Sandbox Ready │
│ │ │ │
│ │ │ 7. Proxy to CLI Agent │
│ │ │───────────────────────>│
│ │ │ │
│ │ │ 8. Agent │
│ │ │ Executes│
│ │ │ │ │
│ │ │ 9. Stream: {"type":"step",
│ │ │ "content":"Reading│
│ │ │ requirements..."}│
│ │ │<───────────────────────│
│ │ │ │
│ │ SSE: step │ │
│ │<──────────────────────│ │
│ Show step │ │ │
│<───────────────│ │ │
│ │ │ │
│ │ │ 10. Stream: {"type":"step",
│ │ │ "content":"Creating
│ │ │ components..."} │
│ │ │<───────────────────────│
│ │ SSE: step │ │
│ │<──────────────────────│ │
│ Show step │ │ │
│<───────────────│ │ │
│ │ │ │
│ │ │ 11. Agent │
│ │ │ writes │
│ │ │ files to│
│ │ │ /outputs/web/
│ │ │ │
│ │ │ 12. Stream: {"type":"artifact",
│ │ │ "artifact":{"type":
│ │ │ "web_app",...}}
│ │ │<───────────────────────│
│ │ │ │
│ │ │ 13. Save artifact │
│ │ │ to Postgres │
│ │ │ │
│ │ SSE: artifact │ │
│ │<──────────────────────│ │
│ Show artifact │ │ │
│ preview │ │ │
│<───────────────│ │ │
│ │ │ │
│ │ │ 14. Stream: {"type":"output",
│ │ │ "content":"I've built
│ │ │ your dashboard..."}
│ │ │<───────────────────────│
│ │ SSE: output │ │
│ │<──────────────────────│ │
│ Show response │ │ │
│<───────────────│ │ │
│ │ │ │
│ │ │ 15. Stream: {"type":"done"}
│ │ │<───────────────────────│
│ │ SSE: done │ │
│ │<──────────────────────│ │
│ Enable input │ │ │
│<───────────────│ │ │
│ │ │ │
│ 16. Click to │ │ │
│ expand artifact│ │ │
│───────────────>│ │ │
│ │ │ │
│ │ 17. GET /sessions/{id}/artifacts/{id}/content │
│ │──────────────────────>│ │
│ │ │ │
│ │ │ 18. Read from sandbox │
│ │ │ filesystem │
│ │<──────────────────────│ │
│ Render full │ │ │
│ artifact view │ │ │
│<───────────────│ │ │
│ │ │ │
▼ ▼ ▼ ▼
```
### Flow Summary
| Step | Action | Description |
|------|--------|-------------|
| 1-3 | **Session Init** | User starts chat → Session created immediately → Sandbox provisions async |
| 4-7 | **Message Send** | User sends prompt → If sandbox not ready, shows "Initializing sandbox..." → Once ready, proxies to CLI agent |
| 8-11 | **Agent Execution** | CLI agent processes request, streams steps, writes files to `/outputs` |
| 12-13 | **Artifact Created** | Agent signals artifact creation → Backend persists metadata |
| 14-15 | **Completion** | Agent sends final response and done signal |
| 16-18 | **Artifact View** | User expands artifact → Backend fetches content from sandbox |
---
## Request Flow
### New Session Flow (Synchronous)
```
1. User creates session via POST /api/build/sessions
2. Backend creates Session record
3. Backend SYNCHRONOUSLY provisions sandbox (sandbox_manager.py):
a. Creates Sandbox record with status=PROVISIONING
b. Prepares knowledge volume (bind mount)
c. Copies outputs template to session-specific volume
d. Generates instructions file
e. Starts container with volumes mounted
f. Updates status=RUNNING with container_id
4. Returns SessionResponse with running sandbox
5. Frontend can now send messages immediately
```
### Follow-up Message Flow (Container Running)
```
1. User sends follow-up message
2. Backend checks Session → Sandbox is running
3. Backend updates last_activity_at timestamp
4. Backend proxies message directly to CLI agent
5. CLI agent streams steps/responses back
6. Backend streams to frontend via SSE
7. Frontend renders chat + any generated artifacts
```
### Follow-up Message Flow (Container Terminated)
```
1. User accesses session via GET /api/build/sessions/{id}
2. Backend checks Session → Sandbox status=TERMINATED
3. Backend SYNCHRONOUSLY restores sandbox (sandbox_manager.py):
a. Gets latest snapshot from DB
b. Retrieves snapshot from file store
c. Extracts to new outputs volume
d. Starts container with restored state
e. Updates status=RUNNING with new container_id
4. Returns SessionResponse with running sandbox
5. User can now send messages
```
### Idle Timeout Flow (Celery Background Job - ONLY use of Celery)
```
1. Celery beat schedules check_build_sandbox_idle task (every 5 minutes)
2. Task queries for sandboxes with last_activity_at > 15 minutes ago
3. For each idle sandbox:
a. Calls terminate_sandbox(session_id, create_snapshot=True)
b. Snapshot outputs volume to file store
c. Create Snapshot record (linked to session_id)
d. Terminate container
e. Update Sandbox status=TERMINATED
4. Sandbox will be restored on next access
```
---
## API Endpoints
### Sessions
```
POST /api/build/sessions # Create new session
GET /api/build/sessions/{id} # Get session details + wake it up
GET /api/build/sessions # List all sessions (with filters)
DELETE /api/build/sessions/{id} # End session (full cleanup)
```
### Messages
```
POST /api/build/sessions/{id}/messages # Send message (streaming response)
GET /api/build/sessions/{id}/messages # Get message history (no pagination)
```
### Artifacts
```
GET /api/build/sessions/{id}/artifacts # List artifacts
GET /api/build/sessions/{id}/artifacts/{artifact_id} # Get artifact metadata
GET /api/build/sessions/{id}/artifacts/{artifact_id}/content # Download/stream content
```
### Filesystem (VM Explorer)
```
POST /api/build/sessions/{id}/fs/upload # Upload file to sandbox, to /user-input directory (or similar)
GET /api/build/sessions/{id}/fs?path=/outputs # List directory
GET /api/build/sessions/{id}/fs/read?path=... # Read file content (maybe clicking on "external files" takes you directly to the source)
```
### Rate Limiting
```
GET /api/build/limit # unpaid gets 10 messages total, paid gets 50 messages / week
```
### Sandbox Manager (Synchronous Internal Functions)
Located in `backend/onyx/server/features/build/sandbox_manager.py`:
```python
# All operations are SYNCHRONOUS - called directly by API endpoints
provision_sandbox(session_id, db_session) # Provision new sandbox container
restore_sandbox(session_id, db_session) # Restore sandbox from snapshot
terminate_sandbox(session_id, db_session, create_snapshot) # Terminate sandbox
```
### Background Jobs (Celery - ONLY for idle timeout)
Located in `backend/onyx/background/celery/tasks/build_sandbox/tasks.py`:
```python
@shared_task
def check_build_sandbox_idle(tenant_id) # Periodic task to terminate idle sandboxes
```
**IMPORTANT**: Provisioning, restoration, and termination are NOT done via Celery.
They are synchronous operations called directly within API request handlers.
---
## Streaming Protocol
### Pydantic Models (Backend)
```python
from datetime import datetime, timezone
from enum import Enum
from typing import Annotated, Literal, Union

from pydantic import BaseModel, Field
class StreamingType(str, Enum):
    """Enum defining all streaming packet types. Single source of truth for type strings.

    Inherits from ``str`` (matching ``ArtifactType`` below) so members compare
    equal to their raw wire strings, e.g. ``StreamingType.DONE == "done"``.
    """

    # Control packets
    DONE = "done"
    ERROR = "error"
    # Agent activity packets
    STEP_START = "step_start"
    STEP_DELTA = "step_delta"
    STEP_END = "step_end"
    # Output packets (final response)
    OUTPUT_START = "output_start"
    OUTPUT_DELTA = "output_delta"
    # Artifact packets
    ARTIFACT_CREATED = "artifact_created"
    ARTIFACT_UPDATED = "artifact_updated"
    # Tool usage packets
    TOOL_START = "tool_start"
    TOOL_OUTPUT = "tool_output"
    TOOL_END = "tool_end"
    # File operation packets
    FILE_WRITE = "file_write"
    FILE_DELETE = "file_delete"
class BasePacket(BaseModel):
    """Base class for all streaming packets.

    Subclasses narrow ``type`` to a ``Literal[...]`` so the discriminated
    union in ``StreamPacket`` can dispatch on it.
    """

    # Overridden with a Literal[...] in every concrete packet class.
    type: str = ""
    # Creation time in UTC. datetime.utcnow() is deprecated (Python 3.12+) and
    # produces naive datetimes; an aware UTC timestamp serializes with the
    # explicit offset shown in the SSE examples ("...T10:30:00Z").
    timestamp: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
################################################
# Control Packets
################################################
class DonePacket(BasePacket):
    """Signals completion of the agent's response."""

    type: Literal["done"] = StreamingType.DONE.value
    # Optional one-line summary of what the agent accomplished.
    summary: str | None = None


class ErrorPacket(BasePacket):
    """Signals an error occurred during processing."""

    type: Literal["error"] = StreamingType.ERROR.value
    # Human-readable description of the failure.
    message: str
    # Machine-readable error code, e.g., "TIMEOUT", "SANDBOX_ERROR", "LLM_ERROR".
    code: str | None = None
    # NOTE(review): presumably True means the session can continue after the
    # error — exact client handling is not defined here; confirm with consumers.
    recoverable: bool = False
################################################
# Agent Step Packets (thinking/progress)
################################################
class StepStart(BasePacket):
    """Signals the start of a new agent step/action."""

    # Unique identifier for this step; matching StepDelta/StepEnd packets
    # carry the same id.
    step_id: str
    title: str | None = None  # e.g., "Reading requirements", "Creating components"
    type: Literal["step_start"] = StreamingType.STEP_START.value


class StepDelta(BasePacket):
    """Streaming content for an agent step."""

    type: Literal["step_delta"] = StreamingType.STEP_DELTA.value
    # Id of the step this delta belongs to (from the preceding StepStart).
    step_id: str
    content: str  # Incremental text content


class StepEnd(BasePacket):
    """Signals completion of an agent step."""

    type: Literal["step_end"] = StreamingType.STEP_END.value
    step_id: str
    # Outcome of the step; defaults to "success".
    status: Literal["success", "failed", "skipped"] = "success"
################################################
# Output Packets (final agent response)
################################################
class OutputStart(BasePacket):
    """Signals the start of the agent's final output."""

    type: Literal["output_start"] = StreamingType.OUTPUT_START.value


class OutputDelta(BasePacket):
    """Streaming content for the agent's final output."""

    type: Literal["output_delta"] = StreamingType.OUTPUT_DELTA.value
    content: str  # Incremental text content
################################################
# Artifact Packets
################################################
class ArtifactType(str, Enum):
    """Kinds of artifacts the agent can produce.

    Values are the wire strings carried in ``ArtifactMetadata.type``.
    """

    WEB_APP = "web_app"
    PPTX = "pptx"
    DOCX = "docx"
    IMAGE = "image"
    MARKDOWN = "markdown"
    EXCEL = "excel"


class ArtifactMetadata(BaseModel):
    """Metadata for an artifact."""

    id: str  # UUID
    type: ArtifactType
    name: str
    path: str  # Relative path within /outputs
    preview_url: str | None = None  # URL for inline preview if available


class ArtifactCreated(BasePacket):
    """Signals a new artifact has been created."""

    type: Literal["artifact_created"] = StreamingType.ARTIFACT_CREATED.value
    artifact: ArtifactMetadata


class ArtifactUpdated(BasePacket):
    """Signals an existing artifact has been updated."""

    type: Literal["artifact_updated"] = StreamingType.ARTIFACT_UPDATED.value
    artifact: ArtifactMetadata
    changes: list[str] | None = None  # Description of what changed
################################################
# Tool Usage Packets
################################################
class ToolStart(BasePacket):
    """Signals the agent is invoking a tool."""

    type: Literal["tool_start"] = StreamingType.TOOL_START.value
    tool_name: str  # e.g., "bash", "read_file", "write_file", "web_search"
    tool_input: dict | str | None = None  # Input parameters


class ToolOutput(BasePacket):
    """Output from a tool invocation."""

    type: Literal["tool_output"] = StreamingType.TOOL_OUTPUT.value
    tool_name: str
    output: str | None = None
    # True when the tool itself reported a failure (distinct from ErrorPacket,
    # which signals a stream-level error).
    is_error: bool = False


class ToolEnd(BasePacket):
    """Signals completion of a tool invocation."""

    type: Literal["tool_end"] = StreamingType.TOOL_END.value
    tool_name: str
    status: Literal["success", "failed"] = "success"
################################################
# File Operation Packets
################################################
class FileWrite(BasePacket):
    """Signals a file was written to the outputs volume."""

    type: Literal["file_write"] = StreamingType.FILE_WRITE.value
    path: str  # Relative path within /outputs
    size_bytes: int | None = None


class FileDelete(BasePacket):
    """Signals a file was deleted from the outputs volume."""

    type: Literal["file_delete"] = StreamingType.FILE_DELETE.value
    path: str
################################################
# Packet Union
################################################
# Discriminated union of all possible packet types.
# NOTE: every member declares `type` as a Literal[...], which pydantic requires
# for the `discriminator` below; BasePacket itself (plain `str` type) is
# deliberately not part of the union.
PacketObj = Union[
    # Control packets
    DonePacket,
    ErrorPacket,
    # Step packets
    StepStart,
    StepDelta,
    StepEnd,
    # Output packets
    OutputStart,
    OutputDelta,
    # Artifact packets
    ArtifactCreated,
    ArtifactUpdated,
    # Tool packets
    ToolStart,
    ToolOutput,
    ToolEnd,
    # File packets
    FileWrite,
    FileDelete,
]


class StreamPacket(BaseModel):
    """Wrapper for streaming packets with session context."""

    # Session the packet belongs to.
    session_id: str
    # The concrete packet, parsed into the class selected by its `type` field.
    obj: Annotated[PacketObj, Field(discriminator="type")]
```
### SSE Event Format (Backend → Frontend)
Each packet is sent as an SSE event with the packet JSON as data:
```
event: message
data: {"type": "step_start", "step_id": "abc123", "title": "Reading requirements", "timestamp": "2024-01-15T10:30:00Z"}
event: message
data: {"type": "step_delta", "step_id": "abc123", "content": "Analyzing the file structure...", "timestamp": "2024-01-15T10:30:01Z"}
event: message
data: {"type": "tool_start", "tool_name": "write_file", "tool_input": {"path": "/outputs/web/src/App.tsx"}, "timestamp": "2024-01-15T10:30:02Z"}
event: message
data: {"type": "file_write", "path": "web/src/App.tsx", "size_bytes": 1523, "timestamp": "2024-01-15T10:30:03Z"}
event: message
data: {"type": "artifact_created", "artifact": {"id": "uuid-here", "type": "web_app", "name": "Dashboard", "path": "web/"}, "timestamp": "2024-01-15T10:30:04Z"}
event: message
data: {"type": "output_start", "timestamp": "2024-01-15T10:30:05Z"}
event: message
data: {"type": "output_delta", "content": "I've built your dashboard with the following features...", "timestamp": "2024-01-15T10:30:05Z"}
event: message
data: {"type": "done", "summary": "Created a Next.js dashboard with 3 components", "timestamp": "2024-01-15T10:30:10Z"}
```
### TypeScript Types (Frontend)
```typescript
// Enum for packet types.
// Mirrors the backend `StreamingType` Python enum; keep the two in sync.
enum StreamingType {
  DONE = "done",
  ERROR = "error",
  STEP_START = "step_start",
  STEP_DELTA = "step_delta",
  STEP_END = "step_end",
  OUTPUT_START = "output_start",
  OUTPUT_DELTA = "output_delta",
  ARTIFACT_CREATED = "artifact_created",
  ARTIFACT_UPDATED = "artifact_updated",
  TOOL_START = "tool_start",
  TOOL_OUTPUT = "tool_output",
  TOOL_END = "tool_end",
  FILE_WRITE = "file_write",
  FILE_DELETE = "file_delete",
}

// Artifact types (mirrors backend `ArtifactType`)
type ArtifactType = "web_app" | "pptx" | "docx" | "image" | "markdown" | "excel";

/** Metadata describing a single artifact produced by the agent. */
interface ArtifactMetadata {
  id: string; // UUID
  type: ArtifactType;
  name: string;
  path: string; // Relative path within /outputs
  preview_url?: string; // URL for inline preview, when available
}

// Base packet interface
/** Common fields shared by every streaming packet. */
interface BasePacket {
  type: string; // Discriminant; narrowed to a string literal in each packet
  timestamp: string; // Serialized timestamp set by the backend
}

// Control packets

/** Signals completion of the agent's response. */
interface DonePacket extends BasePacket {
  type: "done";
  summary?: string;
}

/** Signals an error occurred during processing. */
interface ErrorPacket extends BasePacket {
  type: "error";
  message: string;
  code?: string; // e.g. "TIMEOUT", "SANDBOX_ERROR", "LLM_ERROR"
  recoverable: boolean;
}

// Step packets

/** Signals the start of a new agent step/action. */
interface StepStart extends BasePacket {
  type: "step_start";
  step_id: string; // StepDelta/StepEnd packets carry the same id
  title?: string;
}

/** Streaming content for an agent step. */
interface StepDelta extends BasePacket {
  type: "step_delta";
  step_id: string;
  content: string; // Incremental text content
}

/** Signals completion of an agent step. */
interface StepEnd extends BasePacket {
  type: "step_end";
  step_id: string;
  status: "success" | "failed" | "skipped";
}

// Output packets

/** Signals the start of the agent's final output. */
interface OutputStart extends BasePacket {
  type: "output_start";
}

/** Streaming content for the agent's final output. */
interface OutputDelta extends BasePacket {
  type: "output_delta";
  content: string; // Incremental text content
}

// Artifact packets

/** Signals a new artifact has been created. */
interface ArtifactCreated extends BasePacket {
  type: "artifact_created";
  artifact: ArtifactMetadata;
}

/** Signals an existing artifact has been updated. */
interface ArtifactUpdated extends BasePacket {
  type: "artifact_updated";
  artifact: ArtifactMetadata;
  changes?: string[]; // Description of what changed
}

// Tool packets

/** Signals the agent is invoking a tool. */
interface ToolStart extends BasePacket {
  type: "tool_start";
  tool_name: string; // e.g. "bash", "read_file", "write_file", "web_search"
  tool_input?: Record<string, unknown> | string;
}

/** Output from a tool invocation. */
interface ToolOutput extends BasePacket {
  type: "tool_output";
  tool_name: string;
  output?: string;
  is_error: boolean;
}

/** Signals completion of a tool invocation. */
interface ToolEnd extends BasePacket {
  type: "tool_end";
  tool_name: string;
  status: "success" | "failed";
}

// File packets

/** Signals a file was written to the outputs volume. */
interface FileWrite extends BasePacket {
  type: "file_write";
  path: string; // Relative path within /outputs
  size_bytes?: number;
}

/** Signals a file was deleted from the outputs volume. */
interface FileDelete extends BasePacket {
  type: "file_delete";
  path: string;
}

// Discriminated union of all packet types, keyed on `type`
type StreamPacket =
  | DonePacket
  | ErrorPacket
  | StepStart
  | StepDelta
  | StepEnd
  | OutputStart
  | OutputDelta
  | ArtifactCreated
  | ArtifactUpdated
  | ToolStart
  | ToolOutput
  | ToolEnd
  | FileWrite
  | FileDelete;
```
---
## Frontend Components
### Chat Panel
- Message input
- Message history with agent steps
- Artifact inline previews
### VM Explorer (Slide-out)
- File tree navigation
- File content viewer
- Artifact-specific renderers:
- Next.js: iframe preview + code view
- PPTX: slide viewer
- Markdown: rendered preview
- Charts: image/interactive view
---
## Configuration
```yaml
sandbox:
idle_timeout_seconds: 900 # 15 minutes
max_concurrent_per_org: 10
container_image: "cli-agent:latest"
resource_limits:
memory: "2Gi"
cpu: "1"
storage:
snapshots_bucket: "sandbox-snapshots"
outputs_template_path: "/templates/outputs"
knowledge:
base_path: "/mnt/knowledge"
```
---
## Open Questions
1. **Container orchestration**: Docker directly? Kubernetes? Firecracker?
2. **CLI agent protocol**: How does it receive messages? stdin? HTTP? Socket?
3. **Artifact detection**: How do we know when an artifact is created/updated? Filesystem watching? Agent reports it?
4. **Knowledge volume**: Per-org? Per-user? How large can it get?
5. **Snapshot storage**: S3? Local NFS? How long to retain?
6. **Multi-turn context**: Does the CLI agent maintain conversation history, or do we replay?
7. **Security**: Network isolation? What can the agent access?
8. **Preview generation**: Who renders Next.js apps? Dev server in container? Separate preview service?
---
## Next Steps
- [ ] Define CLI agent interface contract
- [ ] Design container image contents
- [ ] Detail snapshot/restore mechanics
- [ ] Specify frontend state management
- [ ] Define artifact type handlers
- [ ] Security model and isolation
- [ ] Monitoring and observability

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,396 @@
# Persistent File Storage for Indexed Documents
## Issues to Address
Extend the indexing pipeline to write every indexed document to a persistent VM file system with a hierarchical directory structure that mirrors the source organization. Initial implementation covers **Google Drive**, **Fireflies**, and **Linear** connectors.
Target structure:
```
{tenant_id}/
google_drive/
My Drive/
Folder1/
File1.json
Shared Drive 1/
Folder1/
File1.json
fireflies/
2024-01/
Meeting Title 1.json
Meeting Title 2.json
2024-02/
Meeting Title 3.json
linear/
TeamName/
DAN-123.json
DAN-456.json
OtherTeam/
OTH-789.json
```
## Important Notes
### Current State of Each Connector
1. **Google Drive** (`backend/onyx/connectors/google_drive/`)
- Most complex hierarchy: Drives → Folders → Files
- `RetrievedDriveFile` has `parent_id` field (`models.py:114`) but full folder path not stored
- Document creation in `doc_conversion.py:530` doesn't populate `doc_metadata`
- Folder tracking happens during crawling via `completion_map` in checkpoint
- **Challenge**: Must reconstruct folder paths from `parent_id` chain during crawling
2. **Fireflies** (`backend/onyx/connectors/fireflies/connector.py`)
- Flat structure: Meeting transcripts with date/title/organizer
- Document created in `_create_doc_from_transcript()` (line 52-116)
- Available fields: `title`, `date`, `organizer_email`, `participants`
- **Hierarchy approach**: Organize by year-month (e.g., `2024-01/`)
- Currently no `doc_metadata` populated
3. **Linear** (`backend/onyx/connectors/linear/connector.py`)
- Team-based hierarchy: Teams → Issues
- Document created in `_process_issues()` (line 277-301)
- Has `team.name` available from GraphQL query (line 197-199)
- Has `identifier` (e.g., "DAN-2327") which includes team prefix
- Currently no `doc_metadata` populated
### Integration Point
The best place to add file writing is `DocumentIndexingBatchAdapter.post_index()` in `backend/onyx/indexing/adapters/document_indexing_adapter.py:161-211`. This runs AFTER database commits and has access to:
- `context.updatable_docs` - all documents processed
- `filtered_documents` - documents that passed filtering
- `self.tenant_id` - for multi-tenant isolation
- `self.index_attempt_metadata` - connector/credential info
### Existing Patterns
- `FileStoreDocumentBatchStorage` in `backend/onyx/file_store/document_batch_storage.py` shows document serialization patterns
- `FileStore` abstraction in `backend/onyx/file_store/file_store.py` handles S3/MinIO storage with tenant isolation
## Implementation Strategy
### Phase 1: Create Core Infrastructure
**New file**: `backend/onyx/indexing/persistent_document_writer.py`
```python
class PersistentDocumentWriter:
    """Writes indexed documents to the local filesystem with a hierarchical
    layout: {base_path}/{tenant_id}/{source}/{...source_path...}/{doc_id}.json.
    """

    def __init__(
        self,
        tenant_id: str,
        base_path: str,
    ):
        # Tenant prefix keeps documents isolated per tenant on disk.
        self.tenant_id = tenant_id
        self.base_path = Path(base_path)

    def write_documents(self, documents: list[Document]) -> list[str]:
        """Write documents to the local filesystem; returns written file paths."""
        return [str(self._persist(document)) for document in documents]

    def _persist(self, document: Document) -> Path:
        # Compute the target path, write the file, and return the path.
        target = self._build_path(document)
        self._write_document(document, target)
        return target

    def _build_path(self, document: Document) -> Path:
        """Build the hierarchical on-disk path for a document."""
        # Optional hierarchy comes from doc_metadata["hierarchy"]["source_path"].
        hierarchy = (document.doc_metadata or {}).get("hierarchy", {})
        components = [self.tenant_id, document.source.value]
        components += [
            self._sanitize_path_component(part)
            for part in hierarchy.get("source_path", [])
        ]
        # File name derives from the (sanitized) document ID.
        leaf = f"{self._sanitize_filename(document.id)}.json"
        return self.base_path / "/".join(components) / leaf

    def _sanitize_path_component(self, component: str) -> str:
        """Make a single path component safe for the filesystem."""
        # Replace reserved filesystem characters with "_" and drop control
        # characters (ord < 32); fall back to "unnamed" for empty results.
        cleaned = "".join(
            "_" if ch in '/\\:<>|"?*' else ch for ch in component if ord(ch) >= 32
        )
        return cleaned.strip() or "unnamed"

    def _sanitize_filename(self, doc_id: str) -> str:
        """Sanitize a document ID for use as a file name.

        Overlong IDs (> 200 chars after sanitizing) are replaced by a
        truncated SHA-256 digest so paths stay under filesystem limits.
        """
        safe = self._sanitize_path_component(doc_id)
        if len(safe) > 200:
            return hashlib.sha256(doc_id.encode()).hexdigest()[:32]
        return safe

    def _write_document(self, document: Document, path: Path) -> None:
        """Serialize the document to JSON at `path`, creating parent dirs."""
        payload = {
            "id": document.id,
            "semantic_identifier": document.semantic_identifier,
            "title": document.title,
            "source": document.source.value,
            "doc_updated_at": (
                document.doc_updated_at.isoformat()
                if document.doc_updated_at
                else None
            ),
            "metadata": document.metadata,
            "doc_metadata": document.doc_metadata,
            "sections": [
                # ImageSection-style sections may lack `text`; store None then.
                {"text": getattr(section, "text", None), "link": section.link}
                for section in document.sections
            ],
            "primary_owners": [
                owner.model_dump() for owner in (document.primary_owners or [])
            ],
            "secondary_owners": [
                owner.model_dump() for owner in (document.secondary_owners or [])
            ],
        }
        # Ensure the directory hierarchy exists before writing.
        path.parent.mkdir(parents=True, exist_ok=True)
        # default=str keeps non-JSON-native values (e.g. datetimes) serializable.
        with open(path, "w", encoding="utf-8") as handle:
            json.dump(payload, handle, indent=2, default=str)
```
**Add configuration** to `backend/onyx/configs/app_configs.py`:
```python
PERSISTENT_DOCUMENT_STORAGE_ENABLED = os.environ.get(
"PERSISTENT_DOCUMENT_STORAGE_ENABLED", ""
).lower() == "true"
# Base directory path for persistent document storage (local filesystem)
PERSISTENT_DOCUMENT_STORAGE_PATH = os.environ.get(
"PERSISTENT_DOCUMENT_STORAGE_PATH", "/app/indexed-docs"
)
```
### Phase 2: Update Linear Connector
**File**: `backend/onyx/connectors/linear/connector.py`
Linear is the simplest - just add team name to `doc_metadata`. Modify the document creation (around line 277):
```python
# Extract team name for hierarchy
team_name = (node.get("team") or {}).get("name") or "Unknown Team"
identifier = node.get("identifier", node["id"])
documents.append(
Document(
id=node["id"],
sections=typed_sections,
source=DocumentSource.LINEAR,
semantic_identifier=f"[{node['identifier']}] {node['title']}",
title=node["title"],
doc_updated_at=time_str_to_utc(node["updatedAt"]),
doc_metadata={
"hierarchy": {
"source_path": [team_name],
"team_name": team_name,
"identifier": identifier,
}
},
metadata={...}, # existing metadata
)
)
```
### Phase 3: Update Fireflies Connector
**File**: `backend/onyx/connectors/fireflies/connector.py`
Organize by year-month. Modify `_create_doc_from_transcript()` (around line 100):
```python
# Build hierarchy based on meeting date
meeting_date = datetime.fromtimestamp(meeting_date_unix / 1000, tz=timezone.utc)
year_month = meeting_date.strftime("%Y-%m")
return Document(
id=fireflies_id,
sections=cast(list[TextSection | ImageSection], sections),
source=DocumentSource.FIREFLIES,
semantic_identifier=meeting_title,
doc_metadata={
"hierarchy": {
"source_path": [year_month],
"year_month": year_month,
"meeting_title": meeting_title,
"organizer_email": meeting_organizer_email,
}
},
metadata={...}, # existing metadata
doc_updated_at=meeting_date,
primary_owners=organizer_email_user_info,
secondary_owners=meeting_participants_email_list,
)
```
### Phase 4: Update Google Drive Connector
This is the most complex. Need to:
1. Build a folder ID → folder name mapping during crawling
2. Pass this mapping to document conversion
3. Reconstruct full folder paths
**Step 4a**: Track folder names during retrieval
**File**: `backend/onyx/connectors/google_drive/connector.py`
Add folder name tracking to the connector class:
```python
class GoogleDriveConnector:
def __init__(self, ...):
...
self._folder_id_to_name: dict[str, str] = {}
self._folder_id_to_parent: dict[str, str | None] = {}
```
**File**: `backend/onyx/connectors/google_drive/file_retrieval.py`
When folders are retrieved/crawled, populate the mapping. In functions like `crawl_folders_for_files()` and `get_files_in_shared_drive()`, capture folder info:
```python
# When processing folders, store the mapping
folder_id_to_name[folder["id"]] = folder.get("name", "Untitled Folder")
folder_id_to_parent[folder["id"]] = folder.get("parents", [None])[0]
```
**Step 4b**: Build path reconstruction helper
**File**: `backend/onyx/connectors/google_drive/doc_conversion.py`
Add a helper function to reconstruct full paths:
```python
def build_folder_path(
    file: GoogleDriveFileType,
    folder_id_to_name: dict[str, str],
    folder_id_to_parent: dict[str, str | None],
    drive_name: str | None = None,
) -> list[str]:
    """Reconstruct the full folder path for a file.

    Walks the parent chain upward via ``folder_id_to_parent``, collecting the
    human-readable folder names. A ``seen`` set guards against cyclic parent
    data; an unknown parent ID simply terminates the walk, so the result may
    be a partial path. ``drive_name``, when given, becomes the first segment.
    """
    parents = file.get("parents", [])
    current = parents[0] if parents else None

    segments: list[str] = []
    seen: set[str] = set()
    while current and current in folder_id_to_name and current not in seen:
        seen.add(current)
        # Prepend, since we are walking child -> root.
        segments.insert(0, folder_id_to_name[current])
        current = folder_id_to_parent.get(current)

    if drive_name:
        segments.insert(0, drive_name)
    return segments
```
**Step 4c**: Populate doc_metadata in document creation
Modify `_convert_drive_item_to_document()` to accept and use the folder mapping:
```python
def _convert_drive_item_to_document(
creds: Any,
allow_images: bool,
size_threshold: int,
retriever_email: str,
file: GoogleDriveFileType,
permission_sync_context: PermissionSyncContext | None,
folder_id_to_name: dict[str, str] | None = None,
folder_id_to_parent: dict[str, str | None] | None = None,
drive_name: str | None = None,
) -> Document | ConnectorFailure | None:
...
# Build folder path if mapping available
source_path = []
if folder_id_to_name and folder_id_to_parent:
source_path = build_folder_path(
file, folder_id_to_name, folder_id_to_parent, drive_name
)
elif drive_name:
source_path = [drive_name]
return Document(
id=doc_id,
sections=sections,
source=DocumentSource.GOOGLE_DRIVE,
semantic_identifier=file.get("name", ""),
doc_metadata={
"hierarchy": {
"source_path": source_path,
"drive_name": drive_name,
"file_name": file.get("name", ""),
"mime_type": file.get("mimeType", ""),
}
},
metadata={...}, # existing
...
)
```
### Phase 5: Integrate into Indexing Pipeline
**File**: `backend/onyx/indexing/adapters/document_indexing_adapter.py`
Add persistent writing at the end of `post_index()`:
```python
from onyx.configs.app_configs import PERSISTENT_DOCUMENT_STORAGE_ENABLED
from onyx.indexing.persistent_document_writer import get_persistent_document_writer
def post_index(
self,
context: DocumentBatchPrepareContext,
updatable_chunk_data: list[UpdatableChunkData],
filtered_documents: list[Document],
result: BuildMetadataAwareChunksResult,
) -> None:
# ... existing code ...
self.db_session.commit()
# Write to persistent storage if enabled
if PERSISTENT_DOCUMENT_STORAGE_ENABLED and filtered_documents:
try:
writer = get_persistent_document_writer(tenant_id=self.tenant_id)
writer.write_documents(filtered_documents)
except Exception as e:
# Log but don't fail indexing
logger.warning(f"Failed to write documents to persistent storage: {e}")
```
Add factory function to `persistent_document_writer.py`:
```python
def get_persistent_document_writer(tenant_id: str) -> PersistentDocumentWriter:
    """Factory for a PersistentDocumentWriter rooted at the configured path.

    NOTE(review): the config import is function-scoped; presumably to avoid
    import-time coupling/cycles with app_configs — confirm before changing.
    """
    from onyx.configs.app_configs import PERSISTENT_DOCUMENT_STORAGE_PATH
    return PersistentDocumentWriter(
        tenant_id=tenant_id,
        base_path=PERSISTENT_DOCUMENT_STORAGE_PATH,
    )
```
## File Changes Summary
| File | Change |
|------|--------|
| `backend/onyx/indexing/persistent_document_writer.py` | **NEW** - Core writer class |
| `backend/onyx/configs/app_configs.py` | Add config flags |
| `backend/onyx/connectors/linear/connector.py` | Add `doc_metadata` with team hierarchy |
| `backend/onyx/connectors/fireflies/connector.py` | Add `doc_metadata` with year-month hierarchy |
| `backend/onyx/connectors/google_drive/connector.py` | Add folder name tracking |
| `backend/onyx/connectors/google_drive/file_retrieval.py` | Capture folder names during crawl |
| `backend/onyx/connectors/google_drive/doc_conversion.py` | Add path reconstruction + `doc_metadata` |
| `backend/onyx/indexing/adapters/document_indexing_adapter.py` | Call writer in `post_index()` |

View File

@@ -23,6 +23,8 @@ dependencies = [
"uvicorn==0.35.0",
"voyageai==0.2.3",
"brotli>=1.2.0",
"claude-agent-sdk>=0.1.19",
"agent-client-protocol>=0.7.1",
"discord-py==2.4.0",
]

32
uv.lock generated
View File

@@ -38,6 +38,18 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/63/b1/8198e3cdd11a426b1df2912e3381018c4a4a55368f6d0857ba3ca418ef93/accelerate-1.6.0-py3-none-any.whl", hash = "sha256:1aee717d3d3735ad6d09710a7c26990ee4652b79b4e93df46551551b5227c2aa", size = 354748, upload-time = "2025-04-01T11:53:01.298Z" },
]
[[package]]
name = "agent-client-protocol"
version = "0.7.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "pydantic" },
]
sdist = { url = "https://files.pythonhosted.org/packages/db/7c/12da39be4f73026fd9b02144df5f64d803488cf1439aa221b0edb7c305e3/agent_client_protocol-0.7.1.tar.gz", hash = "sha256:8d7031209e14c3f2f987e3b95e7d9c3286158e7b2af1bf43d6aae5b8a429249f", size = 66226, upload-time = "2025-12-28T13:58:57.012Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/03/48/48d2fb454f911147432cd779f548e188274e1700f1cbe0a258e78158331a/agent_client_protocol-0.7.1-py3-none-any.whl", hash = "sha256:4ffe999488f2b23db26f09becdfaa2aaae6529f0847a52bca61bc2c628001c0f", size = 53771, upload-time = "2025-12-28T13:58:55.967Z" },
]
[[package]]
name = "aioboto3"
version = "15.1.0"
@@ -952,6 +964,22 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/8b/a5/f752351ea2652744f711bf5d478f00e2ceeeff0def6316ea9e1bfa18d8d2/classify_imports-4.2.0-py2.py3-none-any.whl", hash = "sha256:dbbc264b70a470ed8c6c95976a11dfb8b7f63df44ed1af87328bbed2663f5161", size = 5403, upload-time = "2022-07-31T22:59:10.799Z" },
]
[[package]]
name = "claude-agent-sdk"
version = "0.1.19"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "anyio" },
{ name = "mcp" },
]
sdist = { url = "https://files.pythonhosted.org/packages/60/b0/73c6f4e09439b4442aa6d5650eb7bf232322d939feb9f15711525fc74a0a/claude_agent_sdk-0.1.19.tar.gz", hash = "sha256:318c6dbd049bfdb101ed580aece47d63bea32b75a708c86d9e649735e524c736", size = 56163, upload-time = "2026-01-08T01:46:39.848Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/0d/6d/268c90b53c112f0fb0ed5e8eb5a011be90c77dce13ea35ceb825fd8654eb/claude_agent_sdk-0.1.19-py3-none-macosx_11_0_arm64.whl", hash = "sha256:0d4ec526de19989dc1d62c3abdf9b71b785f8df851735ec14266fd14d341f34b", size = 53580971, upload-time = "2026-01-08T01:46:26.935Z" },
{ url = "https://files.pythonhosted.org/packages/1a/30/e53cd5888a0e6efd952e1c16709125cab7e91e1468a7180e016844fa69f5/claude_agent_sdk-0.1.19-py3-none-manylinux_2_17_aarch64.whl", hash = "sha256:cff32c935d952b61cc421f9731ad9549df63e871710055446e8abdd4f1d4c5ea", size = 67797723, upload-time = "2026-01-08T01:46:29.854Z" },
{ url = "https://files.pythonhosted.org/packages/7f/f9/86ac9ef3b200fbeb020f6d1c03fb30a15ca97e9c96f0005be17001bcb37a/claude_agent_sdk-0.1.19-py3-none-manylinux_2_17_x86_64.whl", hash = "sha256:016d0127cf6ef9f5e36dd385bb3a808c9f4ba2a6ed80c676affe4f0801edd311", size = 69507781, upload-time = "2026-01-08T01:46:33.411Z" },
{ url = "https://files.pythonhosted.org/packages/73/e0/f2547e461570d6a5a75e35447153fb932ea6b0ccba1a452f5717e2d12148/claude_agent_sdk-0.1.19-py3-none-win_amd64.whl", hash = "sha256:0b6d820599d1ecf8aad37cf65fcf42e7fd067b732325c4d85e07559584f4bafc", size = 71705389, upload-time = "2026-01-08T01:46:36.679Z" },
]
[[package]]
name = "click"
version = "8.3.1"
@@ -4534,8 +4562,10 @@ name = "onyx"
version = "0.0.0"
source = { editable = "." }
dependencies = [
{ name = "agent-client-protocol" },
{ name = "aioboto3" },
{ name = "brotli" },
{ name = "claude-agent-sdk" },
{ name = "cohere" },
{ name = "discord-py" },
{ name = "fastapi" },
@@ -4706,6 +4736,7 @@ model-server = [
[package.metadata]
requires-dist = [
{ name = "accelerate", marker = "extra == 'model-server'", specifier = "==1.6.0" },
{ name = "agent-client-protocol", specifier = ">=0.7.1" },
{ name = "aioboto3", specifier = "==15.1.0" },
{ name = "aiohttp", marker = "extra == 'backend'", specifier = "==3.13.3" },
{ name = "alembic", marker = "extra == 'backend'", specifier = "==1.10.4" },
@@ -4722,6 +4753,7 @@ requires-dist = [
{ name = "celery-types", marker = "extra == 'dev'", specifier = "==0.19.0" },
{ name = "chardet", marker = "extra == 'backend'", specifier = "==5.2.0" },
{ name = "chonkie", marker = "extra == 'backend'", specifier = "==1.0.10" },
{ name = "claude-agent-sdk", specifier = ">=0.1.19" },
{ name = "cohere", specifier = "==5.6.1" },
{ name = "dask", marker = "extra == 'backend'", specifier = "==2026.1.1" },
{ name = "ddtrace", marker = "extra == 'backend'", specifier = "==3.10.0" },

69
web/package-lock.json generated
View File

@@ -57,6 +57,7 @@
"lowlight": "^3.3.0",
"lucide-react": "^0.454.0",
"mdast-util-find-and-replace": "^3.0.1",
"motion": "^12.27.0",
"next": "^16.0.10",
"next-themes": "^0.4.4",
"postcss": "^8.5.6",
@@ -10305,6 +10306,33 @@
"url": "https://github.com/sponsors/rawify"
}
},
"node_modules/framer-motion": {
"version": "12.27.0",
"resolved": "https://registry.npmjs.org/framer-motion/-/framer-motion-12.27.0.tgz",
"integrity": "sha512-gJtqOKEDJH/jrn0PpsWp64gdOjBvGX8hY6TWstxjDot/85daIEtJHl1UsiwHSXiYmJF2QXUoXP6/3gGw5xY2YA==",
"license": "MIT",
"dependencies": {
"motion-dom": "^12.27.0",
"motion-utils": "^12.24.10",
"tslib": "^2.4.0"
},
"peerDependencies": {
"@emotion/is-prop-valid": "*",
"react": "^18.0.0 || ^19.0.0",
"react-dom": "^18.0.0 || ^19.0.0"
},
"peerDependenciesMeta": {
"@emotion/is-prop-valid": {
"optional": true
},
"react": {
"optional": true
},
"react-dom": {
"optional": true
}
}
},
"node_modules/fs-extra": {
"version": "10.1.0",
"dev": true,
@@ -13762,6 +13790,47 @@
"version": "1.0.4",
"license": "MIT"
},
"node_modules/motion": {
"version": "12.27.0",
"resolved": "https://registry.npmjs.org/motion/-/motion-12.27.0.tgz",
"integrity": "sha512-5/WbUMUV0QPOlgimOKJRhKwE+/pIHBI38SVgEpsfadOa5lYDgkgJAEav7KqNahdX3i3xkvogD5JR4K41w+9Hzw==",
"license": "MIT",
"dependencies": {
"framer-motion": "^12.27.0",
"tslib": "^2.4.0"
},
"peerDependencies": {
"@emotion/is-prop-valid": "*",
"react": "^18.0.0 || ^19.0.0",
"react-dom": "^18.0.0 || ^19.0.0"
},
"peerDependenciesMeta": {
"@emotion/is-prop-valid": {
"optional": true
},
"react": {
"optional": true
},
"react-dom": {
"optional": true
}
}
},
"node_modules/motion-dom": {
"version": "12.27.0",
"resolved": "https://registry.npmjs.org/motion-dom/-/motion-dom-12.27.0.tgz",
"integrity": "sha512-oDjl0WoAsWIWKl3GCDxmh7GITrNjmLX+w5+jwk4+pzLu3VnFvsOv2E6+xCXeH72O65xlXsr84/otiOYQKW/nQA==",
"license": "MIT",
"dependencies": {
"motion-utils": "^12.24.10"
}
},
"node_modules/motion-utils": {
"version": "12.24.10",
"resolved": "https://registry.npmjs.org/motion-utils/-/motion-utils-12.24.10.tgz",
"integrity": "sha512-x5TFgkCIP4pPsRLpKoI86jv/q8t8FQOiM/0E8QKBzfMozWHfkKap2gA1hOki+B5g3IsBNpxbUnfOum1+dgvYww==",
"license": "MIT"
},
"node_modules/ms": {
"version": "2.1.3",
"license": "MIT"

View File

@@ -73,6 +73,7 @@
"lowlight": "^3.3.0",
"lucide-react": "^0.454.0",
"mdast-util-find-and-replace": "^3.0.1",
"motion": "^12.27.0",
"next": "^16.0.10",
"next-themes": "^0.4.4",
"postcss": "^8.5.6",

View File

@@ -6,6 +6,10 @@ import {
GMAIL_AUTH_IS_ADMIN_COOKIE_NAME,
GOOGLE_DRIVE_AUTH_IS_ADMIN_COOKIE_NAME,
} from "@/lib/constants";
import {
BUILD_MODE_OAUTH_COOKIE_NAME,
BUILD_CONFIGURE_PATH,
} from "@/app/build/v1/constants";
import { processCookies } from "@/lib/userSS";
export const GET = async (request: NextRequest) => {
@@ -25,6 +29,17 @@ export const GET = async (request: NextRequest) => {
return NextResponse.redirect(new URL("/auth/error", getDomain(request)));
}
// Check for build mode OAuth flag (redirects to build admin panel)
const isBuildMode =
requestCookies.get(BUILD_MODE_OAUTH_COOKIE_NAME)?.value === "true";
if (isBuildMode) {
const redirectResponse = NextResponse.redirect(
new URL(BUILD_CONFIGURE_PATH, getDomain(request))
);
redirectResponse.cookies.delete(BUILD_MODE_OAUTH_COOKIE_NAME);
return redirectResponse;
}
const authCookieName =
connector === "gmail"
? GMAIL_AUTH_IS_ADMIN_COOKIE_NAME

View File

@@ -11,6 +11,7 @@ import {
DOCS_ADMINS_PATH,
GMAIL_AUTH_IS_ADMIN_COOKIE_NAME,
} from "@/lib/constants";
import { BUILD_MODE_OAUTH_COOKIE_NAME } from "@/app/build/v1/constants";
import Cookies from "js-cookie";
import { TextFormField, SectionHeader } from "@/components/Field";
import { Form, Formik } from "formik";
@@ -25,6 +26,7 @@ import { ValidSources } from "@/lib/types";
import { buildSimilarCredentialInfoURL } from "@/app/admin/connector/[ccPairId]/lib";
import { FiFile, FiCheck, FiLink, FiAlertTriangle } from "react-icons/fi";
import { cn, truncateString } from "@/lib/utils";
import { Section } from "@/layouts/general-layouts";
type GmailCredentialJsonTypes = "authorized_user" | "service_account";
@@ -257,7 +259,6 @@ export const GmailJsonUploadSection = ({
existingAuthCredential,
}: GmailJsonUploadSectionProps) => {
const { mutate } = useSWRConfig();
const router = useRouter();
const [localServiceAccountData, setLocalServiceAccountData] = useState(
serviceAccountCredentialData
);
@@ -417,6 +418,13 @@ interface GmailCredentialSectionProps {
refreshCredentials: () => void;
connectorExists: boolean;
user: User | null;
buildMode?: boolean;
onOAuthRedirect?: () => void;
onCredentialCreated?: (
credential: Credential<
GmailCredentialJson | GmailServiceAccountCredentialJson
>
) => void;
}
async function handleRevokeAccess(
@@ -456,6 +464,9 @@ export const GmailAuthSection = ({
refreshCredentials,
connectorExists,
user,
buildMode = false,
onOAuthRedirect,
onCredentialCreated,
}: GmailCredentialSectionProps) => {
const router = useRouter();
const [isAuthenticating, setIsAuthenticating] = useState(false);
@@ -501,19 +512,29 @@ export const GmailAuthSection = ({
</p>
</div>
</div>
<Button
danger
onClick={async () => {
handleRevokeAccess(
connectorExists,
setPopup,
existingCredential,
refreshCredentials
);
}}
>
Revoke Access
</Button>
<Section flexDirection="row" justifyContent="between" height="fit">
<Button
danger
onClick={async () => {
handleRevokeAccess(
connectorExists,
setPopup,
existingCredential,
refreshCredentials
);
}}
>
Revoke Access
</Button>
{buildMode && onCredentialCreated && (
<Button
primary
onClick={() => onCredentialCreated(existingCredential)}
>
Continue
</Button>
)}
</Section>
</div>
</div>
);
@@ -629,11 +650,17 @@ export const GmailAuthSection = ({
Cookies.set(GMAIL_AUTH_IS_ADMIN_COOKIE_NAME, "true", {
path: "/",
});
if (buildMode) {
Cookies.set(BUILD_MODE_OAUTH_COOKIE_NAME, "true", {
path: "/",
});
}
const [authUrl, errorMsg] = await setupGmailOAuth({
isAdmin: true,
});
if (authUrl) {
onOAuthRedirect?.();
router.push(authUrl as Route);
} else {
setPopup({

View File

@@ -25,7 +25,21 @@ import {
refreshAllGoogleData,
} from "@/lib/googleConnector";
export const GmailMain = () => {
interface GmailMainProps {
buildMode?: boolean;
onOAuthRedirect?: () => void;
onCredentialCreated?: (
credential: Credential<
GmailCredentialJson | GmailServiceAccountCredentialJson
>
) => void;
}
export const GmailMain = ({
buildMode = false,
onOAuthRedirect,
onCredentialCreated,
}: GmailMainProps) => {
const { isAdmin, user } = useUser();
const { popup, setPopup } = usePopup();
@@ -191,6 +205,12 @@ export const GmailMain = () => {
serviceAccountKeyData={serviceAccountKeyData}
connectorExists={connectorExists}
user={user}
buildMode={buildMode}
onOAuthRedirect={onOAuthRedirect}
// Necessary prop drilling for build mode v1.
// TODO: either integrate gmail into normal flow
// or create a build-mode specific Gmail flow
onCredentialCreated={onCredentialCreated}
/>
</>
)}

View File

@@ -47,6 +47,7 @@ export enum NotificationType {
TRIAL_ENDS_TWO_DAYS = "two_day_trial_ending",
ASSISTANT_FILES_READY = "assistant_files_ready",
RELEASE_NOTES = "release_notes",
FEATURE_ANNOUNCEMENT = "feature_announcement",
}
export interface Notification {

View File

@@ -0,0 +1,868 @@
"use client";
import React, { useRef, useState, useCallback, useMemo } from "react";
import { cn } from "@/lib/utils";
import Text from "@/refresh-components/texts/Text";
import Logo from "@/refresh-components/Logo";
import MinimalMarkdown from "@/components/chat/MinimalMarkdown";
import {
Collapsible,
CollapsibleContent,
CollapsibleTrigger,
} from "@/refresh-components/Collapsible";
import {
SvgChevronDown,
SvgChevronRight,
SvgTerminalSmall,
SvgCheckCircle,
SvgLoader,
SvgAlertCircle,
SvgFileText,
SvgCode,
SvgEdit,
SvgGlobe,
SvgTrash,
SvgFiles,
SvgImage,
SvgBarChart,
} from "@opal/icons";
// ============================================
// Streaming Protocol Types (from cc4a-overview.md)
// ============================================
/**
 * Discriminant values for every packet type in the build streaming protocol
 * (from cc4a-overview.md). Each value matches the literal `type` field of the
 * corresponding packet interface declared below, and is used as the switch
 * key in parsePackets.
 */
export enum StreamingType {
  // Control packets
  DONE = "done",
  ERROR = "error",
  // Agent activity packets
  STEP_START = "step_start",
  STEP_DELTA = "step_delta",
  STEP_END = "step_end",
  // Output packets (final response)
  OUTPUT_START = "output_start",
  OUTPUT_DELTA = "output_delta",
  // Artifact packets
  ARTIFACT_CREATED = "artifact_created",
  ARTIFACT_UPDATED = "artifact_updated",
  // Tool usage packets
  TOOL_START = "tool_start",
  TOOL_OUTPUT = "tool_output",
  TOOL_END = "tool_end",
  // File operation packets
  FILE_WRITE = "file_write",
  FILE_DELETE = "file_delete",
}
export type ArtifactType =
| "nextjs_app"
| "pptx"
| "markdown"
| "chart"
| "csv"
| "image";
export interface ArtifactMetadata {
id: string;
type: ArtifactType;
name: string;
path: string;
preview_url?: string;
}
// Base packet interface
interface BasePacket {
type: string;
timestamp: string;
}
// Control packets
export interface DonePacket extends BasePacket {
type: "done";
summary?: string;
}
export interface ErrorPacket extends BasePacket {
type: "error";
message: string;
code?: string;
recoverable: boolean;
}
// Step packets
export interface StepStart extends BasePacket {
type: "step_start";
step_id: string;
title?: string;
}
export interface StepDelta extends BasePacket {
type: "step_delta";
step_id: string;
content: string;
}
export interface StepEnd extends BasePacket {
type: "step_end";
step_id: string;
status: "success" | "failed" | "skipped";
}
// Output packets
export interface OutputStart extends BasePacket {
type: "output_start";
}
export interface OutputDelta extends BasePacket {
type: "output_delta";
content: string;
}
// Artifact packets
export interface ArtifactCreated extends BasePacket {
type: "artifact_created";
artifact: ArtifactMetadata;
}
export interface ArtifactUpdated extends BasePacket {
type: "artifact_updated";
artifact: ArtifactMetadata;
changes?: string[];
}
// Tool packets
export interface ToolStart extends BasePacket {
type: "tool_start";
tool_name: string;
tool_input?: Record<string, unknown> | string;
}
export interface ToolOutput extends BasePacket {
type: "tool_output";
tool_name: string;
output?: string;
is_error: boolean;
}
export interface ToolEnd extends BasePacket {
type: "tool_end";
tool_name: string;
status: "success" | "failed";
}
// File packets
export interface FileWrite extends BasePacket {
type: "file_write";
path: string;
size_bytes?: number;
}
export interface FileDelete extends BasePacket {
type: "file_delete";
path: string;
}
// Discriminated union of all packet types
export type BuildStreamPacket =
| DonePacket
| ErrorPacket
| StepStart
| StepDelta
| StepEnd
| OutputStart
| OutputDelta
| ArtifactCreated
| ArtifactUpdated
| ToolStart
| ToolOutput
| ToolEnd
| FileWrite
| FileDelete;
// ============================================
// Parsed State Types
// ============================================
interface ParsedStep {
id: string;
title?: string;
content: string;
status: "in_progress" | "success" | "failed" | "skipped";
}
interface ParsedTool {
name: string;
input?: Record<string, unknown> | string;
output?: string;
status: "in_progress" | "success" | "failed";
isError: boolean;
}
interface ParsedArtifact {
id: string;
type: ArtifactType;
name: string;
path: string;
previewUrl?: string;
isNew: boolean;
changes?: string[];
}
interface ParsedFileOp {
path: string;
operation: "write" | "delete";
sizeBytes?: number;
}
interface ParsedState {
steps: Map<string, ParsedStep>;
tools: ParsedTool[];
artifacts: Map<string, ParsedArtifact>;
fileOps: ParsedFileOp[];
outputContent: string;
isOutputStarted: boolean;
isDone: boolean;
doneSummary?: string;
error?: { message: string; code?: string; recoverable: boolean };
}
// ============================================
// Helper Functions
// ============================================
/**
 * Picks a display icon for a tool from case-insensitive substrings of its
 * name. Rules are checked in priority order (bash/terminal, then read, then
 * write/edit, then web/fetch/search); SvgCode is the fallback.
 */
function getToolIcon(toolName: string) {
  const lowered = toolName.toLowerCase();
  // First matching rule wins — order mirrors the original if-chain priority.
  const rules: Array<[string[], typeof SvgCode]> = [
    [["bash", "terminal"], SvgTerminalSmall],
    [["read"], SvgFileText],
    [["write", "edit"], SvgEdit],
    [["web", "fetch", "search"], SvgGlobe],
  ];
  for (const [needles, icon] of rules) {
    if (needles.some((needle) => lowered.includes(needle))) {
      return icon;
    }
  }
  return SvgCode;
}
/**
 * Maps an artifact type to its display icon. Unknown types fall back to
 * SvgFiles (same as the original switch default).
 */
function getArtifactIcon(type: ArtifactType) {
  const iconByType: Record<ArtifactType, typeof SvgFiles> = {
    nextjs_app: SvgCode,
    pptx: SvgBarChart,
    markdown: SvgFileText,
    chart: SvgBarChart,
    csv: SvgFiles,
    image: SvgImage,
  };
  return iconByType[type] ?? SvgFiles;
}
/**
 * Returns the human-readable label for an artifact type, e.g. "Web App" for
 * "nextjs_app". Unknown types fall back to the generic "Artifact" label,
 * matching the original switch default.
 */
function getArtifactLabel(type: ArtifactType): string {
  const labelByType: Record<string, string> = {
    nextjs_app: "Web App",
    pptx: "Presentation",
    markdown: "Document",
    chart: "Chart",
    csv: "CSV Data",
    image: "Image",
  };
  return labelByType[type] ?? "Artifact";
}
// ============================================
// Packet Parsing
// ============================================
/**
 * Folds an ordered list of streaming packets into a single ParsedState
 * snapshot: steps, tool runs, artifacts, file operations, accumulated output
 * text, and terminal done/error status. Builds a fresh state on every call
 * and never mutates its input, so it is safe to re-run over the full packet
 * list as new packets arrive.
 */
function parsePackets(packets: BuildStreamPacket[]): ParsedState {
  const state: ParsedState = {
    steps: new Map(),
    tools: [],
    artifacts: new Map(),
    fileOps: [],
    outputContent: "",
    isOutputStarted: false,
    isDone: false,
  };
  // Track active tools by name for matching start/output/end
  // NOTE(review): keyed by tool name only, so two overlapping runs of the
  // same tool would collide — assumes the stream never interleaves them.
  const activeToolIndex = new Map<string, number>();
  for (const packet of packets) {
    switch (packet.type) {
      case StreamingType.STEP_START: {
        const p = packet as StepStart;
        // Register the step; later delta/end packets mutate it in place.
        state.steps.set(p.step_id, {
          id: p.step_id,
          title: p.title,
          content: "",
          status: "in_progress",
        });
        break;
      }
      case StreamingType.STEP_DELTA: {
        const p = packet as StepDelta;
        const step = state.steps.get(p.step_id);
        // Deltas for an unknown step_id are silently dropped.
        if (step) {
          step.content += p.content;
        }
        break;
      }
      case StreamingType.STEP_END: {
        const p = packet as StepEnd;
        const step = state.steps.get(p.step_id);
        if (step) {
          step.status = p.status;
        }
        break;
      }
      case StreamingType.OUTPUT_START: {
        state.isOutputStarted = true;
        break;
      }
      case StreamingType.OUTPUT_DELTA: {
        const p = packet as OutputDelta;
        // Output text is a simple concatenation of all deltas.
        state.outputContent += p.content;
        break;
      }
      case StreamingType.TOOL_START: {
        const p = packet as ToolStart;
        // Append a new in-progress tool entry and remember its index so the
        // matching TOOL_OUTPUT / TOOL_END packets can find it by name.
        const toolIndex = state.tools.length;
        state.tools.push({
          name: p.tool_name,
          input: p.tool_input,
          status: "in_progress",
          isError: false,
        });
        activeToolIndex.set(p.tool_name, toolIndex);
        break;
      }
      case StreamingType.TOOL_OUTPUT: {
        const p = packet as ToolOutput;
        const idx = activeToolIndex.get(p.tool_name);
        if (idx !== undefined && state.tools[idx]) {
          // Overwrites rather than appends: a later TOOL_OUTPUT for the same
          // run replaces the earlier one. Presumably the producer sends one
          // cumulative output per run — TODO confirm against the backend.
          state.tools[idx].output = p.output;
          state.tools[idx].isError = p.is_error;
        }
        break;
      }
      case StreamingType.TOOL_END: {
        const p = packet as ToolEnd;
        const idx = activeToolIndex.get(p.tool_name);
        if (idx !== undefined && state.tools[idx]) {
          state.tools[idx].status = p.status;
        }
        // Closed runs are removed so a subsequent run of the same tool gets
        // a fresh entry.
        activeToolIndex.delete(p.tool_name);
        break;
      }
      case StreamingType.ARTIFACT_CREATED: {
        const p = packet as ArtifactCreated;
        state.artifacts.set(p.artifact.id, {
          id: p.artifact.id,
          type: p.artifact.type,
          name: p.artifact.name,
          path: p.artifact.path,
          previewUrl: p.artifact.preview_url,
          isNew: true,
        });
        break;
      }
      case StreamingType.ARTIFACT_UPDATED: {
        const p = packet as ArtifactUpdated;
        // An update for an id we have already seen keeps isNew=false; an
        // update arriving without a prior create is treated as new.
        const existing = state.artifacts.get(p.artifact.id);
        state.artifacts.set(p.artifact.id, {
          id: p.artifact.id,
          type: p.artifact.type,
          name: p.artifact.name,
          path: p.artifact.path,
          previewUrl: p.artifact.preview_url,
          isNew: existing ? false : true,
          changes: p.changes,
        });
        break;
      }
      case StreamingType.FILE_WRITE: {
        const p = packet as FileWrite;
        state.fileOps.push({
          path: p.path,
          operation: "write",
          sizeBytes: p.size_bytes,
        });
        break;
      }
      case StreamingType.FILE_DELETE: {
        const p = packet as FileDelete;
        state.fileOps.push({
          path: p.path,
          operation: "delete",
        });
        break;
      }
      case StreamingType.DONE: {
        const p = packet as DonePacket;
        state.isDone = true;
        state.doneSummary = p.summary;
        break;
      }
      case StreamingType.ERROR: {
        const p = packet as ErrorPacket;
        // Only the most recent error is retained.
        state.error = {
          message: p.message,
          code: p.code,
          recoverable: p.recoverable,
        };
        break;
      }
    }
  }
  return state;
}
// ============================================
// Sub-components
// ============================================
interface StepBubbleProps {
  // Parsed step produced by parsePackets.
  step: ParsedStep;
}
/**
 * Pill-shaped bubble for a single agent step. Shows a spinner while the step
 * is in progress, then a success or error icon, and — when the step has
 * accumulated content — a collapsible markdown body.
 */
function StepBubble({ step }: StepBubbleProps) {
  const [isOpen, setIsOpen] = useState(false);
  // Any status other than "in_progress" counts as finished.
  const isComplete = step.status !== "in_progress";
  const isFailed = step.status === "failed";
  return (
    <Collapsible open={isOpen} onOpenChange={setIsOpen}>
      <CollapsibleTrigger asChild>
        <button
          className={cn(
            "inline-flex flex-row items-center gap-1.5 px-2.5 py-1.5 rounded-full",
            "border transition-all text-left",
            isFailed
              ? "bg-status-error-01 border-status-error-02"
              : isComplete
                ? "bg-background-neutral-01 border-border-02 hover:bg-background-neutral-02"
                : "bg-status-info-01 border-status-info-02"
          )}
        >
          {!isComplete ? (
            <SvgLoader className="size-3.5 stroke-status-info-05 animate-spin shrink-0" />
          ) : isFailed ? (
            <SvgAlertCircle className="size-3.5 stroke-status-error-05 shrink-0" />
          ) : (
            <SvgCheckCircle className="size-3.5 stroke-status-success-05 shrink-0" />
          )}
          <span className="text-xs font-medium text-text-04">
            {step.title || "Step"}
          </span>
          {/* Chevron only appears when there is content to expand. */}
          {step.content && (
            <>
              {isOpen ? (
                <SvgChevronDown className="size-3 stroke-text-03 shrink-0" />
              ) : (
                <SvgChevronRight className="size-3 stroke-text-03 shrink-0" />
              )}
            </>
          )}
        </button>
      </CollapsibleTrigger>
      {step.content && (
        <CollapsibleContent>
          <div
            className={cn(
              "mt-2 p-3 rounded-08 border border-border-01",
              "bg-background-neutral-02 text-text-05",
              "text-xs overflow-x-auto max-h-48 overflow-y-auto"
            )}
          >
            <MinimalMarkdown content={step.content} />
          </div>
        </CollapsibleContent>
      )}
    </Collapsible>
  );
}
interface ToolBubbleProps {
  // Parsed tool run produced by parsePackets.
  tool: ParsedTool;
}
/**
 * Pill-shaped bubble for a single tool invocation. Shows a spinner while the
 * tool runs, a tool-specific icon plus success/error marker when finished,
 * and an always-available collapsible panel with the tool's input and output
 * rendered in a monospace, dark-themed box.
 */
function ToolBubble({ tool }: ToolBubbleProps) {
  const [isOpen, setIsOpen] = useState(false);
  const isComplete = tool.status !== "in_progress";
  // A run counts as failed if either the end packet said so or any output
  // packet was flagged as an error.
  const isFailed = tool.status === "failed" || tool.isError;
  const Icon = getToolIcon(tool.name);
  // Stringify structured input once per input change; strings pass through.
  const inputStr = useMemo(() => {
    if (!tool.input) return "";
    if (typeof tool.input === "string") return tool.input;
    return JSON.stringify(tool.input, null, 2);
  }, [tool.input]);
  return (
    <Collapsible open={isOpen} onOpenChange={setIsOpen}>
      <CollapsibleTrigger asChild>
        <button
          className={cn(
            "inline-flex flex-row items-center gap-1.5 px-2.5 py-1.5 rounded-full",
            "border transition-all text-left",
            isFailed
              ? "bg-status-error-01 border-status-error-02"
              : isComplete
                ? "bg-background-neutral-01 border-border-02 hover:bg-background-neutral-02"
                : "bg-status-info-01 border-status-info-02"
          )}
        >
          {!isComplete ? (
            <SvgLoader className="size-3.5 stroke-status-info-05 animate-spin shrink-0" />
          ) : (
            <Icon className="size-3.5 stroke-text-03 shrink-0" />
          )}
          <span className="text-xs font-medium text-text-04">{tool.name}</span>
          {isComplete && !isFailed && (
            <SvgCheckCircle className="size-3.5 stroke-status-success-05 shrink-0 ml-0.5" />
          )}
          {isFailed && (
            <SvgAlertCircle className="size-3.5 stroke-status-error-05 shrink-0 ml-0.5" />
          )}
          {isOpen ? (
            <SvgChevronDown className="size-3 stroke-text-03 shrink-0" />
          ) : (
            <SvgChevronRight className="size-3 stroke-text-03 shrink-0" />
          )}
        </button>
      </CollapsibleTrigger>
      <CollapsibleContent>
        <div
          className={cn(
            "mt-2 p-3 rounded-08 border border-border-01",
            "bg-background-neutral-inverted-03 text-text-inverted-05",
            "text-xs overflow-x-auto max-h-48 overflow-y-auto"
          )}
          style={{ fontFamily: "var(--font-dm-mono)" }}
        >
          {inputStr && (
            <div className="mb-2">
              <span className="text-text-inverted-03">Input:</span>
              <pre className="whitespace-pre-wrap break-words m-0 mt-1">
                {inputStr}
              </pre>
            </div>
          )}
          {tool.output && (
            <div>
              <span className="text-text-inverted-03">Output:</span>
              <pre className="whitespace-pre-wrap break-words m-0 mt-1">
                {tool.output}
              </pre>
            </div>
          )}
          {!inputStr && !tool.output && (
            <span className="text-text-inverted-03">Waiting for output...</span>
          )}
        </div>
      </CollapsibleContent>
    </Collapsible>
  );
}
interface ArtifactBubbleProps {
  // Parsed artifact produced by parsePackets.
  artifact: ParsedArtifact;
  // Invoked with the artifact when the bubble is clicked.
  onClick?: (artifact: ParsedArtifact) => void;
}
/**
 * Clickable card for a generated artifact: type icon, name, type label,
 * and a "New"/"Updated" badge depending on whether the artifact was just
 * created or has recorded changes.
 */
function ArtifactBubble({ artifact, onClick }: ArtifactBubbleProps) {
  const Icon = getArtifactIcon(artifact.type);
  const label = getArtifactLabel(artifact.type);
  return (
    <button
      onClick={() => onClick?.(artifact)}
      className={cn(
        "inline-flex flex-row items-center gap-2 px-3 py-2 rounded-08",
        "border border-border-02 bg-background-neutral-01",
        "hover:bg-background-neutral-02 transition-all text-left",
        "shadow-sm"
      )}
    >
      <Icon className="size-5 stroke-text-03 shrink-0" />
      <div className="flex flex-col min-w-0">
        <span className="text-sm font-medium text-text-05 truncate">
          {artifact.name}
        </span>
        <span className="text-xs text-text-03">
          {label}
          {artifact.isNew && (
            <span className="ml-1 text-status-success-05"> New</span>
          )}
          {artifact.changes && artifact.changes.length > 0 && (
            <span className="ml-1 text-status-info-05"> Updated</span>
          )}
        </span>
      </div>
      <SvgChevronRight className="size-4 stroke-text-03 shrink-0 ml-auto" />
    </button>
  );
}
interface FileOpItemProps {
  // Parsed file operation (write or delete) produced by parsePackets.
  fileOp: ParsedFileOp;
}
/**
 * Compact inline row for a single file operation: an edit or trash icon,
 * the (truncated) file path, and — for writes that reported a size — the
 * size in kilobytes. Deletes are tinted with the error color.
 */
function FileOpItem({ fileOp }: FileOpItemProps) {
  const isDelete = fileOp.operation === "delete";
  return (
    <div
      className={cn(
        "inline-flex flex-row items-center gap-1.5 px-2 py-1 rounded",
        "text-xs",
        isDelete ? "text-status-error-04" : "text-text-03"
      )}
    >
      {isDelete ? (
        <SvgTrash className="size-3 shrink-0" />
      ) : (
        <SvgEdit className="size-3 shrink-0" />
      )}
      {/* Full path remains available via the title tooltip when truncated. */}
      <span className="truncate max-w-[200px]" title={fileOp.path}>
        {fileOp.path}
      </span>
      {fileOp.sizeBytes !== undefined && (
        <span className="text-text-02">
          ({(fileOp.sizeBytes / 1024).toFixed(1)}KB)
        </span>
      )}
    </div>
  );
}
// ============================================
// Main Component
// ============================================
export interface AIBuildMessageProps {
  // Ordered stream packets received so far for this message.
  rawPackets: BuildStreamPacket[];
  // Whether the stream is still producing packets.
  isStreaming?: boolean;
  // Invoked when the user clicks an artifact card.
  onArtifactClick?: (artifact: ParsedArtifact) => void;
}
/**
 * Custom React.memo comparator for AIBuildMessage. Packets are treated as
 * append-only, so comparing array LENGTH (not contents) is enough to detect
 * new packets without a deep comparison on every render.
 * NOTE(review): replacing rawPackets with a different array of the SAME
 * length will not trigger a re-render — confirm callers only ever append,
 * or reset to a strictly shorter array.
 */
function arePropsEqual(
  prev: AIBuildMessageProps,
  next: AIBuildMessageProps
): boolean {
  return (
    prev.rawPackets.length === next.rawPackets.length &&
    prev.isStreaming === next.isStreaming &&
    prev.onArtifactClick === next.onArtifactClick
  );
}
/**
 * AIBuildMessage — renders one AI message reconstructed from raw streaming
 * packets: error banner, step bubbles, tool bubbles, file operations,
 * artifact cards, and the final markdown output, with loading indicators
 * while the stream is still active. Memoized via arePropsEqual (length-based
 * packet comparison).
 */
const AIBuildMessage = React.memo(function AIBuildMessage({
  rawPackets,
  isStreaming = false,
  onArtifactClick,
}: AIBuildMessageProps) {
  // Parse packets incrementally
  // Refs (not state) hold the parse cursor and result so parsing happens
  // during render without scheduling extra renders.
  const lastProcessedIndexRef = useRef<number>(0);
  const parsedStateRef = useRef<ParsedState>({
    steps: new Map(),
    tools: [],
    artifacts: new Map(),
    fileOps: [],
    outputContent: "",
    isOutputStarted: false,
    isDone: false,
  });
  // Reset if packets array was replaced with shorter one
  if (lastProcessedIndexRef.current > rawPackets.length) {
    lastProcessedIndexRef.current = 0;
    parsedStateRef.current = {
      steps: new Map(),
      tools: [],
      artifacts: new Map(),
      fileOps: [],
      outputContent: "",
      isOutputStarted: false,
      isDone: false,
    };
  }
  // Process new packets
  if (rawPackets.length > lastProcessedIndexRef.current) {
    // Re-parse all packets for simplicity (could optimize for incremental)
    parsedStateRef.current = parsePackets(rawPackets);
    lastProcessedIndexRef.current = rawPackets.length;
  }
  const state = parsedStateRef.current;
  const steps = Array.from(state.steps.values());
  const artifacts = Array.from(state.artifacts.values());
  // Determine if we should show the output section
  const showOutput = state.isOutputStarted && state.outputContent.length > 0;
  const showSteps = steps.length > 0;
  const showTools = state.tools.length > 0;
  const showArtifacts = artifacts.length > 0;
  const showFileOps = state.fileOps.length > 0;
  const showError = !!state.error;
  // When nothing is visible yet and we are streaming, a "Thinking..." shell
  // is rendered instead of the sections below.
  const hasContent =
    showOutput ||
    showSteps ||
    showTools ||
    showArtifacts ||
    showFileOps ||
    showError;
  // Handle artifact click
  const handleArtifactClick = useCallback(
    (artifact: ParsedArtifact) => {
      onArtifactClick?.(artifact);
    },
    [onArtifactClick]
  );
  return (
    <div
      className="flex items-start gap-3 py-4"
      data-testid={state.isDone ? "build-ai-message" : undefined}
    >
      <div className="shrink-0 mt-0.5">
        <Logo folded size={24} />
      </div>
      <div className="flex-1 flex flex-col gap-3 min-w-0">
        {!hasContent && isStreaming ? (
          <div className="flex items-center gap-2 py-1">
            <SvgLoader className="size-4 stroke-text-03 animate-spin" />
            <Text secondaryBody text03>
              Thinking...
            </Text>
          </div>
        ) : (
          <>
            {/* Error display */}
            {showError && state.error && (
              <div
                className={cn(
                  "flex flex-row items-start gap-2 px-3 py-2 rounded-08",
                  "bg-status-error-01 border border-status-error-02"
                )}
              >
                <SvgAlertCircle className="size-4 stroke-status-error-05 shrink-0 mt-0.5" />
                <div className="flex flex-col gap-1">
                  <Text secondaryBody className="text-status-error-05">
                    {state.error.message}
                  </Text>
                  {state.error.code && (
                    <Text secondaryBody text03>
                      Code: {state.error.code}
                    </Text>
                  )}
                </div>
              </div>
            )}
            {/* Steps section */}
            {showSteps && (
              <div className="flex flex-wrap gap-2">
                {steps.map((step) => (
                  <StepBubble key={step.id} step={step} />
                ))}
              </div>
            )}
            {/* Tools section */}
            {showTools && (
              <div className="flex flex-wrap gap-2">
                {state.tools.map((tool, idx) => (
                  <ToolBubble key={`${tool.name}-${idx}`} tool={tool} />
                ))}
              </div>
            )}
            {/* File operations */}
            {showFileOps && (
              <div className="flex flex-wrap gap-1 px-1">
                {state.fileOps.map((fileOp, idx) => (
                  <FileOpItem key={`${fileOp.path}-${idx}`} fileOp={fileOp} />
                ))}
              </div>
            )}
            {/* Artifacts section */}
            {showArtifacts && (
              <div className="flex flex-col gap-2">
                {artifacts.map((artifact) => (
                  <ArtifactBubble
                    key={artifact.id}
                    artifact={artifact}
                    onClick={handleArtifactClick}
                  />
                ))}
              </div>
            )}
            {/* Output/Response section */}
            {showOutput && (
              <div className="py-1">
                <MinimalMarkdown
                  content={state.outputContent}
                  className="text-text-05"
                />
              </div>
            )}
            {/* Streaming indicator when output started but not done */}
            {state.isOutputStarted && !state.isDone && isStreaming && (
              <div className="flex items-center gap-1">
                <SvgLoader className="size-3 stroke-text-03 animate-spin" />
              </div>
            )}
          </>
        )}
      </div>
    </div>
  );
}, arePropsEqual);
export default AIBuildMessage;

View File

@@ -0,0 +1,53 @@
"use client";
import Text from "@/refresh-components/texts/Text";
import Logo from "@/refresh-components/Logo";
import MinimalMarkdown from "@/components/chat/MinimalMarkdown";
import { SvgLoader } from "@opal/icons";
interface AIMessageSimpleProps {
  // Plain-text (markdown) message content.
  content: string;
  // Whether the content is still being streamed in.
  isStreaming?: boolean;
}
/**
 * AIMessageSimple - Simple AI message display for text content
 *
 * Used when we have plain text content (not streaming packets).
 * For full streaming packet support, use AIBuildMessage.
 *
 * Shows a "Thinking..." placeholder while streaming with no content yet,
 * otherwise renders the content as markdown with a small spinner appended
 * while streaming continues.
 */
export default function AIMessageSimple({
  content,
  isStreaming = false,
}: AIMessageSimpleProps) {
  const hasContent = content.length > 0;
  return (
    <div className="flex items-start gap-3 py-4">
      <div className="shrink-0 mt-0.5">
        <Logo folded size={24} />
      </div>
      <div className="flex-1 flex flex-col gap-2 min-w-0">
        {!hasContent && isStreaming ? (
          <div className="flex items-center gap-2 py-1">
            <SvgLoader className="size-4 stroke-text-03 animate-spin" />
            <Text secondaryBody text03>
              Thinking...
            </Text>
          </div>
        ) : (
          <>
            <div className="py-1">
              <MinimalMarkdown content={content} className="text-text-05" />
            </div>
            {isStreaming && (
              <div className="flex items-center gap-1">
                <SvgLoader className="size-3 stroke-text-03 animate-spin" />
              </div>
            )}
          </>
        )}
      </div>
    </div>
  );
}

View File

@@ -0,0 +1,77 @@
"use client";
import { BuildMessage } from "@/app/build/services/buildStreamingModels";
import Text from "@/refresh-components/texts/Text";
import Logo from "@/refresh-components/Logo";
import MinimalMarkdown from "@/components/chat/MinimalMarkdown";
import BuildAgentTimeline from "@/app/build/components/BuildAgentTimeline";
import { SvgLoader } from "@opal/icons";
interface AIMessageWithToolsProps {
  // Plain-text (markdown) message content.
  content: string;
  /** Structured event messages (tool calls, thinking, plans) to display in timeline */
  eventMessages?: BuildMessage[];
  // Whether the message is still being streamed in.
  isStreaming?: boolean;
}
/**
 * AIMessageWithTools - AI message display with tool call activity
 *
 * Shows:
 * - Agent timeline (tool calls, thinking, plans) when present
 * - Message content with markdown rendering
 * - Loading indicator when streaming
 *
 * Renders nothing at all when there is no content, no events, and the
 * stream has finished.
 */
export default function AIMessageWithTools({
  content,
  eventMessages = [],
  isStreaming = false,
}: AIMessageWithToolsProps) {
  const hasContent = content.length > 0;
  const hasEvents = eventMessages.length > 0;
  // Any event still marked in_progress switches the placeholder text from
  // "Thinking..." to "Working...".
  const hasActiveEvents = eventMessages.some(
    (msg) => msg.message_metadata?.status === "in_progress"
  );
  // Don't render anything if there's no content and no events
  if (!hasContent && !hasEvents && !isStreaming) {
    return null;
  }
  return (
    <div className="flex items-start gap-3 py-4">
      <div className="shrink-0 mt-0.5">
        <Logo folded size={24} />
      </div>
      <div className="flex-1 flex flex-col gap-3 min-w-0">
        {/* Agent timeline section */}
        {hasEvents && (
          <div className="flex flex-col gap-2">
            <BuildAgentTimeline messages={eventMessages} />
          </div>
        )}
        {/* Message content */}
        {!hasContent && isStreaming ? (
          <div className="flex items-center gap-2 py-1">
            <SvgLoader className="size-4 stroke-text-03 animate-spin" />
            <Text secondaryBody text03>
              {hasActiveEvents ? "Working..." : "Thinking..."}
            </Text>
          </div>
        ) : hasContent ? (
          <>
            <div className="py-1">
              <MinimalMarkdown content={content} className="text-text-05" />
            </div>
            {isStreaming && (
              <div className="flex items-center gap-1">
                <SvgLoader className="size-3 stroke-text-03 animate-spin" />
              </div>
            )}
          </>
        ) : null}
      </div>
    </div>
  );
}

View File

@@ -0,0 +1,59 @@
"use client";
import Text from "@/refresh-components/texts/Text";
import Button from "@/refresh-components/buttons/Button";
import { SvgFiles, SvgDownloadCloud } from "@opal/icons";
import { ArtifactInfo, getArtifactUrl } from "@/lib/build/client";
interface ArtifactListProps {
  // All artifacts reported for the session (webapp artifacts are excluded
  // from this list before rendering).
  artifacts: ArtifactInfo[];
  // Build session id, used to construct per-artifact download URLs.
  sessionId: string;
}
/**
 * ArtifactList — renders the session's downloadable file artifacts as a
 * scrollable list with a download button per file. Artifacts of type
 * "webapp" are filtered out (presumably surfaced elsewhere as a live
 * preview — TODO confirm). Shows an empty-state message when nothing
 * remains after filtering.
 */
export default function ArtifactList({
  artifacts,
  sessionId,
}: ArtifactListProps) {
  const fileArtifacts = artifacts.filter((a) => a.artifact_type !== "webapp");
  if (fileArtifacts.length === 0) {
    return (
      <div className="p-4 text-center">
        <Text secondaryBody text03>
          No artifacts generated yet
        </Text>
      </div>
    );
  }
  return (
    <div className="border border-border-01 rounded-08 overflow-hidden">
      <div className="p-2 bg-background-neutral-01 flex flex-row items-center gap-1.5">
        <SvgFiles className="size-4 stroke-text-03" />
        <Text mainUiAction text03>
          Generated Files
        </Text>
      </div>
      <ul className="divide-y divide-border-01 max-h-[50vh] overflow-auto">
        {fileArtifacts.map((artifact) => (
          <li
            key={artifact.path}
            className="p-2 flex flex-row items-center justify-between gap-2 hover:bg-background-neutral-01 transition-colors"
          >
            <Text mainContentMono text04 className="truncate flex-1 text-sm">
              {artifact.filename}
            </Text>
            <a
              href={getArtifactUrl(sessionId, artifact.path)}
              download={artifact.filename}
            >
              <Button action tertiary leftIcon={SvgDownloadCloud}>
                Download
              </Button>
            </a>
          </li>
        ))}
      </ul>
    </div>
  );
}

View File

@@ -0,0 +1,96 @@
"use client";
import React from "react";
import { cn } from "@/lib/utils";
import Text from "@/refresh-components/texts/Text";
export interface BigButtonProps
  extends React.ButtonHTMLAttributes<HTMLButtonElement> {
  // Subvariants
  primary?: boolean;
  secondary?: boolean;
  // Inverted mode for dark backgrounds
  inverted?: boolean;
}
/**
 * BigButton — large pill-style button with two subvariants (primary /
 * secondary; primary is the default when neither or both flags are set),
 * an inverted color scheme for dark backgrounds, and a disabled state.
 * Container and text styles are resolved from (subvariant, disabled,
 * inverted) lookup tables. All other props are forwarded to the underlying
 * <button>, whose type is fixed to "button".
 */
const BigButton = React.forwardRef<HTMLButtonElement, BigButtonProps>(
  (
    { primary, secondary, inverted, disabled, children, className, ...props },
    ref
  ) => {
    // Primary wins if both flags are set; primary is also the fallback.
    const subvariant = primary
      ? "primary"
      : secondary
        ? "secondary"
        : "primary";
    const baseStyles =
      "px-6 py-3 rounded-xl w-fit flex flex-row items-center justify-center transition-colors";
    // Container classes per subvariant and state.
    const variantStyles = {
      primary: {
        normal:
          "bg-theme-primary-05 hover:bg-theme-primary-04 active:bg-theme-primary-06",
        inverted: "bg-white hover:bg-gray-200 active:bg-gray-300",
        disabled: "bg-background-neutral-04",
      },
      secondary: {
        normal:
          "bg-transparent border border-border-01 hover:bg-background-tint-02 active:bg-background-tint-00",
        inverted:
          "bg-transparent border border-text-inverted-05 hover:bg-background-tint-inverted-02 active:bg-background-tint-inverted-01",
        disabled: "bg-background-neutral-03 border border-border-01",
      },
    };
    // Label classes per subvariant and state (secondary reacts to the
    // button's group hover/active states).
    const textStyles = {
      primary: {
        normal: "text-text-inverted-05",
        inverted: "text-gray-900",
        disabled: "text-text-inverted-04",
      },
      secondary: {
        normal:
          "text-text-03 group-hover:text-text-04 group-active:text-text-05",
        inverted: "text-text-inverted-05",
        disabled: "text-text-01",
      },
    };
    // Disabled takes precedence over inverted for both lookups.
    const getVariantStyle = () => {
      if (disabled) return variantStyles[subvariant].disabled;
      return inverted
        ? variantStyles[subvariant].inverted
        : variantStyles[subvariant].normal;
    };
    const getTextStyle = () => {
      if (disabled) return textStyles[subvariant].disabled;
      return inverted
        ? textStyles[subvariant].inverted
        : textStyles[subvariant].normal;
    };
    return (
      <button
        ref={ref}
        className={cn("group", baseStyles, getVariantStyle(), className)}
        disabled={disabled}
        type="button"
        {...props}
      >
        <Text
          mainContentEmphasis
          className={cn("whitespace-nowrap", getTextStyle())}
          as="span"
        >
          {children}
        </Text>
      </button>
    );
  }
);
BigButton.displayName = "BigButton";
export default BigButton;

View File

@@ -0,0 +1,139 @@
"use client";
import React, { useState, useMemo } from "react";
import { SvgChevronDown } from "@opal/icons";
import { cn } from "@/lib/utils";
import { BuildMessage } from "@/app/build/services/buildStreamingModels";
import Text from "@/refresh-components/texts/Text";
import BuildToolCallRenderer from "@/app/build/components/renderers/BuildToolCallRenderer";
interface BuildAgentTimelineProps {
  // Saved messages; only those with message_metadata.type are rendered.
  messages: BuildMessage[];
}
/**
 * BuildAgentTimeline - Displays messages with metadata in chronological order
 *
 * Reconstructs the agent's execution timeline from saved messages:
 * - Tool calls (tool_call_start, tool_call_progress)
 * - Thinking steps (agent_thought_chunk)
 * - Plan updates (agent_plan_update)
 *
 * Renders nothing when no message carries structured metadata. The whole
 * timeline is collapsible via its header (expanded by default).
 */
export default function BuildAgentTimeline({
  messages,
}: BuildAgentTimelineProps) {
  const [isExpanded, setIsExpanded] = useState(true);
  // Filter messages that have metadata (structured events)
  const structuredMessages = useMemo(() => {
    return messages.filter((m) => m.message_metadata?.type);
  }, [messages]);
  if (structuredMessages.length === 0) return null;
  // Group by type for status text
  const toolMessages = structuredMessages.filter(
    (m) =>
      m.message_metadata?.type === "tool_call_start" ||
      m.message_metadata?.type === "tool_call_progress"
  );
  const thinkingMessages = structuredMessages.filter(
    (m) => m.message_metadata?.type === "agent_thought_chunk"
  );
  const planMessages = structuredMessages.filter(
    (m) => m.message_metadata?.type === "agent_plan_update"
  );
  // e.g. "3 tools, 2 thinking, 1 plan" — tool count always shown.
  const statusText =
    `${toolMessages.length} ${toolMessages.length === 1 ? "tool" : "tools"}` +
    (thinkingMessages.length > 0
      ? `, ${thinkingMessages.length} thinking`
      : "") +
    (planMessages.length > 0 ? `, ${planMessages.length} plan` : "");
  return (
    <div className="my-2 border border-border-01 bg-background rounded-md overflow-hidden">
      {/* Header */}
      <div
        className="flex items-center justify-between gap-2 px-3 py-2 cursor-pointer hover:bg-background-tint-02 transition-colors"
        onClick={() => setIsExpanded(!isExpanded)}
      >
        <Text text02 className="text-sm">
          Agent Activity
        </Text>
        <div className="flex items-center gap-2">
          <Text text03 className="text-xs">
            {statusText}
          </Text>
          <SvgChevronDown
            className={cn(
              "w-4 h-4 stroke-text-400 transition-transform duration-150",
              !isExpanded && "rotate-[-90deg]"
            )}
          />
        </div>
      </div>
      {/* Timeline */}
      <div
        className={cn(
          "overflow-hidden transition-all duration-200",
          isExpanded ? "max-h-[2000px] opacity-100" : "max-h-0 opacity-0"
        )}
      >
        <div className="px-3 pb-3 space-y-0">
          {structuredMessages.map((message, index) => {
            // Non-null assertion is safe: structuredMessages only contains
            // messages whose message_metadata.type is truthy.
            const metadata = message.message_metadata!;
            const type = metadata.type;
            // Render tool calls
            if (type === "tool_call_start" || type === "tool_call_progress") {
              return (
                <BuildToolCallRenderer
                  key={message.id}
                  metadata={metadata}
                  isLastItem={index === structuredMessages.length - 1}
                  isLoading={false}
                />
              );
            }
            // Render thinking steps
            if (type === "agent_thought_chunk") {
              return (
                <BuildToolCallRenderer
                  key={message.id}
                  metadata={{
                    ...metadata,
                    kind: "thought",
                    title: "Thinking",
                  }}
                  isLastItem={index === structuredMessages.length - 1}
                  isLoading={false}
                />
              );
            }
            // Render plan updates
            if (type === "agent_plan_update") {
              return (
                <BuildToolCallRenderer
                  key={message.id}
                  metadata={{
                    ...metadata,
                    kind: "plan",
                    title: "Plan Update",
                  }}
                  isLastItem={index === structuredMessages.length - 1}
                  isLoading={false}
                />
              );
            }
            return null;
          })}
        </div>
      </div>
    </div>
  );
}

View File

@@ -0,0 +1,145 @@
"use client";
import React, { useRef, useState, useCallback, useEffect } from "react";
import { cn } from "@/lib/utils";
import Text from "@/refresh-components/texts/Text";
import IconButton from "@/refresh-components/buttons/IconButton";
import { SvgArrowUp, SvgStop } from "@opal/icons";
// Cap (in px) on the textarea's auto-grow height; keep in sync with the
// max-h-[200px] utility class on the textarea below.
const MAX_INPUT_HEIGHT = 200;
// Imperative API exposed to parents through the forwarded ref.
export interface BuildInputBarHandle {
  // Clears the draft and collapses the textarea back to its minimum height.
  reset: () => void;
  // Moves keyboard focus into the textarea.
  focus: () => void;
}
interface BuildInputBarProps {
  // Called with the trimmed draft when the user sends a message.
  onSubmit: (message: string) => void;
  // When provided and a run is active, a stop button replaces the send button.
  onStop?: () => void;
  // True while the agent is running; disables editing and submission.
  isRunning: boolean;
  // Hard-disables the input regardless of run state.
  disabled?: boolean;
  placeholder?: string;
}
/**
 * BuildInputBar - message composer for a build session.
 *
 * A single auto-growing textarea with a send (or stop) button.
 * Enter submits, Shift+Enter inserts a newline. Parents can clear or
 * focus the input imperatively via the forwarded BuildInputBarHandle.
 */
const BuildInputBar = React.forwardRef<BuildInputBarHandle, BuildInputBarProps>(
  (
    {
      onSubmit,
      onStop,
      isRunning,
      disabled = false,
      placeholder = "Describe your task...",
    },
    ref
  ) => {
    const [message, setMessage] = useState("");
    const textAreaRef = useRef<HTMLTextAreaElement>(null);

    // Imperative API for parents (e.g. reset after a successful send).
    React.useImperativeHandle(ref, () => ({
      reset: () => {
        setMessage("");
        if (textAreaRef.current) {
          // Collapse the auto-grown textarea back to its minimum height.
          textAreaRef.current.style.height = "auto";
        }
      },
      focus: () => {
        textAreaRef.current?.focus();
      },
    }));

    // Trim and emit the draft, then clear the input. No-op when the draft
    // is blank or submission is currently not allowed.
    const handleSubmit = useCallback(() => {
      if (!message.trim() || disabled || isRunning) return;
      onSubmit(message.trim());
      setMessage("");
      if (textAreaRef.current) {
        textAreaRef.current.style.height = "auto";
      }
    }, [message, onSubmit, disabled, isRunning]);

    const handleKeyDown = useCallback(
      (e: React.KeyboardEvent<HTMLTextAreaElement>) => {
        // Fix: ignore Enter while an IME composition is in progress
        // (Japanese/Chinese/Korean input). Confirming a composition
        // candidate fires a keydown with key === "Enter" and must not
        // submit the message.
        if (e.nativeEvent.isComposing) return;
        if (e.key === "Enter" && !e.shiftKey) {
          e.preventDefault();
          handleSubmit();
        }
      },
      [handleSubmit]
    );

    const handleChange = useCallback(
      (e: React.ChangeEvent<HTMLTextAreaElement>) => {
        setMessage(e.target.value);
        // Auto-resize: reset height to "auto" first so scrollHeight
        // reflects the current content, then grow up to MAX_INPUT_HEIGHT.
        const textarea = e.target;
        textarea.style.height = "auto";
        textarea.style.height = `${Math.min(
          textarea.scrollHeight,
          MAX_INPUT_HEIGHT
        )}px`;
      },
      []
    );

    // Focus the input once on mount.
    useEffect(() => {
      textAreaRef.current?.focus();
    }, []);

    const canSubmit = message.trim().length > 0 && !disabled && !isRunning;

    return (
      <div
        className={cn(
          "w-full rounded-16 border border-border-02 bg-background-neutral-00",
          "focus-within:border-border-03 transition-colors",
          "shadow-sm"
        )}
      >
        <div className="flex flex-col p-3">
          <textarea
            ref={textAreaRef}
            value={message}
            onChange={handleChange}
            onKeyDown={handleKeyDown}
            placeholder={placeholder}
            disabled={disabled || isRunning}
            rows={1}
            className={cn(
              "w-full bg-transparent resize-none outline-none",
              "text-text-05 placeholder:text-text-03",
              "min-h-[1.5rem] max-h-[200px]"
            )}
            style={{ fontFamily: "inherit" }}
          />
          <div className="flex flex-row items-center justify-between pt-2">
            <Text secondaryBody text03>
              Press Enter to send, Shift+Enter for new line
            </Text>
            <div className="flex flex-row items-center gap-2">
              {/* While running, show Stop (when a handler exists); otherwise Send. */}
              {isRunning && onStop ? (
                <IconButton
                  icon={SvgStop}
                  onClick={onStop}
                  primary
                  tooltip="Stop"
                />
              ) : (
                <IconButton
                  icon={SvgArrowUp}
                  onClick={handleSubmit}
                  disabled={!canSubmit}
                  primary
                  tooltip="Send"
                />
              )}
            </div>
          </div>
        </div>
      </div>
    );
  }
);
BuildInputBar.displayName = "BuildInputBar";
export default BuildInputBar;

View File

@@ -0,0 +1,306 @@
"use client";
import { cn } from "@/lib/utils";
import Text from "@/refresh-components/texts/Text";
import Logo from "@/refresh-components/Logo";
import MinimalMarkdown from "@/components/chat/MinimalMarkdown";
import {
Collapsible,
CollapsibleContent,
CollapsibleTrigger,
} from "@/refresh-components/Collapsible";
import {
SvgChevronDown,
SvgChevronRight,
SvgTerminalSmall,
SvgCheckCircle,
SvgLoader,
SvgAlertCircle,
SvgFileText,
SvgCode,
SvgEdit,
SvgGlobe,
} from "@opal/icons";
import { useState } from "react";
// One entry in the streamed build transcript. `type` selects which
// renderer BuildMessage uses for the item.
export interface OutputItem {
  type:
    | "text"
    | "tool_call"
    | "tool_output"
    | "tool_result"
    | "thinking"
    | "status"
    | "error";
  // Main payload: markdown text, tool output, thinking text, or status text.
  content: string;
  // Tool metadata — only meaningful for tool_* items.
  toolName?: string;
  toolType?: string;
  description?: string;
  // For tool items: whether the tool has finished producing output.
  isComplete?: boolean;
  timestamp: number;
}
interface BuildMessageProps {
  items: OutputItem[];
  // True while the assistant is still streaming output for this message.
  isStreaming: boolean;
}
// Get icon based on tool type
function getToolIcon(toolType?: string) {
switch (toolType?.toLowerCase()) {
case "bash":
return SvgTerminalSmall;
case "read":
return SvgFileText;
case "write":
case "edit":
return SvgEdit;
case "web":
case "fetch":
return SvgGlobe;
default:
return SvgCode;
}
}
// Get a friendly label for the tool
function getToolLabel(toolType?: string, toolName?: string): string {
if (toolName) return toolName;
switch (toolType?.toLowerCase()) {
case "bash":
return "Running command";
case "read":
return "Reading file";
case "write":
return "Writing file";
case "edit":
return "Editing file";
default:
return "Using tool";
}
}
interface ToolCallBubbleProps {
item: OutputItem;
isStreaming: boolean;
}
/**
 * Collapsible pill representing a single tool invocation.
 * Collapsed: tool icon (or spinner while running), tool type, a short
 * description, and a completion check mark.
 * Expanded: the tool's raw output in a scrollable monospace panel.
 */
function ToolCallBubble({ item, isStreaming }: ToolCallBubbleProps) {
  const [isOpen, setIsOpen] = useState(false);
  // An item without an explicit isComplete flag is treated as complete
  // once the overall stream has finished.
  const isComplete = item.isComplete ?? !isStreaming;
  const Icon = getToolIcon(item.toolType);
  const label = getToolLabel(item.toolType, item.toolName);
  return (
    <Collapsible open={isOpen} onOpenChange={setIsOpen}>
      <CollapsibleTrigger asChild>
        <button
          className={cn(
            "inline-flex flex-row items-center gap-1.5 px-2.5 py-1.5 rounded-full",
            "border transition-all text-left",
            isComplete
              ? "bg-background-neutral-01 border-border-02 hover:bg-background-neutral-02"
              : "bg-status-info-01 border-status-info-02"
          )}
        >
          {/* Spinner while the tool is still running; otherwise its icon. */}
          {isStreaming && !isComplete ? (
            <SvgLoader className="size-3.5 stroke-status-info-05 animate-spin shrink-0" />
          ) : (
            <Icon className="size-3.5 stroke-text-03 shrink-0" />
          )}
          <span className="text-xs font-medium text-text-04">
            {item.toolType || "Tool"}
          </span>
          <span className="text-xs text-text-03 truncate max-w-[200px]">
            {item.description || label}
          </span>
          {isComplete && (
            <SvgCheckCircle className="size-3.5 stroke-status-success-05 shrink-0 ml-0.5" />
          )}
          {isOpen ? (
            <SvgChevronDown className="size-3 stroke-text-03 shrink-0" />
          ) : (
            <SvgChevronRight className="size-3 stroke-text-03 shrink-0" />
          )}
        </button>
      </CollapsibleTrigger>
      <CollapsibleContent>
        <div
          className={cn(
            "mt-2 p-3 rounded-08 border border-border-01",
            "bg-background-neutral-inverted-03 text-text-inverted-05",
            "text-xs overflow-x-auto max-h-48 overflow-y-auto"
          )}
          style={{ fontFamily: "var(--font-dm-mono)" }}
        >
          <pre className="whitespace-pre-wrap break-words m-0">
            {item.content || "Waiting for output..."}
          </pre>
        </div>
      </CollapsibleContent>
    </Collapsible>
  );
}
interface TextItemProps {
content: string;
}
// Plain assistant text rendered as lightweight markdown.
function TextItem(props: TextItemProps) {
  return (
    <div className="py-1">
      <MinimalMarkdown content={props.content} className="text-text-05" />
    </div>
  );
}
interface ThinkingItemProps {
content: string;
}
/**
 * Collapsible pill for a model "thinking" step.
 * Collapsed: a "Thinking" label plus the first 50 characters of the
 * thought as a preview. Expanded: the full thought text.
 */
function ThinkingItem({ content }: ThinkingItemProps) {
  const [isOpen, setIsOpen] = useState(false);
  return (
    <Collapsible open={isOpen} onOpenChange={setIsOpen}>
      <CollapsibleTrigger asChild>
        <button
          className={cn(
            "inline-flex flex-row items-center gap-1.5 px-2.5 py-1.5 rounded-full",
            "border border-theme-blue-02 bg-theme-blue-01 hover:bg-theme-blue-02",
            "transition-all text-left"
          )}
        >
          <span className="text-xs font-medium text-theme-blue-05">
            Thinking
          </span>
          {/* Preview: first 50 chars, with an ellipsis when truncated. */}
          <span className="text-xs text-theme-blue-04 truncate max-w-[200px]">
            {content.slice(0, 50)}
            {content.length > 50 ? "..." : ""}
          </span>
          {isOpen ? (
            <SvgChevronDown className="size-3 stroke-theme-blue-05 shrink-0" />
          ) : (
            <SvgChevronRight className="size-3 stroke-theme-blue-05 shrink-0" />
          )}
        </button>
      </CollapsibleTrigger>
      <CollapsibleContent>
        <div
          className={cn(
            "mt-2 p-3 rounded-08 border border-theme-blue-02",
            "bg-theme-blue-01 text-theme-blue-05",
            "text-xs overflow-x-auto max-h-48 overflow-y-auto italic"
          )}
        >
          <p className="whitespace-pre-wrap break-words m-0">{content}</p>
        </div>
      </CollapsibleContent>
    </Collapsible>
  );
}
interface ToolResultItemProps {
content: string;
}
// Raw tool result shown in a scrollable monospace box. Results longer
// than MAX_RESULT_CHARS are cut off with an ellipsis to keep the
// transcript compact.
function ToolResultItem({ content }: ToolResultItemProps) {
  const MAX_RESULT_CHARS = 500;
  let displayContent = content;
  if (displayContent.length > MAX_RESULT_CHARS) {
    displayContent = displayContent.slice(0, MAX_RESULT_CHARS) + "...";
  }
  return (
    <div
      className={cn(
        "p-2 rounded-08 border border-border-01",
        "bg-background-neutral-01 text-text-03",
        "text-xs overflow-x-auto max-h-32 overflow-y-auto"
      )}
      style={{ fontFamily: "var(--font-dm-mono)" }}
    >
      <pre className="whitespace-pre-wrap break-words m-0">
        {displayContent}
      </pre>
    </div>
  );
}
interface StatusItemProps {
content: string;
isError?: boolean;
}
/**
 * Terminal status pill for a run: green with a check on success,
 * red with an alert icon when isError is set.
 */
function StatusItem({ content, isError }: StatusItemProps) {
  return (
    <div
      className={cn(
        "inline-flex flex-row items-center gap-2 px-3 py-1.5 rounded-full",
        isError
          ? "bg-status-error-01 text-status-error-05"
          : "bg-status-success-01 text-status-success-05"
      )}
    >
      {isError ? (
        <SvgAlertCircle className="size-4 shrink-0" />
      ) : (
        <SvgCheckCircle className="size-4 shrink-0" />
      )}
      <Text secondaryBody>{content}</Text>
    </div>
  );
}
/**
 * BuildMessage - renders one assistant turn of the build transcript.
 *
 * Shows the logo, then each OutputItem through the renderer matching its
 * type. While streaming with no items yet, shows a "Thinking..." spinner.
 * Renders nothing when there are no items and the stream is finished.
 */
export default function BuildMessage({
  items,
  isStreaming,
}: BuildMessageProps) {
  if (items.length === 0 && !isStreaming) {
    return null;
  }
  return (
    <div className="flex items-start gap-3 py-4">
      <div className="shrink-0 mt-0.5">
        <Logo folded size={24} />
      </div>
      <div className="flex-1 flex flex-col gap-2 min-w-0">
        {items.length === 0 && isStreaming ? (
          <div className="flex items-center gap-2 py-1">
            <SvgLoader className="size-4 stroke-text-03 animate-spin" />
            <Text secondaryBody text03>
              Thinking...
            </Text>
          </div>
        ) : (
          // NOTE: array index is used as the React key; this assumes the
          // item list is append-only during streaming (never reordered).
          items.map((item, index) => {
            switch (item.type) {
              case "text":
                return <TextItem key={index} content={item.content} />;
              case "thinking":
                return <ThinkingItem key={index} content={item.content} />;
              case "tool_call":
              case "tool_output":
                return (
                  <ToolCallBubble
                    key={index}
                    item={item}
                    isStreaming={isStreaming}
                  />
                );
              case "tool_result":
                return <ToolResultItem key={index} content={item.content} />;
              case "status":
                return <StatusItem key={index} content={item.content} />;
              case "error":
                return (
                  <StatusItem key={index} content={item.content} isError />
                );
              default:
                // Unknown item types are silently dropped.
                return null;
            }
          })
        )}
      </div>
    </div>
  );
}

View File

@@ -0,0 +1,98 @@
"use client";
import { useRef, useEffect, useMemo } from "react";
import { BuildMessage } from "@/app/build/services/buildStreamingModels";
import UserMessage from "@/app/build/components/UserMessage";
import AIMessageWithTools from "@/app/build/components/AIMessageWithTools";
interface BuildMessageListProps {
messages: BuildMessage[];
isStreaming?: boolean;
}
/**
* BuildMessageList - Displays the conversation history
*
* Shows:
* - User messages (right-aligned bubbles)
* - Assistant responses (left-aligned with logo, including tool calls)
* - Agent activity timeline (tool calls, thinking, plans)
*
* Groups messages into display messages and event messages:
* - Display messages: user messages and assistant messages with content
* - Event messages: assistant messages with message_metadata (tool calls, thinking, plans)
*/
export default function BuildMessageList({
  messages,
  isStreaming = false,
}: BuildMessageListProps) {
  const messagesEndRef = useRef<HTMLDivElement>(null);
  // Separate display messages from event messages.
  const { displayMessages, eventMessages, hasEventsOnly } = useMemo(() => {
    const display: BuildMessage[] = [];
    const events: BuildMessage[] = [];
    for (const msg of messages) {
      // Event messages have metadata and empty/no content.
      if (msg.message_metadata?.type && !msg.content) {
        events.push(msg);
      } else {
        display.push(msg);
      }
    }
    // Check if we have events but no assistant messages with content;
    // in that case the timeline gets its own standalone renderer below.
    const hasAssistantContent = display.some((m) => m.type === "assistant");
    const hasEventsOnly = events.length > 0 && !hasAssistantContent;
    return { displayMessages: display, eventMessages: events, hasEventsOnly };
  }, [messages]);
  // Auto-scroll to bottom when new messages arrive.
  // NOTE(review): this only fires when the COUNT changes — content growth
  // within the last message during streaming will not trigger a scroll.
  // Confirm whether that is intended before changing the deps.
  useEffect(() => {
    messagesEndRef.current?.scrollIntoView({ behavior: "smooth" });
  }, [messages.length]);
  return (
    <div className="flex flex-col items-center px-4 pb-4">
      <div className="w-full max-w-2xl">
        {displayMessages.map((message, index) => {
          const isLastMessage = index === displayMessages.length - 1;
          // Only the trailing assistant message can be mid-stream.
          const isStreamingThis =
            isStreaming && isLastMessage && message.type === "assistant";
          const isLastAssistantMessage =
            message.type === "assistant" && isLastMessage;
          if (message.type === "user") {
            return <UserMessage key={message.id} content={message.content} />;
          }
          // For assistant messages, only show event timeline on the last assistant message.
          // This avoids duplicating the timeline for multiple assistant responses.
          return (
            <AIMessageWithTools
              key={message.id}
              content={message.content}
              eventMessages={isLastAssistantMessage ? eventMessages : []}
              isStreaming={isStreamingThis}
            />
          );
        })}
        {/* If we have event messages but no assistant message to attach them to, render them */}
        {hasEventsOnly && (
          <AIMessageWithTools
            key="events-only"
            content=""
            eventMessages={eventMessages}
            isStreaming={isStreaming}
          />
        )}
        {/* Scroll anchor */}
        <div ref={messagesEndRef} />
      </div>
    </div>
  );
}

View File

@@ -0,0 +1,142 @@
"use client";
import { useState } from "react";
import { cn } from "@/lib/utils";
import Text from "@/refresh-components/texts/Text";
import Button from "@/refresh-components/buttons/Button";
import {
SvgGlobe,
SvgExternalLink,
SvgHardDrive,
SvgFiles,
SvgX,
} from "@opal/icons";
import { getWebappUrl, ArtifactInfo } from "@/lib/build/client";
import FileBrowser from "@/app/build/components/FileBrowser";
import ArtifactList from "@/app/build/components/ArtifactList";
// Identifier for each side-panel tab.
type TabId = "preview" | "files" | "artifacts";
interface Tab {
  id: TabId;
  // Label shown next to the icon in the tab strip.
  label: string;
  icon: React.FC<{ className?: string }>;
}
// Fixed tab strip; order here is the display order.
const TABS: Tab[] = [
  { id: "preview", label: "Preview", icon: SvgGlobe },
  { id: "files", label: "Files", icon: SvgHardDrive },
  { id: "artifacts", label: "Artifacts", icon: SvgFiles },
];
interface BuildSidePanelProps {
  // Active build session; null shows the "No active session" empty state.
  sessionId: string | null;
  // All artifacts for the session (webapp artifacts are filtered out
  // of the Artifacts tab listing).
  artifacts: ArtifactInfo[];
  // Whether the session has a web app to preview.
  hasWebapp: boolean;
  // When provided, shows a close (X) button in the header.
  onClose?: () => void;
}
/**
 * BuildSidePanel - tabbed right-hand panel showing the session's live
 * web-app preview, a sandbox file browser, and downloadable artifacts.
 * Renders an empty state when no session is active.
 */
export default function BuildSidePanel({
  sessionId,
  artifacts,
  hasWebapp,
  onClose,
}: BuildSidePanelProps) {
  const [activeTab, setActiveTab] = useState<TabId>("preview");
  // Webapp artifacts are surfaced via the Preview tab, not the list.
  const fileArtifacts = artifacts.filter((a) => a.artifact_type !== "webapp");
  // NOTE(review): `hasWebapp` and `fileArtifacts` are currently unused in
  // this body (the Preview tab renders unconditionally and the Artifacts
  // tab receives the unfiltered `artifacts`) — confirm intent.
  if (!sessionId) {
    return (
      <div className="h-full flex flex-col items-center justify-center p-8 text-center">
        <SvgHardDrive className="size-12 stroke-text-02 mb-4" />
        <Text headingH3 text03>
          No active session
        </Text>
        <Text secondaryBody text02 className="mt-2">
          Start a task to view files and preview
        </Text>
      </div>
    );
  }
  return (
    <div className="h-full flex flex-col bg-background-neutral-00">
      {/* Header with tabs */}
      <div className="flex flex-row items-center justify-between border-b border-border-01 px-2">
        <div className="flex flex-row">
          {TABS.map((tab) => {
            const Icon = tab.icon;
            const isActive = activeTab === tab.id;
            return (
              <button
                key={tab.id}
                onClick={() => setActiveTab(tab.id)}
                className={cn(
                  "flex flex-row items-center gap-1.5 px-3 py-2.5",
                  "border-b-2 transition-colors",
                  isActive
                    ? "border-theme-primary-05 text-text-05"
                    : "border-transparent text-text-03 hover:text-text-04"
                )}
              >
                <Icon className="size-4" />
                <Text mainUiAction>{tab.label}</Text>
              </button>
            );
          })}
        </div>
        {onClose && (
          <button
            onClick={onClose}
            className="p-1.5 rounded-08 hover:bg-background-neutral-01 transition-colors"
          >
            <SvgX className="size-4 stroke-text-03" />
          </button>
        )}
      </div>
      {/* Tab content */}
      <div className="flex-1 overflow-auto">
        {activeTab === "preview" && (
          <div className="h-full flex flex-col">
            <div className="flex flex-row items-center justify-between p-2 border-b border-border-01">
              <Text secondaryBody text03>
                Live Preview
              </Text>
              <a
                href={getWebappUrl(sessionId)}
                target="_blank"
                rel="noopener noreferrer"
              >
                <Button action tertiary rightIcon={SvgExternalLink}>
                  Open
                </Button>
              </a>
            </div>
            <div className="flex-1 p-2">
              {/* Sandboxed iframe: scripts + same-origin + forms only. */}
              <iframe
                src={getWebappUrl(sessionId)}
                className="w-full h-full rounded-08 border border-border-01 bg-white"
                sandbox="allow-scripts allow-same-origin allow-forms"
                title="Web App Preview"
              />
            </div>
          </div>
        )}
        {activeTab === "files" && sessionId && (
          <div className="p-2">
            <FileBrowser sessionId={sessionId} />
          </div>
        )}
        {activeTab === "artifacts" && sessionId && (
          <div className="p-2">
            <ArtifactList artifacts={artifacts} sessionId={sessionId} />
          </div>
        )}
      </div>
    </div>
  );
}

View File

@@ -0,0 +1,314 @@
"use client";
import React, { useState } from "react";
import {
SvgChevronDown,
SvgTerminalSmall,
SvgFileText,
SvgEdit,
SvgPlayCircle,
SvgSettings,
SvgCheck,
SvgX,
SvgLoader,
SvgMinusCircle,
} from "@opal/icons";
import { cn } from "@/lib/utils";
import {
ToolCall,
ToolCallStatus,
} from "@/app/build/services/buildStreamingModels";
import Text from "@/refresh-components/texts/Text";
/**
* Get the appropriate icon for a tool based on its kind
*/
/**
 * Pick the step icon for a tool from its kind string.
 * Matching is case-insensitive and checked in priority order
 * (bash/execute, then write/edit, then read); anything unrecognized
 * gets the generic settings icon.
 */
function getToolIcon(kind: string) {
  const k = kind?.toLowerCase() || "";
  if (k.includes("bash") || k.includes("execute")) return SvgTerminalSmall;
  if (k.includes("write") || k === "edit") return SvgEdit;
  if (k.includes("read")) return SvgFileText;
  return SvgSettings;
}
/**
* Get status icon based on tool call status
*/
/**
 * Status → glyph mapping for a tool call row. Failed and cancelled both
 * use the X icon; unknown statuses fall back to the neutral minus icon.
 */
function getStatusIcon(status: ToolCallStatus) {
  const iconForStatus: Record<string, typeof SvgMinusCircle> = {
    completed: SvgCheck,
    failed: SvgX,
    in_progress: SvgLoader,
    pending: SvgMinusCircle,
    cancelled: SvgX,
  };
  return iconForStatus[status] ?? SvgMinusCircle;
}
/**
* Extract command from raw_input for technical display
*/
/**
 * Derive a short technical summary line for a tool call, suitable for
 * monospace display:
 *   - bash/execute tools: the shell command itself
 *   - file tools: "Edit <path>" or "Write <path>" depending on kind
 * Returns null when raw_input is missing or carries neither.
 */
function extractCommand(toolCall: ToolCall): string | null {
  const input = toolCall.raw_input;
  if (!input) return null;
  if (input.command) {
    return input.command;
  }
  const filePath = input.file_path || input.path;
  if (filePath) {
    const verb = toolCall.kind === "edit" ? "Edit" : "Write";
    return `${verb} ${filePath}`;
  }
  return null;
}
/**
* Extract user-friendly description from content or title
*/
/**
 * Best-effort human-readable description of a tool call.
 * Preference order: single text content block → joined text blocks of an
 * array content → title → name → generic "Running tool..." label.
 */
function extractDescription(toolCall: ToolCall): string {
  const content = toolCall.content;
  if (content) {
    if (content.type === "text" && content.text) {
      return content.text;
    }
    if (Array.isArray(content)) {
      const texts = content
        .filter((block: any) => block.type === "text" && block.text)
        .map((block: any) => block.text);
      if (texts.length > 0) {
        return texts.join(" ");
      }
    }
  }
  // Fall back to whatever descriptive metadata the call carries.
  return toolCall.title || toolCall.name || "Running tool...";
}
/**
* Get status text based on tool status
*/
/** Short label shown next to a step's status icon; "Unknown" as fallback. */
function getStatusText(status: ToolCallStatus): string {
  const textForStatus: Record<string, string> = {
    completed: "Complete",
    failed: "Failed",
    in_progress: "Running",
    pending: "Pending",
    cancelled: "Cancelled",
  };
  return textForStatus[status] ?? "Unknown";
}
/**
* Single tool step row with vertical connector
*/
/**
 * Single tool step row in the timeline: a tool icon in a circle joined to
 * the next row by a vertical connector, a status line, a friendly
 * description, and optional technical command / error detail boxes.
 */
function ToolStepRow({
  toolCall,
  isLastItem,
}: {
  toolCall: ToolCall;
  isLastItem: boolean;
}) {
  const ToolIcon = getToolIcon(toolCall.kind);
  const StatusIcon = getStatusIcon(toolCall.status);
  const command = extractCommand(toolCall);
  const description = extractDescription(toolCall);
  const statusText = getStatusText(toolCall.status);
  // "Loading" covers both not-yet-started and actively-running states.
  const isLoading =
    toolCall.status === "in_progress" || toolCall.status === "pending";
  const isFailed = toolCall.status === "failed";
  return (
    <div className="relative">
      {/* Vertical connector line (omitted for the final step). */}
      {!isLastItem && (
        <div
          className="absolute w-px bg-border-02 z-0"
          style={{ left: "10px", top: "20px", bottom: "0" }}
        />
      )}
      <div className={cn("flex items-start gap-2 relative z-10")}>
        {/* Icon circle */}
        <div className="flex flex-col items-center w-5">
          <div className="flex-shrink-0 flex items-center justify-center w-5 h-5 bg-background rounded-full border border-border-01">
            <ToolIcon
              className={cn("w-3.5 h-3.5", isLoading && "text-action-link-01")}
            />
          </div>
        </div>
        {/* Content */}
        <div
          className={cn(
            "flex-1 min-w-0 overflow-hidden",
            !isLastItem && "pb-4"
          )}
        >
          {/* Status and title */}
          <div className="flex items-center gap-2 mb-1">
            <StatusIcon
              className={cn(
                "w-3.5 h-3.5",
                toolCall.status === "completed" && "text-status-success-03",
                toolCall.status === "failed" && "text-status-error-03",
                toolCall.status === "in_progress" &&
                  "text-action-link-01 animate-spin",
                toolCall.status === "pending" && "text-text-03"
              )}
            />
            <Text
              text02
              className={cn(isLoading && !isFailed && "loading-text")}
            >
              {statusText}
            </Text>
          </div>
          {/* User-friendly description */}
          <Text secondaryBody text03 className="mb-1">
            {description}
          </Text>
          {/* Technical command (if available) */}
          {command && (
            <div className="mt-1 px-2 py-1 rounded bg-background-tint-02 border border-border-01">
              <Text secondaryMono text03 className="text-xs break-all">
                {command}
              </Text>
            </div>
          )}
          {/* Error message */}
          {toolCall.error && (
            <div className="mt-1 px-2 py-1 rounded bg-status-error-01 border border-status-error-02">
              <Text secondaryMono className="text-xs text-status-error-03">
                {toolCall.error}
              </Text>
            </div>
          )}
        </div>
      </div>
    </div>
  );
}
interface BuildToolStepsRendererProps {
toolCalls: ToolCall[];
}
/**
* BuildToolStepsRenderer - Displays tool calls in chronological order
* with both technical details and user-friendly descriptions.
*
* Inspired by ResearchAgentRenderer from deep research UI.
*/
export default function BuildToolStepsRenderer({
  toolCalls,
}: BuildToolStepsRendererProps) {
  const [isExpanded, setIsExpanded] = useState(true);
  // Nothing to render without tool calls (after the hook, so hook order
  // stays stable across renders).
  if (toolCalls.length === 0) return null;
  // Sort tool calls chronologically (copy first; never mutate the prop).
  const sortedToolCalls = [...toolCalls].sort(
    (a, b) => a.startedAt.getTime() - b.startedAt.getTime()
  );
  // Determine overall status for the collapsed header.
  const hasActiveTools = sortedToolCalls.some(
    (tc) => tc.status === "in_progress" || tc.status === "pending"
  );
  const hasFailedTools = sortedToolCalls.some((tc) => tc.status === "failed");
  const allComplete = sortedToolCalls.every((tc) => tc.status === "completed");
  // Header text priority: all complete > any failed > most recent active
  // step's description > generic "Processing".
  let statusText: string;
  if (allComplete) {
    statusText = "All steps complete";
  } else if (hasFailedTools) {
    statusText = "Some steps failed";
  } else if (hasActiveTools) {
    const activeTools = sortedToolCalls.filter(
      (tc) => tc.status === "in_progress" || tc.status === "pending"
    );
    const lastActiveTool = activeTools[activeTools.length - 1];
    statusText = lastActiveTool
      ? extractDescription(lastActiveTool)
      : "Processing";
  } else {
    statusText = "Processing";
  }
  return (
    <div className="my-2 p-3 rounded-lg border border-border-01 bg-background-neutral-02">
      {/* Header with toggle */}
      <div
        className="flex items-center justify-between gap-2 cursor-pointer group"
        onClick={() => setIsExpanded(!isExpanded)}
      >
        <div className="flex items-center gap-2">
          <SvgPlayCircle className="w-4 h-4 text-text-03" />
          <Text text02 className="truncate">
            {statusText}
          </Text>
        </div>
        <div className="flex items-center gap-2">
          <Text secondaryMono text03 className="text-xs">
            {sortedToolCalls.length}{" "}
            {sortedToolCalls.length === 1 ? "step" : "steps"}
          </Text>
          <SvgChevronDown
            className={cn(
              "w-4 h-4 stroke-text-03 transition-transform duration-150 ease-in-out",
              !isExpanded && "rotate-[-90deg]"
            )}
          />
        </div>
      </div>
      {/* Collapsible tool steps (max-h transition as a cheap expand/collapse) */}
      <div
        className={cn(
          "overflow-hidden transition-all duration-200 ease-in-out",
          isExpanded ? "max-h-[2000px] opacity-100 mt-3" : "max-h-0 opacity-0"
        )}
      >
        <div className="space-y-0.5">
          {sortedToolCalls.map((toolCall, index) => (
            <ToolStepRow
              key={toolCall.id}
              toolCall={toolCall}
              isLastItem={index === sortedToolCalls.length - 1}
            />
          ))}
        </div>
      </div>
    </div>
  );
}

View File

@@ -0,0 +1,51 @@
"use client";
import { LlmManager } from "@/lib/hooks";
import { BuildFile } from "@/app/build/contexts/UploadFilesContext";
import Text from "@/refresh-components/texts/Text";
import Logo from "@/refresh-components/Logo";
import InputBar from "@/app/build/components/InputBar";
interface BuildWelcomeProps {
onSubmit: (message: string, files: BuildFile[]) => void;
isRunning: boolean;
llmManager: LlmManager;
/** When true, shows spinner on send button with "Initializing sandbox..." tooltip */
sandboxInitializing?: boolean;
}
/**
* BuildWelcome - Welcome screen shown when no session exists
*
* Displays a centered welcome message and input bar to start a new build.
*/
export default function BuildWelcome({
  onSubmit,
  isRunning,
  llmManager,
  sandboxInitializing = false,
}: BuildWelcomeProps) {
  return (
    <div className="h-full flex flex-col items-center justify-center px-4">
      {/* Centered hero: logo, headline, and a short capability blurb. */}
      <div className="flex flex-col items-center gap-4 mb-8">
        <Logo folded size={48} />
        <Text headingH2 text05>
          What would you like to build?
        </Text>
        <Text secondaryBody text03 className="text-center max-w-md">
          Describe your task and I'll execute it in an isolated environment. You
          can build web apps, run scripts, process data, and more.
        </Text>
      </div>
      {/* Input bar that starts the first session; sandboxInitializing is
          forwarded so the send button can reflect pre-provisioning. */}
      <div className="w-full max-w-2xl">
        <InputBar
          onSubmit={onSubmit}
          isRunning={isRunning}
          placeholder="Create a React app that shows a dashboard..."
          llmManager={llmManager}
          sandboxInitializing={sandboxInitializing}
        />
      </div>
    </div>
  );
}

View File

@@ -0,0 +1,217 @@
"use client";
import { useCallback } from "react";
import { useRouter } from "next/navigation";
import {
useSession,
useSessionId,
useHasSession,
useIsRunning,
useOutputPanelOpen,
useToggleOutputPanel,
useBuildSessionStore,
useIsPreProvisioning,
} from "@/app/build/hooks/useBuildSessionStore";
import { useBuildStreaming } from "@/app/build/hooks/useBuildStreaming";
import { BuildFile } from "@/app/build/contexts/UploadFilesContext";
import { uploadFile } from "@/app/build/services/apiServices";
import { useLlmManager } from "@/lib/hooks";
import { BUILD_SEARCH_PARAM_NAMES } from "@/app/build/services/searchParams";
import { usePopup } from "@/components/admin/connectors/Popup";
import InputBar from "@/app/build/components/InputBar";
import BuildWelcome from "@/app/build/components/BuildWelcome";
import BuildMessageList from "@/app/build/components/BuildMessageList";
import OutputPanelTab from "@/app/build/components/OutputPanelTab";
import SandboxStatusIndicator from "@/app/build/components/SandboxStatusIndicator";
interface BuildChatPanelProps {
/** Session ID from URL - used to prevent welcome flash while loading */
existingSessionId?: string | null;
}
/**
* BuildChatPanel - Center panel containing the chat interface
*
* Handles:
* - Welcome state (no session)
* - Message list (when session exists)
* - Input bar at bottom
* - Header with output panel toggle
*/
export default function BuildChatPanel({
  existingSessionId,
}: BuildChatPanelProps) {
  const router = useRouter();
  const { popup, setPopup } = usePopup();
  const outputPanelOpen = useOutputPanelOpen();
  const toggleOutputPanel = useToggleOutputPanel();
  const session = useSession();
  const sessionId = useSessionId();
  const hasSession = useHasSession();
  const isRunning = useIsRunning();
  // Access actions directly like chat does - these don't cause re-renders
  const consumePreProvisionedSession = useBuildSessionStore(
    (state) => state.consumePreProvisionedSession
  );
  const createNewSession = useBuildSessionStore(
    (state) => state.createNewSession
  );
  const createSession = useBuildSessionStore((state) => state.createSession);
  const appendMessageToCurrent = useBuildSessionStore(
    (state) => state.appendMessageToCurrent
  );
  // NOTE(review): appendMessageToSession is selected (and listed in the
  // callback deps below) but never called in this component — confirm
  // whether it can be removed.
  const appendMessageToSession = useBuildSessionStore(
    (state) => state.appendMessageToSession
  );
  const refreshSessionHistory = useBuildSessionStore(
    (state) => state.refreshSessionHistory
  );
  const nameBuildSession = useBuildSessionStore(
    (state) => state.nameBuildSession
  );
  const { streamMessage } = useBuildStreaming();
  const isPreProvisioning = useIsPreProvisioning();
  const llmManager = useLlmManager();
  // Single submit path for both the welcome screen and the in-session
  // input bar. Three flows:
  //   1. existing session: append user message locally, then stream.
  //   2. no session, no pre-provisioned sandbox: createNewSession fallback.
  //   3. no session, pre-provisioned sandbox available: seed local store,
  //      upload files, navigate, name the session, then stream.
  const handleSubmit = useCallback(
    async (message: string, files: BuildFile[]) => {
      if (hasSession && sessionId) {
        // Existing session flow
        // Check if response is still streaming - show toast like main chat does
        if (isRunning) {
          setPopup({
            message: "Please wait for the current operation to complete.",
            type: "error",
          });
          return;
        }
        // Add user message to state.
        // NOTE(review): ids derive from Date.now() — two messages created
        // in the same millisecond would collide; verify ids only need to
        // be unique within a session's lifetime.
        appendMessageToCurrent({
          id: `msg-${Date.now()}`,
          type: "user",
          content: message,
          timestamp: new Date(),
        });
        // Stream the response
        await streamMessage(sessionId, message);
      } else {
        // New session flow - get pre-provisioned session or fall back to creating new one
        const newSessionId = await consumePreProvisionedSession();
        if (!newSessionId) {
          // Fallback: createNewSession handles everything including navigation
          const fallbackSessionId = await createNewSession(message);
          if (fallbackSessionId) {
            // Upload any attached files before streaming begins.
            if (files.length > 0) {
              await Promise.all(
                files
                  .filter((f) => f.file)
                  .map((f) => uploadFile(fallbackSessionId, f.file!))
              );
            }
            await streamMessage(fallbackSessionId, message);
          }
        } else {
          // Pre-provisioned session flow:
          // The backend session already exists (created during pre-provisioning).
          // Here we initialize the LOCAL Zustand store entry with the right state.
          const userMessage = {
            id: `msg-${Date.now()}`,
            type: "user" as const,
            content: message,
            timestamp: new Date(),
          };
          // Initialize local state (NOT an API call - backend session already exists)
          // - status: "running" disables input immediately
          // - isLoaded: false allows loadSession to fetch sandbox info while preserving messages
          createSession(newSessionId, {
            messages: [userMessage],
            status: "running",
          });
          // 2. Upload files before navigation
          if (files.length > 0) {
            await Promise.all(
              files
                .filter((f) => f.file)
                .map((f) => uploadFile(newSessionId, f.file!))
            );
          }
          // 3. Navigate to URL - session controller will set currentSessionId
          router.push(
            `/build/v1?${BUILD_SEARCH_PARAM_NAMES.SESSION_ID}=${newSessionId}`
          );
          // 4. Name the session and refresh history
          setTimeout(() => nameBuildSession(newSessionId), 200);
          await refreshSessionHistory();
          // 5. Stream the response (uses session ID directly, not currentSessionId)
          await streamMessage(newSessionId, message);
        }
      }
    },
    [
      hasSession,
      sessionId,
      isRunning,
      setPopup,
      appendMessageToCurrent,
      streamMessage,
      consumePreProvisionedSession,
      createNewSession,
      createSession,
      appendMessageToSession,
      refreshSessionHistory,
      nameBuildSession,
      router,
    ]
  );
  return (
    <div className="flex flex-col h-full w-full">
      {popup}
      {/* Chat header */}
      <div className="flex flex-row items-center justify-between pl-4 py-3">
        <SandboxStatusIndicator />
        {/* Output panel tab in header */}
        <OutputPanelTab isOpen={outputPanelOpen} onClick={toggleOutputPanel} />
      </div>
      {/* Main content area: existingSessionId (from the URL) keeps the
          welcome screen from flashing while a session is still loading. */}
      <div className="flex-1 overflow-auto">
        {!hasSession && !existingSessionId ? (
          <BuildWelcome
            onSubmit={handleSubmit}
            isRunning={isRunning}
            llmManager={llmManager}
            sandboxInitializing={isPreProvisioning}
          />
        ) : (
          <BuildMessageList
            messages={session?.messages ?? []}
            isStreaming={isRunning}
          />
        )}
      </div>
      {/* Input bar at bottom when session exists */}
      {(hasSession || existingSessionId) && (
        <div className="px-4 pb-4 pt-2">
          <div className="max-w-2xl mx-auto">
            <InputBar
              onSubmit={handleSubmit}
              isRunning={isRunning}
              placeholder="Continue the conversation..."
              llmManager={llmManager}
              sessionId={sessionId ?? undefined}
            />
          </div>
        </div>
      )}
    </div>
  );
}

View File

@@ -0,0 +1,318 @@
"use client";
import { useState, useCallback, useEffect } from "react";
import Text from "@/refresh-components/texts/Text";
import Button from "@/refresh-components/buttons/Button";
import {
Collapsible,
CollapsibleContent,
CollapsibleTrigger,
} from "@/refresh-components/Collapsible";
import {
SvgFolder,
SvgFolderOpen,
SvgFileSmall,
SvgChevronRight,
SvgChevronDown,
SvgDownloadCloud,
SvgEye,
SvgHardDrive,
SvgLoader,
} from "@opal/icons";
import {
listDirectory,
getArtifactUrl,
FileSystemEntry,
} from "@/lib/build/client";
import FilePreviewModal from "@/app/build/components/FilePreviewModal";
interface FileBrowserProps {
  // Build session whose sandbox filesystem is being browsed.
  sessionId: string;
}
interface DirectoryNodeProps {
  // The directory entry this node renders.
  entry: FileSystemEntry;
  sessionId: string;
  // Nesting depth, used to compute left indentation of the row.
  depth: number;
  // Invoked when a file under this directory is selected for preview.
  onPreview: (entry: FileSystemEntry) => void;
}
/**
 * One directory row in the workspace tree. Child listings are fetched
 * lazily the first time the folder is expanded and cached for the
 * component's lifetime; a failed fetch can be retried by re-expanding.
 */
function DirectoryNode({
  entry,
  sessionId,
  depth,
  onPreview,
}: DirectoryNodeProps) {
  const [expanded, setExpanded] = useState(false);
  const [childEntries, setChildEntries] = useState<FileSystemEntry[] | null>(
    null
  );
  const [loading, setLoading] = useState(false);
  const [loadError, setLoadError] = useState<string | null>(null);

  const fetchChildren = useCallback(async () => {
    // Cached after the first successful fetch; re-expanding reuses it.
    if (childEntries !== null) return;
    setLoading(true);
    setLoadError(null);
    try {
      const listing = await listDirectory(sessionId, entry.path);
      setChildEntries(listing.entries);
    } catch (err) {
      setLoadError(
        err instanceof Error ? err.message : "Failed to load directory"
      );
    } finally {
      setLoading(false);
    }
  }, [sessionId, entry.path, childEntries]);

  const handleToggle = async (open: boolean) => {
    setExpanded(open);
    if (open) {
      await fetchChildren();
    }
  };

  const paddingLeft = depth * 1.25;

  // Leading icons: spinner while a fetch is in flight, otherwise a chevron
  // and folder glyph that both track the expanded state.
  const chevronIcon = loading ? (
    <SvgLoader className="size-4 stroke-text-03 animate-spin" />
  ) : expanded ? (
    <SvgChevronDown className="size-4 stroke-text-03" />
  ) : (
    <SvgChevronRight className="size-4 stroke-text-03" />
  );
  const folderIcon = expanded ? (
    <SvgFolderOpen className="size-4 stroke-text-03" />
  ) : (
    <SvgFolder className="size-4 stroke-text-03" />
  );

  return (
    <Collapsible open={expanded} onOpenChange={handleToggle}>
      <CollapsibleTrigger asChild>
        <button
          className="w-full flex flex-row items-center gap-2 p-2 hover:bg-background-neutral-01 rounded-08 transition-colors"
          style={{ paddingLeft: `${paddingLeft}rem` }}
        >
          {chevronIcon}
          {folderIcon}
          <Text mainContentMono text04 className="truncate">
            {entry.name}
          </Text>
        </button>
      </CollapsibleTrigger>
      <CollapsibleContent>
        {loadError && (
          <div style={{ paddingLeft: `${paddingLeft + 1.25}rem` }}>
            <Text secondaryBody className="text-status-error-01">
              {loadError}
            </Text>
          </div>
        )}
        {childEntries?.map((child) => {
          // Directories recurse into DirectoryNode; files render a leaf row.
          const Node = child.is_directory ? DirectoryNode : FileNode;
          return (
            <Node
              key={child.path}
              entry={child}
              sessionId={sessionId}
              depth={depth + 1}
              onPreview={onPreview}
            />
          );
        })}
      </CollapsibleContent>
    </Collapsible>
  );
}
/** Props for one file (leaf) row in the tree. */
interface FileNodeProps {
  // File entry this row renders; entry.path feeds getArtifactUrl().
  entry: FileSystemEntry;
  // Session the file belongs to, used to build the download URL.
  sessionId: string;
  // Nesting level, used only to compute left padding.
  depth: number;
  // Invoked with this entry when the "Preview" button is clicked.
  onPreview: (entry: FileSystemEntry) => void;
}
/**
 * A single file row: name, human-readable size, and hover-revealed
 * Preview / Download actions. Preview is offered for textual or image
 * MIME types, or for well-known text extensions when the sandbox
 * reports no usable MIME type.
 */
function FileNode({ entry, sessionId, depth, onPreview }: FileNodeProps) {
  const paddingLeft = depth * 1.25;
  const downloadUrl = getArtifactUrl(sessionId, entry.path);
  // Extensions treated as previewable text regardless of MIME type.
  const textExtensions = [
    ".md",
    ".txt",
    ".json",
    ".js",
    ".ts",
    ".tsx",
    ".jsx",
    ".css",
    ".html",
    ".py",
    ".yaml",
    ".yml",
  ];
  const canPreview =
    entry.mime_type?.startsWith("text/") ||
    entry.mime_type?.startsWith("image/") ||
    entry.mime_type === "application/json" ||
    textExtensions.some((ext) => entry.name.endsWith(ext));
  // Human-readable size with B / KB / MB / GB tiers. (Previously capped at
  // MB, which rendered multi-GB artifacts as e.g. "2048.0 MB".)
  const formatSize = (bytes: number | null) => {
    if (bytes === null) return "";
    if (bytes < 1024) return `${bytes} B`;
    if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(1)} KB`;
    if (bytes < 1024 * 1024 * 1024)
      return `${(bytes / (1024 * 1024)).toFixed(1)} MB`;
    return `${(bytes / (1024 * 1024 * 1024)).toFixed(1)} GB`;
  };
  return (
    <div
      className="w-full flex flex-row items-center gap-2 p-2 hover:bg-background-neutral-01 rounded-08 transition-colors group"
      style={{ paddingLeft: `${paddingLeft + 1.25}rem` }}
    >
      <SvgFileSmall className="size-4 stroke-text-03 shrink-0" />
      <Text mainContentMono text04 className="truncate flex-1">
        {entry.name}
      </Text>
      {entry.size !== null && (
        <Text secondaryBody text03 className="shrink-0">
          {formatSize(entry.size)}
        </Text>
      )}
      {/* Actions are hidden until the row is hovered (group-hover). */}
      <div className="flex flex-row gap-1 opacity-0 group-hover:opacity-100 transition-opacity">
        {canPreview && (
          <Button
            action
            tertiary
            leftIcon={SvgEye}
            onClick={(e) => {
              e.stopPropagation();
              onPreview(entry);
            }}
          >
            Preview
          </Button>
        )}
        <a
          href={downloadUrl}
          download={entry.name}
          onClick={(e) => e.stopPropagation()}
        >
          <Button action tertiary leftIcon={SvgDownloadCloud}>
            Download
          </Button>
        </a>
      </div>
    </div>
  );
}
/**
 * Collapsible tree view of a build session's sandbox workspace.
 * The root listing is fetched once per session and cached; directories
 * below the root fetch lazily (see DirectoryNode). Clicking "Preview"
 * on a file opens FilePreviewModal.
 */
export default function FileBrowser({ sessionId }: FileBrowserProps) {
  const [rootEntries, setRootEntries] = useState<FileSystemEntry[] | null>(
    null
  );
  const [isLoading, setIsLoading] = useState(false);
  const [error, setError] = useState<string | null>(null);
  const [previewFile, setPreviewFile] = useState<FileSystemEntry | null>(null);
  const [isOpen, setIsOpen] = useState(true);
  const loadRoot = useCallback(async () => {
    // Cached after the first successful fetch for the current session.
    if (rootEntries !== null) return;
    setIsLoading(true);
    setError(null);
    try {
      const listing = await listDirectory(sessionId);
      setRootEntries(listing.entries);
    } catch (err) {
      setError(
        err instanceof Error ? err.message : "Failed to load file system"
      );
    } finally {
      setIsLoading(false);
    }
  }, [sessionId, rootEntries]);
  const handleToggleRoot = async (open: boolean) => {
    setIsOpen(open);
    if (open) {
      await loadRoot();
    }
  };
  const handlePreview = (entry: FileSystemEntry) => {
    setPreviewFile(entry);
  };
  const handleClosePreview = () => {
    setPreviewFile(null);
  };
  // Drop cached state whenever the session changes so the auto-load effect
  // re-fetches the new session's workspace. (Previously the listing was
  // fetched only on mount, leaving a stale tree if sessionId changed.)
  useEffect(() => {
    setRootEntries(null);
    setError(null);
    setPreviewFile(null);
  }, [sessionId]);
  // Auto-load: runs on mount and again whenever loadRoot's identity changes
  // (i.e. after the cache reset above). The cache guard inside loadRoot
  // prevents an infinite effect loop once entries are populated.
  useEffect(() => {
    loadRoot();
  }, [loadRoot]);
  return (
    <>
      <div className="border border-border-01 rounded-08 overflow-hidden">
        <Collapsible open={isOpen} onOpenChange={handleToggleRoot}>
          <CollapsibleTrigger asChild>
            <button className="w-full flex flex-row items-center gap-2 p-2 bg-background-neutral-01 hover:bg-background-neutral-02 transition-colors">
              {isLoading ? (
                <SvgLoader className="size-4 stroke-text-03 animate-spin" />
              ) : isOpen ? (
                <SvgChevronDown className="size-4 stroke-text-03" />
              ) : (
                <SvgChevronRight className="size-4 stroke-text-03" />
              )}
              <SvgHardDrive className="size-4 stroke-text-03" />
              <Text mainUiAction text03>
                Workspace Files
              </Text>
            </button>
          </CollapsibleTrigger>
          <CollapsibleContent>
            <div className="p-1 max-h-[50vh] overflow-auto">
              {error && (
                <Text secondaryBody className="text-status-error-01 p-2">
                  {error}
                </Text>
              )}
              {rootEntries?.length === 0 && (
                <Text secondaryBody text03 className="p-2 text-center">
                  No files yet
                </Text>
              )}
              {rootEntries?.map((entry) =>
                entry.is_directory ? (
                  <DirectoryNode
                    key={entry.path}
                    entry={entry}
                    sessionId={sessionId}
                    depth={0}
                    onPreview={handlePreview}
                  />
                ) : (
                  <FileNode
                    key={entry.path}
                    entry={entry}
                    sessionId={sessionId}
                    depth={0}
                    onPreview={handlePreview}
                  />
                )
              )}
            </div>
          </CollapsibleContent>
        </Collapsible>
      </div>
      {previewFile && (
        <FilePreviewModal
          sessionId={sessionId}
          entry={previewFile}
          onClose={handleClosePreview}
        />
      )}
    </>
  );
}

View File

@@ -0,0 +1,103 @@
"use client";
import { useState, useEffect } from "react";
import Modal from "@/refresh-components/Modal";
import Text from "@/refresh-components/texts/Text";
import Button from "@/refresh-components/buttons/Button";
import SimpleLoader from "@/refresh-components/loaders/SimpleLoader";
import { SvgFileText, SvgDownloadCloud, SvgImage } from "@opal/icons";
import { getArtifactUrl, FileSystemEntry } from "@/lib/build/client";
/** Props for the file preview dialog. */
interface FilePreviewModalProps {
  // Session the previewed file belongs to; used to build the artifact URL.
  sessionId: string;
  // File being previewed; mime_type decides image vs. text rendering.
  entry: FileSystemEntry;
  // Called when the modal is dismissed (close button or overlay).
  onClose: () => void;
}
/**
 * Modal that previews a workspace file. Images are rendered directly from
 * the artifact URL; any other file is fetched as text and shown in a
 * monospace <pre>. A Download link is always offered in the footer.
 */
export default function FilePreviewModal({
  sessionId,
  entry,
  onClose,
}: FilePreviewModalProps) {
  const [content, setContent] = useState<string | null>(null);
  const [isLoading, setIsLoading] = useState(true);
  const [error, setError] = useState<string | null>(null);
  const downloadUrl = getArtifactUrl(sessionId, entry.path);
  const isImage = entry.mime_type?.startsWith("image/");
  useEffect(() => {
    if (isImage) {
      // Images load via the <img> tag; no text fetch needed.
      setIsLoading(false);
      return;
    }
    // Abort in-flight fetches on deps change/unmount so a stale response
    // cannot clobber state for a newer file (the original had no
    // cancellation and could set state after unmount).
    const controller = new AbortController();
    const fetchContent = async () => {
      setIsLoading(true);
      setError(null);
      try {
        const response = await fetch(downloadUrl, {
          signal: controller.signal,
        });
        if (!response.ok) {
          throw new Error(`Failed to fetch file: ${response.statusText}`);
        }
        const text = await response.text();
        setContent(text);
      } catch (err) {
        // Aborted requests are expected during cleanup — not an error.
        if (controller.signal.aborted) return;
        setError(err instanceof Error ? err.message : "Failed to load file");
      } finally {
        if (!controller.signal.aborted) {
          setIsLoading(false);
        }
      }
    };
    fetchContent();
    return () => controller.abort();
  }, [downloadUrl, isImage]);
  return (
    <Modal open onOpenChange={(open) => !open && onClose()}>
      <Modal.Content>
        <Modal.Header
          icon={isImage ? SvgImage : SvgFileText}
          title={entry.name}
          description={entry.path}
          onClose={onClose}
        />
        <Modal.Body>
          {isLoading ? (
            <div className="flex items-center justify-center p-8">
              <SimpleLoader />
            </div>
          ) : error ? (
            <Text secondaryBody className="text-status-error-01">
              {error}
            </Text>
          ) : isImage ? (
            <div className="flex items-center justify-center p-4">
              {/* eslint-disable-next-line @next/next/no-img-element */}
              <img
                src={downloadUrl}
                alt={entry.name}
                className="max-w-full max-h-[60vh] object-contain rounded-08"
              />
            </div>
          ) : (
            <div className="w-full overflow-auto max-h-[60vh] rounded-08 bg-background-neutral-02 border border-border-01">
              <pre className="p-4 text-sm font-mono whitespace-pre-wrap break-words text-text-04">
                {content}
              </pre>
            </div>
          )}
        </Modal.Body>
        <Modal.Footer>
          <a href={downloadUrl} download={entry.name}>
            <Button action secondary leftIcon={SvgDownloadCloud}>
              Download
            </Button>
          </a>
          <Button action primary onClick={onClose}>
            Close
          </Button>
        </Modal.Footer>
      </Modal.Content>
    </Modal>
  );
}

View File

@@ -0,0 +1,322 @@
"use client";
import React, { useCallback, useEffect, useRef, useState } from "react";
import { cn, isImageFile } from "@/lib/utils";
import { LlmManager } from "@/lib/hooks";
import {
useUploadFilesContext,
BuildFile,
UploadFileStatus,
} from "@/app/build/contexts/UploadFilesContext";
import IconButton from "@/refresh-components/buttons/IconButton";
import LLMPopover from "@/refresh-components/popovers/LLMPopover";
import {
SvgArrowUp,
SvgFileText,
SvgImage,
SvgLoader,
SvgX,
SvgPaperclip,
} from "@opal/icons";
// Maximum height (px) the auto-growing textarea may reach before scrolling.
const MAX_INPUT_HEIGHT = 200;
/** Imperative handle exposed to parents via ref. */
export interface InputBarHandle {
  // Clears the draft message and any attached files.
  reset: () => void;
  // Moves keyboard focus into the textarea.
  focus: () => void;
}
/** Props for the build-chat input bar. */
export interface InputBarProps {
  // Called with the trimmed message and attached files on submit.
  onSubmit: (message: string, files: BuildFile[]) => void;
  // True while a response is streaming; blocks further submits.
  isRunning: boolean;
  // Disables the entire bar (visually dimmed, pointer events off).
  disabled?: boolean;
  // Placeholder text for the empty textarea.
  placeholder?: string;
  // Provides the model selection shown in the LLM popover.
  llmManager: LlmManager;
  /** Session ID for immediate file uploads. If provided, files upload immediately when attached. */
  sessionId?: string;
  /** When true, shows spinner on send button with "Initializing sandbox..." tooltip */
  sandboxInitializing?: boolean;
}
/**
* Simple file card for displaying attached files
*/
/**
 * Compact chip for a file attached to the draft message: an icon that
 * reflects upload state (spinner) or file kind (image vs. generic), the
 * truncated file name, and a remove button.
 */
function BuildFileCard({
  file,
  onRemove,
}: {
  file: BuildFile;
  onRemove: (id: string) => void;
}) {
  const isImage = isImageFile(file.name);
  const isUploading = file.status === UploadFileStatus.UPLOADING;
  return (
    <div
      className={cn(
        "flex items-center gap-1.5 px-2 py-1 rounded-08",
        "bg-background-neutral-01 border border-border-01",
        "text-sm text-text-04"
      )}
    >
      {isUploading ? (
        <SvgLoader className="h-4 w-4 animate-spin text-text-03" />
      ) : isImage ? (
        <SvgImage className="h-4 w-4 text-text-03" />
      ) : (
        <SvgFileText className="h-4 w-4 text-text-03" />
      )}
      <span className="max-w-[120px] truncate">{file.name}</span>
      {/* type="button" prevents accidental form submission; aria-label gives
          the icon-only control an accessible name. */}
      <button
        type="button"
        aria-label={`Remove ${file.name}`}
        onClick={() => onRemove(file.id)}
        className="ml-1 p-0.5 hover:bg-background-neutral-02 rounded"
      >
        <SvgX className="h-3 w-3 text-text-03" />
      </button>
    </div>
  );
}
/**
 * Chat input bar for the build page: auto-resizing textarea, file
 * attachment (picker + paste), LLM model popover, and a send button that
 * is gated on running/uploading/sandbox-initializing state. Exposes
 * reset()/focus() to parents through InputBarHandle.
 */
const InputBar = React.memo(
  React.forwardRef<InputBarHandle, InputBarProps>(
    (
      {
        onSubmit,
        isRunning,
        disabled = false,
        placeholder = "Describe your task...",
        llmManager,
        sessionId,
        sandboxInitializing = false,
      },
      ref
    ) => {
      const [message, setMessage] = useState("");
      const textAreaRef = useRef<HTMLTextAreaElement>(null);
      const containerRef = useRef<HTMLDivElement>(null);
      const fileInputRef = useRef<HTMLInputElement>(null);
      const {
        currentMessageFiles,
        uploadFiles,
        removeFile,
        clearFiles,
        hasUploadingFiles,
      } = useUploadFilesContext();
      // Expose reset and focus methods to parent via ref
      React.useImperativeHandle(ref, () => ({
        reset: () => {
          setMessage("");
          clearFiles();
        },
        focus: () => {
          textAreaRef.current?.focus();
        },
      }));
      // Auto-resize textarea based on content, capped at MAX_INPUT_HEIGHT
      useEffect(() => {
        const textarea = textAreaRef.current;
        if (textarea) {
          // Collapse first so scrollHeight reflects only the current content.
          textarea.style.height = "0px";
          textarea.style.height = `${Math.min(
            textarea.scrollHeight,
            MAX_INPUT_HEIGHT
          )}px`;
        }
      }, [message]);
      // Auto-focus on mount
      useEffect(() => {
        textAreaRef.current?.focus();
      }, []);
      const handleFileSelect = useCallback(
        async (e: React.ChangeEvent<HTMLInputElement>) => {
          const files = e.target.files;
          if (!files || files.length === 0) return;
          // Pass sessionId so files upload immediately if session exists
          uploadFiles(Array.from(files), sessionId);
          // Clear the input so picking the same file again re-fires change.
          e.target.value = "";
        },
        [uploadFiles, sessionId]
      );
      const handlePaste = useCallback(
        (event: React.ClipboardEvent) => {
          const items = event.clipboardData?.items;
          if (items) {
            const pastedFiles: File[] = [];
            for (let i = 0; i < items.length; i++) {
              const item = items[i];
              if (item && item.kind === "file") {
                const file = item.getAsFile();
                if (file) pastedFiles.push(file);
              }
            }
            if (pastedFiles.length > 0) {
              event.preventDefault();
              // Pass sessionId so files upload immediately if session exists
              uploadFiles(pastedFiles, sessionId);
            }
          }
        },
        [uploadFiles, sessionId]
      );
      const handleInputChange = useCallback(
        (event: React.ChangeEvent<HTMLTextAreaElement>) => {
          setMessage(event.target.value);
        },
        []
      );
      const handleSubmit = useCallback(() => {
        if (
          !message.trim() ||
          disabled ||
          isRunning ||
          hasUploadingFiles ||
          sandboxInitializing
        )
          return;
        onSubmit(message.trim(), currentMessageFiles);
        setMessage("");
        clearFiles();
      }, [
        message,
        disabled,
        isRunning,
        hasUploadingFiles,
        sandboxInitializing,
        onSubmit,
        currentMessageFiles,
        clearFiles,
      ]);
      const handleKeyDown = useCallback(
        (event: React.KeyboardEvent<HTMLTextAreaElement>) => {
          // Enter submits; Shift+Enter inserts a newline. Skip while an IME
          // composition is active so Enter can commit the composition.
          // (nativeEvent is a DOM KeyboardEvent, which types isComposing —
          // the previous `as any` cast was unnecessary.)
          if (
            event.key === "Enter" &&
            !event.shiftKey &&
            !event.nativeEvent.isComposing
          ) {
            event.preventDefault();
            handleSubmit();
          }
        },
        [handleSubmit]
      );
      const canSubmit =
        message.trim().length > 0 &&
        !disabled &&
        !isRunning &&
        !hasUploadingFiles &&
        !sandboxInitializing;
      return (
        <div
          ref={containerRef}
          className={cn(
            "w-full flex flex-col shadow-01 bg-background-neutral-00 rounded-16",
            disabled && "opacity-50 cursor-not-allowed pointer-events-none"
          )}
          aria-disabled={disabled}
        >
          {/* Hidden file input */}
          <input
            ref={fileInputRef}
            type="file"
            className="hidden"
            multiple
            onChange={handleFileSelect}
            accept="*/*"
          />
          {/* Attached Files */}
          {currentMessageFiles.length > 0 && (
            <div className="p-2 rounded-t-16 flex flex-wrap gap-1">
              {currentMessageFiles.map((file) => (
                <BuildFileCard
                  key={file.id}
                  file={file}
                  onRemove={(id) => removeFile(id, sessionId)}
                />
              ))}
            </div>
          )}
          {/* Input area. Note: no role/aria-multiline overrides — a native
              textarea already exposes the implicit "textbox" role with
              aria-multiline=true; the previous role="textarea" was an
              invalid ARIA role value. */}
          <textarea
            onPaste={handlePaste}
            onChange={handleInputChange}
            onKeyDown={handleKeyDown}
            ref={textAreaRef}
            className={cn(
              "w-full",
              "h-[44px]",
              "outline-none",
              "bg-transparent",
              "resize-none",
              "placeholder:text-text-03",
              "whitespace-pre-wrap",
              "break-word",
              "overscroll-contain",
              "overflow-y-auto",
              "px-3",
              "pb-2",
              "pt-3"
            )}
            autoFocus
            style={{ scrollbarWidth: "thin" }}
            placeholder={placeholder}
            value={message}
            disabled={disabled}
          />
          {/* Bottom controls */}
          <div className="flex justify-between items-center w-full p-1 min-h-[40px]">
            {/* Bottom left controls */}
            <div className="flex flex-row items-center">
              {/* (+) button for file upload */}
              <IconButton
                icon={SvgPaperclip}
                tooltip="Attach Files"
                tertiary
                disabled={disabled}
                onClick={() => fileInputRef.current?.click()}
              />
            </div>
            {/* Bottom right controls */}
            <div className="flex flex-row items-center gap-1">
              {/* LLM popover */}
              <div className={cn(llmManager.isLoadingProviders && "invisible")}>
                <LLMPopover llmManager={llmManager} disabled={disabled} />
              </div>
              {/* Submit button */}
              <IconButton
                icon={sandboxInitializing ? SvgLoader : SvgArrowUp}
                onClick={handleSubmit}
                disabled={!canSubmit}
                tooltip={
                  sandboxInitializing ? "Initializing sandbox..." : "Send"
                }
                iconClassName={sandboxInitializing ? "animate-spin" : undefined}
              />
            </div>
          </div>
        </div>
      );
    }
  )
);
InputBar.displayName = "InputBar";
export default InputBar;

Some files were not shown because too many files have changed in this diff Show More