Compare commits

...

9 Commits

Author SHA1 Message Date
Wenxi Onyx
65fb0957b5 revert wildcard back 2026-02-04 15:22:32 -08:00
Wenxi Onyx
e745ff1260 path to s5cmd 2026-02-04 15:06:50 -08:00
Wenxi Onyx
c5846b83a2 refactor(craft): chad s5cmd > aws cli (mem overhead + speed) 2026-02-04 15:06:50 -08:00
Justin Tahara
1a2589de2b fix(ci): Notification workflow for Slack (#8167) 2026-02-04 15:06:50 -08:00
Jessica Singh
dbe482e9a9 chore(chat compress): create readme (#8165) 2026-02-04 15:06:50 -08:00
rohoswagger
b220e3b8cb AGENTS.md fixes 2026-02-04 14:54:03 -08:00
rohoswagger
6cb9632851 remove console.log 2026-02-04 13:33:58 -08:00
rohoswagger
26fd637b74 feat(craft): file upload 2026-02-04 13:33:58 -08:00
rohoswagger
0a16675299 fix(craft): file upload 2026-02-04 13:33:58 -08:00
17 changed files with 957 additions and 266 deletions

View File

@@ -174,23 +174,10 @@ jobs:
with:
persist-credentials: false
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
- name: Get AWS Secrets
uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
with:
secret-ids: |
MONITOR_DEPLOYMENTS_WEBHOOK, deploy/monitor-deployments-webhook
parse-json-secrets: true
- name: Send Slack notification
uses: ./.github/actions/slack-notify
with:
webhook-url: ${{ env.MONITOR_DEPLOYMENTS_WEBHOOK }}
webhook-url: ${{ secrets.MONITOR_DEPLOYMENTS_WEBHOOK }}
failed-jobs: "• check-version-tag"
title: "🚨 Version Tag Check Failed"
ref-name: ${{ github.ref_name }}
@@ -1709,19 +1696,6 @@ jobs:
with:
persist-credentials: false
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
- name: Get AWS Secrets
uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
with:
secret-ids: |
MONITOR_DEPLOYMENTS_WEBHOOK, deploy/monitor-deployments-webhook
parse-json-secrets: true
- name: Determine failed jobs
id: failed-jobs
shell: bash
@@ -1787,7 +1761,7 @@ jobs:
- name: Send Slack notification
uses: ./.github/actions/slack-notify
with:
webhook-url: ${{ env.MONITOR_DEPLOYMENTS_WEBHOOK }}
webhook-url: ${{ secrets.MONITOR_DEPLOYMENTS_WEBHOOK }}
failed-jobs: ${{ steps.failed-jobs.outputs.jobs }}
title: "🚨 Deployment Workflow Failed"
ref-name: ${{ github.ref_name }}

View File

@@ -0,0 +1,49 @@
# Chat History Compression
Compresses long chat histories by summarizing older messages while keeping recent ones verbatim.
## Architecture Decisions
### Branch-Aware via Tree Structure
Summaries are stored as `ChatMessage` records with two key fields:
- `parent_message_id` → last message when compression triggered (places summary in the tree)
- `last_summarized_message_id` → pointer to an older message up the chain (the cutoff). Messages after this are kept verbatim.
**Why store summary as a separate message?** If we embedded the summary in the `last_summarized_message_id` message itself, that message would contain context from messages that came after it—context that doesn't exist in other branches. By creating the summary as a new message attached to the branch tip, it only applies to the specific branch where compression occurred.
### Timestamp-Based Ordering
Messages are filtered by `time_sent` (not ID) so the logic remains intact if IDs are changed to UUIDs in the future.
### Progressive Summarization
Subsequent compressions incorporate the existing summary text + new messages, preventing information loss in very long conversations.
### Cutoff Marker Prompt Strategy
The LLM receives older messages, a cutoff marker, then recent messages. It summarizes only content before the marker while using recent context to inform what's important.
## Token Budget
Context window breakdown:
- `max_context_tokens` — LLM's total context window
- `reserved_tokens` — space for system prompt, tools, files, etc.
- Available for chat history = `max_context_tokens - reserved_tokens`
Configurable ratios:
- `COMPRESSION_TRIGGER_RATIO` (default 0.75) — compress when chat history exceeds this ratio of available space
- `RECENT_MESSAGES_RATIO` (default 0.25) — portion of chat history to keep verbatim when compressing
## Flow
1. Trigger when `history_tokens > available * COMPRESSION_TRIGGER_RATIO` (0.75 by default)
2. Find existing summary for branch (if any)
3. Split messages: older (summarize) / recent (keep verbatim; sized by `RECENT_MESSAGES_RATIO`, 25% by default)
4. Generate summary via LLM
5. Save as `ChatMessage` with `parent_message_id` + `last_summarized_message_id`
## Key Functions
| Function | Purpose |
|----------|---------|
| `get_compression_params` | Check if compression needed based on token counts |
| `find_summary_for_branch` | Find applicable summary by checking `parent_message_id` membership |
| `get_messages_to_summarize` | Split messages at token budget boundary |
| `compress_chat_history` | Orchestrate flow, save summary message |

View File

@@ -16,8 +16,6 @@ Ephemeral VM with Python 3.11 and Node v22. Virtual environment at `.venv/` incl
Install packages: `pip install <pkg>` or `npm install <pkg>` (from `outputs/web`).
{{ATTACHMENTS_SECTION}}
{{ORG_INFO_SECTION}}
## Skills

View File

@@ -269,7 +269,7 @@ class S3PersistentDocumentWriter:
s3://{bucket}/{tenant_id}/knowledge/{user_id}/{source}/{hierarchy}/document.json
This matches the location that KubernetesSandboxManager reads from when
provisioning sandboxes (via the init container's aws s3 sync command).
provisioning sandboxes (via the sidecar container's s5cmd sync command).
"""
def __init__(self, tenant_id: str, user_id: str):
@@ -338,7 +338,7 @@ class S3PersistentDocumentWriter:
{tenant_id}/knowledge/{user_id}/{source}/{hierarchy}/
This matches the path that KubernetesSandboxManager syncs from:
aws s3 sync "s3://{bucket}/{tenant_id}/knowledge/{user_id}/" /workspace/files/
s5cmd sync "s3://{bucket}/{tenant_id}/knowledge/{user_id}/*" /workspace/files/
"""
# Tenant and user segregation (matches the path the K8s sandbox file-sync sidecar syncs from)
parts = [self.tenant_id, "knowledge", self.user_id]

View File

@@ -417,7 +417,7 @@ class SandboxManager(ABC):
) -> bool:
"""Sync files from S3 to the sandbox's /workspace/files directory.
For Kubernetes backend: Executes `aws s3 sync` in the file-sync sidecar container.
For Kubernetes backend: Executes `s5cmd sync` in the file-sync sidecar container.
For Local backend: No-op since files are directly accessible via symlink.
This is idempotent - only downloads changed files.

View File

@@ -69,57 +69,12 @@ CONNECTOR_INFO: dict[str, ConnectorInfoEntry] = {
}
DEFAULT_SCAN_DEPTH = 1
# Content for the attachments section when user has uploaded files
# NOTE: This is duplicated from agent_instructions.py to avoid circular imports
ATTACHMENTS_SECTION_CONTENT = """## Attachments (PRIORITY)
The `attachments/` directory contains files that the user has explicitly
uploaded during this session. **These files are critically important** and
should be treated as high-priority context.
### Why Attachments Matter
- The user deliberately chose to upload these files, signaling they are directly relevant to the task
- These files often contain the specific data, requirements, or examples the user wants you to work with
- They may include spreadsheets, documents, images, or code that should inform your work
### Required Actions
**At the start of every task, you MUST:**
1. **Check for attachments**: List the contents of `attachments/` to see what the user has provided
2. **Read and analyze each file**: Thoroughly examine every attachment to understand its contents and relevance
3. **Reference attachment content**: Use the information from attachments to inform your responses and outputs
### File Handling
- Uploaded files may be in various formats: CSV, JSON, PDF, images, text files, etc.
- For spreadsheets and data files, examine the structure, columns, and sample data
- For documents, extract key information and requirements
- For images, analyze and describe their content
- For code files, understand the logic and patterns
**Do NOT ignore user uploaded files.** They are there for a reason and likely
contain exactly what you need to complete the task successfully."""
def _normalize_connector_name(name: str) -> str:
"""Normalize a connector directory name for lookup."""
return name.lower().replace(" ", "_").replace("-", "_")
def build_attachments_section(attachments_path: Path) -> str:
"""Return attachments section if files exist, empty string otherwise."""
if not attachments_path.exists():
return ""
try:
if any(attachments_path.iterdir()):
return ATTACHMENTS_SECTION_CONTENT
except Exception:
pass
return ""
def _scan_directory_to_depth(
directory: Path, current_depth: int, max_depth: int, indent: str = " "
) -> list[str]:
@@ -232,27 +187,33 @@ def build_knowledge_sources_section(files_path: Path) -> str:
def main() -> None:
"""Main entry point for container startup script."""
"""Main entry point for container startup script.
Called by the container startup script to scan /workspace/files (falling back to
/workspace/demo_data when the files directory is missing or empty) and populate
the knowledge sources section.
"""
# Read template from environment variable
template = os.environ.get("AGENT_INSTRUCTIONS", "")
if not template:
print("Warning: No AGENT_INSTRUCTIONS template provided", file=sys.stderr)
template = "# Agent Instructions\n\nNo instructions provided."
# Scan files directory
# Scan files directory - check /workspace/files first, then /workspace/demo_data
files_path = Path("/workspace/files")
knowledge_sources_section = build_knowledge_sources_section(files_path)
demo_data_path = Path("/workspace/demo_data")
# Check attachments directory
attachments_path = Path("/workspace/attachments")
attachments_section = build_attachments_section(attachments_path)
# Use demo_data if files doesn't exist or is empty
if not files_path.exists() or not any(files_path.iterdir()):
if demo_data_path.exists():
files_path = demo_data_path
knowledge_sources_section = build_knowledge_sources_section(files_path)
# Replace placeholders
content = template
content = content.replace(
"{{KNOWLEDGE_SOURCES_SECTION}}", knowledge_sources_section
)
content = content.replace("{{ATTACHMENTS_SECTION}}", attachments_section)
# Write AGENTS.md
output_path = Path("/workspace/AGENTS.md")
@@ -268,7 +229,9 @@ def main() -> None:
if d.is_dir() and not d.name.startswith(".")
]
)
print(f"Generated AGENTS.md with {source_count} knowledge sources")
print(
f"Generated AGENTS.md with {source_count} knowledge sources from {files_path}"
)
if __name__ == "__main__":

View File

@@ -77,6 +77,9 @@ from onyx.server.features.build.sandbox.models import FilesystemEntry
from onyx.server.features.build.sandbox.models import LLMProviderConfig
from onyx.server.features.build.sandbox.models import SandboxInfo
from onyx.server.features.build.sandbox.models import SnapshotResult
from onyx.server.features.build.sandbox.util.agent_instructions import (
ATTACHMENTS_SECTION_CONTENT,
)
from onyx.server.features.build.sandbox.util.agent_instructions import (
generate_agent_instructions,
)
@@ -287,6 +290,7 @@ class KubernetesSandboxManager(SandboxManager):
def _load_agent_instructions(
self,
files_path: Path | None = None,
provider: str | None = None,
model_name: str | None = None,
nextjs_port: int | None = None,
@@ -298,7 +302,9 @@ class KubernetesSandboxManager(SandboxManager):
) -> str:
"""Load and populate agent instructions from template file.
Args:
files_path: Path to the files directory (symlink to knowledge sources)
provider: LLM provider type
model_name: Model name
nextjs_port: Next.js port
@@ -312,15 +318,14 @@ class KubernetesSandboxManager(SandboxManager):
Populated agent instructions content
Note:
files_path is not passed here because in Kubernetes, the files are
synced via an init container after pod creation. The agent will
discover the file structure at runtime by exploring the files/ directory.
In Kubernetes mode, files_path refers to paths inside the pod.
Since the backend cannot access the pod filesystem, these are passed as None
to leave placeholders intact for the container script to resolve at runtime.
"""
return generate_agent_instructions(
template_path=self._agent_instructions_template_path,
skills_path=self._skills_path,
files_path=None, # Files are synced after pod creation
attachments_path=None, # Attachments won't exist until session workspace is created
files_path=files_path,
provider=provider,
model_name=model_name,
nextjs_port=nextjs_port,
@@ -352,13 +357,8 @@ class KubernetesSandboxManager(SandboxManager):
# via kubectl exec after new documents are indexed
file_sync_container = client.V1Container(
name="file-sync",
image="amazon/aws-cli:latest",
env=_get_local_aws_credential_env_vars()
+ [
# Set HOME to a writable directory so AWS CLI can create .aws config dir
# Without this, AWS CLI tries to access /.aws which fails with permission denied
client.V1EnvVar(name="HOME", value="/tmp"),
],
image="peakcom/s5cmd:v2.3.0",
env=_get_local_aws_credential_env_vars(),
command=["/bin/sh", "-c"],
args=[
f"""
@@ -369,28 +369,11 @@ trap 'echo "Received SIGTERM, exiting"; exit 0' TERM
echo "Starting initial file sync for tenant: {tenant_id} / user: {user_id}"
echo "S3 source: s3://{self._s3_bucket}/{tenant_id}/knowledge/{user_id}/"
# Capture both stdout and stderr, track exit code
# aws s3 sync returns exit code 1 even on success if there are warnings
sync_exit_code=0
sync_stderr=$(mktemp)
aws s3 sync "s3://{self._s3_bucket}/{tenant_id}/knowledge/{user_id}/" /workspace/files/ 2>"$sync_stderr" || sync_exit_code=$?
# Always show stderr if there was any output (for debugging)
if [ -s "$sync_stderr" ]; then
echo "=== S3 sync stderr output ==="
cat "$sync_stderr"
echo "=== End stderr output ==="
fi
rm -f "$sync_stderr"
# Report outcome
echo "S3 sync finished with exit code: $sync_exit_code"
# Exit codes 0 and 1 are both considered success
# (exit code 1 = success with warnings, e.g., metadata/timestamp issues)
if [ $sync_exit_code -eq 0 ] || [ $sync_exit_code -eq 1 ]; then
# s5cmd sync: high-performance parallel S3 sync (default 256 workers)
if /s5cmd sync "s3://{self._s3_bucket}/{tenant_id}/knowledge/{user_id}/*" /workspace/files/; then
echo "Initial sync complete, staying alive for incremental syncs"
else
sync_exit_code=$?
echo "ERROR: Initial sync failed with exit code: $sync_exit_code"
exit $sync_exit_code
fi
@@ -1108,7 +1091,15 @@ done
pod_name = self._get_pod_name(str(sandbox_id))
session_path = f"/workspace/sessions/{session_id}"
# Paths inside the pod (created during workspace setup below):
# - {session_path}/files: symlink to knowledge sources
# - {session_path}/attachments: user-uploaded files
#
# Note: files_path=None leaves {{KNOWLEDGE_SOURCES_SECTION}} placeholder intact
# for generate_agents_md.py to resolve at container runtime by scanning /workspace/files.
# Attachments section is injected dynamically when first file is uploaded.
agent_instructions = self._load_agent_instructions(
files_path=None, # Container script handles this at runtime
provider=llm_config.provider,
model_name=llm_config.model_name,
nextjs_port=nextjs_port,
@@ -1894,10 +1885,10 @@ echo '{tar_b64}' | base64 -d | tar -xzf -
) -> bool:
"""Sync files from S3 to the running pod via the file-sync sidecar.
Executes `aws s3 sync` in the file-sync sidecar container to download
Executes `s5cmd sync` in the file-sync sidecar container to download
any new or changed files from S3 to /workspace/files/.
This is safe to call multiple times - aws s3 sync is idempotent.
This is safe to call multiple times - s5cmd sync is idempotent.
Args:
sandbox_id: The sandbox UUID
@@ -1909,14 +1900,12 @@ echo '{tar_b64}' | base64 -d | tar -xzf -
"""
pod_name = self._get_pod_name(str(sandbox_id))
# Configure AWS CLI for higher concurrency (default is 10) then run sync
# max_concurrent_requests controls parallel S3 API calls for faster transfers
s3_path = f"s3://{self._s3_bucket}/{tenant_id}/knowledge/{str(user_id)}/"
# s5cmd sync: high-performance parallel S3 sync (default 256 workers)
s3_path = f"s3://{self._s3_bucket}/{tenant_id}/knowledge/{str(user_id)}/*"
sync_command = [
"/bin/sh",
"-c",
f"aws configure set default.s3.max_concurrent_requests 200 && "
f'aws s3 sync "{s3_path}" /workspace/files/',
f'/s5cmd sync "{s3_path}" /workspace/files/',
]
resp = k8s_stream(
self._stream_core_api.connect_get_namespaced_pod_exec,
@@ -1932,6 +1921,71 @@ echo '{tar_b64}' | base64 -d | tar -xzf -
logger.debug(f"File sync response: {resp}")
return True
def _ensure_agents_md_attachments_section(
    self, sandbox_id: UUID, session_id: UUID
) -> None:
    """Ensure the session's AGENTS.md inside the pod has the attachments section.

    Called after uploading a file. The section is added only when the
    "## Attachments (PRIORITY)" marker is not already present. It is inserted
    once, immediately above the first line that starts with "## Skills"; if no
    such line exists it is appended to the end of the file.

    This is a fire-and-forget operation - Kubernetes API failures are logged
    but not raised.

    Args:
        sandbox_id: The sandbox UUID (used to derive the pod name)
        session_id: The session UUID (used to derive the AGENTS.md path)
    """
    pod_name = self._get_pod_name(str(sandbox_id))
    session_path = f"/workspace/sessions/{session_id}"
    agents_md_path = f"{session_path}/AGENTS.md"

    # Base64 encode the content for safe shell handling
    attachments_content_b64 = base64.b64encode(
        ATTACHMENTS_SECTION_CONTENT.encode()
    ).decode()

    # Script: add section before ## Skills if not present.
    # - The "## Skills" check is anchored to line start so it agrees with the
    #   awk pattern; otherwise a "### Skills" sub-heading would pass the check,
    #   awk would insert nothing, and the section would silently never be added.
    # - The section text is handed to awk through the environment instead of
    #   `-v`, because awk applies escape-sequence processing to -v values.
    # - The `!inserted` guard keeps the section from being duplicated when
    #   more than one line starts with "## Skills".
    script = f"""
if [ -f "{agents_md_path}" ]; then
    if ! grep -q "## Attachments (PRIORITY)" "{agents_md_path}" 2>/dev/null; then
        # Check if a ## Skills heading exists
        if grep -q "^## Skills" "{agents_md_path}" 2>/dev/null; then
            # Insert before the first ## Skills heading using awk
            ATTACHMENTS_SECTION="$(echo "{attachments_content_b64}" | base64 -d)" awk '
                /^## Skills/ && !inserted {{ print ENVIRON["ATTACHMENTS_SECTION"]; print ""; inserted = 1 }}
                {{ print }}
            ' "{agents_md_path}" > "{agents_md_path}.tmp" && mv "{agents_md_path}.tmp" "{agents_md_path}"
            echo "ADDED_BEFORE_SKILLS"
        else
            # Fallback: append to end
            echo "" >> "{agents_md_path}"
            echo "" >> "{agents_md_path}"
            echo "{attachments_content_b64}" | base64 -d >> "{agents_md_path}"
            echo "ADDED_AT_END"
        fi
    else
        echo "EXISTS"
    fi
else
    echo "NO_AGENTS_MD"
fi
"""

    try:
        resp = k8s_stream(
            self._stream_core_api.connect_get_namespaced_pod_exec,
            name=pod_name,
            namespace=self._namespace,
            container="sandbox",
            command=["/bin/sh", "-c", script],
            stderr=True,
            stdin=False,
            stdout=True,
            tty=False,
        )
        logger.debug(
            f"Ensure AGENTS.md attachments section for session {session_id}: "
            f"{resp.strip()}"
        )
    except ApiException as e:
        logger.warning(f"Failed to ensure AGENTS.md attachments section: {e}")
def upload_file(
self,
sandbox_id: UUID,
@@ -1941,7 +1995,11 @@ echo '{tar_b64}' | base64 -d | tar -xzf -
) -> str:
"""Upload a file to the session's attachments directory.
Uses tar streaming via stdin for efficient binary transfer.
Uses tar streaming via stdin with explicit byte count to avoid EOF issues.
The K8s Python client cannot close stdin without closing the entire WebSocket
connection, so we use `head -c <size>` to read exactly the expected bytes
instead of waiting for EOF.
Handles filename collisions atomically within the shell script.
Args:
@@ -1966,13 +2024,15 @@ echo '{tar_b64}' | base64 -d | tar -xzf -
tarinfo.size = len(content)
tar.addfile(tarinfo, io.BytesIO(content))
tar_data = tar_buffer.getvalue()
tar_size = len(tar_data)
# Shell script that:
# 1. Creates target directory and temp extraction directory
# 2. Extracts tar to temp directory
# 3. Moves file to target with collision handling
# 4. Cleans up temp directory
# 5. Outputs final filename
# 2. Reads exactly tar_size bytes from stdin (avoids needing EOF signal)
# 3. Extracts tar to temp directory
# 4. Moves file to target with collision handling
# 5. Cleans up temp directory
# 6. Outputs final filename
script = f"""
set -e
target_dir="{target_dir}"
@@ -1980,7 +2040,9 @@ tmpdir=$(mktemp -d)
trap 'rm -rf "$tmpdir"' EXIT
mkdir -p "$target_dir"
tar xf - -C "$tmpdir"
# Read exactly {tar_size} bytes and extract (avoids waiting for EOF)
head -c {tar_size} | tar xf - -C "$tmpdir"
# Find the extracted file (first file in tmpdir)
original=$(ls -1 "$tmpdir" | head -1)
@@ -2018,9 +2080,9 @@ echo "$base"
# Write tar data to stdin
ws_client.write_stdin(tar_data)
ws_client.close()
# Read response
# Read response - head -c will read exactly tar_size bytes and proceed,
# so we don't need to close stdin to signal EOF
stdout_data = ""
stderr_data = ""
while ws_client.is_open():
@@ -2047,9 +2109,12 @@ echo "$base"
logger.info(
f"Uploaded file to session {session_id}: attachments/{final_filename} "
f"({len(content)} bytes via tar)"
f"({len(content)} bytes)"
)
# Ensure AGENTS.md has the attachments section
self._ensure_agents_md_attachments_section(sandbox_id, session_id)
return f"attachments/{final_filename}"
except ApiException as e:

View File

@@ -962,8 +962,43 @@ class LocalSandboxManager(SandboxManager):
f"({len(content)} bytes)"
)
# Inject attachments section into AGENTS.md if not already present
self._ensure_agents_md_attachments_section(session_path)
return f"attachments/{filename}"
def _ensure_agents_md_attachments_section(self, session_path: Path) -> None:
    """Ensure the session's AGENTS.md has the attachments section.

    Called after uploading a file. Does nothing when AGENTS.md is missing or
    already contains the "## Attachments (PRIORITY)" marker. Otherwise the
    section is inserted once, immediately above the first line that starts
    with "## Skills"; if no such heading exists it is appended to the end.

    Args:
        session_path: Session workspace directory containing AGENTS.md
    """
    import re

    from onyx.server.features.build.sandbox.util.agent_instructions import (
        ATTACHMENTS_SECTION_CONTENT,
    )

    agents_md_path = session_path / "AGENTS.md"
    if not agents_md_path.exists():
        return

    current_content = agents_md_path.read_text()
    if "## Attachments (PRIORITY)" in current_content:
        return

    # Match the heading only at the start of a line (same pattern the
    # Kubernetes backend's awk script uses). A plain substring replace would
    # also hit "### Skills" or inline mentions and would rewrite every
    # occurrence, splicing the section mid-line or duplicating it.
    skills_heading = re.search(r"^## Skills", current_content, flags=re.MULTILINE)
    if skills_heading is not None:
        insert_at = skills_heading.start()
        updated_content = (
            current_content[:insert_at]
            + ATTACHMENTS_SECTION_CONTENT
            + "\n\n"
            + current_content[insert_at:]
        )
    else:
        # No Skills heading: append to the end of the document
        updated_content = (
            current_content.rstrip() + "\n\n" + ATTACHMENTS_SECTION_CONTENT
        )

    agents_md_path.write_text(updated_content)
    logger.debug("Added attachments section to AGENTS.md")
def delete_file(
self,
sandbox_id: UUID,

View File

@@ -300,15 +300,11 @@ class DirectoryManager:
# Get the files path (symlink to knowledge sources)
files_path = sandbox_path / "files"
# Get the attachments path (user-uploaded files)
attachments_path = sandbox_path / "attachments"
# Use shared utility to generate content
content = generate_agent_instructions(
template_path=self._agent_instructions_template_path,
skills_path=self._skills_path,
files_path=files_path if files_path.exists() else None,
attachments_path=attachments_path if attachments_path.exists() else None,
provider=provider,
model_name=model_name,
nextjs_port=nextjs_port,

View File

@@ -247,10 +247,10 @@ def sync_sandbox_files(self: Task, *, user_id: str, tenant_id: str) -> bool:
"""Sync files from S3 to a user's running sandbox.
This task is triggered after documents are written to S3 during indexing.
It executes `aws s3 sync` in the file-sync sidecar container to download
It executes `s5cmd sync` in the file-sync sidecar container to download
any new or changed files.
This is safe to call multiple times - aws s3 sync is idempotent.
This is safe to call multiple times - s5cmd sync is idempotent.
Args:
user_id: The user ID whose sandbox should be synced

View File

@@ -130,7 +130,6 @@ The `org_info/` directory contains information about the organization and user c
# Content for the attachments section when user has uploaded files
# NOTE: This is duplicated in agent_instructions.py to avoid circular imports
ATTACHMENTS_SECTION_CONTENT = """## Attachments (PRIORITY)
The `attachments/` directory contains files that the user has explicitly
@@ -163,29 +162,6 @@ should be treated as high-priority context.
contain exactly what you need to complete the task successfully."""
def build_attachments_section(attachments_path: Path | None) -> str:
"""Build the attachments section for AGENTS.md.
Only includes the section when user-uploaded files are present
in the attachments directory.
Args:
attachments_path: Path to the attachments directory
Returns:
Formatted attachments section string, or empty string if no files
"""
if not attachments_path or not attachments_path.exists():
return ""
try:
if any(attachments_path.iterdir()):
return ATTACHMENTS_SECTION_CONTENT
except Exception:
pass
return ""
def build_org_info_section(include_org_info: bool) -> str:
"""Build the organization info section for AGENTS.md.
@@ -437,7 +413,6 @@ def generate_agent_instructions(
template_path: Path,
skills_path: Path,
files_path: Path | None = None,
attachments_path: Path | None = None,
provider: str | None = None,
model_name: str | None = None,
nextjs_port: int | None = None,
@@ -453,7 +428,6 @@ def generate_agent_instructions(
template_path: Path to the AGENTS.template.md file
skills_path: Path to the skills directory
files_path: Path to the files directory (symlink to knowledge sources)
attachments_path: Path to the attachments directory (user-uploaded files)
provider: LLM provider type (e.g., "openai", "anthropic")
model_name: Model name (e.g., "claude-sonnet-4-5", "gpt-4o")
nextjs_port: Port for Next.js development server
@@ -490,11 +464,6 @@ def generate_agent_instructions(
# Build org info section (only included when demo data is enabled)
org_info_section = build_org_info_section(include_org_info)
# Build attachments section (only included when files are present)
attachments_section = (
build_attachments_section(attachments_path) if attachments_path else ""
)
# Replace placeholders
content = template_content
content = content.replace("{{USER_CONTEXT}}", user_context)
@@ -506,7 +475,6 @@ def generate_agent_instructions(
content = content.replace("{{DISABLED_TOOLS_SECTION}}", disabled_tools_section)
content = content.replace("{{AVAILABLE_SKILLS_SECTION}}", available_skills_section)
content = content.replace("{{ORG_INFO_SECTION}}", org_info_section)
content = content.replace("{{ATTACHMENTS_SECTION}}", attachments_section)
# Only replace file-related placeholders if files_path is provided.
# When files_path is None (e.g., Kubernetes), leave placeholders intact

View File

@@ -19,8 +19,6 @@ interface BuildWelcomeProps {
isRunning: boolean;
/** When true, shows spinner on send button with "Initializing sandbox..." tooltip */
sandboxInitializing?: boolean;
/** Pre-provisioned session ID for file uploads before a session is active. */
preProvisionedSessionId?: string | null;
}
/**
@@ -32,7 +30,6 @@ export default function BuildWelcome({
onSubmit,
isRunning,
sandboxInitializing = false,
preProvisionedSessionId,
}: BuildWelcomeProps) {
const inputBarRef = useRef<InputBarHandle>(null);
const userPersona = getBuildUserPersona();
@@ -57,7 +54,7 @@ export default function BuildWelcome({
isRunning={isRunning}
placeholder="Analyze my data and create a dashboard..."
sandboxInitializing={sandboxInitializing}
preProvisionedSessionId={preProvisionedSessionId}
isWelcomePage
/>
<ConnectDataBanner />
<SuggestedPrompts persona={persona} onPromptClick={handlePromptClick} />

View File

@@ -25,7 +25,6 @@ import {
UploadFileStatus,
useUploadFilesContext,
} from "@/app/craft/contexts/UploadFilesContext";
import { uploadFile } from "@/app/craft/services/apiServices";
import { CRAFT_SEARCH_PARAM_NAMES } from "@/app/craft/services/searchParams";
import { CRAFT_PATH } from "@/app/craft/v1/constants";
import { usePopup } from "@/components/admin/connectors/Popup";
@@ -120,7 +119,8 @@ export default function BuildChatPanel({
// Disable input when pre-provisioning is in progress or failed (waiting for retry)
const sandboxNotReady = isPreProvisioning || isPreProvisioningFailed;
const { currentMessageFiles, hasUploadingFiles } = useUploadFilesContext();
const { currentMessageFiles, hasUploadingFiles, setActiveSession } =
useUploadFilesContext();
const followupSuggestions = useFollowupSuggestions();
const suggestionsLoading = useSuggestionsLoading();
const clearFollowupSuggestions = useBuildSessionStore(
@@ -133,6 +133,16 @@ export default function BuildChatPanel({
currentFilesRef.current = currentMessageFiles;
}, [currentMessageFiles]);
/**
* Keep the upload context in sync with the active session.
* The context handles all session change logic internally (fetching attachments,
* clearing files, auto-uploading pending files).
*/
useEffect(() => {
const activeSession = existingSessionId ?? preProvisionedSessionId ?? null;
setActiveSession(activeSession);
}, [existingSessionId, preProvisionedSessionId, setActiveSession]);
// Ref to access InputBar methods
const inputBarRef = useRef<InputBarHandle>(null);
@@ -332,23 +342,7 @@ export default function BuildChatPanel({
});
}
// Upload any files that need to be uploaded:
// - PENDING: Was attached before session existed, needs upload now
// - FAILED: Previous upload failed, retry
// - No path + not currently uploading: Edge case fallback
const currentFiles = currentFilesRef.current;
const filesToUpload = currentFiles.filter(
(f) =>
f.file &&
(f.status === UploadFileStatus.PENDING ||
f.status === UploadFileStatus.FAILED ||
(!f.path && f.status !== UploadFileStatus.UPLOADING))
);
if (filesToUpload.length > 0) {
await Promise.all(
filesToUpload.map((f) => uploadFile(newSessionId, f.file!))
);
}
// Note: PENDING files are auto-uploaded by the context when session becomes available
// Navigate to URL - session controller will set currentSessionId
router.push(
@@ -438,7 +432,6 @@ export default function BuildChatPanel({
onSubmit={handleSubmit}
isRunning={isRunning}
sandboxInitializing={sandboxNotReady}
preProvisionedSessionId={preProvisionedSessionId}
/>
) : (
<BuildMessageList
@@ -499,8 +492,6 @@ export default function BuildChatPanel({
onSubmit={handleSubmit}
isRunning={isRunning}
placeholder="Continue the conversation..."
sessionId={sessionId ?? undefined}
preProvisionedSessionId={preProvisionedSessionId}
/>
</div>
</div>

View File

@@ -26,6 +26,7 @@ import SelectButton from "@/refresh-components/buttons/SelectButton";
import SimpleTooltip from "@/refresh-components/SimpleTooltip";
import {
SvgArrowUp,
SvgClock,
SvgFileText,
SvgImage,
SvgLoader,
@@ -52,14 +53,12 @@ export interface InputBarProps {
isRunning: boolean;
disabled?: boolean;
placeholder?: string;
/** Session ID for immediate file uploads. If provided, files upload immediately when attached. */
sessionId?: string;
/** Pre-provisioned session ID for file uploads before a session is active. */
preProvisionedSessionId?: string | null;
/** When true, shows spinner on send button with "Initializing sandbox..." tooltip */
sandboxInitializing?: boolean;
/** When true, removes bottom rounding to allow seamless connection with components below */
noBottomRounding?: boolean;
/** Whether this is the welcome page (no existing session in URL). Used for Demo Data pill. */
isWelcomePage?: boolean;
}
/**
@@ -74,6 +73,7 @@ function BuildFileCard({
}) {
const isImage = isImageFile(file.name);
const isUploading = file.status === UploadFileStatus.UPLOADING;
const isPending = file.status === UploadFileStatus.PENDING;
const isFailed = file.status === UploadFileStatus.FAILED;
const cardContent = (
@@ -87,6 +87,8 @@ function BuildFileCard({
>
{isUploading ? (
<SvgLoader className="h-4 w-4 animate-spin text-text-03" />
) : isPending ? (
<SvgClock className="h-4 w-4 text-text-03" />
) : isFailed ? (
<SvgAlertCircle className="h-4 w-4 text-status-error-02" />
) : isImage ? (
@@ -111,7 +113,7 @@ function BuildFileCard({
</div>
);
// Wrap in tooltip if there's an error
// Wrap in tooltip for error or pending status
if (isFailed && file.error) {
return (
<SimpleTooltip tooltip={file.error} side="top">
@@ -120,9 +122,30 @@ function BuildFileCard({
);
}
if (isPending) {
return (
<SimpleTooltip tooltip="Waiting for session to be ready..." side="top">
{cardContent}
</SimpleTooltip>
);
}
return cardContent;
}
/**
* InputBar - Text input with file attachment support
*
* File upload state is managed by UploadFilesContext. This component just:
* - Triggers file selection/paste
* - Displays attached files
* - Handles message submission
*
* The context handles:
* - Session binding (which session to upload to)
* - Auto-upload when session becomes available
* - Fetching existing attachments on session change
*/
const InputBar = memo(
forwardRef<InputBarHandle, InputBarProps>(
(
@@ -131,10 +154,9 @@ const InputBar = memo(
isRunning,
disabled = false,
placeholder = "Describe your task...",
sessionId,
preProvisionedSessionId,
sandboxInitializing = false,
noBottomRounding = false,
isWelcomePage = false,
},
ref
) => {
@@ -142,9 +164,6 @@ const InputBar = memo(
const demoDataEnabled = useDemoDataEnabled();
const [message, setMessage] = useState("");
// Use active session ID, falling back to pre-provisioned session ID
const effectiveSessionId =
sessionId ?? preProvisionedSessionId ?? undefined;
const textAreaRef = useRef<HTMLTextAreaElement>(null);
const containerRef = useRef<HTMLDivElement>(null);
const fileInputRef = useRef<HTMLInputElement>(null);
@@ -200,11 +219,11 @@ const InputBar = memo(
async (e: ChangeEvent<HTMLInputElement>) => {
const files = e.target.files;
if (!files || files.length === 0) return;
// Pass effectiveSessionId so files upload immediately if session exists
uploadFiles(Array.from(files), effectiveSessionId);
// Context handles session binding internally
uploadFiles(Array.from(files));
e.target.value = "";
},
[uploadFiles, effectiveSessionId]
[uploadFiles]
);
const handlePaste = useCallback(
@@ -221,12 +240,12 @@ const InputBar = memo(
}
if (pastedFiles.length > 0) {
event.preventDefault();
// Pass effectiveSessionId so files upload immediately if session exists
uploadFiles(pastedFiles, effectiveSessionId);
// Context handles session binding internally
uploadFiles(pastedFiles);
}
}
},
[uploadFiles, effectiveSessionId]
[uploadFiles]
);
const handleInputChange = useCallback(
@@ -308,7 +327,7 @@ const InputBar = memo(
<BuildFileCard
key={file.id}
file={file}
onRemove={(id) => removeFile(id, effectiveSessionId)}
onRemove={removeFile}
/>
))}
</div>
@@ -356,8 +375,8 @@ const InputBar = memo(
disabled={disabled}
onClick={() => fileInputRef.current?.click()}
/>
{/* Demo Data indicator pill - only show on welcome page (no session) when demo data is enabled */}
{demoDataEnabled && !sessionId && (
{/* Demo Data indicator pill - only show on welcome page when demo data is enabled */}
{demoDataEnabled && isWelcomePage && (
<SimpleTooltip
tooltip="Switch to your data in the Configure panel!"
side="top"

View File

@@ -6,13 +6,14 @@ import {
useState,
useCallback,
useMemo,
useRef,
useEffect,
type ReactNode,
type Dispatch,
type SetStateAction,
} from "react";
import {
uploadFile as uploadFileApi,
deleteFile as deleteFileApi,
fetchDirectoryListing,
} from "@/app/craft/services/apiServices";
/**
@@ -58,6 +59,148 @@ const generateTempId = () => {
}
};
// =============================================================================
// File Validation (matches backend: build/configs.py and build/utils.py)
// =============================================================================
/** Per-file size cap (50MB) - mirrors the backend setting BUILD_MAX_UPLOAD_FILE_SIZE_MB. */
const MAX_FILE_SIZE_BYTES = 50 * 1024 ** 2;
/** Combined attachment size cap per session (200MB) - mirrors BUILD_MAX_TOTAL_UPLOAD_SIZE_MB. */
const MAX_TOTAL_SIZE_BYTES = 200 * 1024 ** 2;
/** Cap on the number of attached files per session - mirrors BUILD_MAX_UPLOAD_FILES_PER_SESSION. */
const MAX_FILES_PER_SESSION = 20;
/**
 * Extensions (lowercase, leading dot) that are rejected before upload because
 * they denote executables or otherwise dangerous file types.
 * Intended to match the backend's BLOCKED_EXTENSIONS list.
 */
const BLOCKED_EXTENSIONS = new Set<string>(
  Object.values({
    windowsExecutables: [
      ".exe",
      ".dll",
      ".msi",
      ".scr",
      ".com",
      ".bat",
      ".cmd",
      ".ps1",
    ],
    macos: [".app", ".dmg", ".pkg"],
    linux: [".deb", ".rpm", ".so"],
    crossPlatform: [".jar", ".war", ".ear"],
    otherDangerous: [
      ".vbs",
      ".vbe",
      ".wsf",
      ".wsh",
      ".hta",
      ".cpl",
      ".reg",
      ".lnk",
      ".pif",
    ],
  }).flat()
);
/**
 * Render a byte count for display.
 *
 * Values below 1024 are shown as whole bytes ("512 B"); larger values are
 * shown with one decimal place in KB (below 1024 * 1024) or MB (everything else).
 */
function formatBytes(bytes: number): string {
  const bytesPerKb = 1024;
  const bytesPerMb = bytesPerKb * bytesPerKb;

  if (bytes < bytesPerKb) {
    return `${bytes} B`;
  }

  const fitsInKb = bytes < bytesPerMb;
  const scaled = bytes / (fitsInKb ? bytesPerKb : bytesPerMb);
  return `${scaled.toFixed(1)} ${fitsInKb ? "KB" : "MB"}`;
}
/**
 * Get the extension of a filename, lowercased and including the leading dot.
 *
 * Returns "" when the name contains no dot, or when the last dot is the final
 * character (e.g. "report."), because a bare "." carries no extension text and
 * would otherwise slip past both the blocklist and the missing-extension check.
 * A name whose only dot is the first character (e.g. ".env") is returned whole.
 *
 * @param filename - File name to inspect.
 * @returns The extension such as ".png", or "" if there is none.
 */
function getFileExtension(filename: string): string {
  const lastDot = filename.lastIndexOf(".");
  const endsWithDot = lastDot === filename.length - 1;
  if (lastDot === -1 || endsWithDot) return "";
  return filename.slice(lastDot).toLowerCase();
}
/**
 * Outcome of a client-side validation check, returned by both the per-file
 * check (validateFile) and the batch check (validateBatch).
 */
interface FileValidationResult {
// True when the check passed.
valid: boolean;
// Human-readable reason for the rejection; the validators in this file set it whenever `valid` is false.
error?: string;
}
/**
 * Check one file against the client-side upload rules before it is sent.
 *
 * Rules are evaluated in this order and the first failure wins:
 *   1. size must not exceed MAX_FILE_SIZE_BYTES
 *   2. the name must have an extension
 *   3. the extension must not be in BLOCKED_EXTENSIONS
 *
 * @param file - The browser File selected or pasted by the user.
 * @returns `{ valid: true }`, or `{ valid: false, error }` with a display message.
 */
function validateFile(file: File): FileValidationResult {
  const reject = (error: string): FileValidationResult => ({
    valid: false,
    error,
  });

  if (file.size > MAX_FILE_SIZE_BYTES) {
    const actualSize = formatBytes(file.size);
    const allowedSize = formatBytes(MAX_FILE_SIZE_BYTES);
    return reject(`File too large (${actualSize}). Maximum is ${allowedSize}.`);
  }

  const ext = getFileExtension(file.name);
  if (!ext) {
    return reject("File must have an extension.");
  }
  if (BLOCKED_EXTENSIONS.has(ext)) {
    return reject(`File type '${ext}' is not allowed for security reasons.`);
  }

  return { valid: true };
}
/**
 * Validate the per-session count and total-size limits for a new batch.
 *
 * Existing entries whose status is FAILED are excluded from both totals. In
 * this file FAILED marks either a client-side validation rejection or an
 * upload whose request threw, so counting those entries would let a single
 * rejected batch block every later attempt until the user removes the failed
 * pills by hand.
 *
 * @param newFiles - Files the user is trying to attach now.
 * @param existingFiles - Files already attached to the input bar.
 * @returns `{ valid: true }`, or `{ valid: false, error }` describing the limit that was hit.
 */
function validateBatch(
  newFiles: File[],
  existingFiles: BuildFile[]
): FileValidationResult {
  const countedFiles = existingFiles.filter(
    (f) => f.status !== UploadFileStatus.FAILED
  );

  const totalCount = countedFiles.length + newFiles.length;
  if (totalCount > MAX_FILES_PER_SESSION) {
    return {
      valid: false,
      error: `Too many files. Maximum is ${MAX_FILES_PER_SESSION} files per session.`,
    };
  }

  const existingSize = countedFiles.reduce((sum, f) => sum + f.size, 0);
  const newSize = newFiles.reduce((sum, f) => sum + f.size, 0);
  if (existingSize + newSize > MAX_TOTAL_SIZE_BYTES) {
    return {
      valid: false,
      error: `Total size exceeds limit. Maximum is ${formatBytes(
        MAX_TOTAL_SIZE_BYTES
      )} per session.`,
    };
  }

  return { valid: true };
}
/**
 * Build the BuildFile entry shown for a file that was rejected.
 *
 * The entry gets a fresh temporary id, copies name/type/size from the browser
 * File, is stamped with the current time, and carries `error` as its message.
 *
 * @param file - The rejected browser File.
 * @param error - Message to attach to the failed entry.
 */
function createFailedFile(file: File, error: string): BuildFile {
  const { name, type: file_type, size } = file;
  const rejected: BuildFile = {
    id: generateTempId(),
    name,
    status: UploadFileStatus.FAILED,
    file_type,
    size,
    created_at: new Date().toISOString(),
    error,
  };
  return rejected;
}
// Create optimistic file from File object
const createOptimisticFile = (file: File): BuildFile => {
const tempId = generateTempId();
@@ -72,22 +215,99 @@ const createOptimisticFile = (file: File): BuildFile => {
};
};
/**
 * Coarse categories an upload/fetch failure is sorted into by classifyError.
 */
export enum UploadErrorType {
  /** Message mentions "network" or "fetch". */
  NETWORK = "NETWORK",
  /** Message mentions "401" or "unauthorized". */
  AUTH = "AUTH",
  /** Message mentions "404" or "not found". */
  NOT_FOUND = "NOT_FOUND",
  /** Message mentions "500" or "server". */
  SERVER = "SERVER",
  /** Anything else, including thrown values that are not Error instances. */
  UNKNOWN = "UNKNOWN",
}

/**
 * Map a thrown value to an error category and a user-facing message.
 *
 * Classification is a case-insensitive substring match on `error.message`,
 * checked in the order AUTH, NOT_FOUND, SERVER, NETWORK; the first match wins.
 * Unmatched Errors keep their own message. An Error with an empty message, or
 * a thrown value that is not an Error, yields the generic "Upload failed" so
 * the caller never receives an empty message string.
 *
 * @param error - Whatever was caught.
 * @returns The category plus the message to show.
 */
function classifyError(error: unknown): {
  type: UploadErrorType;
  message: string;
} {
  const fallbackMessage = "Upload failed";
  if (!(error instanceof Error)) {
    return { type: UploadErrorType.UNKNOWN, message: fallbackMessage };
  }

  // Order matters: earlier rules take precedence when several needles match.
  const rules: ReadonlyArray<{
    needles: readonly string[];
    type: UploadErrorType;
    message: string;
  }> = [
    {
      needles: ["401", "unauthorized"],
      type: UploadErrorType.AUTH,
      message: "Session expired",
    },
    {
      needles: ["404", "not found"],
      type: UploadErrorType.NOT_FOUND,
      message: "Resource not found",
    },
    {
      needles: ["500", "server"],
      type: UploadErrorType.SERVER,
      message: "Server error",
    },
    {
      needles: ["network", "fetch"],
      type: UploadErrorType.NETWORK,
      message: "Network error",
    },
  ];

  const haystack = error.message.toLowerCase();
  const matched = rules.find((rule) =>
    rule.needles.some((needle) => haystack.includes(needle))
  );
  if (matched) {
    return { type: matched.type, message: matched.message };
  }

  return {
    type: UploadErrorType.UNKNOWN,
    message: error.message || fallbackMessage,
  };
}
/**
* UploadFilesContext - Centralized file upload state management
*
* This context manages:
* - File attachment state (current files attached to input)
* - Active session binding (which session files are associated with)
* - Automatic upload of pending files when session becomes available
* - Automatic fetch of existing attachments when session changes
* - File upload, removal, and clearing operations
*
* Components should:
* - Call `setActiveSession(sessionId)` when session changes
* - Call `uploadFiles(files)` to attach files (uses active session internally)
* - Call `removeFile(fileId)` to remove files (uses active session internally)
* - Read `currentMessageFiles` to display attached files
*/
interface UploadFilesContextValue {
// Current message files (attached to the input bar)
currentMessageFiles: BuildFile[];
setCurrentMessageFiles: Dispatch<SetStateAction<BuildFile[]>>;
// Upload files - returns optimistic files immediately
uploadFiles: (files: File[], sessionId?: string) => Promise<BuildFile[]>;
// Active session ID (set by parent components)
activeSessionId: string | null;
// Remove a file from current message (and delete from sandbox if uploaded)
removeFile: (fileId: string, sessionId?: string) => void;
/**
* Set the active session ID. This triggers:
* - Fetching existing attachments from the new session (if different)
* - Keeping the attached files when the session becomes null (they are not cleared)
* - Auto-uploading any pending files
*
* Call this when:
* - Session ID changes in URL
* - Pre-provisioned session becomes available
*/
setActiveSession: (sessionId: string | null) => void;
// Clear all current message files
/**
* Upload files to the active session.
* - If session is available: uploads immediately
* - If no session: marks as PENDING (auto-uploads when session available)
*/
uploadFiles: (files: File[]) => Promise<BuildFile[]>;
/**
* Remove a file from the input bar.
* If the file was uploaded, also deletes from the sandbox.
*/
removeFile: (fileId: string) => void;
/**
* Clear all attached files from the input bar.
* Does NOT delete from sandbox (use for form reset).
*/
clearFiles: () => void;
// Check if any files are uploading
hasUploadingFiles: boolean;
// Check if any files are pending upload
hasPendingFiles: boolean;
}
const UploadFilesContext = createContext<UploadFilesContextValue | null>(null);
@@ -97,9 +317,28 @@ export interface UploadFilesProviderProps {
}
export function UploadFilesProvider({ children }: UploadFilesProviderProps) {
// =========================================================================
// State
// =========================================================================
const [currentMessageFiles, setCurrentMessageFiles] = useState<BuildFile[]>(
[]
);
const [activeSessionId, setActiveSessionId] = useState<string | null>(null);
// =========================================================================
// Refs for race condition protection
// =========================================================================
const isUploadingPendingRef = useRef(false);
const fetchingSessionRef = useRef<string | null>(null);
const prevSessionRef = useRef<string | null>(null);
// Track active deletions to prevent refetch race condition
const activeDeletionsRef = useRef<Set<string>>(new Set());
// =========================================================================
// Derived state
// =========================================================================
const hasUploadingFiles = useMemo(() => {
return currentMessageFiles.some(
@@ -107,16 +346,321 @@ export function UploadFilesProvider({ children }: UploadFilesProviderProps) {
);
}, [currentMessageFiles]);
const hasPendingFiles = useMemo(() => {
return currentMessageFiles.some(
(file) => file.status === UploadFileStatus.PENDING
);
}, [currentMessageFiles]);
// =========================================================================
// Internal operations (not exposed to consumers)
// =========================================================================
/**
* Upload every PENDING file that still holds its File blob to the given session.
* Internal function - called automatically by effects.
*
* Flow: collect the pending entries and flip them to UPLOADING in one state
* update, upload them in parallel, then write COMPLETED/FAILED back per file.
* `isUploadingPendingRef` makes the call a no-op while a previous run is still
* in flight. The callback has an empty dependency list; it reads the current
* files through the state updater rather than through a captured value.
*
* @param sessionId - Session that receives the uploads.
*/
const uploadPendingFilesInternal = useCallback(
async (sessionId: string): Promise<void> => {
// Re-entrancy guard: a previous run has not finished yet.
if (isUploadingPendingRef.current) return;
// Collect the pending files from the latest state inside the updater.
// NOTE(review): `pendingFiles` is assigned inside the updater and read right
// after the setter returns. This only works if React has already invoked the
// updater by then, which the setter does not promise - confirm, or read the
// files from a ref instead.
let pendingFiles: BuildFile[] = [];
setCurrentMessageFiles((prev) => {
pendingFiles = prev.filter(
(f) => f.status === UploadFileStatus.PENDING && f.file
);
// Mark as uploading in the same state update to avoid race conditions
if (pendingFiles.length > 0) {
return prev.map((f) =>
pendingFiles.some((pf) => pf.id === f.id)
? { ...f, status: UploadFileStatus.UPLOADING }
: f
);
}
return prev;
});
if (pendingFiles.length === 0) return;
// The guard is raised only now, after the files were marked UPLOADING.
isUploadingPendingRef.current = true;
try {
// Upload in parallel; each upload catches its own error so one failure
// does not reject the whole Promise.all.
const results = await Promise.all(
pendingFiles.map(async (file) => {
try {
const result = await uploadFileApi(sessionId, file.file!);
return { id: file.id, success: true as const, result };
} catch (error) {
const { message } = classifyError(error);
return {
id: file.id,
success: false as const,
errorMessage: message,
};
}
})
);
// Write the outcome back onto the matching entries; entries that are not
// part of this run are returned untouched.
setCurrentMessageFiles((prev) =>
prev.map((f) => {
const result = results.find((r) => r.id === f.id);
if (!result) return f;
return result.success
? {
...f,
status: UploadFileStatus.COMPLETED,
path: result.result.path,
name: result.result.filename,
file: undefined, // Clear blob to free memory
}
: {
...f,
status: UploadFileStatus.FAILED,
error: result.errorMessage,
};
})
);
} finally {
// Always lower the guard, even if the status update above throws.
isUploadingPendingRef.current = false;
}
},
[]
);
/**
* Fetch the session's "attachments" directory listing and merge it into the
* current message files.
* Internal function - called automatically by effects.
*
* @param sessionId - Session whose attachments are listed.
* @param replace - When true, the listing replaces the current files except
*   for locally tracked ones (UPLOADING/PENDING/PROCESSING or id starting with
*   "temp_"); on a failed fetch only those local files are kept. When false,
*   listed files whose path is not already present are appended and a failed
*   fetch leaves the files untouched.
*
* NOTE(review): the response is applied without re-checking that `sessionId`
* is still the active session, and the `finally` block resets
* `fetchingSessionRef` even if a fetch for a different session has since
* claimed it - confirm that overlapping fetches for two sessions cannot occur.
*/
const fetchExistingAttachmentsInternal = useCallback(
async (sessionId: string, replace: boolean): Promise<void> => {
// Request deduplication: skip if a fetch for this same session is in flight.
if (fetchingSessionRef.current === sessionId) return;
fetchingSessionRef.current = sessionId;
try {
const listing = await fetchDirectoryListing(sessionId, "attachments");
// Use deterministic IDs based on session and path for stable React keys.
// `created_at` is the time of this fetch, not a timestamp from the listing.
const attachments: BuildFile[] = listing.entries
.filter((entry) => !entry.is_directory)
.map((entry) => ({
id: `existing_${sessionId}_${entry.path}`,
name: entry.name,
status: UploadFileStatus.COMPLETED,
file_type: entry.mime_type || "application/octet-stream",
size: entry.size || 0,
created_at: new Date().toISOString(),
path: entry.path,
}));
if (replace) {
// When replacing, preserve any files that are still being processed locally
// (uploading, pending, or recently completed uploads that might not be in
// backend listing yet due to race conditions)
setCurrentMessageFiles((prev) => {
// Keep files that are still in-flight, plus any entry carrying a temp id
const localOnlyFiles = prev.filter(
(f) =>
f.status === UploadFileStatus.UPLOADING ||
f.status === UploadFileStatus.PENDING ||
f.status === UploadFileStatus.PROCESSING ||
// Keep locally created entries (temp ID, not fetched from backend)
f.id.startsWith("temp_")
);
// Merge: backend attachments + local-only files (avoiding duplicates by path)
const backendPaths = new Set(attachments.map((f) => f.path));
const nonDuplicateLocalFiles = localOnlyFiles.filter(
(f) => !f.path || !backendPaths.has(f.path)
);
return [...attachments, ...nonDuplicateLocalFiles];
});
} else if (attachments.length > 0) {
// Append mode: add only the listed files whose path is not present yet.
setCurrentMessageFiles((prev) => {
const existingPaths = new Set(prev.map((f) => f.path));
const newFiles = attachments.filter(
(f) => !existingPaths.has(f.path)
);
return [...prev, ...newFiles];
});
}
} catch (error) {
// A NOT_FOUND classification is not logged; every other failure is.
const { type } = classifyError(error);
if (type !== UploadErrorType.NOT_FOUND) {
console.error(
"[UploadFilesContext] fetchExistingAttachments error:",
error
);
}
if (replace) {
// On error, only clear files that aren't being processed locally
setCurrentMessageFiles((prev) =>
prev.filter(
(f) =>
f.status === UploadFileStatus.UPLOADING ||
f.status === UploadFileStatus.PENDING ||
f.status === UploadFileStatus.PROCESSING ||
f.id.startsWith("temp_")
)
);
}
} finally {
// Release the dedup marker so a later fetch for this session can run.
fetchingSessionRef.current = null;
}
},
[]
);
// =========================================================================
// Effects - Automatic state machine transitions
// =========================================================================
/**
* Effect: Handle session changes
*
* When activeSessionId changes:
* - If changed to a DIFFERENT non-null session: fetch attachments (replace mode)
* - If changed to null: do nothing (don't clear - session might be temporarily null during revalidation)
*
* This prevents unnecessary fetches/clears when the focus handler temporarily
* resets the pre-provisioned session state.
*/
useEffect(() => {
const prevSession = prevSessionRef.current;
const currentSession = activeSessionId;
// Only update ref when we have a non-null session (ignore temporary nulls)
if (currentSession) {
// Session changed to a different non-null value
if (currentSession !== prevSession) {
prevSessionRef.current = currentSession;
fetchExistingAttachmentsInternal(currentSession, true);
}
}
// When session becomes null, don't clear files or update ref.
// This handles the case where pre-provisioning temporarily resets on focus.
// Files will be cleared when user actually navigates away or logs out.
}, [activeSessionId, fetchExistingAttachmentsInternal]);
/**
* Effect: Auto-upload pending files when session becomes available
*
* This handles the case where user attaches files before session is ready.
*/
useEffect(() => {
if (activeSessionId && hasPendingFiles) {
uploadPendingFilesInternal(activeSessionId);
}
}, [activeSessionId, hasPendingFiles, uploadPendingFilesInternal]);
/**
* Effect: Refetch attachments after files are cleared
*
* When files are cleared (e.g., after sending a message) but we're still
* on the same session, refetch to restore any backend attachments.
*
* IMPORTANT: Skip refetch if files went to 0 due to active deletions.
* This prevents a race condition where refetch returns the file before
* backend deletion completes, causing the file pill to persist.
*/
const prevFilesLengthRef = useRef(currentMessageFiles.length);
useEffect(() => {
const prevLength = prevFilesLengthRef.current;
const currentLength = currentMessageFiles.length;
prevFilesLengthRef.current = currentLength;
// Files were just cleared (went from >0 to 0)
const filesWereCleared = prevLength > 0 && currentLength === 0;
// Skip refetch if there are active deletions in progress
// This prevents the deleted file from being re-added before backend deletion completes
const hasActiveDeletions = activeDeletionsRef.current.size > 0;
// Refetch if on same session and files were cleared (not deleted)
if (
filesWereCleared &&
activeSessionId &&
prevSessionRef.current === activeSessionId &&
!hasActiveDeletions
) {
fetchExistingAttachmentsInternal(activeSessionId, false);
}
}, [
currentMessageFiles.length,
activeSessionId,
fetchExistingAttachmentsInternal,
]);
// =========================================================================
// Public API
// =========================================================================
/**
* Set the active session. Effects react to the change by fetching existing attachments and uploading pending files; attached files are not cleared.
*/
const setActiveSession = useCallback((sessionId: string | null) => {
setActiveSessionId(sessionId);
}, []);
/**
* Upload files. Uses activeSessionId internally.
* Validates files before upload (size, extension, batch limits).
*/
const uploadFiles = useCallback(
async (files: File[], sessionId?: string): Promise<BuildFile[]> => {
// Create optimistic files
const optimisticFiles = files.map(createOptimisticFile);
async (files: File[]): Promise<BuildFile[]> => {
// Get current files for batch validation
const existingFiles = currentMessageFiles;
// Validate batch constraints first
const batchValidation = validateBatch(files, existingFiles);
if (!batchValidation.valid) {
// Create failed files for all with the batch error
const failedFiles = files.map((f) =>
createFailedFile(f, batchValidation.error!)
);
setCurrentMessageFiles((prev) => [...prev, ...failedFiles]);
return failedFiles;
}
// Validate each file individually and separate valid from invalid
const validFiles: File[] = [];
const failedFiles: BuildFile[] = [];
for (const file of files) {
const validation = validateFile(file);
if (validation.valid) {
validFiles.push(file);
} else {
failedFiles.push(createFailedFile(file, validation.error!));
}
}
// Add failed files immediately
if (failedFiles.length > 0) {
setCurrentMessageFiles((prev) => [...prev, ...failedFiles]);
}
// If no valid files, return early
if (validFiles.length === 0) {
return failedFiles;
}
// Create optimistic files for valid files
const optimisticFiles = validFiles.map(createOptimisticFile);
// Add to current message files immediately
setCurrentMessageFiles((prev) => [...prev, ...optimisticFiles]);
const sessionId = activeSessionId;
if (sessionId) {
// Upload all files in parallel for better performance
// Session available - upload immediately
const uploadPromises = optimisticFiles.map(async (optimisticFile) => {
try {
const result = await uploadFileApi(sessionId, optimisticFile.file!);
@@ -126,22 +670,18 @@ export function UploadFilesProvider({ children }: UploadFilesProviderProps) {
result,
};
} catch (error) {
console.error("File upload failed:", error);
let errorMessage = "Upload failed";
if (error instanceof Error) {
errorMessage = error.message;
}
const { message } = classifyError(error);
return {
id: optimisticFile.id,
success: false as const,
errorMessage,
errorMessage: message,
};
}
});
const results = await Promise.all(uploadPromises);
// Batch update all file statuses at once
// Batch update all file statuses
setCurrentMessageFiles((prev) =>
prev.map((f) => {
const uploadResult = results.find((r) => r.id === f.id);
@@ -153,6 +693,7 @@ export function UploadFilesProvider({ children }: UploadFilesProviderProps) {
status: UploadFileStatus.COMPLETED,
path: uploadResult.result.path,
name: uploadResult.result.filename,
file: undefined, // Clear blob to free memory
};
} else {
return {
@@ -164,8 +705,7 @@ export function UploadFilesProvider({ children }: UploadFilesProviderProps) {
})
);
} else {
// No session yet - mark as PENDING (will upload when session is created)
// The ChatPanel fallback will handle uploading these when the session is ready
// No session yet - mark as PENDING (effect will auto-upload when session available)
setCurrentMessageFiles((prev) =>
prev.map((f) =>
optimisticFiles.some((of) => of.id === f.id)
@@ -175,47 +715,108 @@ export function UploadFilesProvider({ children }: UploadFilesProviderProps) {
);
}
return optimisticFiles;
return [...failedFiles, ...optimisticFiles];
},
[]
[activeSessionId, currentMessageFiles]
);
/**
* Remove a file. Uses activeSessionId internally for sandbox deletion.
*/
const removeFile = useCallback(
(fileId: string, sessionId?: string) => {
// Find the file to check if it has been uploaded
const file = currentMessageFiles.find((f) => f.id === fileId);
(fileId: string) => {
// Track this deletion to prevent refetch race condition
activeDeletionsRef.current.add(fileId);
// If file has a path and sessionId is provided, delete from sandbox
if (file?.path && sessionId) {
deleteFileApi(sessionId, file.path).catch((error) => {
console.error("Failed to delete file from sandbox:", error);
});
}
// Use functional update to get current state and avoid stale closures
let removedFile: BuildFile | null = null;
let removedIndex = -1;
setCurrentMessageFiles((prev) => prev.filter((f) => f.id !== fileId));
setCurrentMessageFiles((prev) => {
const index = prev.findIndex((f) => f.id === fileId);
if (index === -1) return prev;
// Capture file info for potential rollback and backend deletion
const file = prev[index];
if (!file) return prev;
removedFile = file;
removedIndex = index;
// Return filtered array (optimistic removal)
return prev.filter((f) => f.id !== fileId);
});
// After state update, trigger backend deletion if needed
// Use setTimeout to ensure state update has completed
setTimeout(() => {
if (removedFile?.path && activeSessionId) {
const filePath = removedFile.path;
const fileToRestore = removedFile;
const indexToRestore = removedIndex;
deleteFileApi(activeSessionId, filePath)
.then(() => {
// Deletion succeeded - remove from active deletions
activeDeletionsRef.current.delete(fileId);
})
.catch((error) => {
console.error(
"[UploadFilesContext] Failed to delete file from sandbox:",
error
);
// Remove from active deletions
activeDeletionsRef.current.delete(fileId);
// Rollback: restore the file at its original position
setCurrentMessageFiles((prev) => {
// Check if file was already re-added (e.g., by another operation)
if (prev.some((f) => f.id === fileToRestore.id)) return prev;
const newFiles = [...prev];
const insertIndex = Math.min(indexToRestore, newFiles.length);
newFiles.splice(insertIndex, 0, fileToRestore);
return newFiles;
});
});
} else {
// No backend deletion needed - remove from active deletions immediately
activeDeletionsRef.current.delete(fileId);
}
}, 0);
},
[currentMessageFiles]
[activeSessionId]
);
/**
* Clear all files from the input bar.
*/
const clearFiles = useCallback(() => {
setCurrentMessageFiles([]);
}, []);
// =========================================================================
// Context value
// =========================================================================
const value = useMemo<UploadFilesContextValue>(
() => ({
currentMessageFiles,
setCurrentMessageFiles,
activeSessionId,
setActiveSession,
uploadFiles,
removeFile,
clearFiles,
hasUploadingFiles,
hasPendingFiles,
}),
[
currentMessageFiles,
activeSessionId,
setActiveSession,
uploadFiles,
removeFile,
clearFiles,
hasUploadingFiles,
hasPendingFiles,
]
);

View File

@@ -8,6 +8,7 @@ import { CRAFT_SEARCH_PARAM_NAMES } from "@/app/craft/services/searchParams";
import { CRAFT_PATH } from "@/app/craft/v1/constants";
import { getBuildUserPersona } from "@/app/craft/onboarding/constants";
import { useLLMProviders } from "@/lib/hooks/useLLMProviders";
import { checkPreProvisionedSession } from "@/app/craft/services/apiServices";
interface UseBuildSessionControllerProps {
/** Session ID from search params, or null for new session */
@@ -234,8 +235,8 @@ export function useBuildSessionController({
]);
// Effect: Re-validate pre-provisioned session on tab focus (multi-tab support)
// The backend's createSession does "get or create empty session" - it returns
// the same session if still valid, or a new one if consumed by another tab.
// Uses checkPreProvisionedSession API to validate without resetting state,
// which prevents unnecessary cascading effects when session is still valid.
useEffect(() => {
const handleFocus = async () => {
const { preProvisioning } = useBuildSessionStore.getState();
@@ -244,16 +245,41 @@ export function useBuildSessionController({
if (preProvisioning.status === "ready") {
const cachedSessionId = preProvisioning.sessionId;
// Reset to idle and re-provision - backend will return same session if
// still valid, or create new one if it was consumed by another tab
useBuildSessionStore.setState({ preProvisioning: { status: "idle" } });
const newSessionId = await useBuildSessionStore
.getState()
.ensurePreProvisionedSession();
try {
// Check if session is still valid WITHOUT resetting state
const { valid } = await checkPreProvisionedSession(cachedSessionId);
if (newSessionId && newSessionId !== cachedSessionId) {
console.info(
`[PreProvision] Session changed on focus: ${cachedSessionId} -> ${newSessionId}`
if (!valid) {
// Session was consumed by another tab - now reset and re-provision
console.info(
`[PreProvision] Session ${cachedSessionId.slice(
0,
8
)} invalidated on focus, re-provisioning...`
);
useBuildSessionStore.setState({
preProvisioning: { status: "idle" },
});
const newSessionId = await useBuildSessionStore
.getState()
.ensurePreProvisionedSession();
if (newSessionId) {
console.info(
`[PreProvision] Session changed on focus: ${cachedSessionId.slice(
0,
8
)} -> ${newSessionId.slice(0, 8)}`
);
}
}
// If valid, do nothing - keep the current session
} catch (error) {
// On error, log but don't reset - better to keep potentially stale session
// than to cause UI flicker on network blip
console.warn(
"[PreProvision] Failed to validate session on focus:",
error
);
}
}

View File

@@ -553,9 +553,18 @@ export async function deleteFile(
sessionId: string,
path: string
): Promise<void> {
const res = await fetch(`${API_BASE}/sessions/${sessionId}/files/${path}`, {
method: "DELETE",
});
// Encode each path segment individually (spaces, special chars) but preserve slashes
const encodedPath = path
.split("/")
.map((segment) => encodeURIComponent(segment))
.join("/");
const res = await fetch(
`${API_BASE}/sessions/${sessionId}/files/${encodedPath}`,
{
method: "DELETE",
}
);
if (!res.ok) {
const errorData = await res.json().catch(() => ({}));