Compare commits

...

5 Commits

Author SHA1 Message Date
Weves
f36ea9079d . 2026-01-27 17:10:27 -08:00
Weves
81c440e1f3 . 2026-01-27 17:08:10 -08:00
Weves
7366bed680 move to sync upload_file_endpoint 2026-01-27 17:06:41 -08:00
Weves
6bec786cf7 Better recovery 2026-01-27 17:05:57 -08:00
Weves
d6485235c4 massively reduced grace period 2026-01-27 16:41:11 -08:00
4 changed files with 23 additions and 12 deletions

View File

@@ -73,7 +73,7 @@ def list_messages(
@router.post("/sessions/{session_id}/send-message", tags=PUBLIC_API_TAGS)
async def send_message(
def send_message(
session_id: UUID,
request: MessageRequest,
user: User = Depends(current_user),

View File

@@ -638,7 +638,7 @@ def download_webapp(
@router.post("/{session_id}/upload", response_model=UploadResponse)
async def upload_file_endpoint(
def upload_file_endpoint(
session_id: UUID,
file: UploadFile = File(...),
user: User = Depends(current_user),
@@ -654,8 +654,8 @@ async def upload_file_endpoint(
if not file.filename:
raise HTTPException(status_code=400, detail="File has no filename")
# Read file content
content = await file.read()
# Read file content (use sync file interface)
content = file.file.read()
# Validate file (extension, mime type, size)
is_valid, error = validate_file(file.filename, file.content_type, len(content))

View File

@@ -436,7 +436,7 @@ sleep infinity
containers=[sandbox_container, file_sync_container],
volumes=volumes,
restart_policy="Never",
termination_grace_period_seconds=600,
termination_grace_period_seconds=10, # Fast pod termination
# Node selection for sandbox nodes
node_selector={"onyx.app/workload": "sandbox"},
tolerations=[

View File

@@ -464,10 +464,15 @@ class SessionManager:
sandbox = existing_sandbox
sandbox_id = sandbox.id
if sandbox.status == SandboxStatus.TERMINATED:
# Re-provision terminated sandbox
if sandbox.status in (
SandboxStatus.TERMINATED,
SandboxStatus.SLEEPING,
SandboxStatus.FAILED,
):
# Re-provision sandbox (pod doesn't exist or failed)
logger.info(
f"Re-provisioning terminated sandbox {sandbox_id} for user {user_id}"
f"Re-provisioning {sandbox.status.value} sandbox {sandbox_id} "
f"for user {user_id}"
)
sandbox_info = self._sandbox_manager.provision(
sandbox_id=sandbox_id,
@@ -508,11 +513,14 @@ class SessionManager:
f"for new session {session_id}"
)
else:
# Handle other statuses (SLEEPING, PROVISIONING, FAILED, etc.)
logger.info(
f"Reusing existing sandbox {sandbox_id} (status: {sandbox.status}) "
f"for new session {session_id}"
# PROVISIONING status - sandbox is being created by another request
# Just fail this request
msg = (
f"Sandbox {sandbox_id} has status {sandbox.status.value} and is being "
f"created by another request for new session {session_id}"
)
logger.error(msg)
raise RuntimeError(msg)
else:
# Create new Sandbox record for the user (uses flush, caller commits)
sandbox = create_sandbox__no_commit(
@@ -595,6 +603,9 @@ class SessionManager:
"""
existing = get_empty_session_for_user(user_id, self._db_session)
if existing:
logger.info(
f"Existing empty session {existing.id} found for user {user_id}"
)
# Verify sandbox is healthy before returning existing session
sandbox = get_sandbox_by_user_id(self._db_session, user_id)