Compare commits

...

10 Commits

9 changed files with 191 additions and 16 deletions

View File

@@ -64,6 +64,13 @@ PUBLIC_ENDPOINT_SPECS = [
("/build/sessions/{session_id}/webapp/{path:path}", {"GET"}),
]
# WebSocket routes have no `methods` attribute so they can't go in PUBLIC_ENDPOINT_SPECS,
# whose entries pair a path with a set of HTTP methods.
# These paths are access-controlled inside their handlers via _check_webapp_access.
# Entries are written without the global API prefix: check_router_auth compares them
# against both the raw route path and the path with APP_API_PREFIX stripped.
PUBLIC_WEBSOCKET_PATHS = {
"/build/sessions/{session_id}/webapp/_next/webpack-hmr",
"/build/sessions/{session_id}/webapp/_next/hmr",
}
def is_route_in_spec_list(
route: BaseRoute, public_endpoint_specs: list[tuple[str, set[str]]]
@@ -114,6 +121,21 @@ def check_router_auth(
if is_route_in_spec_list(route, public_endpoint_specs):
continue
# WebSocket routes have no methods; check against the explicit WebSocket allowlist
if not hasattr(route, "methods") and hasattr(route, "path"):
if route.path in PUBLIC_WEBSOCKET_PATHS:
continue
# Strip global prefix for comparison
processed_global_prefix = (
f"/{APP_API_PREFIX.strip('/')}" if APP_API_PREFIX else ""
)
if processed_global_prefix and route.path.startswith(
processed_global_prefix
):
stripped = route.path[len(processed_global_prefix) :]
if stripped in PUBLIC_WEBSOCKET_PATHS:
continue
# check for auth
found_auth = False
route_dependant_obj = cast(

View File

@@ -1,3 +1,5 @@
import asyncio
import re
from collections.abc import Iterator
from pathlib import Path
from uuid import UUID
@@ -8,6 +10,8 @@ from fastapi import Depends
from fastapi import HTTPException
from fastapi import Request
from fastapi import Response
from fastapi import WebSocket
from fastapi import WebSocketDisconnect
from fastapi.responses import RedirectResponse
from fastapi.responses import StreamingResponse
from sqlalchemy.orm import Session
@@ -20,6 +24,7 @@ from onyx.db.engine.sql_engine import get_session
from onyx.db.enums import ConnectorCredentialPairStatus
from onyx.db.enums import IndexingStatus
from onyx.db.enums import ProcessingMode
from onyx.db.enums import SandboxStatus
from onyx.db.enums import SharingScope
from onyx.db.index_attempt import get_latest_index_attempt_for_cc_pair_id
from onyx.db.models import BuildSession
@@ -239,18 +244,41 @@ def _stream_response(response: httpx.Response) -> Iterator[bytes]:
yield chunk
def _inject_asset_fixer(content: bytes, session_id: str) -> bytes:
"""Inject a script that rewrites /_next/ in dynamically inserted <style> tags.
next/font in dev mode injects @font-face declarations via React client-side,
bypassing server-side proxy rewriting. Patching appendChild/insertBefore
intercepts these before the browser parses the font URL.
"""
base = f"/api/build/sessions/{session_id}/webapp"
script = (
f"<script>(function(){{var B='{base}';"
"function f(n){if(!n||n.nodeType!==1)return;"
"if(n.tagName==='STYLE'&&n.textContent)"
"n.textContent=n.textContent.replace(/(url\\s*\\(\\s*['\"]?)\\/_next\\//g,'$1'+B+'/_next/');"
"else if(n.tagName==='LINK'){var h=n.getAttribute('href')||'';"
"if(h.indexOf('/_next/')===0)n.setAttribute('href',B+h);}}"
"function w(m){var o=Element.prototype[m];"
"Element.prototype[m]=function(n){f(n);return o.apply(this,arguments);}}"
"w('appendChild');w('insertBefore');})()</script>"
)
text = content.decode("utf-8")
# Inject immediately after <head> so it runs before React initialises
text = re.sub(
r"(<head\b[^>]*>)", rf"\1{script}", text, count=1, flags=re.IGNORECASE
)
return text.encode("utf-8")
def _rewrite_asset_paths(content: bytes, session_id: str) -> bytes:
"""Rewrite Next.js asset paths to go through the proxy."""
import re
# Base path includes session_id for routing
webapp_base_path = f"/api/build/sessions/{session_id}/webapp"
text = content.decode("utf-8")
# Rewrite /_next/ paths to go through our proxy
text = text.replace("/_next/", f"{webapp_base_path}/_next/")
# Rewrite JSON data file fetch paths (e.g., /data.json, /data/tickets.json)
# Matches paths like "/filename.json" or "/path/to/file.json"
# Anchor on delimiter so already-prefixed URLs (from assetPrefix) aren't double-rewritten.
for delim in ('"', "'", "("):
text = text.replace(f"{delim}/_next/", f"{delim}{webapp_base_path}/_next/")
text = re.sub(
r'"(/(?:[a-zA-Z0-9_-]+/)*[a-zA-Z0-9_-]+\.json)"',
f'"{webapp_base_path}\\1"',
@@ -261,7 +289,6 @@ def _rewrite_asset_paths(content: bytes, session_id: str) -> bytes:
f"'{webapp_base_path}\\1'",
text,
)
# Rewrite favicon
text = text.replace('"/favicon.ico', f'"{webapp_base_path}/favicon.ico')
return text.encode("utf-8")
@@ -348,6 +375,8 @@ def _proxy_request(
# For HTML/CSS/JS responses, rewrite asset paths
if any(ct in content_type for ct in REWRITABLE_CONTENT_TYPES):
content = _rewrite_asset_paths(response.content, str(session_id))
if "text/html" in content_type:
content = _inject_asset_fixer(content, str(session_id))
return Response(
content=content,
status_code=response.status_code,
@@ -394,13 +423,22 @@ def _check_webapp_access(
_OFFLINE_HTML_PATH = Path(__file__).parent / "templates" / "webapp_offline.html"
def _offline_html_response() -> Response:
def _offline_html_response(auto_refresh: bool) -> Response:
    """Return a branded Craft HTML page when the sandbox is not reachable.

    Design mirrors the default Craft web template (outputs/web/app/page.tsx):
    terminal window aesthetic with Minecraft-themed typing animation.

    Args:
        auto_refresh: When True, inject a meta refresh tag so the page polls
            for the sandbox to wake up. Should only be True when the sandbox is
            genuinely asleep (SLEEPING/TERMINATED), not when it is RUNNING but
            Next.js is momentarily restarting — that case causes jarring
            periodic iframe reloads and the frontend's own polling handles it.

    Returns:
        A 503 ``text/html`` response containing the offline template, with the
        ``{auto_refresh_meta}`` placeholder replaced by the refresh tag or by
        an empty string.
    """
    # The template contains non-ASCII characters, so decode it explicitly as
    # UTF-8 instead of depending on the process locale's default encoding.
    html = _OFFLINE_HTML_PATH.read_text(encoding="utf-8")
    meta_tag = '<meta http-equiv="refresh" content="15" />' if auto_refresh else ""
    html = html.replace("{auto_refresh_meta}", meta_tag)
    return Response(content=html, status_code=503, media_type="text/html")
@@ -435,10 +473,61 @@ def get_webapp(
return _proxy_request(path, request, session_id, db_session)
except HTTPException as e:
if e.status_code in (502, 503, 504):
return _offline_html_response()
session = db_session.get(BuildSession, session_id)
sandbox = (
get_sandbox_by_user_id(db_session, session.user_id)
if session and session.user_id
else None
)
sandbox_is_asleep = sandbox is None or sandbox.status in (
SandboxStatus.SLEEPING,
SandboxStatus.TERMINATED,
)
return _offline_html_response(auto_refresh=sandbox_is_asleep)
raise
async def _hmr_websocket_sink(
    websocket: WebSocket,
    session_id: UUID,
    user: User | None,
    db_session: Session,
) -> None:
    """Accept the HMR WebSocket silently to prevent the retry/reload cycle.

    The socket is closed with code 4003 without being accepted when the access
    check fails. Otherwise it is accepted and held open, discarding anything
    the client sends, until the client disconnects.

    Args:
        websocket: The incoming WebSocket connection.
        session_id: Build session whose webapp the socket belongs to.
        user: The requesting user, or None when unauthenticated.
        db_session: Database session passed to the access check.
    """
    try:
        _check_webapp_access(session_id, user, db_session)
    except Exception:
        # Deny by default: any failure of the access check rejects the socket.
        await websocket.close(code=4003)
        return
    await websocket.accept()
    try:
        # Wait on receive() instead of sleeping. A disconnect is only reported
        # through the receive channel, so a sleep loop can never observe it and
        # the handler would keep running after the client has gone away.
        while True:
            message = await websocket.receive()
            if message["type"] == "websocket.disconnect":
                break
    except WebSocketDisconnect:
        pass
@public_build_router.websocket("/sessions/{session_id}/webapp/_next/webpack-hmr")
async def webapp_hmr_sink_webpack(
    websocket: WebSocket,
    session_id: UUID,
    user: User | None = Depends(optional_user),
    db_session: Session = Depends(get_session),
) -> None:
    """WebSocket endpoint for the webpack HMR path; delegates to the shared sink."""
    await _hmr_websocket_sink(
        websocket=websocket,
        session_id=session_id,
        user=user,
        db_session=db_session,
    )
@public_build_router.websocket("/sessions/{session_id}/webapp/_next/hmr")
async def webapp_hmr_sink_turbopack(
    websocket: WebSocket,
    session_id: UUID,
    user: User | None = Depends(optional_user),
    db_session: Session = Depends(get_session),
) -> None:
    """WebSocket endpoint for the `_next/hmr` path; delegates to the shared sink."""
    await _hmr_websocket_sink(
        websocket=websocket,
        session_id=session_id,
        user=user,
        db_session=db_session,
    )
# =============================================================================
# Sandbox Management Endpoints
# =============================================================================

View File

@@ -3,7 +3,7 @@
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<meta http-equiv="refresh" content="15" />
{auto_refresh_meta}
<title>Craft — Starting up</title>
<style>
*,

View File

@@ -1,7 +1,8 @@
import type { NextConfig } from "next";
const nextConfig: NextConfig = {
/* config options here */
// Routes all asset URLs (including HMR WebSocket) through the Craft proxy path.
assetPrefix: process.env.CRAFT_ASSET_PREFIX || undefined,
};
export default nextConfig;

View File

@@ -118,6 +118,7 @@ RESOURCE_DELETION_POLL_INTERVAL_SECONDS = 0.5
def _build_nextjs_start_script(
session_path: str,
nextjs_port: int,
session_id: UUID,
check_node_modules: bool = False,
) -> str:
"""Build shell script to start the NextJS dev server.
@@ -146,6 +147,7 @@ cd {session_path}/outputs/web
{npm_install_check}
# Start npm run dev in background
echo "Starting Next.js dev server on port {nextjs_port}..."
export CRAFT_ASSET_PREFIX=/api/build/sessions/{session_id}/webapp
nohup npm run dev -- -p {nextjs_port} > {session_path}/nextjs.log 2>&1 &
NEXTJS_PID=$!
echo "Next.js server started with PID $NEXTJS_PID"
@@ -1329,7 +1331,7 @@ fi
# Build NextJS startup script (npm install already done in outputs_setup)
nextjs_start_script = _build_nextjs_start_script(
session_path, nextjs_port, check_node_modules=False
session_path, nextjs_port, session_id=session_id, check_node_modules=False
)
setup_script = f"""
@@ -1703,7 +1705,10 @@ echo "SNAPSHOT_RESTORED"
# Start NextJS dev server (check node_modules since restoring from snapshot)
start_script = _build_nextjs_start_script(
safe_session_path, nextjs_port, check_node_modules=True
safe_session_path,
nextjs_port,
session_id=session_id,
check_node_modules=True,
)
k8s_stream(
self._stream_core_api.connect_get_namespaced_pod_exec,

View File

@@ -526,7 +526,7 @@ class LocalSandboxManager(SandboxManager):
logger.info(f"Starting Next.js server at {web_dir} on port {nextjs_port}")
nextjs_process = self._process_manager.start_nextjs_server(
web_dir, nextjs_port
web_dir, nextjs_port, session_id=session_id
)
# Store process for clean shutdown on session delete
with self._nextjs_lock:
@@ -843,7 +843,7 @@ class LocalSandboxManager(SandboxManager):
)
return
process = self._process_manager.start_nextjs_server(
web_dir, nextjs_port
web_dir, nextjs_port, session_id=session_id
)
with self._nextjs_lock:
self._nextjs_processes[process_key] = process

View File

@@ -8,6 +8,7 @@ import time
import urllib.error
import urllib.request
from pathlib import Path
from uuid import UUID
from onyx.utils.logger import setup_logger
@@ -27,6 +28,7 @@ class ProcessManager:
self,
web_dir: Path,
port: int,
session_id: UUID,
timeout: float = 180.0,
) -> subprocess.Popen[bytes]:
"""Start Next.js dev server.
@@ -76,6 +78,10 @@ class ProcessManager:
cwd=web_dir,
stdout=None,
stderr=None,
env={
**os.environ,
"CRAFT_ASSET_PREFIX": f"/api/build/sessions/{session_id}/webapp",
},
)
logger.info(f"Next.js process started with PID {process.pid}")

View File

View File

@@ -0,0 +1,52 @@
"""Unit tests for _rewrite_asset_paths in the webapp proxy."""
from onyx.server.features.build.api.api import _rewrite_asset_paths
SESSION_ID = "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee"
BASE = f"/api/build/sessions/{SESSION_ID}/webapp"
def rewrite(html: str) -> str:
    """Run _rewrite_asset_paths on *html* for SESSION_ID and return the result as str."""
    rewritten_bytes = _rewrite_asset_paths(html.encode(), SESSION_ID)
    return rewritten_bytes.decode()
class TestNextjsPathRewriting:
    """Checks that asset URLs are routed through the session's proxy base path."""

    def test_rewrites_bare_next_script_src(self) -> None:
        """A double-quoted /_next/ src gains the proxy prefix and no bare one remains."""
        rewritten = rewrite('<script src="/_next/static/chunks/main.js">')
        assert f'src="{BASE}/_next/static/chunks/main.js"' in rewritten
        assert '"/_next/' not in rewritten

    def test_rewrites_bare_next_in_single_quotes(self) -> None:
        """A single-quoted /_next/ href gains the proxy prefix."""
        rewritten = rewrite("<link href='/_next/static/css/app.css'>")
        assert f"'{BASE}/_next/static/css/app.css'" in rewritten

    def test_rewrites_bare_next_in_url_parens(self) -> None:
        """An unquoted CSS url(/_next/...) gains the proxy prefix."""
        rewritten = rewrite("background: url(/_next/static/media/font.woff2)")
        assert f"url({BASE}/_next/static/media/font.woff2)" in rewritten

    def test_no_double_prefix_when_already_proxied(self) -> None:
        """assetPrefix makes Next.js emit already-prefixed URLs — must not double-rewrite."""
        source = f'<script src="{BASE}/_next/static/chunks/main.js">'
        rewritten = rewrite(source)
        # Specifically, no double path
        assert f"{BASE}/{BASE}" not in rewritten
        # Should be unchanged
        assert rewritten == source

    def test_rewrites_favicon(self) -> None:
        """The root favicon reference gains the proxy prefix."""
        rewritten = rewrite('<link rel="icon" href="/favicon.ico">')
        assert f'"{BASE}/favicon.ico"' in rewritten

    def test_rewrites_json_data_path_double_quoted(self) -> None:
        """A double-quoted absolute .json path gains the proxy prefix."""
        rewritten = rewrite('fetch("/data/tickets.json")')
        assert f'"{BASE}/data/tickets.json"' in rewritten

    def test_rewrites_json_data_path_single_quoted(self) -> None:
        """A single-quoted absolute .json path gains the proxy prefix."""
        rewritten = rewrite("fetch('/data/items.json')")
        assert f"'{BASE}/data/items.json'" in rewritten