mirror of
https://github.com/onyx-dot-app/onyx.git
synced 2026-03-11 18:52:39 +00:00
Compare commits
11 Commits
main
...
experiment
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
4b6edc091f | ||
|
|
11f3d41a67 | ||
|
|
40cb943354 | ||
|
|
1a0850d5d0 | ||
|
|
f8294c84d6 | ||
|
|
84eef6d9c1 | ||
|
|
6da3e1bae2 | ||
|
|
474eab8fb9 | ||
|
|
8b0ca9a66d | ||
|
|
bc1f81c342 | ||
|
|
2c45123de5 |
@@ -64,6 +64,13 @@ PUBLIC_ENDPOINT_SPECS = [
|
||||
("/build/sessions/{session_id}/webapp/{path:path}", {"GET"}),
|
||||
]
|
||||
|
||||
# WebSocket routes have no `methods` attribute so they can't go in PUBLIC_ENDPOINT_SPECS.
|
||||
# These paths are access-controlled inside their handlers via _check_webapp_access.
|
||||
PUBLIC_WEBSOCKET_PATHS = {
|
||||
"/build/sessions/{session_id}/webapp/_next/webpack-hmr",
|
||||
"/build/sessions/{session_id}/webapp/_next/hmr",
|
||||
}
|
||||
|
||||
|
||||
def is_route_in_spec_list(
|
||||
route: BaseRoute, public_endpoint_specs: list[tuple[str, set[str]]]
|
||||
@@ -114,6 +121,21 @@ def check_router_auth(
|
||||
if is_route_in_spec_list(route, public_endpoint_specs):
|
||||
continue
|
||||
|
||||
# WebSocket routes have no methods; check against the explicit WebSocket allowlist
|
||||
if not hasattr(route, "methods") and hasattr(route, "path"):
|
||||
if route.path in PUBLIC_WEBSOCKET_PATHS:
|
||||
continue
|
||||
# Strip global prefix for comparison
|
||||
processed_global_prefix = (
|
||||
f"/{APP_API_PREFIX.strip('/')}" if APP_API_PREFIX else ""
|
||||
)
|
||||
if processed_global_prefix and route.path.startswith(
|
||||
processed_global_prefix
|
||||
):
|
||||
stripped = route.path[len(processed_global_prefix) :]
|
||||
if stripped in PUBLIC_WEBSOCKET_PATHS:
|
||||
continue
|
||||
|
||||
# check for auth
|
||||
found_auth = False
|
||||
route_dependant_obj = cast(
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
import asyncio
|
||||
import re
|
||||
from collections.abc import Iterator
|
||||
from pathlib import Path
|
||||
from uuid import UUID
|
||||
@@ -8,6 +10,8 @@ from fastapi import Depends
|
||||
from fastapi import HTTPException
|
||||
from fastapi import Request
|
||||
from fastapi import Response
|
||||
from fastapi import WebSocket
|
||||
from fastapi import WebSocketDisconnect
|
||||
from fastapi.responses import RedirectResponse
|
||||
from fastapi.responses import StreamingResponse
|
||||
from sqlalchemy.orm import Session
|
||||
@@ -20,6 +24,7 @@ from onyx.db.engine.sql_engine import get_session
|
||||
from onyx.db.enums import ConnectorCredentialPairStatus
|
||||
from onyx.db.enums import IndexingStatus
|
||||
from onyx.db.enums import ProcessingMode
|
||||
from onyx.db.enums import SandboxStatus
|
||||
from onyx.db.enums import SharingScope
|
||||
from onyx.db.index_attempt import get_latest_index_attempt_for_cc_pair_id
|
||||
from onyx.db.models import BuildSession
|
||||
@@ -239,18 +244,45 @@ def _stream_response(response: httpx.Response) -> Iterator[bytes]:
|
||||
yield chunk
|
||||
|
||||
|
||||
def _inject_asset_fixer(content: bytes, session_id: str) -> bytes:
|
||||
"""Inject a script that rewrites /_next/ in dynamically inserted <style> tags.
|
||||
|
||||
next/font in dev mode injects @font-face declarations via React client-side,
|
||||
bypassing server-side proxy rewriting. Patching appendChild/insertBefore
|
||||
intercepts these before the browser parses the font URL.
|
||||
"""
|
||||
base = f"/api/build/sessions/{session_id}/webapp"
|
||||
script = (
|
||||
f"<script>(function(){{var B='{base}';"
|
||||
"function f(n){if(!n||n.nodeType!==1)return;"
|
||||
"if(n.tagName==='STYLE'&&n.textContent)"
|
||||
"n.textContent=n.textContent.replace(/(url\\s*\\(\\s*['\"]?)\\/_next\\//g,'$1'+B+'/_next/');"
|
||||
"else if(n.tagName==='LINK'){var h=n.getAttribute('href')||'';"
|
||||
"if(h.indexOf('/_next/')===0)n.setAttribute('href',B+h);}}"
|
||||
"function w(m){var o=Element.prototype[m];"
|
||||
"Element.prototype[m]=function(n){f(n);return o.apply(this,arguments);}}"
|
||||
"w('appendChild');w('insertBefore');})()</script>"
|
||||
)
|
||||
text = content.decode("utf-8")
|
||||
# Inject immediately after <head> so it runs before React initialises
|
||||
text = re.sub(
|
||||
r"(<head\b[^>]*>)",
|
||||
lambda m: m.group(0) + script,
|
||||
text,
|
||||
count=1,
|
||||
flags=re.IGNORECASE,
|
||||
)
|
||||
return text.encode("utf-8")
|
||||
|
||||
|
||||
def _rewrite_asset_paths(content: bytes, session_id: str) -> bytes:
|
||||
"""Rewrite Next.js asset paths to go through the proxy."""
|
||||
import re
|
||||
|
||||
# Base path includes session_id for routing
|
||||
webapp_base_path = f"/api/build/sessions/{session_id}/webapp"
|
||||
|
||||
text = content.decode("utf-8")
|
||||
# Rewrite /_next/ paths to go through our proxy
|
||||
text = text.replace("/_next/", f"{webapp_base_path}/_next/")
|
||||
# Rewrite JSON data file fetch paths (e.g., /data.json, /data/tickets.json)
|
||||
# Matches paths like "/filename.json" or "/path/to/file.json"
|
||||
# Anchor on delimiter so already-prefixed URLs (from assetPrefix) aren't double-rewritten.
|
||||
for delim in ('"', "'", "("):
|
||||
text = text.replace(f"{delim}/_next/", f"{delim}{webapp_base_path}/_next/")
|
||||
text = re.sub(
|
||||
r'"(/(?:[a-zA-Z0-9_-]+/)*[a-zA-Z0-9_-]+\.json)"',
|
||||
f'"{webapp_base_path}\\1"',
|
||||
@@ -261,7 +293,6 @@ def _rewrite_asset_paths(content: bytes, session_id: str) -> bytes:
|
||||
f"'{webapp_base_path}\\1'",
|
||||
text,
|
||||
)
|
||||
# Rewrite favicon
|
||||
text = text.replace('"/favicon.ico', f'"{webapp_base_path}/favicon.ico')
|
||||
return text.encode("utf-8")
|
||||
|
||||
@@ -348,6 +379,8 @@ def _proxy_request(
|
||||
# For HTML/CSS/JS responses, rewrite asset paths
|
||||
if any(ct in content_type for ct in REWRITABLE_CONTENT_TYPES):
|
||||
content = _rewrite_asset_paths(response.content, str(session_id))
|
||||
if "text/html" in content_type:
|
||||
content = _inject_asset_fixer(content, str(session_id))
|
||||
return Response(
|
||||
content=content,
|
||||
status_code=response.status_code,
|
||||
@@ -394,13 +427,22 @@ def _check_webapp_access(
|
||||
_OFFLINE_HTML_PATH = Path(__file__).parent / "templates" / "webapp_offline.html"
|
||||
|
||||
|
||||
def _offline_html_response() -> Response:
|
||||
def _offline_html_response(auto_refresh: bool) -> Response:
    """Return a branded Craft HTML page when the sandbox is not reachable.

    Design mirrors the default Craft web template (outputs/web/app/page.tsx):
    terminal window aesthetic with Minecraft-themed typing animation.

    Args:
        auto_refresh: When True, inject a meta refresh tag so the page polls
            for the sandbox to wake up. Should only be True when the sandbox is
            genuinely asleep (SLEEPING/TERMINATED), not when it is RUNNING but
            Next.js is momentarily restarting — that case causes jarring
            periodic iframe reloads and the frontend's own polling handles it.

    Returns:
        A 503 ``text/html`` response whose body is the offline template with
        the ``{auto_refresh_meta}`` placeholder substituted.
    """
    # Decode explicitly as UTF-8: the template contains non-ASCII text, and
    # the default for read_text() is the platform locale encoding, which is
    # not guaranteed to be UTF-8 on every host.
    html = _OFFLINE_HTML_PATH.read_text(encoding="utf-8")
    meta_tag = '<meta http-equiv="refresh" content="15" />' if auto_refresh else ""
    # Plain str.replace (not str.format) because the template's CSS is full of
    # literal braces.
    html = html.replace("{auto_refresh_meta}", meta_tag)
    return Response(content=html, status_code=503, media_type="text/html")
|
||||
|
||||
|
||||
@@ -435,10 +477,61 @@ def get_webapp(
|
||||
return _proxy_request(path, request, session_id, db_session)
|
||||
except HTTPException as e:
|
||||
if e.status_code in (502, 503, 504):
|
||||
return _offline_html_response()
|
||||
session = db_session.get(BuildSession, session_id)
|
||||
sandbox = (
|
||||
get_sandbox_by_user_id(db_session, session.user_id)
|
||||
if session and session.user_id
|
||||
else None
|
||||
)
|
||||
sandbox_is_asleep = sandbox is None or sandbox.status in (
|
||||
SandboxStatus.SLEEPING,
|
||||
SandboxStatus.TERMINATED,
|
||||
)
|
||||
return _offline_html_response(auto_refresh=sandbox_is_asleep)
|
||||
raise
|
||||
|
||||
|
||||
async def _hmr_websocket_sink(
    websocket: WebSocket,
    session_id: UUID,
    user: User | None,
    db_session: Session,
) -> None:
    """Accept the HMR WebSocket silently to prevent the retry/reload cycle.

    The connection is only accepted when ``_check_webapp_access`` succeeds for
    the session; otherwise it is closed with code 4003 before being accepted.
    Once accepted, incoming frames are read and discarded until the client
    disconnects. Nothing is ever sent back.

    Args:
        websocket: The incoming WebSocket connection.
        session_id: Build session the webapp belongs to.
        user: The authenticated user, or None for anonymous access.
        db_session: Database session used for the access check.
    """
    try:
        _check_webapp_access(session_id, user, db_session)
    except Exception:
        # Fail closed: any failure of the access check (denied, missing
        # session, DB error) must reject the socket rather than accept it.
        await websocket.close(code=4003)
        return

    await websocket.accept()
    try:
        # Wait on receive() instead of sleeping. A disconnect is only surfaced
        # to the application through the receive channel, so a sleep loop
        # would never notice the client going away and this handler (and the
        # DB session held by its dependencies) would stay alive indefinitely.
        while True:
            message = await websocket.receive()
            if message["type"] == "websocket.disconnect":
                return
            # Any other frame (e.g. HMR client pings) is intentionally ignored.
    except WebSocketDisconnect:
        # Defensive: some receive helpers raise instead of returning the
        # disconnect message; either way the sink just ends quietly.
        pass
|
||||
|
||||
|
||||
@public_build_router.websocket("/sessions/{session_id}/webapp/_next/webpack-hmr")
async def webapp_hmr_sink_webpack(
    websocket: WebSocket,
    session_id: UUID,
    user: User | None = Depends(optional_user),
    db_session: Session = Depends(get_session),
) -> None:
    """WebSocket route for the ``_next/webpack-hmr`` path of a session webapp.

    Pure delegation: the resolved user and DB session are handed to
    ``_hmr_websocket_sink`` together with the socket.
    """
    await _hmr_websocket_sink(
        websocket=websocket,
        session_id=session_id,
        user=user,
        db_session=db_session,
    )
|
||||
|
||||
|
||||
@public_build_router.websocket("/sessions/{session_id}/webapp/_next/hmr")
async def webapp_hmr_sink_turbopack(
    websocket: WebSocket,
    session_id: UUID,
    user: User | None = Depends(optional_user),
    db_session: Session = Depends(get_session),
) -> None:
    """WebSocket route for the ``_next/hmr`` path of a session webapp.

    Pure delegation: the resolved user and DB session are handed to
    ``_hmr_websocket_sink`` together with the socket.
    """
    await _hmr_websocket_sink(
        websocket=websocket,
        session_id=session_id,
        user=user,
        db_session=db_session,
    )
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Sandbox Management Endpoints
|
||||
# =============================================================================
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
<head>
|
||||
<meta charset="UTF-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<meta http-equiv="refresh" content="15" />
|
||||
{auto_refresh_meta}
|
||||
<title>Craft — Starting up</title>
|
||||
<style>
|
||||
*,
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
import type { NextConfig } from "next";
|
||||
|
||||
const nextConfig: NextConfig = {
|
||||
/* config options here */
|
||||
// Routes all asset URLs (including HMR WebSocket) through the Craft proxy path.
|
||||
assetPrefix: process.env.CRAFT_ASSET_PREFIX || undefined,
|
||||
};
|
||||
|
||||
export default nextConfig;
|
||||
|
||||
@@ -118,6 +118,7 @@ RESOURCE_DELETION_POLL_INTERVAL_SECONDS = 0.5
|
||||
def _build_nextjs_start_script(
|
||||
session_path: str,
|
||||
nextjs_port: int,
|
||||
session_id: UUID,
|
||||
check_node_modules: bool = False,
|
||||
) -> str:
|
||||
"""Build shell script to start the NextJS dev server.
|
||||
@@ -146,6 +147,7 @@ cd {session_path}/outputs/web
|
||||
{npm_install_check}
|
||||
# Start npm run dev in background
|
||||
echo "Starting Next.js dev server on port {nextjs_port}..."
|
||||
export CRAFT_ASSET_PREFIX=/api/build/sessions/{session_id}/webapp
|
||||
nohup npm run dev -- -p {nextjs_port} > {session_path}/nextjs.log 2>&1 &
|
||||
NEXTJS_PID=$!
|
||||
echo "Next.js server started with PID $NEXTJS_PID"
|
||||
@@ -1329,7 +1331,7 @@ fi
|
||||
|
||||
# Build NextJS startup script (npm install already done in outputs_setup)
|
||||
nextjs_start_script = _build_nextjs_start_script(
|
||||
session_path, nextjs_port, check_node_modules=False
|
||||
session_path, nextjs_port, session_id=session_id, check_node_modules=False
|
||||
)
|
||||
|
||||
setup_script = f"""
|
||||
@@ -1703,7 +1705,10 @@ echo "SNAPSHOT_RESTORED"
|
||||
|
||||
# Start NextJS dev server (check node_modules since restoring from snapshot)
|
||||
start_script = _build_nextjs_start_script(
|
||||
safe_session_path, nextjs_port, check_node_modules=True
|
||||
safe_session_path,
|
||||
nextjs_port,
|
||||
session_id=session_id,
|
||||
check_node_modules=True,
|
||||
)
|
||||
k8s_stream(
|
||||
self._stream_core_api.connect_get_namespaced_pod_exec,
|
||||
|
||||
@@ -526,7 +526,7 @@ class LocalSandboxManager(SandboxManager):
|
||||
logger.info(f"Starting Next.js server at {web_dir} on port {nextjs_port}")
|
||||
|
||||
nextjs_process = self._process_manager.start_nextjs_server(
|
||||
web_dir, nextjs_port
|
||||
web_dir, nextjs_port, session_id=session_id
|
||||
)
|
||||
# Store process for clean shutdown on session delete
|
||||
with self._nextjs_lock:
|
||||
@@ -843,7 +843,7 @@ class LocalSandboxManager(SandboxManager):
|
||||
)
|
||||
return
|
||||
process = self._process_manager.start_nextjs_server(
|
||||
web_dir, nextjs_port
|
||||
web_dir, nextjs_port, session_id=session_id
|
||||
)
|
||||
with self._nextjs_lock:
|
||||
self._nextjs_processes[process_key] = process
|
||||
|
||||
@@ -8,6 +8,7 @@ import time
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
from pathlib import Path
|
||||
from uuid import UUID
|
||||
|
||||
from onyx.utils.logger import setup_logger
|
||||
|
||||
@@ -27,6 +28,7 @@ class ProcessManager:
|
||||
self,
|
||||
web_dir: Path,
|
||||
port: int,
|
||||
session_id: UUID,
|
||||
timeout: float = 180.0,
|
||||
) -> subprocess.Popen[bytes]:
|
||||
"""Start Next.js dev server.
|
||||
@@ -76,6 +78,10 @@ class ProcessManager:
|
||||
cwd=web_dir,
|
||||
stdout=None,
|
||||
stderr=None,
|
||||
env={
|
||||
**os.environ,
|
||||
"CRAFT_ASSET_PREFIX": f"/api/build/sessions/{session_id}/webapp",
|
||||
},
|
||||
)
|
||||
logger.info(f"Next.js process started with PID {process.pid}")
|
||||
|
||||
|
||||
0
backend/tests/unit/build/__init__.py
Normal file
0
backend/tests/unit/build/__init__.py
Normal file
52
backend/tests/unit/build/test_rewrite_asset_paths.py
Normal file
52
backend/tests/unit/build/test_rewrite_asset_paths.py
Normal file
@@ -0,0 +1,52 @@
|
||||
"""Unit tests for _rewrite_asset_paths in the webapp proxy."""
|
||||
|
||||
from onyx.server.features.build.api.api import _rewrite_asset_paths
|
||||
|
||||
SESSION_ID = "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee"
|
||||
BASE = f"/api/build/sessions/{SESSION_ID}/webapp"
|
||||
|
||||
|
||||
def rewrite(html: str) -> str:
    """Run ``_rewrite_asset_paths`` on *html* for SESSION_ID, str in / str out."""
    raw_in = html.encode()
    raw_out = _rewrite_asset_paths(raw_in, SESSION_ID)
    return raw_out.decode()
|
||||
|
||||
|
||||
class TestNextjsPathRewriting:
    """Behavioural tests for proxy path rewriting of Next.js assets.

    Each test feeds a small snippet through ``rewrite`` and checks that bare
    root-relative asset URLs gain the session proxy prefix (BASE) while
    already-prefixed URLs are left alone.
    """

    def test_rewrites_bare_next_script_src(self) -> None:
        html = '<script src="/_next/static/chunks/main.js">'
        result = rewrite(html)
        assert f'src="{BASE}/_next/static/chunks/main.js"' in result
        # No un-prefixed reference may survive.
        assert '"/_next/' not in result

    def test_rewrites_bare_next_in_single_quotes(self) -> None:
        html = "<link href='/_next/static/css/app.css'>"
        result = rewrite(html)
        assert f"'{BASE}/_next/static/css/app.css'" in result

    def test_rewrites_bare_next_in_url_parens(self) -> None:
        html = "background: url(/_next/static/media/font.woff2)"
        result = rewrite(html)
        assert f"url({BASE}/_next/static/media/font.woff2)" in result

    def test_no_double_prefix_when_already_proxied(self) -> None:
        """assetPrefix makes Next.js emit already-prefixed URLs — must not double-rewrite."""
        already_prefixed = f'<script src="{BASE}/_next/static/chunks/main.js">'
        result = rewrite(already_prefixed)
        # Should be unchanged
        assert result == already_prefixed
        # Specifically, no doubled prefix. BASE already starts with "/", so a
        # double rewrite yields BASE immediately followed by BASE; checking
        # for f"{BASE}/{BASE}" (with an extra slash) could never fail.
        assert f"{BASE}{BASE}" not in result

    def test_rewrites_favicon(self) -> None:
        html = '<link rel="icon" href="/favicon.ico">'
        result = rewrite(html)
        assert f'"{BASE}/favicon.ico"' in result

    def test_rewrites_json_data_path_double_quoted(self) -> None:
        html = 'fetch("/data/tickets.json")'
        result = rewrite(html)
        assert f'"{BASE}/data/tickets.json"' in result

    def test_rewrites_json_data_path_single_quoted(self) -> None:
        html = "fetch('/data/items.json')"
        result = rewrite(html)
        assert f"'{BASE}/data/items.json'" in result
|
||||
Reference in New Issue
Block a user