Compare commits

..

4 Commits

Author SHA1 Message Date
Wenxi Onyx
4f793ff870 Merge remote-tracking branch 'origin/main' into codex/agent-lab 2026-04-09 16:15:03 -07:00
Jamison Lahman
4a96ef13d7 chore(devtools): devcontainer allows go and rust repos (#10041) 2026-04-09 15:46:50 -07:00
Wenxi Onyx
55f570261f Merge remote-tracking branch 'origin/main' into codex/agent-lab 2026-04-09 15:07:50 -07:00
Wenxi Onyx
289a7b807e agent lab init 2026-04-09 15:07:02 -07:00
68 changed files with 7477 additions and 526 deletions

View File

@@ -10,6 +10,7 @@
"source=${localEnv:HOME}/.gitconfig,target=/home/dev/.gitconfig.host,type=bind,readonly",
"source=${localEnv:HOME}/.ssh,target=/home/dev/.ssh.host,type=bind,readonly",
"source=${localEnv:HOME}/.config/nvim,target=/home/dev/.config/nvim.host,type=bind,readonly",
"source=onyx-devcontainer-cache,target=/home/dev/.cache,type=volume",
"source=onyx-devcontainer-local,target=/home/dev/.local,type=volume"
],
"remoteUser": "dev",

View File

@@ -5,7 +5,7 @@ set -euo pipefail
# bind-mounted files are accessible without running as root.
#
# Standard Docker: Workspace is owned by the host user's UID (e.g. 1000).
# We remap dev to that UID fast and seamless.
# We remap dev to that UID -- fast and seamless.
#
# Rootless Docker: Workspace appears as root-owned (UID 0) inside the
# container due to user-namespace mapping. We can't remap
@@ -23,9 +23,10 @@ DEV_GID=$(id -g "$TARGET_USER")
DEV_HOME=/home/"$TARGET_USER"
# Ensure directories that tools expect exist under ~dev.
# ~/.local is a named Docker volume ensure subdirs exist and are owned by dev.
# ~/.local and ~/.cache are named Docker volumes -- ensure they are owned by dev.
mkdir -p "$DEV_HOME"/.local/state "$DEV_HOME"/.local/share
chown -R "$TARGET_USER":"$TARGET_USER" "$DEV_HOME"/.local
chown -R "$TARGET_USER":"$TARGET_USER" "$DEV_HOME"/.cache
# Copy host configs mounted as *.host into their real locations.
# This gives the dev user owned copies without touching host originals.
@@ -41,7 +42,7 @@ if [ -d "$DEV_HOME/.config/nvim.host" ]; then
chown -R "$TARGET_USER":"$TARGET_USER" "$DEV_HOME/.config/nvim"
fi
# Already matching nothing to do.
# Already matching -- nothing to do.
if [ "$WS_UID" = "$DEV_UID" ] && [ "$WS_GID" = "$DEV_GID" ]; then
exit 0
fi

View File

@@ -24,7 +24,7 @@ fi
ipset create allowed-domains hash:net || true
ipset flush allowed-domains
# Fetch GitHub IP ranges (IPv4 only ipset hash:net and iptables are IPv4)
# Fetch GitHub IP ranges (IPv4 only -- ipset hash:net and iptables are IPv4)
GITHUB_IPS=$(curl -s https://api.github.com/meta | jq -r '.api[]' 2>/dev/null | grep -v ':' || echo "")
for ip in $GITHUB_IPS; do
if ! ipset add allowed-domains "$ip" -exist 2>&1; then
@@ -42,6 +42,9 @@ ALLOWED_DOMAINS=(
"update.code.visualstudio.com"
"pypi.org"
"files.pythonhosted.org"
"go.dev"
"storage.googleapis.com"
"static.rust-lang.org"
)
for domain in "${ALLOWED_DOMAINS[@]}"; do

416
AGENTS.md
View File

@@ -1,361 +1,55 @@
# PROJECT KNOWLEDGE BASE
This file provides guidance to AI agents when working with code in this repository.
## KEY NOTES
- If you run into any missing python dependency errors, try running your command with `source .venv/bin/activate` \
to activate the python venv.
- To make tests work, check the `.env` file at the root of the project to find an OpenAI key.
- If using `playwright` to explore the frontend, you can usually log in with username `a@example.com` and password
`a`. The app can be accessed at `http://localhost:3000`.
- You should assume that all Onyx services are running. To verify, you can check the `backend/log` directory to
make sure we see logs coming out from the relevant service.
- To connect to the Postgres database, use: `docker exec -it onyx-relational_db-1 psql -U postgres -c "<SQL>"`
- When making calls to the backend, always go through the frontend. E.g. make a call to `http://localhost:3000/api/persona` not `http://localhost:8080/api/persona`
- Put ALL db operations under the `backend/onyx/db` / `backend/ee/onyx/db` directories. Don't run queries
outside of those directories.
## Project Overview
**Onyx** (formerly Danswer) is an open-source Gen-AI and Enterprise Search platform that connects to company documents, apps, and people. It features a modular architecture with both Community Edition (MIT licensed) and Enterprise Edition offerings.
### Background Workers (Celery)
Onyx uses Celery for asynchronous task processing with multiple specialized workers:
#### Worker Types
1. **Primary Worker** (`celery_app.py`)
- Coordinates core background tasks and system-wide operations
- Handles connector management, document sync, pruning, and periodic checks
- Runs with 4 threads concurrency
- Tasks: connector deletion, vespa sync, pruning, LLM model updates, user file sync
2. **Docfetching Worker** (`docfetching`)
- Fetches documents from external data sources (connectors)
- Spawns docprocessing tasks for each document batch
- Implements watchdog monitoring for stuck connectors
- Configurable concurrency (default from env)
3. **Docprocessing Worker** (`docprocessing`)
- Processes fetched documents through the indexing pipeline:
- Upserts documents to PostgreSQL
- Chunks documents and adds contextual information
- Embeds chunks via model server
- Writes chunks to Vespa vector database
- Updates document metadata
- Configurable concurrency (default from env)
4. **Light Worker** (`light`)
- Handles lightweight, fast operations
- Tasks: vespa operations, document permissions sync, external group sync
- Higher concurrency for quick tasks
5. **Heavy Worker** (`heavy`)
- Handles resource-intensive operations
- Primary task: document pruning operations
- Runs with 4 threads concurrency
6. **KG Processing Worker** (`kg_processing`)
- Handles Knowledge Graph processing and clustering
- Builds relationships between documents
- Runs clustering algorithms
- Configurable concurrency
7. **Monitoring Worker** (`monitoring`)
- System health monitoring and metrics collection
- Monitors Celery queues, process memory, and system status
- Single thread (monitoring doesn't need parallelism)
- Cloud-specific monitoring tasks
8. **User File Processing Worker** (`user_file_processing`)
- Processes user-uploaded files
- Handles user file indexing and project synchronization
- Configurable concurrency
9. **Beat Worker** (`beat`)
- Celery's scheduler for periodic tasks
- Uses DynamicTenantScheduler for multi-tenant support
- Schedules tasks like:
- Indexing checks (every 15 seconds)
- Connector deletion checks (every 20 seconds)
- Vespa sync checks (every 20 seconds)
- Pruning checks (every 20 seconds)
- KG processing (every 60 seconds)
- Monitoring tasks (every 5 minutes)
- Cleanup tasks (hourly)
#### Key Features
- **Thread-based Workers**: All workers use thread pools (not processes) for stability
- **Tenant Awareness**: Multi-tenant support with per-tenant task isolation. There is a
middleware layer that automatically finds the appropriate tenant ID when sending tasks
via Celery Beat.
- **Task Prioritization**: High, Medium, Low priority queues
- **Monitoring**: Built-in heartbeat and liveness checking
- **Failure Handling**: Automatic retry and failure recovery mechanisms
- **Redis Coordination**: Inter-process communication via Redis
- **PostgreSQL State**: Task state and metadata stored in PostgreSQL
#### Important Notes
**Defining Tasks**:
- Always use `@shared_task` rather than `@celery_app`
- Put tasks under `background/celery/tasks/` or `ee/background/celery/tasks`
- Never enqueue a task without an expiration. Always supply `expires=` when
sending tasks, either from the beat schedule or directly from another task. It
should never be acceptable to submit code which enqueues tasks without an
expiration, as doing so can lead to unbounded task queue growth.
**Defining APIs**:
When creating new FastAPI APIs, do NOT use the `response_model` field. Instead, just type the
function.
**Testing Updates**:
If you make any updates to a celery worker and you want to test these changes, you will need
to ask me to restart the celery worker. There is no auto-restart on code-change mechanism.
**Task Time Limits**:
Since all tasks are executed in thread pools, the time limit features of Celery are silently
disabled and won't work. Timeout logic must be implemented within the task itself.
### Code Quality
```bash
# Install and run pre-commit hooks
pre-commit install
pre-commit run --all-files
```
NOTE: Always make sure everything is strictly typed (both in Python and Typescript).
## Architecture Overview
### Technology Stack
- **Backend**: Python 3.11, FastAPI, SQLAlchemy, Alembic, Celery
- **Frontend**: Next.js 15+, React 18, TypeScript, Tailwind CSS
- **Database**: PostgreSQL with Redis caching
- **Search**: Vespa vector database
- **Auth**: OAuth2, SAML, multi-provider support
- **AI/ML**: LangChain, LiteLLM, multiple embedding models
### Directory Structure
```
backend/
├── onyx/
│ ├── auth/ # Authentication & authorization
│ ├── chat/ # Chat functionality & LLM interactions
│ ├── connectors/ # Data source connectors
│ ├── db/ # Database models & operations
│ ├── document_index/ # Vespa integration
│ ├── federated_connectors/ # External search connectors
│ ├── llm/ # LLM provider integrations
│ └── server/ # API endpoints & routers
├── ee/ # Enterprise Edition features
├── alembic/ # Database migrations
└── tests/ # Test suites
web/
├── src/app/ # Next.js app router pages
├── src/components/ # Reusable React components
└── src/lib/ # Utilities & business logic
```
## Frontend Standards
Frontend standards for the `web/` and `desktop/` projects live in `web/AGENTS.md`.
## Database & Migrations
### Running Migrations
```bash
# Standard migrations
alembic upgrade head
# Multi-tenant (Enterprise)
alembic -n schema_private upgrade head
```
### Creating Migrations
```bash
# Create migration
alembic revision -m "description"
# Multi-tenant migration
alembic -n schema_private revision -m "description"
```
Write the migration manually and place it in the file that alembic creates when running the above command.
## Testing Strategy
First, you must activate the virtual environment with `source .venv/bin/activate`.
There are 4 main types of tests within Onyx:
### Unit Tests
These should not assume any Onyx/external services are available to be called.
Interactions with the outside world should be mocked using `unittest.mock`. Generally, only
write these for complex, isolated modules e.g. `citation_processing.py`.
To run them:
```bash
pytest -xv backend/tests/unit
```
### External Dependency Unit Tests
These tests assume that all external dependencies of Onyx are available and callable (e.g. Postgres, Redis,
MinIO/S3, Vespa are running + OpenAI can be called + any request to the internet is fine + etc.).
However, the actual Onyx containers are not running and with these tests we call the function to test directly.
We can also mock components/calls at will.
The goal with these tests is to minimize mocking while giving some flexibility to mock things that are flaky,
need strictly controlled behavior, or need to have their internal behavior validated (e.g. verify a function is called
with certain args, something that would be impossible with proper integration tests).
A great example of this type of test is `backend/tests/external_dependency_unit/connectors/confluence/test_confluence_group_sync.py`.
To run them:
```bash
python -m dotenv -f .vscode/.env run -- pytest backend/tests/external_dependency_unit
```
### Integration Tests
Standard integration tests. Every test in `backend/tests/integration` runs against a real Onyx deployment. We cannot
mock anything in these tests. Prefer writing integration tests (or External Dependency Unit Tests if mocking/internal
verification is necessary) over any other type of test.
Tests are parallelized at a directory level.
When writing integration tests, make sure to check the root `conftest.py` for useful fixtures + the `backend/tests/integration/common_utils` directory for utilities. Prefer (if one exists), calling the appropriate Manager
class in the utils over directly calling the APIs with a library like `requests`. Prefer using fixtures rather than
calling the utilities directly (e.g. do NOT create admin users with
`admin_user = UserManager.create(name="admin_user")`, instead use the `admin_user` fixture).
A great example of this type of test is `backend/tests/integration/tests/streaming_endpoints/test_chat_stream.py`.
To run them:
```bash
python -m dotenv -f .vscode/.env run -- pytest backend/tests/integration
```
### Playwright (E2E) Tests
These tests are an even more complete version of the Integration Tests mentioned above: all services of Onyx
are running, _including_ the Web Server.
Use these tests for anything that requires significant frontend <-> backend coordination.
Tests are located at `web/tests/e2e`. Tests are written in TypeScript.
To run them:
```bash
npx playwright test <TEST_NAME>
```
For shared fixtures, best practices, and detailed guidance, see `backend/tests/README.md`.
## Logs
When (1) writing integration tests or (2) doing live tests (e.g. curl / playwright) you can get access
to logs via the `backend/log/<service_name>_debug.log` file. All Onyx services (api_server, web_server, celery_X)
will be tailing their logs to this file.
## Security Considerations
- Never commit API keys or secrets to repository
- Use encrypted credential storage for connector credentials
- Follow RBAC patterns for new features
- Implement proper input validation with Pydantic models
- Use parameterized queries to prevent SQL injection
## AI/LLM Integration
- Multiple LLM providers supported via LiteLLM
- Configurable models per feature (chat, search, embeddings)
- Streaming support for real-time responses
- Token management and rate limiting
- Custom prompts and agent actions
## Creating a Plan
When creating a plan in the `plans` directory, make sure to include at least these elements:
**Issues to Address**
What the change is meant to do.
**Important Notes**
Things you come across in your research that are important to the implementation.
**Implementation strategy**
How you are going to make the changes happen. High level approach.
**Tests**
What unit (use rarely), external dependency unit, integration, and playwright tests you plan to write to
verify the correct behavior. Don't overtest. Usually, a given change only needs one type of test.
Do NOT include these: _Timeline_, _Rollback plan_
This is a minimal list - feel free to include more. Do NOT write code as part of your plan.
Keep it high level. You can reference certain files or functions though.
Before writing your plan, make sure to do research. Explore the relevant sections in the codebase.
## Error Handling
**Always raise `OnyxError` from `onyx.error_handling.exceptions` instead of `HTTPException`.
Never hardcode status codes or use `starlette.status` / `fastapi.status` constants directly.**
A global FastAPI exception handler converts `OnyxError` into a JSON response with the standard
`{"error_code": "...", "detail": "..."}` shape. This eliminates boilerplate and keeps error
handling consistent across the entire backend.
```python
from onyx.error_handling.error_codes import OnyxErrorCode
from onyx.error_handling.exceptions import OnyxError
# ✅ Good
raise OnyxError(OnyxErrorCode.NOT_FOUND, "Session not found")
# ✅ Good — no extra message needed
raise OnyxError(OnyxErrorCode.UNAUTHENTICATED)
# ✅ Good — upstream service with dynamic status code
raise OnyxError(OnyxErrorCode.BAD_GATEWAY, detail, status_code_override=upstream_status)
# ❌ Bad — using HTTPException directly
raise HTTPException(status_code=404, detail="Session not found")
# ❌ Bad — starlette constant
raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Access denied")
```
Available error codes are defined in `backend/onyx/error_handling/error_codes.py`. If a new error
category is needed, add it there first — do not invent ad-hoc codes.
**Upstream service errors:** When forwarding errors from an upstream service where the HTTP
status code is dynamic (comes from the upstream response), use `status_code_override`:
```python
raise OnyxError(OnyxErrorCode.BAD_GATEWAY, detail, status_code_override=e.response.status_code)
```
## Best Practices
In addition to the other content in this file, best practices for contributing
to the codebase can be found in the "Engineering Best Practices" section of
`CONTRIBUTING.md`. Understand its contents and follow them.
# Project Knowledge Base
This file is the entrypoint for agents working in this repository. Keep it small.
## Start Here
- General development workflow and repo conventions: [CONTRIBUTING.md](./CONTRIBUTING.md)
- Frontend standards for `web/` and `desktop/`: [web/AGENTS.md](./web/AGENTS.md)
- Backend testing strategy and commands: [backend/tests/README.md](./backend/tests/README.md)
- Celery worker and task guidance: [backend/onyx/background/celery/README.md](./backend/onyx/background/celery/README.md)
- Backend API error-handling rules: [backend/onyx/error_handling/README.md](./backend/onyx/error_handling/README.md)
- Plan-writing guidance: [plans/README.md](./plans/README.md)
## Agent-Lab Docs
When working on `agent-lab` or on tasks explicitly about agent-engineering, use:
- [docs/agent/README.md](./docs/agent/README.md)
These docs are the system of record for the `agent-lab` workflow.
## Universal Notes
- For non-trivial work, create the target worktree first and keep the edit, test, and PR loop
inside that worktree. Do not prototype in one checkout and copy the patch into another unless
you are explicitly debugging the harness itself.
- Use `ods worktree create` for harness-managed worktrees. Do not use raw `git worktree add` when
you want the `agent-lab` workflow, because it will skip the manifest, env overlays, dependency
bootstrap, and lane-aware base-ref selection.
- When a change needs browser proof, use the harness journey flow instead of ad hoc screen capture:
record `before` in the target worktree before making the change, then record `after` in that
same worktree after validation. Use `ods journey compare` only when you need to recover a missed
baseline or compare two explicit revisions after the fact.
- After opening a PR, treat review feedback and failing checks as part of the same loop:
use `ods pr-review ...` for GitHub review threads and `ods pr-checks diagnose` plus `ods trace`
for failing Playwright runs.
- PR titles and commit messages should use conventional-commit style such as `fix: ...` or
`feat: ...`. Do not use `[codex]` prefixes in this repo.
- If Python dependencies appear missing, activate the root venv with `source .venv/bin/activate`.
- To make tests work, check the root `.env` file for an OpenAI key.
- If using Playwright to explore the frontend, you can usually log in with username `a@example.com`
and password `a` at `http://localhost:3000`.
- Assume Onyx services are already running unless the task indicates otherwise. Check `backend/log`
if you need to verify service activity.
- When making backend calls in local development flows, go through the frontend proxy:
`http://localhost:3000/api/...`, not `http://localhost:8080/...`.
- Put DB operations under `backend/onyx/db/` or `backend/ee/onyx/db/`. Do not add ad hoc DB access
elsewhere.
## How To Use This File
- Use this file as a map, not a manual.
- Follow the nearest authoritative doc for the subsystem you are changing.
- If a repeated rule matters enough to teach every future agent, document it near the code it
governs or encode it mechanically.

View File

@@ -12,7 +12,7 @@ founders@onyx.app for more information. Please visit https://github.com/onyx-dot
ARG ENABLE_CRAFT=false
# DO_NOT_TRACK is used to disable telemetry for Unstructured
ENV ONYX_RUNNING_IN_DOCKER="true" \
ENV DANSWER_RUNNING_IN_DOCKER="true" \
DO_NOT_TRACK="true" \
PLAYWRIGHT_BROWSERS_PATH="/app/.cache/ms-playwright"

View File

@@ -1,7 +1,7 @@
# Base stage with dependencies
FROM python:3.11.7-slim-bookworm AS base
ENV ONYX_RUNNING_IN_DOCKER="true" \
ENV DANSWER_RUNNING_IN_DOCKER="true" \
HF_HOME=/app/.cache/huggingface
COPY --from=ghcr.io/astral-sh/uv:0.9.9 /uv /uvx /bin/

View File

@@ -0,0 +1,37 @@
# Celery Development Notes
This document is the local reference for Celery worker structure and task-writing rules in Onyx.
## Worker Types
Onyx uses multiple specialized workers:
1. `primary`: coordinates core background tasks and system-wide operations.
2. `docfetching`: fetches documents from connectors and schedules downstream work.
3. `docprocessing`: runs the indexing pipeline for fetched documents.
4. `light`: handles lightweight and fast operations.
5. `heavy`: handles more resource-intensive operations.
6. `kg_processing`: runs knowledge-graph processing and clustering.
7. `monitoring`: collects health and system metrics.
8. `user_file_processing`: processes user-uploaded files.
9. `beat`: schedules periodic work.
For actual implementation details, inspect:
- `backend/onyx/background/celery/apps/`
- `backend/onyx/background/celery/configs/`
- `backend/onyx/background/celery/tasks/`
## Task Rules
- Always use `@shared_task` rather than `@celery_app`.
- Put tasks under `background/celery/tasks/` or `ee/background/celery/tasks/`.
- Never enqueue a task without `expires=`. This is a hard requirement because stale queued work can
accumulate without bound.
- Do not rely on Celery time-limit enforcement. These workers run in thread pools, so timeout logic
must be implemented inside the task itself.
## Testing Note
If you change Celery worker code and want to validate it against a running local worker, the worker
usually needs to be restarted manually. There is no general auto-restart on code change.

View File

@@ -5,7 +5,7 @@ from logging.handlers import RotatingFileHandler
import psutil
from onyx.utils.platform import is_running_in_container
from onyx.utils.logger import is_running_in_container
from onyx.utils.logger import setup_logger
# Regular application logger

View File

@@ -42,7 +42,7 @@ from onyx.db.models import UserGroup
from onyx.db.search_settings import get_active_search_settings_list
from onyx.redis.redis_pool import get_redis_client
from onyx.redis.redis_pool import redis_lock_dump
from onyx.utils.platform import is_running_in_container
from onyx.utils.logger import is_running_in_container
from onyx.utils.telemetry import optional_telemetry
from onyx.utils.telemetry import RecordType
from shared_configs.configs import MULTI_TENANT

View File

@@ -0,0 +1,47 @@
# Error Handling
This directory is the local source of truth for backend API error handling.
## Primary Rule
Raise `OnyxError` from `onyx.error_handling.exceptions` instead of `HTTPException`.
The global FastAPI exception handler converts `OnyxError` into the standard JSON shape:
```json
{"error_code": "...", "detail": "..."}
```
This keeps API behavior consistent and avoids repetitive route-level boilerplate.
## Examples
```python
from onyx.error_handling.error_codes import OnyxErrorCode
from onyx.error_handling.exceptions import OnyxError
# Good
raise OnyxError(OnyxErrorCode.NOT_FOUND, "Session not found")
# Good
raise OnyxError(OnyxErrorCode.UNAUTHENTICATED)
# Good: preserve a dynamic upstream status code
raise OnyxError(
OnyxErrorCode.BAD_GATEWAY,
detail,
status_code_override=e.response.status_code,
)
```
Avoid:
```python
raise HTTPException(status_code=404, detail="Session not found")
```
## Notes
- Available error codes are defined in `backend/onyx/error_handling/error_codes.py`.
- If a new error category is needed, add it there first rather than inventing ad hoc strings.
- When forwarding upstream service failures with dynamic status codes, use `status_code_override`.

View File

@@ -6,7 +6,6 @@ from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy.orm import Session
from onyx import __version__ as onyx_version
from onyx.utils.platform import is_running_in_container
from onyx.auth.permissions import require_permission
from onyx.auth.users import is_user_admin
from onyx.configs.app_configs import DEFAULT_USER_FILE_MAX_UPLOAD_SIZE_MB
@@ -112,7 +111,6 @@ def fetch_settings(
if DISABLE_VECTOR_DB
else DEFAULT_FILE_TOKEN_COUNT_THRESHOLD_K_VECTOR_DB
),
is_containerized=is_running_in_container(),
)

View File

@@ -131,7 +131,3 @@ class UserSettings(Settings):
else DEFAULT_FILE_TOKEN_COUNT_THRESHOLD_K_VECTOR_DB
)
)
# True when the backend is running inside a container (Docker/Podman).
# The frontend uses this to default local-service URLs (e.g. Ollama,
# LM Studio) to host.docker.internal instead of localhost.
is_containerized: bool = False

View File

@@ -169,7 +169,11 @@ def get_standard_formatter() -> ColoredFormatter:
)
from onyx.utils.platform import is_running_in_container # noqa: F401
# Name of the environment variable the Docker images set to flag
# containerized execution.
DANSWER_DOCKER_ENV_STR = "DANSWER_RUNNING_IN_DOCKER"


def is_running_in_container() -> bool:
    """Return True iff DANSWER_RUNNING_IN_DOCKER equals the string "true"."""
    return os.environ.get(DANSWER_DOCKER_ENV_STR) == "true"
def setup_logger(

View File

@@ -1,25 +0,0 @@
import logging
import os

logger = logging.getLogger(__name__)

_ONYX_DOCKER_ENV_STR = "ONYX_RUNNING_IN_DOCKER"
_DANSWER_DOCKER_ENV_STR = "DANSWER_RUNNING_IN_DOCKER"


def is_running_in_container() -> bool:
    """Return True when the process appears to run inside a container.

    The check consults ``ONYX_RUNNING_IN_DOCKER`` first. If it is unset,
    the deprecated ``DANSWER_RUNNING_IN_DOCKER`` variable is consulted as a
    fallback, logging a deprecation warning. When neither variable is set,
    the function reports False.
    """
    for env_name, is_deprecated in (
        (_ONYX_DOCKER_ENV_STR, False),
        (_DANSWER_DOCKER_ENV_STR, True),
    ):
        value = os.getenv(env_name)
        if value is None:
            continue
        if is_deprecated:
            # Keep accepting the legacy variable for now, but nudge users
            # toward the new name before support is removed.
            logger.warning(
                "%s is deprecated and will be ignored in a future release. "
                "Use %s instead.",
                _DANSWER_DOCKER_ENV_STR,
                _ONYX_DOCKER_ENV_STR,
            )
        return value == "true"
    return False

View File

@@ -45,6 +45,15 @@ npx playwright test <TEST_NAME>
Shared fixtures live in `backend/tests/conftest.py`. Test subdirectories can define
their own `conftest.py` for directory-scoped fixtures.
## Additional Onyx-Specific Guidance
- Activate the root venv first with `source .venv/bin/activate`.
- For many product changes in this repo, prefer integration tests or external dependency unit tests
over isolated unit tests.
- When writing integration tests, check `backend/tests/integration/common_utils/` and the root
`conftest.py` for fixtures and managers before inventing new helpers.
- Prefer existing fixtures over constructing users or entities manually inside tests.
## Running Tests Repeatedly (`pytest-repeat`)
Use `pytest-repeat` to catch flaky tests by running them multiple times:

View File

@@ -0,0 +1,89 @@
# Initial Architecture Map
Status: provisional baseline. This is a routing map for agents, not a complete design spec for
every subsystem. Update it as the repo becomes more explicit.
## Top-Level Surfaces
The repository is easiest to reason about as seven main surfaces:
| Surface | Primary Paths | Purpose |
| --- | --- | --- |
| Backend product logic | `backend/onyx/`, `backend/ee/onyx/` | Core auth, chat, search, indexing, connectors, API, and enterprise extensions |
| Data and persistence | `backend/onyx/db/`, `backend/ee/onyx/db/`, `backend/alembic/` | DB models, data access logic, and schema migrations |
| Frontend product surfaces | `web/src/app/`, `web/src/sections/`, `web/src/layouts/` | Next.js routes, screens, and feature-level UI composition |
| Frontend design system and shared UI | `web/lib/opal/`, `web/src/refresh-components/` | Preferred primitives for new UI work |
| Devtools and local developer workflows | `tools/ods/`, `cli/` | Repo automation, CI helpers, visual regression tooling, and CLI integrations |
| Agent-facing platform work | `backend/onyx/server/features/build/`, `backend/onyx/mcp_server/`, `backend/onyx/deep_research/`, `backend/onyx/agents/` | Sandbox runtime, MCP tool surface, agent orchestration, and research workflows |
| Agent-lab harness state | shared git metadata under `$(git rev-parse --git-common-dir)/onyx-agent-lab/` | Local worktree manifests, ports, env overlays, and verification artifacts for agentized development |
## Backend Map
Use these paths as the first stop when routing backend changes:
| Area | Paths | Notes |
| --- | --- | --- |
| Authentication and access control | `backend/onyx/auth/`, `backend/onyx/access/`, `backend/ee/onyx/access/` | User identity, auth flows, permissions |
| Chat and answer generation | `backend/onyx/chat/`, `backend/onyx/server/query_and_chat/` | Chat loop, message processing, streaming |
| Retrieval and tools | `backend/onyx/tools/`, `backend/onyx/context/`, `backend/onyx/mcp_server/` | Search tools, web tools, context assembly, MCP exposure |
| Connectors and indexing | `backend/onyx/connectors/`, `backend/onyx/document_index/`, `backend/onyx/background/` | Source sync, indexing, pruning, permissions sync |
| LLM and prompt infrastructure | `backend/onyx/llm/`, `backend/onyx/prompts/`, `backend/ee/onyx/prompts/` | Provider integrations and prompting |
| Server APIs and feature entrypoints | `backend/onyx/server/`, `backend/ee/onyx/server/` | FastAPI routes and product feature APIs |
| Agent and build platform | `backend/onyx/server/features/build/`, `backend/onyx/agents/`, `backend/onyx/deep_research/` | Sandboxes, agent runtimes, orchestration, long-running research |
| Persistence | `backend/onyx/db/`, `backend/ee/onyx/db/` | Put DB operations here, not in route handlers or feature modules |
## Frontend Map
For frontend work, route changes by intent first, then by component maturity:
| Intent | Preferred Paths | Notes |
| --- | --- | --- |
| Next.js route/page work | `web/src/app/` | App Router pages and page-local wiring |
| Feature composition | `web/src/sections/`, `web/src/layouts/` | Preferred place for reusable feature-level assemblies |
| New shared UI primitives | `web/lib/opal/`, `web/src/refresh-components/` | Default targets for new reusable UI |
| Legacy shared UI | `web/src/components/` | Avoid for new work unless forced by the local surface |
| Frontend business logic | `web/src/lib/`, `web/src/hooks/`, `web/src/interfaces/` | Utilities, hooks, typed interfaces |
Important frontend rule already established in [web/AGENTS.md](../../web/AGENTS.md):
- Do not use `web/src/components/` for new component work.
## Existing Hard Constraints
These rules already exist and should be treated as architectural boundaries:
- Backend errors should raise `OnyxError`, not `HTTPException`.
- DB operations belong under `backend/onyx/db/` or `backend/ee/onyx/db/`.
- New FastAPI APIs should not use `response_model`.
- Celery tasks should use `@shared_task`.
- Enqueued Celery tasks must include `expires=`.
- Backend calls in local/manual flows should go through `http://localhost:3000/api/...`.
## Change Routing Heuristics
Use these heuristics before editing:
1. If the task changes persistence semantics, start in the DB layer and migrations.
2. If the task changes user-visible UI, find the route in `web/src/app/`, then move downward into
`sections`, `layouts`, and preferred shared UI.
3. If the task spans product behavior and background execution, inspect both the API entrypoint and
the relevant Celery path.
4. If the task concerns agentization, build, or local execution, check whether
`backend/onyx/server/features/build/` or `tools/ods/` is the better home before creating a new
subsystem.
5. If the task needs isolated local boot, browser validation, or per-change artifacts, check
[HARNESS.md](./HARNESS.md) before inventing another ad hoc runner.
6. If the change touches a historically messy area, consult [LEGACY_ZONES.md](./LEGACY_ZONES.md)
before adding more local patterns.
## Test Routing
Onyx already has a clear testing ladder:
- `backend/tests/unit/`: isolated logic only
- `backend/tests/external_dependency_unit/`: real infra, direct function calls, selective mocking
- `backend/tests/integration/`: real deployment, no mocking
- `web/tests/e2e/`: full frontend-backend coordination
Prefer the lowest layer that still validates the real behavior. For many product changes in this
repo, that means integration or Playwright rather than unit tests.

147
docs/agent/BRANCHING.md Normal file
View File

@@ -0,0 +1,147 @@
# Branching Model for `agent-lab`
This is the branching policy for `agent-lab`. It is intentionally separate from the default
workflow on `main`.
This document explains how to use a long-running `agent-lab` branch without making `main`
implicitly depend on lab-only agent-engineering changes.
## Goals
- Keep `main` stable and consensus-driven.
- Allow opt-in agent-engineering improvements to live on `agent-lab`.
- Let engineers and agents use `agent-lab` as a control checkout for worktree-based development.
- Ensure product PRs to `main` originate from `main`-based branches, not from `agent-lab`.
## Branch Roles
| Branch | Purpose |
| --- | --- |
| `main` | Shipping branch and team default |
| `codex/agent-lab` | Long-running control checkout containing the harness and agent-engineering improvements |
| `codex/lab/<name>` | Short-lived branch for `agent-lab`-only tooling, docs, or workflow work |
| `codex/fix/<name>`, `codex/feat/<name>`, etc. | Short-lived product branch cut from `origin/main` and managed by the `agent-lab` control checkout |
## Core Rule
`main` must never depend on `agent-lab`.
That means:
- `codex/agent-lab` may contain extra tooling, docs, checks, and workflow changes.
- Product branches may be managed by the `agent-lab` control checkout, but they must still be based
on `origin/main`.
- A PR to `main` should come from a `main`-based product branch, not from `codex/agent-lab`.
## Preferred Workflow
### Lab-Only Work
Use this for agent-engineering docs, harnesses, optional checks, or tooling that should remain on
`agent-lab` for now.
1. Branch from `codex/agent-lab` into `codex/lab/<name>`.
For local isolation, create the branch via `ods worktree create codex/lab/<name>`.
2. Make the lab-only changes.
3. Open the PR back into `codex/agent-lab`.
4. Do not open these changes directly to `main` unless the team later agrees to upstream them.
### Product Feature Work
Use this when you want to fix a product bug or build a shipping feature for `main`.
1. Stay in the `codex/agent-lab` control checkout.
2. Create a product worktree from `origin/main`, using a conventional branch lane such as:
- `ods worktree create codex/fix/<name>`
- `ods worktree create codex/feat/<name>`
3. Make the code changes inside that worktree checkout.
4. Run harness commands from the control checkout against the tracked worktree:
- `ods agent-check --worktree codex/fix/<name>`
- `ods verify --worktree codex/fix/<name>`
- `ods backend api --worktree codex/fix/<name>`
- `ods web dev --worktree codex/fix/<name>`
5. If the change needs browser proof, record a before/after journey:
- before editing: `ods journey run --worktree codex/fix/<name> --journey <name> --label before`
- after validating the fix: `ods journey run --worktree codex/fix/<name> --journey <name> --label after`
- use `ods journey compare` only when the initial `before` capture was missed and a recovery
baseline is needed later
- after the PR exists, publish the artifact directory you captured or the fallback compare run
with `ods journey publish --run-dir <dir> --pr <number>`
6. Commit, push, and open the PR from the product worktree checkout itself.
Prefer `ods pr-open` so the repo template and conventional-commit title check stay in the same
control plane.
7. Open the PR directly from that product branch to `main`.
8. After the PR is open, use:
- `ods pr-review triage --pr <number>`
- `ods pr-checks diagnose --pr <number>`
- `ods pr-review respond --comment-id ... --thread-id ... --body ...`
## Commit Hygiene Rules
This workflow only works if commits are separated cleanly.
Agents and humans should:
- keep lab-only workflow changes in separate commits from product logic
- avoid mixing refactors, harness changes, and feature behavior in one commit
- use conventional-commit messages and PR titles
- prefer multiple small commits over one large mixed commit
Good split:
- `docs(agent-lab): clarify control-checkout workflow`
- `fix: suppress logged-out modal on fresh unauthenticated load`
- `test: add regression coverage for auth-page logout modal`
Bad split:
- `misc: update agent docs, add lint, change connector UI, fix API`
## Guidance for Agents
When an agent is working on product code, it should assume:
1. The product branch should be created from `origin/main`, not from `codex/agent-lab`.
2. The `codex/agent-lab` checkout is the control plane for `ods` commands until the harness is
upstreamed more broadly.
3. The code change itself should still be made and committed inside the target product worktree.
4. A PR to `main` should use a conventional-commit title such as `fix: ...` or `feat: ...`.
If a product bug is discovered while editing on `codex/agent-lab`, treat that as exploration.
Restart the real fix in a fresh `main`-based product worktree and port only the minimal product
patch there.
## What Should Usually Stay on `agent-lab`
These are usually lab-only unless explicitly approved for upstreaming:
- branch-specific workflow docs
- harness-only `ods` commands
- non-consensus lint rules
- agent harness scripts
- opt-in automation for review or promotion
- branch-specific AGENTS guidance
## What Can Be Promoted to `main`
These can be promoted once they stand on their own:
- product feature code
- product tests
- bug fixes
- low-controversy lint rules with team agreement
- small devtools improvements that are useful outside `agent-lab`
## Review Standard
If opening a PR to `main` from the `agent-lab` control workflow:
- make sure the PR branch itself is based on `origin/main`
- use a conventional-commit title
- mention any control-plane validation that was run with `ods ... --worktree <branch>`
- attach journey artifacts when browser behavior changed
- treat review-thread replies and failing checks as part of the same agent loop, not as a separate
manual phase
This keeps the product branch reviewable without forcing reviewers to understand the entire
`agent-lab` branch.

View File

@@ -0,0 +1,73 @@
# Golden Rules
These are the current rules for the `agent-lab` workflow. The long-term goal is to move the useful
ones from prose into shared checks, scripts, or tests where appropriate.
Some of these are already documented elsewhere in the repo as project standards. In this file,
they should be treated as the active rules for work done on `agent-lab`.
## Current Rules
### Backend
1. Raise `OnyxError` instead of `HTTPException`.
2. Put DB operations under `backend/onyx/db/` or `backend/ee/onyx/db/`.
3. Use `@shared_task` for Celery tasks.
4. Never enqueue a Celery task without `expires=`.
5. Do not use FastAPI `response_model` on new APIs.
6. Keep Python strictly typed.
### Frontend
1. Prefer `web/lib/opal/` and `web/src/refresh-components/` for new shared UI.
2. Do not add new shared components under `web/src/components/`.
3. Route backend calls through the frontend `/api/...` surface in local and test flows.
4. Keep TypeScript strictly typed.
### Workflow
1. Start in a tracked worktree created by `ods worktree create`. Do not use raw `git worktree add`
for harness-managed work.
2. For harness work, use `codex/lab/...` branches based on `codex/agent-lab`. For product work,
use conventional branches such as `codex/fix/...` or `codex/feat/...` based on `origin/main`.
3. Make edits inside the target worktree. Copying a patch from another checkout is only acceptable
when debugging the harness itself.
4. Prefer integration or external-dependency-unit tests over unit tests when validating real Onyx
behavior.
5. When a repeated review comment appears, convert it into repo-local documentation or a mechanical
check.
6. For browser-visible changes, prefer a registered `ods journey` capture over an ad hoc manual
recording. The before/after artifacts should live with the PR loop.
7. Use `ods pr-review` to fetch and triage GitHub review threads instead of relying on memory or
the web UI alone. Reply and resolve from the same workflow when confidence is high.
8. Use `ods pr-checks diagnose` to detect failing GitHub checks and point to the next remediation
   command. For Playwright failures, pair it with `ods trace`.
9. PR titles and commit messages should use conventional-commit style such as `fix: ...` or
   `feat: ...`. Never use `[codex]` prefixes in this repo.
10. When touching legacy areas, leave the area more explicit than you found it: better naming,
    better boundaries, or a follow-up cleanup note.
## Mechanical Checks
These are strong candidates for `ods agent-check` or dedicated linters:
| Check | Why it matters |
| --- | --- |
| Ban `HTTPException` in backend product code | Keeps API error handling consistent |
| Ban direct DB mutations outside DB directories | Preserves layering |
| Detect task enqueue calls missing `expires=` | Prevents queue growth and stale work |
| Detect new imports from `web/src/components/` in non-legacy code | Prevents further UI drift |
| Detect direct calls to backend ports in tests/scripts where frontend proxy should be used | Preserves realistic request paths |
| Detect missing docs/agent references for new repo-level rules | Prevents knowledge from staying only in chat |
## Rule Promotion Policy
Promote a rule from prose into enforcement when at least one is true:
- it has been violated more than once
- a violation is expensive to detect late
- the remediation is mechanical
- the error message can teach the correct pattern succinctly
Agents work better with fast, local, actionable failures than with broad stylistic feedback after a
PR is opened.

267
docs/agent/HARNESS.md Normal file
View File

@@ -0,0 +1,267 @@
# Worktree Harness
This document defines the `agent-lab` harness model for doing end-to-end work on `onyx`.
The goal is to make one agent capable of taking one isolated change from edit to verification
without depending on human memory for ports, paths, or validation steps.
## Principles
These decisions follow the same principles described in OpenAI's
[Harness engineering](https://openai.com/index/harness-engineering/) and
[Unlocking the Codex harness](https://openai.com/index/unlocking-the-codex-harness/) articles:
- each task should run in its own git worktree
- the app should be bootable per worktree
- browser state should be directly legible to the agent
- logs, traces, and test artifacts should be attached to the same worktree lifecycle
- repository docs plus local metadata should be the system of record, not chat memory
## Current Harness Surface
The first `agent-lab` harness layer lives in `tools/ods/`.
Implemented command surfaces:
- `ods worktree create <branch>`: creates a git worktree plus local agent metadata
- `ods worktree deps up|status|reset|down`: provisions and manages namespaced external state
- `ods worktree status`: lists tracked worktrees and their URLs
- `ods worktree show [worktree]`: prints the manifest for one worktree
- `ods worktree remove <worktree>`: removes the worktree and local harness state
- `ods journey list|run|compare|publish`: records registered browser journeys, including local
before/after video artifacts and optional PR publication
- `ods pr-review fetch|triage|respond|resolve`: turns GitHub review threads into a local
machine-readable loop
- `ods pr-checks status|diagnose`: makes failing GitHub checks queryable from the same control
plane
- `ods verify`: runs the agent verification ladder and writes a machine-readable summary
- `ods agent-check`: runs diff-based architectural and doc checks
## Required Workflow
This is the required `agent-lab` workflow going forward:
1. Create the target worktree first with `ods worktree create`.
2. Make the code changes inside that worktree.
3. Run verification against that same worktree.
4. Open the PR from that same worktree.
Do not implement a change in one checkout and then rsync or patch it into another checkout just to
test it. That is only acceptable when explicitly debugging the harness itself.
Also do not use raw `git worktree add` for harness-managed work. `ods worktree create` is the
authoritative entrypoint because it disables repo hooks during checkout, writes the local manifest,
bootstraps env/runtime dependencies, provisions namespaced state, and records the worktree lane and
base ref.
## Control Checkout Model
Right now the harness code itself lives on `codex/agent-lab`, not on plain `main`.
That means the `codex/agent-lab` checkout acts as the control plane:
- lab worktrees such as `codex/lab/...` are based on `codex/agent-lab`
- product worktrees such as `codex/fix/...` or `codex/feat/...` are based on `origin/main`
- the `agent-lab` checkout can still manage those product worktrees via `--worktree`
flags on `ods backend`, `ods web`, `ods verify`, and `ods agent-check`
This lets us use the harness to manage a `main`-based product branch before the harness itself has
been upstreamed to `main`.
## Worktree Metadata
Each `agent-lab` worktree gets a local manifest stored under the shared git metadata directory:
```text
$(git rev-parse --git-common-dir)/onyx-agent-lab/worktrees/<id>/
```
The manifest tracks:
- branch name
- checkout path
- base ref used when the branch was created
- dependency mode and namespace-derived external dependency settings
- reserved ports for web, API, model server, and MCP
- browser-facing URLs
- generated env overlay file paths
- artifact directory
- last verification summary
This state is local runtime metadata. It is intentionally not checked into the repo.
## Boot Model
The current harness boot model isolates the mutable application processes and can also isolate the
mutable non-search data plane.
Per worktree:
- Next.js dev server gets its own `PORT`
- browser-facing base URL is unique
- backend API port is unique
- model server port is unique
- MCP port reservation exists for future worktree-local MCP runtime use
- artifacts are written to a worktree-specific directory
Today this is enough to make the app bootable per worktree without requiring a fully duplicated
dependency container stack for every task.
Important boundary:
- isolated today: app processes, ports, URLs, local artifacts, worktree-local dependency installs,
PostgreSQL database, Redis key prefix, and MinIO file-store bucket when the worktree runs in
`namespaced` dependency mode
- shared today: OpenSearch/Vespa and the rest of the local dependency stack started via docker
compose
This means a normal `agent-lab` worktree can run against:
- a dedicated Postgres database on the shared local Postgres server
- a dedicated Redis namespace on the shared local Redis instance
- a dedicated MinIO file-store bucket on the shared local object store
OpenSearch/Vespa remain shared-only by design on this branch. The harness should never imply
otherwise.
This is a deliberate brownfield adaptation of the OpenAI articles' worktree-per-task model:
keep the common path mechanically isolated where the repo already supports it, and explicitly mark
the high-complexity surfaces that remain shared.
## Dependency Modes
`agent-lab` currently supports two dependency modes:
- `namespaced`: default mode for agent feature work. Creates one Postgres database, one Redis
prefix, and one MinIO bucket per worktree.
- `shared`: reuse the existing local DB/Redis/MinIO state when full isolation is unnecessary.
The worktree manifest is the source of truth for the selected mode and the derived namespace values.
Search infrastructure policy:
- OpenSearch/Vespa are always shared
- there is no current plan to add namespaced or per-worktree search stacks on `agent-lab`
- tasks that mutate search/index infrastructure should be treated as higher-risk and validated with
extra care because the harness does not isolate that surface
## Backend and Web Integration
When `ods backend ...` or `ods web ...` runs inside a tracked `agent-lab` worktree, it should
derive runtime settings from the worktree manifest automatically.
Current behavior:
- `ods backend api` defaults to the reserved worktree API port
- `ods backend model_server` defaults to the reserved worktree model-server port
- `ods web dev` gets the reserved worktree web port plus `BASE_URL`, `WEB_DOMAIN`,
`INTERNAL_URL`, and `MCP_INTERNAL_URL`
- backend and web commands also inherit the manifest's dependency namespace env overrides
- generated `.vscode/.env.agent-lab` and `.vscode/.env.web.agent-lab` files mirror those values
- `ods worktree bootstrap` prepares the worktree to run by linking env files, linking or cloning
the Python runtime, and preparing `web/node_modules`
- `ods worktree deps up` provisions namespaced Postgres/Redis/MinIO state when needed
- `ods backend ... --worktree <id>` and `ods web ... --worktree <id>` let the `agent-lab`
control checkout run app processes against a tracked target worktree
This makes the standard dev commands work in an isolated way without inventing a second startup
surface just for agents.
## Browser Validation
Use two browser surfaces with different jobs:
- Chrome DevTools MCP for exploratory validation, DOM snapshots, navigation, and interactive bug
reproduction
- Playwright for codified end-to-end verification, screenshots, and retained traces
- `ods journey run` for the default article-style loop inside one worktree: capture `before` before
the fix, then capture `after` after the fix and publish the resulting artifacts to the PR when
needed
- `ods journey compare` as the fallback path when the agent missed the initial `before` capture or
needs a strict baseline-vs-branch comparison after the fact
Important detail:
- The default path should not launch two worktrees just to prove a normal UI bug fix. Use one
tracked product worktree, start the app in that worktree, and record `before` and `after` from
that same environment.
- If the fix is still uncommitted, always capture from the tracked target worktree, not from a
temporary `HEAD` checkout.
- `ods journey compare` is reserved for recovery or explicit revision comparison, not as the
standard path for every PR.
The worktree manifest's `web` URL is the source of truth for both.
If an agent needs to inspect live UI behavior while iterating, it should prefer Chrome DevTools MCP
against the worktree URL. If the behavior needs to become a repeatable regression check, encode it
as Playwright coverage under `web/tests/e2e/`.
## Verification Ladder
The expected verification sequence for a worktree is:
1. `ods agent-check`
2. targeted backend tests when backend behavior changed
3. targeted Playwright runs when UI or frontend-backend flows changed
4. `ods journey run --label before` before the code change, then `ods journey run --label after`
after the change when the PR needs durable browser proof
5. screenshot and trace review when UI validation fails
`ods verify` is the first unified entrypoint for this ladder. It writes a JSON summary into the
worktree artifact directory so later agent runs can inspect prior results directly.
For product worktrees based on `main`, the intended control-plane usage is:
1. from `codex/agent-lab`, run `ods worktree create codex/fix/<name>`
2. edit inside the created `main`-based checkout
3. from `codex/agent-lab`, run `ods verify --worktree codex/fix/<name>`
4. if live processes are needed, run `ods backend ... --worktree codex/fix/<name>` and
`ods web ... --worktree codex/fix/<name>`
5. commit, push, and open the PR from the product worktree checkout itself
## Artifacts
Per-worktree artifacts are written under the local harness state directory, not into chat.
Current artifact classes:
- verification summaries
- pytest logs
- Playwright logs
- journey screenshots, videos, traces, and compare summaries
- PR review thread snapshots and triage outputs
- dependency namespace metadata in the local manifest
Existing repo outputs are still relevant:
- Playwright traces and screenshots under `web/output/`
- screenshot diff reports from `ods screenshot-diff`
- CI trace retrieval from `ods trace`
## Known Gaps
This is the initial harness layer, not the finished system.
Still missing:
- one-command `up/down` orchestration for all local processes
- worktree-local observability stack for logs, metrics, and traces
- worktree-local MCP server runtime wiring
- automatic promotion tooling from `agent-lab` feature branches to `main`
- recurring doc-gardening and cleanup agents
- resumable long-running task server for local development tasks
Resolved in the current harness layer:
- fresh-worktree bootstrap for `.venv`, `.vscode/.env*`, and `web/node_modules`
- namespaced isolation for Postgres, Redis, and MinIO on a per-worktree basis
- registered before/after browser journeys with durable artifact directories
- GitHub review-thread fetch/triage/respond tooling
- GitHub failing-check diagnosis from the same `ods` control plane
Non-goals on this branch:
- OpenSearch/Vespa namespacing
- per-worktree vector/search stacks
Those are the next places to invest if we want to match the article more closely.

View File

@@ -0,0 +1,87 @@
# Legacy Zones
Status: initial classification. This file exists to stop agents from treating every existing
pattern in the repository as equally desirable precedent.
## Zone Types
| Zone | Meaning | Edit Policy |
| --- | --- | --- |
| `strict` | Preferred surface for new work | Freely extend, but keep boundaries explicit and add tests |
| `transition` | Actively evolving surface with mixed patterns | Prefer local consistency, avoid introducing new abstractions casually |
| `legacy-adapter` | Known historical surface or deprecated pattern area | Avoid new dependencies on it; prefer facades, wrappers, or migrations away |
| `frozen` | Only touch for bug fixes, security, or explicitly scoped work | Do not expand the pattern set |
## Initial Classification
### Strict
These are good default targets for new investment:
- `backend/onyx/db/`
- `backend/ee/onyx/db/`
- `backend/onyx/error_handling/`
- `backend/onyx/mcp_server/`
- `backend/onyx/server/features/build/`
- `tools/ods/`
- `web/lib/opal/`
- `web/src/refresh-components/`
- `web/src/layouts/`
- `web/src/sections/cards/`
### Transition
These areas are important and active, but they mix styles, eras, and responsibilities:
- `backend/onyx/server/`
- `backend/ee/onyx/server/`
- `backend/onyx/chat/`
- `backend/onyx/tools/`
- `backend/onyx/agents/`
- `backend/onyx/deep_research/`
- `web/src/app/`
- `web/src/sections/`
- `web/src/lib/`
Edit guidance:
- prefer incremental refactors over sweeping rewrites
- keep changes local when the area lacks clear boundaries
- add tests before extracting new shared abstractions
### Legacy-Adapter
These areas should not be treated as default precedent for new work:
- `web/src/components/`
- `backend/model_server/legacy/`
Edit guidance:
- do not add fresh reusable components or helper patterns here
- if a task requires touching these areas, prefer introducing an adapter in a stricter surface
- if you must extend a legacy file, keep the blast radius small and document follow-up cleanup
### Frozen
No repo-wide frozen zones are declared yet beyond files or subsystems that are clearly deprecated on
their face. Add explicit entries here rather than relying on tribal knowledge.
## Brownfield Rules
When a task lands in a non-strict zone:
1. Identify whether the task is fixing behavior, adding capability, or migrating structure.
2. Avoid copying local patterns into stricter parts of the codebase.
3. If an unsafe pattern is unavoidable, isolate it behind a typed boundary.
4. Record newly discovered smells in [GOLDEN_RULES.md](./GOLDEN_RULES.md) or a follow-on
execution plan.
## Promotion Criteria
A transition area can move toward `strict` when:
- its dependency boundaries are easy to explain
- new code has a preferred home
- tests are reliable enough for agents to use as feedback loops
- recurring review comments have been turned into written or mechanical rules

View File

@@ -0,0 +1,48 @@
# Quality Score Baseline
This file is an intentionally rough baseline for how legible the repository is to coding agents.
It is not a product quality report. It is a scorecard for agent development ergonomics.
## Scoring Rubric
Each area is scored from `0` to `5` on four dimensions:
- `Legibility`: how easy it is to discover the right files and concepts
- `Boundaries`: how clearly dependency and ownership seams are defined
- `Verification`: how available and reliable the feedback loops are
- `Agent ergonomics`: how likely an agent is to make a correct change without human rescue
Overall score is directional, not mathematically precise.
## Initial Baseline
| Area | Legibility | Boundaries | Verification | Agent ergonomics | Overall | Notes |
| --- | --- | --- | --- | --- | --- | --- |
| Backend core (`backend/onyx/`, `backend/ee/onyx/`) | 3 | 3 | 4 | 3 | 3.25 | Strong test surface, but top-level routing docs are thin |
| Persistence (`backend/onyx/db/`, migrations) | 4 | 4 | 3 | 4 | 3.75 | Clearer than most areas because path-level rules already exist |
| Frontend modern surfaces (`web/src/app/`, `sections`, `opal`, `refresh-components`) | 3 | 3 | 3 | 3 | 3.0 | Direction exists, but mixed generations still leak across boundaries |
| Frontend legacy shared UI (`web/src/components/`) | 1 | 1 | 2 | 1 | 1.25 | Explicitly deprecated, but still present and easy for agents to cargo-cult |
| Agent platform and build sandbox (`backend/onyx/server/features/build/`) | 3 | 4 | 3 | 4 | 3.5 | Good substrate for agentization, but not yet aimed at repo development workflows |
| MCP, CLI, and devtools (`backend/onyx/mcp_server/`, `cli/`, `tools/ods/`) | 4 | 4 | 4 | 4 | 4.0 | `agent-check`, worktree manifests, `ods verify`, `ods journey`, and PR review/check tooling give this surface a real control plane |
| Repo-level docs and plans | 4 | 3 | 4 | 4 | 3.75 | `docs/agent/` now describes the journey/review/check loop directly, though subsystem coverage is still uneven |
## Biggest Gaps
1. Repo-level architecture knowledge is still thinner than the runtime and workflow docs.
2. Brownfield and legacy zones are not explicitly flagged enough for agents.
3. Important engineering rules still outnumber the mechanical checks that enforce them.
4. The worktree harness does not yet include a local observability stack or one-command process orchestration.
## Near-Term Targets
The next improvements should aim to move these areas:
- Repo-level docs and plans: `3.75 -> 4.0`
- Frontend legacy safety: `1.25 -> 2.5`
- Backend core agent ergonomics: `3.0 -> 4.0`
- Worktree observability and runtime automation: `2.5 -> 4.0`
## Update Policy
When a new check, map, or workflow materially improves agent behavior, update this scorecard and
note what changed. If a score changes, the adjacent notes should explain why.

68
docs/agent/README.md Normal file
View File

@@ -0,0 +1,68 @@
# Agent Engineering Docs
This directory is the knowledge base for the `agent-lab` workflow around making development of
`onyx` itself more agentized.
The goal is not to replace the root [AGENTS.md](../../AGENTS.md).
The goal is to keep architecture maps, unsafe-zone notes, quality signals, and follow-on
execution plans in a form that coding agents can discover and update.
On `agent-lab`, this directory is the system of record for agent-engineering workflow.
## Principles
- Keep the entrypoint small. The root `AGENTS.md` should point here; it should not become a
growing encyclopedia.
- Create the target worktree first. The intended workflow is one task, one tracked worktree, one
verification loop, and one PR from that same checkout.
- Keep artifacts with the workflow. Browser videos, traces, review summaries, and check triage
should be produced by harness commands and stored as machine-readable outputs, not recreated
from chat memory.
- Prefer maps over manuals. Agents need navigable pointers to the right subsystem, not a giant
blob of undifferentiated instructions.
- Encode recurring judgment into the repo. If a rule matters often, document it here and then
promote it into a check, linter, test, or script.
- Distinguish legacy from greenfield. Agents will copy the patterns they see. If an area is
historically messy, we need to say so explicitly.
- Version decisions with the code. If a design choice matters for future changes, it should live
in-repo rather than in chat or memory.
## Documents
- [ARCHITECTURE.md](./ARCHITECTURE.md): top-level codebase map and change-routing guidance.
- [BRANCHING.md](./BRANCHING.md): branch model for long-running `agent-lab` development and
promotion of product-only changes to `main`.
- [HARNESS.md](./HARNESS.md): worktree runtime model, verification ladder, and browser/tooling
expectations.
- [LEGACY_ZONES.md](./LEGACY_ZONES.md): edit policy for strict, transitional, and legacy areas.
- [GOLDEN_RULES.md](./GOLDEN_RULES.md): active rules for `agent-lab` and promotion targets for
mechanical enforcement.
- [QUALITY_SCORE.md](./QUALITY_SCORE.md): baseline legibility and maintainability assessment for
agent work.
## Operating Model
Use this directory for information that should change how future agents work in the `agent-lab`
workflow:
- architecture maps
- dependency and layering rules
- "do not extend this pattern" warnings
- safe extension points
- recurring cleanup policies
- harness/runtime behavior for worktree-based development
- before/after browser journeys and PR artifact publication
- GitHub review and failing-check control loops
- quality scorecards
- active execution plans for agent-engineering improvements
Current workflow split:
- `codex/agent-lab` is the control checkout for the harness itself.
- `codex/lab/<name>` branches are for harness/docs/tooling work based on `codex/agent-lab`.
- `codex/fix/<name>`, `codex/feat/<name>`, and similar conventional product branches should be
created from `origin/main`, even when they are managed from the `agent-lab` control checkout.
- PR titles and commit messages should use conventional-commit style, never `[codex]` prefixes.
Do not turn this into a dumping ground. If something is local to one feature, keep it with that
feature. This directory is for `agent-lab`-level agent-development guidance.

View File

@@ -28,11 +28,11 @@ Some commands require external tools to be installed and configured:
- **uv** - Required for `backend` commands
- Install from [docs.astral.sh/uv](https://docs.astral.sh/uv/)
- **GitHub CLI** (`gh`) - Required for `run-ci`, `cherry-pick`, and `trace` commands
- **GitHub CLI** (`gh`) - Required for `run-ci`, `cherry-pick`, `trace`, `pr-review`, and `pr-checks` commands
- Install from [cli.github.com](https://cli.github.com/)
- Authenticate with `gh auth login`
- **AWS CLI** - Required for `screenshot-diff` commands (S3 baseline sync)
- **AWS CLI** - Required for `screenshot-diff` commands and `journey publish` (S3 artifact sync)
- Install from [aws.amazon.com/cli](https://aws.amazon.com/cli/)
- Authenticate with `aws sso login` or `aws configure`
@@ -196,11 +196,19 @@ ods backend <subcommand>
| Flag | Default | Description |
|------|---------|-------------|
| `--no-ee` | `false` | Disable Enterprise Edition features (enabled by default) |
| `--worktree` | current checkout | Run the command against a tracked agent-lab worktree |
| `--port` | `8080` (api) / `9000` (model_server) | Port to listen on |
Shell environment takes precedence over `.env` file values, so inline overrides
work as expected (e.g. `S3_ENDPOINT_URL=foo ods backend api`).
When run inside a tracked `agent-lab` worktree, `ods backend api` and
`ods backend model_server` will automatically use that worktree's reserved
ports unless you override them explicitly with `--port`.
The same command can also be launched from the `codex/agent-lab` control
checkout against another tracked worktree via `--worktree <branch>`.
**Examples:**
```shell
@@ -218,6 +226,9 @@ ods backend model_server
# Start the model server on a custom port
ods backend model_server --port 9001
# Run the API server for a tracked product worktree from the control checkout
ods backend api --worktree codex/fix/auth-banner-modal
```
### `web` - Run Frontend Scripts
@@ -231,6 +242,14 @@ ods web <script> [args...]
Script names are available via shell completion (for supported shells via
`ods completion`), and are read from `web/package.json`.
When run inside a tracked `agent-lab` worktree, `ods web ...` automatically
injects the worktree's `PORT`, `BASE_URL`, `WEB_DOMAIN`, `INTERNAL_URL`, and
`MCP_INTERNAL_URL` so the Next.js dev server boots against the right isolated
stack.
From the `codex/agent-lab` control checkout, `--worktree <branch>` applies the
same wiring to a tracked target worktree.
**Examples:**
```shell
@@ -242,6 +261,162 @@ ods web lint
# Forward extra args to the script
ods web test --watch
# Run the Next.js dev server for a tracked product worktree
ods web dev --worktree codex/fix/auth-banner-modal
```
### `worktree` - Manage Agent-Lab Worktrees
Create and manage local git worktrees for agentized development. Each tracked
worktree gets:
- a reserved port bundle for web, API, model server, and MCP
- an explicit dependency mode for local external state
- generated `.vscode/.env.agent-lab` and `.vscode/.env.web.agent-lab` files
- a local artifact directory for verification logs and summaries
- a manifest stored under the shared git metadata directory
- bootstrap support for env files, Python runtime, and frontend dependencies
`ods worktree create` is the authoritative entrypoint for this workflow. Do not
use raw `git worktree add` when you want the `agent-lab` harness, because you
will skip the manifest, env overlays, dependency bootstrap, and lane-aware base
selection.
```shell
ods worktree <subcommand>
```
**Subcommands:**
- `create <branch>` - Create a worktree and manifest
- `bootstrap [worktree]` - Prepare env files and dependencies for a worktree
- `deps up|status|reset|down [worktree]` - Provision and manage namespaced external state
- `status` - List tracked worktrees and URLs
- `show [worktree]` - Show detailed metadata for one worktree
- `remove <worktree>` - Remove a worktree and its local state
`ods worktree create` bootstraps new worktrees by default. The current bootstrap
behavior is:
- link `.vscode/.env` and `.vscode/.env.web` from the source checkout when present
- link the source checkout's `.venv` when present
- clone `web/node_modules` into the worktree when present, falling back to
`npm ci --prefer-offline --no-audit`
Current isolation boundary:
- worktree-local: web/API/model-server ports, URLs, env overlays, artifact dirs
- namespaced when `--dependency-mode namespaced` is used: PostgreSQL database,
Redis prefix, and MinIO file-store bucket
- always shared: OpenSearch/Vespa and the rest of the docker-compose dependency stack
`namespaced` is the default dependency mode on `agent-lab`. `shared` is still
available for lighter-weight work that does not need isolated DB/Redis/MinIO
state.
Branch lanes:
- `codex/lab/<name>` worktrees are treated as harness work and default to
`codex/agent-lab` as the base ref
- `codex/fix/<name>`, `codex/feat/<name>`, and other conventional product lanes
default to `origin/main` as the base ref
- branches that do not encode a lane fall back to `HEAD`; use `--from` or a
clearer branch name when the base matters
Control-plane note:
- the harness lives on `codex/agent-lab`
- product worktrees can still be based on `origin/main`
- run `ods backend`, `ods web`, `ods verify`, and `ods agent-check` with
`--worktree <branch>` from the control checkout when the target worktree does
not carry the harness code itself
Search/vector note:
- OpenSearch/Vespa stay shared-only
- this branch intentionally does not implement namespaced or per-worktree search stacks
- tasks that touch search/index infrastructure should assume a shared surface
**Examples:**
```shell
# Create a product bugfix worktree from main
ods worktree create codex/fix/auth-banner-modal
# Create a lab-only worktree from agent-lab
ods worktree create codex/lab/browser-validation
# Reuse the shared DB/Redis/MinIO state for a lighter-weight task
ods worktree create codex/fix/ui-polish --dependency-mode shared
# Re-bootstrap an existing worktree
ods worktree bootstrap codex/fix/auth-banner-modal
# Inspect the current worktree's namespaced dependency state
ods worktree deps status
# Reset the current worktree's Postgres/Redis/MinIO namespace
ods worktree deps reset
# See tracked worktrees
ods worktree status
# Show the current worktree manifest
ods worktree show
# Remove a worktree when finished
ods worktree remove codex/fix/auth-banner-modal
# Remove a worktree and tear down its namespaced dependencies
ods worktree remove codex/fix/auth-banner-modal --drop-deps
```
### `verify` - Run the Agent-Lab Verification Ladder
Run a unified verification flow for the current checkout. `ods verify` is the
first worktree-aware entrypoint that combines:
- `agent-check`
- optional targeted pytest execution
- optional targeted Playwright execution
- machine-readable verification summaries written to the worktree artifact dir
```shell
ods verify
```
Useful flags:
| Flag | Description |
|------|-------------|
| `--base-ref <ref>` | Ref to compare against for `agent-check` |
| `--skip-agent-check` | Skip the diff-based rules step |
| `--worktree <id>` | Run verification against a tracked worktree from the control checkout |
| `--pytest <path>` | Run a specific pytest path or node id (repeatable) |
| `--playwright <path>` | Run a specific Playwright test path (repeatable) |
| `--playwright-grep <expr>` | Pass `--grep` through to Playwright |
| `--playwright-project <name>` | Limit Playwright to one project |
Examples:
```shell
# Run just the diff-based checks
ods verify
# Validate a backend change with one focused integration target
ods verify --pytest backend/tests/integration/tests/streaming_endpoints/test_chat_stream.py
# Validate a UI change with one Playwright suite
ods verify --playwright tests/e2e/chat/welcome_page.spec.ts --playwright-project admin
# Run both backend and UI checks
ods verify \
--pytest backend/tests/integration/tests/streaming_endpoints/test_chat_stream.py \
--playwright tests/e2e/admin/default-agent.spec.ts
# Verify a tracked product worktree from the control checkout
ods verify --worktree codex/fix/auth-banner-modal
```
### `dev` - Devcontainer Management
@@ -325,6 +500,56 @@ Check that specified modules are only lazily imported (used for keeping backend
ods check-lazy-imports
```
### `agent-check` - Check New Agent-Safety Violations
Run a small set of diff-based checks aimed at keeping new changes agent-friendly
without failing on historical debt already present in the repository.
This command is part of the expected workflow on `agent-lab`. It is not necessarily a repo-wide
mandatory gate on `main`.
```shell
ods agent-check
```
Current checks flag newly added:
- `HTTPException` usage in backend product code
- `response_model=` on backend APIs
- Celery `.delay()` calls
- imports from `web/src/components/` outside the legacy component tree
The command also validates the `docs/agent/` knowledge base by checking that
required files exist and that local markdown links in that surface resolve
correctly.
Useful flags:
| Flag | Description |
|------|-------------|
| `--staged` | Check the staged diff instead of the working tree |
| `--base-ref <ref>` | Diff against a git ref other than `HEAD` |
| `--worktree <id>` | Check a tracked worktree from the control checkout |
Examples:
```shell
# Check working tree changes
ods agent-check
# Check only staged changes
ods agent-check --staged
# Compare the branch against main
ods agent-check --base-ref origin/main
# Limit the diff to specific paths
ods agent-check web/src backend/onyx/server/features/build
# Run against a tracked product worktree from the control checkout
ods agent-check --worktree codex/fix/auth-banner-modal --base-ref origin/main
```
### `run-ci` - Run CI on Fork PRs
Pull requests from forks don't automatically trigger GitHub Actions for security reasons.
@@ -516,6 +741,148 @@ ods trace --project admin
ods trace --list
```
### `journey` - Capture Before/After Browser Journeys
Run a registered Playwright journey with video capture. The default workflow is
to record `before` and `after` inside the same tracked worktree as the change.
`journey compare` remains available as a recovery path when you need to compare
two explicit revisions/worktrees after the fact.
Registered journeys live in `web/tests/e2e/journeys/registry.json`.
An optional `.github/agent-journeys.json` file can list journeys for a PR:
```json
{
"journeys": ["auth-landing"]
}
```
```shell
ods journey <subcommand>
```
**Subcommands:**
- `list` - Show registered journeys
- `run` - Run one journey against the current or target worktree
- `compare` - Capture `before` and `after` artifacts across two revisions/worktrees when a missed baseline must be recovered
- `publish` - Upload a compare run to S3 and upsert the PR comment
**Examples:**
```shell
# List journey definitions
ods journey list
# Capture before in the tracked product worktree before editing
ods journey run --worktree codex/fix/auth-banner-modal --journey auth-landing --label before
# Capture after in that same worktree after validating the fix
ods journey run --worktree codex/fix/auth-banner-modal --journey auth-landing --label after
# Recover a missed baseline later by comparing origin/main to a tracked product worktree
ods journey compare \
--journey auth-landing \
--after-worktree codex/fix/auth-banner-modal
# Publish an existing compare run to PR #10007
ods journey publish \
--run-dir .git/onyx-agent-lab/journeys/20260408-123000 \
--pr 10007
```
`journey run` writes a `summary.json` into the capture directory. `journey compare`
writes a `summary.json` into its run directory and, when `--pr` is supplied,
uploads that directory to S3 and upserts a PR comment with before/after links.
### `pr-review` - Fetch and Respond to GitHub Review Threads
Treat PR review comments as a local machine-readable workflow instead of relying
on the GitHub UI alone.
```shell
ods pr-review <subcommand>
```
**Subcommands:**
- `fetch` - Download review threads into local harness state
- `triage` - Classify threads as actionable, duplicate, outdated, or resolved
- `respond` - Reply to an inline review comment and optionally resolve its thread
- `resolve` - Resolve a review thread without posting a reply
**Examples:**
```shell
# Fetch review threads for the current branch PR
ods pr-review fetch
# Triage review threads for a specific PR
ods pr-review triage --pr 10007
# Reply to a top-level review comment and resolve the thread
ods pr-review respond \
--pr 10007 \
--comment-id 2512997464 \
--thread-id PRRT_kwDO... \
--body "Fixed in the latest patch. Added a regression journey as well."
```
Fetched and triaged review data is written under the local harness state
directory:
```text
$(git rev-parse --git-common-dir)/onyx-agent-lab/reviews/pr-<number>/
```
### `pr-checks` - Diagnose Failing GitHub Checks
Inspect the latest checks on a PR and surface the failing ones with the next
recommended remediation command.
```shell
ods pr-checks <subcommand>
```
**Subcommands:**
- `status` - list all checks for the PR
- `diagnose` - list only failing checks and point to the next step
**Examples:**
```shell
# Show all checks on the current branch PR
ods pr-checks status
# Show only failing checks and the next remediation command
ods pr-checks diagnose --pr 10007
```
`pr-checks diagnose` is especially useful after pushing a fix or after replying
to review comments. For Playwright failures it points directly at `ods trace`.
### `pr-open` - Open a PR With the Repo Template
Create a pull request through `gh` while enforcing a conventional-commit title.
If `--title` is omitted, `ods` uses the latest commit subject. The PR body
defaults to `.github/pull_request_template.md`. PRs are ready-for-review by
default; use `--draft` only when you explicitly need that state.
```shell
ods pr-open
ods pr-open --title "fix: suppress logged-out modal on fresh auth load"
```
### `pr-merge` - Merge a PR Through `gh`
Merge or auto-merge a pull request with an explicit merge method.
```shell
ods pr-merge --pr 10007 --method squash
ods pr-merge --pr 10007 --method squash --auto --delete-branch
```
### Testing Changes Locally (Dry Run)
Both `run-ci` and `cherry-pick` support `--dry-run` to test without making remote changes:

View File

@@ -0,0 +1,161 @@
package cmd
import (
"fmt"
"os"
"os/exec"
"sort"
"strings"
log "github.com/sirupsen/logrus"
"github.com/spf13/cobra"
"github.com/onyx-dot-app/onyx/tools/ods/internal/agentcheck"
"github.com/onyx-dot-app/onyx/tools/ods/internal/agentdocs"
"github.com/onyx-dot-app/onyx/tools/ods/internal/paths"
)
// AgentCheckOptions holds the flag values and the resolved checkout root for
// one agent-check invocation.
type AgentCheckOptions struct {
	Staged bool // diff the staged index (--cached) instead of the working tree
	BaseRef string // git ref to diff against instead of HEAD
	Worktree string // tracked agent-lab worktree identifier; empty = current checkout
	RepoRoot string // resolved checkout path the diff runs in; set by runAgentCheck
}

// AgentCheckResult aggregates violations from the diff-based line rules and
// from the docs/agent knowledge-base validation.
type AgentCheckResult struct {
	Violations []agentcheck.Violation
	DocViolations []agentdocs.Violation
}
// NewAgentCheckCommand creates the agent-check command.
func NewAgentCheckCommand() *cobra.Command {
	options := &AgentCheckOptions{}

	command := &cobra.Command{
		Use:   "agent-check [paths...]",
		Short: "Run diff-based checks for agent-safe changes",
		Long: `Run diff-based checks for agent-safe changes.
This command inspects added lines in the current git diff and flags a small set
of newly introduced repo-level violations without failing on historical debt.
By default it compares the working tree against HEAD. Use --staged to inspect
the staged diff instead, or --base-ref to compare against a different ref.
Use --worktree to run the same check against a tracked target worktree from the
agent-lab control checkout.
Examples:
ods agent-check
ods agent-check --staged
ods agent-check --base-ref origin/main
ods agent-check --worktree codex/fix/auth-banner-modal --base-ref origin/main
ods agent-check web/src backend/onyx/server/features/build`,
		Run: func(_ *cobra.Command, args []string) {
			runAgentCheck(options, args)
		},
	}

	// Flag values are written straight into the shared options struct.
	command.Flags().BoolVar(&options.Staged, "staged", false, "check staged changes instead of the working tree")
	command.Flags().StringVar(&options.BaseRef, "base-ref", "", "git ref to diff against instead of HEAD")
	command.Flags().StringVar(&options.Worktree, "worktree", "", "tracked agent-lab worktree to check instead of the current checkout")
	return command
}
// runAgentCheck resolves the target checkout, runs the diff-based rules plus
// the docs/agent validation, prints every violation, and exits non-zero when
// any were found.
func runAgentCheck(opts *AgentCheckOptions, providedPaths []string) {
	checkoutRoot, _, _ := resolveAgentLabTarget(opts.Worktree)
	opts.RepoRoot = checkoutRoot

	result, err := evaluateAgentCheck(opts, providedPaths)
	if err != nil {
		log.Fatalf("Failed to run agent-check: %v", err)
	}

	if len(result.Violations)+len(result.DocViolations) == 0 {
		log.Info("✅ agent-check found no new violations.")
		return
	}

	// Deterministic report order: path, then line number, then rule id.
	less := func(i, j int) bool {
		a, b := result.Violations[i], result.Violations[j]
		if a.Path != b.Path {
			return a.Path < b.Path
		}
		if a.LineNum != b.LineNum {
			return a.LineNum < b.LineNum
		}
		return a.RuleID < b.RuleID
	}
	sort.Slice(result.Violations, less)

	for _, v := range result.Violations {
		log.Errorf("\n❌ %s:%d [%s]", v.Path, v.LineNum, v.RuleID)
		log.Errorf(" %s", v.Message)
		log.Errorf(" Added line: %s", strings.TrimSpace(v.Content))
	}
	for _, v := range result.DocViolations {
		log.Errorf("\n❌ %s [agent-docs]", v.Path)
		log.Errorf(" %s", v.Message)
	}

	fmt.Fprintf(
		os.Stderr,
		"\nFound %d agent-check violation(s) and %d agent-docs violation(s).\n",
		len(result.Violations),
		len(result.DocViolations),
	)
	os.Exit(1)
}
// evaluateAgentCheck gathers the configured diff, extracts its added lines,
// and runs both the line-based rules and the docs/agent knowledge-base
// validation, returning the combined result.
func evaluateAgentCheck(opts *AgentCheckOptions, providedPaths []string) (*AgentCheckResult, error) {
	diffOutput, err := getAgentCheckDiff(opts, providedPaths)
	if err != nil {
		return nil, err
	}
	addedLines, err := agentcheck.ParseAddedLines(diffOutput)
	if err != nil {
		return nil, err
	}

	// Fall back to the current checkout's git root when no repo root was
	// resolved by the caller.
	root := opts.RepoRoot
	if root == "" {
		gitRoot, rootErr := paths.GitRoot()
		if rootErr != nil {
			return nil, fmt.Errorf("determine git root: %w", rootErr)
		}
		root = gitRoot
	}

	return &AgentCheckResult{
		Violations:    agentcheck.CheckAddedLines(addedLines),
		DocViolations: agentdocs.Validate(root),
	}, nil
}
// getAgentCheckDiff produces the unified diff (zero context lines) whose
// added lines the agent-check rules scan.
//
// The diff target is selected by the options: --staged inspects the index,
// --base-ref diffs against an arbitrary ref, and the default compares the
// working tree with HEAD. Optional path arguments restrict the diff.
//
// Only stdout is returned as the diff text: stderr is captured separately so
// git warnings can never leak into the input of agentcheck.ParseAddedLines.
// On failure the error includes git's stderr for diagnosis.
func getAgentCheckDiff(opts *AgentCheckOptions, providedPaths []string) (string, error) {
	args := []string{"diff", "--no-color", "--unified=0"}
	if opts.Staged {
		args = append(args, "--cached")
	} else if opts.BaseRef != "" {
		args = append(args, opts.BaseRef)
	} else {
		args = append(args, "HEAD")
	}
	if len(providedPaths) > 0 {
		args = append(args, "--")
		args = append(args, providedPaths...)
	}

	cmd := exec.Command("git", args...)
	if opts.RepoRoot != "" {
		// Run inside the resolved checkout so --worktree targets are diffed
		// in their own directory.
		cmd.Dir = opts.RepoRoot
	}
	output, err := cmd.Output()
	if err != nil {
		var stderr string
		if exitErr, ok := err.(*exec.ExitError); ok {
			stderr = string(exitErr.Stderr)
		}
		return "", fmt.Errorf("git %s failed: %w\n%s", strings.Join(args, " "), err, stderr)
	}
	return string(output), nil
}

View File

@@ -0,0 +1,32 @@
package cmd
import (
log "github.com/sirupsen/logrus"
"github.com/onyx-dot-app/onyx/tools/ods/internal/agentlab"
"github.com/onyx-dot-app/onyx/tools/ods/internal/paths"
)
// resolveAgentLabTarget maps an optional worktree identifier to a checkout
// path plus its agent-lab manifest (when one is tracked).
//
// An empty identifier means "the current checkout": the git root is returned
// and a best-effort manifest lookup is performed for it. A non-empty
// identifier must resolve to a tracked worktree; any failure is fatal.
func resolveAgentLabTarget(identifier string) (string, agentlab.Manifest, bool) {
	if identifier != "" {
		gitDir, err := agentlab.GetCommonGitDir()
		if err != nil {
			log.Fatalf("Failed to determine git common dir: %v", err)
		}
		manifest, found, err := agentlab.FindByIdentifier(gitDir, identifier)
		if err != nil {
			log.Fatalf("Failed to resolve worktree %q: %v", identifier, err)
		}
		if !found {
			log.Fatalf("No agent-lab worktree found for %q", identifier)
		}
		return manifest.CheckoutPath, manifest, true
	}

	root, err := paths.GitRoot()
	if err != nil {
		log.Fatalf("Failed to determine git root: %v", err)
	}
	manifest, found := currentAgentLabManifest(root)
	return root, manifest, found
}

View File

@@ -1,7 +1,6 @@
package cmd
import (
"bufio"
"errors"
"fmt"
"net"
@@ -14,14 +13,16 @@ import (
log "github.com/sirupsen/logrus"
"github.com/spf13/cobra"
"github.com/onyx-dot-app/onyx/tools/ods/internal/paths"
"github.com/onyx-dot-app/onyx/tools/ods/internal/agentlab"
"github.com/onyx-dot-app/onyx/tools/ods/internal/envutil"
)
// NewBackendCommand creates the parent "backend" command with subcommands for
// running backend services.
// BackendOptions holds options shared across backend subcommands.
type BackendOptions struct {
NoEE bool
NoEE bool
Worktree string
}
func NewBackendCommand() *cobra.Command {
@@ -44,6 +45,7 @@ Available subcommands:
}
cmd.PersistentFlags().BoolVar(&opts.NoEE, "no-ee", false, "Disable Enterprise Edition features (enabled by default)")
cmd.PersistentFlags().StringVar(&opts.Worktree, "worktree", "", "tracked agent-lab worktree to run from instead of the current checkout")
cmd.AddCommand(newBackendAPICommand(opts))
cmd.AddCommand(newBackendModelServerCommand(opts))
@@ -62,9 +64,10 @@ func newBackendAPICommand(opts *BackendOptions) *cobra.Command {
Examples:
ods backend api
ods backend api --port 9090
ods backend api --no-ee`,
ods backend api --no-ee
ods backend api --worktree codex/fix/auth-banner-modal`,
Run: func(cmd *cobra.Command, args []string) {
runBackendService("api", "onyx.main:app", port, opts)
runBackendService("api", "onyx.main:app", port, cmd.Flags().Changed("port"), opts)
},
}
@@ -83,9 +86,10 @@ func newBackendModelServerCommand(opts *BackendOptions) *cobra.Command {
Examples:
ods backend model_server
ods backend model_server --port 9001`,
ods backend model_server --port 9001
ods backend model_server --worktree codex/fix/auth-banner-modal`,
Run: func(cmd *cobra.Command, args []string) {
runBackendService("model_server", "model_server.main:app", port, opts)
runBackendService("model_server", "model_server.main:app", port, cmd.Flags().Changed("port"), opts)
},
}
@@ -137,16 +141,25 @@ func resolvePort(port string) string {
return port
}
func runBackendService(name, module, port string, opts *BackendOptions) {
root, err := paths.GitRoot()
if err != nil {
log.Fatalf("Failed to find git root: %v", err)
func runBackendService(name, module, port string, portExplicit bool, opts *BackendOptions) {
root, worktreeManifest, hasWorktreeManifest := resolveAgentLabTarget(opts.Worktree)
if hasWorktreeManifest && !portExplicit {
switch name {
case "api":
port = strconv.Itoa(worktreeManifest.Ports.API)
case "model_server":
port = strconv.Itoa(worktreeManifest.Ports.ModelServer)
}
}
port = resolvePort(port)
envFile := ensureBackendEnvFile(root)
fileVars := loadBackendEnvFile(envFile)
fileVars, err := envutil.LoadFile(envFile)
if err != nil {
log.Fatalf("Failed to load env file %s: %v", envFile, err)
}
eeDefaults := eeEnvDefaults(opts.NoEE)
fileVars = append(fileVars, eeDefaults...)
@@ -162,9 +175,17 @@ func runBackendService(name, module, port string, opts *BackendOptions) {
if !opts.NoEE {
log.Info("Enterprise Edition enabled (use --no-ee to disable)")
}
if hasWorktreeManifest {
log.Infof("agent-lab worktree %s detected: web=%s api=%s", worktreeManifest.Branch, worktreeManifest.URLs.Web, worktreeManifest.URLs.API)
log.Infof("lane=%s base-ref=%s", worktreeManifest.ResolvedLane(), worktreeManifest.BaseRef)
log.Infof("dependency mode=%s search-infra=%s", worktreeManifest.ResolvedDependencies().Mode, worktreeManifest.ResolvedDependencies().SearchInfraMode)
}
log.Debugf("Running in %s: uv %v", backendDir, uvicornArgs)
mergedEnv := mergeEnv(os.Environ(), fileVars)
mergedEnv := envutil.Merge(os.Environ(), fileVars)
if hasWorktreeManifest {
mergedEnv = envutil.ApplyOverrides(mergedEnv, worktreeManifest.RuntimeEnv())
}
log.Debugf("Applied %d env vars from %s (shell takes precedence)", len(fileVars), envFile)
svcCmd := exec.Command("uv", uvicornArgs...)
@@ -185,6 +206,18 @@ func runBackendService(name, module, port string, opts *BackendOptions) {
}
}
// currentAgentLabManifest looks up the tracked agent-lab manifest for the
// given checkout path. Lookup failures are deliberately swallowed and
// reported as "not tracked" (false), since running outside a tracked
// worktree is a normal, supported case.
func currentAgentLabManifest(repoRoot string) (agentlab.Manifest, bool) {
	commonGitDir, err := agentlab.GetCommonGitDir()
	if err != nil {
		return agentlab.Manifest{}, false
	}
	manifest, found, err := agentlab.FindByRepoRoot(commonGitDir, repoRoot)
	if err != nil {
		return agentlab.Manifest{}, false
	}
	return manifest, found
}
// eeEnvDefaults returns env entries for EE and license enforcement settings.
// These are appended to the file vars so they act as defaults — shell env
// and .env file values still take precedence via mergeEnv.
@@ -231,59 +264,3 @@ func ensureBackendEnvFile(root string) string {
log.Infof("Created %s from template (review and fill in <REPLACE THIS> values)", envFile)
return envFile
}
// mergeEnv layers file-based defaults under the shell environment: every
// shell entry is kept as-is, and a file entry is appended only when its key
// is not already exported by the shell (shell values win).
func mergeEnv(shellEnv, fileVars []string) []string {
	shellKeys := make(map[string]bool, len(shellEnv))
	for _, kv := range shellEnv {
		if eq := strings.Index(kv, "="); eq > 0 {
			shellKeys[kv[:eq]] = true
		}
	}

	result := append(make([]string, 0, len(shellEnv)+len(fileVars)), shellEnv...)
	for _, kv := range fileVars {
		eq := strings.Index(kv, "=")
		if eq <= 0 {
			continue
		}
		if key := kv[:eq]; shellKeys[key] {
			log.Debugf("Env var %s already set in shell, skipping .env value", key)
		} else {
			result = append(result, kv)
		}
	}
	return result
}
// loadBackendEnvFile reads a .env file into KEY=VALUE entries suitable for
// appending to os.Environ(). Blank lines and # comments are ignored; keys and
// values are whitespace-trimmed and surrounding single/double quotes are
// stripped from values. Any I/O error is fatal.
func loadBackendEnvFile(path string) []string {
	file, err := os.Open(path)
	if err != nil {
		log.Fatalf("Failed to open env file %s: %v", path, err)
	}
	defer func() { _ = file.Close() }()

	var entries []string
	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		line := strings.TrimSpace(scanner.Text())
		if line == "" || strings.HasPrefix(line, "#") {
			continue
		}
		eq := strings.Index(line, "=")
		if eq <= 0 {
			// Lines without a KEY=VALUE shape are silently skipped.
			continue
		}
		key := strings.TrimSpace(line[:eq])
		value := strings.Trim(strings.TrimSpace(line[eq+1:]), `"'`)
		entries = append(entries, fmt.Sprintf("%s=%s", key, value))
	}
	if err := scanner.Err(); err != nil {
		log.Fatalf("Failed to read env file %s: %v", path, err)
	}
	return entries
}

View File

@@ -37,8 +37,6 @@ func NewDesktopCommand() *cobra.Command {
runDesktopScript(args)
},
}
cmd.Flags().SetInterspersed(false)
return cmd
}

View File

@@ -0,0 +1,63 @@
package cmd
import (
"fmt"
"os/exec"
"strings"
"github.com/onyx-dot-app/onyx/tools/ods/internal/git"
)
// ghString runs a gh command and returns its trimmed stdout. When gh exits
// non-zero, the returned error includes gh's stderr for context.
func ghString(args ...string) (string, error) {
	git.CheckGitHubCLI()
	stdout, err := exec.Command("gh", args...).Output()
	if err == nil {
		return strings.TrimSpace(string(stdout)), nil
	}
	joined := strings.Join(args, " ")
	if exitErr, ok := err.(*exec.ExitError); ok {
		return "", fmt.Errorf("gh %s failed: %w: %s", joined, err, strings.TrimSpace(string(exitErr.Stderr)))
	}
	return "", fmt.Errorf("gh %s failed: %w", joined, err)
}
// resolvePRNumber returns the PR number to operate on. A non-blank explicit
// value wins and is returned trimmed (so it is safe to splice into REST API
// paths); otherwise the PR associated with the current branch is looked up
// via gh.
func resolvePRNumber(explicit string) (string, error) {
	// Bug fix: the original checked the trimmed value but returned the raw
	// string, so a padded "--pr ' 10007'" leaked whitespace into API paths.
	trimmed := strings.TrimSpace(explicit)
	if trimmed != "" {
		return trimmed, nil
	}
	return ghString("pr", "view", "--json", "number", "--jq", ".number")
}
// currentRepoSlug returns the "owner/name" slug of the repository gh
// resolves for the current directory.
func currentRepoSlug() (string, error) {
	slug, err := ghString("repo", "view", "--json", "owner,name", "--jq", `.owner.login + "/" + .name`)
	return slug, err
}
// upsertIssueComment creates or updates a single marker-identified comment on
// a PR (issues API). The first existing comment whose body starts with marker
// is edited in place; when none exists a new comment is posted.
func upsertIssueComment(repoSlug, prNumber, marker, body string) error {
	// --paginate walks every page of comments; without it a marker comment
	// beyond the first page would be missed and a duplicate posted.
	commentID, err := ghString(
		"api",
		"--paginate",
		fmt.Sprintf("repos/%s/issues/%s/comments", repoSlug, prNumber),
		"--jq",
		fmt.Sprintf(".[] | select(.body | startswith(%q)) | .id", marker),
	)
	if err != nil {
		return err
	}
	// The jq filter emits one ID per matching comment, newline-separated.
	// Update only the first match so the PATCH URL stays well-formed.
	if idx := strings.IndexByte(commentID, '\n'); idx >= 0 {
		commentID = strings.TrimSpace(commentID[:idx])
	}
	if commentID != "" {
		_, err := ghString(
			"api",
			"--method", "PATCH",
			fmt.Sprintf("repos/%s/issues/comments/%s", repoSlug, commentID),
			"-f", fmt.Sprintf("body=%s", body),
		)
		return err
	}
	_, err = ghString(
		"api",
		"--method", "POST",
		fmt.Sprintf("repos/%s/issues/%s/comments", repoSlug, prNumber),
		"-f", fmt.Sprintf("body=%s", body),
	)
	return err
}

865
tools/ods/cmd/journey.go Normal file
View File

@@ -0,0 +1,865 @@
package cmd
import (
"encoding/json"
"fmt"
"net/http"
"os"
"os/exec"
"path/filepath"
"sort"
"strings"
"syscall"
"time"
log "github.com/sirupsen/logrus"
"github.com/spf13/cobra"
"github.com/onyx-dot-app/onyx/tools/ods/internal/agentlab"
"github.com/onyx-dot-app/onyx/tools/ods/internal/git"
"github.com/onyx-dot-app/onyx/tools/ods/internal/journey"
"github.com/onyx-dot-app/onyx/tools/ods/internal/paths"
"github.com/onyx-dot-app/onyx/tools/ods/internal/s3"
)
// defaultJourneyHTTPRegion is the AWS region used when composing public HTTP
// links to uploaded journey artifacts.
// NOTE(review): presumably this must match the artifact bucket's region —
// confirm against the S3 upload path.
const defaultJourneyHTTPRegion = "us-east-2"

// JourneyRunOptions carries the flag values for "journey run".
type JourneyRunOptions struct {
	Journey string // registered journey name to run
	Label string // artifact label for the capture (e.g. "before" or "after")
	Worktree string // tracked agent-lab worktree to run from; empty = current checkout
	OutputDir string // explicit artifact directory override
	Project string // Playwright project override from the journey registry
}

// JourneyCompareOptions carries the flag values for "journey compare".
type JourneyCompareOptions struct {
	Journeys []string // journey names to capture (repeatable --journey flag)
	PlanFile string // JSON plan file listing journeys
	BeforeRef string // git ref for the "before" capture
	AfterRef string // git ref for the "after" capture when no worktree is given
	AfterWorktree string // existing tracked worktree used for the "after" capture
	DependencyMode string // dependency mode for temporary worktrees: namespaced or shared
	PR string // PR number to upload/comment against after capture
	KeepWorktrees bool // keep temporary journey worktrees after the run
	Bucket string // S3 bucket override for uploaded artifacts
}

// JourneyPublishOptions carries the flag values for "journey publish".
type JourneyPublishOptions struct {
	RunDir string // compare run directory containing summary.json
	PR string // PR number to publish against
	Bucket string // S3 bucket override for uploaded artifacts
}

// JourneyCaptureSummary describes one journey capture and the artifact files
// it produced; it is serialized as summary.json in the artifact directory.
type JourneyCaptureSummary struct {
	Journey string `json:"journey"`
	Label string `json:"label"`
	Worktree string `json:"worktree,omitempty"`
	URL string `json:"url"`
	ArtifactDir string `json:"artifact_dir"`
	LogPath string `json:"log_path"`
	VideoFiles []string `json:"video_files,omitempty"`
	TraceFiles []string `json:"trace_files,omitempty"`
	Screenshots []string `json:"screenshots,omitempty"`
	MetadataJSON []string `json:"metadata_json,omitempty"`
}

// JourneyCompareSummary is the machine-readable record of a compare run,
// written to the run directory and optionally uploaded to S3.
type JourneyCompareSummary struct {
	GeneratedAt string `json:"generated_at"`
	BeforeRef string `json:"before_ref"`
	AfterRef string `json:"after_ref"`
	RunDir string `json:"run_dir"`
	S3Prefix string `json:"s3_prefix,omitempty"`
	S3HTTPBase string `json:"s3_http_base,omitempty"`
	Captures []JourneyCaptureSummary `json:"captures"`
}

// managedProcess tracks one background service started for a capture run,
// together with the file its output is logged to.
type managedProcess struct {
	Name string
	Cmd *exec.Cmd
	LogPath string
}
// NewJourneyCommand creates the journey command surface.
func NewJourneyCommand() *cobra.Command {
	root := &cobra.Command{
		Use:   "journey",
		Short: "Capture before/after browser journeys as agent artifacts",
	}
	for _, sub := range []*cobra.Command{
		newJourneyListCommand(),
		newJourneyRunCommand(),
		newJourneyCompareCommand(),
		newJourneyPublishCommand(),
	} {
		root.AddCommand(sub)
	}
	return root
}
// newJourneyListCommand builds the "journey list" subcommand.
func newJourneyListCommand() *cobra.Command {
	listCmd := &cobra.Command{
		Use:   "list",
		Short: "List registered browser journeys",
		Run: func(_ *cobra.Command, _ []string) {
			runJourneyList()
		},
	}
	return listCmd
}
// newJourneyRunCommand builds the "journey run" subcommand.
func newJourneyRunCommand() *cobra.Command {
	runOpts := &JourneyRunOptions{}

	runCmd := &cobra.Command{
		Use:   "run",
		Short: "Run a single registered journey against the current or target worktree",
		Long: `Run one registered journey against the current checkout or a tracked worktree.
This is the default before/after workflow for product changes:
1. capture --label before in the target worktree before editing
2. implement and validate the change in that same worktree
3. capture --label after in that same worktree
Use journey compare only when you need to recover a missed baseline or compare
two explicit revisions after the fact.`,
		Run: func(_ *cobra.Command, _ []string) {
			runJourneyRun(runOpts)
		},
	}

	flags := runCmd.Flags()
	flags.StringVar(&runOpts.Journey, "journey", "", "registered journey name to run")
	flags.StringVar(&runOpts.Label, "label", "after", "artifact label for this capture (for example before or after)")
	flags.StringVar(&runOpts.Worktree, "worktree", "", "tracked agent-lab worktree to run from instead of the current checkout")
	flags.StringVar(&runOpts.OutputDir, "output-dir", "", "explicit artifact directory for the capture")
	flags.StringVar(&runOpts.Project, "project", "", "override the Playwright project from the journey registry")
	_ = runCmd.MarkFlagRequired("journey")
	return runCmd
}
// newJourneyCompareCommand builds the "journey compare" subcommand.
func newJourneyCompareCommand() *cobra.Command {
	compareOpts := &JourneyCompareOptions{}

	compareCmd := &cobra.Command{
		Use:   "compare",
		Short: "Capture before and after videos by replaying registered journeys against two revisions",
		Long: `Create or reuse worktrees for the before and after revisions, boot the app in each one,
record the configured journeys, and write a machine-readable summary. If --pr is supplied,
the compare run is also uploaded to S3 and linked from the pull request.
This is the fallback path, not the default workflow. Prefer journey run inside a
single tracked product worktree when you can capture before and after during the
normal edit loop.`,
		Run: func(_ *cobra.Command, _ []string) {
			runJourneyCompare(compareOpts)
		},
	}

	flags := compareCmd.Flags()
	flags.StringArrayVar(&compareOpts.Journeys, "journey", nil, "registered journey name to capture (repeatable)")
	flags.StringVar(&compareOpts.PlanFile, "plan-file", "", "JSON file containing {\"journeys\":[...]} (defaults to .github/agent-journeys.json when present)")
	flags.StringVar(&compareOpts.BeforeRef, "before-ref", "origin/main", "git ref for the before capture")
	flags.StringVar(&compareOpts.AfterRef, "after-ref", "HEAD", "git ref for the after capture when --after-worktree is not supplied")
	flags.StringVar(&compareOpts.AfterWorktree, "after-worktree", "", "existing tracked worktree to use for the after capture")
	flags.StringVar(&compareOpts.DependencyMode, "dependency-mode", string(agentlab.DependencyModeNamespaced), "dependency mode for temporary worktrees: namespaced or shared")
	flags.StringVar(&compareOpts.PR, "pr", "", "pull request number to upload/comment against after capture")
	flags.StringVar(&compareOpts.Bucket, "bucket", "", "override the S3 bucket used for uploaded journey artifacts")
	flags.BoolVar(&compareOpts.KeepWorktrees, "keep-worktrees", false, "keep temporary journey worktrees after the capture run")
	return compareCmd
}
// newJourneyPublishCommand builds the "journey publish" subcommand.
func newJourneyPublishCommand() *cobra.Command {
	publishOpts := &JourneyPublishOptions{}

	publishCmd := &cobra.Command{
		Use:   "publish",
		Short: "Upload a previously captured compare run and update the pull request comment",
		Run: func(_ *cobra.Command, _ []string) {
			runJourneyPublish(publishOpts)
		},
	}

	flags := publishCmd.Flags()
	flags.StringVar(&publishOpts.RunDir, "run-dir", "", "compare run directory containing summary.json")
	flags.StringVar(&publishOpts.PR, "pr", "", "pull request number to publish against")
	flags.StringVar(&publishOpts.Bucket, "bucket", "", "override the S3 bucket used for uploaded journey artifacts")
	_ = publishCmd.MarkFlagRequired("run-dir")
	return publishCmd
}
// runJourneyList prints every registered journey as one tab-separated line:
// name, description, Playwright project, and whether a model server is needed.
func runJourneyList() {
	root, err := paths.GitRoot()
	if err != nil {
		log.Fatalf("Failed to determine git root: %v", err)
	}
	reg, err := journey.LoadRegistry(root)
	if err != nil {
		log.Fatalf("Failed to load journey registry: %v", err)
	}
	for _, def := range reg.Journeys {
		fmt.Printf("%s\t%s\tproject=%s\tmodel_server=%t\n", def.Name, def.Description, def.Project, def.RequiresModelServer)
	}
}
func runJourneyRun(opts *JourneyRunOptions) {
repoRoot, manifest, hasManifest := resolveAgentLabTarget(opts.Worktree)
harnessRoot, err := resolveJourneyHarnessRoot(repoRoot, manifest, hasManifest)
if err != nil {
log.Fatalf("Failed to resolve journey harness root: %v", err)
}
capture, err := captureJourney(harnessRoot, repoRoot, manifest, hasManifest, opts.Journey, opts.Label, opts.OutputDir, opts.Project)
if err != nil {
log.Fatalf("Journey capture failed: %v", err)
}
summaryPath := filepath.Join(capture.ArtifactDir, "summary.json")
data, err := json.MarshalIndent(capture, "", " ")
if err != nil {
log.Fatalf("Failed to encode journey summary: %v", err)
}
if err := os.WriteFile(summaryPath, data, 0644); err != nil {
log.Fatalf("Failed to write journey summary: %v", err)
}
log.Infof("Journey %s (%s) captured to %s", capture.Journey, capture.Label, capture.ArtifactDir)
}
// runJourneyCompare captures each requested journey on a "before" checkout
// (--before-ref) and an "after" checkout (--after-worktree, the current
// tracked worktree when --after-ref is HEAD, or a temporary worktree at
// --after-ref), then writes a combined summary.json under the run dir.
// With --pr the run is also uploaded and the PR comment refreshed.
// Exits via log.Fatalf on any failure.
func runJourneyCompare(opts *JourneyCompareOptions) {
	repoRoot, err := paths.GitRoot()
	if err != nil {
		log.Fatalf("Failed to determine git root: %v", err)
	}
	definitions, err := resolveJourneyDefinitions(repoRoot, opts.Journeys, opts.PlanFile)
	if err != nil {
		log.Fatalf("Failed to resolve journeys: %v", err)
	}
	currentRoot, currentManifest, hasCurrentManifest := resolveAgentLabTarget("")
	// Refuse to capture "after" from a dirty checkout that is not a tracked
	// worktree: the capture would run from a temporary worktree at HEAD and
	// silently drop the uncommitted changes.
	if opts.AfterWorktree == "" && strings.EqualFold(strings.TrimSpace(opts.AfterRef), "HEAD") && !hasCurrentManifest && git.HasUncommittedChanges() {
		log.Fatalf("The current checkout has uncommitted changes, but it is not a tracked agent-lab worktree. Create the product worktree first and rerun with --after-worktree <branch> so the after capture reflects the real patch.")
	}
	_ = currentRoot // only the manifest is used below
	commonGitDir, err := agentlab.GetCommonGitDir()
	if err != nil {
		log.Fatalf("Failed to determine git common dir: %v", err)
	}
	// One UTC-timestamped directory per compare run.
	runDir := filepath.Join(agentlab.StateRoot(commonGitDir), "journeys", time.Now().UTC().Format("20060102-150405"))
	if err := os.MkdirAll(runDir, 0755); err != nil {
		log.Fatalf("Failed to create journey run dir: %v", err)
	}
	// The "before" side always runs in a fresh temporary worktree.
	beforeTarget, err := createTemporaryJourneyWorktree(opts.BeforeRef, "before", agentlab.DependencyMode(opts.DependencyMode))
	if err != nil {
		log.Fatalf("Failed to create before worktree: %v", err)
	}
	if !opts.KeepWorktrees {
		defer cleanupJourneyTarget(beforeTarget)
	}
	var afterTarget journeyTarget
	if opts.AfterWorktree != "" {
		// Explicit after worktree: reuse it, but make sure its deps are up.
		afterTarget, err = resolveJourneyTarget(opts.AfterWorktree)
		if err != nil {
			log.Fatalf("Failed to resolve after worktree: %v", err)
		}
		if err := runSelfCommand("worktree", "deps", "up", afterTarget.Identifier); err != nil {
			log.Fatalf("Failed to provision dependencies for %s: %v", afterTarget.Identifier, err)
		}
	} else if strings.EqualFold(strings.TrimSpace(opts.AfterRef), "HEAD") {
		if hasCurrentManifest {
			// HEAD of a tracked worktree: capture in place.
			afterTarget = journeyTarget{
				Identifier: currentManifest.Branch,
				Manifest:   currentManifest,
			}
			if err := runSelfCommand("worktree", "deps", "up", afterTarget.Identifier); err != nil {
				log.Fatalf("Failed to provision dependencies for %s: %v", afterTarget.Identifier, err)
			}
			log.Infof("Using current tracked worktree %s for the after capture", afterTarget.Identifier)
		} else {
			// Clean, untracked checkout: capture HEAD from a temp worktree.
			afterTarget, err = createTemporaryJourneyWorktree(opts.AfterRef, "after", agentlab.DependencyMode(opts.DependencyMode))
			if err != nil {
				log.Fatalf("Failed to create after worktree: %v", err)
			}
			if !opts.KeepWorktrees {
				defer cleanupJourneyTarget(afterTarget)
			}
		}
	} else {
		// Arbitrary after ref: always a temp worktree.
		afterTarget, err = createTemporaryJourneyWorktree(opts.AfterRef, "after", agentlab.DependencyMode(opts.DependencyMode))
		if err != nil {
			log.Fatalf("Failed to create after worktree: %v", err)
		}
		if !opts.KeepWorktrees {
			defer cleanupJourneyTarget(afterTarget)
		}
	}
	summary := JourneyCompareSummary{
		GeneratedAt: time.Now().UTC().Format(time.RFC3339),
		BeforeRef:   opts.BeforeRef,
		AfterRef:    opts.AfterRef,
		RunDir:      runDir,
		Captures:    []JourneyCaptureSummary{},
	}
	beforeCaptures, err := captureJourneySet(beforeTarget, definitions, "before", runDir)
	if err != nil {
		log.Fatalf("Before capture failed: %v", err)
	}
	summary.Captures = append(summary.Captures, beforeCaptures...)
	afterCaptures, err := captureJourneySet(afterTarget, definitions, "after", runDir)
	if err != nil {
		log.Fatalf("After capture failed: %v", err)
	}
	summary.Captures = append(summary.Captures, afterCaptures...)
	writeJourneyCompareSummary(runDir, summary)
	log.Infof("Journey compare summary written to %s", filepath.Join(runDir, "summary.json"))
	if opts.PR != "" {
		prNumber, err := resolvePRNumber(opts.PR)
		if err != nil {
			log.Fatalf("Failed to resolve PR number: %v", err)
		}
		// Publishing annotates the summary with S3 locations; persist again.
		updated, err := publishJourneyCompare(runDir, prNumber, opts.Bucket)
		if err != nil {
			log.Fatalf("Failed to publish journey compare run: %v", err)
		}
		writeJourneyCompareSummary(runDir, updated)
	}
}
// runJourneyPublish re-publishes an existing compare run: uploads the run
// directory, refreshes the PR comment, then persists the annotated summary.
func runJourneyPublish(opts *JourneyPublishOptions) {
	prNumber, err := resolvePRNumber(opts.PR)
	if err != nil {
		log.Fatalf("Failed to resolve PR number: %v", err)
	}
	updated, publishErr := publishJourneyCompare(opts.RunDir, prNumber, opts.Bucket)
	if publishErr != nil {
		log.Fatalf("Failed to publish journey compare run: %v", publishErr)
	}
	writeJourneyCompareSummary(opts.RunDir, updated)
	log.Infof("Published journey compare run from %s", opts.RunDir)
}
// resolveJourneyDefinitions merges explicitly requested journeys with the plan
// file (explicit --plan-file, or the default plan when present on disk),
// de-duplicates while preserving first-seen order, and resolves the resulting
// names against the registry.
func resolveJourneyDefinitions(repoRoot string, requested []string, planFile string) ([]journey.Definition, error) {
	names := append([]string{}, requested...)
	plan := strings.TrimSpace(planFile)
	if plan == "" {
		candidate := filepath.Join(repoRoot, journey.DefaultPlanPath)
		if _, statErr := os.Stat(candidate); statErr == nil {
			plan = candidate
		}
	}
	if plan != "" {
		loaded, err := journey.LoadPlan(plan)
		if err != nil {
			return nil, err
		}
		names = append(names, loaded.Journeys...)
	}
	if len(names) == 0 {
		return nil, fmt.Errorf("no journeys requested; pass --journey or provide %s", journey.DefaultPlanPath)
	}
	seen := make(map[string]bool, len(names))
	unique := make([]string, 0, len(names))
	for _, name := range names {
		if seen[name] {
			continue
		}
		seen[name] = true
		unique = append(unique, name)
	}
	return journey.ResolveDefinitions(repoRoot, unique)
}

// resolveJourneyHarnessRoot picks the checkout that actually contains the
// journey registry: the control checkout the worktree was created from is
// preferred when available, falling back to the target repo itself.
func resolveJourneyHarnessRoot(targetRepoRoot string, manifest agentlab.Manifest, hasManifest bool) (string, error) {
	candidates := make([]string, 0, 2)
	if hasManifest && manifest.CreatedFromPath != "" {
		candidates = append(candidates, manifest.CreatedFromPath)
	}
	candidates = append(candidates, targetRepoRoot)
	for _, root := range candidates {
		if _, err := os.Stat(filepath.Join(root, journey.RegistryPath)); err == nil {
			return root, nil
		}
	}
	return "", fmt.Errorf("no journey registry found in target repo %s or control checkout %s", targetRepoRoot, manifest.CreatedFromPath)
}
// captureJourney runs a single journey's Playwright spec and collects its
// artifacts (videos, traces, screenshots, metadata) under an output dir.
//
// harnessRoot is the checkout containing the journey registry and the web
// harness; targetRepoRoot is the checkout being exercised. manifest and
// hasManifest carry agent-lab worktree metadata when the target is tracked.
// projectOverride, when non-blank, replaces the definition's Playwright
// project. Returns a summary of the captured artifacts.
func captureJourney(harnessRoot, targetRepoRoot string, manifest agentlab.Manifest, hasManifest bool, journeyName, label, outputDir, projectOverride string) (JourneyCaptureSummary, error) {
	definitions, err := journey.ResolveDefinitions(harnessRoot, []string{journeyName})
	if err != nil {
		return JourneyCaptureSummary{}, err
	}
	definition := definitions[0]
	// Default artifact location: the worktree's artifact dir when tracked,
	// otherwise web/output inside the target repo.
	targetDir := strings.TrimSpace(outputDir)
	if targetDir == "" {
		if hasManifest {
			targetDir = filepath.Join(manifest.ArtifactDir, "journeys", journey.Slug(definition.Name), journey.Slug(label))
		} else {
			targetDir = filepath.Join(targetRepoRoot, "web", "output", "journeys", journey.Slug(definition.Name), journey.Slug(label))
		}
	}
	if err := os.MkdirAll(targetDir, 0755); err != nil {
		return JourneyCaptureSummary{}, fmt.Errorf("create journey artifact dir: %w", err)
	}
	playwrightOutputDir := filepath.Join(targetDir, "playwright")
	logPath := filepath.Join(targetDir, "journey.log")
	projectName := definition.Project
	if strings.TrimSpace(projectOverride) != "" {
		projectName = projectOverride
	}
	// Environment consumed by the Playwright harness; for tracked worktrees
	// the manifest's runtime env is layered on top (and may override these).
	envOverrides := map[string]string{
		"PLAYWRIGHT_JOURNEY_MODE":        "1",
		"PLAYWRIGHT_JOURNEY_CAPTURE_DIR": targetDir,
		"PLAYWRIGHT_OUTPUT_DIR":          playwrightOutputDir,
	}
	if definition.SkipGlobalSetup {
		envOverrides["PLAYWRIGHT_SKIP_GLOBAL_SETUP"] = "1"
	}
	if hasManifest {
		for key, value := range manifest.RuntimeEnv() {
			envOverrides[key] = value
		}
	}
	step, passed := runLoggedCommand(
		"journey-"+definition.Name,
		logPath,
		filepath.Join(harnessRoot, "web"),
		envOverrides,
		"npx",
		"playwright", "test", definition.TestPath, "--project", projectName,
	)
	if !passed {
		return JourneyCaptureSummary{}, fmt.Errorf("%s", strings.Join(step.Details, "\n"))
	}
	artifactSummary, err := summarizeJourneyArtifacts(targetDir)
	if err != nil {
		return JourneyCaptureSummary{}, err
	}
	artifactSummary.Journey = definition.Name
	artifactSummary.Label = label
	artifactSummary.ArtifactDir = targetDir
	artifactSummary.LogPath = logPath
	if hasManifest {
		artifactSummary.Worktree = manifest.Branch
		artifactSummary.URL = manifest.URLs.Web
	} else {
		// NOTE(review): BASE_URL is never set in envOverrides above, so this
		// is empty unless manifest.RuntimeEnv / the caller provides it —
		// confirm intent for the untracked-checkout path.
		artifactSummary.URL = envOverrides["BASE_URL"]
	}
	return artifactSummary, nil
}

// journeyTarget identifies a worktree that a journey capture runs against.
type journeyTarget struct {
	Identifier string            // branch name passed to self-invoked worktree commands
	Manifest   agentlab.Manifest // agent-lab metadata for the worktree
	Temporary  bool              // created just for this run; eligible for cleanup
}
// resolveJourneyTarget looks up a tracked agent-lab worktree by identifier
// (e.g. branch name) and wraps it as a journey target.
func resolveJourneyTarget(identifier string) (journeyTarget, error) {
	commonGitDir, err := agentlab.GetCommonGitDir()
	if err != nil {
		return journeyTarget{}, err
	}
	manifest, found, err := agentlab.FindByIdentifier(commonGitDir, identifier)
	if err != nil {
		return journeyTarget{}, err
	}
	if !found {
		return journeyTarget{}, fmt.Errorf("no worktree found for %q", identifier)
	}
	return journeyTarget{Identifier: manifest.Branch, Manifest: manifest}, nil
}

// createTemporaryJourneyWorktree creates a throwaway worktree at ref via
// self-invocation of the worktree commands, provisions its dependencies,
// and returns it marked Temporary so cleanupJourneyTarget will remove it.
func createTemporaryJourneyWorktree(ref, label string, mode agentlab.DependencyMode) (journeyTarget, error) {
	// Nanosecond suffix keeps repeated or concurrent runs from colliding.
	branch := fmt.Sprintf("codex/journey-%s-%s-%d", journey.Slug(label), journey.Slug(strings.ReplaceAll(ref, "/", "-")), time.Now().UTC().UnixNano())
	if err := runSelfCommand("worktree", "create", branch, "--from", ref, "--dependency-mode", string(mode)); err != nil {
		return journeyTarget{}, err
	}
	if err := runSelfCommand("worktree", "deps", "up", branch); err != nil {
		return journeyTarget{}, err
	}
	target, err := resolveJourneyTarget(branch)
	if err != nil {
		return journeyTarget{}, err
	}
	target.Temporary = true
	return target, nil
}

// cleanupJourneyTarget removes a temporary worktree and deletes its branch.
// Failures are logged as warnings only (best effort); non-temporary targets
// are left untouched.
func cleanupJourneyTarget(target journeyTarget) {
	if !target.Temporary {
		return
	}
	if err := runSelfCommand("worktree", "remove", target.Identifier, "--force", "--drop-deps"); err != nil {
		log.Warnf("Failed to remove temporary worktree %s: %v", target.Identifier, err)
	}
	if err := exec.Command("git", "branch", "-D", target.Identifier).Run(); err != nil {
		log.Warnf("Failed to delete temporary branch %s: %v", target.Identifier, err)
	}
}
// captureJourneySet starts the services a journey batch needs (API, web, and
// optionally the model server), captures each journey once under
// <runDir>/<journey>/<label>, and stops the services before returning.
func captureJourneySet(target journeyTarget, definitions []journey.Definition, label, runDir string) ([]JourneyCaptureSummary, error) {
	harnessRoot, err := resolveJourneyHarnessRoot(target.Manifest.CheckoutPath, target.Manifest, true)
	if err != nil {
		return nil, err
	}
	// Start the model server only if at least one journey needs it.
	requiresModelServer := false
	for _, definition := range definitions {
		if definition.RequiresModelServer {
			requiresModelServer = true
			break
		}
	}
	processes, err := startJourneyServices(target, runDir, requiresModelServer)
	if err != nil {
		return nil, err
	}
	defer stopManagedProcesses(processes)
	captures := make([]JourneyCaptureSummary, 0, len(definitions))
	for _, definition := range definitions {
		outputDir := filepath.Join(runDir, journey.Slug(definition.Name), journey.Slug(label))
		capture, err := captureJourney(harnessRoot, target.Manifest.CheckoutPath, target.Manifest, true, definition.Name, label, outputDir, "")
		if err != nil {
			return nil, err
		}
		captures = append(captures, capture)
	}
	return captures, nil
}

// startJourneyServices launches the backend API, optionally the model server,
// and the web dev server for the target worktree as background processes
// (logs under <runDir>/services/<branch>), then blocks until the API and web
// health endpoints respond. On any failure everything already started is
// stopped before returning.
func startJourneyServices(target journeyTarget, runDir string, includeModelServer bool) ([]managedProcess, error) {
	logDir := filepath.Join(runDir, "services", journey.Slug(target.Manifest.Branch))
	if err := os.MkdirAll(logDir, 0755); err != nil {
		return nil, fmt.Errorf("create service log dir: %w", err)
	}
	processes := make([]managedProcess, 0, 3)
	apiProcess, err := startManagedProcess(
		"api",
		filepath.Join(logDir, "api.log"),
		"backend", "api", "--worktree", target.Identifier,
	)
	if err != nil {
		return nil, err
	}
	processes = append(processes, apiProcess)
	if includeModelServer {
		modelProcess, err := startManagedProcess(
			"model_server",
			filepath.Join(logDir, "model_server.log"),
			"backend", "model_server", "--worktree", target.Identifier,
		)
		if err != nil {
			stopManagedProcesses(processes)
			return nil, err
		}
		processes = append(processes, modelProcess)
	}
	webProcess, err := startManagedProcess(
		"web",
		filepath.Join(logDir, "web.log"),
		"web", "dev", "--worktree", target.Identifier, "--", "--webpack",
	)
	if err != nil {
		stopManagedProcesses(processes)
		return nil, err
	}
	processes = append(processes, webProcess)
	// Readiness gates: API first (2 min budget), then the web server (3 min).
	if err := waitForJourneyURL(target.Manifest.URLs.API+"/health", 2*time.Minute, processes...); err != nil {
		stopManagedProcesses(processes)
		return nil, err
	}
	if err := waitForJourneyURL(target.Manifest.URLs.Web+"/api/health", 3*time.Minute, processes...); err != nil {
		stopManagedProcesses(processes)
		return nil, err
	}
	return processes, nil
}
// startManagedProcess re-invokes the current executable with args as a
// background process whose stdout and stderr both go to logPath. The child
// is placed in its own process group (Setpgid) rather than sharing ours.
func startManagedProcess(name, logPath string, args ...string) (managedProcess, error) {
	executable, err := os.Executable()
	if err != nil {
		return managedProcess{}, fmt.Errorf("determine ods executable: %w", err)
	}
	if err := os.MkdirAll(filepath.Dir(logPath), 0755); err != nil {
		return managedProcess{}, fmt.Errorf("create log dir: %w", err)
	}
	logFile, err := os.Create(logPath)
	if err != nil {
		return managedProcess{}, fmt.Errorf("create log file: %w", err)
	}
	cmd := exec.Command(executable, args...)
	cmd.Stdout = logFile
	cmd.Stderr = logFile
	cmd.Stdin = nil
	cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}
	if err := cmd.Start(); err != nil {
		_ = logFile.Close()
		return managedProcess{}, fmt.Errorf("start %s: %w", name, err)
	}
	// Safe to close our handle right away: after Start the child holds its
	// own descriptor to the log file.
	_ = logFile.Close()
	return managedProcess{Name: name, Cmd: cmd, LogPath: logPath}, nil
}
// stopManagedProcesses shuts down managed background services in reverse
// start order. Each process was started with Setpgid (see startManagedProcess),
// so signals target the whole process group (-pid) to also reach children the
// service spawned; previously only the group leader was signalled, which left
// descendants running. If the group signal fails, fall back to signalling the
// leader alone. Each process gets 10 seconds after SIGINT before being killed.
func stopManagedProcesses(processes []managedProcess) {
	for i := len(processes) - 1; i >= 0; i-- {
		process := processes[i]
		if process.Cmd == nil || process.Cmd.Process == nil {
			continue
		}
		pid := process.Cmd.Process.Pid
		// Negative pid addresses the process group created via Setpgid.
		if err := syscall.Kill(-pid, syscall.SIGINT); err != nil {
			_ = process.Cmd.Process.Signal(os.Interrupt)
		}
		done := make(chan struct{})
		go func(cmd *exec.Cmd) {
			_, _ = cmd.Process.Wait()
			close(done)
		}(process.Cmd)
		select {
		case <-done:
		case <-time.After(10 * time.Second):
			// Graceful window elapsed; force-kill the whole group.
			if err := syscall.Kill(-pid, syscall.SIGKILL); err != nil {
				_ = process.Cmd.Process.Kill()
			}
		}
	}
}
// waitForJourneyURL polls url (every 3s, 5s per-request timeout) until it
// answers with a status below 500 or the deadline passes, verifying between
// attempts that none of the managed services has died.
func waitForJourneyURL(url string, timeout time.Duration, processes ...managedProcess) error {
	client := &http.Client{Timeout: 5 * time.Second}
	// probe reports whether the endpoint currently answers with a non-5xx status.
	probe := func() bool {
		resp, err := client.Get(url)
		if err != nil {
			return false
		}
		defer resp.Body.Close()
		return resp.StatusCode >= 200 && resp.StatusCode < 500
	}
	deadline := time.Now().Add(timeout)
	for time.Now().Before(deadline) {
		if err := ensureManagedProcessesRunning(processes); err != nil {
			return fmt.Errorf("while waiting for %s: %w", url, err)
		}
		if probe() {
			return nil
		}
		time.Sleep(3 * time.Second)
	}
	// Final health check so a crashed service is reported instead of a
	// generic timeout.
	if err := ensureManagedProcessesRunning(processes); err != nil {
		return fmt.Errorf("while waiting for %s: %w", url, err)
	}
	return fmt.Errorf("timed out waiting for %s", url)
}
// ensureManagedProcessesRunning verifies every managed process is still alive
// using the signal-0 probe (kill(pid, 0)). ESRCH means the process exited
// early — the returned error then carries a tail of its log; EPERM means the
// process exists but is not signallable, which still counts as running. Any
// other errno is reported as a health-check failure.
func ensureManagedProcessesRunning(processes []managedProcess) error {
	for _, process := range processes {
		if process.Cmd == nil || process.Cmd.Process == nil {
			continue
		}
		if err := syscall.Kill(process.Cmd.Process.Pid, 0); err != nil {
			if err == syscall.ESRCH {
				return fmt.Errorf("%s exited early\n%s", process.Name, readJourneyLogTail(process.LogPath, 40))
			}
			if err != syscall.EPERM {
				return fmt.Errorf("check %s process health: %w", process.Name, err)
			}
		}
	}
	return nil
}
func readJourneyLogTail(path string, lineCount int) string {
data, err := os.ReadFile(path)
if err != nil {
return fmt.Sprintf("failed to read %s: %v", path, err)
}
trimmed := strings.TrimRight(string(data), "\n")
if trimmed == "" {
return fmt.Sprintf("%s is empty", path)
}
lines := strings.Split(trimmed, "\n")
if len(lines) > lineCount {
lines = lines[len(lines)-lineCount:]
}
return fmt.Sprintf("recent log tail from %s:\n%s", path, strings.Join(lines, "\n"))
}
// summarizeJourneyArtifacts walks the captured artifact tree rooted at root
// and buckets files into videos (.webm), traces (trace.zip), screenshots
// (.png), and metadata JSON (excluding summary.json itself). All paths are
// reported relative to root and sorted for stable output.
func summarizeJourneyArtifacts(root string) (JourneyCaptureSummary, error) {
	summary := JourneyCaptureSummary{}
	visit := func(path string, info os.FileInfo, walkErr error) error {
		if walkErr != nil {
			return walkErr
		}
		if info.IsDir() {
			return nil
		}
		relative, relErr := filepath.Rel(root, path)
		if relErr != nil {
			return relErr
		}
		if strings.HasSuffix(path, ".webm") {
			summary.VideoFiles = append(summary.VideoFiles, relative)
		} else if strings.HasSuffix(path, "trace.zip") {
			summary.TraceFiles = append(summary.TraceFiles, relative)
		} else if strings.HasSuffix(path, ".png") {
			summary.Screenshots = append(summary.Screenshots, relative)
		} else if strings.HasSuffix(path, ".json") && filepath.Base(path) != "summary.json" {
			summary.MetadataJSON = append(summary.MetadataJSON, relative)
		}
		return nil
	}
	if err := filepath.Walk(root, visit); err != nil {
		return summary, fmt.Errorf("walk journey artifacts: %w", err)
	}
	for _, bucket := range [][]string{summary.VideoFiles, summary.TraceFiles, summary.Screenshots, summary.MetadataJSON} {
		sort.Strings(bucket)
	}
	return summary, nil
}
// runSelfCommand re-invokes the current executable with the given arguments,
// wiring up the caller's stdio, and returns the command's error (if any).
func runSelfCommand(args ...string) error {
	self, err := os.Executable()
	if err != nil {
		return err
	}
	command := exec.Command(self, args...)
	command.Stdin = os.Stdin
	command.Stdout = os.Stdout
	command.Stderr = os.Stderr
	return command.Run()
}

// writeJourneyCompareSummary serializes summary to <runDir>/summary.json,
// exiting the process on any encoding or write failure.
func writeJourneyCompareSummary(runDir string, summary JourneyCompareSummary) {
	encoded, err := json.MarshalIndent(summary, "", " ")
	if err != nil {
		log.Fatalf("Failed to encode journey compare summary: %v", err)
	}
	target := filepath.Join(runDir, "summary.json")
	if err := os.WriteFile(target, encoded, 0644); err != nil {
		log.Fatalf("Failed to write journey compare summary: %v", err)
	}
}
// publishJourneyCompare uploads a captured compare run (everything under
// runDir) to S3 under journeys/pr-<pr>/<timestamp>/, annotates the summary
// with the S3 prefix and public HTTP base, and upserts the journey report
// comment on the pull request. The annotated summary is returned; callers
// are responsible for persisting it back to disk.
func publishJourneyCompare(runDir, prNumber, bucketOverride string) (JourneyCompareSummary, error) {
	var summary JourneyCompareSummary
	data, err := os.ReadFile(filepath.Join(runDir, "summary.json"))
	if err != nil {
		return summary, fmt.Errorf("read compare summary: %w", err)
	}
	if err := json.Unmarshal(data, &summary); err != nil {
		return summary, fmt.Errorf("parse compare summary: %w", err)
	}
	bucket := bucketOverride
	if bucket == "" {
		bucket = getS3Bucket()
	}
	// The run dir's basename is the timestamp chosen by runJourneyCompare.
	timestamp := filepath.Base(runDir)
	s3Prefix := fmt.Sprintf("s3://%s/journeys/pr-%s/%s/", bucket, prNumber, timestamp)
	if err := s3.SyncUp(runDir, s3Prefix, true); err != nil {
		return summary, err
	}
	httpBase := fmt.Sprintf("https://%s.s3.%s.amazonaws.com/journeys/pr-%s/%s/", bucket, defaultJourneyHTTPRegion, prNumber, timestamp)
	summary.S3Prefix = s3Prefix
	summary.S3HTTPBase = httpBase
	repoSlug, err := currentRepoSlug()
	if err != nil {
		return summary, err
	}
	body := buildJourneyPRComment(summary)
	// The HTML marker lets an existing report comment be found and replaced
	// instead of posting a new one each run.
	if err := upsertIssueComment(repoSlug, prNumber, "<!-- agent-journey-report -->", body); err != nil {
		return summary, err
	}
	return summary, nil
}
// buildJourneyPRComment renders the markdown PR comment for a compare run:
// a table with one row per journey and before/after artifact links. The
// leading HTML marker identifies the comment for later upserts.
func buildJourneyPRComment(summary JourneyCompareSummary) string {
	type capturePair struct {
		before *JourneyCaptureSummary
		after  *JourneyCaptureSummary
	}
	// Pair up the before/after captures for each journey name.
	byJourney := map[string]*capturePair{}
	for i := range summary.Captures {
		capture := &summary.Captures[i]
		pair := byJourney[capture.Journey]
		if pair == nil {
			pair = &capturePair{}
			byJourney[capture.Journey] = pair
		}
		switch capture.Label {
		case "before":
			pair.before = capture
		case "after":
			pair.after = capture
		}
	}
	// Sort names so the comment is deterministic across runs.
	names := make([]string, 0, len(byJourney))
	for name := range byJourney {
		names = append(names, name)
	}
	sort.Strings(names)
	lines := []string{
		"<!-- agent-journey-report -->",
		"### Agent Journey Report",
		"",
		fmt.Sprintf("Before ref: `%s`", summary.BeforeRef),
		fmt.Sprintf("After ref: `%s`", summary.AfterRef),
		"",
		"| Journey | Before | After |",
		"|---------|--------|-------|",
	}
	for _, name := range names {
		pair := byJourney[name]
		before := journeyLink(summary.RunDir, summary.S3HTTPBase, pair.before)
		after := journeyLink(summary.RunDir, summary.S3HTTPBase, pair.after)
		lines = append(lines, fmt.Sprintf("| `%s` | %s | %s |", name, before, after))
	}
	return strings.Join(lines, "\n")
}

// journeyLink renders a markdown link to the best artifact for one capture:
// the first video if any, else the first screenshot, else a placeholder.
// Artifact paths are made relative to the run dir so they resolve under
// httpBase (the uploaded S3 location).
func journeyLink(runDir, httpBase string, capture *JourneyCaptureSummary) string {
	if capture == nil {
		return "_not captured_"
	}
	artifactDir, err := filepath.Rel(runDir, capture.ArtifactDir)
	if err != nil {
		// Fall back to the absolute dir; the link may not resolve, but the
		// comment still renders.
		artifactDir = capture.ArtifactDir
	}
	if len(capture.VideoFiles) > 0 {
		return fmt.Sprintf("[video](%s%s)", httpBase, pathJoin(artifactDir, capture.VideoFiles[0]))
	}
	if len(capture.Screenshots) > 0 {
		return fmt.Sprintf("[screenshot](%s%s)", httpBase, pathJoin(artifactDir, capture.Screenshots[0]))
	}
	return "_no artifact_"
}
// pathJoin joins URL path segments with single slashes, trimming leading and
// trailing slashes from each segment and dropping segments that are empty —
// including segments that consist only of slashes. (Previously a segment like
// "/" passed the pre-trim empty check, trimmed down to "", and was still
// appended, producing doubled slashes in the joined result.)
func pathJoin(parts ...string) string {
	clean := make([]string, 0, len(parts))
	for _, part := range parts {
		trimmed := strings.Trim(part, "/")
		if trimmed == "" {
			continue
		}
		clean = append(clean, trimmed)
	}
	return strings.Join(clean, "/")
}

289
tools/ods/cmd/pr_checks.go Normal file
View File

@@ -0,0 +1,289 @@
package cmd
import (
"encoding/json"
"fmt"
"os/exec"
"strings"
log "github.com/sirupsen/logrus"
"github.com/spf13/cobra"
"github.com/onyx-dot-app/onyx/tools/ods/internal/git"
)
// PRChecksOptions carries the flag values shared by the pr-checks subcommands.
type PRChecksOptions struct {
	PR string // pull request number; empty means "derive from the current branch"
}

// ghChecksResponse mirrors the GraphQL payload returned by `gh api graphql`
// for the PR checks query: the status-check rollup contexts on the PR's
// latest commit. Context nodes are a union of CheckRun and StatusContext,
// so each node declares the fields of both; Type (__typename) says which
// set is populated.
type ghChecksResponse struct {
	Data struct {
		Repository struct {
			PullRequest struct {
				Number  int    `json:"number"`
				Title   string `json:"title"`
				URL     string `json:"url"`
				HeadRef string `json:"headRefName"`
				Commits struct {
					Nodes []struct {
						Commit struct {
							StatusCheckRollup struct {
								Contexts struct {
									Nodes []struct {
										Type         string `json:"__typename"`
										Name         string `json:"name"`
										DisplayTitle string `json:"displayTitle"`
										WorkflowName string `json:"workflowName"`
										Status       string `json:"status"`
										Conclusion   string `json:"conclusion"`
										DetailsURL   string `json:"detailsUrl"`
										Context      string `json:"context"`
										State        string `json:"state"`
										TargetURL    string `json:"targetUrl"`
										Description  string `json:"description"`
									} `json:"nodes"`
								} `json:"contexts"`
							} `json:"statusCheckRollup"`
						} `json:"commit"`
					} `json:"nodes"`
				} `json:"commits"`
			} `json:"pullRequest"`
		} `json:"repository"`
	} `json:"data"`
}
// NewPRChecksCommand creates the pr-checks command surface.
func NewPRChecksCommand() *cobra.Command {
	root := &cobra.Command{
		Use:   "pr-checks",
		Short: "Inspect GitHub PR checks and surface failing runs for remediation",
	}
	root.AddCommand(newPRChecksStatusCommand(), newPRChecksDiagnoseCommand())
	return root
}

// newPRChecksStatusCommand builds `pr-checks status`, which lists every check.
func newPRChecksStatusCommand() *cobra.Command {
	opts := &PRChecksOptions{}
	cmd := &cobra.Command{
		Use:   "status",
		Short: "List all status checks for a pull request",
		Run: func(_ *cobra.Command, _ []string) {
			runPRChecksStatus(opts)
		},
	}
	cmd.Flags().StringVar(&opts.PR, "pr", "", "pull request number (defaults to the current branch PR)")
	return cmd
}

// newPRChecksDiagnoseCommand builds `pr-checks diagnose`, which lists only
// failing checks together with a suggested next command.
func newPRChecksDiagnoseCommand() *cobra.Command {
	opts := &PRChecksOptions{}
	cmd := &cobra.Command{
		Use:   "diagnose",
		Short: "List only failing checks and point to the next remediation command",
		Run: func(_ *cobra.Command, _ []string) {
			runPRChecksDiagnose(opts)
		},
	}
	cmd.Flags().StringVar(&opts.PR, "pr", "", "pull request number (defaults to the current branch PR)")
	return cmd
}
// runPRChecksStatus prints every check on the PR with its result, display
// name, kind, and details URL.
func runPRChecksStatus(opts *PRChecksOptions) {
	prNumber, err := resolvePRNumber(opts.PR)
	if err != nil {
		log.Fatalf("Failed to resolve PR number: %v", err)
	}
	response, err := fetchPRChecks(prNumber)
	if err != nil {
		log.Fatalf("Failed to fetch PR checks: %v", err)
	}
	pr := response.Data.Repository.PullRequest
	fmt.Printf("PR #%d %s\n", pr.Number, pr.Title)
	for _, check := range flattenChecks(response) {
		fmt.Printf("[%s] %s (%s) %s\n", check.result(), check.displayName(), check.kind(), check.url())
	}
}

// runPRChecksDiagnose prints only the failing checks and, for each, the most
// likely remediation command (Playwright failures point at `ods trace`).
func runPRChecksDiagnose(opts *PRChecksOptions) {
	prNumber, err := resolvePRNumber(opts.PR)
	if err != nil {
		log.Fatalf("Failed to resolve PR number: %v", err)
	}
	response, err := fetchPRChecks(prNumber)
	if err != nil {
		log.Fatalf("Failed to fetch PR checks: %v", err)
	}
	failing := failingChecks(response)
	if len(failing) == 0 {
		fmt.Printf("No failing checks found on PR #%s\n", prNumber)
		return
	}
	fmt.Printf("Failing checks for PR #%s:\n", prNumber)
	for _, check := range failing {
		fmt.Printf("- %s (%s)\n", check.displayName(), check.url())
		isPlaywright := strings.Contains(strings.ToLower(check.displayName()), "playwright")
		if isPlaywright {
			fmt.Printf(" next: ods trace --pr %s\n", prNumber)
		} else {
			fmt.Printf(" next: gh run view <run-id> --log-failed\n")
		}
	}
}
// fetchPRChecks queries the GitHub GraphQL API (via the gh CLI) for the
// status-check rollup on the latest commit of the given PR in the current
// repository and decodes the response. Requires an authenticated gh
// installation (verified by git.CheckGitHubCLI).
func fetchPRChecks(prNumber string) (*ghChecksResponse, error) {
	repoSlug, err := currentRepoSlug()
	if err != nil {
		return nil, err
	}
	// repoSlug is "owner/name"; split it for the GraphQL variables.
	parts := strings.SplitN(repoSlug, "/", 2)
	if len(parts) != 2 {
		return nil, fmt.Errorf("unexpected repo slug %q", repoSlug)
	}
	git.CheckGitHubCLI()
	// Inline fragments pick the relevant fields from the CheckRun /
	// StatusContext union; only the newest commit's rollup is requested.
	query := `query($owner:String!, $name:String!, $number:Int!) {
repository(owner:$owner, name:$name) {
pullRequest(number:$number) {
number
title
url
headRefName
commits(last:1) {
nodes {
commit {
statusCheckRollup {
contexts(first:100) {
nodes {
__typename
... on CheckRun {
name
status
conclusion
detailsUrl
}
... on StatusContext {
context
state
targetUrl
description
}
}
}
}
}
}
}
}
}`
	cmd := exec.Command(
		"gh", "api", "graphql",
		"-f", "query="+query,
		"-F", "owner="+parts[0],
		"-F", "name="+parts[1],
		"-F", "number="+prNumber,
	)
	output, err := cmd.Output()
	if err != nil {
		// Surface gh's stderr when available; it usually explains the failure.
		if exitErr, ok := err.(*exec.ExitError); ok {
			return nil, fmt.Errorf("gh api graphql failed: %w: %s", err, strings.TrimSpace(string(exitErr.Stderr)))
		}
		return nil, fmt.Errorf("gh api graphql failed: %w", err)
	}
	var response ghChecksResponse
	if err := json.Unmarshal(output, &response); err != nil {
		return nil, fmt.Errorf("parse PR checks: %w", err)
	}
	return &response, nil
}
// flattenedCheck is a flat, union-free view of one status-check rollup
// context node: CheckRun fields and StatusContext fields side by side, with
// Type (the GraphQL __typename) indicating which set is meaningful.
type flattenedCheck struct {
	Type         string // "__typename": e.g. "CheckRun" or "StatusContext"
	Name         string
	DisplayTitle string
	WorkflowName string
	Status       string
	Conclusion   string
	DetailsURL   string
	Context      string
	State        string
	TargetURL    string
}

// flattenChecks converts the nested GraphQL response into a flat slice of
// checks from the PR's latest commit. Returns an empty (non-nil) slice when
// the response is nil or carries no commits.
func flattenChecks(response *ghChecksResponse) []flattenedCheck {
	result := []flattenedCheck{}
	if response == nil || len(response.Data.Repository.PullRequest.Commits.Nodes) == 0 {
		return result
	}
	for _, node := range response.Data.Repository.PullRequest.Commits.Nodes[0].Commit.StatusCheckRollup.Contexts.Nodes {
		result = append(result, flattenedCheck{
			Type:         node.Type,
			Name:         node.Name,
			DisplayTitle: node.DisplayTitle,
			WorkflowName: node.WorkflowName,
			Status:       node.Status,
			Conclusion:   node.Conclusion,
			DetailsURL:   node.DetailsURL,
			Context:      node.Context,
			State:        node.State,
			TargetURL:    node.TargetURL,
		})
	}
	return result
}
// displayName returns the human-readable name for a check. CheckRun entries
// prefer the display title, then "workflow / name", then the bare name;
// every other kind uses its status-context string.
func (c flattenedCheck) displayName() string {
	if c.Type != "CheckRun" {
		return c.Context
	}
	if c.DisplayTitle != "" {
		return c.DisplayTitle
	}
	if c.WorkflowName != "" && c.Name != "" {
		return c.WorkflowName + " / " + c.Name
	}
	return c.Name
}

// kind reports the GraphQL typename of the check, defaulting to "status"
// when no typename is present.
func (c flattenedCheck) kind() string {
	if c.Type == "" {
		return "status"
	}
	return c.Type
}

// result returns the lower-cased outcome: conclusion (or the in-progress
// status) for check runs, state for everything else.
func (c flattenedCheck) result() string {
	if c.Type != "CheckRun" {
		return strings.ToLower(c.State)
	}
	if c.Conclusion != "" {
		return strings.ToLower(c.Conclusion)
	}
	return strings.ToLower(c.Status)
}

// url prefers the check-run details URL, falling back to the status target URL.
func (c flattenedCheck) url() string {
	if c.DetailsURL == "" {
		return c.TargetURL
	}
	return c.DetailsURL
}

// failingChecks filters the flattened checks down to those whose result
// indicates a failure.
func failingChecks(response *ghChecksResponse) []flattenedCheck {
	failureResults := map[string]bool{
		"failure":   true,
		"failed":    true,
		"timed_out": true,
		"cancelled": true,
		"error":     true,
	}
	checks := flattenChecks(response)
	failing := make([]flattenedCheck, 0, len(checks))
	for _, check := range checks {
		if failureResults[check.result()] {
			failing = append(failing, check)
		}
	}
	return failing
}

73
tools/ods/cmd/pr_merge.go Normal file
View File

@@ -0,0 +1,73 @@
package cmd
import (
"os"
"os/exec"
log "github.com/sirupsen/logrus"
"github.com/spf13/cobra"
"github.com/onyx-dot-app/onyx/tools/ods/internal/git"
)
// PRMergeOptions carries the pr-merge command's flag values.
type PRMergeOptions struct {
	PR           string // pull request number; empty derives from current branch
	Auto         bool   // enable auto-merge instead of merging immediately
	DeleteBranch bool   // delete the source branch after merge
	Method       string // merge method: "squash", "merge", or "rebase"
}
// NewPRMergeCommand creates the pr-merge command.
func NewPRMergeCommand() *cobra.Command {
	opts := &PRMergeOptions{}
	cmd := &cobra.Command{
		Use:   "pr-merge",
		Short: "Merge a GitHub pull request through gh with explicit method flags",
		Run: func(_ *cobra.Command, _ []string) {
			runPRMerge(opts)
		},
	}
	flags := cmd.Flags()
	flags.StringVar(&opts.PR, "pr", "", "pull request number (defaults to the current branch PR)")
	flags.BoolVar(&opts.Auto, "auto", false, "enable auto-merge instead of merging immediately")
	flags.BoolVar(&opts.DeleteBranch, "delete-branch", false, "delete the branch after merge")
	flags.StringVar(&opts.Method, "method", "squash", "merge method: squash, merge, or rebase")
	return cmd
}
// runPRMerge merges a PR via the gh CLI using the selected merge method,
// optionally enabling auto-merge and/or deleting the source branch.
// Exits via log.Fatalf on an invalid method or a failed gh invocation.
func runPRMerge(opts *PRMergeOptions) {
	git.CheckGitHubCLI()
	prNumber, err := resolvePRNumber(opts.PR)
	if err != nil {
		log.Fatalf("Failed to resolve PR number: %v", err)
	}
	// Map the method name to gh's flag; anything else is rejected.
	methodFlags := map[string]string{
		"squash": "--squash",
		"merge":  "--merge",
		"rebase": "--rebase",
	}
	methodFlag, ok := methodFlags[opts.Method]
	if !ok {
		log.Fatalf("Invalid merge method %q: expected squash, merge, or rebase", opts.Method)
	}
	args := []string{"pr", "merge", prNumber, methodFlag}
	if opts.Auto {
		args = append(args, "--auto")
	}
	if opts.DeleteBranch {
		args = append(args, "--delete-branch")
	}
	merge := exec.Command("gh", args...)
	merge.Stdin = os.Stdin
	merge.Stdout = os.Stdout
	merge.Stderr = os.Stderr
	if err := merge.Run(); err != nil {
		log.Fatalf("Failed to merge PR #%s: %v", prNumber, err)
	}
}

89
tools/ods/cmd/pr_open.go Normal file
View File

@@ -0,0 +1,89 @@
package cmd
import (
"fmt"
"os"
"os/exec"
"path/filepath"
"regexp"
"strings"
log "github.com/sirupsen/logrus"
"github.com/spf13/cobra"
"github.com/onyx-dot-app/onyx/tools/ods/internal/git"
"github.com/onyx-dot-app/onyx/tools/ods/internal/paths"
)
// conventionalPRTitlePattern validates conventional-commit style PR titles:
// a known type, an optional (scope), then ": " and a non-empty subject.
var conventionalPRTitlePattern = regexp.MustCompile(`^(build|chore|ci|docs|feat|fix|perf|refactor|revert|style|test)(\([^)]+\))?: .+`)
// PROpenOptions carries the pr-open command's flag values.
type PROpenOptions struct {
	Title    string // PR title; empty defaults to the latest commit subject
	Base     string // base branch for the PR
	BodyFile string // PR body file; empty defaults to the repo's PR template
	Draft    bool   // open the PR as a draft
}
// NewPROpenCommand creates the pr-open command.
func NewPROpenCommand() *cobra.Command {
	opts := &PROpenOptions{}
	cmd := &cobra.Command{
		Use:   "pr-open",
		Short: "Open a GitHub pull request using the repo template and a conventional-commit title",
		Run: func(_ *cobra.Command, _ []string) {
			runPROpen(opts)
		},
	}
	flags := cmd.Flags()
	flags.StringVar(&opts.Title, "title", "", "PR title (defaults to the latest commit subject)")
	flags.StringVar(&opts.Base, "base", "main", "base branch for the PR")
	flags.StringVar(&opts.BodyFile, "body-file", "", "explicit PR body file (defaults to .github/pull_request_template.md)")
	flags.BoolVar(&opts.Draft, "draft", false, "open the PR as a draft")
	return cmd
}
// runPROpen opens a pull request via the gh CLI. The title defaults to the
// HEAD commit message and must match the conventional-commit pattern; the
// body defaults to the repository's pull_request_template.md.
func runPROpen(opts *PROpenOptions) {
	git.CheckGitHubCLI()
	title := strings.TrimSpace(opts.Title)
	if title == "" {
		// NOTE(review): assumes GetCommitMessage returns only the subject
		// line of HEAD — confirm it does not include the commit body.
		subject, err := git.GetCommitMessage("HEAD")
		if err != nil {
			log.Fatalf("Failed to determine PR title from HEAD: %v", err)
		}
		title = subject
	}
	if !conventionalPRTitlePattern.MatchString(title) {
		log.Fatalf("PR title must follow conventional-commit style. Got %q", title)
	}
	bodyFile := strings.TrimSpace(opts.BodyFile)
	if bodyFile == "" {
		repoRoot, err := paths.GitRoot()
		if err != nil {
			log.Fatalf("Failed to determine git root: %v", err)
		}
		bodyFile = filepath.Join(repoRoot, ".github", "pull_request_template.md")
	}
	bodyBytes, err := os.ReadFile(bodyFile)
	if err != nil {
		log.Fatalf("Failed to read PR body file %s: %v", bodyFile, err)
	}
	args := []string{"pr", "create", "--base", opts.Base, "--title", title, "--body", string(bodyBytes)}
	if opts.Draft {
		args = append(args, "--draft")
	}
	cmd := exec.Command("gh", args...)
	cmd.Stdout = os.Stdout
	cmd.Stderr = os.Stderr
	cmd.Stdin = os.Stdin
	if err := cmd.Run(); err != nil {
		log.Fatalf("Failed to open PR: %v", err)
	}
	fmt.Printf("Opened PR with title %q\n", title)
}

393
tools/ods/cmd/pr_review.go Normal file
View File

@@ -0,0 +1,393 @@
package cmd
import (
"encoding/json"
"fmt"
"os"
"os/exec"
"path/filepath"
"strings"
log "github.com/sirupsen/logrus"
"github.com/spf13/cobra"
"github.com/onyx-dot-app/onyx/tools/ods/internal/agentlab"
"github.com/onyx-dot-app/onyx/tools/ods/internal/git"
"github.com/onyx-dot-app/onyx/tools/ods/internal/prreview"
)
// PRReviewFetchOptions carries flags for `pr-review fetch`.
type PRReviewFetchOptions struct {
	PR     string // pull request number; empty derives from the current branch
	Output string // explicit output path for the fetched review JSON
}

// PRReviewTriageOptions carries flags for `pr-review triage`.
type PRReviewTriageOptions struct {
	PR     string // pull request number; empty derives from the current branch
	Output string // explicit output path (presumably the triage result — mirrors fetch)
}

// PRReviewRespondOptions carries flags for `pr-review respond`.
type PRReviewRespondOptions struct {
	PR        string // pull request number; empty derives from the current branch
	CommentID int    // database ID of the review comment being responded to
	Body      string // reply body text
	ThreadID  string // GraphQL node ID of the review thread
}
type ghReviewResponse struct {
Data struct {
Repository struct {
PullRequest struct {
Number int `json:"number"`
Title string `json:"title"`
URL string `json:"url"`
ReviewThreads struct {
Nodes []struct {
ID string `json:"id"`
IsResolved bool `json:"isResolved"`
IsOutdated bool `json:"isOutdated"`
Path string `json:"path"`
Line int `json:"line"`
StartLine int `json:"startLine"`
Comments struct {
Nodes []struct {
DatabaseID int `json:"databaseId"`
Body string
URL string `json:"url"`
CreatedAt string `json:"createdAt"`
Author struct {
Login string `json:"login"`
} `json:"author"`
} `json:"nodes"`
} `json:"comments"`
} `json:"nodes"`
} `json:"reviewThreads"`
} `json:"pullRequest"`
} `json:"repository"`
} `json:"data"`
}
// NewPRReviewCommand wires up the pr-review command surface and its subcommands.
func NewPRReviewCommand() *cobra.Command {
	root := &cobra.Command{
		Use:   "pr-review",
		Short: "Fetch, triage, and respond to GitHub pull request review threads",
	}
	for _, sub := range []*cobra.Command{
		newPRReviewFetchCommand(),
		newPRReviewTriageCommand(),
		newPRReviewRespondCommand(),
		newPRReviewResolveCommand(),
	} {
		root.AddCommand(sub)
	}
	return root
}
// newPRReviewFetchCommand builds the `pr-review fetch` subcommand.
func newPRReviewFetchCommand() *cobra.Command {
	opts := &PRReviewFetchOptions{}
	fetchCmd := &cobra.Command{
		Use:   "fetch",
		Short: "Fetch pull request review threads and write them to local harness state",
		Run: func(_ *cobra.Command, _ []string) {
			runPRReviewFetch(opts)
		},
	}
	flags := fetchCmd.Flags()
	flags.StringVar(&opts.PR, "pr", "", "pull request number (defaults to the current branch PR)")
	flags.StringVar(&opts.Output, "output", "", "explicit output path for the fetched review JSON")
	return fetchCmd
}
// newPRReviewTriageCommand builds the `pr-review triage` subcommand.
func newPRReviewTriageCommand() *cobra.Command {
	opts := &PRReviewTriageOptions{}
	triageCmd := &cobra.Command{
		Use:   "triage",
		Short: "Classify unresolved review threads into actionable, duplicate, outdated, or resolved",
		Run: func(_ *cobra.Command, _ []string) {
			runPRReviewTriage(opts)
		},
	}
	flags := triageCmd.Flags()
	flags.StringVar(&opts.PR, "pr", "", "pull request number (defaults to the current branch PR)")
	flags.StringVar(&opts.Output, "output", "", "explicit output path for the triage JSON")
	return triageCmd
}
// newPRReviewRespondCommand builds the `pr-review respond` subcommand.
// --comment-id and --body are mandatory; --thread-id additionally resolves
// the thread after the reply is posted.
func newPRReviewRespondCommand() *cobra.Command {
	opts := &PRReviewRespondOptions{}
	respondCmd := &cobra.Command{
		Use:   "respond",
		Short: "Reply to an inline pull request review comment and optionally resolve the thread",
		Run: func(_ *cobra.Command, _ []string) {
			runPRReviewRespond(opts)
		},
	}
	flags := respondCmd.Flags()
	flags.StringVar(&opts.PR, "pr", "", "pull request number (defaults to the current branch PR)")
	flags.IntVar(&opts.CommentID, "comment-id", 0, "top-level pull request review comment ID to reply to")
	flags.StringVar(&opts.ThreadID, "thread-id", "", "GraphQL review thread ID to resolve after replying")
	flags.StringVar(&opts.Body, "body", "", "reply body to post")
	for _, required := range []string{"comment-id", "body"} {
		_ = respondCmd.MarkFlagRequired(required)
	}
	return respondCmd
}
// newPRReviewResolveCommand builds the `pr-review resolve` subcommand, which
// resolves a thread by its GraphQL ID without posting any reply.
func newPRReviewResolveCommand() *cobra.Command {
	opts := &PRReviewRespondOptions{}
	resolveCmd := &cobra.Command{
		Use:   "resolve",
		Short: "Resolve a review thread without posting a reply",
		Run: func(_ *cobra.Command, _ []string) {
			runPRReviewResolve(opts)
		},
	}
	resolveCmd.Flags().StringVar(&opts.ThreadID, "thread-id", "", "GraphQL review thread ID to resolve")
	_ = resolveCmd.MarkFlagRequired("thread-id")
	return resolveCmd
}
// runPRReviewFetch downloads review threads for the resolved PR and persists
// them as JSON in the harness state directory (or the explicit --output path).
func runPRReviewFetch(opts *PRReviewFetchOptions) {
	number, err := resolvePRNumber(opts.PR)
	if err != nil {
		log.Fatalf("Failed to resolve PR number: %v", err)
	}
	pullRequest, err := fetchPRReview(number)
	if err != nil {
		log.Fatalf("Failed to fetch PR review threads: %v", err)
	}
	destination, err := reviewOutputPath(number, opts.Output, "threads.json")
	if err != nil {
		log.Fatalf("Failed to determine output path: %v", err)
	}
	writeJSON(destination, pullRequest)
	log.Infof("Fetched %d review threads into %s", len(pullRequest.Threads), destination)
}
// runPRReviewTriage fetches review threads for the resolved PR, classifies
// them via prreview.Triage, writes the result as JSON, and prints a
// human-readable summary line per thread to stdout.
func runPRReviewTriage(opts *PRReviewTriageOptions) {
	prNumber, err := resolvePRNumber(opts.PR)
	if err != nil {
		log.Fatalf("Failed to resolve PR number: %v", err)
	}
	review, err := fetchPRReview(prNumber)
	if err != nil {
		log.Fatalf("Failed to fetch PR review threads: %v", err)
	}
	triage := prreview.Triage(review)
	outputPath, err := reviewOutputPath(prNumber, opts.Output, "triage.json")
	if err != nil {
		log.Fatalf("Failed to determine output path: %v", err)
	}
	writeJSON(outputPath, triage)
	for _, summary := range triage.Summaries {
		// Build a "path" or "path:line" reference when the thread is anchored
		// to a file; threads without a path print an empty reference.
		lineRef := ""
		if summary.Thread.Path != "" {
			lineRef = summary.Thread.Path
			if summary.Thread.Line > 0 {
				lineRef = fmt.Sprintf("%s:%d", lineRef, summary.Thread.Line)
			}
		}
		fmt.Printf("[%s] %s %s %s\n", summary.Category, summary.Source, summary.Thread.ID, lineRef)
		for _, reason := range summary.Reasons {
			fmt.Printf(" - %s\n", reason)
		}
	}
	log.Infof("Wrote PR review triage to %s", outputPath)
}
// runPRReviewRespond posts a reply to an inline review comment and, when a
// thread ID was supplied, resolves that thread afterwards.
func runPRReviewRespond(opts *PRReviewRespondOptions) {
	number, err := resolvePRNumber(opts.PR)
	if err != nil {
		log.Fatalf("Failed to resolve PR number: %v", err)
	}
	slug, err := currentRepoSlug()
	if err != nil {
		log.Fatalf("Failed to resolve repo slug: %v", err)
	}
	if replyErr := replyToReviewComment(slug, number, opts.CommentID, opts.Body); replyErr != nil {
		log.Fatalf("Failed to reply to review comment: %v", replyErr)
	}
	threadID := strings.TrimSpace(opts.ThreadID)
	if threadID != "" {
		if resolveErr := resolveReviewThread(opts.ThreadID); resolveErr != nil {
			log.Fatalf("Failed to resolve review thread: %v", resolveErr)
		}
	}
	log.Infof("Posted reply to review comment %d on PR #%s", opts.CommentID, number)
}
// runPRReviewResolve resolves a single review thread by its GraphQL ID.
func runPRReviewResolve(options *PRReviewRespondOptions) {
	err := resolveReviewThread(options.ThreadID)
	if err != nil {
		log.Fatalf("Failed to resolve review thread: %v", err)
	}
	log.Infof("Resolved review thread %s", options.ThreadID)
}
// fetchPRReview pulls all review threads for the given PR number via the
// GitHub GraphQL API and converts the raw response into the internal
// prreview model.
func fetchPRReview(prNumber string) (prreview.PullRequest, error) {
	repoSlug, err := currentRepoSlug()
	if err != nil {
		return prreview.PullRequest{}, err
	}
	// repoSlug is expected to be "owner/name"; reject anything else up front.
	parts := strings.SplitN(repoSlug, "/", 2)
	if len(parts) != 2 {
		return prreview.PullRequest{}, fmt.Errorf("unexpected repo slug %q", repoSlug)
	}
	response, err := ghGraphQL(parts[0], parts[1], prNumber)
	if err != nil {
		return prreview.PullRequest{}, err
	}
	// Slices are initialized to empty (not nil) so downstream JSON output
	// serializes as [] rather than null.
	pr := prreview.PullRequest{
		Number:  response.Data.Repository.PullRequest.Number,
		Title:   response.Data.Repository.PullRequest.Title,
		URL:     response.Data.Repository.PullRequest.URL,
		Threads: []prreview.Thread{},
	}
	for _, thread := range response.Data.Repository.PullRequest.ReviewThreads.Nodes {
		item := prreview.Thread{
			ID:         thread.ID,
			IsResolved: thread.IsResolved,
			IsOutdated: thread.IsOutdated,
			Path:       thread.Path,
			Line:       thread.Line,
			StartLine:  thread.StartLine,
			Comments:   []prreview.Comment{},
		}
		for _, comment := range thread.Comments.Nodes {
			item.Comments = append(item.Comments, prreview.Comment{
				ID:          comment.DatabaseID,
				Body:        comment.Body,
				AuthorLogin: comment.Author.Login,
				URL:         comment.URL,
				CreatedAt:   comment.CreatedAt,
			})
		}
		pr.Threads = append(pr.Threads, item)
	}
	return pr, nil
}
// ghGraphQL runs the review-thread GraphQL query against the given repository
// via the gh CLI and decodes the response. It fetches at most the first 100
// threads with at most 100 comments each (no pagination).
func ghGraphQL(owner, name, prNumber string) (*ghReviewResponse, error) {
	git.CheckGitHubCLI()
	query := `query($owner:String!, $name:String!, $number:Int!) {
repository(owner:$owner, name:$name) {
pullRequest(number:$number) {
number
title
url
reviewThreads(first:100) {
nodes {
id
isResolved
isOutdated
path
line
startLine
comments(first:100) {
nodes {
databaseId
body
url
createdAt
author {
login
}
}
}
}
}
}
}
}`
	// -F sends typed variables; gh presumably coerces the numeric prNumber
	// string into the Int! the query declares — TODO confirm against gh docs.
	cmd := exec.Command(
		"gh", "api", "graphql",
		"-f", "query="+query,
		"-F", "owner="+owner,
		"-F", "name="+name,
		"-F", "number="+prNumber,
	)
	output, err := cmd.Output()
	if err != nil {
		// cmd.Output captures stderr on ExitError; surface it for debugging.
		if exitErr, ok := err.(*exec.ExitError); ok {
			return nil, fmt.Errorf("gh api graphql failed: %w: %s", err, strings.TrimSpace(string(exitErr.Stderr)))
		}
		return nil, fmt.Errorf("gh api graphql failed: %w", err)
	}
	var response ghReviewResponse
	if err := json.Unmarshal(output, &response); err != nil {
		return nil, fmt.Errorf("parse graphql response: %w", err)
	}
	return &response, nil
}
// replyToReviewComment posts a reply to a top-level inline review comment
// through the gh REST API "replies" endpoint.
func replyToReviewComment(repoSlug, prNumber string, commentID int, body string) error {
	endpoint := fmt.Sprintf("repos/%s/pulls/%s/comments/%d/replies", repoSlug, prNumber, commentID)
	_, err := ghString(
		"api",
		"--method", "POST",
		endpoint,
		"-f", "body="+body,
	)
	return err
}
// resolveReviewThread marks a review thread as resolved via the GraphQL
// resolveReviewThread mutation, using the gh CLI.
func resolveReviewThread(threadID string) error {
	git.CheckGitHubCLI()
	mutation := `mutation($threadId:ID!) {
resolveReviewThread(input:{threadId:$threadId}) {
thread {
id
isResolved
}
}
}`
	cmd := exec.Command(
		"gh", "api", "graphql",
		"-f", "query="+mutation,
		"-F", "threadId="+threadID,
	)
	// CombinedOutput folds stdout and stderr together so the error message
	// carries whatever gh printed.
	if output, err := cmd.CombinedOutput(); err != nil {
		return fmt.Errorf("resolve review thread: %w: %s", err, strings.TrimSpace(string(output)))
	}
	return nil
}
// reviewOutputPath decides where a review artifact file should be written.
// A non-blank explicit path wins unchanged; otherwise a per-PR directory is
// created under the agent-lab state root and fileName is placed inside it.
func reviewOutputPath(prNumber, explicit, fileName string) (string, error) {
	if strings.TrimSpace(explicit) != "" {
		return explicit, nil
	}
	commonGitDir, err := agentlab.GetCommonGitDir()
	if err != nil {
		return "", err
	}
	prDir := filepath.Join(agentlab.StateRoot(commonGitDir), "reviews", "pr-"+prNumber)
	if mkErr := os.MkdirAll(prDir, 0755); mkErr != nil {
		return "", fmt.Errorf("create review state dir: %w", mkErr)
	}
	return filepath.Join(prDir, fileName), nil
}
// writeJSON pretty-prints value as indented JSON into path, terminating the
// process with a fatal log on any encode or write failure.
func writeJSON(path string, value any) {
	encoded, err := json.MarshalIndent(value, "", " ")
	if err != nil {
		log.Fatalf("Failed to encode JSON for %s: %v", path, err)
	}
	if writeErr := os.WriteFile(path, encoded, 0644); writeErr != nil {
		log.Fatalf("Failed to write %s: %v", path, writeErr)
	}
}

View File

@@ -41,6 +41,7 @@ func NewRootCommand() *cobra.Command {
cmd.PersistentFlags().BoolVar(&opts.Debug, "debug", false, "run in debug mode")
// Add subcommands
cmd.AddCommand(NewAgentCheckCommand())
cmd.AddCommand(NewBackendCommand())
cmd.AddCommand(NewCheckLazyImportsCommand())
cmd.AddCommand(NewCherryPickCommand())
@@ -48,8 +49,13 @@ func NewRootCommand() *cobra.Command {
cmd.AddCommand(NewDeployCommand())
cmd.AddCommand(NewOpenAPICommand())
cmd.AddCommand(NewComposeCommand())
cmd.AddCommand(NewJourneyCommand())
cmd.AddCommand(NewLogsCommand())
cmd.AddCommand(NewPullCommand())
cmd.AddCommand(NewPRChecksCommand())
cmd.AddCommand(NewPRMergeCommand())
cmd.AddCommand(NewPROpenCommand())
cmd.AddCommand(NewPRReviewCommand())
cmd.AddCommand(NewRunCICommand())
cmd.AddCommand(NewScreenshotDiffCommand())
cmd.AddCommand(NewDesktopCommand())
@@ -58,6 +64,8 @@ func NewRootCommand() *cobra.Command {
cmd.AddCommand(NewLatestStableTagCommand())
cmd.AddCommand(NewWhoisCommand())
cmd.AddCommand(NewTraceCommand())
cmd.AddCommand(NewVerifyCommand())
cmd.AddCommand(NewWorktreeCommand())
return cmd
}

318
tools/ods/cmd/verify.go Normal file
View File

@@ -0,0 +1,318 @@
package cmd
import (
"encoding/json"
"fmt"
"io"
"os"
"os/exec"
"path/filepath"
"time"
log "github.com/sirupsen/logrus"
"github.com/spf13/cobra"
"github.com/onyx-dot-app/onyx/tools/ods/internal/agentlab"
"github.com/onyx-dot-app/onyx/tools/ods/internal/envutil"
)
// VerifyOptions holds flag values for the `verify` command.
type VerifyOptions struct {
	BaseRef           string   // git ref agent-check diffs against; empty falls back to the worktree base ref, then HEAD
	SkipAgentCheck    bool     // skip the diff-based agent-check step entirely
	Worktree          string   // tracked agent-lab worktree to verify instead of the current checkout
	PytestPaths       []string // pytest paths/node ids to run; empty skips the pytest step
	PlaywrightPaths   []string // Playwright test paths to run
	PlaywrightGrep    string   // --grep value passed through to Playwright; non-empty also triggers the step
	PlaywrightProject string   // --project value passed through to Playwright
}
// VerifySummary is the machine-readable result of one `verify` run, written
// as summary.json into the artifact directory.
type VerifySummary struct {
	GeneratedAt string              `json:"generated_at"` // RFC3339 UTC timestamp of the run
	RepoRoot    string              `json:"repo_root"`    // checkout the steps ran against
	Worktree    *agentlab.Manifest  `json:"worktree,omitempty"` // set only when verifying a tracked agent-lab worktree
	Steps       []VerifyStepSummary `json:"steps"`        // executed steps, in order; stops after the first failure
}
// VerifyStepSummary records the outcome of a single verification step.
type VerifyStepSummary struct {
	Name        string   `json:"name"`                  // step identifier: "agent-check", "pytest", or "playwright"
	Status      string   `json:"status"`                // "passed" or "failed"
	Command     []string `json:"command,omitempty"`     // command line the step ran (or would run)
	DurationMS  int64    `json:"duration_ms"`           // wall-clock duration of the step
	LogPath     string   `json:"log_path,omitempty"`    // path to the captured stdout/stderr log, if any
	ArtifactDir string   `json:"artifact_dir,omitempty"` // extra artifact directory the step produced
	Details     []string `json:"details,omitempty"`     // human-readable notes, violations, or error text
}
// NewVerifyCommand creates the verify command.
func NewVerifyCommand() *cobra.Command {
	opts := &VerifyOptions{}
	cmd := &cobra.Command{
		Use:   "verify",
		Short: "Run the agent-lab verification ladder and write a machine-readable summary",
		Long: `Run the agent-lab verification ladder for the current checkout.
This command composes the diff-based agent-check with optional pytest and
Playwright execution, then writes a JSON summary into the worktree artifact
directory so agents can inspect the result without re-parsing console output.
Use --worktree to run the same flow against a tracked target worktree from the
agent-lab control checkout.`,
		Run: func(cmd *cobra.Command, args []string) {
			runVerify(opts)
		},
	}
	// Repeatable flags (StringArrayVar) accumulate one entry per occurrence.
	cmd.Flags().StringVar(&opts.BaseRef, "base-ref", "", "git ref to compare against for agent-check (defaults to the worktree base ref or HEAD)")
	cmd.Flags().BoolVar(&opts.SkipAgentCheck, "skip-agent-check", false, "skip the diff-based agent-check step")
	cmd.Flags().StringVar(&opts.Worktree, "worktree", "", "tracked agent-lab worktree to verify from instead of the current checkout")
	cmd.Flags().StringArrayVar(&opts.PytestPaths, "pytest", nil, "pytest path or node id to run (repeatable)")
	cmd.Flags().StringArrayVar(&opts.PlaywrightPaths, "playwright", nil, "Playwright test path to run (repeatable)")
	cmd.Flags().StringVar(&opts.PlaywrightGrep, "playwright-grep", "", "grep passed through to Playwright")
	cmd.Flags().StringVar(&opts.PlaywrightProject, "playwright-project", "", "Playwright project to run")
	return cmd
}
// runVerify executes the verification ladder (agent-check, then pytest, then
// Playwright — each optional) and writes summary.json into the artifact root.
// The ladder short-circuits: the first failing step still gets recorded in the
// summary before the process exits 1.
func runVerify(opts *VerifyOptions) {
	repoRoot, manifest, hasManifest := resolveAgentLabTarget(opts.Worktree)
	commonGitDir, err := agentlab.GetCommonGitDir()
	if err != nil {
		log.Fatalf("Failed to determine git common dir: %v", err)
	}
	runAt := time.Now().UTC()
	// Plain checkouts write into web/output; tracked worktrees get a
	// timestamped subdirectory under their own artifact dir.
	artifactRoot := filepath.Join(repoRoot, "web", "output")
	if hasManifest {
		artifactRoot = filepath.Join(manifest.ArtifactDir, "verify", runAt.Format("20060102-150405"))
	}
	if err := os.MkdirAll(artifactRoot, 0755); err != nil {
		log.Fatalf("Failed to create verify artifact dir: %v", err)
	}
	summary := VerifySummary{
		GeneratedAt: runAt.Format(time.RFC3339),
		RepoRoot:    repoRoot,
		Steps:       make([]VerifyStepSummary, 0, 3),
	}
	if hasManifest {
		// Copy before taking the address so the summary owns its own value.
		manifestCopy := manifest
		summary.Worktree = &manifestCopy
	}
	if !opts.SkipAgentCheck {
		// Base ref precedence: explicit flag > worktree manifest > HEAD.
		baseRef := opts.BaseRef
		if baseRef == "" && hasManifest {
			baseRef = manifest.BaseRef
		}
		if baseRef == "" {
			baseRef = "HEAD"
		}
		step, passed := runAgentCheckVerifyStep(repoRoot, opts.Worktree, baseRef)
		summary.Steps = append(summary.Steps, step)
		if !passed {
			writeVerifySummary(summary, artifactRoot, commonGitDir, manifest, hasManifest, runAt)
			os.Exit(1)
		}
	}
	if len(opts.PytestPaths) > 0 {
		step, passed := runPytestVerifyStep(repoRoot, artifactRoot, manifest, hasManifest, opts.PytestPaths)
		summary.Steps = append(summary.Steps, step)
		if !passed {
			writeVerifySummary(summary, artifactRoot, commonGitDir, manifest, hasManifest, runAt)
			os.Exit(1)
		}
	}
	// A grep alone is enough to trigger Playwright, even without explicit paths.
	if len(opts.PlaywrightPaths) > 0 || opts.PlaywrightGrep != "" {
		step, passed := runPlaywrightVerifyStep(repoRoot, artifactRoot, manifest, hasManifest, opts)
		summary.Steps = append(summary.Steps, step)
		if !passed {
			writeVerifySummary(summary, artifactRoot, commonGitDir, manifest, hasManifest, runAt)
			os.Exit(1)
		}
	}
	writeVerifySummary(summary, artifactRoot, commonGitDir, manifest, hasManifest, runAt)
	log.Infof("Verification summary written to %s", filepath.Join(artifactRoot, "summary.json"))
}
// runAgentCheckVerifyStep runs the diff-based agent-check in-process and
// translates its result into a VerifyStepSummary. Returns the step plus a
// pass/fail flag; any violations are flattened into Details.
func runAgentCheckVerifyStep(repoRoot, worktree, baseRef string) (VerifyStepSummary, bool) {
	startedAt := time.Now()
	opts := &AgentCheckOptions{BaseRef: baseRef, Worktree: worktree, RepoRoot: repoRoot}
	result, err := evaluateAgentCheck(opts, nil)
	// Command is recorded for reproducibility; the check itself ran in-process.
	step := VerifyStepSummary{
		Name:       "agent-check",
		Command:    []string{"ods", "agent-check", "--base-ref", baseRef},
		DurationMS: time.Since(startedAt).Milliseconds(),
	}
	if worktree != "" {
		step.Command = append(step.Command, "--worktree", worktree)
	}
	if err != nil {
		step.Status = "failed"
		step.Details = []string{err.Error()}
		return step, false
	}
	if len(result.Violations) == 0 && len(result.DocViolations) == 0 {
		step.Status = "passed"
		return step, true
	}
	step.Status = "failed"
	for _, violation := range result.Violations {
		step.Details = append(step.Details, fmt.Sprintf("%s:%d [%s] %s", violation.Path, violation.LineNum, violation.RuleID, violation.Message))
	}
	for _, violation := range result.DocViolations {
		step.Details = append(step.Details, fmt.Sprintf("%s [agent-docs] %s", violation.Path, violation.Message))
	}
	return step, false
}
// runPytestVerifyStep runs pytest from the backend directory via
// `python -m dotenv -f .vscode/.env run -- pytest <paths...>`, logging output
// to pytest.log under the artifact root. Worktree manifests overlay their
// runtime env and add dependency-mode details to the step.
func runPytestVerifyStep(repoRoot, artifactRoot string, manifest agentlab.Manifest, hasManifest bool, pytestPaths []string) (VerifyStepSummary, bool) {
	pythonExecutable := pythonForRepo(repoRoot)
	args := append([]string{"-m", "dotenv", "-f", ".vscode/.env", "run", "--", "pytest"}, pytestPaths...)
	extraEnv := map[string]string{}
	if hasManifest {
		for key, value := range manifest.RuntimeEnv() {
			extraEnv[key] = value
		}
	}
	step, passed := runLoggedCommand(
		"pytest",
		filepath.Join(artifactRoot, "pytest.log"),
		filepath.Join(repoRoot, "backend"),
		extraEnv,
		pythonExecutable,
		args...,
	)
	if hasManifest {
		step.Details = append(step.Details, fmt.Sprintf("dependency mode: %s", manifest.ResolvedDependencies().Mode))
		step.Details = append(step.Details, fmt.Sprintf("search infra: %s", manifest.ResolvedDependencies().SearchInfraMode))
	}
	return step, passed
}
// runPlaywrightVerifyStep runs `npx playwright test` from the web directory
// with optional --grep/--project filters, logging to playwright.log under the
// artifact root. Worktree manifests overlay their runtime env and annotate
// the step with URLs, dependency info, and any dependency warnings.
func runPlaywrightVerifyStep(repoRoot, artifactRoot string, manifest agentlab.Manifest, hasManifest bool, opts *VerifyOptions) (VerifyStepSummary, bool) {
	args := []string{"playwright", "test"}
	args = append(args, opts.PlaywrightPaths...)
	if opts.PlaywrightGrep != "" {
		args = append(args, "--grep", opts.PlaywrightGrep)
	}
	if opts.PlaywrightProject != "" {
		args = append(args, "--project", opts.PlaywrightProject)
	}
	extraEnv := map[string]string{}
	if hasManifest {
		for key, value := range manifest.RuntimeEnv() {
			extraEnv[key] = value
		}
	}
	step, passed := runLoggedCommand(
		"playwright",
		filepath.Join(artifactRoot, "playwright.log"),
		filepath.Join(repoRoot, "web"),
		extraEnv,
		"npx",
		args...,
	)
	// Playwright writes its own artifacts under web/output regardless of passing.
	step.ArtifactDir = filepath.Join(repoRoot, "web", "output")
	if hasManifest {
		step.Details = append(step.Details, fmt.Sprintf("base url: %s", manifest.URLs.Web))
		step.Details = append(step.Details, fmt.Sprintf("dependency mode: %s", manifest.ResolvedDependencies().Mode))
		step.Details = append(step.Details, fmt.Sprintf("search infra: %s", manifest.ResolvedDependencies().SearchInfraMode))
		step.Details = append(step.Details, fmt.Sprintf("reuse Chrome DevTools MCP against %s for interactive browser validation", manifest.URLs.Web))
		step.Details = append(step.Details, manifest.DependencyWarnings()...)
	}
	return step, passed
}
// runLoggedCommand runs executable in workdir, teeing stdout/stderr to both
// the console and logPath, and returns a step summary plus pass/fail. extraEnv
// entries override the inherited environment. Duration includes log setup.
func runLoggedCommand(name, logPath, workdir string, extraEnv map[string]string, executable string, args ...string) (VerifyStepSummary, bool) {
	startedAt := time.Now()
	if err := os.MkdirAll(filepath.Dir(logPath), 0755); err != nil {
		return VerifyStepSummary{
			Name:       name,
			Status:     "failed",
			DurationMS: time.Since(startedAt).Milliseconds(),
			Details:    []string{fmt.Sprintf("create log dir: %v", err)},
		}, false
	}
	logFile, err := os.Create(logPath)
	if err != nil {
		return VerifyStepSummary{
			Name:       name,
			Status:     "failed",
			DurationMS: time.Since(startedAt).Milliseconds(),
			Details:    []string{fmt.Sprintf("create log file: %v", err)},
		}, false
	}
	// Best-effort close; write errors at close time are intentionally ignored.
	defer func() { _ = logFile.Close() }()
	cmd := exec.Command(executable, args...)
	cmd.Dir = workdir
	cmd.Stdout = io.MultiWriter(os.Stdout, logFile)
	cmd.Stderr = io.MultiWriter(os.Stderr, logFile)
	if len(extraEnv) > 0 {
		cmd.Env = envutil.ApplyOverrides(os.Environ(), extraEnv)
	}
	step := VerifyStepSummary{
		Name:       name,
		Command:    append([]string{executable}, args...),
		LogPath:    logPath,
		DurationMS: 0,
	}
	err = cmd.Run()
	// Measured after the run so the summary reflects actual wall-clock time.
	step.DurationMS = time.Since(startedAt).Milliseconds()
	if err != nil {
		step.Status = "failed"
		step.Details = []string{err.Error()}
		return step, false
	}
	step.Status = "passed"
	return step, true
}
// writeVerifySummary serializes the summary to summary.json under artifactRoot
// and, for tracked worktrees, records the verification in the worktree
// metadata. Metadata update failures only warn; the summary file is the
// source of truth.
func writeVerifySummary(summary VerifySummary, artifactRoot, commonGitDir string, manifest agentlab.Manifest, hasManifest bool, runAt time.Time) {
	summaryPath := filepath.Join(artifactRoot, "summary.json")
	data, err := json.MarshalIndent(summary, "", " ")
	if err != nil {
		log.Fatalf("Failed to encode verify summary: %v", err)
	}
	if err := os.WriteFile(summaryPath, data, 0644); err != nil {
		log.Fatalf("Failed to write verify summary: %v", err)
	}
	if hasManifest {
		if err := agentlab.UpdateVerification(commonGitDir, manifest, summaryPath, runAt); err != nil {
			log.Warnf("Failed to update worktree verification metadata: %v", err)
		}
	}
}
// pythonForRepo picks the Python interpreter for a checkout: the checkout's
// own .venv if present, else the creating checkout's .venv (for agent-lab
// worktrees), else whatever "python" resolves to on PATH.
func pythonForRepo(repoRoot string) string {
	venvPython := filepath.Join(repoRoot, ".venv", "bin", "python")
	if _, statErr := os.Stat(venvPython); statErr == nil {
		return venvPython
	}
	manifest, found := currentAgentLabManifest(repoRoot)
	if found {
		shared := filepath.Join(manifest.CreatedFromPath, ".venv", "bin", "python")
		if _, statErr := os.Stat(shared); statErr == nil {
			return shared
		}
	}
	return "python"
}

View File

@@ -13,6 +13,7 @@ import (
log "github.com/sirupsen/logrus"
"github.com/spf13/cobra"
"github.com/onyx-dot-app/onyx/tools/ods/internal/envutil"
"github.com/onyx-dot-app/onyx/tools/ods/internal/paths"
)
@@ -22,28 +23,32 @@ type webPackageJSON struct {
// NewWebCommand creates a command that runs npm scripts from the web directory.
func NewWebCommand() *cobra.Command {
var worktree string
cmd := &cobra.Command{
Use: "web <script> [args...]",
Short: "Run web/package.json npm scripts",
Long: webHelpDescription(),
Args: cobra.MinimumNArgs(1),
Args: cobra.MinimumNArgs(1),
ValidArgsFunction: func(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) {
if len(args) > 0 {
return nil, cobra.ShellCompDirectiveNoFileComp
}
return webScriptNames(), cobra.ShellCompDirectiveNoFileComp
},
Run: func(cmd *cobra.Command, args []string) {
runWebScript(args)
},
}
cmd.Flags().SetInterspersed(false)
cmd.Flags().StringVar(&worktree, "worktree", "", "tracked agent-lab worktree to run from instead of the current checkout")
cmd.Run = func(cmd *cobra.Command, args []string) {
runWebScript(args, worktree)
}
return cmd
}
func runWebScript(args []string) {
webDir, err := webDir()
func runWebScript(args []string, worktree string) {
repoRoot, manifest, hasManifest := resolveAgentLabTarget(worktree)
webDir, err := webDirForRoot(repoRoot)
if err != nil {
log.Fatalf("Failed to find web directory: %v", err)
}
@@ -68,6 +73,13 @@ func runWebScript(args []string) {
webCmd.Stderr = os.Stderr
webCmd.Stdin = os.Stdin
if hasManifest {
webCmd.Env = envutil.ApplyOverrides(os.Environ(), manifest.RuntimeEnv())
log.Infof("agent-lab worktree %s detected: web=%s api=%s", manifest.Branch, manifest.URLs.Web, manifest.URLs.API)
log.Infof("lane=%s base-ref=%s", manifest.ResolvedLane(), manifest.BaseRef)
log.Infof("dependency mode=%s search-infra=%s", manifest.ResolvedDependencies().Mode, manifest.ResolvedDependencies().SearchInfraMode)
}
if err := webCmd.Run(); err != nil {
// For wrapped commands, preserve the child process's exit code and
// avoid duplicating already-printed stderr output.
@@ -101,7 +113,8 @@ func webHelpDescription() string {
Examples:
ods web dev
ods web lint
ods web test --watch`
ods web test --watch
ods web dev --worktree codex/fix/auth-banner-modal`
scripts := webScriptNames()
if len(scripts) == 0 {
@@ -112,7 +125,7 @@ Examples:
}
func loadWebScripts() (map[string]string, error) {
webDir, err := webDir()
webDir, err := webDirForRoot("")
if err != nil {
return nil, err
}
@@ -135,10 +148,13 @@ func loadWebScripts() (map[string]string, error) {
return pkg.Scripts, nil
}
func webDir() (string, error) {
root, err := paths.GitRoot()
if err != nil {
return "", err
func webDirForRoot(root string) (string, error) {
if root == "" {
var err error
root, err = paths.GitRoot()
if err != nil {
return "", err
}
}
return filepath.Join(root, "web"), nil
}

626
tools/ods/cmd/worktree.go Normal file
View File

@@ -0,0 +1,626 @@
package cmd
import (
"fmt"
"os"
"os/exec"
"path/filepath"
"strings"
"text/tabwriter"
log "github.com/sirupsen/logrus"
"github.com/spf13/cobra"
"github.com/onyx-dot-app/onyx/tools/ods/internal/agentlab"
"github.com/onyx-dot-app/onyx/tools/ods/internal/paths"
)
// WorktreeCreateOptions holds flag values for `worktree create`.
type WorktreeCreateOptions struct {
	From           string // git ref to branch from; empty infers the default from the branch lane
	Path           string // custom checkout path; empty uses the default layout
	Bootstrap      bool   // bootstrap env, Python, and frontend deps after creation
	DependencyMode string // "namespaced" or "shared" external dependency mode
}
// WorktreeRemoveOptions holds flag values for `worktree remove`.
type WorktreeRemoveOptions struct {
	Force    bool // remove even if git reports uncommitted changes
	DropDeps bool // tear down namespaced dependencies before removal
}
// WorktreeBootstrapOptions holds flag values for `worktree bootstrap`.
// Each field is parsed into an agentlab.BootstrapMode.
type WorktreeBootstrapOptions struct {
	EnvMode    string // env bootstrap mode: auto, skip, link, copy
	PythonMode string // Python bootstrap mode: auto, skip, link, copy
	WebMode    string // frontend bootstrap mode: auto, skip, clone, copy, npm
}
// NewWorktreeCommand creates the parent worktree command and attaches all of
// its subcommands.
func NewWorktreeCommand() *cobra.Command {
	root := &cobra.Command{
		Use:   "worktree",
		Short: "Manage agent-lab git worktrees and harness metadata",
		Long: `Manage agent-lab git worktrees and the local harness state that makes
them bootable with isolated ports, URLs, and artifact directories.`,
	}
	for _, sub := range []*cobra.Command{
		newWorktreeCreateCommand(),
		newWorktreeBootstrapCommand(),
		newWorktreeDepsCommand(),
		newWorktreeStatusCommand(),
		newWorktreeShowCommand(),
		newWorktreeRemoveCommand(),
	} {
		root.AddCommand(sub)
	}
	return root
}
// newWorktreeCreateCommand builds the `worktree create <branch>` subcommand.
func newWorktreeCreateCommand() *cobra.Command {
	opts := &WorktreeCreateOptions{}
	cmd := &cobra.Command{
		Use:   "create <branch>",
		Short: "Create a new agent-lab worktree with isolated runtime metadata",
		Long: `Create a tracked agent-lab worktree and bootstrap its local runtime state.
Branch lanes control the default base ref when --from is not supplied:
codex/lab/<name> -> codex/agent-lab
codex/fix/<name> -> origin/main
codex/feat/<name> -> origin/main
Use conventional branch lanes for product work so the base stays explicit.`,
		Args: cobra.ExactArgs(1),
		Run: func(cmd *cobra.Command, args []string) {
			runWorktreeCreate(args[0], opts)
		},
	}
	cmd.Flags().StringVar(&opts.From, "from", "", "git ref to branch from (defaults are inferred from the branch lane)")
	cmd.Flags().StringVar(&opts.Path, "path", "", "custom checkout path for the new worktree")
	// Bootstrap defaults to true; pass --bootstrap=false to opt out.
	cmd.Flags().BoolVar(&opts.Bootstrap, "bootstrap", true, "bootstrap env, Python, and frontend dependencies for the worktree")
	cmd.Flags().StringVar(&opts.DependencyMode, "dependency-mode", string(agentlab.DependencyModeNamespaced), "dependency mode: namespaced or shared")
	return cmd
}
// newWorktreeBootstrapCommand builds the `worktree bootstrap [worktree]`
// subcommand; omitting the argument targets the current worktree.
func newWorktreeBootstrapCommand() *cobra.Command {
	opts := &WorktreeBootstrapOptions{}
	bootstrapCmd := &cobra.Command{
		Use:   "bootstrap [worktree]",
		Short: "Bootstrap env files and dependencies for an agent-lab worktree",
		Args:  cobra.MaximumNArgs(1),
		Run: func(_ *cobra.Command, args []string) {
			target := ""
			if len(args) > 0 {
				target = args[0]
			}
			runWorktreeBootstrap(target, opts)
		},
	}
	flags := bootstrapCmd.Flags()
	flags.StringVar(&opts.EnvMode, "env-mode", string(agentlab.BootstrapModeAuto), "env bootstrap mode: auto, skip, link, copy")
	flags.StringVar(&opts.PythonMode, "python-mode", string(agentlab.BootstrapModeAuto), "Python bootstrap mode: auto, skip, link, copy")
	flags.StringVar(&opts.WebMode, "web-mode", string(agentlab.BootstrapModeAuto), "frontend bootstrap mode: auto, skip, clone, copy, npm")
	return bootstrapCmd
}
// newWorktreeDepsCommand builds the `worktree deps` command group
// (up/status/reset/down). Every subcommand takes an optional worktree
// identifier; omitting it targets the current worktree.
func newWorktreeDepsCommand() *cobra.Command {
	cmd := &cobra.Command{
		Use:   "deps",
		Short: "Manage namespaced external dependencies for an agent-lab worktree",
	}
	// subcommand factors out the identical identifier-extraction boilerplate
	// that each of the four subcommands previously duplicated.
	subcommand := func(use, short string, run func(identifier string)) *cobra.Command {
		return &cobra.Command{
			Use:   use,
			Short: short,
			Args:  cobra.MaximumNArgs(1),
			Run: func(cmd *cobra.Command, args []string) {
				identifier := ""
				if len(args) == 1 {
					identifier = args[0]
				}
				run(identifier)
			},
		}
	}
	cmd.AddCommand(subcommand("up [worktree]", "Provision external dependency state for a worktree", runWorktreeDepsUp))
	cmd.AddCommand(subcommand("status [worktree]", "Inspect external dependency state for a worktree", runWorktreeDepsStatus))
	cmd.AddCommand(subcommand("reset [worktree]", "Reset namespaced external dependency state for a worktree", runWorktreeDepsReset))
	cmd.AddCommand(subcommand("down [worktree]", "Tear down namespaced external dependency state for a worktree", runWorktreeDepsDown))
	return cmd
}
// newWorktreeStatusCommand builds the `worktree status` subcommand.
func newWorktreeStatusCommand() *cobra.Command {
	statusCmd := &cobra.Command{
		Use:   "status",
		Short: "List tracked agent-lab worktrees",
		Run: func(_ *cobra.Command, _ []string) {
			runWorktreeStatus()
		},
	}
	return statusCmd
}
// newWorktreeShowCommand builds the `worktree show [worktree]` subcommand;
// omitting the argument targets the current worktree.
func newWorktreeShowCommand() *cobra.Command {
	return &cobra.Command{
		Use:   "show [worktree]",
		Short: "Show detailed metadata for an agent-lab worktree",
		Args:  cobra.MaximumNArgs(1),
		Run: func(_ *cobra.Command, args []string) {
			target := ""
			if len(args) > 0 {
				target = args[0]
			}
			runWorktreeShow(target)
		},
	}
}
// newWorktreeRemoveCommand builds the `worktree remove <worktree>` subcommand.
func newWorktreeRemoveCommand() *cobra.Command {
	opts := &WorktreeRemoveOptions{}
	cmd := &cobra.Command{
		Use:   "remove <worktree>",
		Short: "Remove an agent-lab worktree and its local state",
		Args:  cobra.ExactArgs(1),
		Run: func(cmd *cobra.Command, args []string) {
			runWorktreeRemove(args[0], opts)
		},
	}
	cmd.Flags().BoolVar(&opts.Force, "force", false, "force removal even if git reports uncommitted changes")
	cmd.Flags().BoolVar(&opts.DropDeps, "drop-deps", false, "tear down namespaced dependencies before removing the worktree")
	return cmd
}
// runWorktreeCreate creates a tracked agent-lab worktree: it allocates ports,
// runs `git worktree add`, writes env files and the manifest, optionally
// bootstraps dependencies, provisions external dependency state, then prints
// a summary with next steps. Any failure exits the process fatally.
func runWorktreeCreate(branch string, opts *WorktreeCreateOptions) {
	repoRoot, err := paths.GitRoot()
	if err != nil {
		log.Fatalf("Failed to determine git root: %v", err)
	}
	commonGitDir, err := agentlab.GetCommonGitDir()
	if err != nil {
		log.Fatalf("Failed to determine git common dir: %v", err)
	}
	// Refuse to create a second worktree for a branch that is already tracked.
	if manifest, found, err := agentlab.FindByIdentifier(commonGitDir, branch); err != nil {
		log.Fatalf("Failed to inspect existing worktrees: %v", err)
	} else if found {
		log.Fatalf("Worktree already exists for %s at %s", manifest.Branch, manifest.CheckoutPath)
	}
	// Port allocation considers all existing manifests to avoid collisions.
	manifests, err := agentlab.LoadAll(commonGitDir)
	if err != nil {
		log.Fatalf("Failed to load worktree metadata: %v", err)
	}
	ports, err := agentlab.AllocatePorts(manifests)
	if err != nil {
		log.Fatalf("Failed to allocate worktree ports: %v", err)
	}
	dependencyMode := agentlab.DependencyMode(opts.DependencyMode)
	switch dependencyMode {
	case agentlab.DependencyModeShared, agentlab.DependencyModeNamespaced:
	default:
		log.Fatalf("Invalid dependency mode %q: must be shared or namespaced", opts.DependencyMode)
	}
	checkoutPath := opts.Path
	if checkoutPath == "" {
		checkoutPath = agentlab.DefaultCheckoutPath(repoRoot, branch)
	}
	checkoutPath, err = filepath.Abs(checkoutPath)
	if err != nil {
		log.Fatalf("Failed to resolve checkout path: %v", err)
	}
	if _, err := os.Stat(checkoutPath); err == nil {
		log.Fatalf("Checkout path already exists: %s", checkoutPath)
	}
	// Base ref defaults are lane-derived (see the create command's Long help).
	baseSelection := agentlab.ResolveCreateBaseRef(branch, opts.From, agentlab.GitRefExists)
	manifest := agentlab.BuildManifest(
		repoRoot,
		commonGitDir,
		branch,
		baseSelection.Lane,
		baseSelection.Ref,
		checkoutPath,
		ports,
		dependencyMode,
	)
	// hooksPath=/dev/null disables repo hooks during worktree creation.
	args := []string{"-c", "core.hooksPath=/dev/null", "worktree", "add", "-b", branch, checkoutPath, baseSelection.Ref}
	log.Infof("Creating worktree %s at %s", branch, checkoutPath)
	gitCmd := exec.Command("git", args...)
	gitCmd.Stdout = os.Stdout
	gitCmd.Stderr = os.Stderr
	gitCmd.Stdin = os.Stdin
	if err := gitCmd.Run(); err != nil {
		log.Fatalf("git worktree add failed: %v", err)
	}
	// Store the symlink-resolved path so later lookups match git's view.
	if resolvedPath, err := filepath.EvalSymlinks(checkoutPath); err == nil {
		manifest.CheckoutPath = resolvedPath
	}
	if err := agentlab.WriteEnvFiles(manifest); err != nil {
		log.Fatalf("Failed to write worktree env files: %v", err)
	}
	if err := agentlab.WriteManifest(commonGitDir, manifest); err != nil {
		log.Fatalf("Failed to write worktree manifest: %v", err)
	}
	if opts.Bootstrap {
		bootstrapResult, err := agentlab.Bootstrap(manifest, agentlab.BootstrapOptions{
			EnvMode:    agentlab.BootstrapModeAuto,
			PythonMode: agentlab.BootstrapModeAuto,
			WebMode:    agentlab.BootstrapModeAuto,
		})
		if err != nil {
			log.Fatalf("Failed to bootstrap worktree: %v", err)
		}
		for _, action := range bootstrapResult.Actions {
			fmt.Printf(" bootstrap: %s\n", action)
		}
	}
	manifest, dependencyResult, err := agentlab.ProvisionDependencies(commonGitDir, manifest)
	if err != nil {
		log.Fatalf("Failed to provision worktree dependencies: %v", err)
	}
	for _, action := range dependencyResult.Actions {
		fmt.Printf(" deps: %s\n", action)
	}
	// Human-readable summary of everything that was set up.
	fmt.Printf("Created agent-lab worktree %s\n", manifest.Branch)
	fmt.Printf(" checkout: %s\n", manifest.CheckoutPath)
	fmt.Printf(" lane: %s\n", manifest.ResolvedLane())
	fmt.Printf(" base ref: %s\n", manifest.BaseRef)
	fmt.Printf(" base selection: %s\n", baseSelection.Reason)
	fmt.Printf(" dependency mode: %s\n", manifest.ResolvedDependencies().Mode)
	if manifest.ResolvedDependencies().Namespace != "" {
		fmt.Printf(" dependency namespace: %s\n", manifest.ResolvedDependencies().Namespace)
	}
	if manifest.ResolvedDependencies().Mode == agentlab.DependencyModeNamespaced {
		fmt.Printf(" postgres database: %s\n", manifest.ResolvedDependencies().PostgresDatabase)
		fmt.Printf(" redis prefix: %s\n", manifest.ResolvedDependencies().RedisPrefix)
		fmt.Printf(" file-store bucket: %s\n", manifest.ResolvedDependencies().FileStoreBucket)
	}
	fmt.Printf(" web url: %s\n", manifest.URLs.Web)
	fmt.Printf(" api url: %s\n", manifest.URLs.API)
	fmt.Printf(" mcp url: %s\n", manifest.URLs.MCP)
	fmt.Printf(" artifacts: %s\n", manifest.ArtifactDir)
	for _, warning := range manifest.DependencyWarnings() {
		fmt.Printf(" note: %s\n", warning)
	}
	// Next-step hints differ by lane: product-lane worktrees are driven from
	// the control checkout via --worktree; others run harness commands locally.
	fmt.Printf("\nNext steps:\n")
	fmt.Printf(" cd %s\n", manifest.CheckoutPath)
	fmt.Printf(" # Make edits in the worktree itself.\n")
	if manifest.ResolvedLane() == agentlab.WorktreeLaneProduct {
		fmt.Printf(" # Run harness commands from the control checkout with --worktree %s.\n", manifest.Branch)
		fmt.Printf(" ods verify --worktree %s\n", manifest.Branch)
		fmt.Printf(" ods backend api --worktree %s\n", manifest.Branch)
		fmt.Printf(" ods web dev --worktree %s\n", manifest.Branch)
	} else {
		fmt.Printf(" ods backend api\n")
		fmt.Printf(" ods backend model_server\n")
		fmt.Printf(" ods web dev\n")
		fmt.Printf(" ods verify\n")
	}
}
// runWorktreeBootstrap re-runs the env/python/web bootstrap steps for an
// existing agent-lab worktree (resolved by branch, id, path, or — when the
// identifier is empty — the current checkout) and prints each action taken.
// Any failure is fatal.
func runWorktreeBootstrap(identifier string, opts *WorktreeBootstrapOptions) {
	target := mustResolveWorktree(identifier)
	bootstrapOpts := agentlab.BootstrapOptions{
		EnvMode:    agentlab.BootstrapMode(opts.EnvMode),
		PythonMode: agentlab.BootstrapMode(opts.PythonMode),
		WebMode:    agentlab.BootstrapMode(opts.WebMode),
	}
	result, err := agentlab.Bootstrap(target, bootstrapOpts)
	if err != nil {
		log.Fatalf("Failed to bootstrap worktree %s: %v", target.Branch, err)
	}
	fmt.Printf("Bootstrapped %s\n", target.Branch)
	for _, step := range result.Actions {
		fmt.Printf("  %s\n", step)
	}
}
// runWorktreeDepsUp runs agentlab.ProvisionDependencies for the resolved
// worktree, then prints the provisioning actions and any manifest warnings.
// Any failure is fatal.
func runWorktreeDepsUp(identifier string) {
	gitDir, err := agentlab.GetCommonGitDir()
	if err != nil {
		log.Fatalf("Failed to determine git common dir: %v", err)
	}
	target := mustResolveWorktree(identifier)
	target, provisioned, err := agentlab.ProvisionDependencies(gitDir, target)
	if err != nil {
		log.Fatalf("Failed to provision dependencies for %s: %v", target.Branch, err)
	}
	fmt.Printf("Provisioned dependencies for %s\n", target.Branch)
	for _, step := range provisioned.Actions {
		fmt.Printf("  %s\n", step)
	}
	for _, note := range target.DependencyWarnings() {
		fmt.Printf("  note: %s\n", note)
	}
}
// runWorktreeDepsStatus prints a dependency report for the resolved
// worktree: mode, namespace, readiness/usage counters per backing service,
// the search-infra mode, and any manifest warnings. Lines whose backing
// field is empty are omitted entirely.
func runWorktreeDepsStatus(identifier string) {
	manifest := mustResolveWorktree(identifier)
	status, err := agentlab.InspectDependencies(manifest)
	if err != nil {
		log.Fatalf("Failed to inspect dependencies for %s: %v", manifest.Branch, err)
	}
	fmt.Printf("branch: %s\n", manifest.Branch)
	fmt.Printf("mode: %s\n", status.Mode)
	// Empty fields suppress their lines (e.g. when nothing is namespaced).
	if status.Namespace != "" {
		fmt.Printf("namespace: %s\n", status.Namespace)
	}
	if status.PostgresDatabase != "" {
		fmt.Printf("postgres database: %s (ready=%t tables=%d)\n", status.PostgresDatabase, status.PostgresReady, status.PostgresTableCount)
	}
	if status.RedisPrefix != "" {
		fmt.Printf("redis prefix: %s (ready=%t keys=%d)\n", status.RedisPrefix, status.RedisReady, status.RedisKeyCount)
	}
	if status.FileStoreBucket != "" {
		fmt.Printf("file-store bucket: %s (ready=%t objects=%d)\n", status.FileStoreBucket, status.FileStoreReady, status.FileStoreObjectCount)
	}
	fmt.Printf("search infra: %s\n", status.SearchInfraMode)
	for _, warning := range manifest.DependencyWarnings() {
		fmt.Printf("note: %s\n", warning)
	}
}
// runWorktreeDepsReset runs agentlab.ResetDependencies for the resolved
// worktree and prints the actions it performed. Any failure is fatal.
func runWorktreeDepsReset(identifier string) {
	gitDir, err := agentlab.GetCommonGitDir()
	if err != nil {
		log.Fatalf("Failed to determine git common dir: %v", err)
	}
	target := mustResolveWorktree(identifier)
	target, resetResult, err := agentlab.ResetDependencies(gitDir, target)
	if err != nil {
		log.Fatalf("Failed to reset dependencies for %s: %v", target.Branch, err)
	}
	fmt.Printf("Reset dependencies for %s\n", target.Branch)
	for _, step := range resetResult.Actions {
		fmt.Printf("  %s\n", step)
	}
}
// runWorktreeDepsDown runs agentlab.TeardownDependencies for the resolved
// worktree and prints the actions it performed. Any failure is fatal.
func runWorktreeDepsDown(identifier string) {
	gitDir, err := agentlab.GetCommonGitDir()
	if err != nil {
		log.Fatalf("Failed to determine git common dir: %v", err)
	}
	target := mustResolveWorktree(identifier)
	target, teardown, err := agentlab.TeardownDependencies(gitDir, target)
	if err != nil {
		log.Fatalf("Failed to tear down dependencies for %s: %v", target.Branch, err)
	}
	fmt.Printf("Tore down dependencies for %s\n", target.Branch)
	for _, step := range teardown.Actions {
		fmt.Printf("  %s\n", step)
	}
}
// runWorktreeStatus prints a table of all tracked agent-lab worktrees,
// marking the row of the worktree the command was run from with "*".
func runWorktreeStatus() {
	commonGitDir, err := agentlab.GetCommonGitDir()
	if err != nil {
		log.Fatalf("Failed to determine git common dir: %v", err)
	}
	// Best-effort resolution of the current checkout: errors are deliberately
	// ignored and leave `current` zero-valued, which the ID != "" guard in
	// the loop below turns into "no row marked".
	repoRoot, _ := paths.GitRoot()
	current, _, _ := agentlab.FindByRepoRoot(commonGitDir, repoRoot)
	manifests, err := agentlab.LoadAll(commonGitDir)
	if err != nil {
		log.Fatalf("Failed to load worktree manifests: %v", err)
	}
	if len(manifests) == 0 {
		log.Info("No agent-lab worktrees tracked yet.")
		return
	}
	tw := tabwriter.NewWriter(os.Stdout, 0, 2, 2, ' ', 0)
	if _, err := fmt.Fprintln(tw, "CURRENT\tBRANCH\tLANE\tMODE\tWEB\tAPI\tPATH"); err != nil {
		log.Fatalf("Failed to write worktree header: %v", err)
	}
	for _, manifest := range manifests {
		marker := ""
		// Require a non-empty ID so a zero-valued `current` (lookup failed or
		// run outside any tracked worktree) can never match a real row.
		if manifest.ID == current.ID && manifest.ID != "" {
			marker = "*"
		}
		if _, err := fmt.Fprintf(
			tw,
			"%s\t%s\t%s\t%s\t%s\t%s\t%s\n",
			marker,
			manifest.Branch,
			manifest.ResolvedLane(),
			manifest.ResolvedDependencies().Mode,
			manifest.URLs.Web,
			manifest.URLs.API,
			manifest.CheckoutPath,
		); err != nil {
			log.Fatalf("Failed to write worktree row for %s: %v", manifest.Branch, err)
		}
	}
	// Flush errors are ignored: row/header writes above already Fatalf'd on
	// any underlying write failure.
	_ = tw.Flush()
}
// runWorktreeShow dumps every field of the resolved worktree's manifest in
// key: value form — identity, paths, dependency wiring, service URLs/ports,
// and the most recent verification record if one exists.
func runWorktreeShow(identifier string) {
	manifest := mustResolveWorktree(identifier)
	fmt.Printf("branch: %s\n", manifest.Branch)
	fmt.Printf("id: %s\n", manifest.ID)
	fmt.Printf("lane: %s\n", manifest.ResolvedLane())
	fmt.Printf("checkout: %s\n", manifest.CheckoutPath)
	fmt.Printf("base-ref: %s\n", manifest.BaseRef)
	fmt.Printf("state-dir: %s\n", manifest.StateDir)
	fmt.Printf("artifacts: %s\n", manifest.ArtifactDir)
	fmt.Printf("backend env: %s\n", manifest.EnvFile)
	fmt.Printf("web env: %s\n", manifest.WebEnvFile)
	fmt.Printf("compose project: %s\n", manifest.ComposeProject)
	fmt.Printf("dependency mode: %s\n", manifest.ResolvedDependencies().Mode)
	// Namespace/database lines only appear for namespaced dependency configs,
	// where these fields are populated.
	if manifest.ResolvedDependencies().Namespace != "" {
		fmt.Printf("dependency namespace: %s\n", manifest.ResolvedDependencies().Namespace)
	}
	if manifest.ResolvedDependencies().PostgresDatabase != "" {
		fmt.Printf("postgres database: %s\n", manifest.ResolvedDependencies().PostgresDatabase)
		fmt.Printf("redis prefix: %s\n", manifest.ResolvedDependencies().RedisPrefix)
		fmt.Printf("file-store bucket: %s\n", manifest.ResolvedDependencies().FileStoreBucket)
	}
	fmt.Printf("search infra: %s\n", manifest.ResolvedDependencies().SearchInfraMode)
	fmt.Printf("web url: %s\n", manifest.URLs.Web)
	fmt.Printf("api url: %s\n", manifest.URLs.API)
	fmt.Printf("mcp url: %s\n", manifest.URLs.MCP)
	fmt.Printf("ports: web=%d api=%d model_server=%d mcp=%d\n", manifest.Ports.Web, manifest.Ports.API, manifest.Ports.ModelServer, manifest.Ports.MCP)
	if manifest.LastVerifiedAt != "" {
		fmt.Printf("last verified: %s\n", manifest.LastVerifiedAt)
	}
	if manifest.LastVerifySummary != "" {
		fmt.Printf("last summary: %s\n", manifest.LastVerifySummary)
	}
	for _, warning := range manifest.DependencyWarnings() {
		fmt.Printf("note: %s\n", warning)
	}
}
// mustResolveWorktree loads the manifest for the given identifier (branch,
// id, or checkout path). With an empty identifier it resolves the manifest
// for the current checkout instead. Any failure, including "not found", is
// fatal via log.Fatalf.
func mustResolveWorktree(identifier string) agentlab.Manifest {
	commonGitDir, err := agentlab.GetCommonGitDir()
	if err != nil {
		log.Fatalf("Failed to determine git common dir: %v", err)
	}
	if identifier == "" {
		repoRoot, err := paths.GitRoot()
		if err != nil {
			log.Fatalf("Failed to determine git root: %v", err)
		}
		manifest, found, err := agentlab.FindByRepoRoot(commonGitDir, repoRoot)
		if err != nil {
			log.Fatalf("Failed to resolve current worktree manifest: %v", err)
		}
		if !found {
			// Fix: the original interpolated the (empty) identifier here,
			// producing the useless message `No agent-lab worktree found for ""`.
			// Report the checkout path that actually failed to resolve.
			log.Fatalf("No agent-lab worktree found for current checkout %q", repoRoot)
		}
		return manifest
	}
	manifest, found, err := agentlab.FindByIdentifier(commonGitDir, identifier)
	if err != nil {
		log.Fatalf("Failed to resolve worktree manifest: %v", err)
	}
	if !found {
		log.Fatalf("No agent-lab worktree found for %q", identifier)
	}
	return manifest
}
// runWorktreeRemove deletes an agent-lab worktree: optionally tears down its
// namespaced dependencies first (--drop-deps), then runs `git worktree
// remove` (with --force when requested), and finally drops the tracked
// manifest state. If a forced git removal fails but leaves an orphaned
// checkout behind, the directory is removed directly as a fallback.
func runWorktreeRemove(identifier string, opts *WorktreeRemoveOptions) {
	commonGitDir, err := agentlab.GetCommonGitDir()
	if err != nil {
		log.Fatalf("Failed to determine git common dir: %v", err)
	}
	manifest, found, err := agentlab.FindByIdentifier(commonGitDir, identifier)
	if err != nil {
		log.Fatalf("Failed to resolve worktree: %v", err)
	}
	if !found {
		log.Fatalf("No agent-lab worktree found for %q", identifier)
	}
	// Dependency teardown must happen while the manifest still exists, so it
	// runs before the git removal below.
	if opts.DropDeps {
		var teardownResult *agentlab.DependencyResult
		manifest, teardownResult, err = agentlab.TeardownDependencies(commonGitDir, manifest)
		if err != nil {
			log.Fatalf("Failed to tear down worktree dependencies: %v", err)
		}
		for _, action := range teardownResult.Actions {
			fmt.Printf("  deps: %s\n", action)
		}
	}
	args := []string{"worktree", "remove"}
	if opts.Force {
		args = append(args, "--force")
	}
	args = append(args, manifest.CheckoutPath)
	log.Infof("Removing worktree %s", manifest.Branch)
	// Wire the child to our stdio so git prompts/progress reach the user.
	gitCmd := exec.Command("git", args...)
	gitCmd.Stdout = os.Stdout
	gitCmd.Stderr = os.Stderr
	gitCmd.Stdin = os.Stdin
	if err := gitCmd.Run(); err != nil {
		// Forced removals can detach the worktree from git yet fail to delete
		// the directory; detect that and clean up the leftover checkout.
		if opts.Force && isOrphanedWorktree(manifest.CheckoutPath) {
			log.Warnf("git detached %s but left an orphaned checkout behind; removing %s", manifest.Branch, manifest.CheckoutPath)
			if removeErr := os.RemoveAll(manifest.CheckoutPath); removeErr != nil {
				log.Fatalf("git worktree remove failed: %v (fallback cleanup failed: %v)", err, removeErr)
			}
		} else {
			log.Fatalf("git worktree remove failed: %v", err)
		}
	}
	if err := agentlab.RemoveState(commonGitDir, manifest.ID); err != nil {
		log.Fatalf("Failed to remove worktree state: %v", err)
	}
	fmt.Printf("Removed agent-lab worktree %s\n", manifest.Branch)
	// Without --drop-deps, namespaced backing state intentionally survives.
	if manifest.ResolvedDependencies().Mode == agentlab.DependencyModeNamespaced && !opts.DropDeps {
		fmt.Printf("  note: namespaced Postgres/Redis/MinIO state was left in place. Use `ods worktree deps down %s` before removal if you want cleanup.\n", manifest.Branch)
	}
}
// isOrphanedWorktree reports whether checkoutPath is no longer a usable git
// worktree: it is absent from `git worktree list --porcelain`, missing on
// disk, or its checkout is one git can no longer run `status` in.
func isOrphanedWorktree(checkoutPath string) bool {
	output, err := exec.Command("git", "worktree", "list", "--porcelain").Output()
	if err == nil {
		// Fix: compare whole porcelain lines instead of a raw substring
		// search. The original required a trailing "\n" (missing a match on a
		// final unterminated line) and could in principle match inside other
		// porcelain content; anchoring to the "worktree " line prefix and an
		// exact path remainder removes both issues.
		for _, line := range strings.Split(string(output), "\n") {
			if strings.HasPrefix(line, "worktree ") && strings.TrimPrefix(line, "worktree ") == checkoutPath {
				return false // still registered with git
			}
		}
	}
	if _, statErr := os.Stat(checkoutPath); os.IsNotExist(statErr) {
		return true
	}
	// Registered-or-not aside, a checkout git cannot even `status` is orphaned.
	if statusErr := exec.Command("git", "-C", checkoutPath, "status", "--short").Run(); statusErr != nil {
		return true
	}
	return false
}

View File

@@ -0,0 +1,95 @@
package agentcheck
import (
"bufio"
"fmt"
"path/filepath"
"regexp"
"strings"
)
// hunkHeaderPattern captures the new-file start line from a unified-diff hunk
// header, e.g. `@@ -10,2 +11,3 @@` captures "11".
var hunkHeaderPattern = regexp.MustCompile(`^@@ -\d+(?:,\d+)? \+(\d+)(?:,\d+)? @@`)

// AddedLine is one "+" line from a diff: the new-file path, the line number
// it occupies in the new file, and its content with the leading "+" removed.
type AddedLine struct {
	Path    string
	LineNum int
	Content string
}

// Violation records one rule match against one added line.
type Violation struct {
	RuleID  string
	Path    string
	LineNum int
	Message string
	Content string
}
// ParseAddedLines walks a unified diff and returns every added ("+") line
// together with the path and new-file line number it lands on. Lines outside
// a hunk, deletions, and /dev/null targets are skipped. A malformed hunk
// header or a scanner failure returns an error.
func ParseAddedLines(diff string) ([]AddedLine, error) {
	scanner := bufio.NewScanner(strings.NewReader(diff))
	// Allow very long diff lines (up to 1 MiB).
	scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024)
	var added []AddedLine
	path := ""
	newLineNum := 0
	inHunk := false
	for scanner.Scan() {
		text := scanner.Text()
		if strings.HasPrefix(text, "+++ ") {
			// New file header: remember the target path, reset hunk state.
			path = normalizeDiffPath(strings.TrimPrefix(text, "+++ "))
			inHunk = false
			continue
		}
		if strings.HasPrefix(text, "@@ ") {
			groups := hunkHeaderPattern.FindStringSubmatch(text)
			if len(groups) != 2 {
				return nil, fmt.Errorf("failed to parse hunk header: %s", text)
			}
			start, err := parseLineNumber(groups[1])
			if err != nil {
				return nil, err
			}
			newLineNum = start
			inHunk = true
			continue
		}
		if !inHunk || path == "" {
			continue
		}
		if strings.HasPrefix(text, "+") && !strings.HasPrefix(text, "+++") {
			added = append(added, AddedLine{
				Path:    path,
				LineNum: newLineNum,
				Content: strings.TrimPrefix(text, "+"),
			})
			newLineNum++
			continue
		}
		if strings.HasPrefix(text, "-") && !strings.HasPrefix(text, "---") {
			// Deletions do not exist in the new file; no counter advance.
			continue
		}
		// Context (and any other) lines occupy a new-file line.
		newLineNum++
	}
	if err := scanner.Err(); err != nil {
		return nil, fmt.Errorf("failed to scan diff: %w", err)
	}
	return added, nil
}
// normalizeDiffPath turns a diff header path such as "b/foo/bar.py" into a
// clean, slash-separated repo-relative path; "/dev/null" maps to "".
func normalizeDiffPath(path string) string {
	trimmed := strings.TrimPrefix(strings.TrimSpace(path), "b/")
	if trimmed == "/dev/null" {
		return ""
	}
	return filepath.ToSlash(trimmed)
}
// parseLineNumber converts a decimal digit string to an int. Any non-digit
// character — including an empty string — is an error. (The hunk-header
// regexp feeds this a \d+ capture, so the error paths are defensive.)
func parseLineNumber(value string) (int, error) {
	// Fix: the original returned (0, nil) for "", silently yielding a bogus
	// line number 0; treat empty input as malformed instead.
	if value == "" {
		return 0, fmt.Errorf("invalid line number: %s", value)
	}
	lineNum := 0
	for _, ch := range value {
		if ch < '0' || ch > '9' {
			return 0, fmt.Errorf("invalid line number: %s", value)
		}
		lineNum = lineNum*10 + int(ch-'0')
	}
	return lineNum, nil
}

View File

@@ -0,0 +1,143 @@
package agentcheck
import (
"reflect"
"testing"
)
// TestParseAddedLines feeds a two-file diff through ParseAddedLines and spot
// checks that added lines get the right path and new-file line number:
// deletions don't advance the counter, and multiple hunks/files are tracked.
func TestParseAddedLines(t *testing.T) {
	diff := `diff --git a/backend/onyx/server/foo.py b/backend/onyx/server/foo.py
index 1111111..2222222 100644
--- a/backend/onyx/server/foo.py
+++ b/backend/onyx/server/foo.py
@@ -10,1 +11,3 @@
context = old_value
+from fastapi import HTTPException
-raise OldError()
+raise HTTPException(status_code=400, detail="bad")
@@ -20,0 +23,1 @@
+task.delay (payload)
diff --git a/web/src/sections/Foo.tsx b/web/src/sections/Foo.tsx
index 1111111..2222222 100644
--- a/web/src/sections/Foo.tsx
+++ b/web/src/sections/Foo.tsx
@@ -3,0 +4 @@
+import { Thing } from "@/components/Thing";`
	addedLines, err := ParseAddedLines(diff)
	if err != nil {
		t.Fatalf("ParseAddedLines returned error: %v", err)
	}
	if len(addedLines) != 4 {
		t.Fatalf("expected 4 added lines, got %d", len(addedLines))
	}
	// Hunk starts at new line 11; the context line consumes it, so the first
	// added line is 12.
	if addedLines[0].Path != "backend/onyx/server/foo.py" || addedLines[0].LineNum != 12 {
		t.Fatalf("unexpected first added line: %+v", addedLines[0])
	}
	if addedLines[2].Path != "backend/onyx/server/foo.py" || addedLines[2].LineNum != 23 {
		t.Fatalf("unexpected third added line: %+v", addedLines[2])
	}
	// Second file: hunk header with no count on the new side ("+4").
	if addedLines[3].Path != "web/src/sections/Foo.tsx" || addedLines[3].LineNum != 4 {
		t.Fatalf("unexpected final added line: %+v", addedLines[3])
	}
}
// TestParseAddedLinesRejectsMalformedHunkHeader: a hunk header that does not
// match the `@@ -l,s +l,s @@` shape must surface as a parse error.
func TestParseAddedLinesRejectsMalformedHunkHeader(t *testing.T) {
	diff := `diff --git a/backend/onyx/server/foo.py b/backend/onyx/server/foo.py
--- a/backend/onyx/server/foo.py
+++ b/backend/onyx/server/foo.py
@@ invalid @@
+raise HTTPException(status_code=400, detail="bad")`
	if _, err := ParseAddedLines(diff); err == nil {
		t.Fatal("expected malformed hunk header to return an error")
	}
}
// TestCheckAddedLinesFindsExpectedViolations: each default rule fires on a
// representative added line, and violations come back in input order.
func TestCheckAddedLinesFindsExpectedViolations(t *testing.T) {
	lines := []AddedLine{
		{Path: "backend/onyx/server/foo.py", LineNum: 10, Content: "from fastapi import HTTPException"},
		{Path: "backend/onyx/server/foo.py", LineNum: 11, Content: `raise HTTPException(status_code=400, detail="bad")`},
		{Path: "backend/onyx/server/foo.py", LineNum: 12, Content: "response_model = FooResponse"},
		{Path: "backend/onyx/server/foo.py", LineNum: 13, Content: "my_task.delay (payload)"},
		{Path: "web/src/sections/Foo.tsx", LineNum: 20, Content: `export { Thing } from "@/components/Thing";`},
	}
	violations := CheckAddedLines(lines)
	if len(violations) != 5 {
		t.Fatalf("expected 5 violations, got %d: %+v", len(violations), violations)
	}
	// One violation per line, in the same order the lines were supplied.
	expectedRules := []string{
		"no-new-http-exception",
		"no-new-http-exception",
		"no-new-response-model",
		"no-new-delay",
		"no-new-legacy-component-import",
	}
	for i, expectedRule := range expectedRules {
		if violations[i].RuleID != expectedRule {
			t.Fatalf("expected rule %q at index %d, got %q", expectedRule, i, violations[i].RuleID)
		}
	}
}
// TestCheckAddedLinesIgnoresCommentsStringsAndAllowedScopes: banned tokens
// inside string literals or comments, longer identifiers that merely contain
// a banned word, and files in exempt scopes (main.py, tests, model_server,
// the legacy components tree itself) must all produce zero violations.
func TestCheckAddedLinesIgnoresCommentsStringsAndAllowedScopes(t *testing.T) {
	lines := []AddedLine{
		{Path: "backend/onyx/server/foo.py", LineNum: 1, Content: `message = "HTTPException"`},
		{Path: "backend/onyx/server/foo.py", LineNum: 2, Content: `detail = "response_model="`},
		{Path: "backend/onyx/server/foo.py", LineNum: 3, Content: `note = ".delay("`},
		{Path: "backend/onyx/server/foo.py", LineNum: 4, Content: `# HTTPException`},
		{Path: "backend/onyx/server/foo.py", LineNum: 5, Content: `handler = HTTPExceptionAlias`},
		{Path: "backend/onyx/main.py", LineNum: 6, Content: `raise HTTPException(status_code=400, detail="bad")`},
		{Path: "backend/tests/unit/test_foo.py", LineNum: 7, Content: `from fastapi import HTTPException`},
		{Path: "backend/model_server/foo.py", LineNum: 8, Content: `task.delay(payload)`},
		{Path: "web/src/sections/Foo.tsx", LineNum: 9, Content: `const path = "@/components/Thing";`},
		{Path: "web/src/sections/Foo.tsx", LineNum: 10, Content: `// import { Thing } from "@/components/Thing";`},
		{Path: "web/src/components/Foo.tsx", LineNum: 11, Content: `import { Bar } from "@/components/Bar";`},
	}
	violations := CheckAddedLines(lines)
	if len(violations) != 0 {
		t.Fatalf("expected no violations, got %+v", violations)
	}
}
// TestCheckAddedLinesWithRulesSupportsCustomRuleSets: a caller-supplied rule
// set fully replaces the defaults — only the custom rule fires, and the
// returned Violation carries the rule's id, message, and line metadata.
func TestCheckAddedLinesWithRulesSupportsCustomRuleSets(t *testing.T) {
	lines := []AddedLine{
		{Path: "backend/onyx/server/foo.py", LineNum: 12, Content: "response_model = FooResponse"},
		{Path: "web/src/sections/Foo.tsx", LineNum: 20, Content: `import type { Thing } from "@/components/Thing";`},
	}
	rules := []Rule{
		{
			ID:      "python-response-model-only",
			Message: "response_model is not allowed",
			Scope:   backendProductPythonScope(),
			Match: func(line lineView) bool {
				return responseModelPattern.MatchString(line.CodeSansStrings)
			},
		},
	}
	violations := CheckAddedLinesWithRules(lines, rules)
	// The .tsx line would trip the default legacy-import rule, but that rule
	// is not part of this custom set, so only the Python line is reported.
	expected := []Violation{
		{
			RuleID:  "python-response-model-only",
			Path:    "backend/onyx/server/foo.py",
			LineNum: 12,
			Message: "response_model is not allowed",
			Content: "response_model = FooResponse",
		},
	}
	if !reflect.DeepEqual(expected, violations) {
		t.Fatalf("unexpected violations: %+v", violations)
	}
}

View File

@@ -0,0 +1,101 @@
package agentcheck
import "strings"
// stripLineComment removes a trailing line comment from content using the
// comment syntax implied by the file extension ("#" for Python, "//" for
// JS-like files). Unknown extensions pass through untouched.
func stripLineComment(path string, content string) string {
	if strings.HasSuffix(path, ".py") {
		return stripCommentMarker(content, "#")
	}
	if isJSLikePath(path) {
		return stripCommentMarker(content, "//")
	}
	return content
}
// isJSLikePath reports whether path names a JS/TS source file, i.e. one that
// uses "//" line comments.
func isJSLikePath(path string) bool {
	for _, ext := range []string{".js", ".jsx", ".ts", ".tsx"} {
		if strings.HasSuffix(path, ext) {
			return true
		}
	}
	return false
}
// stripCommentMarker returns line truncated at the first occurrence of
// marker that sits outside any string literal. Quote state is tracked for
// double, single, and backtick quotes; backslash escaping is honored inside
// quotes except backtick-quoted text (raw/template-literal style, where a
// backslash is not an escape for the closing quote).
func stripCommentMarker(line string, marker string) string {
	if marker == "" {
		return line
	}
	var builder strings.Builder
	quote := byte(0)  // currently open quote char; 0 when outside a string
	escaped := false  // previous char was a backslash inside a quoted string
	for i := 0; i < len(line); i++ {
		ch := line[i]
		if quote != 0 {
			// Inside a string: copy everything verbatim; markers here are
			// literal text, not comments.
			builder.WriteByte(ch)
			if escaped {
				escaped = false
				continue
			}
			if ch == '\\' && quote != '`' {
				escaped = true
				continue
			}
			if ch == quote {
				quote = 0
			}
			continue
		}
		// Outside strings: the first marker ends the code portion.
		if strings.HasPrefix(line[i:], marker) {
			break
		}
		builder.WriteByte(ch)
		if isQuote(ch) {
			quote = ch
		}
	}
	return builder.String()
}
// stripQuotedStrings blanks out every string literal in line: each literal
// (opening quote, contents, closing quote) collapses to a single space, so
// token patterns matched afterwards cannot fire on text inside strings.
// Escaping rules mirror stripCommentMarker: backslash escapes apply inside
// single/double quotes but not inside backticks.
func stripQuotedStrings(line string) string {
	var builder strings.Builder
	quote := byte(0)  // currently open quote char; 0 when outside a string
	escaped := false  // previous char was a backslash inside a quoted string
	for i := 0; i < len(line); i++ {
		ch := line[i]
		if quote != 0 {
			// Inside a string: emit nothing, just track escape/close state.
			if escaped {
				escaped = false
				continue
			}
			if ch == '\\' && quote != '`' {
				escaped = true
				continue
			}
			if ch == quote {
				quote = 0
			}
			continue
		}
		if isQuote(ch) {
			// Replace the whole upcoming literal with one space placeholder.
			quote = ch
			builder.WriteByte(' ')
			continue
		}
		builder.WriteByte(ch)
	}
	return builder.String()
}
// isQuote reports whether ch opens/closes a string literal in the supported
// languages: double quote, single quote, or backtick.
func isQuote(ch byte) bool {
	switch ch {
	case '"', '\'', '`':
		return true
	default:
		return false
	}
}

View File

@@ -0,0 +1,170 @@
package agentcheck
import (
"regexp"
"strings"
)
var (
	// httpExceptionPattern matches a bare HTTPException identifier.
	httpExceptionPattern = regexp.MustCompile(`\bHTTPException\b`)
	// responseModelPattern matches FastAPI's response_model= keyword usage.
	responseModelPattern = regexp.MustCompile(`\bresponse_model\s*=`)
	// delayCallPattern matches Celery-style ".delay(" calls, tolerating
	// whitespace around the dot and before the paren.
	delayCallPattern = regexp.MustCompile(`\.\s*delay\s*\(`)
	// componentPathPattern matches import paths that reach into a
	// components/ directory, via the "@/" alias or relative paths.
	componentPathPattern = regexp.MustCompile(`["'](?:@/components/|\.\.?/components/|\.\.?/.*/components/)`)
	// importExportPattern matches lines beginning with import or export.
	importExportPattern = regexp.MustCompile(`^\s*(?:import|export)\b`)
)

// Scope decides whether a rule applies to a repo-relative path.
type Scope func(path string) bool

// Matcher decides whether a prepared line view violates a rule.
type Matcher func(line lineView) bool

// Rule pairs a path Scope with a line Matcher and the message reported when
// both hit.
type Rule struct {
	ID      string
	Message string
	Scope   Scope
	Match   Matcher
}

// lineView is an AddedLine plus derived views matchers work on: the
// normalized path, the comment-stripped code, that code with string literals
// blanked out, and a whitespace-trimmed form.
type lineView struct {
	AddedLine
	Path            string
	Code            string
	CodeSansStrings string
	TrimmedCode     string
}
// CheckAddedLines evaluates the added lines against the built-in default
// rule set and returns all violations found.
func CheckAddedLines(lines []AddedLine) []Violation {
	rules := DefaultRules()
	return CheckAddedLinesWithRules(lines, rules)
}
// CheckAddedLinesWithRules evaluates every added line against the supplied
// rule set, producing one Violation per (line, matching rule) pair, in input
// order. Lines whose path normalizes to "" (e.g. /dev/null) are skipped, as
// are rules whose Scope rejects the path or whose Match is nil.
func CheckAddedLinesWithRules(lines []AddedLine, rules []Rule) []Violation {
	var found []Violation
	for _, added := range lines {
		view := buildLineView(added)
		if view.Path == "" {
			continue
		}
		for _, rule := range rules {
			inScope := rule.Scope == nil || rule.Scope(view.Path)
			if !inScope || rule.Match == nil {
				continue
			}
			if !rule.Match(view) {
				continue
			}
			found = append(found, Violation{
				RuleID:  rule.ID,
				Path:    view.Path,
				LineNum: view.LineNum,
				Message: rule.Message,
				Content: view.Content,
			})
		}
	}
	return found
}
// DefaultRules returns a fresh copy of the built-in rule set so callers can
// append to or reorder it without mutating the package-level defaults.
func DefaultRules() []Rule {
	rules := make([]Rule, len(defaultRules))
	copy(rules, defaultRules)
	return rules
}
// defaultRules is the built-in agent-check policy: no new HTTPException or
// response_model= usage in backend product Python, no new Celery .delay()
// calls, and no new imports from the legacy web components tree. Matchers
// run on the comment/string-stripped views, so mentions inside comments or
// string literals do not trip the rules.
var defaultRules = []Rule{
	{
		ID:      "no-new-http-exception",
		Message: "Do not introduce new HTTPException usage in backend product code. Raise OnyxError instead.",
		// backend/onyx/main.py is explicitly exempt from this rule.
		Scope: backendProductPythonScope(exactPath("backend/onyx/main.py")),
		Match: func(line lineView) bool {
			return hasPythonCode(line) && httpExceptionPattern.MatchString(line.CodeSansStrings)
		},
	},
	{
		ID:      "no-new-response-model",
		Message: "Do not introduce response_model on new FastAPI APIs. Type the function directly instead.",
		Scope:   backendProductPythonScope(),
		Match: func(line lineView) bool {
			return hasPythonCode(line) && responseModelPattern.MatchString(line.CodeSansStrings)
		},
	},
	{
		ID:      "no-new-delay",
		Message: "Do not introduce Celery .delay() calls. Use an enqueue path that sets expires= explicitly.",
		Scope:   backendProductPythonScope(),
		Match: func(line lineView) bool {
			return hasPythonCode(line) && delayCallPattern.MatchString(line.CodeSansStrings)
		},
	},
	{
		ID:      "no-new-legacy-component-import",
		Message: "Do not introduce new imports from web/src/components. Prefer Opal or refresh-components.",
		Scope:   nonLegacyWebSourceScope(),
		Match: func(line lineView) bool {
			return isLegacyComponentImport(line)
		},
	},
}
// buildLineView precomputes the derived views rule matchers inspect: the
// normalized path, the comment-stripped code, that code with string literals
// blanked out, and a whitespace-trimmed form.
func buildLineView(line AddedLine) lineView {
	cleanPath := normalizeDiffPath(line.Path)
	codeOnly := stripLineComment(cleanPath, line.Content)
	return lineView{
		AddedLine:       line,
		Path:            cleanPath,
		Code:            codeOnly,
		CodeSansStrings: stripQuotedStrings(codeOnly),
		TrimmedCode:     strings.TrimSpace(codeOnly),
	}
}
// backendProductPythonScope builds a Scope matching backend product Python
// files. Tests, the model server, and __pycache__ artifacts are always
// excluded; callers may pass additional exclusion Scopes.
func backendProductPythonScope(excluded ...Scope) Scope {
	return func(path string) bool {
		isBackendPython := strings.HasPrefix(path, "backend/") && strings.HasSuffix(path, ".py")
		if !isBackendPython {
			return false
		}
		switch {
		case strings.HasPrefix(path, "backend/tests/"),
			strings.HasPrefix(path, "backend/model_server/"),
			strings.Contains(path, "/__pycache__/"):
			return false
		}
		for _, skip := range excluded {
			if skip != nil && skip(path) {
				return false
			}
		}
		return true
	}
}
// nonLegacyWebSourceScope builds a Scope matching files under web/src/
// except the legacy web/src/components/ tree itself.
func nonLegacyWebSourceScope() Scope {
	return func(path string) bool {
		inWebSrc := strings.HasPrefix(path, "web/src/")
		inLegacyTree := strings.HasPrefix(path, "web/src/components/")
		return inWebSrc && !inLegacyTree
	}
}
// exactPath builds a Scope that matches a single literal repo path.
func exactPath(target string) Scope {
	return func(path string) bool { return path == target }
}
// hasPythonCode reports whether anything remains on the line after comments
// and string literals have been stripped.
func hasPythonCode(line lineView) bool {
	remaining := strings.TrimSpace(line.CodeSansStrings)
	return remaining != ""
}
// isLegacyComponentImport reports whether the line is an import/export
// statement whose module path reaches into a components/ directory. The
// path check runs on Code (comments stripped, strings kept) because the
// quoted module path is what must be inspected.
func isLegacyComponentImport(line lineView) bool {
	if line.TrimmedCode == "" || !importExportPattern.MatchString(line.TrimmedCode) {
		return false
	}
	return componentPathPattern.MatchString(line.Code)
}

View File

@@ -0,0 +1,107 @@
package agentdocs
import (
"fmt"
"os"
"path/filepath"
"regexp"
"strings"
)
// markdownLinkPattern captures the target of inline markdown links:
// [text](target).
var markdownLinkPattern = regexp.MustCompile(`\[[^\]]+\]\(([^)]+)\)`)

// requiredFiles is the agent-lab knowledge base every participating repo
// must ship, relative to the repo root.
var requiredFiles = []string{
	"AGENTS.md",
	"docs/agent/README.md",
	"docs/agent/ARCHITECTURE.md",
	"docs/agent/BRANCHING.md",
	"docs/agent/HARNESS.md",
	"docs/agent/GOLDEN_RULES.md",
	"docs/agent/LEGACY_ZONES.md",
	"docs/agent/QUALITY_SCORE.md",
}

// Violation names a file plus a human-readable problem with it.
type Violation struct {
	Path    string
	Message string
}
// Validate checks the agent-lab knowledge base rooted at root: every
// required file must exist and its relative markdown links must resolve.
// Repos without docs/agent/README.md are treated as not participating and
// produce no violations.
func Validate(root string) []Violation {
	sentinel := filepath.Join(root, filepath.FromSlash("docs/agent/README.md"))
	if _, err := os.Stat(sentinel); err != nil {
		return nil
	}
	var violations []Violation
	// Pass 1: report required files that are missing outright.
	for _, relPath := range requiredFiles {
		if _, err := os.Stat(filepath.Join(root, filepath.FromSlash(relPath))); err != nil {
			violations = append(violations, Violation{
				Path:    relPath,
				Message: "required agent-lab knowledge-base file is missing",
			})
		}
	}
	// Pass 2: link-check every required file that is readable.
	for _, relPath := range requiredFiles {
		raw, err := os.ReadFile(filepath.Join(root, filepath.FromSlash(relPath)))
		if err != nil {
			continue
		}
		violations = append(violations, validateMarkdownLinks(root, relPath, string(raw))...)
	}
	return violations
}
// validateMarkdownLinks reports one violation for every markdown link in
// content whose file target does not exist on disk. External (http/https),
// fragment-only, and mailto links are skipped; "#anchor" suffixes are
// stripped before resolving, and relative targets resolve against the
// document's own directory.
func validateMarkdownLinks(root string, relPath string, content string) []Violation {
	var violations []Violation
	baseDir := filepath.Dir(filepath.Join(root, filepath.FromSlash(relPath)))
	for _, match := range markdownLinkPattern.FindAllStringSubmatch(content, -1) {
		if len(match) != 2 {
			continue
		}
		target := strings.TrimSpace(match[1])
		switch {
		case target == "",
			strings.HasPrefix(target, "http://"),
			strings.HasPrefix(target, "https://"),
			strings.HasPrefix(target, "#"),
			strings.HasPrefix(target, "mailto:"):
			continue
		}
		target = stripAnchor(target)
		resolved := target
		if !filepath.IsAbs(target) {
			resolved = filepath.Join(baseDir, target)
		}
		if _, err := os.Stat(resolved); err != nil {
			violations = append(violations, Violation{
				Path: relPath,
				Message: fmt.Sprintf(
					"broken markdown link target: %s",
					target,
				),
			})
		}
	}
	return violations
}
// stripAnchor drops a trailing "#fragment" from a markdown link target,
// returning the target unchanged when it has no fragment.
func stripAnchor(target string) string {
	if i := strings.IndexByte(target, '#'); i >= 0 {
		return target[:i]
	}
	return target
}

View File

@@ -0,0 +1,61 @@
package agentdocs
import (
"os"
"path/filepath"
"testing"
)
// TestValidateSuccess: a repo with the full knowledge base and only working
// relative links (including a parent-directory link) yields no violations.
func TestValidateSuccess(t *testing.T) {
	root := t.TempDir()
	writeFile(t, filepath.Join(root, "AGENTS.md"), `[Agent Docs](./docs/agent/README.md)`)
	writeFile(t, filepath.Join(root, "docs/agent/README.md"), `[Architecture](./ARCHITECTURE.md)
[Root](../../AGENTS.md)`)
	writeFile(t, filepath.Join(root, "docs/agent/ARCHITECTURE.md"), `ok`)
	writeFile(t, filepath.Join(root, "docs/agent/BRANCHING.md"), `ok`)
	writeFile(t, filepath.Join(root, "docs/agent/HARNESS.md"), `ok`)
	writeFile(t, filepath.Join(root, "docs/agent/GOLDEN_RULES.md"), `ok`)
	writeFile(t, filepath.Join(root, "docs/agent/LEGACY_ZONES.md"), `ok`)
	writeFile(t, filepath.Join(root, "docs/agent/QUALITY_SCORE.md"), `ok`)
	violations := Validate(root)
	if len(violations) != 0 {
		t.Fatalf("expected no violations, got %+v", violations)
	}
}
// TestValidateMissingAndBrokenLinks: with most required files absent and a
// dangling link in README.md, Validate must report multiple violations
// (missing-file entries plus the broken link).
func TestValidateMissingAndBrokenLinks(t *testing.T) {
	root := t.TempDir()
	writeFile(t, filepath.Join(root, "AGENTS.md"), `[Agent Docs](./docs/agent/README.md)`)
	writeFile(t, filepath.Join(root, "docs/agent/README.md"), `[Missing](./MISSING.md)`)
	writeFile(t, filepath.Join(root, "docs/agent/ARCHITECTURE.md"), `ok`)
	violations := Validate(root)
	if len(violations) < 2 {
		t.Fatalf("expected multiple violations, got %+v", violations)
	}
}
// TestValidateSkipsReposWithoutAgentLabDocs: absence of docs/agent/README.md
// (the opt-in sentinel) means the repo is not participating, so Validate
// returns nothing rather than flagging every required file as missing.
func TestValidateSkipsReposWithoutAgentLabDocs(t *testing.T) {
	root := t.TempDir()
	writeFile(t, filepath.Join(root, "README.md"), `plain repo`)
	violations := Validate(root)
	if len(violations) != 0 {
		t.Fatalf("expected no violations for repo without agent-lab docs, got %+v", violations)
	}
}
// writeFile creates path (and any missing parent directories) with the given
// content, failing the calling test immediately on any error.
func writeFile(t *testing.T, path string, content string) {
	t.Helper()
	parent := filepath.Dir(path)
	if err := os.MkdirAll(parent, 0o755); err != nil {
		t.Fatalf("failed to create dir for %s: %v", path, err)
	}
	if err := os.WriteFile(path, []byte(content), 0o644); err != nil {
		t.Fatalf("failed to write %s: %v", path, err)
	}
}

View File

@@ -0,0 +1,585 @@
package agentlab
import (
"crypto/sha256"
"encoding/hex"
"encoding/json"
"errors"
"fmt"
"net"
"os"
"os/exec"
"path/filepath"
"regexp"
"sort"
"strings"
"time"
)
const (
	// State lives directly under the repo's common git dir so every worktree
	// shares one state root.
	stateDirName     = "onyx-agent-lab"
	worktreesDirName = "worktrees"
	// Per-worktree env file names, written into the checkout (see BuildManifest).
	envFileName    = ".env.agent-lab"
	webEnvFileName = ".env.web.agent-lab"
	// Base ports for each service. NOTE(review): portSearchWindow presumably
	// bounds how far above these bases the allocator probes for a free port —
	// confirm against the port-allocation code (not visible here).
	defaultWebPort   = 3300
	defaultAPIPort   = 8380
	defaultModelPort = 9300
	defaultMCPPort   = 8390
	portSearchWindow = 400
	// Prefix for docker-compose project names (see ComposeProjectName).
	dockerProjectPrefix = "onyx"
	// Search infrastructure is always shared across worktrees in this version.
	searchInfraMode = "shared"
)

// nonAlphaNumPattern collapses runs of non-[a-z0-9] characters when slugging.
var nonAlphaNumPattern = regexp.MustCompile(`[^a-z0-9]+`)

// DependencyMode selects how a worktree's backing services are isolated.
type DependencyMode string

const (
	// DependencyModeShared reuses the repo's shared services.
	DependencyModeShared DependencyMode = "shared"
	// DependencyModeNamespaced gives the worktree its own Postgres database,
	// Redis key prefix, and file-store bucket (see BuildDependencyConfig).
	DependencyModeNamespaced DependencyMode = "namespaced"
)

// WorktreeLane classifies the kind of work a worktree branch carries.
type WorktreeLane string

const (
	WorktreeLaneLab     WorktreeLane = "lab"
	WorktreeLaneProduct WorktreeLane = "product"
	WorktreeLaneCustom  WorktreeLane = "custom"
)

// productBranchPrefixes are the conventional-commit-style branch prefixes
// that mark a branch as product work (see InferLane).
var productBranchPrefixes = []string{
	"build/",
	"chore/",
	"ci/",
	"docs/",
	"feat/",
	"fix/",
	"perf/",
	"refactor/",
	"revert/",
	"style/",
	"test/",
}

// DependencyConfig records how a worktree's dependencies are provisioned;
// the namespaced fields are empty in shared mode.
type DependencyConfig struct {
	Mode              DependencyMode `json:"mode"`
	Namespace         string         `json:"namespace,omitempty"`
	PostgresDatabase  string         `json:"postgres_database,omitempty"`
	RedisPrefix       string         `json:"redis_prefix,omitempty"`
	FileStoreBucket   string         `json:"file_store_bucket,omitempty"`
	SearchInfraMode   string         `json:"search_infra_mode"`
	LastProvisionedAt string         `json:"last_provisioned_at,omitempty"`
}

// PortSet holds the local port assigned to each worktree service.
type PortSet struct {
	Web         int `json:"web"`
	API         int `json:"api"`
	ModelServer int `json:"model_server"`
	MCP         int `json:"mcp"`
}

// URLSet holds the loopback URL for each exposed service.
type URLSet struct {
	Web string `json:"web"`
	API string `json:"api"`
	MCP string `json:"mcp"`
}

// Manifest is the persisted record of one agent-lab worktree: its identity,
// where it is checked out, where its state/artifact/env files live, and how
// its services and dependencies are wired.
type Manifest struct {
	ID                string           `json:"id"`
	Branch            string           `json:"branch"`
	Lane              WorktreeLane     `json:"lane,omitempty"`
	BaseRef           string           `json:"base_ref"`
	CreatedFromPath   string           `json:"created_from_path"`
	CheckoutPath      string           `json:"checkout_path"`
	StateDir          string           `json:"state_dir"`
	ArtifactDir       string           `json:"artifact_dir"`
	EnvFile           string           `json:"env_file"`
	WebEnvFile        string           `json:"web_env_file"`
	ComposeProject    string           `json:"compose_project"`
	Dependencies      DependencyConfig `json:"dependencies"`
	Ports             PortSet          `json:"ports"`
	URLs              URLSet           `json:"urls"`
	CreatedAt         time.Time        `json:"created_at"`
	LastVerifiedAt    string           `json:"last_verified_at,omitempty"`
	LastVerifySummary string           `json:"last_verify_summary,omitempty"`
}
// Slug normalizes value into a lowercase, dash-separated identifier: slashes
// and underscores become dashes, any other non-alphanumeric runs collapse to
// a single dash, and leading/trailing dashes are trimmed. An empty result
// falls back to "worktree".
func Slug(value string) string {
	lowered := strings.ToLower(strings.TrimSpace(value))
	lowered = strings.ReplaceAll(lowered, "/", "-")
	lowered = strings.ReplaceAll(lowered, "_", "-")
	slug := strings.Trim(nonAlphaNumPattern.ReplaceAllString(lowered, "-"), "-")
	if slug == "" {
		return "worktree"
	}
	return slug
}
// worktreeID derives a stable, human-readable id: the slug of value plus a
// short (4-byte) sha256 fingerprint of the raw value to avoid slug
// collisions between similar branch names.
func worktreeID(value string) string {
	digest := sha256.Sum256([]byte(value))
	return fmt.Sprintf("%s-%s", Slug(value), hex.EncodeToString(digest[:4]))
}
// ComposeProjectName builds the docker-compose project name for a worktree
// id, capping the slug at 32 characters to keep compose resource names short.
func ComposeProjectName(id string) string {
	slug := Slug(id)
	const maxSlugLen = 32
	if len(slug) > maxSlugLen {
		slug = slug[:maxSlugLen]
	}
	return dockerProjectPrefix + "-" + slug
}
// GetCommonGitDir returns the absolute path of the repository's common git
// directory, which is shared by the main checkout and all of its worktrees.
func GetCommonGitDir() (string, error) {
	out, err := exec.Command("git", "rev-parse", "--path-format=absolute", "--git-common-dir").Output()
	if err != nil {
		return "", fmt.Errorf("git rev-parse --git-common-dir failed: %w", err)
	}
	return strings.TrimSpace(string(out)), nil
}
// StateRoot returns the directory holding all agent-lab state for the repo,
// placed under the common git dir so every worktree shares it.
func StateRoot(commonGitDir string) string {
	return filepath.Join(commonGitDir, stateDirName)
}
// WorktreesRoot returns the directory holding per-worktree state dirs.
func WorktreesRoot(commonGitDir string) string {
	return filepath.Join(StateRoot(commonGitDir), worktreesDirName)
}
// WorktreeStateDir returns the state directory for one worktree id; the id
// is re-slugged so arbitrary identifiers still yield a safe directory name.
func WorktreeStateDir(commonGitDir, id string) string {
	return filepath.Join(WorktreesRoot(commonGitDir), Slug(id))
}
// ManifestPath returns where the manifest.json for a worktree id is stored.
func ManifestPath(commonGitDir, id string) string {
	return filepath.Join(WorktreeStateDir(commonGitDir, id), "manifest.json")
}
// DefaultCheckoutPath places a new worktree checkout in a sibling
// "<repo>-worktrees" directory next to the main repo, named by the stable
// worktree id derived from the identifier.
func DefaultCheckoutPath(repoRoot, id string) string {
	container := filepath.Join(filepath.Dir(repoRoot), filepath.Base(repoRoot)+"-worktrees")
	return filepath.Join(container, worktreeID(id))
}
// NormalizeBranchForLane strips well-known ref prefixes — refs/heads/, then
// origin/, then codex/ (each at most once, in that order) — so lane
// inference sees the bare branch name.
func NormalizeBranchForLane(branch string) string {
	name := strings.TrimSpace(branch)
	for _, prefix := range []string{"refs/heads/", "origin/", "codex/"} {
		name = strings.TrimPrefix(name, prefix)
	}
	return name
}
// InferLane classifies a branch by its normalized name: lab/ branches are
// harness (lab) work, conventional-commit prefixes (feat/, fix/, ...) are
// product work, and everything else is custom.
func InferLane(branch string) WorktreeLane {
	name := NormalizeBranchForLane(branch)
	if strings.HasPrefix(name, "lab/") {
		return WorktreeLaneLab
	}
	for _, productPrefix := range productBranchPrefixes {
		if strings.HasPrefix(name, productPrefix) {
			return WorktreeLaneProduct
		}
	}
	return WorktreeLaneCustom
}
// BaseRefSelection is the outcome of choosing a base ref for a new worktree:
// the ref to branch from, the lane inferred from the branch name, and a
// human-readable explanation of why that ref was picked.
type BaseRefSelection struct {
	Ref    string
	Lane   WorktreeLane
	Reason string
}
// ResolveCreateBaseRef picks the base ref for a new worktree branch. An
// explicit `requested` ref always wins. Otherwise the lane inferred from the
// branch name selects a preferred ref — agent-lab refs for lab work,
// origin/main (then main) for product work — checked for existence via the
// injected refExists, with HEAD as the fallback. Injection keeps this
// function free of direct git calls and unit-testable.
func ResolveCreateBaseRef(branch, requested string, refExists func(string) bool) BaseRefSelection {
	lane := InferLane(branch)
	if requested != "" {
		return BaseRefSelection{
			Ref:    requested,
			Lane:   lane,
			Reason: "using explicit --from value",
		}
	}
	switch lane {
	case WorktreeLaneLab:
		// Local refs are preferred over their origin/ counterparts.
		for _, candidate := range []string{"codex/agent-lab", "agent-lab", "origin/codex/agent-lab", "origin/agent-lab"} {
			if refExists(candidate) {
				return BaseRefSelection{
					Ref:    candidate,
					Lane:   lane,
					Reason: fmt.Sprintf("inferred lab lane from branch name; using %s as the base ref", candidate),
				}
			}
		}
		return BaseRefSelection{
			Ref:    "HEAD",
			Lane:   lane,
			Reason: "inferred lab lane from branch name, but no agent-lab ref exists locally; falling back to HEAD",
		}
	case WorktreeLaneProduct:
		// origin/main first, so product branches start from the freshest
		// fetched main rather than a possibly stale local main.
		for _, candidate := range []string{"origin/main", "main"} {
			if refExists(candidate) {
				return BaseRefSelection{
					Ref:    candidate,
					Lane:   lane,
					Reason: fmt.Sprintf("inferred product lane from branch name; using %s as the base ref", candidate),
				}
			}
		}
		return BaseRefSelection{
			Ref:    "HEAD",
			Lane:   lane,
			Reason: "inferred product lane from branch name, but no main ref exists locally; falling back to HEAD",
		}
	default:
		return BaseRefSelection{
			Ref:    "HEAD",
			Lane:   lane,
			Reason: "no lane inferred from branch name; defaulting to HEAD. Prefer codex/lab/... for harness work and codex/fix... or codex/feat... for product work, or pass --from explicitly",
		}
	}
}
// GitRefExists reports whether ref resolves in the current repository,
// using `git rev-parse --verify --quiet`.
func GitRefExists(ref string) bool {
	err := exec.Command("git", "rev-parse", "--verify", "--quiet", ref).Run()
	return err == nil
}
// BuildManifest assembles a worktree manifest from the creation inputs:
// identity derived from the branch name, filesystem layout (state, artifact,
// and env-file locations), localhost URLs derived from the allocated ports,
// and the dependency configuration for the chosen mode. Env files live under
// the checkout's .vscode directory.
func BuildManifest(repoRoot, commonGitDir, branch string, lane WorktreeLane, baseRef, checkoutPath string, ports PortSet, dependencyMode DependencyMode) Manifest {
	id := worktreeID(branch)
	stateDir := WorktreeStateDir(commonGitDir, id)
	artifactDir := filepath.Join(stateDir, "artifacts")
	envDir := filepath.Join(checkoutPath, ".vscode")
	return Manifest{
		ID: id,
		Branch: branch,
		Lane: lane,
		BaseRef: baseRef,
		CreatedFromPath: repoRoot,
		CheckoutPath: checkoutPath,
		StateDir: stateDir,
		ArtifactDir: artifactDir,
		EnvFile: filepath.Join(envDir, envFileName),
		WebEnvFile: filepath.Join(envDir, webEnvFileName),
		ComposeProject: ComposeProjectName(id),
		Dependencies: BuildDependencyConfig(branch, dependencyMode),
		Ports: ports,
		// All services are exposed on loopback only.
		URLs: URLSet{
			Web: fmt.Sprintf("http://127.0.0.1:%d", ports.Web),
			API: fmt.Sprintf("http://127.0.0.1:%d", ports.API),
			MCP: fmt.Sprintf("http://127.0.0.1:%d", ports.MCP),
		},
		CreatedAt: time.Now().UTC(),
	}
}
// ResolvedLane returns the lane recorded on the manifest, inferring it from
// the branch name for manifests that never recorded one.
func (m Manifest) ResolvedLane() WorktreeLane {
	if lane := m.Lane; lane != "" {
		return lane
	}
	return InferLane(m.Branch)
}
// BuildDependencyConfig derives per-worktree dependency settings. An empty
// mode defaults to shared. Shared mode carries only the mode and search-infra
// mode; namespaced mode additionally derives a Postgres database name, Redis
// key prefix, and file-store bucket name from the worktree id. Names are
// truncated to 63 characters (presumably the Postgres identifier / S3 bucket
// length limits — confirm), and the bucket is trimmed of leading/trailing
// hyphens after truncation.
func BuildDependencyConfig(branch string, mode DependencyMode) DependencyConfig {
	if mode == "" {
		mode = DependencyModeShared
	}
	config := DependencyConfig{
		Mode: mode,
		SearchInfraMode: searchInfraMode,
	}
	if mode != DependencyModeNamespaced {
		return config
	}
	namespace := worktreeID(branch)
	// Postgres identifiers cannot contain hyphens without quoting.
	dbSuffix := strings.ReplaceAll(namespace, "-", "_")
	database := fmt.Sprintf("agentlab_%s", dbSuffix)
	if len(database) > 63 {
		database = database[:63]
	}
	bucket := fmt.Sprintf("onyx-agentlab-%s", namespace)
	if len(bucket) > 63 {
		bucket = bucket[:63]
		// Truncation may leave a trailing hyphen; strip it.
		bucket = strings.Trim(bucket, "-")
	}
	config.Namespace = namespace
	config.PostgresDatabase = database
	config.RedisPrefix = fmt.Sprintf("agentlab:%s", namespace)
	config.FileStoreBucket = bucket
	return config
}
// ResolvedDependencies returns the manifest's dependency configuration,
// rebuilding a shared-mode default for legacy manifests without one and
// backfilling a missing search-infra mode.
func (m Manifest) ResolvedDependencies() DependencyConfig {
	if m.Dependencies.Mode == "" {
		return BuildDependencyConfig(m.Branch, DependencyModeShared)
	}
	deps := m.Dependencies
	if deps.SearchInfraMode == "" {
		deps.SearchInfraMode = searchInfraMode
	}
	return deps
}
// RuntimeEnv builds the environment-variable overrides for processes running
// inside this worktree: agent-lab bookkeeping variables, the worktree's web/
// API/MCP URLs and web port, plus — in namespaced dependency mode — the
// Postgres database, Redis prefix, and file-store bucket overrides.
func (m Manifest) RuntimeEnv() map[string]string {
	env := map[string]string{
		"AGENT_LAB_ARTIFACT_DIR": m.ArtifactDir,
		"AGENT_LAB_DEPENDENCY_MODE": string(m.ResolvedDependencies().Mode),
		"AGENT_LAB_SEARCH_INFRA_MODE": m.ResolvedDependencies().SearchInfraMode,
		"AGENT_LAB_WORKTREE_ID": m.ID,
		"AGENT_LAB_WORKTREE_URL": m.URLs.Web,
		"BASE_URL": m.URLs.Web,
		"INTERNAL_URL": m.URLs.API,
		"MCP_INTERNAL_URL": m.URLs.MCP,
		"PORT": fmt.Sprintf("%d", m.Ports.Web),
		"WEB_DOMAIN": m.URLs.Web,
	}
	deps := m.ResolvedDependencies()
	if deps.Namespace != "" {
		env["AGENT_LAB_NAMESPACE"] = deps.Namespace
	}
	// Only namespaced mode overrides the shared backing-store settings.
	if deps.Mode == DependencyModeNamespaced {
		env["POSTGRES_DB"] = deps.PostgresDatabase
		env["DEFAULT_REDIS_PREFIX"] = deps.RedisPrefix
		env["S3_FILE_STORE_BUCKET_NAME"] = deps.FileStoreBucket
	}
	return env
}
// ShellEnv returns the variables to export into interactive shells opened in
// this worktree; currently identical to the runtime environment.
func (m Manifest) ShellEnv() map[string]string {
	env := m.RuntimeEnv()
	return env
}
// DependencyWarnings returns user-facing caveats about dependency isolation.
// Search infrastructure is never namespaced by agent-lab, so a warning is
// emitted whenever the resolved config carries the shared search-infra mode.
func (m Manifest) DependencyWarnings() []string {
	if m.ResolvedDependencies().SearchInfraMode != searchInfraMode {
		return nil
	}
	return []string{
		"Search infrastructure remains shared across worktrees. OpenSearch/Vespa state is not namespaced or torn down by agent-lab.",
	}
}
// EnvFileContents renders the worktree-local env-file text for the given kind
// ("web" for the frontend file; anything else produces the backend file).
// Both variants share the agent-lab bookkeeping lines; the web file adds the
// PORT and BASE_URL, while the backend file adds the namespaced data-store
// overrides when namespaced dependency mode is active. The result always ends
// with a trailing newline.
func (m Manifest) EnvFileContents(kind string) string {
	values := m.RuntimeEnv()
	deps := m.ResolvedDependencies()
	var lines []string
	lines = append(lines, "# Generated by `ods worktree create` for agent-lab.")
	lines = append(lines, "# This file only contains worktree-local overrides.")
	lines = append(lines, fmt.Sprintf("AGENT_LAB_WORKTREE_ID=%s", m.ID))
	lines = append(lines, fmt.Sprintf("AGENT_LAB_ARTIFACT_DIR=%s", m.ArtifactDir))
	lines = append(lines, fmt.Sprintf("AGENT_LAB_DEPENDENCY_MODE=%s", deps.Mode))
	lines = append(lines, fmt.Sprintf("AGENT_LAB_SEARCH_INFRA_MODE=%s", deps.SearchInfraMode))
	if deps.Namespace != "" {
		lines = append(lines, fmt.Sprintf("AGENT_LAB_NAMESPACE=%s", deps.Namespace))
	}
	switch kind {
	case "web":
		lines = append(lines, fmt.Sprintf("PORT=%d", m.Ports.Web))
		lines = append(lines, fmt.Sprintf("BASE_URL=%s", values["BASE_URL"]))
		lines = append(lines, fmt.Sprintf("WEB_DOMAIN=%s", values["WEB_DOMAIN"]))
		lines = append(lines, fmt.Sprintf("INTERNAL_URL=%s", values["INTERNAL_URL"]))
		lines = append(lines, fmt.Sprintf("MCP_INTERNAL_URL=%s", values["MCP_INTERNAL_URL"]))
	default:
		lines = append(lines, fmt.Sprintf("WEB_DOMAIN=%s", values["WEB_DOMAIN"]))
		lines = append(lines, fmt.Sprintf("INTERNAL_URL=%s", values["INTERNAL_URL"]))
		lines = append(lines, fmt.Sprintf("MCP_INTERNAL_URL=%s", values["MCP_INTERNAL_URL"]))
		if deps.Mode == DependencyModeNamespaced {
			lines = append(lines, fmt.Sprintf("POSTGRES_DB=%s", deps.PostgresDatabase))
			lines = append(lines, fmt.Sprintf("DEFAULT_REDIS_PREFIX=%s", deps.RedisPrefix))
			lines = append(lines, fmt.Sprintf("S3_FILE_STORE_BUCKET_NAME=%s", deps.FileStoreBucket))
		}
	}
	return strings.Join(lines, "\n") + "\n"
}
// WriteManifest persists the manifest as indented JSON under the worktree's
// state directory, creating the state and artifact directories first so a
// fresh worktree can be written in one call.
func WriteManifest(commonGitDir string, manifest Manifest) error {
	stateDir := WorktreeStateDir(commonGitDir, manifest.ID)
	if err := os.MkdirAll(stateDir, 0755); err != nil {
		return fmt.Errorf("create worktree state dir: %w", err)
	}
	if err := os.MkdirAll(manifest.ArtifactDir, 0755); err != nil {
		return fmt.Errorf("create artifact dir: %w", err)
	}
	data, err := json.MarshalIndent(manifest, "", " ")
	if err != nil {
		return fmt.Errorf("marshal manifest: %w", err)
	}
	if err := os.WriteFile(ManifestPath(commonGitDir, manifest.ID), data, 0644); err != nil {
		return fmt.Errorf("write manifest: %w", err)
	}
	return nil
}
// WriteEnvFiles materializes the backend and web env-override files for the
// worktree, creating the containing .vscode directory if needed.
func WriteEnvFiles(manifest Manifest) error {
	if err := os.MkdirAll(filepath.Dir(manifest.EnvFile), 0755); err != nil {
		return fmt.Errorf("create env dir: %w", err)
	}
	targets := []struct {
		path string
		kind string
	}{
		{path: manifest.EnvFile, kind: "backend"},
		{path: manifest.WebEnvFile, kind: "web"},
	}
	for _, target := range targets {
		contents := []byte(manifest.EnvFileContents(target.kind))
		if err := os.WriteFile(target.path, contents, 0644); err != nil {
			return fmt.Errorf("write %s env file: %w", target.kind, err)
		}
	}
	return nil
}
// LoadAll reads every manifest under the worktrees root, sorted by branch
// name. A missing worktrees directory yields (nil, nil) rather than an error;
// any unreadable or unparsable manifest aborts the whole load.
func LoadAll(commonGitDir string) ([]Manifest, error) {
	worktreesRoot := WorktreesRoot(commonGitDir)
	entries, err := os.ReadDir(worktreesRoot)
	if err != nil {
		// No worktrees have been created yet; treat as empty, not an error.
		if errors.Is(err, os.ErrNotExist) {
			return nil, nil
		}
		return nil, fmt.Errorf("read worktrees dir: %w", err)
	}
	manifests := make([]Manifest, 0, len(entries))
	for _, entry := range entries {
		// Each worktree owns one subdirectory; skip stray files.
		if !entry.IsDir() {
			continue
		}
		manifest, err := LoadManifest(filepath.Join(worktreesRoot, entry.Name(), "manifest.json"))
		if err != nil {
			return nil, err
		}
		manifests = append(manifests, manifest)
	}
	sort.Slice(manifests, func(i, j int) bool {
		return manifests[i].Branch < manifests[j].Branch
	})
	return manifests, nil
}
// LoadManifest reads and decodes a single manifest.json from path.
func LoadManifest(path string) (Manifest, error) {
	var loaded Manifest
	raw, err := os.ReadFile(path)
	if err != nil {
		return loaded, fmt.Errorf("read manifest %s: %w", path, err)
	}
	if err = json.Unmarshal(raw, &loaded); err != nil {
		return loaded, fmt.Errorf("parse manifest %s: %w", path, err)
	}
	return loaded, nil
}
// FindByRepoRoot looks up the manifest whose checkout path equals repoRoot
// after path cleaning and symlink resolution. The boolean reports whether a
// match was found.
func FindByRepoRoot(commonGitDir, repoRoot string) (Manifest, bool, error) {
	manifests, err := LoadAll(commonGitDir)
	if err != nil {
		return Manifest{}, false, err
	}
	want := normalizePath(repoRoot)
	for _, candidate := range manifests {
		if normalizePath(candidate.CheckoutPath) != want {
			continue
		}
		return candidate, true, nil
	}
	return Manifest{}, false, nil
}
// FindByIdentifier resolves a user-supplied identifier to a manifest. Exact
// matches on id, branch, or (normalized) checkout path win immediately.
// Otherwise the identifier's slug is compared against each branch's slug;
// exactly one slug match resolves, while multiple slug matches return an
// error asking for a more specific identifier.
func FindByIdentifier(commonGitDir, identifier string) (Manifest, bool, error) {
	manifests, err := LoadAll(commonGitDir)
	if err != nil {
		return Manifest{}, false, err
	}
	slug := Slug(identifier)
	cleanIdentifier := normalizePath(identifier)
	var slugMatches []Manifest
	for _, manifest := range manifests {
		switch {
		case manifest.ID == slug:
			return manifest, true, nil
		case manifest.Branch == identifier:
			return manifest, true, nil
		case normalizePath(manifest.CheckoutPath) == cleanIdentifier:
			return manifest, true, nil
		case slug != "" && Slug(manifest.Branch) == slug:
			// Slugging is lossy (e.g. "_" and "-" collide), so collect all
			// candidates and only accept an unambiguous single match below.
			slugMatches = append(slugMatches, manifest)
		}
	}
	if len(slugMatches) == 1 {
		return slugMatches[0], true, nil
	}
	if len(slugMatches) > 1 {
		return Manifest{}, false, fmt.Errorf("identifier %q matches multiple worktrees; use the branch, full id, or checkout path", identifier)
	}
	return Manifest{}, false, nil
}
// RemoveState deletes the worktree's on-disk state directory (manifest and
// artifacts included). Already-absent state is not an error.
func RemoveState(commonGitDir, id string) error {
	err := os.RemoveAll(WorktreeStateDir(commonGitDir, id))
	if err != nil {
		return fmt.Errorf("remove worktree state: %w", err)
	}
	return nil
}
// UpdateVerification records the latest verification summary path and UTC
// RFC 3339 timestamp on the manifest, then persists it.
func UpdateVerification(commonGitDir string, manifest Manifest, summaryPath string, verifiedAt time.Time) error {
	stamp := verifiedAt.UTC().Format(time.RFC3339)
	manifest.LastVerifySummary = summaryPath
	manifest.LastVerifiedAt = stamp
	return WriteManifest(commonGitDir, manifest)
}
// AllocatePorts picks a port set for a new worktree. All four defaults are
// shifted by the same offset, skipping offsets that collide with ports
// already reserved by existing manifests or that fail a live bind probe.
// Fails after portSearchWindow offsets have been exhausted.
func AllocatePorts(existing []Manifest) (PortSet, error) {
	// Ports claimed by existing worktrees, whether or not currently bound.
	reserved := make(map[int]bool)
	for _, manifest := range existing {
		reserved[manifest.Ports.Web] = true
		reserved[manifest.Ports.API] = true
		reserved[manifest.Ports.ModelServer] = true
		reserved[manifest.Ports.MCP] = true
	}
	for offset := 0; offset < portSearchWindow; offset++ {
		ports := PortSet{
			Web: defaultWebPort + offset,
			API: defaultAPIPort + offset,
			ModelServer: defaultModelPort + offset,
			MCP: defaultMCPPort + offset,
		}
		if reserved[ports.Web] || reserved[ports.API] || reserved[ports.ModelServer] || reserved[ports.MCP] {
			continue
		}
		// Probe the OS too; a reservation map can't see unrelated processes.
		if portsAvailable(ports) {
			return ports, nil
		}
	}
	return PortSet{}, fmt.Errorf("failed to allocate an available worktree port set after %d attempts", portSearchWindow)
}
// portsAvailable reports whether every port in the set can currently be bound
// over TCP; each probe listener is closed immediately after the check.
func portsAvailable(ports PortSet) bool {
	for _, port := range []int{ports.Web, ports.API, ports.ModelServer, ports.MCP} {
		listener, err := net.Listen("tcp", fmt.Sprintf(":%d", port))
		if err != nil {
			return false
		}
		_ = listener.Close()
	}
	return true
}
func normalizePath(path string) string {
clean := filepath.Clean(path)
resolved, err := filepath.EvalSymlinks(clean)
if err == nil {
return filepath.Clean(resolved)
}
return clean
}

View File

@@ -0,0 +1,312 @@
package agentlab
import (
"os"
"path/filepath"
"strings"
"testing"
)
// TestSlug checks branch-name slugging, including a whitespace-only input
// falling back to the "worktree" placeholder.
func TestSlug(t *testing.T) {
	t.Parallel()
	tests := map[string]string{
		"feat/My Feature": "feat-my-feature",
		"lab/agent_docs": "lab-agent-docs",
		" ": "worktree",
	}
	for input, want := range tests {
		input := input
		want := want
		t.Run(input, func(t *testing.T) {
			t.Parallel()
			if got := Slug(input); got != want {
				t.Fatalf("Slug(%q) = %q, want %q", input, got, want)
			}
		})
	}
}
// TestWorktreeIDIsCollisionResistant verifies that branches whose slugs
// collide ("foo_bar" vs "foo-bar") still get distinct worktree ids, and that
// the id keeps a readable slug prefix.
func TestWorktreeIDIsCollisionResistant(t *testing.T) {
	t.Parallel()
	idOne := worktreeID("feat/foo_bar")
	idTwo := worktreeID("feat/foo-bar")
	if idOne == idTwo {
		t.Fatalf("expected distinct worktree ids, got %q", idOne)
	}
	if !strings.HasPrefix(idOne, "feat-foo-bar-") {
		t.Fatalf("unexpected worktree id format: %s", idOne)
	}
}
// TestInferLane covers lane classification for lab, product, and custom
// branch names, with and without the codex/ prefix.
func TestInferLane(t *testing.T) {
	t.Parallel()
	tests := map[string]WorktreeLane{
		"lab/docs": WorktreeLaneLab,
		"codex/lab/docs": WorktreeLaneLab,
		"fix/auth-banner-modal": WorktreeLaneProduct,
		"codex/feat/agent-check": WorktreeLaneProduct,
		"chore/update-readme": WorktreeLaneProduct,
		"codex/auth-banner-modal": WorktreeLaneCustom,
		"agent-lab": WorktreeLaneCustom,
	}
	for branch, want := range tests {
		branch := branch
		want := want
		t.Run(branch, func(t *testing.T) {
			t.Parallel()
			if got := InferLane(branch); got != want {
				t.Fatalf("InferLane(%q) = %q, want %q", branch, got, want)
			}
		})
	}
}
// TestResolveCreateBaseRef exercises base-ref selection for product, lab,
// explicit --from, and custom-lane fallback cases using a stubbed refExists.
func TestResolveCreateBaseRef(t *testing.T) {
	t.Parallel()
	// Pretend only the local lab ref and remote main exist.
	refExists := func(ref string) bool {
		switch ref {
		case "codex/agent-lab", "origin/main":
			return true
		default:
			return false
		}
	}
	product := ResolveCreateBaseRef("codex/fix/auth-banner-modal", "", refExists)
	if product.Ref != "origin/main" || product.Lane != WorktreeLaneProduct {
		t.Fatalf("unexpected product base selection: %+v", product)
	}
	lab := ResolveCreateBaseRef("codex/lab/bootstrap-docs", "", refExists)
	if lab.Ref != "codex/agent-lab" || lab.Lane != WorktreeLaneLab {
		t.Fatalf("unexpected lab base selection: %+v", lab)
	}
	explicit := ResolveCreateBaseRef("codex/auth-banner-modal", "origin/release", refExists)
	if explicit.Ref != "origin/release" || explicit.Lane != WorktreeLaneCustom {
		t.Fatalf("unexpected explicit base selection: %+v", explicit)
	}
	custom := ResolveCreateBaseRef("codex/auth-banner-modal", "", refExists)
	if custom.Ref != "HEAD" || custom.Lane != WorktreeLaneCustom {
		t.Fatalf("unexpected custom base selection: %+v", custom)
	}
}
// TestBuildManifest checks derived manifest fields: id, URLs, compose project
// name, runtime env, and namespaced Postgres/Redis settings.
func TestBuildManifest(t *testing.T) {
	t.Parallel()
	ports := PortSet{Web: 3301, API: 8381, ModelServer: 9301, MCP: 8391}
	manifest := BuildManifest(
		"/repo/main",
		"/repo/.git",
		"feat/agent-harness",
		WorktreeLaneProduct,
		"origin/main",
		"/worktrees/feat-agent-harness",
		ports,
		DependencyModeNamespaced,
	)
	if manifest.ID != worktreeID("feat/agent-harness") {
		t.Fatalf("unexpected manifest id: %s", manifest.ID)
	}
	if manifest.URLs.Web != "http://127.0.0.1:3301" {
		t.Fatalf("unexpected web url: %s", manifest.URLs.Web)
	}
	if manifest.ComposeProject != "onyx-"+worktreeID("feat/agent-harness") {
		t.Fatalf("unexpected compose project: %s", manifest.ComposeProject)
	}
	if got := manifest.ShellEnv()["INTERNAL_URL"]; got != "http://127.0.0.1:8381" {
		t.Fatalf("unexpected INTERNAL_URL: %s", got)
	}
	if got := manifest.ResolvedDependencies().PostgresDatabase; got != "agentlab_"+strings.ReplaceAll(worktreeID("feat/agent-harness"), "-", "_") {
		t.Fatalf("unexpected postgres database: %s", got)
	}
	if got := manifest.RuntimeEnv()["DEFAULT_REDIS_PREFIX"]; got != "agentlab:"+worktreeID("feat/agent-harness") {
		t.Fatalf("unexpected redis prefix: %s", got)
	}
}
// TestWriteManifestAndLoadAll round-trips a manifest through WriteManifest
// and LoadAll using a temp directory as the common git dir.
func TestWriteManifestAndLoadAll(t *testing.T) {
	t.Parallel()
	commonGitDir := t.TempDir()
	manifest := BuildManifest(
		"/repo/main",
		commonGitDir,
		"lab/docs",
		WorktreeLaneLab,
		"HEAD",
		"/repo-worktrees/lab-docs",
		PortSet{Web: 3302, API: 8382, ModelServer: 9302, MCP: 8392},
		DependencyModeShared,
	)
	if err := WriteManifest(commonGitDir, manifest); err != nil {
		t.Fatalf("WriteManifest() error = %v", err)
	}
	manifests, err := LoadAll(commonGitDir)
	if err != nil {
		t.Fatalf("LoadAll() error = %v", err)
	}
	if len(manifests) != 1 {
		t.Fatalf("LoadAll() length = %d, want 1", len(manifests))
	}
	if manifests[0].Branch != manifest.Branch {
		t.Fatalf("unexpected branch: %s", manifests[0].Branch)
	}
}
// TestWriteEnvFiles verifies both env files are written and that the backend
// file carries the namespaced Postgres/Redis/bucket overrides.
func TestWriteEnvFiles(t *testing.T) {
	t.Parallel()
	root := t.TempDir()
	manifest := BuildManifest(
		"/repo/main",
		filepath.Join(root, ".git"),
		"feat/env",
		WorktreeLaneProduct,
		"HEAD",
		root,
		PortSet{Web: 3303, API: 8383, ModelServer: 9303, MCP: 8393},
		DependencyModeNamespaced,
	)
	if err := WriteEnvFiles(manifest); err != nil {
		t.Fatalf("WriteEnvFiles() error = %v", err)
	}
	for _, path := range []string{manifest.EnvFile, manifest.WebEnvFile} {
		if _, err := os.Stat(path); err != nil {
			t.Fatalf("expected env file %s to exist: %v", path, err)
		}
	}
	backendEnv, err := os.ReadFile(manifest.EnvFile)
	if err != nil {
		t.Fatalf("read backend env file: %v", err)
	}
	if !containsAll(
		string(backendEnv),
		"POSTGRES_DB=agentlab_"+strings.ReplaceAll(worktreeID("feat/env"), "-", "_"),
		"DEFAULT_REDIS_PREFIX=agentlab:"+worktreeID("feat/env"),
		"S3_FILE_STORE_BUCKET_NAME=onyx-agentlab-"+worktreeID("feat/env"),
	) {
		t.Fatalf("backend env file missing dependency namespace entries: %s", string(backendEnv))
	}
}
// TestFindByIdentifierRejectsAmbiguousSlug writes two manifests whose
// branches slug to the same value and asserts lookup by that slug errors
// instead of silently picking one.
func TestFindByIdentifierRejectsAmbiguousSlug(t *testing.T) {
	t.Parallel()
	commonGitDir := t.TempDir()
	manifests := []Manifest{
		BuildManifest(
			"/repo/main",
			commonGitDir,
			"feat/foo_bar",
			WorktreeLaneProduct,
			"HEAD",
			"/repo-worktrees/"+worktreeID("feat/foo_bar"),
			PortSet{Web: 3302, API: 8382, ModelServer: 9302, MCP: 8392},
			DependencyModeNamespaced,
		),
		BuildManifest(
			"/repo/main",
			commonGitDir,
			"feat/foo-bar",
			WorktreeLaneProduct,
			"HEAD",
			"/repo-worktrees/"+worktreeID("feat/foo-bar"),
			PortSet{Web: 3303, API: 8383, ModelServer: 9303, MCP: 8393},
			DependencyModeNamespaced,
		),
	}
	for _, manifest := range manifests {
		if err := WriteManifest(commonGitDir, manifest); err != nil {
			t.Fatalf("WriteManifest() error = %v", err)
		}
	}
	if _, found, err := FindByIdentifier(commonGitDir, "feat-foo-bar"); err == nil || found {
		t.Fatalf("expected ambiguous slug lookup to fail, found=%t err=%v", found, err)
	}
}
// TestBootstrapLinksAndClonesFromSource sets up a fake source checkout with
// env files, a .venv, and node_modules, then verifies Bootstrap links the env
// files and .venv and clones node_modules into the new checkout.
func TestBootstrapLinksAndClonesFromSource(t *testing.T) {
	t.Parallel()
	sourceRoot := t.TempDir()
	checkoutRoot := t.TempDir()
	commonGitDir := filepath.Join(sourceRoot, ".git")
	writeTestFile(t, filepath.Join(sourceRoot, ".vscode", ".env"), "OPENAI_API_KEY=test\n")
	writeTestFile(t, filepath.Join(sourceRoot, ".vscode", ".env.web"), "AUTH_TYPE=basic\n")
	writeTestFile(t, filepath.Join(sourceRoot, ".venv", "bin", "python"), "#!/bin/sh\n")
	writeTestFile(t, filepath.Join(sourceRoot, "web", "node_modules", ".bin", "next"), "#!/bin/sh\n")
	manifest := BuildManifest(
		sourceRoot,
		commonGitDir,
		"feat/bootstrap",
		WorktreeLaneProduct,
		"HEAD",
		checkoutRoot,
		PortSet{Web: 3305, API: 8385, ModelServer: 9305, MCP: 8395},
		DependencyModeNamespaced,
	)
	result, err := Bootstrap(manifest, BootstrapOptions{
		EnvMode: BootstrapModeLink,
		PythonMode: BootstrapModeLink,
		WebMode: BootstrapModeClone,
	})
	if err != nil {
		t.Fatalf("Bootstrap() error = %v", err)
	}
	if len(result.Actions) == 0 {
		t.Fatal("expected bootstrap actions to be recorded")
	}
	if target, err := os.Readlink(filepath.Join(checkoutRoot, ".vscode", ".env")); err != nil || target == "" {
		t.Fatalf("expected .vscode/.env symlink, err=%v target=%q", err, target)
	}
	if target, err := os.Readlink(filepath.Join(checkoutRoot, ".venv")); err != nil || target == "" {
		t.Fatalf("expected .venv symlink, err=%v target=%q", err, target)
	}
	if _, err := os.Stat(filepath.Join(checkoutRoot, "web", "node_modules", ".bin", "next")); err != nil {
		t.Fatalf("expected cloned node_modules marker: %v", err)
	}
	if _, err := os.Lstat(filepath.Join(checkoutRoot, "web", "node_modules")); err != nil {
		t.Fatalf("expected node_modules to exist: %v", err)
	}
}
// writeTestFile creates path (and any missing parent directories) with the
// given content, failing the test on any error.
func writeTestFile(t *testing.T, path string, content string) {
	t.Helper()
	dir := filepath.Dir(path)
	if err := os.MkdirAll(dir, 0o755); err != nil {
		t.Fatalf("mkdir %s: %v", dir, err)
	}
	if err := os.WriteFile(path, []byte(content), 0o644); err != nil {
		t.Fatalf("write %s: %v", path, err)
	}
}
// containsAll reports whether value contains every element of parts as a
// substring.
func containsAll(value string, parts ...string) bool {
	for _, needle := range parts {
		if strings.Contains(value, needle) {
			continue
		}
		return false
	}
	return true
}

View File

@@ -0,0 +1,233 @@
package agentlab
import (
"fmt"
"os"
"os/exec"
"path/filepath"
"runtime"
)
// BootstrapMode selects how a bootstrap step provisions an asset in the new
// checkout (see the constants below for the supported strategies).
type BootstrapMode string
const (
	// BootstrapModeAuto lets each step pick a sensible strategy.
	BootstrapModeAuto BootstrapMode = "auto"
	// BootstrapModeSkip leaves the asset untouched.
	BootstrapModeSkip BootstrapMode = "skip"
	// BootstrapModeLink symlinks the asset from the source checkout.
	BootstrapModeLink BootstrapMode = "link"
	// BootstrapModeCopy copies the asset from the source checkout.
	BootstrapModeCopy BootstrapMode = "copy"
	// BootstrapModeClone clones the asset (copy-on-write where supported).
	BootstrapModeClone BootstrapMode = "clone"
	// BootstrapModeNPM installs web dependencies with `npm ci`.
	BootstrapModeNPM BootstrapMode = "npm"
)
// BootstrapOptions holds the per-asset modes for a bootstrap run.
type BootstrapOptions struct {
	// EnvMode controls the .vscode env files.
	EnvMode BootstrapMode
	// PythonMode controls the .venv.
	PythonMode BootstrapMode
	// WebMode controls web/node_modules.
	WebMode BootstrapMode
}
// BootstrapResult records the human-readable actions a bootstrap run took.
type BootstrapResult struct {
	Actions []string
}
// Bootstrap prepares a fresh worktree checkout by provisioning env files, the
// Python virtualenv, and web node_modules per the requested modes, returning
// the actions taken. The first failing step aborts the run.
func Bootstrap(manifest Manifest, opts BootstrapOptions) (*BootstrapResult, error) {
	result := &BootstrapResult{}
	steps := []func() error{
		func() error { return bootstrapEnvFiles(manifest, opts.EnvMode, result) },
		func() error { return bootstrapPython(manifest, opts.PythonMode, result) },
		func() error { return bootstrapWeb(manifest, opts.WebMode, result) },
	}
	for _, step := range steps {
		if err := step(); err != nil {
			return nil, err
		}
	}
	return result, nil
}
// bootstrapEnvFiles provisions .vscode/.env and .vscode/.env.web in the new
// checkout from the source checkout's copies. Missing sources are skipped
// silently; existing targets (including symlinks) are kept. Auto mode
// resolves to link; copy and clone both copy the file contents.
func bootstrapEnvFiles(manifest Manifest, mode BootstrapMode, result *BootstrapResult) error {
	if mode == BootstrapModeSkip {
		return nil
	}
	vscodeDir := filepath.Join(manifest.CheckoutPath, ".vscode")
	if err := os.MkdirAll(vscodeDir, 0755); err != nil {
		return fmt.Errorf("create .vscode dir: %w", err)
	}
	sources := []struct {
		source string
		target string
		label string
	}{
		{
			source: filepath.Join(manifest.CreatedFromPath, ".vscode", ".env"),
			target: filepath.Join(manifest.CheckoutPath, ".vscode", ".env"),
			label: ".vscode/.env",
		},
		{
			source: filepath.Join(manifest.CreatedFromPath, ".vscode", ".env.web"),
			target: filepath.Join(manifest.CheckoutPath, ".vscode", ".env.web"),
			label: ".vscode/.env.web",
		},
	}
	for _, item := range sources {
		// Nothing to provision if the source checkout lacks this file.
		if _, err := os.Stat(item.source); err != nil {
			continue
		}
		// Lstat so an existing symlink counts as present even if dangling.
		if _, err := os.Lstat(item.target); err == nil {
			result.Actions = append(result.Actions, fmt.Sprintf("kept existing %s", item.label))
			continue
		}
		currentMode := mode
		if currentMode == BootstrapModeAuto {
			currentMode = BootstrapModeLink
		}
		switch currentMode {
		case BootstrapModeLink:
			if err := os.Symlink(item.source, item.target); err != nil {
				return fmt.Errorf("symlink %s: %w", item.label, err)
			}
			result.Actions = append(result.Actions, fmt.Sprintf("linked %s from source checkout", item.label))
		case BootstrapModeCopy, BootstrapModeClone:
			if err := copyFile(item.source, item.target); err != nil {
				return fmt.Errorf("copy %s: %w", item.label, err)
			}
			result.Actions = append(result.Actions, fmt.Sprintf("copied %s from source checkout", item.label))
		default:
			return fmt.Errorf("unsupported env bootstrap mode: %s", currentMode)
		}
	}
	return nil
}
// bootstrapPython provisions the checkout's .venv from the source checkout's.
// An existing target .venv is kept; a missing source .venv defers backend
// setup without failing. Auto mode resolves to link; copy and clone both do
// a directory clone.
func bootstrapPython(manifest Manifest, mode BootstrapMode, result *BootstrapResult) error {
	if mode == BootstrapModeSkip {
		return nil
	}
	sourceVenv := filepath.Join(manifest.CreatedFromPath, ".venv")
	targetVenv := filepath.Join(manifest.CheckoutPath, ".venv")
	if _, err := os.Stat(targetVenv); err == nil {
		result.Actions = append(result.Actions, "kept existing .venv")
		return nil
	}
	if _, err := os.Stat(sourceVenv); err != nil {
		// Best-effort: record the gap instead of failing bootstrap.
		result.Actions = append(result.Actions, "source .venv missing; backend bootstrap deferred")
		return nil
	}
	currentMode := mode
	if currentMode == BootstrapModeAuto {
		currentMode = BootstrapModeLink
	}
	switch currentMode {
	case BootstrapModeLink:
		if err := os.Symlink(sourceVenv, targetVenv); err != nil {
			return fmt.Errorf("symlink .venv: %w", err)
		}
		result.Actions = append(result.Actions, "linked shared .venv from source checkout")
	case BootstrapModeCopy, BootstrapModeClone:
		if err := cloneDirectory(sourceVenv, targetVenv); err != nil {
			return fmt.Errorf("clone .venv: %w", err)
		}
		result.Actions = append(result.Actions, "cloned .venv from source checkout")
	default:
		return fmt.Errorf("unsupported python bootstrap mode: %s", currentMode)
	}
	return nil
}
// bootstrapWeb provisions web/node_modules for the new checkout. An existing
// target is kept. Auto mode clones the source checkout's node_modules when
// present and otherwise installs with `npm ci`; clone/copy modes also fall
// back to `npm ci` when the source has nothing to clone.
func bootstrapWeb(manifest Manifest, mode BootstrapMode, result *BootstrapResult) error {
	if mode == BootstrapModeSkip {
		return nil
	}
	sourceModules := filepath.Join(manifest.CreatedFromPath, "web", "node_modules")
	targetModules := filepath.Join(manifest.CheckoutPath, "web", "node_modules")
	// Lstat so an existing symlink counts as present even if dangling.
	if _, err := os.Lstat(targetModules); err == nil {
		result.Actions = append(result.Actions, "kept existing web/node_modules")
		return nil
	}
	currentMode := mode
	if currentMode == BootstrapModeAuto {
		if _, err := os.Stat(sourceModules); err == nil {
			currentMode = BootstrapModeClone
		} else {
			currentMode = BootstrapModeNPM
		}
	}
	switch currentMode {
	case BootstrapModeClone, BootstrapModeCopy:
		if _, err := os.Stat(sourceModules); err != nil {
			// Nothing to clone from; install from the lockfile instead.
			return npmInstallWeb(manifest, result)
		}
		if err := cloneDirectory(sourceModules, targetModules); err != nil {
			return fmt.Errorf("clone web/node_modules: %w", err)
		}
		result.Actions = append(result.Actions, "cloned local web/node_modules into worktree")
		return nil
	case BootstrapModeNPM:
		return npmInstallWeb(manifest, result)
	default:
		return fmt.Errorf("unsupported web bootstrap mode: %s", currentMode)
	}
}

// npmInstallWeb runs `npm ci` in the checkout's web directory, streaming
// output to the caller's terminal, and records the action on success.
func npmInstallWeb(manifest Manifest, result *BootstrapResult) error {
	cmd := exec.Command("npm", "ci", "--prefer-offline", "--no-audit")
	cmd.Dir = filepath.Join(manifest.CheckoutPath, "web")
	cmd.Stdout = os.Stdout
	cmd.Stderr = os.Stderr
	cmd.Stdin = os.Stdin
	if err := cmd.Run(); err != nil {
		return fmt.Errorf("npm ci: %w", err)
	}
	result.Actions = append(result.Actions, "installed web/node_modules with npm ci")
	return nil
}
func cloneDirectory(source, target string) error {
if err := os.MkdirAll(filepath.Dir(target), 0755); err != nil {
return fmt.Errorf("create parent dir for %s: %w", target, err)
}
if runtime.GOOS == "darwin" {
cmd := exec.Command("cp", "-R", "-c", source, target)
if err := cmd.Run(); err == nil {
return nil
}
}
if runtime.GOOS != "windows" {
cmd := exec.Command("cp", "-R", source, target)
if err := cmd.Run(); err == nil {
return nil
}
}
return fmt.Errorf("no supported directory clone strategy succeeded for %s", source)
}
func copyFile(source, target string) error {
data, err := os.ReadFile(source)
if err != nil {
return err
}
return os.WriteFile(target, data, 0644)
}

View File

@@ -0,0 +1,252 @@
package agentlab
import (
"bytes"
"encoding/json"
"fmt"
"os"
"os/exec"
"path/filepath"
"runtime"
"strings"
"time"
"github.com/onyx-dot-app/onyx/tools/ods/internal/alembic"
"github.com/onyx-dot-app/onyx/tools/ods/internal/envutil"
)
// DependencyResult records the human-readable actions a dependency operation
// (provision, reset, teardown) performed.
type DependencyResult struct {
	Actions []string
}
// DependencyStatus is the JSON-serializable health/usage snapshot of a
// worktree's dependencies. In namespaced mode it is populated by the embedded
// dependency_status.py script; in shared mode the Ready flags are simply set.
type DependencyStatus struct {
	Mode DependencyMode `json:"mode"`
	Namespace string `json:"namespace,omitempty"`
	PostgresDatabase string `json:"postgres_database,omitempty"`
	PostgresReady bool `json:"postgres_ready"`
	PostgresTableCount int `json:"postgres_table_count,omitempty"`
	RedisPrefix string `json:"redis_prefix,omitempty"`
	RedisReady bool `json:"redis_ready"`
	RedisKeyCount int `json:"redis_key_count,omitempty"`
	FileStoreBucket string `json:"file_store_bucket,omitempty"`
	FileStoreReady bool `json:"file_store_ready"`
	FileStoreObjectCount int `json:"file_store_object_count,omitempty"`
	SearchInfraMode string `json:"search_infra_mode"`
}
// ProvisionDependencies sets up the worktree's backing stores. Shared mode is
// a no-op beyond recording the fact; namespaced mode creates the Postgres
// database via an embedded Python script, runs alembic migrations against it,
// and ensures the file-store bucket. On success the manifest's dependency
// config and provision timestamp are persisted, and the updated manifest is
// returned alongside the actions taken.
func ProvisionDependencies(commonGitDir string, manifest Manifest) (Manifest, *DependencyResult, error) {
	deps := manifest.ResolvedDependencies()
	result := &DependencyResult{}
	switch deps.Mode {
	case DependencyModeShared:
		result.Actions = append(result.Actions, "using shared Postgres, Redis, and MinIO state")
	case DependencyModeNamespaced:
		if _, err := runPythonScript(manifest, "ensure_database.py"); err != nil {
			return manifest, nil, fmt.Errorf("ensure PostgreSQL database %s: %w", deps.PostgresDatabase, err)
		}
		result.Actions = append(result.Actions, fmt.Sprintf("ensured PostgreSQL database %s", deps.PostgresDatabase))
		// Migrations run with the worktree's env so POSTGRES_DB points at the
		// namespaced database.
		envMap, err := runtimeEnvMap(manifest)
		if err != nil {
			return manifest, nil, err
		}
		if err := alembic.UpgradeWithEnv("head", alembic.SchemaDefault, envMap); err != nil {
			return manifest, nil, fmt.Errorf("migrate namespaced database %s: %w", deps.PostgresDatabase, err)
		}
		result.Actions = append(result.Actions, fmt.Sprintf("migrated PostgreSQL database %s", deps.PostgresDatabase))
		if _, err := runPythonScript(manifest, "ensure_bucket.py"); err != nil {
			return manifest, nil, fmt.Errorf("ensure file-store bucket %s: %w", deps.FileStoreBucket, err)
		}
		result.Actions = append(result.Actions, fmt.Sprintf("ensured file-store bucket %s", deps.FileStoreBucket))
		// Redis needs no setup; the prefix is applied by the app at runtime.
		result.Actions = append(result.Actions, fmt.Sprintf("reserved Redis prefix %s", deps.RedisPrefix))
	default:
		return manifest, nil, fmt.Errorf("unsupported dependency mode: %s", deps.Mode)
	}
	result.Actions = append(result.Actions, "search infrastructure remains shared-only")
	manifest.Dependencies = deps
	manifest.Dependencies.LastProvisionedAt = time.Now().UTC().Format(time.RFC3339)
	if err := WriteManifest(commonGitDir, manifest); err != nil {
		return manifest, nil, err
	}
	return manifest, result, nil
}
// InspectDependencies reports the current state of the worktree's backing
// stores. Shared mode is assumed healthy without probing; namespaced mode
// delegates to the embedded dependency_status.py script and parses its JSON
// output over the pre-filled status struct.
func InspectDependencies(manifest Manifest) (*DependencyStatus, error) {
	deps := manifest.ResolvedDependencies()
	status := &DependencyStatus{
		Mode: deps.Mode,
		Namespace: deps.Namespace,
		PostgresDatabase: deps.PostgresDatabase,
		RedisPrefix: deps.RedisPrefix,
		FileStoreBucket: deps.FileStoreBucket,
		SearchInfraMode: deps.SearchInfraMode,
	}
	if deps.Mode == DependencyModeShared {
		// Shared infrastructure is managed outside agent-lab; report ready.
		status.PostgresReady = true
		status.RedisReady = true
		status.FileStoreReady = true
		return status, nil
	}
	output, err := runPythonScript(manifest, "dependency_status.py")
	if err != nil {
		return nil, fmt.Errorf("inspect namespaced dependencies: %w", err)
	}
	if err := json.Unmarshal([]byte(output), status); err != nil {
		return nil, fmt.Errorf("parse dependency status: %w", err)
	}
	return status, nil
}
// ResetDependencies wipes and re-provisions the worktree's namespaced backing
// stores: the embedded reset script drops/recreates the database, clears the
// Redis prefix, and empties the bucket, after which migrations are re-run.
// Shared mode is a no-op. The refreshed provision timestamp is persisted.
func ResetDependencies(commonGitDir string, manifest Manifest) (Manifest, *DependencyResult, error) {
	deps := manifest.ResolvedDependencies()
	result := &DependencyResult{}
	if deps.Mode == DependencyModeShared {
		result.Actions = append(result.Actions, "shared dependency mode selected; reset is a no-op")
		return manifest, result, nil
	}
	if _, err := runPythonScript(manifest, "reset_dependencies.py"); err != nil {
		return manifest, nil, fmt.Errorf("reset namespaced dependencies: %w", err)
	}
	result.Actions = append(result.Actions, fmt.Sprintf("dropped and recreated PostgreSQL database %s", deps.PostgresDatabase))
	result.Actions = append(result.Actions, fmt.Sprintf("cleared Redis prefix %s", deps.RedisPrefix))
	result.Actions = append(result.Actions, fmt.Sprintf("emptied file-store bucket %s", deps.FileStoreBucket))
	envMap, err := runtimeEnvMap(manifest)
	if err != nil {
		return manifest, nil, err
	}
	if err := alembic.UpgradeWithEnv("head", alembic.SchemaDefault, envMap); err != nil {
		return manifest, nil, fmt.Errorf("re-migrate namespaced database %s: %w", deps.PostgresDatabase, err)
	}
	result.Actions = append(result.Actions, fmt.Sprintf("re-migrated PostgreSQL database %s", deps.PostgresDatabase))
	result.Actions = append(result.Actions, "search infrastructure remains shared-only and was not reset")
	manifest.Dependencies = deps
	manifest.Dependencies.LastProvisionedAt = time.Now().UTC().Format(time.RFC3339)
	if err := WriteManifest(commonGitDir, manifest); err != nil {
		return manifest, nil, err
	}
	return manifest, result, nil
}
// TeardownDependencies permanently removes the worktree's namespaced backing
// stores (database, Redis prefix, bucket) via the embedded teardown script
// and clears the provision timestamp. Shared mode is a no-op.
func TeardownDependencies(commonGitDir string, manifest Manifest) (Manifest, *DependencyResult, error) {
	deps := manifest.ResolvedDependencies()
	result := &DependencyResult{}
	if deps.Mode == DependencyModeShared {
		result.Actions = append(result.Actions, "shared dependency mode selected; teardown is a no-op")
		return manifest, result, nil
	}
	if _, err := runPythonScript(manifest, "teardown_dependencies.py"); err != nil {
		return manifest, nil, fmt.Errorf("tear down namespaced dependencies: %w", err)
	}
	result.Actions = append(result.Actions, fmt.Sprintf("dropped PostgreSQL database %s", deps.PostgresDatabase))
	result.Actions = append(result.Actions, fmt.Sprintf("cleared Redis prefix %s", deps.RedisPrefix))
	result.Actions = append(result.Actions, fmt.Sprintf("deleted file-store bucket %s", deps.FileStoreBucket))
	result.Actions = append(result.Actions, "search infrastructure remains shared-only and was not torn down")
	manifest.Dependencies = deps
	// Empty timestamp marks the namespace as no longer provisioned.
	manifest.Dependencies.LastProvisionedAt = ""
	if err := WriteManifest(commonGitDir, manifest); err != nil {
		return manifest, nil, err
	}
	return manifest, result, nil
}
// runtimeEnvMap builds the environment for scripts run inside the worktree:
// values from the repo's .vscode/.env file (when present), overlaid by the
// manifest's runtime env so worktree-specific overrides always win.
func runtimeEnvMap(manifest Manifest) (map[string]string, error) {
	envMap := make(map[string]string)
	repoRoot := runtimeRepoRoot(manifest)
	backendEnvPath := filepath.Join(repoRoot, ".vscode", ".env")
	if _, err := os.Stat(backendEnvPath); err == nil {
		fileVars, err := envutil.LoadFile(backendEnvPath)
		if err != nil {
			return nil, err
		}
		for _, entry := range fileVars {
			// Entries are KEY=VALUE strings; split on the first '='.
			if idx := strings.Index(entry, "="); idx > 0 {
				envMap[entry[:idx]] = entry[idx+1:]
			}
		}
	}
	// Manifest values take precedence over the env file.
	for key, value := range manifest.RuntimeEnv() {
		envMap[key] = value
	}
	return envMap, nil
}
// runPythonScript executes one of the embedded helper scripts with the
// worktree's Python interpreter (`python -c <code>`), from the repo's backend
// directory, with the worktree's runtime env applied. It returns trimmed
// stdout; on failure the error carries stderr (or stdout, or the exec error)
// so the caller sees the script's own message.
func runPythonScript(manifest Manifest, scriptName string) (string, error) {
	pythonBinary, err := findPythonBinary(manifest)
	if err != nil {
		return "", err
	}
	code, err := loadPythonScript(scriptName)
	if err != nil {
		return "", err
	}
	envMap, err := runtimeEnvMap(manifest)
	if err != nil {
		return "", err
	}
	cmd := exec.Command(pythonBinary, "-c", code)
	cmd.Dir = filepath.Join(runtimeRepoRoot(manifest), "backend")
	cmd.Env = envutil.ApplyOverrides(os.Environ(), envMap)
	var stdout bytes.Buffer
	var stderr bytes.Buffer
	cmd.Stdout = &stdout
	cmd.Stderr = &stderr
	if err := cmd.Run(); err != nil {
		// Prefer the script's own diagnostics over Go's exit-status error.
		message := strings.TrimSpace(stderr.String())
		if message == "" {
			message = strings.TrimSpace(stdout.String())
		}
		if message == "" {
			message = err.Error()
		}
		return "", fmt.Errorf("%s", message)
	}
	return strings.TrimSpace(stdout.String()), nil
}
// findPythonBinary locates a Python interpreter in a .venv, preferring the
// worktree checkout's own venv and falling back to the source checkout's.
// Windows venvs use Scripts\python.exe; elsewhere bin/python.
func findPythonBinary(manifest Manifest) (string, error) {
	venvSuffix := []string{".venv", "bin", "python"}
	if runtime.GOOS == "windows" {
		venvSuffix = []string{".venv", "Scripts", "python.exe"}
	}
	for _, root := range []string{manifest.CheckoutPath, manifest.CreatedFromPath} {
		candidate := filepath.Join(append([]string{root}, venvSuffix...)...)
		if _, err := os.Stat(candidate); err == nil {
			return candidate, nil
		}
	}
	return "", fmt.Errorf("could not find a Python interpreter in %s/.venv or %s/.venv", manifest.CheckoutPath, manifest.CreatedFromPath)
}
// runtimeRepoRoot picks the repo root for runtime operations: the checkout
// path when it looks like a full repo (has a backend/ directory), otherwise
// the path the workspace was created from.
func runtimeRepoRoot(manifest Manifest) string {
	checkout := manifest.CheckoutPath
	if checkout == "" {
		return manifest.CreatedFromPath
	}
	if _, err := os.Stat(filepath.Join(checkout, "backend")); err != nil {
		return manifest.CreatedFromPath
	}
	return checkout
}

View File

@@ -0,0 +1,17 @@
package agentlab
import (
"embed"
"fmt"
)
// pythonScripts embeds every helper under scripts/ into the binary so the
// CLI can run them without needing this package's source on disk.
//go:embed scripts/*.py
var pythonScripts embed.FS

// loadPythonScript returns the embedded source of the named script (the
// name is resolved relative to the scripts/ directory, e.g. "status.py").
func loadPythonScript(name string) (string, error) {
	data, err := pythonScripts.ReadFile("scripts/" + name)
	if err != nil {
		return "", fmt.Errorf("load python script %s: %w", name, err)
	}
	return string(data), nil
}

View File

@@ -0,0 +1,90 @@
# Report the health of an agent-lab namespace's provisioned dependencies
# (Postgres database, Redis key prefix, S3 file-store bucket) as a single
# JSON object on stdout; consumed by the Go CLI via runPythonScript.
import json
import os
import boto3
import psycopg2
import urllib3
from botocore.config import Config
from botocore.exceptions import ClientError
from redis import Redis

# --- Postgres: connect to the namespace database and count public tables. ---
db_name = os.environ["POSTGRES_DB"]
host = os.environ.get("POSTGRES_HOST", "localhost")
port = os.environ.get("POSTGRES_PORT", "5432")
user = os.environ.get("POSTGRES_USER", "postgres")
password = os.environ.get("POSTGRES_PASSWORD", "password")
conn = psycopg2.connect(
    host=host, port=port, user=user, password=password, dbname=db_name
)
with conn.cursor() as cur:
    cur.execute(
        "SELECT COUNT(*) FROM information_schema.tables WHERE table_schema = 'public'"
    )
    table_count = int(cur.fetchone()[0])
conn.close()

# --- Redis: count keys under the namespace prefix. ---
redis_prefix = os.environ["DEFAULT_REDIS_PREFIX"]
bucket = os.environ["S3_FILE_STORE_BUCKET_NAME"]
redis_client = Redis(
    host=os.environ.get("REDIS_HOST", "localhost"),
    port=int(os.environ.get("REDIS_PORT", "6379")),
    db=int(os.environ.get("REDIS_DB_NUMBER", "0")),
    password=os.environ.get("REDIS_PASSWORD") or None,
    ssl=os.environ.get("REDIS_SSL", "").lower() == "true",
    ssl_cert_reqs="none" if os.environ.get("REDIS_SSL", "").lower() == "true" else None,
)
redis_key_count = 0
# SCAN-based iteration avoids blocking Redis the way KEYS would.
for _ in redis_client.scan_iter(match=f"{redis_prefix}:*", count=1000):
    redis_key_count += 1

# --- S3/MinIO: check the bucket exists and count its objects. ---
endpoint = os.environ.get("S3_ENDPOINT_URL") or None
access_key = os.environ.get("S3_AWS_ACCESS_KEY_ID") or None
secret_key = os.environ.get("S3_AWS_SECRET_ACCESS_KEY") or None
region = os.environ.get("AWS_REGION_NAME") or "us-east-1"
verify_ssl = os.environ.get("S3_VERIFY_SSL", "false").lower() == "true"
kwargs = {"service_name": "s3", "region_name": region}
if endpoint:
    # Custom endpoints (e.g. MinIO) need path-style addressing and sigv4.
    kwargs["endpoint_url"] = endpoint
    kwargs["config"] = Config(signature_version="s3v4", s3={"addressing_style": "path"})
if not verify_ssl:
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
    kwargs["verify"] = False
if access_key and secret_key:
    kwargs["aws_access_key_id"] = access_key
    kwargs["aws_secret_access_key"] = secret_key
s3_client = boto3.client(**kwargs)
bucket_ready = True
bucket_object_count = 0
try:
    s3_client.head_bucket(Bucket=bucket)
    paginator = s3_client.get_paginator("list_objects_v2")
    for page in paginator.paginate(Bucket=bucket):
        bucket_object_count += len(page.get("Contents", []))
except ClientError:
    # A missing bucket or access failure is reported as not-ready, not a crash.
    bucket_ready = False

# Emit the status document the Go side parses.
print(
    json.dumps(
        {
            "mode": os.environ["AGENT_LAB_DEPENDENCY_MODE"],
            "namespace": os.environ.get("AGENT_LAB_NAMESPACE", ""),
            "postgres_database": db_name,
            "postgres_ready": True,
            "postgres_table_count": table_count,
            "redis_prefix": redis_prefix,
            "redis_ready": True,
            "redis_key_count": redis_key_count,
            "file_store_bucket": bucket,
            "file_store_ready": bucket_ready,
            "file_store_object_count": bucket_object_count,
            "search_infra_mode": os.environ.get(
                "AGENT_LAB_SEARCH_INFRA_MODE", "shared"
            ),
        }
    )
)

View File

@@ -0,0 +1,40 @@
"""Ensure the agent-lab file-store bucket exists, creating it when missing."""
import os
import boto3
import urllib3
from botocore.config import Config
from botocore.exceptions import ClientError

bucket = os.environ["S3_FILE_STORE_BUCKET_NAME"]
endpoint = os.environ.get("S3_ENDPOINT_URL") or None
access_key = os.environ.get("S3_AWS_ACCESS_KEY_ID") or None
secret_key = os.environ.get("S3_AWS_SECRET_ACCESS_KEY") or None
region = os.environ.get("AWS_REGION_NAME") or "us-east-1"
verify_ssl = os.environ.get("S3_VERIFY_SSL", "false").lower() == "true"

client_kwargs = {"service_name": "s3", "region_name": region}
if endpoint:
    # Custom endpoints (e.g. MinIO) need path-style addressing and sigv4.
    client_kwargs["endpoint_url"] = endpoint
    client_kwargs["config"] = Config(
        signature_version="s3v4", s3={"addressing_style": "path"}
    )
if not verify_ssl:
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
    client_kwargs["verify"] = False
if access_key and secret_key:
    client_kwargs["aws_access_key_id"] = access_key
    client_kwargs["aws_secret_access_key"] = secret_key

client = boto3.client(**client_kwargs)
try:
    client.head_bucket(Bucket=bucket)
except ClientError as exc:
    http_status = exc.response.get("ResponseMetadata", {}).get("HTTPStatusCode")
    if http_status not in (403, 404):
        raise
    # us-east-1 (and custom endpoints) reject an explicit LocationConstraint,
    # so only send one for other AWS regions.
    if endpoint or region == "us-east-1":
        client.create_bucket(Bucket=bucket)
    else:
        client.create_bucket(
            Bucket=bucket, CreateBucketConfiguration={"LocationConstraint": region}
        )
print(bucket)

View File

@@ -0,0 +1,23 @@
"""Ensure the target Postgres database exists, creating it when missing."""
import os
import psycopg2
from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT

# Shared-server connection settings; the admin database is only used to
# issue CREATE DATABASE.
pg_settings = {
    "host": os.environ.get("POSTGRES_HOST", "localhost"),
    "port": os.environ.get("POSTGRES_PORT", "5432"),
    "user": os.environ.get("POSTGRES_USER", "postgres"),
    "password": os.environ.get("POSTGRES_PASSWORD", "password"),
}
target_db = os.environ["POSTGRES_DB"]
admin_db = os.environ.get("AGENT_LAB_POSTGRES_ADMIN_DB", "postgres")

conn = psycopg2.connect(dbname=admin_db, **pg_settings)
# CREATE DATABASE cannot run inside a transaction block.
conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
with conn.cursor() as cur:
    cur.execute("SELECT 1 FROM pg_database WHERE datname = %s", (target_db,))
    if cur.fetchone() is None:
        cur.execute(f'CREATE DATABASE "{target_db}"')
conn.close()
print(target_db)

View File

@@ -0,0 +1,67 @@
# Reset an agent-lab namespace to a clean slate: drop and recreate the
# Postgres database, delete Redis keys under the namespace prefix, and empty
# (but keep) the file-store bucket.
import os
import boto3
import psycopg2
import urllib3
from botocore.config import Config
from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT
from redis import Redis

# --- Postgres: drop and recreate the namespace database. ---
host = os.environ.get("POSTGRES_HOST", "localhost")
port = os.environ.get("POSTGRES_PORT", "5432")
user = os.environ.get("POSTGRES_USER", "postgres")
password = os.environ.get("POSTGRES_PASSWORD", "password")
target_db = os.environ["POSTGRES_DB"]
admin_db = os.environ.get("AGENT_LAB_POSTGRES_ADMIN_DB", "postgres")
conn = psycopg2.connect(
    host=host, port=port, user=user, password=password, dbname=admin_db
)
# DROP/CREATE DATABASE cannot run inside a transaction block.
conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
with conn.cursor() as cur:
    # Kick out any open sessions first; DROP DATABASE fails while in use.
    cur.execute(
        "SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE datname = %s AND pid <> pg_backend_pid()",
        (target_db,),
    )
    cur.execute(f'DROP DATABASE IF EXISTS "{target_db}"')
    cur.execute(f'CREATE DATABASE "{target_db}"')
conn.close()

# --- Redis: delete every key under the namespace prefix. ---
redis_prefix = os.environ["DEFAULT_REDIS_PREFIX"]
redis_client = Redis(
    host=os.environ.get("REDIS_HOST", "localhost"),
    port=int(os.environ.get("REDIS_PORT", "6379")),
    db=int(os.environ.get("REDIS_DB_NUMBER", "0")),
    password=os.environ.get("REDIS_PASSWORD") or None,
    ssl=os.environ.get("REDIS_SSL", "").lower() == "true",
    ssl_cert_reqs="none" if os.environ.get("REDIS_SSL", "").lower() == "true" else None,
)
keys = list(redis_client.scan_iter(match=f"{redis_prefix}:*", count=1000))
if keys:
    redis_client.delete(*keys)

# --- S3/MinIO: delete all objects, keeping the bucket itself. ---
bucket = os.environ["S3_FILE_STORE_BUCKET_NAME"]
endpoint = os.environ.get("S3_ENDPOINT_URL") or None
access_key = os.environ.get("S3_AWS_ACCESS_KEY_ID") or None
secret_key = os.environ.get("S3_AWS_SECRET_ACCESS_KEY") or None
region = os.environ.get("AWS_REGION_NAME") or "us-east-1"
verify_ssl = os.environ.get("S3_VERIFY_SSL", "false").lower() == "true"
kwargs = {"service_name": "s3", "region_name": region}
if endpoint:
    # Custom endpoints (e.g. MinIO) need path-style addressing and sigv4.
    kwargs["endpoint_url"] = endpoint
    kwargs["config"] = Config(signature_version="s3v4", s3={"addressing_style": "path"})
if not verify_ssl:
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
    kwargs["verify"] = False
if access_key and secret_key:
    kwargs["aws_access_key_id"] = access_key
    kwargs["aws_secret_access_key"] = secret_key
s3_client = boto3.client(**kwargs)
# NOTE(review): unlike the teardown script, this listing is not wrapped in a
# ClientError guard — it assumes the bucket already exists. Confirm reset is
# never run before provisioning.
paginator = s3_client.get_paginator("list_objects_v2")
for page in paginator.paginate(Bucket=bucket):
    objects = [{"Key": item["Key"]} for item in page.get("Contents", [])]
    if objects:
        s3_client.delete_objects(Bucket=bucket, Delete={"Objects": objects})

View File

@@ -0,0 +1,73 @@
# Tear down an agent-lab namespace's dependencies entirely: drop the Postgres
# database, delete Redis keys under the namespace prefix, and empty then
# delete the file-store bucket. Missing resources are tolerated.
import os
import boto3
import psycopg2
import urllib3
from botocore.config import Config
from botocore.exceptions import ClientError
from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT
from redis import Redis

# --- Postgres: drop the namespace database (no recreate). ---
host = os.environ.get("POSTGRES_HOST", "localhost")
port = os.environ.get("POSTGRES_PORT", "5432")
user = os.environ.get("POSTGRES_USER", "postgres")
password = os.environ.get("POSTGRES_PASSWORD", "password")
target_db = os.environ["POSTGRES_DB"]
admin_db = os.environ.get("AGENT_LAB_POSTGRES_ADMIN_DB", "postgres")
conn = psycopg2.connect(
    host=host, port=port, user=user, password=password, dbname=admin_db
)
# DROP DATABASE cannot run inside a transaction block.
conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
with conn.cursor() as cur:
    # Kick out any open sessions first; DROP DATABASE fails while in use.
    cur.execute(
        "SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE datname = %s AND pid <> pg_backend_pid()",
        (target_db,),
    )
    cur.execute(f'DROP DATABASE IF EXISTS "{target_db}"')
conn.close()

# --- Redis: delete every key under the namespace prefix. ---
redis_prefix = os.environ["DEFAULT_REDIS_PREFIX"]
redis_client = Redis(
    host=os.environ.get("REDIS_HOST", "localhost"),
    port=int(os.environ.get("REDIS_PORT", "6379")),
    db=int(os.environ.get("REDIS_DB_NUMBER", "0")),
    password=os.environ.get("REDIS_PASSWORD") or None,
    ssl=os.environ.get("REDIS_SSL", "").lower() == "true",
    ssl_cert_reqs="none" if os.environ.get("REDIS_SSL", "").lower() == "true" else None,
)
keys = list(redis_client.scan_iter(match=f"{redis_prefix}:*", count=1000))
if keys:
    redis_client.delete(*keys)

# --- S3/MinIO: empty the bucket, then delete it. ---
bucket = os.environ["S3_FILE_STORE_BUCKET_NAME"]
endpoint = os.environ.get("S3_ENDPOINT_URL") or None
access_key = os.environ.get("S3_AWS_ACCESS_KEY_ID") or None
secret_key = os.environ.get("S3_AWS_SECRET_ACCESS_KEY") or None
region = os.environ.get("AWS_REGION_NAME") or "us-east-1"
verify_ssl = os.environ.get("S3_VERIFY_SSL", "false").lower() == "true"
kwargs = {"service_name": "s3", "region_name": region}
if endpoint:
    # Custom endpoints (e.g. MinIO) need path-style addressing and sigv4.
    kwargs["endpoint_url"] = endpoint
    kwargs["config"] = Config(signature_version="s3v4", s3={"addressing_style": "path"})
if not verify_ssl:
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
    kwargs["verify"] = False
if access_key and secret_key:
    kwargs["aws_access_key_id"] = access_key
    kwargs["aws_secret_access_key"] = secret_key
s3_client = boto3.client(**kwargs)
try:
    paginator = s3_client.get_paginator("list_objects_v2")
    for page in paginator.paginate(Bucket=bucket):
        objects = [{"Key": item["Key"]} for item in page.get("Contents", [])]
        if objects:
            s3_client.delete_objects(Bucket=bucket, Delete={"Objects": objects})
    s3_client.delete_bucket(Bucket=bucket)
except ClientError as exc:
    # A bucket that is already gone (404) or inaccessible (403) is fine for
    # teardown; anything else is a real failure.
    status = exc.response.get("ResponseMetadata", {}).get("HTTPStatusCode")
    if status not in (403, 404):
        raise

View File

@@ -53,12 +53,17 @@ func FindAlembicBinary() (string, error) {
// otherwise it will attempt to run via docker exec on a container
// that has alembic installed (e.g., api_server).
func Run(args []string, schema Schema) error {
return RunWithEnv(args, schema, nil)
}
// RunWithEnv executes an alembic command with explicit environment overrides.
func RunWithEnv(args []string, schema Schema, extraEnv map[string]string) error {
// Check if we need to run via docker exec
if shouldUseDockerExec() {
return runViaDockerExec(args, schema)
return runViaDockerExec(args, schema, extraEnv)
}
return runLocally(args, schema)
return runLocally(args, schema, extraEnv)
}
// shouldUseDockerExec determines if we should run alembic via docker exec.
@@ -79,7 +84,7 @@ func shouldUseDockerExec() bool {
}
// runLocally runs alembic on the local machine.
func runLocally(args []string, schema Schema) error {
func runLocally(args []string, schema Schema, extraEnv map[string]string) error {
backendDir, err := paths.BackendDir()
if err != nil {
return fmt.Errorf("failed to find backend directory: %w", err)
@@ -104,13 +109,13 @@ func runLocally(args []string, schema Schema) error {
cmd.Stdin = os.Stdin
// Pass through POSTGRES_* environment variables
cmd.Env = buildAlembicEnv()
cmd.Env = buildAlembicEnv(extraEnv)
return cmd.Run()
}
// runViaDockerExec runs alembic inside a Docker container that has network access.
func runViaDockerExec(args []string, schema Schema) error {
func runViaDockerExec(args []string, schema Schema, extraEnv map[string]string) error {
// Find a container with alembic installed (api_server)
container, err := findAlembicContainer()
if err != nil {
@@ -136,7 +141,11 @@ func runViaDockerExec(args []string, schema Schema) error {
// Run alembic inside the container
// The container should have the correct env vars and network access
dockerArgs := []string{"exec", "-i", container, "alembic"}
dockerArgs := []string{"exec", "-i"}
for key, value := range extraEnv {
dockerArgs = append(dockerArgs, "-e", fmt.Sprintf("%s=%s", key, value))
}
dockerArgs = append(dockerArgs, container, "alembic")
dockerArgs = append(dockerArgs, alembicArgs...)
cmd := exec.Command("docker", dockerArgs...)
@@ -158,7 +167,7 @@ var alembicContainerNames = []string{
// It inherits the current environment and ensures POSTGRES_* variables are set.
// If POSTGRES_HOST is not explicitly set, it attempts to detect the PostgreSQL
// container IP address automatically.
func buildAlembicEnv() []string {
func buildAlembicEnv(extraEnv map[string]string) []string {
env := os.Environ()
// Get postgres config (which reads from env with defaults)
@@ -188,6 +197,10 @@ func buildAlembicEnv() []string {
}
}
for key, value := range extraEnv {
env = append(env, fmt.Sprintf("%s=%s", key, value))
}
return env
}
@@ -238,6 +251,14 @@ func Upgrade(revision string, schema Schema) error {
return Run([]string{"upgrade", revision}, schema)
}
// UpgradeWithEnv runs alembic upgrade with explicit environment overrides.
func UpgradeWithEnv(revision string, schema Schema, extraEnv map[string]string) error {
if revision == "" {
revision = "head"
}
return RunWithEnv([]string{"upgrade", revision}, schema, extraEnv)
}
// Downgrade runs alembic downgrade to the specified revision.
func Downgrade(revision string, schema Schema) error {
return Run([]string{"downgrade", revision}, schema)

View File

@@ -0,0 +1,105 @@
package envutil
import (
"bufio"
"fmt"
"os"
"sort"
"strings"
)
// LoadFile parses a .env-style file into KEY=VALUE entries suitable for
// appending to os.Environ(). Blank lines and comments are skipped.
func LoadFile(path string) ([]string, error) {
f, err := os.Open(path)
if err != nil {
return nil, fmt.Errorf("open env file %s: %w", path, err)
}
defer func() { _ = f.Close() }()
var envVars []string
scanner := bufio.NewScanner(f)
for scanner.Scan() {
line := strings.TrimSpace(scanner.Text())
if line == "" || strings.HasPrefix(line, "#") {
continue
}
if idx := strings.Index(line, "="); idx > 0 {
key := strings.TrimSpace(line[:idx])
value := strings.TrimSpace(line[idx+1:])
value = strings.Trim(value, `"'`)
envVars = append(envVars, fmt.Sprintf("%s=%s", key, value))
}
}
if err := scanner.Err(); err != nil {
return nil, fmt.Errorf("read env file %s: %w", path, err)
}
return envVars, nil
}
// Merge combines shell environment with file-based defaults. Shell values
// take precedence: a file entry is appended only when its key is absent
// from shellEnv.
func Merge(shellEnv, fileVars []string) []string {
	seen := make(map[string]bool, len(shellEnv))
	for _, pair := range shellEnv {
		if eq := strings.Index(pair, "="); eq > 0 {
			seen[pair[:eq]] = true
		}
	}

	out := append(make([]string, 0, len(shellEnv)+len(fileVars)), shellEnv...)
	for _, pair := range fileVars {
		eq := strings.Index(pair, "=")
		if eq <= 0 {
			continue
		}
		if !seen[pair[:eq]] {
			out = append(out, pair)
		}
	}
	return out
}
// ApplyOverrides replaces or appends KEY=VALUE entries in env with the
// provided overrides. Existing entries for overridden keys are dropped, and
// the override values are appended in sorted key order, so the result holds
// at most one entry per overridden key.
func ApplyOverrides(env []string, overrides map[string]string) []string {
	if len(overrides) == 0 {
		return env
	}

	result := make([]string, 0, len(env)+len(overrides))
	for _, pair := range env {
		if eq := strings.Index(pair, "="); eq > 0 {
			if _, replaced := overrides[pair[:eq]]; replaced {
				continue
			}
		}
		result = append(result, pair)
	}

	// Append overrides in sorted key order for deterministic output.
	keys := make([]string, 0, len(overrides))
	for key := range overrides {
		keys = append(keys, key)
	}
	sort.Strings(keys)
	for _, key := range keys {
		result = append(result, fmt.Sprintf("%s=%s", key, overrides[key]))
	}
	return result
}
// MapToEnvEntries converts a string map into KEY=VALUE entries in stable
// (sorted-by-key) order.
func MapToEnvEntries(values map[string]string) []string {
	entries := make([]string, 0, len(values))
	for key := range values {
		entries = append(entries, key)
	}
	sort.Strings(entries)
	// Rewrite each sorted key in place as its KEY=VALUE form.
	for i, key := range entries {
		entries[i] = fmt.Sprintf("%s=%s", key, values[key])
	}
	return entries
}

View File

@@ -0,0 +1,122 @@
package journey
import (
"encoding/json"
"fmt"
"os"
"path/filepath"
"strings"
)
const (
	// RegistryPath is the repo-relative location of the journey registry.
	RegistryPath = "web/tests/e2e/journeys/registry.json"
	// DefaultPlanPath is the repo-relative default plan file consumed by CI.
	DefaultPlanPath = ".github/agent-journeys.json"
)

// Definition describes a single runnable journey from the registry.
type Definition struct {
	Name        string `json:"name"`
	Description string `json:"description"`
	TestPath    string `json:"test_path"`
	Project     string `json:"project"`
	RequiresModelServer bool `json:"requires_model_server"`
	SkipGlobalSetup     bool `json:"skip_global_setup"`
}

// Registry is the parsed registry.json payload.
type Registry struct {
	Journeys []Definition `json:"journeys"`
}

// Plan lists the journey names selected for a particular run.
type Plan struct {
	Journeys []string `json:"journeys"`
}
// LoadRegistry reads and validates the journey registry under repoRoot.
// It fails when the file is missing/unparseable, when the registry is
// empty, or when any entry lacks a name, test_path, or project.
func LoadRegistry(repoRoot string) (Registry, error) {
	var registry Registry

	raw, err := os.ReadFile(filepath.Join(repoRoot, RegistryPath))
	if err != nil {
		return registry, fmt.Errorf("read journey registry: %w", err)
	}
	if err := json.Unmarshal(raw, &registry); err != nil {
		return registry, fmt.Errorf("parse journey registry: %w", err)
	}
	if len(registry.Journeys) == 0 {
		return registry, fmt.Errorf("journey registry is empty")
	}

	for _, entry := range registry.Journeys {
		switch {
		case strings.TrimSpace(entry.Name) == "":
			return registry, fmt.Errorf("journey registry contains an entry with an empty name")
		case strings.TrimSpace(entry.TestPath) == "":
			return registry, fmt.Errorf("journey %q is missing test_path", entry.Name)
		case strings.TrimSpace(entry.Project) == "":
			return registry, fmt.Errorf("journey %q is missing project", entry.Name)
		}
	}
	return registry, nil
}
// LoadPlan reads a plan file and requires it to name at least one journey.
func LoadPlan(planPath string) (Plan, error) {
	var plan Plan

	raw, err := os.ReadFile(planPath)
	if err != nil {
		return plan, fmt.Errorf("read journey plan: %w", err)
	}
	if err := json.Unmarshal(raw, &plan); err != nil {
		return plan, fmt.Errorf("parse journey plan: %w", err)
	}
	if len(plan.Journeys) == 0 {
		return plan, fmt.Errorf("journey plan contains no journeys")
	}
	return plan, nil
}
// ResolveDefinitions maps journey names to their registry definitions,
// preserving the order of names. An unknown name is an error.
func ResolveDefinitions(repoRoot string, names []string) ([]Definition, error) {
	registry, err := LoadRegistry(repoRoot)
	if err != nil {
		return nil, err
	}

	index := make(map[string]Definition, len(registry.Journeys))
	for _, def := range registry.Journeys {
		index[def.Name] = def
	}

	resolved := make([]Definition, 0, len(names))
	for _, name := range names {
		def, found := index[name]
		if !found {
			return nil, fmt.Errorf("unknown journey %q", name)
		}
		resolved = append(resolved, def)
	}
	return resolved, nil
}
// Slug converts an arbitrary string into a lowercase, dash-separated
// identifier: runs of non-alphanumeric characters collapse to a single
// dash, leading/trailing dashes are removed, and an empty result falls
// back to "journey".
func Slug(value string) string {
	lowered := strings.ReplaceAll(strings.ToLower(strings.TrimSpace(value)), "/", "-")
	isAlnum := func(r rune) bool {
		return (r >= 'a' && r <= 'z') || (r >= '0' && r <= '9')
	}
	// Splitting on non-alphanumeric runs and rejoining with "-" both
	// collapses separators and trims them from the ends.
	parts := strings.FieldsFunc(lowered, func(r rune) bool { return !isAlnum(r) })
	cleaned := strings.Join(parts, "-")
	if cleaned == "" {
		return "journey"
	}
	return cleaned
}

View File

@@ -0,0 +1,59 @@
package journey
import (
"os"
"path/filepath"
"testing"
)
// TestResolveDefinitions verifies that a journey name resolves against a
// registry.json placed under the expected repo-relative path.
func TestResolveDefinitions(t *testing.T) {
	// NOTE: t.Helper() was removed here — it only makes sense inside helper
	// functions, not inside the Test function itself, where it is a no-op
	// that misleads readers about failure attribution.
	root := t.TempDir()
	registryDir := filepath.Join(root, "web", "tests", "e2e", "journeys")
	if err := os.MkdirAll(registryDir, 0755); err != nil {
		t.Fatalf("mkdir: %v", err)
	}
	if err := os.WriteFile(filepath.Join(registryDir, "registry.json"), []byte(`{
  "journeys": [
    {
      "name": "auth-landing",
      "description": "test",
      "test_path": "tests/e2e/journeys/auth_landing.spec.ts",
      "project": "journey",
      "requires_model_server": false,
      "skip_global_setup": true
    }
  ]
}`), 0644); err != nil {
		t.Fatalf("write registry: %v", err)
	}
	definitions, err := ResolveDefinitions(root, []string{"auth-landing"})
	if err != nil {
		t.Fatalf("resolve definitions: %v", err)
	}
	if len(definitions) != 1 {
		t.Fatalf("expected 1 definition, got %d", len(definitions))
	}
	if definitions[0].Project != "journey" {
		t.Fatalf("expected project journey, got %q", definitions[0].Project)
	}
}
// TestLoadPlanRequiresJourneys verifies that a well-formed plan file loads
// and retains its journey list.
func TestLoadPlanRequiresJourneys(t *testing.T) {
	// NOTE: t.Helper() was removed here — calling it in a Test function
	// (rather than a shared helper) is a misleading no-op.
	path := filepath.Join(t.TempDir(), "journeys.json")
	if err := os.WriteFile(path, []byte(`{"journeys":["auth-landing"]}`), 0644); err != nil {
		t.Fatalf("write plan: %v", err)
	}
	plan, err := LoadPlan(path)
	if err != nil {
		t.Fatalf("load plan: %v", err)
	}
	if len(plan.Journeys) != 1 || plan.Journeys[0] != "auth-landing" {
		t.Fatalf("unexpected plan contents: %+v", plan)
	}
}

View File

@@ -0,0 +1,147 @@
package prreview
import (
"fmt"
"regexp"
"sort"
"strings"
)
// Source identifies who authored a review thread's first comment.
type Source string

const (
	SourceHuman    Source = "human"
	SourceCodex    Source = "codex"
	SourceGreptile Source = "greptile"
	SourceCubic    Source = "cubic"
	SourceBot      Source = "bot"
)

// Comment is a single review comment within a thread.
type Comment struct {
	ID          int    `json:"id"`
	Body        string `json:"body"`
	AuthorLogin string `json:"author_login"`
	URL         string `json:"url,omitempty"`
	CreatedAt   string `json:"created_at,omitempty"`
}

// Thread is one review conversation, optionally anchored to a file/line.
type Thread struct {
	ID         string    `json:"id"`
	IsResolved bool      `json:"is_resolved"`
	IsOutdated bool      `json:"is_outdated"`
	Path       string    `json:"path,omitempty"`
	Line       int       `json:"line,omitempty"`
	StartLine  int       `json:"start_line,omitempty"`
	Comments   []Comment `json:"comments"`
}

// PullRequest bundles a PR with its review threads.
type PullRequest struct {
	Number  int      `json:"number"`
	Title   string   `json:"title"`
	URL     string   `json:"url,omitempty"`
	Threads []Thread `json:"threads"`
}

// ThreadSummary is the triage verdict for one thread.
type ThreadSummary struct {
	Thread      Thread   `json:"thread"`
	Source      Source   `json:"source"`
	Category    string   `json:"category"`
	DuplicateOf string   `json:"duplicate_of,omitempty"`
	Reasons     []string `json:"reasons,omitempty"`
}

// TriageResult is the complete triage output for a pull request.
type TriageResult struct {
	PullRequest PullRequest     `json:"pull_request"`
	Summaries   []ThreadSummary `json:"summaries"`
}

// nonAlphaNum matches the character runs stripped during body normalization.
var nonAlphaNum = regexp.MustCompile(`[^a-z0-9]+`)

// ClassifySource buckets a reviewer login by substring match, checking the
// known review tools first and falling back to a generic bot check, then
// human. The probe order matters: e.g. "greptile-ai[bot]" must classify as
// greptile, not generic bot.
func ClassifySource(login string) Source {
	normalized := strings.ToLower(strings.TrimSpace(login))
	probes := []struct {
		needle string
		source Source
	}{
		{"codex", SourceCodex},
		{"greptile", SourceGreptile},
		{"cubic", SourceCubic},
	}
	for _, probe := range probes {
		if strings.Contains(normalized, probe.needle) {
			return probe.source
		}
	}
	if strings.HasSuffix(normalized, "[bot]") || strings.Contains(normalized, "bot") {
		return SourceBot
	}
	return SourceHuman
}
// Triage classifies every thread of a pull request. Resolved and outdated
// threads are filed as such; remaining ("actionable") threads are checked
// for duplicates of an earlier actionable thread with the same path, line,
// and normalized first-comment body. The output is sorted by category,
// then source, then thread ID.
func Triage(pr PullRequest) TriageResult {
	summaries := make([]ThreadSummary, 0, len(pr.Threads))
	// Maps a duplicate key to the ID of the first actionable thread seen
	// with that key.
	firstActionable := map[string]string{}

	for _, thread := range pr.Threads {
		source := SourceHuman
		if len(thread.Comments) > 0 {
			source = ClassifySource(thread.Comments[0].AuthorLogin)
		}

		summary := ThreadSummary{Thread: thread, Source: source, Category: "actionable"}
		switch {
		case thread.IsResolved:
			summary.Category = "resolved"
			summary.Reasons = append(summary.Reasons, "thread already resolved")
		case thread.IsOutdated:
			summary.Category = "outdated"
			summary.Reasons = append(summary.Reasons, "thread marked outdated by GitHub")
		}

		if summary.Category == "actionable" {
			key := duplicateKey(thread)
			if original, dup := firstActionable[key]; dup {
				summary.Category = "duplicate"
				summary.DuplicateOf = original
				summary.Reasons = append(summary.Reasons, fmt.Sprintf("duplicates %s", original))
			} else {
				firstActionable[key] = thread.ID
			}
		}

		if summary.Category == "actionable" {
			if source == SourceHuman {
				summary.Reasons = append(summary.Reasons, "human review requires explicit response or fix")
			} else {
				summary.Reasons = append(summary.Reasons, fmt.Sprintf("%s-generated review comment", source))
			}
		}

		summaries = append(summaries, summary)
	}

	sort.Slice(summaries, func(i, j int) bool {
		if summaries[i].Category != summaries[j].Category {
			return summaries[i].Category < summaries[j].Category
		}
		if summaries[i].Source != summaries[j].Source {
			return summaries[i].Source < summaries[j].Source
		}
		return summaries[i].Thread.ID < summaries[j].Thread.ID
	})

	return TriageResult{PullRequest: pr, Summaries: summaries}
}
// duplicateKey derives the identity used for duplicate detection: the
// thread's path and line, plus the normalized first-comment body when one
// exists, joined with "::".
func duplicateKey(thread Thread) string {
	key := thread.Path + "::" + fmt.Sprintf("%d", thread.Line)
	if len(thread.Comments) > 0 {
		key += "::" + normalizeBody(thread.Comments[0].Body)
	}
	return key
}
// normalizeBody canonicalizes a comment body for comparison: lowercase,
// non-alphanumeric runs replaced by spaces, and whitespace collapsed to
// single spaces.
func normalizeBody(body string) string {
	collapsed := nonAlphaNum.ReplaceAllString(strings.ToLower(strings.TrimSpace(body)), " ")
	return strings.Join(strings.Fields(collapsed), " ")
}

View File

@@ -0,0 +1,61 @@
package prreview
import "testing"
// TestClassifySource checks login-to-source classification for each known
// review tool, a generic bot, and a human fallback.
func TestClassifySource(t *testing.T) {
	// NOTE: t.Helper() was removed here — it belongs in helper functions,
	// not Test functions, where it is a misleading no-op.
	cases := map[string]Source{
		"openai-codex-reviewer[bot]": SourceCodex,
		"greptile-ai[bot]":           SourceGreptile,
		"cubic-review[bot]":          SourceCubic,
		"renovate[bot]":              SourceBot,
		"human-user":                 SourceHuman,
	}
	for login, expected := range cases {
		// Errorf (not Fatalf) so every misclassification is reported in one run.
		if actual := ClassifySource(login); actual != expected {
			t.Errorf("classify %q: expected %s, got %s", login, expected, actual)
		}
	}
}
// TestTriageMarksDuplicates verifies that two threads on the same file/line
// whose first comments normalize to the same text are deduplicated, with
// the later one marked as a duplicate.
func TestTriageMarksDuplicates(t *testing.T) {
	// NOTE: t.Helper() was removed here — calling it in a Test function
	// (rather than a shared helper) is a misleading no-op.
	result := Triage(PullRequest{
		Number: 42,
		Threads: []Thread{
			{
				ID:   "thread-1",
				Path: "web/src/foo.tsx",
				Line: 10,
				Comments: []Comment{
					{ID: 1, AuthorLogin: "greptile-ai[bot]", Body: "Handle null values here."},
				},
			},
			{
				ID:   "thread-2",
				Path: "web/src/foo.tsx",
				Line: 10,
				Comments: []Comment{
					// Same text modulo punctuation — must normalize equal.
					{ID: 2, AuthorLogin: "openai-codex-reviewer[bot]", Body: "Handle null values here"},
				},
			},
		},
	})
	if len(result.Summaries) != 2 {
		t.Fatalf("expected 2 summaries, got %d", len(result.Summaries))
	}
	var duplicateFound bool
	for _, summary := range result.Summaries {
		if summary.Thread.ID == "thread-2" && summary.Category == "duplicate" {
			duplicateFound = true
		}
	}
	if !duplicateFound {
		t.Fatalf("expected duplicate thread to be detected: %+v", result.Summaries)
	}
}

View File

@@ -3,8 +3,13 @@ import * as dotenv from "dotenv";
dotenv.config({ path: ".vscode/.env" });
const journeyMode = process.env.PLAYWRIGHT_JOURNEY_MODE === "1";
export default defineConfig({
globalSetup: require.resolve("./tests/e2e/global-setup"),
globalSetup:
process.env.PLAYWRIGHT_SKIP_GLOBAL_SETUP === "1"
? undefined
: require.resolve("./tests/e2e/global-setup"),
timeout: 100000, // 100 seconds timeout
expect: {
timeout: 15000, // 15 seconds timeout for all assertions to reduce flakiness
@@ -26,12 +31,12 @@ export default defineConfig({
reporter: [["list"]],
// Only run Playwright tests from tests/e2e directory (ignore Jest tests in src/)
testMatch: /.*\/tests\/e2e\/.*\.spec\.ts/,
outputDir: "output/playwright",
outputDir: process.env.PLAYWRIGHT_OUTPUT_DIR || "output/playwright",
use: {
// Base URL for the application, can be overridden via BASE_URL environment variable
baseURL: process.env.BASE_URL || "http://localhost:3000",
// Capture trace on failure
trace: "retain-on-failure",
trace: journeyMode ? "on" : "retain-on-failure",
},
projects: [
{
@@ -65,5 +70,15 @@ export default defineConfig({
},
grep: /@lite/,
},
{
name: "journey",
use: {
...devices["Desktop Chrome"],
viewport: { width: 1280, height: 720 },
video: "on",
},
grep: /@journey/,
workers: 1,
},
],
});

View File

@@ -75,10 +75,6 @@ export interface Settings {
// Factory defaults for the restore button.
default_user_file_max_upload_size_mb?: number;
default_file_token_count_threshold_k?: number;
// True when the backend runs inside a container (Docker/Podman).
// Used to default local-service URLs to host.docker.internal.
is_containerized?: boolean;
}
export enum NotificationType {

View File

@@ -0,0 +1,106 @@
import React from "react";
import { render, screen, waitFor } from "@tests/setup/test-utils";
import { RedirectError } from "@/lib/fetcher";
import AppHealthBanner from "./AppHealthBanner";
// Controllable stand-ins for the component's data sources. Each test sets
// these before render to simulate a particular auth state.
const mockLogout = jest.fn();
const mockUseSWR = jest.fn();
const mockUseCurrentUser = jest.fn();
const mockUsePathname = jest.fn();

// jest.mock factories are hoisted above the imports by Jest, so they must
// only reference the mock fns lazily (inside the returned functions).
jest.mock("swr", () => ({
  __esModule: true,
  ...jest.requireActual("swr"),
  default: (...args: unknown[]) => mockUseSWR(...args),
}));
jest.mock("next/navigation", () => ({
  usePathname: () => mockUsePathname(),
  useRouter: () => ({
    push: jest.fn(),
  }),
}));
jest.mock("@/hooks/useCurrentUser", () => ({
  useCurrentUser: () => mockUseCurrentUser(),
}));
jest.mock("@/lib/user", () => ({
  logout: (...args: unknown[]) => mockLogout(...args),
  refreshToken: jest.fn(),
}));

describe("AppHealthBanner logout handling", () => {
  beforeEach(() => {
    // Baseline: no SWR error, no current user, sitting on an auth page.
    jest.clearAllMocks();
    mockLogout.mockResolvedValue(undefined);
    mockUseSWR.mockReturnValue({ error: undefined });
    mockUseCurrentUser.mockReturnValue({
      user: undefined,
      mutateUser: jest.fn(),
      userError: undefined,
    });
    mockUsePathname.mockReturnValue("/auth/login");
  });

  it("does not show the logged-out modal or call logout on auth pages after a 403", async () => {
    // A 403 while on /auth/* should be ignored — the user was never logged in.
    mockUseCurrentUser.mockReturnValue({
      user: undefined,
      mutateUser: jest.fn(),
      userError: {
        status: 403,
      },
    });
    render(<AppHealthBanner />);
    await waitFor(() => {
      expect(mockLogout).not.toHaveBeenCalled();
    });
    expect(
      screen.queryByText(/you have been logged out/i)
    ).not.toBeInTheDocument();
  });

  it("does not show the logged-out modal on a fresh unauthenticated load", async () => {
    // An auth redirect on first load (no user ever seen) must stay silent.
    mockUsePathname.mockReturnValue("/");
    mockUseSWR.mockReturnValue({
      error: new RedirectError("auth redirect", 403, {}),
    });
    render(<AppHealthBanner />);
    await waitFor(() => {
      expect(mockLogout).not.toHaveBeenCalled();
    });
    expect(
      screen.queryByText(/you have been logged out/i)
    ).not.toBeInTheDocument();
  });

  it("shows the logged-out modal after a 403 when a user was previously loaded", async () => {
    // Once a user has been seen, a 403 on an app page means a real session
    // loss: logout is called and the modal appears.
    mockUsePathname.mockReturnValue("/chat");
    mockUseCurrentUser.mockReturnValue({
      user: {
        id: "user-1",
        email: "a@example.com",
      },
      mutateUser: jest.fn(),
      userError: {
        status: 403,
      },
    });
    render(<AppHealthBanner />);
    await waitFor(() => {
      expect(mockLogout).toHaveBeenCalled();
    });
    expect(
      await screen.findByText(/you have been logged out/i)
    ).toBeInTheDocument();
  });
});

View File

@@ -4,7 +4,7 @@ import { errorHandlingFetcher, RedirectError } from "@/lib/fetcher";
import useSWR from "swr";
import { SWR_KEYS } from "@/lib/swr-keys";
import Modal from "@/refresh-components/Modal";
import { useCallback, useEffect, useState, useRef } from "react";
import { useCallback, useEffect, useRef, useState } from "react";
import { getSecondsUntilExpiration } from "@/lib/time";
import { refreshToken } from "@/lib/user";
import { NEXT_PUBLIC_CUSTOM_REFRESH_URL } from "@/lib/constants";
@@ -24,19 +24,51 @@ export default function AppHealthBanner() {
const pathname = usePathname();
const expirationTimeoutRef = useRef<NodeJS.Timeout | null>(null);
const refreshIntervalRef = useRef<NodeJS.Timer | null>(null);
const pathnameRef = useRef<string | null>(pathname);
const hasSeenAuthenticatedUserRef = useRef(false);
const isAuthPage = pathname?.startsWith("/auth") ?? false;
const { user, mutateUser, userError } = useCurrentUser();
pathnameRef.current = pathname;
if (user) {
hasSeenAuthenticatedUserRef.current = true;
}
const maybeShowLoggedOutModal = useCallback(() => {
const currentPath = pathnameRef.current;
if (
!hasSeenAuthenticatedUserRef.current ||
!currentPath ||
currentPath.startsWith("/auth")
) {
return;
}
setShowLoggedOutModal(true);
}, []);
// Handle 403 errors from the /api/me endpoint.
// Skip entirely on auth pages — the user isn't logged in yet, so there's
// nothing to "log out" of and hitting /auth/logout just creates noise.
useEffect(() => {
if (userError && userError.status === 403 && !pathname?.includes("/auth")) {
logout().then(() => {
setShowLoggedOutModal(true);
if (
userError &&
userError.status === 403 &&
hasSeenAuthenticatedUserRef.current &&
pathname &&
!isAuthPage
) {
let cancelled = false;
logout().finally(() => {
if (!cancelled) {
maybeShowLoggedOutModal();
}
});
return () => {
cancelled = true;
};
}
}, [userError, pathname]);
}, [userError, pathname, isAuthPage, maybeShowLoggedOutModal]);
// Function to handle the "Log in" button click
function handleLogin() {
@@ -67,13 +99,10 @@ export default function AppHealthBanner() {
const timeUntilExpire = (secondsUntilExpiration + 10) * 1000;
expirationTimeoutRef.current = setTimeout(() => {
setExpired(true);
if (!pathname?.includes("/auth")) {
setShowLoggedOutModal(true);
}
maybeShowLoggedOutModal();
}, timeUntilExpire);
},
[pathname]
[maybeShowLoggedOutModal]
);
// Clean up any timeouts/intervals when component unmounts
@@ -187,6 +216,12 @@ export default function AppHealthBanner() {
}
}, [user, setupExpirationTimeout, mutateUser]);
useEffect(() => {
if (error instanceof RedirectError || expired) {
maybeShowLoggedOutModal();
}
}, [error, expired, maybeShowLoggedOutModal]);
// Logged out modal
if (showLoggedOutModal) {
return (
@@ -211,9 +246,6 @@ export default function AppHealthBanner() {
}
if (error instanceof RedirectError || expired) {
if (!pathname?.includes("/auth")) {
setShowLoggedOutModal(true);
}
return null;
} else {
return (

View File

@@ -26,7 +26,8 @@ import {
import { fetchModels } from "@/lib/llmConfig/svc";
import { toast } from "@/hooks/useToast";
import { refreshLlmProviderCaches } from "@/lib/llmConfig/cache";
import { useSettingsContext } from "@/providers/SettingsProvider";
const DEFAULT_API_BASE = "http://localhost:1234";
interface LMStudioModalValues extends BaseLLMModalValues {
api_base: string;
@@ -108,10 +109,6 @@ export default function LMStudioModal({
}: LLMProviderFormProps) {
const isOnboarding = variant === "onboarding";
const { mutate } = useSWRConfig();
const { settings } = useSettingsContext();
const defaultApiBase = settings.is_containerized
? "http://host.docker.internal:1234"
: "http://localhost:1234";
const onClose = () => onOpenChange?.(false);
@@ -121,7 +118,7 @@ export default function LMStudioModal({
LLMProviderName.LM_STUDIO,
existingLlmProvider
),
api_base: existingLlmProvider?.api_base ?? defaultApiBase,
api_base: existingLlmProvider?.api_base ?? DEFAULT_API_BASE,
custom_config: {
LM_STUDIO_API_KEY: existingLlmProvider?.custom_config?.LM_STUDIO_API_KEY,
},

View File

@@ -30,7 +30,8 @@ import { Card } from "@opal/components";
import { toast } from "@/hooks/useToast";
import { refreshLlmProviderCaches } from "@/lib/llmConfig/cache";
import InputTypeInField from "@/refresh-components/form/InputTypeInField";
import { useSettingsContext } from "@/providers/SettingsProvider";
const DEFAULT_API_BASE = "http://127.0.0.1:11434";
const CLOUD_API_BASE = "https://ollama.com";
enum Tab {
@@ -155,10 +156,6 @@ export default function OllamaModal({
}: LLMProviderFormProps) {
const isOnboarding = variant === "onboarding";
const { mutate } = useSWRConfig();
const { settings } = useSettingsContext();
const defaultApiBase = settings.is_containerized
? "http://host.docker.internal:11434"
: "http://127.0.0.1:11434";
const apiKey = existingLlmProvider?.custom_config?.OLLAMA_API_KEY;
const defaultTab =
existingLlmProvider && !!apiKey ? Tab.TAB_CLOUD : Tab.TAB_SELF_HOSTED;
@@ -172,7 +169,7 @@ export default function OllamaModal({
LLMProviderName.OLLAMA_CHAT,
existingLlmProvider
),
api_base: existingLlmProvider?.api_base ?? defaultApiBase,
api_base: existingLlmProvider?.api_base ?? DEFAULT_API_BASE,
custom_config: {
OLLAMA_API_KEY: apiKey,
},

View File

@@ -0,0 +1,42 @@
import { test, expect } from "@playwright/test";
import { captureJourneyCheckpoint } from "@tests/e2e/utils/journey";
import { logPageState } from "@tests/e2e/utils/pageStateLogger";
// Journey test: a brand-new (cookie-less) visitor should land on the auth
// UI cleanly, without a stale "logged out" modal appearing on first load.
test.describe("Journey: auth landing", () => {
  test.beforeEach(async ({ page }) => {
    // Start from a truly fresh session -- no auth cookies carried over.
    await page.context().clearCookies();
  });
  test("Fresh auth landing is clean @journey", async ({ page }) => {
    await page.goto("/", { waitUntil: "domcontentloaded" });
    // Unauthenticated visits should redirect to the login/signup route.
    await expect
      .poll(() => page.url(), { timeout: 60000 })
      .toMatch(/\/auth\/(login|signup)(\?.*)?$/);
    // Wait until recognizable auth-page copy has actually rendered.
    await expect
      .poll(async () => (await page.locator("body").innerText()).trim(), {
        timeout: 60000,
      })
      .toMatch(
        /Create account|Create Account|Already have an account|New to Onyx\?|Sign In/i
      );
    // Short settle window so a late-appearing logged-out modal is caught.
    await page.waitForTimeout(1000);
    const loggedOutModal = page.getByText("You Have Been Logged Out", {
      exact: true,
    });
    // Emit structured page state for CI log triage before asserting.
    console.log(
      `[journey-auth-landing] ${JSON.stringify({
        url: page.url(),
        loggedOutModalVisible: (await loggedOutModal.count()) > 0,
      })}`
    );
    await logPageState(page, "journey auth landing");
    await captureJourneyCheckpoint(page, "auth-landing");
    // Core assertions: no spurious logout modal, and auth copy is present.
    await expect(loggedOutModal).toHaveCount(0);
    await expect(page.locator("body")).toContainText(
      /New to Onyx\?|Create an Account|Sign In/
    );
  });
});

View File

@@ -0,0 +1,12 @@
{
"journeys": [
{
"name": "auth-landing",
"description": "Fresh unauthenticated load lands on the auth UI without showing a logged-out modal.",
"test_path": "tests/e2e/journeys/auth_landing.spec.ts",
"project": "journey",
"requires_model_server": false,
"skip_global_setup": true
}
]
}

View File

@@ -0,0 +1,51 @@
import { mkdir, writeFile } from "fs/promises";
import path from "path";
import type { Page } from "@playwright/test";
// Resolves the directory for journey capture artifacts, or null when the
// PLAYWRIGHT_JOURNEY_CAPTURE_DIR env var is unset or empty (capture is
// opt-in and disabled by default).
function captureDir(): string | null {
  const configured = process.env.PLAYWRIGHT_JOURNEY_CAPTURE_DIR;
  return configured ? configured : null;
}
// Normalizes an arbitrary checkpoint name into a filesystem-friendly slug:
// lowercased, runs of non-alphanumeric characters collapsed to single
// dashes, leading/trailing dashes stripped. May return "" if nothing is left.
function slug(value: string): string {
  const lowered = value.trim().toLowerCase();
  const dashed = lowered.replace(/[^a-z0-9]+/g, "-");
  return dashed.replace(/^-+/, "").replace(/-+$/, "");
}
/**
 * Persists a named checkpoint of the current journey test: a full-page
 * screenshot (`<slug>.png`) plus a small JSON metadata file (`<slug>.json`
 * with checkpoint name, page url, page title, and an ISO timestamp).
 *
 * No-op unless PLAYWRIGHT_JOURNEY_CAPTURE_DIR is set (see captureDir()).
 *
 * @param page - Playwright page to capture.
 * @param name - Human-readable checkpoint name; slugified for filenames.
 */
export async function captureJourneyCheckpoint(
  page: Page,
  name: string
): Promise<void> {
  const dir = captureDir();
  if (!dir) {
    // Capture is opt-in; silently skip when no target directory is set.
    return;
  }
  // Fall back to a generic name when slugification strips everything.
  const checkpoint = slug(name) || "checkpoint";
  await mkdir(dir, { recursive: true });
  const screenshotPath = path.join(dir, `${checkpoint}.png`);
  const metadataPath = path.join(dir, `${checkpoint}.json`);
  // Gather metadata first (page.title() is async) so both file writes can
  // then run concurrently -- they are independent of each other.
  const metadata = {
    checkpoint,
    url: page.url(),
    title: await page.title(),
    captured_at: new Date().toISOString(),
  };
  await Promise.all([
    page.screenshot({ path: screenshotPath, fullPage: true }),
    writeFile(metadataPath, JSON.stringify(metadata, null, 2)),
  ]);
}