Compare commits

..

3 Commits

Author SHA1 Message Date
Wenxi Onyx
4f793ff870 Merge remote-tracking branch 'origin/main' into codex/agent-lab 2026-04-09 16:15:03 -07:00
Wenxi Onyx
55f570261f Merge remote-tracking branch 'origin/main' into codex/agent-lab 2026-04-09 15:07:50 -07:00
Wenxi Onyx
289a7b807e agent lab init 2026-04-09 15:07:02 -07:00
132 changed files with 7877 additions and 3391 deletions

View File

@@ -1,6 +1,7 @@
FROM ubuntu:26.04@sha256:cc925e589b7543b910fea57a240468940003fbfc0515245a495dd0ad8fe7cef1
RUN apt-get update && apt-get install -y --no-install-recommends \
acl \
curl \
fd-find \
fzf \

View File

@@ -14,6 +14,12 @@ A containerized development environment for working on Onyx.
## Usage
### VS Code
1. Install the [Dev Containers extension](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-containers)
2. Open this repo in VS Code
3. "Reopen in Container" when prompted
### CLI (`ods dev`)
The [`ods` devtools CLI](../tools/ods/README.md) provides workspace-aware wrappers
@@ -33,8 +39,25 @@ ods dev exec npm test
ods dev stop
```
If you don't have `ods` installed, use the `devcontainer` CLI directly:
```bash
npm install -g @devcontainers/cli
devcontainer up --workspace-folder .
devcontainer exec --workspace-folder . zsh
```
## Restarting the container
### VS Code
Open the Command Palette (`Ctrl+Shift+P` / `Cmd+Shift+P`) and run:
- **Dev Containers: Reopen in Container** — restarts the container without rebuilding
### CLI
```bash
# Restart the container
ods dev restart
@@ -43,6 +66,12 @@ ods dev restart
ods dev rebuild
```
Or without `ods`:
```bash
devcontainer up --workspace-folder . --remove-existing-container
```
## Image
The devcontainer uses a prebuilt image published to `onyxdotapp/onyx-devcontainer`.
@@ -59,19 +88,15 @@ The `devcontainer` target is defined in `docker-bake.hcl` at the repo root.
## User & permissions
The container runs as the `dev` user by default (`remoteUser` in devcontainer.json).
An init script (`init-dev-user.sh`) runs at container start to ensure the active
user has read/write access to the bind-mounted workspace:
An init script (`init-dev-user.sh`) runs at container start to ensure `dev` has
read/write access to the bind-mounted workspace:
- **Standard Docker** — `dev`'s UID/GID is remapped to match the workspace owner,
so file permissions work seamlessly.
- **Rootless Docker** — The workspace appears as root-owned (UID 0) inside the
container due to user-namespace mapping. `ods dev up` auto-detects rootless Docker
and sets `DEVCONTAINER_REMOTE_USER=root` so the container runs as root — which
maps back to your host user via the user namespace. New files are owned by your
host UID and no ACL workarounds are needed.
To override the auto-detection, set `DEVCONTAINER_REMOTE_USER` before running
`ods dev up`.
container due to user-namespace mapping. The init script grants `dev` access via
POSIX ACLs (`setfacl`), which adds a few seconds to the first container start on
large repos.
## Docker socket
@@ -84,7 +109,9 @@ from inside. `ods dev` auto-detects the socket path and sets `DOCKER_SOCK`:
| macOS (Docker Desktop) | `~/.docker/run/docker.sock` |
| Linux (standard Docker) | `/var/run/docker.sock` |
To override, set `DOCKER_SOCK` before running `ods dev up`.
To override, set `DOCKER_SOCK` before running `ods dev up`. When using the
VS Code extension or `devcontainer` CLI directly (without `ods`), you must set
`DOCKER_SOCK` yourself.
## Firewall

View File

@@ -7,15 +7,13 @@
"source=${localEnv:HOME}/.claude,target=/home/dev/.claude,type=bind",
"source=${localEnv:HOME}/.claude.json,target=/home/dev/.claude.json,type=bind",
"source=${localEnv:HOME}/.zshrc,target=/home/dev/.zshrc.host,type=bind,readonly",
"source=${localEnv:HOME}/.gitconfig,target=/home/dev/.gitconfig,type=bind,readonly",
"source=${localEnv:HOME}/.config/nvim,target=/home/dev/.config/nvim,type=bind,readonly",
"source=${localEnv:HOME}/.gitconfig,target=/home/dev/.gitconfig.host,type=bind,readonly",
"source=${localEnv:HOME}/.ssh,target=/home/dev/.ssh.host,type=bind,readonly",
"source=${localEnv:HOME}/.config/nvim,target=/home/dev/.config/nvim.host,type=bind,readonly",
"source=onyx-devcontainer-cache,target=/home/dev/.cache,type=volume",
"source=onyx-devcontainer-local,target=/home/dev/.local,type=volume"
],
"containerEnv": {
"SSH_AUTH_SOCK": "/tmp/ssh-agent.sock"
},
"remoteUser": "${localEnv:DEVCONTAINER_REMOTE_USER:dev}",
"remoteUser": "dev",
"updateRemoteUserUID": false,
"workspaceMount": "source=${localWorkspaceFolder},target=/workspace,type=bind,consistency=delegated",
"workspaceFolder": "/workspace",

View File

@@ -8,68 +8,38 @@ set -euo pipefail
# We remap dev to that UID -- fast and seamless.
#
# Rootless Docker: Workspace appears as root-owned (UID 0) inside the
# container due to user-namespace mapping. Requires
# DEVCONTAINER_REMOTE_USER=root (set automatically by
# ods dev up). Container root IS the host user, so
# bind-mounts and named volumes are symlinked into /root.
# container due to user-namespace mapping. We can't remap
# dev to UID 0 (that's root), so we grant access with
# POSIX ACLs instead.
WORKSPACE=/workspace
TARGET_USER=dev
REMOTE_USER="${SUDO_USER:-$TARGET_USER}"
WS_UID=$(stat -c '%u' "$WORKSPACE")
WS_GID=$(stat -c '%g' "$WORKSPACE")
DEV_UID=$(id -u "$TARGET_USER")
DEV_GID=$(id -g "$TARGET_USER")
# devcontainer.json bind-mounts and named volumes target /home/dev regardless
# of remoteUser. When running as root ($HOME=/root), Phase 1 bridges the gap
# with symlinks from ACTIVE_HOME → MOUNT_HOME.
MOUNT_HOME=/home/"$TARGET_USER"
DEV_HOME=/home/"$TARGET_USER"
if [ "$REMOTE_USER" = "root" ]; then
ACTIVE_HOME="/root"
else
ACTIVE_HOME="$MOUNT_HOME"
# Ensure directories that tools expect exist under ~dev.
# ~/.local and ~/.cache are named Docker volumes -- ensure they are owned by dev.
mkdir -p "$DEV_HOME"/.local/state "$DEV_HOME"/.local/share
chown -R "$TARGET_USER":"$TARGET_USER" "$DEV_HOME"/.local
chown -R "$TARGET_USER":"$TARGET_USER" "$DEV_HOME"/.cache
# Copy host configs mounted as *.host into their real locations.
# This gives the dev user owned copies without touching host originals.
if [ -d "$DEV_HOME/.ssh.host" ]; then
cp -a "$DEV_HOME/.ssh.host" "$DEV_HOME/.ssh"
chmod 700 "$DEV_HOME/.ssh"
chmod 600 "$DEV_HOME"/.ssh/id_* 2>/dev/null || true
chown -R "$TARGET_USER":"$TARGET_USER" "$DEV_HOME/.ssh"
fi
# ── Phase 1: home directory setup ───────────────────────────────────
# ~/.local and ~/.cache are named Docker volumes mounted under MOUNT_HOME.
mkdir -p "$MOUNT_HOME"/.local/state "$MOUNT_HOME"/.local/share
# When running as root, symlink bind-mounts and named volumes into /root
# so that $HOME-relative tools (Claude Code, git, etc.) find them.
if [ "$ACTIVE_HOME" != "$MOUNT_HOME" ]; then
for item in .claude .cache .local; do
[ -d "$MOUNT_HOME/$item" ] || continue
if [ -e "$ACTIVE_HOME/$item" ] && [ ! -L "$ACTIVE_HOME/$item" ]; then
echo "warning: replacing $ACTIVE_HOME/$item with symlink to $MOUNT_HOME/$item" >&2
rm -rf "$ACTIVE_HOME/$item"
fi
ln -sfn "$MOUNT_HOME/$item" "$ACTIVE_HOME/$item"
done
# Symlink files (not directories).
for file in .claude.json .gitconfig .zshrc.host; do
[ -f "$MOUNT_HOME/$file" ] && ln -sf "$MOUNT_HOME/$file" "$ACTIVE_HOME/$file"
done
# Nested mount: .config/nvim
if [ -d "$MOUNT_HOME/.config/nvim" ]; then
mkdir -p "$ACTIVE_HOME/.config"
if [ -e "$ACTIVE_HOME/.config/nvim" ] && [ ! -L "$ACTIVE_HOME/.config/nvim" ]; then
echo "warning: replacing $ACTIVE_HOME/.config/nvim with symlink" >&2
rm -rf "$ACTIVE_HOME/.config/nvim"
fi
ln -sfn "$MOUNT_HOME/.config/nvim" "$ACTIVE_HOME/.config/nvim"
fi
fi
# ── Phase 2: workspace access ───────────────────────────────────────
# Root always has workspace access; Phase 1 handled home setup.
if [ "$REMOTE_USER" = "root" ]; then
exit 0
if [ -d "$DEV_HOME/.config/nvim.host" ]; then
mkdir -p "$DEV_HOME/.config"
cp -a "$DEV_HOME/.config/nvim.host" "$DEV_HOME/.config/nvim"
chown -R "$TARGET_USER":"$TARGET_USER" "$DEV_HOME/.config/nvim"
fi
# Already matching -- nothing to do.
@@ -91,17 +61,45 @@ if [ "$WS_UID" != "0" ]; then
echo "warning: failed to remap $TARGET_USER UID to $WS_UID" >&2
fi
fi
if ! chown -R "$TARGET_USER":"$TARGET_USER" "$MOUNT_HOME" 2>&1; then
echo "warning: failed to chown $MOUNT_HOME" >&2
if ! chown -R "$TARGET_USER":"$TARGET_USER" /home/"$TARGET_USER" 2>&1; then
echo "warning: failed to chown /home/$TARGET_USER" >&2
fi
else
# ── Rootless Docker ──────────────────────────────────────────────
# Workspace is root-owned (UID 0) due to user-namespace mapping.
# The supported path is remoteUser=root (set DEVCONTAINER_REMOTE_USER=root),
# which is handled above. If we reach here, the user is running as dev
# under rootless Docker without the override.
echo "error: rootless Docker detected but remoteUser is not root." >&2
echo " Set DEVCONTAINER_REMOTE_USER=root before starting the container," >&2
echo " or use 'ods dev up' which sets it automatically." >&2
exit 1
# Workspace is root-owned inside the container. Grant dev access
# via POSIX ACLs (preserves ownership, works across the namespace
# boundary).
if command -v setfacl &>/dev/null; then
setfacl -Rm "u:${TARGET_USER}:rwX" "$WORKSPACE"
setfacl -Rdm "u:${TARGET_USER}:rwX" "$WORKSPACE" # default ACL for new files
# Git refuses to operate in repos owned by a different UID.
# Host gitconfig is mounted readonly as ~/.gitconfig.host.
# Create a real ~/.gitconfig that includes it plus container overrides.
printf '[include]\n\tpath = %s/.gitconfig.host\n[safe]\n\tdirectory = %s\n' \
"$DEV_HOME" "$WORKSPACE" > "$DEV_HOME/.gitconfig"
chown "$TARGET_USER":"$TARGET_USER" "$DEV_HOME/.gitconfig"
# If this is a worktree, the main .git dir is bind-mounted at its
# host absolute path. Grant dev access so git operations work.
GIT_COMMON_DIR=$(git -C "$WORKSPACE" rev-parse --git-common-dir 2>/dev/null || true)
if [ -n "$GIT_COMMON_DIR" ] && [ "$GIT_COMMON_DIR" != "$WORKSPACE/.git" ]; then
[ ! -d "$GIT_COMMON_DIR" ] && GIT_COMMON_DIR="$WORKSPACE/$GIT_COMMON_DIR"
if [ -d "$GIT_COMMON_DIR" ]; then
setfacl -Rm "u:${TARGET_USER}:rwX" "$GIT_COMMON_DIR"
setfacl -Rdm "u:${TARGET_USER}:rwX" "$GIT_COMMON_DIR"
git config -f "$DEV_HOME/.gitconfig" --add safe.directory "$(dirname "$GIT_COMMON_DIR")"
fi
fi
# Also fix bind-mounted dirs under ~dev that appear root-owned.
for dir in /home/"$TARGET_USER"/.claude; do
[ -d "$dir" ] && setfacl -Rm "u:${TARGET_USER}:rwX" "$dir" && setfacl -Rdm "u:${TARGET_USER}:rwX" "$dir"
done
[ -f /home/"$TARGET_USER"/.claude.json ] && \
setfacl -m "u:${TARGET_USER}:rw" /home/"$TARGET_USER"/.claude.json
else
echo "warning: setfacl not found; dev user may not have write access to workspace" >&2
echo " install the 'acl' package or set remoteUser to root" >&2
fi
fi

416
AGENTS.md
View File

@@ -1,361 +1,55 @@
# PROJECT KNOWLEDGE BASE
This file provides guidance to AI agents when working with code in this repository.
## KEY NOTES
- If you run into any missing python dependency errors, try running your command with `source .venv/bin/activate` \
to activate the python venv.
- To make tests work, check the `.env` file at the root of the project to find an OpenAI key.
- If using `playwright` to explore the frontend, you can usually log in with username `a@example.com` and password
`a`. The app can be accessed at `http://localhost:3000`.
- You should assume that all Onyx services are running. To verify, you can check the `backend/log` directory to
make sure we see logs coming out from the relevant service.
- To connect to the Postgres database, use: `docker exec -it onyx-relational_db-1 psql -U postgres -c "<SQL>"`
- When making calls to the backend, always go through the frontend. E.g. make a call to `http://localhost:3000/api/persona` not `http://localhost:8080/api/persona`
- Put ALL db operations under the `backend/onyx/db` / `backend/ee/onyx/db` directories. Don't run queries
outside of those directories.
## Project Overview
**Onyx** (formerly Danswer) is an open-source Gen-AI and Enterprise Search platform that connects to company documents, apps, and people. It features a modular architecture with both Community Edition (MIT licensed) and Enterprise Edition offerings.
### Background Workers (Celery)
Onyx uses Celery for asynchronous task processing with multiple specialized workers:
#### Worker Types
1. **Primary Worker** (`celery_app.py`)
- Coordinates core background tasks and system-wide operations
- Handles connector management, document sync, pruning, and periodic checks
- Runs with 4 threads concurrency
- Tasks: connector deletion, vespa sync, pruning, LLM model updates, user file sync
2. **Docfetching Worker** (`docfetching`)
- Fetches documents from external data sources (connectors)
- Spawns docprocessing tasks for each document batch
- Implements watchdog monitoring for stuck connectors
- Configurable concurrency (default from env)
3. **Docprocessing Worker** (`docprocessing`)
- Processes fetched documents through the indexing pipeline:
- Upserts documents to PostgreSQL
- Chunks documents and adds contextual information
- Embeds chunks via model server
- Writes chunks to Vespa vector database
- Updates document metadata
- Configurable concurrency (default from env)
4. **Light Worker** (`light`)
- Handles lightweight, fast operations
- Tasks: vespa operations, document permissions sync, external group sync
- Higher concurrency for quick tasks
5. **Heavy Worker** (`heavy`)
- Handles resource-intensive operations
- Primary task: document pruning operations
- Runs with 4 threads concurrency
6. **KG Processing Worker** (`kg_processing`)
- Handles Knowledge Graph processing and clustering
- Builds relationships between documents
- Runs clustering algorithms
- Configurable concurrency
7. **Monitoring Worker** (`monitoring`)
- System health monitoring and metrics collection
- Monitors Celery queues, process memory, and system status
- Single thread (monitoring doesn't need parallelism)
- Cloud-specific monitoring tasks
8. **User File Processing Worker** (`user_file_processing`)
- Processes user-uploaded files
- Handles user file indexing and project synchronization
- Configurable concurrency
9. **Beat Worker** (`beat`)
- Celery's scheduler for periodic tasks
- Uses DynamicTenantScheduler for multi-tenant support
- Schedules tasks like:
- Indexing checks (every 15 seconds)
- Connector deletion checks (every 20 seconds)
- Vespa sync checks (every 20 seconds)
- Pruning checks (every 20 seconds)
- KG processing (every 60 seconds)
- Monitoring tasks (every 5 minutes)
- Cleanup tasks (hourly)
#### Key Features
- **Thread-based Workers**: All workers use thread pools (not processes) for stability
- **Tenant Awareness**: Multi-tenant support with per-tenant task isolation. There is a
middleware layer that automatically finds the appropriate tenant ID when sending tasks
via Celery Beat.
- **Task Prioritization**: High, Medium, Low priority queues
- **Monitoring**: Built-in heartbeat and liveness checking
- **Failure Handling**: Automatic retry and failure recovery mechanisms
- **Redis Coordination**: Inter-process communication via Redis
- **PostgreSQL State**: Task state and metadata stored in PostgreSQL
#### Important Notes
**Defining Tasks**:
- Always use `@shared_task` rather than `@celery_app`
- Put tasks under `background/celery/tasks/` or `ee/background/celery/tasks`
- Never enqueue a task without an expiration. Always supply `expires=` when
sending tasks, either from the beat schedule or directly from another task. It
should never be acceptable to submit code which enqueues tasks without an
expiration, as doing so can lead to unbounded task queue growth.
**Defining APIs**:
When creating new FastAPI APIs, do NOT use the `response_model` field. Instead, just type the
function.
**Testing Updates**:
If you make any updates to a celery worker and you want to test these changes, you will need
to ask me to restart the celery worker. There is no auto-restart on code-change mechanism.
**Task Time Limits**:
Since all tasks are executed in thread pools, the time limit features of Celery are silently
disabled and won't work. Timeout logic must be implemented within the task itself.
### Code Quality
```bash
# Install and run pre-commit hooks
pre-commit install
pre-commit run --all-files
```
NOTE: Always make sure everything is strictly typed (both in Python and Typescript).
## Architecture Overview
### Technology Stack
- **Backend**: Python 3.11, FastAPI, SQLAlchemy, Alembic, Celery
- **Frontend**: Next.js 15+, React 18, TypeScript, Tailwind CSS
- **Database**: PostgreSQL with Redis caching
- **Search**: Vespa vector database
- **Auth**: OAuth2, SAML, multi-provider support
- **AI/ML**: LangChain, LiteLLM, multiple embedding models
### Directory Structure
```
backend/
├── onyx/
│ ├── auth/ # Authentication & authorization
│ ├── chat/ # Chat functionality & LLM interactions
│ ├── connectors/ # Data source connectors
│ ├── db/ # Database models & operations
│ ├── document_index/ # Vespa integration
│ ├── federated_connectors/ # External search connectors
│ ├── llm/ # LLM provider integrations
│ └── server/ # API endpoints & routers
├── ee/ # Enterprise Edition features
├── alembic/ # Database migrations
└── tests/ # Test suites
web/
├── src/app/ # Next.js app router pages
├── src/components/ # Reusable React components
└── src/lib/ # Utilities & business logic
```
## Frontend Standards
Frontend standards for the `web/` and `desktop/` projects live in `web/AGENTS.md`.
## Database & Migrations
### Running Migrations
```bash
# Standard migrations
alembic upgrade head
# Multi-tenant (Enterprise)
alembic -n schema_private upgrade head
```
### Creating Migrations
```bash
# Create migration
alembic revision -m "description"
# Multi-tenant migration
alembic -n schema_private revision -m "description"
```
Write the migration manually and place it in the file that alembic creates when running the above command.
## Testing Strategy
First, you must activate the virtual environment with `source .venv/bin/activate`.
There are 4 main types of tests within Onyx:
### Unit Tests
These should not assume any Onyx/external services are available to be called.
Interactions with the outside world should be mocked using `unittest.mock`. Generally, only
write these for complex, isolated modules e.g. `citation_processing.py`.
To run them:
```bash
pytest -xv backend/tests/unit
```
### External Dependency Unit Tests
These tests assume that all external dependencies of Onyx are available and callable (e.g. Postgres, Redis,
MinIO/S3, Vespa are running + OpenAI can be called + any request to the internet is fine + etc.).
However, the actual Onyx containers are not running and with these tests we call the function to test directly.
We can also mock components/calls at will.
The goal with these tests is to minimize mocking while giving some flexibility to mock things that are flaky,
need strictly controlled behavior, or need to have their internal behavior validated (e.g. verify a function is called
with certain args, something that would be impossible with proper integration tests).
A great example of this type of test is `backend/tests/external_dependency_unit/connectors/confluence/test_confluence_group_sync.py`.
To run them:
```bash
python -m dotenv -f .vscode/.env run -- pytest backend/tests/external_dependency_unit
```
### Integration Tests
Standard integration tests. Every test in `backend/tests/integration` runs against a real Onyx deployment. We cannot
mock anything in these tests. Prefer writing integration tests (or External Dependency Unit Tests if mocking/internal
verification is necessary) over any other type of test.
Tests are parallelized at a directory level.
When writing integration tests, make sure to check the root `conftest.py` for useful fixtures + the `backend/tests/integration/common_utils` directory for utilities. Prefer calling the appropriate Manager
class in the utils (if one exists) over directly calling the APIs with a library like `requests`. Prefer using fixtures rather than
calling the utilities directly (e.g. do NOT create admin users with
`admin_user = UserManager.create(name="admin_user")`, instead use the `admin_user` fixture).
A great example of this type of test is `backend/tests/integration/tests/streaming_endpoints/test_chat_stream.py`.
To run them:
```bash
python -m dotenv -f .vscode/.env run -- pytest backend/tests/integration
```
### Playwright (E2E) Tests
These tests are an even more complete version of the Integration Tests mentioned above. All services of Onyx are
running, _including_ the Web Server.
Use these tests for anything that requires significant frontend <-> backend coordination.
Tests are located at `web/tests/e2e`. Tests are written in TypeScript.
To run them:
```bash
npx playwright test <TEST_NAME>
```
For shared fixtures, best practices, and detailed guidance, see `backend/tests/README.md`.
## Logs
When (1) writing integration tests or (2) doing live tests (e.g. curl / playwright) you can get access
to logs via the `backend/log/<service_name>_debug.log` file. All Onyx services (api_server, web_server, celery_X)
will be tailing their logs to this file.
## Security Considerations
- Never commit API keys or secrets to repository
- Use encrypted credential storage for connector credentials
- Follow RBAC patterns for new features
- Implement proper input validation with Pydantic models
- Use parameterized queries to prevent SQL injection
## AI/LLM Integration
- Multiple LLM providers supported via LiteLLM
- Configurable models per feature (chat, search, embeddings)
- Streaming support for real-time responses
- Token management and rate limiting
- Custom prompts and agent actions
## Creating a Plan
When creating a plan in the `plans` directory, make sure to include at least these elements:
**Issues to Address**
What the change is meant to do.
**Important Notes**
Things you come across in your research that are important to the implementation.
**Implementation strategy**
How you are going to make the changes happen. High level approach.
**Tests**
What unit (use rarely), external dependency unit, integration, and playwright tests you plan to write to
verify the correct behavior. Don't overtest. Usually, a given change only needs one type of test.
Do NOT include these: _Timeline_, _Rollback plan_
This is a minimal list - feel free to include more. Do NOT write code as part of your plan.
Keep it high level. You can reference certain files or functions though.
Before writing your plan, make sure to do research. Explore the relevant sections in the codebase.
## Error Handling
**Always raise `OnyxError` from `onyx.error_handling.exceptions` instead of `HTTPException`.
Never hardcode status codes or use `starlette.status` / `fastapi.status` constants directly.**
A global FastAPI exception handler converts `OnyxError` into a JSON response with the standard
`{"error_code": "...", "detail": "..."}` shape. This eliminates boilerplate and keeps error
handling consistent across the entire backend.
```python
from onyx.error_handling.error_codes import OnyxErrorCode
from onyx.error_handling.exceptions import OnyxError
# ✅ Good
raise OnyxError(OnyxErrorCode.NOT_FOUND, "Session not found")
# ✅ Good — no extra message needed
raise OnyxError(OnyxErrorCode.UNAUTHENTICATED)
# ✅ Good — upstream service with dynamic status code
raise OnyxError(OnyxErrorCode.BAD_GATEWAY, detail, status_code_override=upstream_status)
# ❌ Bad — using HTTPException directly
raise HTTPException(status_code=404, detail="Session not found")
# ❌ Bad — starlette constant
raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Access denied")
```
Available error codes are defined in `backend/onyx/error_handling/error_codes.py`. If a new error
category is needed, add it there first — do not invent ad-hoc codes.
**Upstream service errors:** When forwarding errors from an upstream service where the HTTP
status code is dynamic (comes from the upstream response), use `status_code_override`:
```python
raise OnyxError(OnyxErrorCode.BAD_GATEWAY, detail, status_code_override=e.response.status_code)
```
## Best Practices
In addition to the other content in this file, best practices for contributing
to the codebase can be found in the "Engineering Best Practices" section of
`CONTRIBUTING.md`. Understand its contents and follow them.
# Project Knowledge Base
This file is the entrypoint for agents working in this repository. Keep it small.
## Start Here
- General development workflow and repo conventions: [CONTRIBUTING.md](./CONTRIBUTING.md)
- Frontend standards for `web/` and `desktop/`: [web/AGENTS.md](./web/AGENTS.md)
- Backend testing strategy and commands: [backend/tests/README.md](./backend/tests/README.md)
- Celery worker and task guidance: [backend/onyx/background/celery/README.md](./backend/onyx/background/celery/README.md)
- Backend API error-handling rules: [backend/onyx/error_handling/README.md](./backend/onyx/error_handling/README.md)
- Plan-writing guidance: [plans/README.md](./plans/README.md)
## Agent-Lab Docs
When working on `agent-lab` or on tasks explicitly about agent-engineering, use:
- [docs/agent/README.md](./docs/agent/README.md)
These docs are the system of record for the `agent-lab` workflow.
## Universal Notes
- For non-trivial work, create the target worktree first and keep the edit, test, and PR loop
inside that worktree. Do not prototype in one checkout and copy the patch into another unless
you are explicitly debugging the harness itself.
- Use `ods worktree create` for harness-managed worktrees. Do not use raw `git worktree add` when
you want the `agent-lab` workflow, because it will skip the manifest, env overlays, dependency
bootstrap, and lane-aware base-ref selection.
- When a change needs browser proof, use the harness journey flow instead of ad hoc screen capture:
record `before` in the target worktree before making the change, then record `after` in that
same worktree after validation. Use `ods journey compare` only when you need to recover a missed
baseline or compare two explicit revisions after the fact.
- After opening a PR, treat review feedback and failing checks as part of the same loop:
use `ods pr-review ...` for GitHub review threads and `ods pr-checks diagnose` plus `ods trace`
for failing Playwright runs.
- PR titles and commit messages should use conventional-commit style such as `fix: ...` or
`feat: ...`. Do not use `[codex]` prefixes in this repo.
- If Python dependencies appear missing, activate the root venv with `source .venv/bin/activate`.
- To make tests work, check the root `.env` file for an OpenAI key.
- If using Playwright to explore the frontend, you can usually log in with username `a@example.com`
and password `a` at `http://localhost:3000`.
- Assume Onyx services are already running unless the task indicates otherwise. Check `backend/log`
if you need to verify service activity.
- When making backend calls in local development flows, go through the frontend proxy:
`http://localhost:3000/api/...`, not `http://localhost:8080/...`.
- Put DB operations under `backend/onyx/db/` or `backend/ee/onyx/db/`. Do not add ad hoc DB access
elsewhere.
## How To Use This File
- Use this file as a map, not a manual.
- Follow the nearest authoritative doc for the subsystem you are changing.
- If a repeated rule matters enough to teach every future agent, document it near the code it
governs or encode it mechanically.

View File

@@ -13,7 +13,6 @@ from ee.onyx.server.license.models import LicenseSource
from onyx.auth.schemas import UserRole
from onyx.cache.factory import get_cache_backend
from onyx.configs.constants import ANONYMOUS_USER_EMAIL
from onyx.db.enums import AccountType
from onyx.db.models import License
from onyx.db.models import User
from onyx.utils.logger import setup_logger
@@ -108,13 +107,12 @@ def get_used_seats(tenant_id: str | None = None) -> int:
Get current seat usage directly from database.
For multi-tenant: counts users in UserTenantMapping for this tenant.
For self-hosted: counts all active users.
For self-hosted: counts all active users (excludes EXT_PERM_USER role
and the anonymous system user).
Only human accounts count toward seat limits.
SERVICE_ACCOUNT (API key dummy users), EXT_PERM_USER, and the
anonymous system user are excluded. BOT (Slack users) ARE counted
because they represent real humans and get upgraded to STANDARD
when they log in via web.
TODO: Exclude API key dummy users from seat counting. API keys create
users with emails like `__DANSWER_API_KEY_*` that should not count toward
seat limits. See: https://linear.app/onyx-app/issue/ENG-3518
"""
if MULTI_TENANT:
from ee.onyx.server.tenants.user_mapping import get_tenant_count
@@ -131,7 +129,6 @@ def get_used_seats(tenant_id: str | None = None) -> int:
User.is_active == True, # type: ignore # noqa: E712
User.role != UserRole.EXT_PERM_USER,
User.email != ANONYMOUS_USER_EMAIL, # type: ignore
User.account_type != AccountType.SERVICE_ACCOUNT,
)
)
return result.scalar() or 0

View File

@@ -11,8 +11,6 @@ require a valid SCIM bearer token.
from __future__ import annotations
import hashlib
import struct
from uuid import UUID
from fastapi import APIRouter
@@ -24,7 +22,6 @@ from fastapi import Response
from fastapi.responses import JSONResponse
from fastapi_users.password import PasswordHelper
from sqlalchemy import func
from sqlalchemy import text
from sqlalchemy.exc import IntegrityError
from sqlalchemy.orm import Session
@@ -68,25 +65,12 @@ from onyx.db.permissions import recompute_user_permissions__no_commit
from onyx.db.users import assign_user_to_default_groups__no_commit
from onyx.utils.logger import setup_logger
from onyx.utils.variable_functionality import fetch_ee_implementation_or_noop
from shared_configs.contextvars import get_current_tenant_id
logger = setup_logger()
# Group names reserved for system default groups (seeded by migration).
_RESERVED_GROUP_NAMES = frozenset({"Admin", "Basic"})
# Namespace prefix for the seat-allocation advisory lock. Hashed together
# with the tenant ID so the lock is scoped per-tenant (unrelated tenants
# never block each other) and cannot collide with unrelated advisory locks.
_SEAT_LOCK_NAMESPACE = "onyx_scim_seat_lock"
def _seat_lock_id_for_tenant(tenant_id: str) -> int:
"""Derive a stable 64-bit signed int lock id for this tenant's seat lock."""
digest = hashlib.sha256(f"{_SEAT_LOCK_NAMESPACE}:{tenant_id}".encode()).digest()
# pg_advisory_xact_lock takes a signed 8-byte int; unpack as such.
return struct.unpack("q", digest[:8])[0]
class ScimJSONResponse(JSONResponse):
"""JSONResponse with Content-Type: application/scim+json (RFC 7644 §3.1)."""
@@ -225,37 +209,12 @@ def _apply_exclusions(
def _check_seat_availability(dal: ScimDAL) -> str | None:
"""Return an error message if seat limit is reached, else None.
Acquires a transaction-scoped advisory lock so that concurrent
SCIM requests are serialized. IdPs like Okta send provisioning
requests in parallel batches — without serialization the check is
vulnerable to a TOCTOU race where N concurrent requests each see
"seats available", all insert, and the tenant ends up over its
seat limit.
The lock is held until the caller's next COMMIT or ROLLBACK, which
means the seat count cannot change between the check here and the
subsequent INSERT/UPDATE. Each call site in this module follows
the pattern: _check_seat_availability → write → dal.commit()
(which releases the lock for the next waiting request).
"""
"""Return an error message if seat limit is reached, else None."""
check_fn = fetch_ee_implementation_or_noop(
"onyx.db.license", "check_seat_availability", None
)
if check_fn is None:
return None
# Transaction-scoped advisory lock — released on dal.commit() / dal.rollback().
# The lock id is derived from the tenant so unrelated tenants never block
# each other, and from a namespace string so it cannot collide with
# unrelated advisory locks elsewhere in the codebase.
lock_id = _seat_lock_id_for_tenant(get_current_tenant_id())
dal.session.execute(
text("SELECT pg_advisory_xact_lock(:lock_id)"),
{"lock_id": lock_id},
)
result = check_fn(dal.session, seats_needed=1)
if not result.available:
return result.error_message or "Seat limit reached"

View File

@@ -0,0 +1,37 @@
# Celery Development Notes
This document is the local reference for Celery worker structure and task-writing rules in Onyx.
## Worker Types
Onyx uses multiple specialized workers:
1. `primary`: coordinates core background tasks and system-wide operations.
2. `docfetching`: fetches documents from connectors and schedules downstream work.
3. `docprocessing`: runs the indexing pipeline for fetched documents.
4. `light`: handles lightweight and fast operations.
5. `heavy`: handles more resource-intensive operations.
6. `kg_processing`: runs knowledge-graph processing and clustering.
7. `monitoring`: collects health and system metrics.
8. `user_file_processing`: processes user-uploaded files.
9. `beat`: schedules periodic work.
For actual implementation details, inspect:
- `backend/onyx/background/celery/apps/`
- `backend/onyx/background/celery/configs/`
- `backend/onyx/background/celery/tasks/`
## Task Rules
- Always decorate tasks with `@shared_task` rather than `@celery_app.task`.
- Put tasks under `background/celery/tasks/` or `ee/background/celery/tasks/`.
- Never enqueue a task without `expires=`. This is a hard requirement because stale queued work can
accumulate without bound.
- Do not rely on Celery time-limit enforcement. These workers run in thread pools, so timeout logic
must be implemented inside the task itself.
## Testing Note
If you change Celery worker code and want to validate it against a running local worker, the worker
usually needs to be restarted manually. There is no general auto-restart on code change.

View File

@@ -818,10 +818,7 @@ def translate_history_to_llm_format(
)
]
# Add image parts. Each image is preceded by a text tag
# carrying its file_id so the LLM can reference the image by
# ID when calling tools like generate_image (which expects
# reference_image_file_ids to edit a specific image).
# Add image parts
for img_file in msg.image_files:
if img_file.file_type == ChatFileType.IMAGE:
try:
@@ -829,12 +826,6 @@ def translate_history_to_llm_format(
base64_data = img_file.to_base64()
image_url = f"data:{image_type};base64,{base64_data}"
content_parts.append(
TextContentPart(
type="text",
text=f"[attached image — file_id: {img_file.file_id}]",
)
)
image_part = ImageContentPart(
type="image_url",
image_url=ImageUrlDetail(

View File

@@ -42,9 +42,6 @@ from onyx.connectors.google_drive.file_retrieval import (
get_all_files_in_my_drive_and_shared,
)
from onyx.connectors.google_drive.file_retrieval import get_external_access_for_folder
from onyx.connectors.google_drive.file_retrieval import (
get_files_by_web_view_links_batch,
)
from onyx.connectors.google_drive.file_retrieval import get_files_in_shared_drive
from onyx.connectors.google_drive.file_retrieval import get_folder_metadata
from onyx.connectors.google_drive.file_retrieval import get_root_folder_id
@@ -73,13 +70,11 @@ from onyx.connectors.interfaces import CheckpointedConnectorWithPermSync
from onyx.connectors.interfaces import CheckpointOutput
from onyx.connectors.interfaces import GenerateSlimDocumentOutput
from onyx.connectors.interfaces import NormalizationResult
from onyx.connectors.interfaces import Resolver
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.interfaces import SlimConnectorWithPermSync
from onyx.connectors.models import ConnectorFailure
from onyx.connectors.models import ConnectorMissingCredentialError
from onyx.connectors.models import Document
from onyx.connectors.models import DocumentFailure
from onyx.connectors.models import EntityFailure
from onyx.connectors.models import HierarchyNode
from onyx.connectors.models import SlimDocument
@@ -207,9 +202,7 @@ class DriveIdStatus(Enum):
class GoogleDriveConnector(
SlimConnectorWithPermSync,
CheckpointedConnectorWithPermSync[GoogleDriveCheckpoint],
Resolver,
SlimConnectorWithPermSync, CheckpointedConnectorWithPermSync[GoogleDriveCheckpoint]
):
def __init__(
self,
@@ -1672,82 +1665,6 @@ class GoogleDriveConnector(
start, end, checkpoint, include_permissions=True
)
@override
def resolve_errors(
    self,
    errors: list[ConnectorFailure],
    include_permissions: bool = False,
) -> Generator[Document | ConnectorFailure | HierarchyNode, None, None]:
    """Re-fetch the documents behind previously recorded failures.

    Batch-fetches each failure's document by its document_id (used here as
    the file's webViewLink), yields a fresh ConnectorFailure for anything
    that still cannot be retrieved, yields ancestor hierarchy nodes for the
    recovered files, and finally yields the re-converted Documents (or
    conversion failures).
    """
    if self._creds is None or self._primary_admin_email is None:
        raise RuntimeError(
            "Credentials missing, should not call this method before calling load_credentials"
        )
    logger.info(f"Resolving {len(errors)} errors")
    # Only failures that recorded a failed_document can be retried; their
    # document ids are passed to the webViewLink batch lookup below.
    doc_ids = [
        failure.failed_document.document_id
        for failure in errors
        if failure.failed_document
    ]
    service = get_drive_service(self.creds, self.primary_admin_email)
    # Permission fields are needed when syncing permissions, and also when
    # exclude_domain_link_only is set (that filter inspects permissions).
    field_type = (
        DriveFileFieldType.WITH_PERMISSIONS
        if include_permissions or self.exclude_domain_link_only
        else DriveFileFieldType.STANDARD
    )
    batch_result = get_files_by_web_view_links_batch(service, doc_ids, field_type)
    # Files that failed again are surfaced as new ConnectorFailures so the
    # caller can replace the old failure records.
    for doc_id, error in batch_result.errors.items():
        yield ConnectorFailure(
            failed_document=DocumentFailure(
                document_id=doc_id,
                document_link=doc_id,
            ),
            failure_message=f"Failed to retrieve file during error resolution: {error}",
            exception=error,
        )
    permission_sync_context = (
        PermissionSyncContext(
            primary_admin_email=self.primary_admin_email,
            google_domain=self.google_domain,
        )
        if include_permissions
        else None
    )
    retrieved_files = [
        RetrievedDriveFile(
            drive_file=file,
            user_email=self.primary_admin_email,
            completion_stage=DriveRetrievalStage.DONE,
        )
        for file in batch_result.files.values()
    ]
    # Emit hierarchy nodes for the ancestors of the recovered files; fresh
    # sets are used since no prior walk state exists for this resolution pass.
    yield from self._get_new_ancestors_for_files(
        files=retrieved_files,
        seen_hierarchy_node_raw_ids=ThreadSafeSet(),
        fully_walked_hierarchy_node_raw_ids=ThreadSafeSet(),
        permission_sync_context=permission_sync_context,
        add_prefix=True,
    )
    # Convert recovered files to Documents in parallel; conversion may itself
    # yield a ConnectorFailure or None (filtered out below).
    func_with_args = [
        (
            self._convert_retrieved_file_to_document,
            (rf, permission_sync_context),
        )
        for rf in retrieved_files
    ]
    results = cast(
        list[Document | ConnectorFailure | None],
        run_functions_tuples_in_parallel(func_with_args, max_workers=8),
    )
    for result in results:
        if result is not None:
            yield result
def _extract_slim_docs_from_google_drive(
self,
checkpoint: GoogleDriveCheckpoint,

View File

@@ -9,7 +9,6 @@ from urllib.parse import urlparse
from googleapiclient.discovery import Resource # type: ignore
from googleapiclient.errors import HttpError # type: ignore
from googleapiclient.http import BatchHttpRequest # type: ignore
from onyx.access.models import ExternalAccess
from onyx.connectors.google_drive.constants import DRIVE_FOLDER_TYPE
@@ -61,8 +60,6 @@ SLIM_FILE_FIELDS = (
)
FOLDER_FIELDS = "nextPageToken, files(id, name, permissions, modifiedTime, webViewLink, shortcutDetails)"
MAX_BATCH_SIZE = 100
HIERARCHY_FIELDS = "id, name, parents, webViewLink, mimeType, driveId"
HIERARCHY_FIELDS_WITH_PERMISSIONS = (
@@ -219,7 +216,7 @@ def get_external_access_for_folder(
def _get_fields_for_file_type(field_type: DriveFileFieldType) -> str:
"""Get the appropriate fields string for files().list() based on the field type enum."""
"""Get the appropriate fields string based on the field type enum"""
if field_type == DriveFileFieldType.SLIM:
return SLIM_FILE_FIELDS
elif field_type == DriveFileFieldType.WITH_PERMISSIONS:
@@ -228,25 +225,6 @@ def _get_fields_for_file_type(field_type: DriveFileFieldType) -> str:
return FILE_FIELDS
def _extract_single_file_fields(list_fields: str) -> str:
"""Convert a files().list() fields string to one suitable for files().get().
List fields look like "nextPageToken, files(field1, field2, ...)"
Single-file fields should be just "field1, field2, ..."
"""
start = list_fields.find("files(")
if start == -1:
return list_fields
inner_start = start + len("files(")
inner_end = list_fields.rfind(")")
return list_fields[inner_start:inner_end]
def _get_single_file_fields(field_type: DriveFileFieldType) -> str:
    """Get the appropriate fields string for files().get() based on the field type enum."""
    list_form = _get_fields_for_file_type(field_type)
    return _extract_single_file_fields(list_form)
def _get_files_in_parent(
service: Resource,
parent_id: str,
@@ -558,74 +536,3 @@ def get_file_by_web_view_link(
)
.execute()
)
class BatchRetrievalResult:
    """Result of a batch file retrieval, separating successes from errors."""

    def __init__(self) -> None:
        # Successful retrievals, keyed by the batch request id (the file's
        # webViewLink at the call sites in this module).
        self.files: dict[str, GoogleDriveFileType] = {}
        # Per-file failures, keyed the same way as `files`.
        self.errors: dict[str, Exception] = {}
def get_files_by_web_view_links_batch(
    service: GoogleDriveService,
    web_view_links: list[str],
    field_type: DriveFileFieldType,
) -> BatchRetrievalResult:
    """Retrieve multiple Google Drive files by webViewLink using the batch API.

    Returns a BatchRetrievalResult containing successful file retrievals
    and errors for any files that could not be fetched.
    Automatically splits into chunks of MAX_BATCH_SIZE.
    """
    fields = _get_single_file_fields(field_type)
    # Common case: everything fits into a single batch request.
    if len(web_view_links) <= MAX_BATCH_SIZE:
        return _get_files_by_web_view_links_batch(service, web_view_links, fields)
    # Otherwise issue one batch per MAX_BATCH_SIZE-sized slice and merge.
    merged = BatchRetrievalResult()
    for offset in range(0, len(web_view_links), MAX_BATCH_SIZE):
        part = _get_files_by_web_view_links_batch(
            service, web_view_links[offset : offset + MAX_BATCH_SIZE], fields
        )
        merged.files.update(part.files)
        merged.errors.update(part.errors)
    return merged
def _get_files_by_web_view_links_batch(
    service: GoogleDriveService,
    web_view_links: list[str],
    fields: str,
) -> BatchRetrievalResult:
    """Single-batch implementation.

    Issues one Google batch HTTP request containing a files().get() call per
    webViewLink. Per-file failures are recorded in the result rather than
    raised, so one bad link does not abort the whole batch.
    """
    result = BatchRetrievalResult()

    def callback(
        request_id: str,
        response: GoogleDriveFileType,
        exception: Exception | None,
    ) -> None:
        # Invoked once per sub-request; request_id is the webViewLink the
        # request was registered under via batch.add() below.
        if exception:
            logger.warning(f"Error retrieving file {request_id}: {exception}")
            result.errors[request_id] = exception
        else:
            result.files[request_id] = response

    batch = cast(BatchHttpRequest, service.new_batch_http_request(callback=callback))
    for web_view_link in web_view_links:
        try:
            file_id = _extract_file_id_from_web_view_link(web_view_link)
            request = service.files().get(
                fileId=file_id,
                supportsAllDrives=True,
                fields=fields,
            )
            batch.add(request, request_id=web_view_link)
        except ValueError as e:
            # Malformed link — record the parse failure and keep going.
            logger.warning(f"Failed to extract file ID from {web_view_link}: {e}")
            result.errors[web_view_link] = e
    batch.execute()
    return result

View File

@@ -298,22 +298,6 @@ class CheckpointedConnectorWithPermSync(CheckpointedConnector[CT]):
raise NotImplementedError
class Resolver(BaseConnector):
    """Interface for connectors that can re-fetch documents which previously
    failed, given their recorded ConnectorFailures."""

    @abc.abstractmethod
    def resolve_errors(
        self,
        errors: list[ConnectorFailure],
        include_permissions: bool = False,
    ) -> Generator[Document | ConnectorFailure | HierarchyNode, None, None]:
        """Attempts to yield back ALL the documents described by the errors, no checkpointing.

        Caller's responsibility is to delete the old ConnectorFailures and replace with the new ones.
        If include_permissions is True, the documents will have permissions synced.
        May also yield HierarchyNode objects for ancestor folders of resolved documents.
        """
        raise NotImplementedError
class HierarchyConnector(BaseConnector):
@abc.abstractmethod
def load_hierarchy(

View File

@@ -60,10 +60,8 @@ logger = setup_logger()
ONE_HOUR = 3600
_MAX_RESULTS_FETCH_IDS = 5000
_MAX_RESULTS_FETCH_IDS = 5000 # 5000
_JIRA_FULL_PAGE_SIZE = 50
# https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-issues/
_JIRA_BULK_FETCH_LIMIT = 100
# Constants for Jira field names
_FIELD_REPORTER = "reporter"
@@ -257,13 +255,15 @@ def _bulk_fetch_request(
return resp.json()["issues"]
def _bulk_fetch_batch(
jira_client: JIRA, issue_ids: list[str], fields: str | None
) -> list[dict[str, Any]]:
"""Fetch a single batch (must be <= _JIRA_BULK_FETCH_LIMIT).
On JSONDecodeError, recursively bisects until it succeeds or reaches size 1."""
def bulk_fetch_issues(
jira_client: JIRA, issue_ids: list[str], fields: str | None = None
) -> list[Issue]:
# TODO(evan): move away from this jira library if they continue to not support
# the endpoints we need. Using private fields is not ideal, but
# is likely fine for now since we pin the library version
try:
return _bulk_fetch_request(jira_client, issue_ids, fields)
raw_issues = _bulk_fetch_request(jira_client, issue_ids, fields)
except requests.exceptions.JSONDecodeError:
if len(issue_ids) <= 1:
logger.exception(
@@ -277,25 +277,12 @@ def _bulk_fetch_batch(
f"Jira bulk-fetch JSON decode failed for batch of {len(issue_ids)} issues. "
f"Splitting into sub-batches of {mid} and {len(issue_ids) - mid}."
)
left = _bulk_fetch_batch(jira_client, issue_ids[:mid], fields)
right = _bulk_fetch_batch(jira_client, issue_ids[mid:], fields)
left = bulk_fetch_issues(jira_client, issue_ids[:mid], fields)
right = bulk_fetch_issues(jira_client, issue_ids[mid:], fields)
return left + right
def bulk_fetch_issues(
jira_client: JIRA, issue_ids: list[str], fields: str | None = None
) -> list[Issue]:
# TODO(evan): move away from this jira library if they continue to not support
# the endpoints we need. Using private fields is not ideal, but
# is likely fine for now since we pin the library version
raw_issues: list[dict[str, Any]] = []
for batch in chunked(issue_ids, _JIRA_BULK_FETCH_LIMIT):
try:
raw_issues.extend(_bulk_fetch_batch(jira_client, list(batch), fields))
except Exception as e:
logger.error(f"Error fetching issues: {e}")
raise
except Exception as e:
logger.error(f"Error fetching issues: {e}")
raise
return [
Issue(jira_client._options, jira_client._session, raw=issue)

View File

@@ -1,4 +1,3 @@
from dataclasses import dataclass
from datetime import datetime
from typing import TypedDict
@@ -7,14 +6,6 @@ from pydantic import BaseModel
from onyx.onyxbot.slack.models import ChannelType
@dataclass(frozen=True)
class DirectThreadFetch:
    """Request to fetch a Slack thread directly by channel and timestamp."""

    # Slack channel id the thread lives in (e.g. "C097NBWMY8Y").
    channel_id: str
    # Thread timestamp in Slack "ts" format (e.g. "1775491616.524769").
    thread_ts: str
class ChannelMetadata(TypedDict):
"""Type definition for cached channel metadata."""

View File

@@ -19,7 +19,6 @@ from onyx.configs.chat_configs import DOC_TIME_DECAY
from onyx.connectors.models import IndexingDocument
from onyx.connectors.models import TextSection
from onyx.context.search.federated.models import ChannelMetadata
from onyx.context.search.federated.models import DirectThreadFetch
from onyx.context.search.federated.models import SlackMessage
from onyx.context.search.federated.slack_search_utils import ALL_CHANNEL_TYPES
from onyx.context.search.federated.slack_search_utils import build_channel_query_filter
@@ -50,6 +49,7 @@ from onyx.server.federated.models import FederatedConnectorDetail
from onyx.utils.logger import setup_logger
from onyx.utils.threadpool_concurrency import run_functions_tuples_in_parallel
from onyx.utils.timing import log_function_time
from shared_configs.configs import DOC_EMBEDDING_CONTEXT_SIZE
logger = setup_logger()
@@ -58,6 +58,7 @@ HIGHLIGHT_END_CHAR = "\ue001"
CHANNEL_METADATA_CACHE_TTL = 60 * 60 * 24 # 24 hours
USER_PROFILE_CACHE_TTL = 60 * 60 * 24 # 24 hours
SLACK_THREAD_CONTEXT_WINDOW = 3 # Number of messages before matched message to include
CHANNEL_METADATA_MAX_RETRIES = 3 # Maximum retry attempts for channel metadata fetching
CHANNEL_METADATA_RETRY_DELAY = 1 # Initial retry delay in seconds (exponential backoff)
@@ -420,94 +421,6 @@ class SlackQueryResult(BaseModel):
filtered_channels: list[str] # Channels filtered out during this query
def _fetch_thread_from_url(
    thread_fetch: DirectThreadFetch,
    access_token: str,
    channel_metadata_dict: dict[str, ChannelMetadata] | None = None,
) -> SlackQueryResult:
    """Fetch a thread directly from a Slack URL via conversations.replies.

    Returns a SlackQueryResult with a single SlackMessage covering the whole
    thread, or an empty result when the fetch fails or yields no messages
    (errors are logged, never raised).
    """
    channel_id = thread_fetch.channel_id
    thread_ts = thread_fetch.thread_ts
    slack_client = WebClient(token=access_token)
    try:
        response = slack_client.conversations_replies(
            channel=channel_id,
            ts=thread_ts,
        )
        response.validate()
        messages: list[dict[str, Any]] = response.get("messages", [])
    except SlackApiError as e:
        logger.warning(
            f"Failed to fetch thread from URL (channel={channel_id}, ts={thread_ts}): {e}"
        )
        return SlackQueryResult(messages=[], filtered_channels=[])
    if not messages:
        logger.warning(
            f"No messages found for URL override (channel={channel_id}, ts={thread_ts})"
        )
        return SlackQueryResult(messages=[], filtered_channels=[])
    # Build thread text from all messages
    thread_text = _build_thread_text(messages, access_token, None, slack_client)
    # Get channel name from metadata cache or API
    channel_name = "unknown"
    if channel_metadata_dict and channel_id in channel_metadata_dict:
        channel_name = channel_metadata_dict[channel_id].get("name", "unknown")
    else:
        try:
            ch_response = slack_client.conversations_info(channel=channel_id)
            ch_response.validate()
            channel_info: dict[str, Any] = ch_response.get("channel", {})
            channel_name = channel_info.get("name", "unknown")
        except SlackApiError:
            # Best effort — fall back to "unknown" if the lookup fails.
            pass
    # Build the SlackMessage
    parent_msg = messages[0]
    message_ts = parent_msg.get("ts", thread_ts)
    username = parent_msg.get("user", "unknown_user")
    parent_text = parent_msg.get("text", "")
    # One-line preview of the parent message, used in the semantic identifier.
    snippet = (
        parent_text[:50].rstrip() + "..." if len(parent_text) > 50 else parent_text
    ).replace("\n", " ")
    doc_time = datetime.fromtimestamp(float(message_ts))
    decay_factor = DOC_TIME_DECAY
    # Recency bias decays with thread age but is floored at 0.75.
    doc_age_years = (datetime.now() - doc_time).total_seconds() / (365 * 24 * 60 * 60)
    recency_bias = max(1 / (1 + decay_factor * doc_age_years), 0.75)
    permalink = (
        f"https://slack.com/archives/{channel_id}/p{message_ts.replace('.', '')}"
    )
    slack_message = SlackMessage(
        document_id=f"{channel_id}_{message_ts}",
        channel_id=channel_id,
        message_id=message_ts,
        thread_id=None,  # Prevent double-enrichment in thread context fetch
        link=permalink,
        metadata={
            "channel": channel_name,
            "time": doc_time.isoformat(),
        },
        timestamp=doc_time,
        recency_bias=recency_bias,
        semantic_identifier=f"{username} in #{channel_name}: {snippet}",
        text=thread_text,
        highlighted_texts=set(),
        slack_score=100000.0,  # High priority — user explicitly asked for this thread
    )
    logger.info(
        f"URL override: fetched thread from channel={channel_id}, ts={thread_ts}, {len(messages)} messages"
    )
    return SlackQueryResult(messages=[slack_message], filtered_channels=[])
def query_slack(
query_string: str,
access_token: str,
@@ -519,6 +432,7 @@ def query_slack(
available_channels: list[str] | None = None,
channel_metadata_dict: dict[str, ChannelMetadata] | None = None,
) -> SlackQueryResult:
# Check if query has channel override (user specified channels in query)
has_channel_override = query_string.startswith("__CHANNEL_OVERRIDE__")
@@ -748,6 +662,7 @@ def _fetch_thread_context(
"""
channel_id = message.channel_id
thread_id = message.thread_id
message_id = message.message_id
# If not a thread, return original text as success
if thread_id is None:
@@ -780,37 +695,62 @@ def _fetch_thread_context(
if len(messages) <= 1:
return ThreadContextResult.success(message.text)
# Build thread text from thread starter + all replies
thread_text = _build_thread_text(messages, access_token, team_id, slack_client)
# Build thread text from thread starter + context window around matched message
thread_text = _build_thread_text(
messages, message_id, thread_id, access_token, team_id, slack_client
)
return ThreadContextResult.success(thread_text)
def _build_thread_text(
messages: list[dict[str, Any]],
message_id: str,
thread_id: str,
access_token: str,
team_id: str | None,
slack_client: WebClient,
) -> str:
"""Build thread text including all replies.
Includes the thread parent message followed by all replies in order.
"""
"""Build the thread text from messages."""
msg_text = messages[0].get("text", "")
msg_sender = messages[0].get("user", "")
thread_text = f"<@{msg_sender}>: {msg_text}"
# All messages after index 0 are replies
replies = messages[1:]
if not replies:
return thread_text
logger.debug(f"Thread {messages[0].get('ts')}: {len(replies)} replies included")
thread_text += "\n\nReplies:"
if thread_id == message_id:
message_id_idx = 0
else:
message_id_idx = next(
(i for i, msg in enumerate(messages) if msg.get("ts") == message_id), 0
)
if not message_id_idx:
return thread_text
for msg in replies:
start_idx = max(1, message_id_idx - SLACK_THREAD_CONTEXT_WINDOW)
if start_idx > 1:
thread_text += "\n..."
for i in range(start_idx, message_id_idx):
msg_text = messages[i].get("text", "")
msg_sender = messages[i].get("user", "")
thread_text += f"\n\n<@{msg_sender}>: {msg_text}"
msg_text = messages[message_id_idx].get("text", "")
msg_sender = messages[message_id_idx].get("user", "")
thread_text += f"\n\n<@{msg_sender}>: {msg_text}"
# Add following replies
len_replies = 0
for msg in messages[message_id_idx + 1 :]:
msg_text = msg.get("text", "")
msg_sender = msg.get("user", "")
thread_text += f"\n\n<@{msg_sender}>: {msg_text}"
reply = f"\n\n<@{msg_sender}>: {msg_text}"
thread_text += reply
len_replies += len(reply)
if len_replies >= DOC_EMBEDDING_CONTEXT_SIZE * 4:
thread_text += "\n..."
break
# Replace user IDs with names using cached lookups
userids: set[str] = set(re.findall(r"<@([A-Z0-9]+)>", thread_text))
@@ -1036,16 +976,7 @@ def slack_retrieval(
# Query slack with entity filtering
llm = get_default_llm()
query_items = build_slack_queries(query, llm, entities, available_channels)
# Partition into direct thread fetches and search query strings
direct_fetches: list[DirectThreadFetch] = []
query_strings: list[str] = []
for item in query_items:
if isinstance(item, DirectThreadFetch):
direct_fetches.append(item)
else:
query_strings.append(item)
query_strings = build_slack_queries(query, llm, entities, available_channels)
# Determine filtering based on entities OR context (bot)
include_dm = False
@@ -1062,16 +993,8 @@ def slack_retrieval(
f"Private channel context: will only allow messages from {allowed_private_channel} + public channels"
)
# Build search tasks — direct thread fetches + keyword searches
search_tasks: list[tuple] = [
(
_fetch_thread_from_url,
(fetch, access_token, channel_metadata_dict),
)
for fetch in direct_fetches
]
search_tasks.extend(
# Build search tasks
search_tasks = [
(
query_slack,
(
@@ -1087,7 +1010,7 @@ def slack_retrieval(
),
)
for query_string in query_strings
)
]
# If include_dm is True AND we're not already searching all channels,
# add additional searches without channel filters.

View File

@@ -10,7 +10,6 @@ from pydantic import ValidationError
from onyx.configs.app_configs import MAX_SLACK_QUERY_EXPANSIONS
from onyx.context.search.federated.models import ChannelMetadata
from onyx.context.search.federated.models import DirectThreadFetch
from onyx.context.search.models import ChunkIndexRequest
from onyx.federated_connectors.slack.models import SlackEntities
from onyx.llm.interfaces import LLM
@@ -639,38 +638,12 @@ def expand_query_with_llm(query_text: str, llm: LLM) -> list[str]:
return [query_text]
SLACK_URL_PATTERN = re.compile(
    r"https?://[a-z0-9-]+\.slack\.com/archives/([A-Z0-9]+)/p(\d{16})"
)


def extract_slack_message_urls(
    query_text: str,
) -> list[tuple[str, str]]:
    """Extract Slack message URLs from query text.

    Parses URLs like:
    https://onyx-company.slack.com/archives/C097NBWMY8Y/p1775491616524769

    Returns list of (channel_id, thread_ts) tuples.
    The 16-digit timestamp is converted to Slack ts format (with dot).
    """
    extracted: list[tuple[str, str]] = []
    for channel_id, raw_ts in SLACK_URL_PATTERN.findall(query_text):
        # Convert p1775491616524769 -> 1775491616.524769
        extracted.append((channel_id, f"{raw_ts[:10]}.{raw_ts[10:]}"))
    return extracted
def build_slack_queries(
query: ChunkIndexRequest,
llm: LLM,
entities: dict[str, Any] | None = None,
available_channels: list[str] | None = None,
) -> list[str | DirectThreadFetch]:
) -> list[str]:
"""Build Slack query strings with date filtering and query expansion."""
default_search_days = 30
if entities:
@@ -695,15 +668,6 @@ def build_slack_queries(
cutoff_date = datetime.now(timezone.utc) - timedelta(days=days_back)
time_filter = f" after:{cutoff_date.strftime('%Y-%m-%d')}"
# Check for Slack message URLs — if found, add direct fetch requests
url_fetches: list[DirectThreadFetch] = []
slack_urls = extract_slack_message_urls(query.query)
for channel_id, thread_ts in slack_urls:
url_fetches.append(
DirectThreadFetch(channel_id=channel_id, thread_ts=thread_ts)
)
logger.info(f"Detected Slack URL: channel={channel_id}, ts={thread_ts}")
# ALWAYS extract channel references from the query (not just for recency queries)
channel_references = extract_channel_references_from_query(query.query)
@@ -720,9 +684,7 @@ def build_slack_queries(
# If valid channels detected, use ONLY those channels with NO keywords
# Return query with ONLY time filter + channel filter (no keywords)
return url_fetches + [
build_channel_override_query(channel_references, time_filter)
]
return [build_channel_override_query(channel_references, time_filter)]
except ValueError as e:
# If validation fails, log the error and continue with normal flow
logger.warning(f"Channel reference validation failed: {e}")
@@ -740,8 +702,7 @@ def build_slack_queries(
rephrased_queries = expand_query_with_llm(query.query, llm)
# Build final query strings with time filters
search_queries = [
return [
rephrased_query.strip() + time_filter
for rephrased_query in rephrased_queries[:MAX_SLACK_QUERY_EXPANSIONS]
]
return url_fetches + search_queries

View File

@@ -0,0 +1,47 @@
# Error Handling
This directory is the local source of truth for backend API error handling.
## Primary Rule
Raise `OnyxError` from `onyx.error_handling.exceptions` instead of `HTTPException`.
The global FastAPI exception handler converts `OnyxError` into the standard JSON shape:
```json
{"error_code": "...", "detail": "..."}
```
This keeps API behavior consistent and avoids repetitive route-level boilerplate.
## Examples
```python
from onyx.error_handling.error_codes import OnyxErrorCode
from onyx.error_handling.exceptions import OnyxError
# Good
raise OnyxError(OnyxErrorCode.NOT_FOUND, "Session not found")
# Good
raise OnyxError(OnyxErrorCode.UNAUTHENTICATED)
# Good: preserve a dynamic upstream status code
raise OnyxError(
OnyxErrorCode.BAD_GATEWAY,
detail,
status_code_override=e.response.status_code,
)
```
Avoid:
```python
raise HTTPException(status_code=404, detail="Session not found")
```
## Notes
- Available error codes are defined in `backend/onyx/error_handling/error_codes.py`.
- If a new error category is needed, add it there first rather than inventing ad hoc strings.
- When forwarding upstream service failures with dynamic status codes, use `status_code_override`.

View File

@@ -64,20 +64,9 @@ IMPORTANT: each call to this tool is independent. Variables from previous calls
GENERATE_IMAGE_GUIDANCE = """
## generate_image
NEVER use generate_image unless the user specifically requests an image or asks to
edit/modify an existing image in the conversation.
To edit, modify, restyle, or create a variation of an image already in the
conversation, put that image's file_id in `reference_image_file_ids`. File IDs come
from two places, and both can be passed the same way:
- Images the user attached to a message carry a `[attached image — file_id: <id>]`
tag immediately before the image content. Copy the id out of that tag.
- Images produced by previous `generate_image` calls have their file_id in that
call's tool response JSON.
Only pass file_ids that actually appear in the conversation — never invent or guess
one. Leave `reference_image_file_ids` unset for a brand-new generation that doesn't
edit any existing image (for example when the user attached an image for context but
asked for a completely unrelated new picture). The first file_id in the list is the
primary edit source; any later file_ids are additional reference context.
NEVER use generate_image unless the user specifically requests an image.
For edits/variations of a previously generated image, pass `reference_image_file_ids` with
the `file_id` values returned by earlier `generate_image` tool results.
""".lstrip()
MEMORY_GUIDANCE = """

View File

@@ -96,32 +96,6 @@ def _truncate_description(description: str | None, max_length: int = 500) -> str
return description[: max_length - 3] + "..."
# TODO: Replace mask-comparison approach with an explicit Unset sentinel from the
# frontend indicating whether each credential field was actually modified. The current
# approach is brittle (e.g. short credentials produce a fixed-length mask that could
# collide) and mutates request values, which is surprising. The frontend should signal
# "unchanged" vs "new value" directly rather than relying on masked-string equality.
def _restore_masked_oauth_credentials(
    request_client_id: str | None,
    request_client_secret: str | None,
    existing_client: OAuthClientInformationFull,
) -> tuple[str | None, str | None]:
    """If the frontend sent back masked credentials, restore the real stored values."""
    stored_id = existing_client.client_id
    stored_secret = existing_client.client_secret
    # A request value equal to the mask of the stored value means the user
    # left the field untouched — swap the real credential back in.
    if request_client_id and stored_id and request_client_id == mask_string(stored_id):
        request_client_id = stored_id
    if (
        request_client_secret
        and stored_secret
        and request_client_secret == mask_string(stored_secret)
    ):
        request_client_secret = stored_secret
    return request_client_id, request_client_secret
router = APIRouter(prefix="/mcp")
admin_router = APIRouter(prefix="/admin/mcp")
STATE_TTL_SECONDS = 60 * 5 # 5 minutes
@@ -418,26 +392,6 @@ async def _connect_oauth(
detail=f"Server was configured with authentication type {auth_type_str}",
)
# If the frontend sent back masked credentials (unchanged by the user),
# restore the real stored values so we don't overwrite them with masks.
if mcp_server.admin_connection_config:
existing_data = extract_connection_data(
mcp_server.admin_connection_config, apply_mask=False
)
existing_client_raw = existing_data.get(MCPOAuthKeys.CLIENT_INFO.value)
if existing_client_raw:
existing_client = OAuthClientInformationFull.model_validate(
existing_client_raw
)
(
request.oauth_client_id,
request.oauth_client_secret,
) = _restore_masked_oauth_credentials(
request.oauth_client_id,
request.oauth_client_secret,
existing_client,
)
# Create admin config with client info if provided
config_data = MCPConnectionData(headers={})
if request.oauth_client_id and request.oauth_client_secret:
@@ -1402,19 +1356,6 @@ def _upsert_mcp_server(
if client_info_raw:
client_info = OAuthClientInformationFull.model_validate(client_info_raw)
# If the frontend sent back masked credentials (unchanged by the user),
# restore the real stored values so the comparison below sees no change
# and the credentials aren't overwritten with masked strings.
if client_info and request.auth_type == MCPAuthenticationType.OAUTH:
(
request.oauth_client_id,
request.oauth_client_secret,
) = _restore_masked_oauth_credentials(
request.oauth_client_id,
request.oauth_client_secret,
client_info,
)
changing_connection_config = (
not mcp_server.admin_connection_config
or (

View File

@@ -111,43 +111,6 @@ def _mask_string(value: str) -> str:
return value[:4] + "****" + value[-4:]
def _resolve_api_key(
    api_key: str | None,
    provider_name: str | None,
    api_base: str | None,
    db_session: Session,
) -> str | None:
    """Return the real API key for model-fetch endpoints.

    When editing an existing provider the form value is masked (e.g.
    ``sk-a****b1c2``). If *provider_name* is supplied we can look up
    the unmasked key from the database so the external request succeeds.

    The stored key is only returned when the request's *api_base*
    matches the value stored in the database.
    """
    if not provider_name:
        return api_key

    provider = fetch_existing_llm_provider(name=provider_name, db_session=db_session)
    if not provider or not provider.api_key:
        return api_key

    def _normalized(base: str | None) -> str:
        # Strip whitespace and trailing slashes so cosmetic URL differences
        # don't cause a false mismatch.
        return (base or "").strip().rstrip("/")

    if _normalized(provider.api_base) != _normalized(api_base):
        return api_key

    stored_key = provider.api_key.get_value(apply_mask=False)
    # Only resolve when the incoming value is the masked form of the
    # stored key — i.e. the user hasn't typed a new key.
    if api_key and api_key == _mask_string(stored_key):
        return stored_key
    return api_key
def _sync_fetched_models(
db_session: Session,
provider_name: str,
@@ -1211,17 +1174,16 @@ def get_ollama_available_models(
return sorted_results
def _get_openrouter_models_response(api_base: str, api_key: str | None) -> dict:
def _get_openrouter_models_response(api_base: str, api_key: str) -> dict:
"""Perform GET to OpenRouter /models and return parsed JSON."""
cleaned_api_base = api_base.strip().rstrip("/")
url = f"{cleaned_api_base}/models"
headers: dict[str, str] = {
headers = {
"Authorization": f"Bearer {api_key}",
# Optional headers recommended by OpenRouter for attribution
"HTTP-Referer": "https://onyx.app",
"X-Title": "Onyx",
}
if api_key:
headers["Authorization"] = f"Bearer {api_key}"
try:
response = httpx.get(url, headers=headers, timeout=10.0)
response.raise_for_status()
@@ -1244,12 +1206,8 @@ def get_openrouter_available_models(
Parses id, name (display), context_length, and architecture.input_modalities.
"""
api_key = _resolve_api_key(
request.api_key, request.provider_name, request.api_base, db_session
)
response_json = _get_openrouter_models_response(
api_base=request.api_base, api_key=api_key
api_base=request.api_base, api_key=request.api_key
)
data = response_json.get("data", [])
@@ -1342,18 +1300,13 @@ def get_lm_studio_available_models(
# If provider_name is given and the api_key hasn't been changed by the user,
# fall back to the stored API key from the database (the form value is masked).
# Only do so when the api_base matches what is stored.
api_key = request.api_key
if request.provider_name and not request.api_key_changed:
existing_provider = fetch_existing_llm_provider(
name=request.provider_name, db_session=db_session
)
if existing_provider and existing_provider.custom_config:
stored_base = (existing_provider.api_base or "").strip().rstrip("/")
if stored_base == cleaned_api_base:
api_key = existing_provider.custom_config.get(
LM_STUDIO_API_KEY_CONFIG_KEY
)
api_key = existing_provider.custom_config.get(LM_STUDIO_API_KEY_CONFIG_KEY)
url = f"{cleaned_api_base}/api/v1/models"
headers: dict[str, str] = {}
@@ -1437,12 +1390,8 @@ def get_litellm_available_models(
db_session: Session = Depends(get_session),
) -> list[LitellmFinalModelResponse]:
"""Fetch available models from Litellm proxy /v1/models endpoint."""
api_key = _resolve_api_key(
request.api_key, request.provider_name, request.api_base, db_session
)
response_json = _get_litellm_models_response(
api_key=api_key, api_base=request.api_base
api_key=request.api_key, api_base=request.api_base
)
models = response_json.get("data", [])
@@ -1499,7 +1448,7 @@ def get_litellm_available_models(
return sorted_results
def _get_litellm_models_response(api_key: str | None, api_base: str) -> dict:
def _get_litellm_models_response(api_key: str, api_base: str) -> dict:
"""Perform GET to Litellm proxy /api/v1/models and return parsed JSON."""
cleaned_api_base = api_base.strip().rstrip("/")
url = f"{cleaned_api_base}/v1/models"
@@ -1574,12 +1523,8 @@ def get_bifrost_available_models(
db_session: Session = Depends(get_session),
) -> list[BifrostFinalModelResponse]:
"""Fetch available models from Bifrost gateway /v1/models endpoint."""
api_key = _resolve_api_key(
request.api_key, request.provider_name, request.api_base, db_session
)
response_json = _get_bifrost_models_response(
api_base=request.api_base, api_key=api_key
api_base=request.api_base, api_key=request.api_key
)
models = response_json.get("data", [])
@@ -1668,12 +1613,8 @@ def get_openai_compatible_server_available_models(
db_session: Session = Depends(get_session),
) -> list[OpenAICompatibleFinalModelResponse]:
"""Fetch available models from a generic OpenAI-compatible /v1/models endpoint."""
api_key = _resolve_api_key(
request.api_key, request.provider_name, request.api_base, db_session
)
response_json = _get_openai_compatible_server_response(
api_base=request.api_base, api_key=api_key
api_base=request.api_base, api_key=request.api_key
)
models = response_json.get("data", [])

View File

@@ -208,6 +208,12 @@ class PythonToolOverrideKwargs(BaseModel):
chat_files: list[ChatFile] = []
class ImageGenerationToolOverrideKwargs(BaseModel):
"""Override kwargs for image generation tool calls."""
recent_generated_image_file_ids: list[str] = []
class SearchToolRunContext(BaseModel):
emitter: Emitter

View File

@@ -26,6 +26,7 @@ from onyx.server.query_and_chat.streaming_models import ImageGenerationToolHeart
from onyx.server.query_and_chat.streaming_models import ImageGenerationToolStart
from onyx.server.query_and_chat.streaming_models import Packet
from onyx.tools.interface import Tool
from onyx.tools.models import ImageGenerationToolOverrideKwargs
from onyx.tools.models import ToolCallException
from onyx.tools.models import ToolExecutionException
from onyx.tools.models import ToolResponse
@@ -47,16 +48,9 @@ PROMPT_FIELD = "prompt"
REFERENCE_IMAGE_FILE_IDS_FIELD = "reference_image_file_ids"
class ImageGenerationTool(Tool[None]):
class ImageGenerationTool(Tool[ImageGenerationToolOverrideKwargs | None]):
NAME = "generate_image"
DESCRIPTION = (
"Generate a new image from a prompt, or edit/modify existing images"
" from this conversation. To edit existing images — whether the user"
" attached them or they were produced by a previous generate_image"
" call — pass their file_id values in `reference_image_file_ids`."
" Do not use unless the user specifically requests an image or asks"
" to edit an image."
)
DESCRIPTION = "Generate an image based on a prompt. Do not use unless the user specifically requests an image."
DISPLAY_NAME = "Image Generation"
def __init__(
@@ -148,14 +142,8 @@ class ImageGenerationTool(Tool[None]):
REFERENCE_IMAGE_FILE_IDS_FIELD: {
"type": "array",
"description": (
"Optional list of image file_id values to edit/modify/use as reference."
" Accepts file_ids from two sources, with the same mechanics for both:"
" (1) images the user attached to a user message — their file_id appears"
" in the tag `[attached image — file_id: <id>]` right before the image"
" in that message; (2) images returned by previous generate_image tool"
" calls — their file_id appears in that call's response JSON. Leave"
" unset/empty for a brand-new generation unrelated to any existing image."
" The first file_id in the list is treated as the primary edit source."
"Optional image file IDs to use as reference context for edits/variations. "
"Use the file_id values returned by previous generate_image calls."
),
"items": {
"type": "string",
@@ -266,31 +254,41 @@ class ImageGenerationTool(Tool[None]):
def _resolve_reference_image_file_ids(
self,
llm_kwargs: dict[str, Any],
override_kwargs: ImageGenerationToolOverrideKwargs | None,
) -> list[str]:
raw_reference_ids = llm_kwargs.get(REFERENCE_IMAGE_FILE_IDS_FIELD)
if raw_reference_ids is None:
# No references requested — plain generation.
return []
if not isinstance(raw_reference_ids, list) or not all(
isinstance(file_id, str) for file_id in raw_reference_ids
if raw_reference_ids is not None:
if not isinstance(raw_reference_ids, list) or not all(
isinstance(file_id, str) for file_id in raw_reference_ids
):
raise ToolCallException(
message=(
f"Invalid {REFERENCE_IMAGE_FILE_IDS_FIELD}: expected array of strings, got {type(raw_reference_ids)}"
),
llm_facing_message=(
f"The '{REFERENCE_IMAGE_FILE_IDS_FIELD}' field must be an array of file_id strings."
),
)
reference_image_file_ids = [
file_id.strip() for file_id in raw_reference_ids if file_id.strip()
]
elif (
override_kwargs
and override_kwargs.recent_generated_image_file_ids
and self.img_provider.supports_reference_images
):
raise ToolCallException(
message=(
f"Invalid {REFERENCE_IMAGE_FILE_IDS_FIELD}: expected array of strings, got {type(raw_reference_ids)}"
),
llm_facing_message=(
f"The '{REFERENCE_IMAGE_FILE_IDS_FIELD}' field must be an array of file_id strings."
),
)
# If no explicit reference was provided, default to the most recently generated image.
reference_image_file_ids = [
override_kwargs.recent_generated_image_file_ids[-1]
]
else:
reference_image_file_ids = []
# Deduplicate while preserving order (first occurrence wins, so the
# LLM's intended "primary edit source" stays at index 0).
# Deduplicate while preserving order.
deduped_reference_image_ids: list[str] = []
seen_ids: set[str] = set()
for file_id in raw_reference_ids:
file_id = file_id.strip()
if not file_id or file_id in seen_ids:
for file_id in reference_image_file_ids:
if file_id in seen_ids:
continue
seen_ids.add(file_id)
deduped_reference_image_ids.append(file_id)
@@ -304,14 +302,14 @@ class ImageGenerationTool(Tool[None]):
f"Reference images requested but provider '{self.provider}' does not support image-editing context."
),
llm_facing_message=(
"This image provider does not support editing from existing images. "
"This image provider does not support editing from previous image context. "
"Try text-only generation, or switch to a provider/model that supports image edits."
),
)
max_reference_images = self.img_provider.max_reference_images
if max_reference_images > 0:
return deduped_reference_image_ids[:max_reference_images]
return deduped_reference_image_ids[-max_reference_images:]
return deduped_reference_image_ids
def _load_reference_images(
@@ -360,7 +358,7 @@ class ImageGenerationTool(Tool[None]):
def run(
self,
placement: Placement,
override_kwargs: None = None, # noqa: ARG002
override_kwargs: ImageGenerationToolOverrideKwargs | None = None,
**llm_kwargs: Any,
) -> ToolResponse:
if PROMPT_FIELD not in llm_kwargs:
@@ -375,6 +373,7 @@ class ImageGenerationTool(Tool[None]):
shape = ImageShape(llm_kwargs.get("shape", ImageShape.SQUARE.value))
reference_image_file_ids = self._resolve_reference_image_file_ids(
llm_kwargs=llm_kwargs,
override_kwargs=override_kwargs,
)
reference_images = self._load_reference_images(reference_image_file_ids)

View File

@@ -1,3 +1,4 @@
import json
import traceback
from collections import defaultdict
from typing import Any
@@ -13,6 +14,7 @@ from onyx.server.query_and_chat.streaming_models import SectionEnd
from onyx.tools.interface import Tool
from onyx.tools.models import ChatFile
from onyx.tools.models import ChatMinimalTextMessage
from onyx.tools.models import ImageGenerationToolOverrideKwargs
from onyx.tools.models import OpenURLToolOverrideKwargs
from onyx.tools.models import ParallelToolCallResponse
from onyx.tools.models import PythonToolOverrideKwargs
@@ -22,6 +24,9 @@ from onyx.tools.models import ToolCallKickoff
from onyx.tools.models import ToolExecutionException
from onyx.tools.models import ToolResponse
from onyx.tools.models import WebSearchToolOverrideKwargs
from onyx.tools.tool_implementations.images.image_generation_tool import (
ImageGenerationTool,
)
from onyx.tools.tool_implementations.memory.memory_tool import MemoryTool
from onyx.tools.tool_implementations.memory.memory_tool import MemoryToolOverrideKwargs
from onyx.tools.tool_implementations.open_url.open_url_tool import OpenURLTool
@@ -105,6 +110,63 @@ def _merge_tool_calls(tool_calls: list[ToolCallKickoff]) -> list[ToolCallKickoff
return merged_calls
def _extract_image_file_ids_from_tool_response_message(
message: str,
) -> list[str]:
try:
parsed_message = json.loads(message)
except json.JSONDecodeError:
return []
parsed_items: list[Any] = (
parsed_message if isinstance(parsed_message, list) else [parsed_message]
)
file_ids: list[str] = []
for item in parsed_items:
if not isinstance(item, dict):
continue
file_id = item.get("file_id")
if isinstance(file_id, str):
file_ids.append(file_id)
return file_ids
def _extract_recent_generated_image_file_ids(
message_history: list[ChatMessageSimple],
) -> list[str]:
tool_name_by_tool_call_id: dict[str, str] = {}
recent_image_file_ids: list[str] = []
seen_file_ids: set[str] = set()
for message in message_history:
if message.message_type == MessageType.ASSISTANT and message.tool_calls:
for tool_call in message.tool_calls:
tool_name_by_tool_call_id[tool_call.tool_call_id] = tool_call.tool_name
continue
if (
message.message_type != MessageType.TOOL_CALL_RESPONSE
or not message.tool_call_id
):
continue
tool_name = tool_name_by_tool_call_id.get(message.tool_call_id)
if tool_name != ImageGenerationTool.NAME:
continue
for file_id in _extract_image_file_ids_from_tool_response_message(
message.message
):
if file_id in seen_file_ids:
continue
seen_file_ids.add(file_id)
recent_image_file_ids.append(file_id)
return recent_image_file_ids
def _safe_run_single_tool(
tool: Tool,
tool_call: ToolCallKickoff,
@@ -324,6 +386,9 @@ def run_tool_calls(
url_to_citation: dict[str, int] = {
url: citation_num for citation_num, url in citation_mapping.items()
}
recent_generated_image_file_ids = _extract_recent_generated_image_file_ids(
message_history
)
# Prepare all tool calls with their override_kwargs
# Each tool gets a unique starting citation number to avoid conflicts when running in parallel
@@ -340,6 +405,7 @@ def run_tool_calls(
| WebSearchToolOverrideKwargs
| OpenURLToolOverrideKwargs
| PythonToolOverrideKwargs
| ImageGenerationToolOverrideKwargs
| MemoryToolOverrideKwargs
| None
) = None
@@ -388,6 +454,10 @@ def run_tool_calls(
override_kwargs = PythonToolOverrideKwargs(
chat_files=chat_files or [],
)
elif isinstance(tool, ImageGenerationTool):
override_kwargs = ImageGenerationToolOverrideKwargs(
recent_generated_image_file_ids=recent_generated_image_file_ids
)
elif isinstance(tool, MemoryTool):
override_kwargs = MemoryToolOverrideKwargs(
user_name=(

View File

@@ -254,7 +254,7 @@ oauthlib==3.2.2
# via
# kubernetes
# requests-oauthlib
onyx-devtools==0.7.5
onyx-devtools==0.7.4
openai==2.14.0
# via
# litellm

View File

@@ -45,6 +45,15 @@ npx playwright test <TEST_NAME>
Shared fixtures live in `backend/tests/conftest.py`. Test subdirectories can define
their own `conftest.py` for directory-scoped fixtures.
## Additional Onyx-Specific Guidance
- Activate the root venv first with `source .venv/bin/activate`.
- For many product changes in this repo, prefer integration tests or external dependency unit tests
over isolated unit tests.
- When writing integration tests, check `backend/tests/integration/common_utils/` and the root
`conftest.py` for fixtures and managers before inventing new helpers.
- Prefer existing fixtures over constructing users or entities manually inside tests.
## Running Tests Repeatedly (`pytest-repeat`)
Use `pytest-repeat` to catch flaky tests by running them multiple times:

View File

@@ -1,239 +0,0 @@
"""Tests for GoogleDriveConnector.resolve_errors against real Google Drive."""
import json
import os
from collections.abc import Callable
from unittest.mock import patch
from onyx.connectors.google_drive.connector import GoogleDriveConnector
from onyx.connectors.models import ConnectorFailure
from onyx.connectors.models import Document
from onyx.connectors.models import DocumentFailure
from onyx.connectors.models import HierarchyNode
from tests.daily.connectors.google_drive.consts_and_utils import ADMIN_EMAIL
from tests.daily.connectors.google_drive.consts_and_utils import (
ALL_EXPECTED_HIERARCHY_NODES,
)
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_1_ID
from tests.daily.connectors.google_drive.consts_and_utils import SHARED_DRIVE_1_ID
_DRIVE_ID_MAPPING_PATH = os.path.join(
os.path.dirname(__file__), "drive_id_mapping.json"
)
def _load_web_view_links(file_ids: list[int]) -> list[str]:
with open(_DRIVE_ID_MAPPING_PATH) as f:
mapping: dict[str, str] = json.load(f)
return [mapping[str(fid)] for fid in file_ids]
def _build_failures(web_view_links: list[str]) -> list[ConnectorFailure]:
return [
ConnectorFailure(
failed_document=DocumentFailure(
document_id=link,
document_link=link,
),
failure_message=f"Synthetic failure for {link}",
)
for link in web_view_links
]
@patch("onyx.file_processing.extract_file_text.get_unstructured_api_key")
def test_resolve_single_file(
mock_api_key: None, # noqa: ARG001
google_drive_service_acct_connector_factory: Callable[..., GoogleDriveConnector],
) -> None:
"""Resolve a single known file and verify we get back exactly one Document."""
connector = google_drive_service_acct_connector_factory(
primary_admin_email=ADMIN_EMAIL,
include_shared_drives=True,
shared_drive_urls=None,
include_my_drives=True,
my_drive_emails=None,
shared_folder_urls=None,
include_files_shared_with_me=False,
)
web_view_links = _load_web_view_links([0])
failures = _build_failures(web_view_links)
results = list(connector.resolve_errors(failures))
docs = [r for r in results if isinstance(r, Document)]
new_failures = [r for r in results if isinstance(r, ConnectorFailure)]
hierarchy_nodes = [r for r in results if isinstance(r, HierarchyNode)]
assert len(docs) == 1
assert len(new_failures) == 0
assert docs[0].semantic_identifier == "file_0.txt"
# Should yield at least one hierarchy node (the file's parent folder chain)
assert len(hierarchy_nodes) > 0
@patch("onyx.file_processing.extract_file_text.get_unstructured_api_key")
def test_resolve_multiple_files(
mock_api_key: None, # noqa: ARG001
google_drive_service_acct_connector_factory: Callable[..., GoogleDriveConnector],
) -> None:
"""Resolve multiple files across different folders via batch API."""
connector = google_drive_service_acct_connector_factory(
primary_admin_email=ADMIN_EMAIL,
include_shared_drives=True,
shared_drive_urls=None,
include_my_drives=True,
my_drive_emails=None,
shared_folder_urls=None,
include_files_shared_with_me=False,
)
# Pick files from different folders: admin files (0-4), shared drive 1 (20-24), folder_2 (45-49)
file_ids = [0, 1, 20, 21, 45]
web_view_links = _load_web_view_links(file_ids)
failures = _build_failures(web_view_links)
results = list(connector.resolve_errors(failures))
docs = [r for r in results if isinstance(r, Document)]
new_failures = [r for r in results if isinstance(r, ConnectorFailure)]
hierarchy_nodes = [r for r in results if isinstance(r, HierarchyNode)]
assert len(new_failures) == 0
retrieved_names = {doc.semantic_identifier for doc in docs}
expected_names = {f"file_{fid}.txt" for fid in file_ids}
assert expected_names == retrieved_names
# Files span multiple folders, so we should get hierarchy nodes
assert len(hierarchy_nodes) > 0
@patch("onyx.file_processing.extract_file_text.get_unstructured_api_key")
def test_resolve_hierarchy_nodes_are_valid(
mock_api_key: None, # noqa: ARG001
google_drive_service_acct_connector_factory: Callable[..., GoogleDriveConnector],
) -> None:
"""Verify that hierarchy nodes from resolve_errors match expected structure."""
connector = google_drive_service_acct_connector_factory(
primary_admin_email=ADMIN_EMAIL,
include_shared_drives=True,
shared_drive_urls=None,
include_my_drives=True,
my_drive_emails=None,
shared_folder_urls=None,
include_files_shared_with_me=False,
)
# File in folder_1 (inside shared_drive_1) — should walk up to shared_drive_1 root
web_view_links = _load_web_view_links([25])
failures = _build_failures(web_view_links)
results = list(connector.resolve_errors(failures))
hierarchy_nodes = [r for r in results if isinstance(r, HierarchyNode)]
node_ids = {node.raw_node_id for node in hierarchy_nodes}
# File 25 is in folder_1 which is inside shared_drive_1.
# The parent walk must yield at least these two ancestors.
assert (
FOLDER_1_ID in node_ids
), f"Expected folder_1 ({FOLDER_1_ID}) in hierarchy nodes, got: {node_ids}"
assert (
SHARED_DRIVE_1_ID in node_ids
), f"Expected shared_drive_1 ({SHARED_DRIVE_1_ID}) in hierarchy nodes, got: {node_ids}"
for node in hierarchy_nodes:
if node.raw_node_id not in ALL_EXPECTED_HIERARCHY_NODES:
continue
expected = ALL_EXPECTED_HIERARCHY_NODES[node.raw_node_id]
assert node.display_name == expected.display_name, (
f"Display name mismatch for {node.raw_node_id}: "
f"expected '{expected.display_name}', got '{node.display_name}'"
)
assert node.node_type == expected.node_type, (
f"Node type mismatch for {node.raw_node_id}: "
f"expected '{expected.node_type}', got '{node.node_type}'"
)
@patch("onyx.file_processing.extract_file_text.get_unstructured_api_key")
def test_resolve_with_invalid_link(
mock_api_key: None, # noqa: ARG001
google_drive_service_acct_connector_factory: Callable[..., GoogleDriveConnector],
) -> None:
"""Resolve with a mix of valid and invalid links — invalid ones yield ConnectorFailure."""
connector = google_drive_service_acct_connector_factory(
primary_admin_email=ADMIN_EMAIL,
include_shared_drives=True,
shared_drive_urls=None,
include_my_drives=True,
my_drive_emails=None,
shared_folder_urls=None,
include_files_shared_with_me=False,
)
valid_links = _load_web_view_links([0])
invalid_link = "https://drive.google.com/file/d/NONEXISTENT_FILE_ID_12345"
failures = _build_failures(valid_links + [invalid_link])
results = list(connector.resolve_errors(failures))
docs = [r for r in results if isinstance(r, Document)]
new_failures = [r for r in results if isinstance(r, ConnectorFailure)]
assert len(docs) == 1
assert docs[0].semantic_identifier == "file_0.txt"
assert len(new_failures) == 1
assert new_failures[0].failed_document is not None
assert new_failures[0].failed_document.document_id == invalid_link
@patch("onyx.file_processing.extract_file_text.get_unstructured_api_key")
def test_resolve_empty_errors(
mock_api_key: None, # noqa: ARG001
google_drive_service_acct_connector_factory: Callable[..., GoogleDriveConnector],
) -> None:
"""Resolving an empty error list should yield nothing."""
connector = google_drive_service_acct_connector_factory(
primary_admin_email=ADMIN_EMAIL,
include_shared_drives=True,
shared_drive_urls=None,
include_my_drives=True,
my_drive_emails=None,
shared_folder_urls=None,
include_files_shared_with_me=False,
)
results = list(connector.resolve_errors([]))
assert len(results) == 0
@patch("onyx.file_processing.extract_file_text.get_unstructured_api_key")
def test_resolve_entity_failures_are_skipped(
mock_api_key: None, # noqa: ARG001
google_drive_service_acct_connector_factory: Callable[..., GoogleDriveConnector],
) -> None:
"""Entity failures (not document failures) should be skipped by resolve_errors."""
from onyx.connectors.models import EntityFailure
connector = google_drive_service_acct_connector_factory(
primary_admin_email=ADMIN_EMAIL,
include_shared_drives=True,
shared_drive_urls=None,
include_my_drives=True,
my_drive_emails=None,
shared_folder_urls=None,
include_files_shared_with_me=False,
)
entity_failure = ConnectorFailure(
failed_entity=EntityFailure(entity_id="some_stage"),
failure_message="retrieval failure",
)
results = list(connector.resolve_errors([entity_failure]))
assert len(results) == 0

View File

@@ -9,7 +9,6 @@ from unittest.mock import patch
from ee.onyx.db.license import check_seat_availability
from ee.onyx.db.license import delete_license
from ee.onyx.db.license import get_license
from ee.onyx.db.license import get_used_seats
from ee.onyx.db.license import upsert_license
from ee.onyx.server.license.models import LicenseMetadata
from ee.onyx.server.license.models import LicenseSource
@@ -215,43 +214,3 @@ class TestCheckSeatAvailabilityMultiTenant:
assert result.available is False
assert result.error_message is not None
mock_tenant_count.assert_called_once_with("tenant-abc")
class TestGetUsedSeatsAccountTypeFiltering:
"""Verify get_used_seats query excludes SERVICE_ACCOUNT but includes BOT."""
@patch("ee.onyx.db.license.MULTI_TENANT", False)
@patch("onyx.db.engine.sql_engine.get_session_with_current_tenant")
def test_excludes_service_accounts(self, mock_get_session: MagicMock) -> None:
"""SERVICE_ACCOUNT users should not count toward seats."""
mock_session = MagicMock()
mock_get_session.return_value.__enter__ = MagicMock(return_value=mock_session)
mock_get_session.return_value.__exit__ = MagicMock(return_value=False)
mock_session.execute.return_value.scalar.return_value = 5
result = get_used_seats()
assert result == 5
# Inspect the compiled query to verify account_type filter
call_args = mock_session.execute.call_args
query = call_args[0][0]
compiled = str(query.compile(compile_kwargs={"literal_binds": True}))
assert "SERVICE_ACCOUNT" in compiled
# BOT should NOT be excluded
assert "BOT" not in compiled
@patch("ee.onyx.db.license.MULTI_TENANT", False)
@patch("onyx.db.engine.sql_engine.get_session_with_current_tenant")
def test_still_excludes_ext_perm_user(self, mock_get_session: MagicMock) -> None:
"""EXT_PERM_USER exclusion should still be present."""
mock_session = MagicMock()
mock_get_session.return_value.__enter__ = MagicMock(return_value=mock_session)
mock_get_session.return_value.__exit__ = MagicMock(return_value=False)
mock_session.execute.return_value.scalar.return_value = 3
get_used_seats()
call_args = mock_session.execute.call_args
query = call_args[0][0]
compiled = str(query.compile(compile_kwargs={"literal_binds": True}))
assert "EXT_PERM_USER" in compiled

View File

@@ -6,7 +6,6 @@ import requests
from jira import JIRA
from jira.resources import Issue
from onyx.connectors.jira.connector import _JIRA_BULK_FETCH_LIMIT
from onyx.connectors.jira.connector import bulk_fetch_issues
@@ -146,29 +145,3 @@ def test_bulk_fetch_recursive_splitting_raises_on_bad_issue() -> None:
with pytest.raises(requests.exceptions.JSONDecodeError):
bulk_fetch_issues(client, ["1", "2", bad_id, "3", "4", "5"])
def test_bulk_fetch_respects_api_batch_limit() -> None:
"""Requests to the bulkfetch endpoint never exceed _JIRA_BULK_FETCH_LIMIT IDs."""
client = _mock_jira_client()
total_issues = _JIRA_BULK_FETCH_LIMIT * 3 + 7
all_ids = [str(i) for i in range(total_issues)]
batch_sizes: list[int] = []
def _post_side_effect(url: str, json: dict[str, Any]) -> MagicMock: # noqa: ARG001
ids = json["issueIdsOrKeys"]
batch_sizes.append(len(ids))
resp = MagicMock()
resp.json.return_value = {"issues": [_make_raw_issue(i) for i in ids]}
return resp
client._session.post.side_effect = _post_side_effect
result = bulk_fetch_issues(client, all_ids)
assert len(result) == total_issues
# keeping this hardcoded because it's the documented limit
# https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-issues/
assert all(size <= 100 for size in batch_sizes)
assert len(batch_sizes) == 4

View File

@@ -1,67 +0,0 @@
"""Tests for _build_thread_text function."""
from unittest.mock import MagicMock
from unittest.mock import patch
from onyx.context.search.federated.slack_search import _build_thread_text
def _make_msg(user: str, text: str, ts: str) -> dict[str, str]:
return {"user": user, "text": text, "ts": ts}
class TestBuildThreadText:
"""Verify _build_thread_text includes full thread replies up to cap."""
@patch("onyx.context.search.federated.slack_search.batch_get_user_profiles")
def test_includes_all_replies(self, mock_profiles: MagicMock) -> None:
"""All replies within cap are included in output."""
mock_profiles.return_value = {}
messages = [
_make_msg("U1", "parent msg", "1000.0"),
_make_msg("U2", "reply 1", "1001.0"),
_make_msg("U3", "reply 2", "1002.0"),
_make_msg("U4", "reply 3", "1003.0"),
]
result = _build_thread_text(messages, "token", "T123", MagicMock())
assert "parent msg" in result
assert "reply 1" in result
assert "reply 2" in result
assert "reply 3" in result
assert "..." not in result
@patch("onyx.context.search.federated.slack_search.batch_get_user_profiles")
def test_non_thread_returns_parent_only(self, mock_profiles: MagicMock) -> None:
"""Single message (no replies) returns just the parent text."""
mock_profiles.return_value = {}
messages = [_make_msg("U1", "just a message", "1000.0")]
result = _build_thread_text(messages, "token", "T123", MagicMock())
assert "just a message" in result
assert "Replies:" not in result
@patch("onyx.context.search.federated.slack_search.batch_get_user_profiles")
def test_parent_always_first(self, mock_profiles: MagicMock) -> None:
"""Thread parent message is always the first line of output."""
mock_profiles.return_value = {}
messages = [
_make_msg("U1", "I am the parent", "1000.0"),
_make_msg("U2", "I am a reply", "1001.0"),
]
result = _build_thread_text(messages, "token", "T123", MagicMock())
parent_pos = result.index("I am the parent")
reply_pos = result.index("I am a reply")
assert parent_pos < reply_pos
@patch("onyx.context.search.federated.slack_search.batch_get_user_profiles")
def test_user_profiles_resolved(self, mock_profiles: MagicMock) -> None:
"""User IDs in thread text are replaced with display names."""
mock_profiles.return_value = {"U1": "Alice", "U2": "Bob"}
messages = [
_make_msg("U1", "hello", "1000.0"),
_make_msg("U2", "world", "1001.0"),
]
result = _build_thread_text(messages, "token", "T123", MagicMock())
assert "Alice" in result
assert "Bob" in result
assert "<@U1>" not in result
assert "<@U2>" not in result

View File

@@ -1,108 +0,0 @@
"""Tests for Slack URL parsing and direct thread fetch via URL override."""
from unittest.mock import MagicMock
from unittest.mock import patch
from onyx.context.search.federated.models import DirectThreadFetch
from onyx.context.search.federated.slack_search import _fetch_thread_from_url
from onyx.context.search.federated.slack_search_utils import extract_slack_message_urls
class TestExtractSlackMessageUrls:
"""Verify URL parsing extracts channel_id and timestamp correctly."""
def test_standard_url(self) -> None:
query = "summarize https://mycompany.slack.com/archives/C097NBWMY8Y/p1775491616524769"
results = extract_slack_message_urls(query)
assert len(results) == 1
assert results[0] == ("C097NBWMY8Y", "1775491616.524769")
def test_multiple_urls(self) -> None:
query = (
"compare https://co.slack.com/archives/C111/p1234567890123456 "
"and https://co.slack.com/archives/C222/p9876543210987654"
)
results = extract_slack_message_urls(query)
assert len(results) == 2
assert results[0] == ("C111", "1234567890.123456")
assert results[1] == ("C222", "9876543210.987654")
def test_no_urls(self) -> None:
query = "what happened in #general last week?"
results = extract_slack_message_urls(query)
assert len(results) == 0
def test_non_slack_url_ignored(self) -> None:
query = "check https://google.com/archives/C111/p1234567890123456"
results = extract_slack_message_urls(query)
assert len(results) == 0
def test_timestamp_conversion(self) -> None:
"""p prefix removed, dot inserted after 10th digit."""
query = "https://x.slack.com/archives/CABC123/p1775491616524769"
results = extract_slack_message_urls(query)
channel_id, ts = results[0]
assert channel_id == "CABC123"
assert ts == "1775491616.524769"
assert not ts.startswith("p")
assert "." in ts
class TestFetchThreadFromUrl:
"""Verify _fetch_thread_from_url calls conversations.replies and returns SlackMessage."""
@patch("onyx.context.search.federated.slack_search._build_thread_text")
@patch("onyx.context.search.federated.slack_search.WebClient")
def test_successful_fetch(
self, mock_webclient_cls: MagicMock, mock_build_thread: MagicMock
) -> None:
mock_client = MagicMock()
mock_webclient_cls.return_value = mock_client
# Mock conversations_replies
mock_response = MagicMock()
mock_response.get.return_value = [
{"user": "U1", "text": "parent", "ts": "1775491616.524769"},
{"user": "U2", "text": "reply 1", "ts": "1775491617.000000"},
{"user": "U3", "text": "reply 2", "ts": "1775491618.000000"},
]
mock_client.conversations_replies.return_value = mock_response
# Mock channel info
mock_ch_response = MagicMock()
mock_ch_response.get.return_value = {"name": "general"}
mock_client.conversations_info.return_value = mock_ch_response
mock_build_thread.return_value = (
"U1: parent\n\nReplies:\n\nU2: reply 1\n\nU3: reply 2"
)
fetch = DirectThreadFetch(
channel_id="C097NBWMY8Y", thread_ts="1775491616.524769"
)
result = _fetch_thread_from_url(fetch, "xoxp-token")
assert len(result.messages) == 1
msg = result.messages[0]
assert msg.channel_id == "C097NBWMY8Y"
assert msg.thread_id is None # Prevents double-enrichment
assert msg.slack_score == 100000.0
assert "parent" in msg.text
mock_client.conversations_replies.assert_called_once_with(
channel="C097NBWMY8Y", ts="1775491616.524769"
)
@patch("onyx.context.search.federated.slack_search.WebClient")
def test_api_error_returns_empty(self, mock_webclient_cls: MagicMock) -> None:
from slack_sdk.errors import SlackApiError
mock_client = MagicMock()
mock_webclient_cls.return_value = mock_client
mock_client.conversations_replies.side_effect = SlackApiError(
message="channel_not_found",
response=MagicMock(status_code=404),
)
fetch = DirectThreadFetch(channel_id="CBAD", thread_ts="1234567890.123456")
result = _fetch_thread_from_url(fetch, "xoxp-token")
assert len(result.messages) == 0

View File

@@ -505,7 +505,6 @@ class TestGetLMStudioAvailableModels:
mock_session = MagicMock()
mock_provider = MagicMock()
mock_provider.api_base = "http://localhost:1234"
mock_provider.custom_config = {"LM_STUDIO_API_KEY": "stored-secret"}
response = {

View File

@@ -2,7 +2,6 @@
from __future__ import annotations
from typing import Any
from unittest.mock import MagicMock
from unittest.mock import patch
from uuid import uuid4
@@ -10,9 +9,7 @@ from uuid import uuid4
from fastapi import Response
from sqlalchemy.exc import IntegrityError
from ee.onyx.server.scim.api import _check_seat_availability
from ee.onyx.server.scim.api import _scim_name_to_str
from ee.onyx.server.scim.api import _seat_lock_id_for_tenant
from ee.onyx.server.scim.api import create_user
from ee.onyx.server.scim.api import delete_user
from ee.onyx.server.scim.api import get_user
@@ -744,80 +741,3 @@ class TestEmailCasePreservation:
resource = parse_scim_user(result)
assert resource.userName == "Alice@Example.COM"
assert resource.emails[0].value == "Alice@Example.COM"
class TestSeatLock:
    """Tests for the advisory lock in _check_seat_availability."""

    @patch("ee.onyx.server.scim.api.get_current_tenant_id", return_value="tenant_abc")
    def test_acquires_advisory_lock_before_checking(
        self,
        _mock_tenant: MagicMock,
        mock_dal: MagicMock,
    ) -> None:
        """The advisory lock must be acquired before the seat check runs."""
        events: list[str] = []

        def record_execute(stmt: Any, _params: Any = None) -> None:
            # Only the advisory-lock statement matters for ordering.
            if "pg_advisory_xact_lock" in str(stmt):
                events.append("lock")

        mock_dal.session.execute.side_effect = record_execute

        with patch(
            "ee.onyx.server.scim.api.fetch_ee_implementation_or_noop"
        ) as mock_fetch:
            seat_result = MagicMock()
            seat_result.available = True
            seat_check = MagicMock(return_value=seat_result)
            mock_fetch.return_value = seat_check

            def record_check(*_args: Any, **_kwargs: Any) -> Any:
                events.append("check")
                return seat_result

            seat_check.side_effect = record_check

            _check_seat_availability(mock_dal)

        assert events == ["lock", "check"]

    @patch("ee.onyx.server.scim.api.get_current_tenant_id", return_value="tenant_xyz")
    def test_lock_uses_tenant_scoped_key(
        self,
        _mock_tenant: MagicMock,
        mock_dal: MagicMock,
    ) -> None:
        """The lock id must be derived from the tenant via _seat_lock_id_for_tenant."""
        seat_result = MagicMock()
        seat_result.available = True
        seat_check = MagicMock(return_value=seat_result)

        with patch(
            "ee.onyx.server.scim.api.fetch_ee_implementation_or_noop",
            return_value=seat_check,
        ):
            _check_seat_availability(mock_dal)

        mock_dal.session.execute.assert_called_once()
        bound_params = mock_dal.session.execute.call_args[0][1]
        assert bound_params["lock_id"] == _seat_lock_id_for_tenant("tenant_xyz")

    def test_seat_lock_id_is_stable_and_tenant_scoped(self) -> None:
        """Lock id must be deterministic and differ across tenants."""
        first_id = _seat_lock_id_for_tenant("t1")
        assert first_id == _seat_lock_id_for_tenant("t1")
        assert first_id != _seat_lock_id_for_tenant("t2")

    def test_no_lock_when_ee_absent(
        self,
        mock_dal: MagicMock,
    ) -> None:
        """No advisory lock should be acquired when the EE check is absent."""
        with patch(
            "ee.onyx.server.scim.api.fetch_ee_implementation_or_noop",
            return_value=None,
        ):
            assert _check_seat_availability(mock_dal) is None
        mock_dal.session.execute.assert_not_called()

View File

@@ -1,115 +0,0 @@
"""Tests for ``ImageGenerationTool._resolve_reference_image_file_ids``.
The resolver turns the LLM's ``reference_image_file_ids`` argument into a
cleaned list of file IDs to hand to ``_load_reference_images``. It trusts
the LLM's picks — the LLM can only see file IDs that actually appear in
the conversation (via ``[attached image — file_id: <id>]`` tags on user
messages and the JSON returned by prior generate_image calls), so we
don't re-validate against an allow-list in the tool itself.
"""
from unittest.mock import MagicMock
from unittest.mock import patch
import pytest
from onyx.tools.models import ToolCallException
from onyx.tools.tool_implementations.images.image_generation_tool import (
ImageGenerationTool,
)
from onyx.tools.tool_implementations.images.image_generation_tool import (
REFERENCE_IMAGE_FILE_IDS_FIELD,
)
def _make_tool(
    supports_reference_images: bool = True,
    max_reference_images: int = 16,
) -> ImageGenerationTool:
    """Construct a tool with a mock provider so no credentials/network are needed."""
    # Build the fake provider up front, then splice it in via the patch.
    fake_provider = MagicMock()
    fake_provider.supports_reference_images = supports_reference_images
    fake_provider.max_reference_images = max_reference_images

    with patch(
        "onyx.tools.tool_implementations.images.image_generation_tool.get_image_generation_provider"
    ) as mock_get_provider:
        mock_get_provider.return_value = fake_provider
        return ImageGenerationTool(
            image_generation_credentials=MagicMock(),
            tool_id=1,
            emitter=MagicMock(),
            model="gpt-image-1",
            provider="openai",
        )
class TestResolveReferenceImageFileIds:
    """Behavior of the reference-image id resolver for every input shape."""

    def test_unset_returns_empty_plain_generation(self) -> None:
        tool = _make_tool()
        assert tool._resolve_reference_image_file_ids(llm_kwargs={}) == []

    def test_empty_list_is_treated_like_unset(self) -> None:
        tool = _make_tool()
        resolved = tool._resolve_reference_image_file_ids(
            llm_kwargs={REFERENCE_IMAGE_FILE_IDS_FIELD: []},
        )
        assert resolved == []

    def test_passes_llm_supplied_ids_through(self) -> None:
        tool = _make_tool()
        resolved = tool._resolve_reference_image_file_ids(
            llm_kwargs={REFERENCE_IMAGE_FILE_IDS_FIELD: ["upload-1", "gen-1"]},
        )
        # Order preserved — first entry is the primary edit source.
        assert resolved == ["upload-1", "gen-1"]

    def test_invalid_shape_raises(self) -> None:
        tool = _make_tool()
        bad_kwargs = {REFERENCE_IMAGE_FILE_IDS_FIELD: "not-a-list"}
        with pytest.raises(ToolCallException):
            tool._resolve_reference_image_file_ids(llm_kwargs=bad_kwargs)

    def test_non_string_element_raises(self) -> None:
        tool = _make_tool()
        bad_kwargs = {REFERENCE_IMAGE_FILE_IDS_FIELD: ["ok", 123]}
        with pytest.raises(ToolCallException):
            tool._resolve_reference_image_file_ids(llm_kwargs=bad_kwargs)

    def test_deduplicates_preserving_first_occurrence(self) -> None:
        tool = _make_tool()
        resolved = tool._resolve_reference_image_file_ids(
            llm_kwargs={
                REFERENCE_IMAGE_FILE_IDS_FIELD: ["gen-1", "gen-2", "gen-1"]
            },
        )
        assert resolved == ["gen-1", "gen-2"]

    def test_strips_whitespace_and_skips_empty_strings(self) -> None:
        tool = _make_tool()
        resolved = tool._resolve_reference_image_file_ids(
            llm_kwargs={
                REFERENCE_IMAGE_FILE_IDS_FIELD: [" gen-1 ", "", " "]
            },
        )
        assert resolved == ["gen-1"]

    def test_provider_without_reference_support_raises(self) -> None:
        tool = _make_tool(supports_reference_images=False)
        with pytest.raises(ToolCallException):
            tool._resolve_reference_image_file_ids(
                llm_kwargs={REFERENCE_IMAGE_FILE_IDS_FIELD: ["gen-1"]},
            )

    def test_truncates_to_provider_max_preserving_head(self) -> None:
        """When the LLM lists more images than the provider allows, keep the
        HEAD of the list (the primary edit source + earliest extras) rather
        than the tail, since the LLM put the most important one first."""
        tool = _make_tool(max_reference_images=2)
        resolved = tool._resolve_reference_image_file_ids(
            llm_kwargs={
                REFERENCE_IMAGE_FILE_IDS_FIELD: ["a", "b", "c", "d"]
            },
        )
        assert resolved == ["a", "b"]

View File

@@ -1,5 +1,10 @@
from onyx.chat.models import ChatMessageSimple
from onyx.chat.models import ToolCallSimple
from onyx.configs.constants import MessageType
from onyx.server.query_and_chat.placement import Placement
from onyx.tools.models import ToolCallKickoff
from onyx.tools.tool_runner import _extract_image_file_ids_from_tool_response_message
from onyx.tools.tool_runner import _extract_recent_generated_image_file_ids
from onyx.tools.tool_runner import _merge_tool_calls
@@ -308,3 +313,61 @@ class TestMergeToolCalls:
# String should be converted to list item
assert result[0].tool_args["queries"] == ["single_query", "q2"]
class TestImageHistoryExtraction:
    """Extraction of generated-image file ids from chat history."""

    def test_extracts_image_file_ids_from_json_response(self) -> None:
        payload = '[{"file_id":"img-1","revised_prompt":"v1"},{"file_id":"img-2","revised_prompt":"v2"}]'
        extracted = _extract_image_file_ids_from_tool_response_message(payload)
        assert extracted == ["img-1", "img-2"]

    def test_extracts_recent_generated_image_ids_from_history(self) -> None:
        # An assistant turn that invoked generate_image, followed by its response.
        assistant_turn = ChatMessageSimple(
            message="",
            token_count=1,
            message_type=MessageType.ASSISTANT,
            tool_calls=[
                ToolCallSimple(
                    tool_call_id="call_1",
                    tool_name="generate_image",
                    tool_arguments={"prompt": "test"},
                    token_count=1,
                )
            ],
        )
        tool_response = ChatMessageSimple(
            message='[{"file_id":"img-1","revised_prompt":"r1"}]',
            token_count=1,
            message_type=MessageType.TOOL_CALL_RESPONSE,
            tool_call_id="call_1",
        )
        assert _extract_recent_generated_image_file_ids(
            [assistant_turn, tool_response]
        ) == ["img-1"]

    def test_ignores_non_image_tool_responses(self) -> None:
        # Same shape of history, but the tool call is web_search, not
        # generate_image — its response must not be treated as image output.
        assistant_turn = ChatMessageSimple(
            message="",
            token_count=1,
            message_type=MessageType.ASSISTANT,
            tool_calls=[
                ToolCallSimple(
                    tool_call_id="call_1",
                    tool_name="web_search",
                    tool_arguments={"queries": ["q"]},
                    token_count=1,
                )
            ],
        )
        tool_response = ChatMessageSimple(
            message='[{"file_id":"img-1","revised_prompt":"r1"}]',
            token_count=1,
            message_type=MessageType.TOOL_CALL_RESPONSE,
            tool_call_id="call_1",
        )
        assert (
            _extract_recent_generated_image_file_ids([assistant_turn, tool_response])
            == []
        )

View File

@@ -1,17 +1,3 @@
# OAuth callback page must be served by the web server (Next.js),
# not the MCP server. Exact match takes priority over the regex below.
location = /mcp/oauth/callback {
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_set_header X-Forwarded-Host $host;
proxy_set_header X-Forwarded-Port $server_port;
proxy_set_header Host $host;
proxy_http_version 1.1;
proxy_redirect off;
proxy_pass http://web_server;
}
# MCP Server - Model Context Protocol for LLM integrations
# Match /mcp, /mcp/, or /mcp/* but NOT /mcpserver, /mcpapi, etc.
location ~ ^/mcp(/.*)?$ {

View File

@@ -5,7 +5,7 @@ home: https://www.onyx.app/
sources:
- "https://github.com/onyx-dot-app/onyx"
type: application
version: 0.4.43
version: 0.4.41
appVersion: latest
annotations:
category: Productivity

View File

@@ -1,349 +0,0 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": { "type": "grafana", "uid": "-- Grafana --" },
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 1,
"id": null,
"links": [],
"liveNow": true,
"panels": [
{
"title": "Client-Side Search Latency (P50 / P95 / P99)",
"description": "End-to-end latency as measured by the Python client, including network round-trip and serialization overhead.",
"type": "timeseries",
"gridPos": { "h": 10, "w": 12, "x": 0, "y": 0 },
"id": 1,
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisLabel": "seconds",
"axisPlacement": "auto",
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"lineInterpolation": "smooth",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": { "type": "linear" },
"showPoints": "never",
"spanNulls": false,
"stacking": { "group": "A", "mode": "none" },
"thresholdsStyle": { "mode": "dashed" }
},
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "green", "value": null },
{ "color": "yellow", "value": 0.5 },
{ "color": "red", "value": 2.0 }
]
},
"unit": "s",
"min": 0
},
"overrides": []
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"expr": "histogram_quantile(0.5, sum by (le) (rate(onyx_opensearch_search_client_duration_seconds_bucket[5m])))",
"legendFormat": "P50",
"refId": "A"
},
{
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"expr": "histogram_quantile(0.95, sum by (le) (rate(onyx_opensearch_search_client_duration_seconds_bucket[5m])))",
"legendFormat": "P95",
"refId": "B"
},
{
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"expr": "histogram_quantile(0.99, sum by (le) (rate(onyx_opensearch_search_client_duration_seconds_bucket[5m])))",
"legendFormat": "P99",
"refId": "C"
}
]
},
{
"title": "Server-Side Search Latency (P50 / P95 / P99)",
"description": "OpenSearch server-side execution time from the 'took' field in the response. Does not include network or client-side overhead.",
"type": "timeseries",
"gridPos": { "h": 10, "w": 12, "x": 12, "y": 0 },
"id": 2,
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisLabel": "seconds",
"axisPlacement": "auto",
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"lineInterpolation": "smooth",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": { "type": "linear" },
"showPoints": "never",
"spanNulls": false,
"stacking": { "group": "A", "mode": "none" },
"thresholdsStyle": { "mode": "dashed" }
},
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "green", "value": null },
{ "color": "yellow", "value": 0.5 },
{ "color": "red", "value": 2.0 }
]
},
"unit": "s",
"min": 0
},
"overrides": []
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"expr": "histogram_quantile(0.5, sum by (le) (rate(onyx_opensearch_search_server_duration_seconds_bucket[5m])))",
"legendFormat": "P50",
"refId": "A"
},
{
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"expr": "histogram_quantile(0.95, sum by (le) (rate(onyx_opensearch_search_server_duration_seconds_bucket[5m])))",
"legendFormat": "P95",
"refId": "B"
},
{
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"expr": "histogram_quantile(0.99, sum by (le) (rate(onyx_opensearch_search_server_duration_seconds_bucket[5m])))",
"legendFormat": "P99",
"refId": "C"
}
]
},
{
"title": "Client-Side Latency by Search Type (P95)",
"description": "P95 client-side latency broken down by search type (hybrid, keyword, semantic, random, doc_id_retrieval).",
"type": "timeseries",
"gridPos": { "h": 10, "w": 12, "x": 0, "y": 10 },
"id": 3,
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisLabel": "seconds",
"axisPlacement": "auto",
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"lineInterpolation": "smooth",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": { "type": "linear" },
"showPoints": "never",
"spanNulls": false,
"stacking": { "group": "A", "mode": "none" },
"thresholdsStyle": { "mode": "off" }
},
"unit": "s",
"min": 0
},
"overrides": []
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"expr": "histogram_quantile(0.95, sum by (search_type, le) (rate(onyx_opensearch_search_client_duration_seconds_bucket[5m])))",
"legendFormat": "{{ search_type }}",
"refId": "A"
}
]
},
{
"title": "Search Throughput by Type",
"description": "Searches per second broken down by search type.",
"type": "timeseries",
"gridPos": { "h": 10, "w": 12, "x": 12, "y": 10 },
"id": 4,
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisLabel": "searches/s",
"axisPlacement": "auto",
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"lineInterpolation": "smooth",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": { "type": "linear" },
"showPoints": "never",
"spanNulls": false,
"stacking": { "group": "A", "mode": "normal" },
"thresholdsStyle": { "mode": "off" }
},
"unit": "ops",
"min": 0
},
"overrides": []
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"expr": "sum by (search_type) (rate(onyx_opensearch_search_total[5m]))",
"legendFormat": "{{ search_type }}",
"refId": "A"
}
]
},
{
"title": "Concurrent Searches In Progress",
"description": "Number of OpenSearch searches currently in flight, broken down by search type. Summed across all instances.",
"type": "timeseries",
"gridPos": { "h": 10, "w": 12, "x": 0, "y": 20 },
"id": 5,
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisLabel": "searches",
"axisPlacement": "auto",
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"lineInterpolation": "smooth",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": { "type": "linear" },
"showPoints": "never",
"spanNulls": false,
"stacking": { "group": "A", "mode": "normal" },
"thresholdsStyle": { "mode": "off" }
},
"min": 0
},
"overrides": []
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"expr": "sum by (search_type) (onyx_opensearch_searches_in_progress)",
"legendFormat": "{{ search_type }}",
"refId": "A"
}
]
},
{
"title": "Client vs Server Latency Overhead (P50)",
"description": "Difference between client-side and server-side P50 latency. Reveals network, serialization, and untracked OpenSearch overhead.",
"type": "timeseries",
"gridPos": { "h": 10, "w": 12, "x": 12, "y": 20 },
"id": 6,
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisLabel": "seconds",
"axisPlacement": "auto",
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"lineInterpolation": "smooth",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": { "type": "linear" },
"showPoints": "never",
"spanNulls": false,
"stacking": { "group": "A", "mode": "none" },
"thresholdsStyle": { "mode": "off" }
},
"unit": "s",
"min": 0
},
"overrides": []
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"expr": "histogram_quantile(0.5, sum by (le) (rate(onyx_opensearch_search_client_duration_seconds_bucket[5m]))) - histogram_quantile(0.5, sum by (le) (rate(onyx_opensearch_search_server_duration_seconds_bucket[5m])))",
"legendFormat": "Client - Server overhead (P50)",
"refId": "A"
},
{
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"expr": "histogram_quantile(0.5, sum by (le) (rate(onyx_opensearch_search_client_duration_seconds_bucket[5m])))",
"legendFormat": "Client P50",
"refId": "B"
},
{
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"expr": "histogram_quantile(0.5, sum by (le) (rate(onyx_opensearch_search_server_duration_seconds_bucket[5m])))",
"legendFormat": "Server P50",
"refId": "C"
}
]
}
],
"refresh": "5s",
"schemaVersion": 37,
"style": "dark",
"tags": ["onyx", "opensearch", "search", "latency"],
"templating": {
"list": [
{
"current": {
"text": "Prometheus",
"value": "prometheus"
},
"includeAll": false,
"name": "DS_PROMETHEUS",
"options": [],
"query": "prometheus",
"refresh": 1,
"type": "datasource"
}
]
},
"time": { "from": "now-60m", "to": "now" },
"timepicker": {
"refresh_intervals": ["5s", "10s", "30s", "1m"]
},
"timezone": "",
"title": "Onyx OpenSearch Search Latency",
"uid": "onyx-opensearch-search-latency",
"version": 0,
"weekStart": ""
}

View File

@@ -1,606 +0,0 @@
{
"id": null,
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "grafana",
"uid": "-- Grafana --"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"links": [],
"panels": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 18,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 2,
"pointSize": 4,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "orange",
"value": 10
},
{
"color": "red",
"value": 50
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 10,
"w": 24,
"x": 0,
"y": 0
},
"id": 1,
"options": {
"legend": {
"calcs": ["lastNotNull", "max"],
"displayMode": "table",
"placement": "right",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "desc"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"expr": "onyx_celery_queue_depth{queue=~\"$queue\"}",
"legendFormat": "{{queue}}",
"range": true,
"refId": "A"
}
],
"title": "Queue Depth by Queue",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "orange",
"value": 20
},
{
"color": "red",
"value": 100
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 4,
"w": 6,
"x": 0,
"y": 10
},
"id": 2,
"options": {
"colorMode": "background",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": ["lastNotNull"],
"fields": "",
"values": false
},
"textMode": "auto"
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"expr": "sum(onyx_celery_queue_depth)",
"refId": "A"
}
],
"title": "Total Queued Tasks",
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "orange",
"value": 20
},
{
"color": "red",
"value": 100
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 4,
"w": 6,
"x": 6,
"y": 10
},
"id": 3,
"options": {
"colorMode": "background",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": ["lastNotNull"],
"fields": "",
"values": false
},
"textMode": "auto"
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"expr": "onyx_celery_unacked_tasks",
"refId": "A"
}
],
"title": "Unacked Tasks",
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "orange",
"value": 10
},
{
"color": "red",
"value": 50
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 4,
"w": 6,
"x": 12,
"y": 10
},
"id": 4,
"options": {
"colorMode": "background",
"graphMode": "none",
"justifyMode": "center",
"orientation": "auto",
"reduceOptions": {
"calcs": ["lastNotNull"],
"fields": "",
"values": false
},
"textMode": "auto"
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"expr": "onyx_celery_queue_depth{queue=\"docprocessing\"}",
"refId": "A"
}
],
"title": "Docprocessing Queue",
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "orange",
"value": 10
},
{
"color": "red",
"value": 50
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 4,
"w": 6,
"x": 18,
"y": 10
},
"id": 5,
"options": {
"colorMode": "background",
"graphMode": "none",
"justifyMode": "center",
"orientation": "auto",
"reduceOptions": {
"calcs": ["lastNotNull"],
"fields": "",
"values": false
},
"textMode": "auto"
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"expr": "onyx_celery_queue_depth{queue=\"connector_doc_fetching\"}",
"refId": "A"
}
],
"title": "Docfetching Queue",
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "bars",
"fillOpacity": 80,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineWidth": 1,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"stacking": {
"group": "A",
"mode": "none"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "orange",
"value": 10
},
{
"color": "red",
"value": 50
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 10,
"w": 12,
"x": 0,
"y": 14
},
"id": 6,
"options": {
"legend": {
"calcs": ["lastNotNull"],
"displayMode": "list",
"placement": "bottom",
"showLegend": false
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"expr": "topk(10, onyx_celery_queue_depth)",
"legendFormat": "{{queue}}",
"range": true,
"refId": "A"
}
],
"title": "Top 10 Queue Backlogs",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"fieldConfig": {
"defaults": {
"custom": {
"align": "auto",
"cellOptions": {
"type": "auto"
},
"inspect": false
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "orange",
"value": 10
},
{
"color": "red",
"value": 50
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 10,
"w": 12,
"x": 12,
"y": 14
},
"id": 7,
"options": {
"cellHeight": "sm",
"footer": {
"countRows": false,
"fields": "",
"reducer": ["sum"],
"show": false
},
"showHeader": true,
"sortBy": [
{
"desc": true,
"displayName": "Value"
}
]
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"expr": "sort_desc(onyx_celery_queue_depth)",
"format": "table",
"instant": true,
"refId": "A"
}
],
"title": "Current Queue Depth",
"transformations": [
{
"id": "labelsToFields",
"options": {
"mode": "columns"
}
}
],
"type": "table"
}
],
"refresh": "30s",
"schemaVersion": 39,
"style": "dark",
"tags": ["onyx", "redis", "celery"],
"templating": {
"list": [
{
"current": {
"selected": true,
"text": "Prometheus",
"value": "Prometheus"
},
"hide": 0,
"includeAll": false,
"label": "Datasource",
"name": "DS_PROMETHEUS",
"options": [],
"query": "prometheus",
"refresh": 1,
"regex": "",
"type": "datasource"
},
{
"allValue": ".*",
"current": {
"selected": true,
"text": "All",
"value": ".*"
},
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"definition": "label_values(onyx_celery_queue_depth, queue)",
"hide": 0,
"includeAll": true,
"label": "Queue",
"multi": true,
"name": "queue",
"options": [],
"query": {
"query": "label_values(onyx_celery_queue_depth, queue)",
"refId": "StandardVariableQuery"
},
"refresh": 2,
"regex": "",
"sort": 1,
"type": "query"
}
]
},
"time": {
"from": "now-6h",
"to": "now"
},
"timepicker": {},
"timezone": "",
"title": "Onyx Redis Queues",
"uid": "onyx-redis-queues",
"version": 1,
"weekStart": ""
}

View File

@@ -12,30 +12,4 @@ metadata:
data:
onyx-indexing-pipeline.json: |
{{- .Files.Get "dashboards/indexing-pipeline.json" | nindent 4 }}
---
apiVersion: v1
kind: ConfigMap
metadata:
name: {{ include "onyx.fullname" . }}-opensearch-search-latency-dashboard
labels:
{{- include "onyx.labels" . | nindent 4 }}
grafana_dashboard: "1"
annotations:
grafana_folder: "Onyx"
data:
onyx-opensearch-search-latency.json: |
{{- .Files.Get "dashboards/opensearch-search-latency.json" | nindent 4 }}
---
apiVersion: v1
kind: ConfigMap
metadata:
name: {{ include "onyx.fullname" . }}-redis-queues-dashboard
labels:
{{- include "onyx.labels" . | nindent 4 }}
grafana_dashboard: "1"
annotations:
grafana_folder: "Onyx"
data:
onyx-redis-queues.json: |
{{- .Files.Get "dashboards/redis-queues.json" | nindent 4 }}
{{- end }}

View File

@@ -42,22 +42,6 @@ data:
client_max_body_size 5G;
{{- if .Values.mcpServer.enabled }}
# OAuth callback page must be served by the web server (Next.js),
# not the MCP server. Exact match takes priority over the regex below.
location = /mcp/oauth/callback {
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_set_header X-Forwarded-Host $host;
proxy_set_header Host $host;
proxy_http_version 1.1;
proxy_redirect off;
proxy_connect_timeout {{ .Values.nginx.timeouts.connect }}s;
proxy_send_timeout {{ .Values.nginx.timeouts.send }}s;
proxy_read_timeout {{ .Values.nginx.timeouts.read }}s;
proxy_pass http://web_server;
}
# MCP Server - Model Context Protocol for LLM integrations
# Match /mcp, /mcp/, or /mcp/* but NOT /mcpserver, /mcpapi, etc.
location ~ ^/mcp(/.*)?$ {

View File

@@ -296,7 +296,7 @@ nginx:
# The ingress-nginx subchart doesn't auto-detect our custom ConfigMap changes.
# Workaround: Helm upgrade will restart if the following annotation value changes.
podAnnotations:
onyx.app/nginx-config-version: "4"
onyx.app/nginx-config-version: "3"
# Propagate DOMAIN into nginx so server_name continues to use the same env var
extraEnvs:

View File

@@ -0,0 +1,89 @@
# Initial Architecture Map
Status: provisional baseline. This is a routing map for agents, not a complete design spec for
every subsystem. Update it as the repo becomes more explicit.
## Top-Level Surfaces
The repository is easiest to reason about as seven main surfaces:
| Surface | Primary Paths | Purpose |
| --- | --- | --- |
| Backend product logic | `backend/onyx/`, `backend/ee/onyx/` | Core auth, chat, search, indexing, connectors, API, and enterprise extensions |
| Data and persistence | `backend/onyx/db/`, `backend/ee/onyx/db/`, `backend/alembic/` | DB models, data access logic, and schema migrations |
| Frontend product surfaces | `web/src/app/`, `web/src/sections/`, `web/src/layouts/` | Next.js routes, screens, and feature-level UI composition |
| Frontend design system and shared UI | `web/lib/opal/`, `web/src/refresh-components/` | Preferred primitives for new UI work |
| Devtools and local developer workflows | `tools/ods/`, `cli/` | Repo automation, CI helpers, visual regression tooling, and CLI integrations |
| Agent-facing platform work | `backend/onyx/server/features/build/`, `backend/onyx/mcp_server/`, `backend/onyx/deep_research/`, `backend/onyx/agents/` | Sandbox runtime, MCP tool surface, agent orchestration, and research workflows |
| Agent-lab harness state | shared git metadata under `$(git rev-parse --git-common-dir)/onyx-agent-lab/` | Local worktree manifests, ports, env overlays, and verification artifacts for agentized development |
## Backend Map
Use these paths as the first stop when routing backend changes:
| Area | Paths | Notes |
| --- | --- | --- |
| Authentication and access control | `backend/onyx/auth/`, `backend/onyx/access/`, `backend/ee/onyx/access/` | User identity, auth flows, permissions |
| Chat and answer generation | `backend/onyx/chat/`, `backend/onyx/server/query_and_chat/` | Chat loop, message processing, streaming |
| Retrieval and tools | `backend/onyx/tools/`, `backend/onyx/context/`, `backend/onyx/mcp_server/` | Search tools, web tools, context assembly, MCP exposure |
| Connectors and indexing | `backend/onyx/connectors/`, `backend/onyx/document_index/`, `backend/onyx/background/` | Source sync, indexing, pruning, permissions sync |
| LLM and prompt infrastructure | `backend/onyx/llm/`, `backend/onyx/prompts/`, `backend/ee/onyx/prompts/` | Provider integrations and prompting |
| Server APIs and feature entrypoints | `backend/onyx/server/`, `backend/ee/onyx/server/` | FastAPI routes and product feature APIs |
| Agent and build platform | `backend/onyx/server/features/build/`, `backend/onyx/agents/`, `backend/onyx/deep_research/` | Sandboxes, agent runtimes, orchestration, long-running research |
| Persistence | `backend/onyx/db/`, `backend/ee/onyx/db/` | Put DB operations here, not in route handlers or feature modules |
## Frontend Map
For frontend work, route changes by intent first, then by component maturity:
| Intent | Preferred Paths | Notes |
| --- | --- | --- |
| Next.js route/page work | `web/src/app/` | App Router pages and page-local wiring |
| Feature composition | `web/src/sections/`, `web/src/layouts/` | Preferred place for reusable feature-level assemblies |
| New shared UI primitives | `web/lib/opal/`, `web/src/refresh-components/` | Default targets for new reusable UI |
| Legacy shared UI | `web/src/components/` | Avoid for new work unless forced by the local surface |
| Frontend business logic | `web/src/lib/`, `web/src/hooks/`, `web/src/interfaces/` | Utilities, hooks, typed interfaces |
Important frontend rule already established in [web/AGENTS.md](../../web/AGENTS.md):
- Do not use `web/src/components/` for new component work.
## Existing Hard Constraints
These rules already exist and should be treated as architectural boundaries:
- Backend errors should raise `OnyxError`, not `HTTPException`.
- DB operations belong under `backend/onyx/db/` or `backend/ee/onyx/db/`.
- New FastAPI APIs should not use `response_model`.
- Celery tasks should use `@shared_task`.
- Enqueued Celery tasks must include `expires=`.
- Backend calls in local/manual flows should go through `http://localhost:3000/api/...`.
## Change Routing Heuristics
Use these heuristics before editing:
1. If the task changes persistence semantics, start in the DB layer and migrations.
2. If the task changes user-visible UI, find the route in `web/src/app/`, then move downward into
`sections`, `layouts`, and preferred shared UI.
3. If the task spans product behavior and background execution, inspect both the API entrypoint and
the relevant Celery path.
4. If the task concerns agentization, build, or local execution, check whether
`backend/onyx/server/features/build/` or `tools/ods/` is the better home before creating a new
subsystem.
5. If the task needs isolated local boot, browser validation, or per-change artifacts, check
[HARNESS.md](./HARNESS.md) before inventing another ad hoc runner.
6. If the change touches a historically messy area, consult [LEGACY_ZONES.md](./LEGACY_ZONES.md)
before adding more local patterns.
## Test Routing
Onyx already has a clear testing ladder:
- `backend/tests/unit/`: isolated logic only
- `backend/tests/external_dependency_unit/`: real infra, direct function calls, selective mocking
- `backend/tests/integration/`: real deployment, no mocking
- `web/tests/e2e/`: full frontend-backend coordination
Prefer the lowest layer that still validates the real behavior. For many product changes in this
repo, that means integration or Playwright rather than unit tests.

147
docs/agent/BRANCHING.md Normal file
View File

@@ -0,0 +1,147 @@
# Branching Model for `agent-lab`
This is the branching policy for `agent-lab`. It is intentionally separate from the default
workflow on `main`.
This document explains how to use a long-running `agent-lab` branch without making `main`
implicitly depend on lab-only agent-engineering changes.
## Goals
- Keep `main` stable and consensus-driven.
- Allow opt-in agent-engineering improvements to live on `agent-lab`.
- Let engineers and agents use `agent-lab` as a control checkout for worktree-based development.
- Ensure product PRs to `main` originate from `main`-based branches, not from `agent-lab`.
## Branch Roles
| Branch | Purpose |
| --- | --- |
| `main` | Shipping branch and team default |
| `codex/agent-lab` | Long-running control checkout containing the harness and agent-engineering improvements |
| `codex/lab/<name>` | Short-lived branch for `agent-lab`-only tooling, docs, or workflow work |
| `codex/fix/<name>`, `codex/feat/<name>`, etc. | Short-lived product branch cut from `origin/main` and managed by the `agent-lab` control checkout |
## Core Rule
`main` must never depend on `agent-lab`.
That means:
- `codex/agent-lab` may contain extra tooling, docs, checks, and workflow changes.
- Product branches may be managed by the `agent-lab` control checkout, but they must still be based
on `origin/main`.
- A PR to `main` should come from a `main`-based product branch, not from `codex/agent-lab`.
## Preferred Workflow
### Lab-Only Work
Use this for agent-engineering docs, harnesses, optional checks, or tooling that should remain on
`agent-lab` for now.
1. Branch from `codex/agent-lab` into `codex/lab/<name>`.
For local isolation, create the branch via `ods worktree create codex/lab/<name>`.
2. Make the lab-only changes.
3. Open the PR back into `codex/agent-lab`.
4. Do not open these changes directly to `main` unless the team later agrees to upstream them.
### Product Feature Work
Use this when you want to fix a product bug or build a shipping feature for `main`.
1. Stay in the `codex/agent-lab` control checkout.
2. Create a product worktree from `origin/main`, using a conventional branch lane such as:
- `ods worktree create codex/fix/<name>`
- `ods worktree create codex/feat/<name>`
3. Make the code changes inside that worktree checkout.
4. Run harness commands from the control checkout against the tracked worktree:
- `ods agent-check --worktree codex/fix/<name>`
- `ods verify --worktree codex/fix/<name>`
- `ods backend api --worktree codex/fix/<name>`
- `ods web dev --worktree codex/fix/<name>`
5. If the change needs browser proof, record a before/after journey:
- before editing: `ods journey run --worktree codex/fix/<name> --journey <name> --label before`
- after validating the fix: `ods journey run --worktree codex/fix/<name> --journey <name> --label after`
- use `ods journey compare` only when the initial `before` capture was missed and a recovery
baseline is needed later
- after the PR exists, publish the artifact directory you captured or the fallback compare run
with `ods journey publish --run-dir <dir> --pr <number>`
6. Commit, push, and open the PR from the product worktree checkout itself.
Prefer `ods pr-open` so the repo template and conventional-commit title check stay in the same
control plane.
7. Open the PR directly from that product branch to `main`.
8. After the PR is open, use:
- `ods pr-review triage --pr <number>`
- `ods pr-checks diagnose --pr <number>`
- `ods pr-review respond --comment-id ... --thread-id ... --body ...`
## Commit Hygiene Rules
This workflow only works if commits are separated cleanly.
Agents and humans should:
- keep lab-only workflow changes in separate commits from product logic
- avoid mixing refactors, harness changes, and feature behavior in one commit
- use conventional-commit messages and PR titles
- prefer multiple small commits over one large mixed commit
Good split:
- `docs(agent-lab): clarify control-checkout workflow`
- `fix: suppress logged-out modal on fresh unauthenticated load`
- `test: add regression coverage for auth-page logout modal`
Bad split:
- `misc: update agent docs, add lint, change connector UI, fix API`
## Guidance for Agents
When an agent is working on product code, it should assume:
1. The product branch should be created from `origin/main`, not from `codex/agent-lab`.
2. The `codex/agent-lab` checkout is the control plane for `ods` commands until the harness is
upstreamed more broadly.
3. The code change itself should still be made and committed inside the target product worktree.
4. A PR to `main` should use a conventional-commit title such as `fix: ...` or `feat: ...`.
If a product bug is discovered while editing on `codex/agent-lab`, treat that as exploration.
Restart the real fix in a fresh `main`-based product worktree and port only the minimal product
patch there.
## What Should Usually Stay on `agent-lab`
These are usually lab-only unless explicitly approved for upstreaming:
- branch-specific workflow docs
- harness-only `ods` commands
- non-consensus lint rules
- agent harness scripts
- opt-in automation for review or promotion
- branch-specific AGENTS guidance
## What Can Be Promoted to `main`
These can be promoted once they stand on their own:
- product feature code
- product tests
- bug fixes
- low-controversy lint rules with team agreement
- small devtools improvements that are useful outside `agent-lab`
## Review Standard
If opening a PR to `main` from the `agent-lab` control workflow:
- make sure the PR branch itself is based on `origin/main`
- use a conventional-commit title
- mention any control-plane validation that was run with `ods ... --worktree <branch>`
- attach journey artifacts when browser behavior changed
- treat review-thread replies and failing checks as part of the same agent loop, not as a separate
manual phase
This keeps the product branch reviewable without forcing reviewers to understand the entire
`agent-lab` branch.

View File

@@ -0,0 +1,73 @@
# Golden Rules
These are the current rules for the `agent-lab` workflow. The long-term goal is to move the useful
ones from prose into shared checks, scripts, or tests where appropriate.
Some of these are already documented elsewhere in the repo as project standards. In this file,
they should be treated as the active rules for work done on `agent-lab`.
## Current Rules
### Backend
1. Raise `OnyxError` instead of `HTTPException`.
2. Put DB operations under `backend/onyx/db/` or `backend/ee/onyx/db/`.
3. Use `@shared_task` for Celery tasks.
4. Never enqueue a Celery task without `expires=`.
5. Do not use FastAPI `response_model` on new APIs.
6. Keep Python strictly typed.
### Frontend
1. Prefer `web/lib/opal/` and `web/src/refresh-components/` for new shared UI.
2. Do not add new shared components under `web/src/components/`.
3. Route backend calls through the frontend `/api/...` surface in local and test flows.
4. Keep TypeScript strictly typed.
### Workflow
1. Start in a tracked worktree created by `ods worktree create`. Do not use raw `git worktree add`
for harness-managed work.
2. For harness work, use `codex/lab/...` branches based on `codex/agent-lab`. For product work,
use conventional branches such as `codex/fix/...` or `codex/feat/...` based on `origin/main`.
3. Make edits inside the target worktree. Copying a patch from another checkout is only acceptable
when debugging the harness itself.
4. Prefer integration or external-dependency-unit tests over unit tests when validating real Onyx
behavior.
5. When a repeated review comment appears, convert it into repo-local documentation or a mechanical
check.
6. For browser-visible changes, prefer a registered `ods journey` capture over an ad hoc manual
recording. The before/after artifacts should live with the PR loop.
7. Use `ods pr-review` to fetch and triage GitHub review threads instead of relying on memory or
the web UI alone. Reply and resolve from the same workflow when confidence is high.
8. Use `ods pr-checks diagnose` to detect failing GitHub checks and point the next remediation
command. For Playwright failures, pair it with `ods trace`.
9. PR titles and commit messages should use conventional-commit style such as `fix: ...` or
   `feat: ...`. Never use `[codex]` prefixes in this repo.
10. When touching legacy areas, leave the area more explicit than you found it: better naming,
   better boundaries, or a follow-up cleanup note.
## Mechanical Checks
These are strong candidates for `ods agent-check` or dedicated linters:
| Check | Why it matters |
| --- | --- |
| Ban `HTTPException` in backend product code | Keeps API error handling consistent |
| Ban direct DB mutations outside DB directories | Preserves layering |
| Detect task enqueue calls missing `expires=` | Prevents queue growth and stale work |
| Detect new imports from `web/src/components/` in non-legacy code | Prevents further UI drift |
| Detect direct calls to backend ports in tests/scripts where frontend proxy should be used | Preserves realistic request paths |
| Detect missing docs/agent references for new repo-level rules | Prevents knowledge from staying only in chat |
## Rule Promotion Policy
Promote a rule from prose into enforcement when at least one is true:
- it has been violated more than once
- a violation is expensive to detect late
- the remediation is mechanical
- the error message can teach the correct pattern succinctly
Agents work better with fast, local, actionable failures than with broad stylistic feedback after a
PR is opened.

267
docs/agent/HARNESS.md Normal file
View File

@@ -0,0 +1,267 @@
# Worktree Harness
This document defines the `agent-lab` harness model for doing end-to-end work on `onyx`.
The goal is to make one agent capable of taking one isolated change from edit to verification
without depending on human memory for ports, paths, or validation steps.
## Principles
These decisions follow the same principles described in OpenAI's
[Harness engineering](https://openai.com/index/harness-engineering/) and
[Unlocking the Codex harness](https://openai.com/index/unlocking-the-codex-harness/) articles:
- each task should run in its own git worktree
- the app should be bootable per worktree
- browser state should be directly legible to the agent
- logs, traces, and test artifacts should be attached to the same worktree lifecycle
- repository docs plus local metadata should be the system of record, not chat memory
## Current Harness Surface
The first `agent-lab` harness layer lives in `tools/ods/`.
Implemented command surfaces:
- `ods worktree create <branch>`: creates a git worktree plus local agent metadata
- `ods worktree deps up|status|reset|down`: provisions and manages namespaced external state
- `ods worktree status`: lists tracked worktrees and their URLs
- `ods worktree show [worktree]`: prints the manifest for one worktree
- `ods worktree remove <worktree>`: removes the worktree and local harness state
- `ods journey list|run|compare|publish`: records registered browser journeys, including local
before/after video artifacts and optional PR publication
- `ods pr-review fetch|triage|respond|resolve`: turns GitHub review threads into a local
machine-readable loop
- `ods pr-checks status|diagnose`: makes failing GitHub checks queryable from the same control
plane
- `ods verify`: runs the agent verification ladder and writes a machine-readable summary
- `ods agent-check`: runs diff-based architectural and doc checks
## Required Workflow
This is the required `agent-lab` workflow going forward:
1. Create the target worktree first with `ods worktree create`.
2. Make the code changes inside that worktree.
3. Run verification against that same worktree.
4. Open the PR from that same worktree.
Do not implement a change in one checkout and then rsync or patch it into another checkout just to
test it. That is only acceptable when explicitly debugging the harness itself.
Also do not use raw `git worktree add` for harness-managed work. `ods worktree create` is the
authoritative entrypoint because it disables repo hooks during checkout, writes the local manifest,
bootstraps env/runtime dependencies, provisions namespaced state, and records the worktree lane and
base ref.
## Control Checkout Model
Right now the harness code itself lives on `codex/agent-lab`, not on plain `main`.
That means the `codex/agent-lab` checkout acts as the control plane:
- lab worktrees such as `codex/lab/...` are based on `codex/agent-lab`
- product worktrees such as `codex/fix/...` or `codex/feat/...` are based on `origin/main`
- the `agent-lab` checkout can still manage those product worktrees via `--worktree`
flags on `ods backend`, `ods web`, `ods verify`, and `ods agent-check`
This lets us use the harness to manage a `main`-based product branch before the harness itself has
been upstreamed to `main`.
## Worktree Metadata
Each `agent-lab` worktree gets a local manifest stored under the shared git metadata directory:
```text
$(git rev-parse --git-common-dir)/onyx-agent-lab/worktrees/<id>/
```
The manifest tracks:
- branch name
- checkout path
- base ref used when the branch was created
- dependency mode and namespace-derived external dependency settings
- reserved ports for web, API, model server, and MCP
- browser-facing URLs
- generated env overlay file paths
- artifact directory
- last verification summary
This state is local runtime metadata. It is intentionally not checked into the repo.
## Boot Model
The current harness boot model isolates the mutable application processes and can also isolate the
mutable non-search data plane.
Per worktree:
- Next.js dev server gets its own `PORT`
- browser-facing base URL is unique
- backend API port is unique
- model server port is unique
- MCP port reservation exists for future worktree-local MCP runtime use
- artifacts are written to a worktree-specific directory
Today this is enough to make the app bootable per worktree without requiring a fully duplicated
dependency container stack for every task.
Important boundary:
- isolated today: app processes, ports, URLs, local artifacts, worktree-local dependency installs,
PostgreSQL database, Redis key prefix, and MinIO file-store bucket when the worktree runs in
`namespaced` dependency mode
- shared today: OpenSearch/Vespa and the rest of the local dependency stack started via docker
compose
This means a normal `agent-lab` worktree can run against:
- a dedicated Postgres database on the shared local Postgres server
- a dedicated Redis namespace on the shared local Redis instance
- a dedicated MinIO file-store bucket on the shared local object store
OpenSearch/Vespa remain shared-only by design on this branch. The harness should never imply
otherwise.
This is a deliberate brownfield adaptation of the OpenAI articles' worktree-per-task model:
keep the common path mechanically isolated where the repo already supports it, and explicitly mark
the high-complexity surfaces that remain shared.
## Dependency Modes
`agent-lab` currently supports two dependency modes:
- `namespaced`: default mode for agent feature work. Creates one Postgres database, one Redis
prefix, and one MinIO bucket per worktree.
- `shared`: reuse the existing local DB/Redis/MinIO state when full isolation is unnecessary.
The worktree manifest is the source of truth for the selected mode and the derived namespace values.
Search infrastructure policy:
- OpenSearch/Vespa are always shared
- there is no current plan to add namespaced or per-worktree search stacks on `agent-lab`
- tasks that mutate search/index infrastructure should be treated as higher-risk and validated with
extra care because the harness does not isolate that surface
## Backend and Web Integration
When `ods backend ...` or `ods web ...` runs inside a tracked `agent-lab` worktree, it should
derive runtime settings from the worktree manifest automatically.
Current behavior:
- `ods backend api` defaults to the reserved worktree API port
- `ods backend model_server` defaults to the reserved worktree model-server port
- `ods web dev` gets the reserved worktree web port plus `BASE_URL`, `WEB_DOMAIN`,
`INTERNAL_URL`, and `MCP_INTERNAL_URL`
- backend and web commands also inherit the manifest's dependency namespace env overrides
- generated `.vscode/.env.agent-lab` and `.vscode/.env.web.agent-lab` files mirror those values
- `ods worktree bootstrap` prepares the worktree to run by linking env files, linking or cloning
the Python runtime, and preparing `web/node_modules`
- `ods worktree deps up` provisions namespaced Postgres/Redis/MinIO state when needed
- `ods backend ... --worktree <id>` and `ods web ... --worktree <id>` let the `agent-lab`
control checkout run app processes against a tracked target worktree
This makes the standard dev commands work in an isolated way without inventing a second startup
surface just for agents.
## Browser Validation
Use two browser surfaces — plus the `ods journey` commands that wrap them — each with a different job:
- Chrome DevTools MCP for exploratory validation, DOM snapshots, navigation, and interactive bug
reproduction
- Playwright for codified end-to-end verification, screenshots, and retained traces
- `ods journey run` for the default article-style loop inside one worktree: capture `before` before
the fix, then capture `after` after the fix and publish the resulting artifacts to the PR when
needed
- `ods journey compare` as the fallback path when the agent missed the initial `before` capture or
needs a strict baseline-vs-branch comparison after the fact
Important detail:
- The default path should not launch two worktrees just to prove a normal UI bug fix. Use one
tracked product worktree, start the app in that worktree, and record `before` and `after` from
that same environment.
- If the fix is still uncommitted, always capture from the tracked target worktree, not from a
temporary `HEAD` checkout.
- `ods journey compare` is reserved for recovery or explicit revision comparison, not as the
standard path for every PR.
The worktree manifest's `web` URL is the source of truth for both.
If an agent needs to inspect live UI behavior while iterating, it should prefer Chrome DevTools MCP
against the worktree URL. If the behavior needs to become a repeatable regression check, encode it
as Playwright coverage under `web/tests/e2e/`.
## Verification Ladder
The expected verification sequence for a worktree is:
1. `ods agent-check`
2. targeted backend tests when backend behavior changed
3. targeted Playwright runs when UI or frontend-backend flows changed
4. `ods journey run --label before` before the code change, then `ods journey run --label after`
after the change when the PR needs durable browser proof
5. screenshot and trace review when UI validation fails
`ods verify` is the first unified entrypoint for this ladder. It writes a JSON summary into the
worktree artifact directory so later agent runs can inspect prior results directly.
For product worktrees based on `main`, the intended control-plane usage is:
1. from `codex/agent-lab`, run `ods worktree create codex/fix/<name>`
2. edit inside the created `main`-based checkout
3. from `codex/agent-lab`, run `ods verify --worktree codex/fix/<name>`
4. if live processes are needed, run `ods backend ... --worktree codex/fix/<name>` and
`ods web ... --worktree codex/fix/<name>`
5. commit, push, and open the PR from the product worktree checkout itself
## Artifacts
Per-worktree artifacts are written under the local harness state directory, not into chat.
Current artifact classes:
- verification summaries
- pytest logs
- Playwright logs
- journey screenshots, videos, traces, and compare summaries
- PR review thread snapshots and triage outputs
- dependency namespace metadata in the local manifest
Existing repo outputs are still relevant:
- Playwright traces and screenshots under `web/output/`
- screenshot diff reports from `ods screenshot-diff`
- CI trace retrieval from `ods trace`
## Known Gaps
This is the initial harness layer, not the finished system.
Still missing:
- one-command `up/down` orchestration for all local processes
- worktree-local observability stack for logs, metrics, and traces
- worktree-local MCP server runtime wiring
- automatic promotion tooling from `agent-lab` feature branches to `main`
- recurring doc-gardening and cleanup agents
- resumable long-running task server for local development tasks
Resolved in the current harness layer:
- fresh-worktree bootstrap for `.venv`, `.vscode/.env*`, and `web/node_modules`
- namespaced isolation for Postgres, Redis, and MinIO on a per-worktree basis
- registered before/after browser journeys with durable artifact directories
- GitHub review-thread fetch/triage/respond tooling
- GitHub failing-check diagnosis from the same `ods` control plane
Non-goals on this branch:
- OpenSearch/Vespa namespacing
- per-worktree vector/search stacks
Those are the next places to invest if we want to match the article more closely.

View File

@@ -0,0 +1,87 @@
# Legacy Zones
Status: initial classification. This file exists to stop agents from treating every existing
pattern in the repository as equally desirable precedent.
## Zone Types
| Zone | Meaning | Edit Policy |
| --- | --- | --- |
| `strict` | Preferred surface for new work | Freely extend, but keep boundaries explicit and add tests |
| `transition` | Actively evolving surface with mixed patterns | Prefer local consistency, avoid introducing new abstractions casually |
| `legacy-adapter` | Known historical surface or deprecated pattern area | Avoid new dependencies on it; prefer facades, wrappers, or migrations away |
| `frozen` | Only touch for bug fixes, security, or explicitly scoped work | Do not expand the pattern set |
## Initial Classification
### Strict
These are good default targets for new investment:
- `backend/onyx/db/`
- `backend/ee/onyx/db/`
- `backend/onyx/error_handling/`
- `backend/onyx/mcp_server/`
- `backend/onyx/server/features/build/`
- `tools/ods/`
- `web/lib/opal/`
- `web/src/refresh-components/`
- `web/src/layouts/`
- `web/src/sections/cards/`
### Transition
These areas are important and active, but they mix styles, eras, and responsibilities:
- `backend/onyx/server/`
- `backend/ee/onyx/server/`
- `backend/onyx/chat/`
- `backend/onyx/tools/`
- `backend/onyx/agents/`
- `backend/onyx/deep_research/`
- `web/src/app/`
- `web/src/sections/`
- `web/src/lib/`
Edit guidance:
- prefer incremental refactors over sweeping rewrites
- keep changes local when the area lacks clear boundaries
- add tests before extracting new shared abstractions
### Legacy-Adapter
These areas should not be treated as default precedent for new work:
- `web/src/components/`
- `backend/model_server/legacy/`
Edit guidance:
- do not add fresh reusable components or helper patterns here
- if a task requires touching these areas, prefer introducing an adapter in a stricter surface
- if you must extend a legacy file, keep the blast radius small and document follow-up cleanup
### Frozen
No repo-wide frozen zones are declared yet beyond files or subsystems that are clearly deprecated on
their face. Add explicit entries here rather than relying on tribal knowledge.
## Brownfield Rules
When a task lands in a non-strict zone:
1. Identify whether the task is fixing behavior, adding capability, or migrating structure.
2. Avoid copying local patterns into stricter parts of the codebase.
3. If an unsafe pattern is unavoidable, isolate it behind a typed boundary.
4. Record newly discovered smells in [GOLDEN_RULES.md](./GOLDEN_RULES.md) or a follow-on
execution plan.
## Promotion Criteria
A transition area can move toward `strict` when:
- its dependency boundaries are easy to explain
- new code has a preferred home
- tests are reliable enough for agents to use as feedback loops
- recurring review comments have been turned into written or mechanical rules

View File

@@ -0,0 +1,48 @@
# Quality Score Baseline
This file is an intentionally rough baseline for how legible the repository is to coding agents.
It is not a product quality report. It is a scorecard for agent development ergonomics.
## Scoring Rubric
Each area is scored from `0` to `5` on four dimensions:
- `Legibility`: how easy it is to discover the right files and concepts
- `Boundaries`: how clearly dependency and ownership seams are defined
- `Verification`: how available and reliable the feedback loops are
- `Agent ergonomics`: how likely an agent is to make a correct change without human rescue
Overall score is directional, not mathematically precise.
## Initial Baseline
| Area | Legibility | Boundaries | Verification | Agent ergonomics | Overall | Notes |
| --- | --- | --- | --- | --- | --- | --- |
| Backend core (`backend/onyx/`, `backend/ee/onyx/`) | 3 | 3 | 4 | 3 | 3.25 | Strong test surface, but top-level routing docs are thin |
| Persistence (`backend/onyx/db/`, migrations) | 4 | 4 | 3 | 4 | 3.75 | Clearer than most areas because path-level rules already exist |
| Frontend modern surfaces (`web/src/app/`, `sections`, `opal`, `refresh-components`) | 3 | 3 | 3 | 3 | 3.0 | Direction exists, but mixed generations still leak across boundaries |
| Frontend legacy shared UI (`web/src/components/`) | 1 | 1 | 2 | 1 | 1.25 | Explicitly deprecated, but still present and easy for agents to cargo-cult |
| Agent platform and build sandbox (`backend/onyx/server/features/build/`) | 3 | 4 | 3 | 4 | 3.5 | Good substrate for agentization, but not yet aimed at repo development workflows |
| MCP, CLI, and devtools (`backend/onyx/mcp_server/`, `cli/`, `tools/ods/`) | 4 | 4 | 4 | 4 | 4.0 | `agent-check`, worktree manifests, `ods verify`, `ods journey`, and PR review/check tooling give this surface a real control plane |
| Repo-level docs and plans | 4 | 3 | 4 | 4 | 3.75 | `docs/agent/` now describes the journey/review/check loop directly, though subsystem coverage is still uneven |
## Biggest Gaps
1. Repo-level architecture knowledge is still thinner than the runtime and workflow docs.
2. Brownfield and legacy zones are not explicitly flagged enough for agents.
3. Important engineering rules still outnumber the mechanical checks that enforce them.
4. The worktree harness does not yet include a local observability stack or one-command process orchestration.
## Near-Term Targets
The next improvements should aim to move these areas:
- Repo-level docs and plans: `3.75 -> 4.0`
- Frontend legacy safety: `1.25 -> 2.5`
- Backend core agent ergonomics: `3.0 -> 4.0`
- Worktree observability and runtime automation: `2.5 -> 4.0`
## Update Policy
When a new check, map, or workflow materially improves agent behavior, update this scorecard and
note what changed. If a score changes, the adjacent notes should explain why.

68
docs/agent/README.md Normal file
View File

@@ -0,0 +1,68 @@
# Agent Engineering Docs
This directory is the knowledge base for the `agent-lab` workflow around making development of
`onyx` itself more agentized.
The goal is not to replace the root [AGENTS.md](../../AGENTS.md).
The goal is to keep architecture maps, unsafe-zone notes, quality signals, and follow-on
execution plans in a form that coding agents can discover and update.
On `agent-lab`, this directory is the system of record for agent-engineering workflow.
## Principles
- Keep the entrypoint small. The root `AGENTS.md` should point here; it should not become a
growing encyclopedia.
- Create the target worktree first. The intended workflow is one task, one tracked worktree, one
verification loop, and one PR from that same checkout.
- Keep artifacts with the workflow. Browser videos, traces, review summaries, and check triage
should be produced by harness commands and stored as machine-readable outputs, not recreated
from chat memory.
- Prefer maps over manuals. Agents need navigable pointers to the right subsystem, not a giant
blob of undifferentiated instructions.
- Encode recurring judgment into the repo. If a rule matters often, document it here and then
promote it into a check, linter, test, or script.
- Distinguish legacy from greenfield. Agents will copy the patterns they see. If an area is
historically messy, we need to say so explicitly.
- Version decisions with the code. If a design choice matters for future changes, it should live
in-repo rather than in chat or memory.
## Documents
- [ARCHITECTURE.md](./ARCHITECTURE.md): top-level codebase map and change-routing guidance.
- [BRANCHING.md](./BRANCHING.md): branch model for long-running `agent-lab` development and
promotion of product-only changes to `main`.
- [HARNESS.md](./HARNESS.md): worktree runtime model, verification ladder, and browser/tooling
expectations.
- [LEGACY_ZONES.md](./LEGACY_ZONES.md): edit policy for strict, transitional, and legacy areas.
- [GOLDEN_RULES.md](./GOLDEN_RULES.md): active rules for `agent-lab` and promotion targets for
mechanical enforcement.
- [QUALITY_SCORE.md](./QUALITY_SCORE.md): baseline legibility and maintainability assessment for
agent work.
## Operating Model
Use this directory for information that should change how future agents work in the `agent-lab`
workflow:
- architecture maps
- dependency and layering rules
- "do not extend this pattern" warnings
- safe extension points
- recurring cleanup policies
- harness/runtime behavior for worktree-based development
- before/after browser journeys and PR artifact publication
- GitHub review and failing-check control loops
- quality scorecards
- active execution plans for agent-engineering improvements
Current workflow split:
- `codex/agent-lab` is the control checkout for the harness itself.
- `codex/lab/<name>` branches are for harness/docs/tooling work based on `codex/agent-lab`.
- `codex/fix/<name>`, `codex/feat/<name>`, and similar conventional product branches should be
created from `origin/main`, even when they are managed from the `agent-lab` control checkout.
- PR titles and commit messages should use conventional-commit style, never `[codex]` prefixes.
Do not turn this into a dumping ground. If something is local to one feature, keep it with that
feature. This directory is for `agent-lab`-level agent-development guidance.

View File

@@ -148,7 +148,7 @@ dev = [
"matplotlib==3.10.8",
"mypy-extensions==1.0.0",
"mypy==1.13.0",
"onyx-devtools==0.7.5",
"onyx-devtools==0.7.4",
"openapi-generator-cli==7.17.0",
"pandas-stubs~=2.3.3",
"pre-commit==3.2.2",

View File

@@ -28,11 +28,11 @@ Some commands require external tools to be installed and configured:
- **uv** - Required for `backend` commands
- Install from [docs.astral.sh/uv](https://docs.astral.sh/uv/)
- **GitHub CLI** (`gh`) - Required for `run-ci`, `cherry-pick`, and `trace` commands
- **GitHub CLI** (`gh`) - Required for `run-ci`, `cherry-pick`, `trace`, `pr-review`, and `pr-checks` commands
- Install from [cli.github.com](https://cli.github.com/)
- Authenticate with `gh auth login`
- **AWS CLI** - Required for `screenshot-diff` commands (S3 baseline sync)
- **AWS CLI** - Required for `screenshot-diff` commands and `journey publish` (S3 artifact sync)
- Install from [aws.amazon.com/cli](https://aws.amazon.com/cli/)
- Authenticate with `aws sso login` or `aws configure`
@@ -196,11 +196,19 @@ ods backend <subcommand>
| Flag | Default | Description |
|------|---------|-------------|
| `--no-ee` | `false` | Disable Enterprise Edition features (enabled by default) |
| `--worktree` | current checkout | Run the command against a tracked agent-lab worktree |
| `--port` | `8080` (api) / `9000` (model_server) | Port to listen on |
Shell environment takes precedence over `.env` file values, so inline overrides
work as expected (e.g. `S3_ENDPOINT_URL=foo ods backend api`).
When run inside a tracked `agent-lab` worktree, `ods backend api` and
`ods backend model_server` will automatically use that worktree's reserved
ports unless you override them explicitly with `--port`.
The same command can also be launched from the `codex/agent-lab` control
checkout against another tracked worktree via `--worktree <branch>`.
**Examples:**
```shell
@@ -218,6 +226,9 @@ ods backend model_server
# Start the model server on a custom port
ods backend model_server --port 9001
# Run the API server for a tracked product worktree from the control checkout
ods backend api --worktree codex/fix/auth-banner-modal
```
### `web` - Run Frontend Scripts
@@ -231,6 +242,14 @@ ods web <script> [args...]
Script names are available via shell completion (for supported shells via
`ods completion`), and are read from `web/package.json`.
When run inside a tracked `agent-lab` worktree, `ods web ...` automatically
injects the worktree's `PORT`, `BASE_URL`, `WEB_DOMAIN`, `INTERNAL_URL`, and
`MCP_INTERNAL_URL` so the Next.js dev server boots against the right isolated
stack.
From the `codex/agent-lab` control checkout, `--worktree <branch>` applies the
same wiring to a tracked target worktree.
**Examples:**
```shell
@@ -242,6 +261,162 @@ ods web lint
# Forward extra args to the script
ods web test --watch
# Run the Next.js dev server for a tracked product worktree
ods web dev --worktree codex/fix/auth-banner-modal
```
### `worktree` - Manage Agent-Lab Worktrees
Create and manage local git worktrees for agentized development. Each tracked
worktree gets:
- a reserved port bundle for web, API, model server, and MCP
- an explicit dependency mode for local external state
- generated `.vscode/.env.agent-lab` and `.vscode/.env.web.agent-lab` files
- a local artifact directory for verification logs and summaries
- a manifest stored under the shared git metadata directory
- bootstrap support for env files, Python runtime, and frontend dependencies
`ods worktree create` is the authoritative entrypoint for this workflow. Do not
use raw `git worktree add` when you want the `agent-lab` harness, because you
will skip the manifest, env overlays, dependency bootstrap, and lane-aware base
selection.
```shell
ods worktree <subcommand>
```
**Subcommands:**
- `create <branch>` - Create a worktree and manifest
- `bootstrap [worktree]` - Prepare env files and dependencies for a worktree
- `deps up|status|reset|down [worktree]` - Provision and manage namespaced external state
- `status` - List tracked worktrees and URLs
- `show [worktree]` - Show detailed metadata for one worktree
- `remove <worktree>` - Remove a worktree and its local state
`ods worktree create` bootstraps new worktrees by default. The current bootstrap
behavior is:
- link `.vscode/.env` and `.vscode/.env.web` from the source checkout when present
- link the source checkout's `.venv` when present
- clone `web/node_modules` into the worktree when present, falling back to
`npm ci --prefer-offline --no-audit`
Current isolation boundary:
- worktree-local: web/API/model-server ports, URLs, env overlays, artifact dirs
- namespaced when `--dependency-mode namespaced` is used: PostgreSQL database,
Redis prefix, and MinIO file-store bucket
- always shared: OpenSearch/Vespa and the rest of the docker-compose dependency stack
`namespaced` is the default dependency mode on `agent-lab`. `shared` is still
available for lighter-weight work that does not need isolated DB/Redis/MinIO
state.
Branch lanes:
- `codex/lab/<name>` worktrees are treated as harness work and default to
`codex/agent-lab` as the base ref
- `codex/fix/<name>`, `codex/feat/<name>`, and other conventional product lanes
default to `origin/main` as the base ref
- branches that do not encode a lane fall back to `HEAD`; use `--from` or a
clearer branch name when the base matters
Control-plane note:
- the harness lives on `codex/agent-lab`
- product worktrees can still be based on `origin/main`
- run `ods backend`, `ods web`, `ods verify`, and `ods agent-check` with
`--worktree <branch>` from the control checkout when the target worktree does
not carry the harness code itself
Search/vector note:
- OpenSearch/Vespa stay shared-only
- this branch intentionally does not implement namespaced or per-worktree search stacks
- tasks that touch search/index infrastructure should assume a shared surface
**Examples:**
```shell
# Create a product bugfix worktree from main
ods worktree create codex/fix/auth-banner-modal
# Create a lab-only worktree from agent-lab
ods worktree create codex/lab/browser-validation
# Reuse the shared DB/Redis/MinIO state for a lighter-weight task
ods worktree create codex/fix/ui-polish --dependency-mode shared
# Re-bootstrap an existing worktree
ods worktree bootstrap codex/fix/auth-banner-modal
# Inspect the current worktree's namespaced dependency state
ods worktree deps status
# Reset the current worktree's Postgres/Redis/MinIO namespace
ods worktree deps reset
# See tracked worktrees
ods worktree status
# Show the current worktree manifest
ods worktree show
# Remove a worktree when finished
ods worktree remove codex/fix/auth-banner-modal
# Remove a worktree and tear down its namespaced dependencies
ods worktree remove codex/fix/auth-banner-modal --drop-deps
```
### `verify` - Run the Agent-Lab Verification Ladder
Run a unified verification flow for the current checkout. `ods verify` is the
first worktree-aware entrypoint that combines:
- `agent-check`
- optional targeted pytest execution
- optional targeted Playwright execution
- machine-readable verification summaries written to the worktree artifact dir
```shell
ods verify
```
Useful flags:
| Flag | Description |
|------|-------------|
| `--base-ref <ref>` | Ref to compare against for `agent-check` |
| `--skip-agent-check` | Skip the diff-based rules step |
| `--worktree <id>` | Run verification against a tracked worktree from the control checkout |
| `--pytest <path>` | Run a specific pytest path or node id (repeatable) |
| `--playwright <path>` | Run a specific Playwright test path (repeatable) |
| `--playwright-grep <expr>` | Pass `--grep` through to Playwright |
| `--playwright-project <name>` | Limit Playwright to one project |
Examples:
```shell
# Run just the diff-based checks
ods verify
# Validate a backend change with one focused integration target
ods verify --pytest backend/tests/integration/tests/streaming_endpoints/test_chat_stream.py
# Validate a UI change with one Playwright suite
ods verify --playwright tests/e2e/chat/welcome_page.spec.ts --playwright-project admin
# Run both backend and UI checks
ods verify \
--pytest backend/tests/integration/tests/streaming_endpoints/test_chat_stream.py \
--playwright tests/e2e/admin/default-agent.spec.ts
# Verify a tracked product worktree from the control checkout
ods verify --worktree codex/fix/auth-banner-modal
```
### `dev` - Devcontainer Management
@@ -325,6 +500,56 @@ Check that specified modules are only lazily imported (used for keeping backend
ods check-lazy-imports
```
### `agent-check` - Check New Agent-Safety Violations
Run a small set of diff-based checks aimed at keeping new changes agent-friendly
without failing on historical debt already present in the repository.
This command is part of the expected workflow on `agent-lab`. It is not necessarily a repo-wide
mandatory gate on `main`.
```shell
ods agent-check
```
Current checks flag newly added:
- `HTTPException` usage in backend product code
- `response_model=` on backend APIs
- Celery `.delay()` calls
- imports from `web/src/components/` outside the legacy component tree
The command also validates the `docs/agent/` knowledge base by checking that
required files exist and that local markdown links in that surface resolve
correctly.
Useful flags:
| Flag | Description |
|------|-------------|
| `--staged` | Check the staged diff instead of the working tree |
| `--base-ref <ref>` | Diff against a git ref other than `HEAD` |
| `--worktree <id>` | Check a tracked worktree from the control checkout |
Examples:
```shell
# Check working tree changes
ods agent-check
# Check only staged changes
ods agent-check --staged
# Compare the branch against main
ods agent-check --base-ref origin/main
# Limit the diff to specific paths
ods agent-check web/src backend/onyx/server/features/build
# Run against a tracked product worktree from the control checkout
ods agent-check --worktree codex/fix/auth-banner-modal --base-ref origin/main
```
### `run-ci` - Run CI on Fork PRs
Pull requests from forks don't automatically trigger GitHub Actions for security reasons.
@@ -516,6 +741,148 @@ ods trace --project admin
ods trace --list
```
### `journey` - Capture Before/After Browser Journeys
Run a registered Playwright journey with video capture. The default workflow is
to record `before` and `after` inside the same tracked worktree as the change.
`journey compare` remains available as a recovery path when you need to compare
two explicit revisions/worktrees after the fact.
Registered journeys live in `web/tests/e2e/journeys/registry.json`.
An optional `.github/agent-journeys.json` file can list journeys for a PR:
```json
{
"journeys": ["auth-landing"]
}
```
```shell
ods journey <subcommand>
```
**Subcommands:**
- `list` - Show registered journeys
- `run` - Run one journey against the current or target worktree
- `compare` - Capture `before` and `after` artifacts across two revisions/worktrees when a missed baseline must be recovered
- `publish` - Upload a compare run to S3 and upsert the PR comment
**Examples:**
```shell
# List journey definitions
ods journey list
# Capture before in the tracked product worktree before editing
ods journey run --worktree codex/fix/auth-banner-modal --journey auth-landing --label before
# Capture after in that same worktree after validating the fix
ods journey run --worktree codex/fix/auth-banner-modal --journey auth-landing --label after
# Recover a missed baseline later by comparing origin/main to a tracked product worktree
ods journey compare \
--journey auth-landing \
--after-worktree codex/fix/auth-banner-modal
# Publish an existing compare run to PR #10007
ods journey publish \
--run-dir .git/onyx-agent-lab/journeys/20260408-123000 \
--pr 10007
```
`journey run` writes a `summary.json` into the capture directory. `journey compare`
writes a `summary.json` into its run directory and, when `--pr` is supplied,
uploads that directory to S3 and upserts a PR comment with before/after links.
### `pr-review` - Fetch and Respond to GitHub Review Threads
Treat PR review comments as a local machine-readable workflow instead of relying
on the GitHub UI alone.
```shell
ods pr-review <subcommand>
```
**Subcommands:**
- `fetch` - Download review threads into local harness state
- `triage` - Classify threads as actionable, duplicate, outdated, or resolved
- `respond` - Reply to an inline review comment and optionally resolve its thread
- `resolve` - Resolve a review thread without posting a reply
**Examples:**
```shell
# Fetch review threads for the current branch PR
ods pr-review fetch
# Triage review threads for a specific PR
ods pr-review triage --pr 10007
# Reply to a top-level review comment and resolve the thread
ods pr-review respond \
--pr 10007 \
--comment-id 2512997464 \
--thread-id PRRT_kwDO... \
--body "Fixed in the latest patch. Added a regression journey as well."
```
Fetched and triaged review data is written under the local harness state
directory:
```text
$(git rev-parse --git-common-dir)/onyx-agent-lab/reviews/pr-<number>/
```
### `pr-checks` - Diagnose Failing GitHub Checks
Inspect the latest checks on a PR and surface the failing ones with the next
recommended remediation command.
```shell
ods pr-checks <subcommand>
```
**Subcommands:**
- `status` - list all checks for the PR
- `diagnose` - list only failing checks and point to the next step
**Examples:**
```shell
# Show all checks on the current branch PR
ods pr-checks status
# Show only failing checks and the next remediation command
ods pr-checks diagnose --pr 10007
```
`pr-checks diagnose` is especially useful after pushing a fix or after replying
to review comments. For Playwright failures it points directly at `ods trace`.
### `pr-open` - Open a PR With the Repo Template
Create a pull request through `gh` while enforcing a conventional-commit title.
If `--title` is omitted, `ods` uses the latest commit subject. The PR body
defaults to `.github/pull_request_template.md`. PRs are ready-for-review by
default; use `--draft` only when you explicitly need that state.
```shell
ods pr-open
ods pr-open --title "fix: suppress logged-out modal on fresh auth load"
```
### `pr-merge` - Merge a PR Through `gh`
Merge or auto-merge a pull request with an explicit merge method.
```shell
ods pr-merge --pr 10007 --method squash
ods pr-merge --pr 10007 --method squash --auto --delete-branch
```
### Testing Changes Locally (Dry Run)
Both `run-ci` and `cherry-pick` support `--dry-run` to test without making remote changes:

View File

@@ -0,0 +1,161 @@
package cmd
import (
"fmt"
"os"
"os/exec"
"sort"
"strings"
log "github.com/sirupsen/logrus"
"github.com/spf13/cobra"
"github.com/onyx-dot-app/onyx/tools/ods/internal/agentcheck"
"github.com/onyx-dot-app/onyx/tools/ods/internal/agentdocs"
"github.com/onyx-dot-app/onyx/tools/ods/internal/paths"
)
// AgentCheckOptions holds the flag values and resolved context for the
// agent-check command.
type AgentCheckOptions struct {
	Staged   bool   // check the staged diff (--cached) instead of the working tree
	BaseRef  string // git ref to diff against instead of HEAD
	Worktree string // tracked agent-lab worktree identifier to check instead of the current checkout
	RepoRoot string // resolved checkout path the git diff is executed in (set by runAgentCheck)
}
// AgentCheckResult aggregates the findings of one agent-check run: diff-based
// code violations plus docs/agent knowledge-base validation failures.
type AgentCheckResult struct {
	Violations    []agentcheck.Violation // newly added lines that break an agent-safety rule
	DocViolations []agentdocs.Violation  // problems found while validating the docs/agent/ surface
}
// NewAgentCheckCommand creates the agent-check command.
func NewAgentCheckCommand() *cobra.Command {
	options := &AgentCheckOptions{}

	agentCheckCmd := &cobra.Command{
		Use:   "agent-check [paths...]",
		Short: "Run diff-based checks for agent-safe changes",
		Long: `Run diff-based checks for agent-safe changes.

This command inspects added lines in the current git diff and flags a small set
of newly introduced repo-level violations without failing on historical debt.

By default it compares the working tree against HEAD. Use --staged to inspect
the staged diff instead, or --base-ref to compare against a different ref.

Use --worktree to run the same check against a tracked target worktree from the
agent-lab control checkout.

Examples:
  ods agent-check
  ods agent-check --staged
  ods agent-check --base-ref origin/main
  ods agent-check --worktree codex/fix/auth-banner-modal --base-ref origin/main
  ods agent-check web/src backend/onyx/server/features/build`,
		Run: func(cmd *cobra.Command, args []string) {
			runAgentCheck(options, args)
		},
	}

	// Flags mirror the fields on AgentCheckOptions one-to-one.
	flags := agentCheckCmd.Flags()
	flags.BoolVar(&options.Staged, "staged", false, "check staged changes instead of the working tree")
	flags.StringVar(&options.BaseRef, "base-ref", "", "git ref to diff against instead of HEAD")
	flags.StringVar(&options.Worktree, "worktree", "", "tracked agent-lab worktree to check instead of the current checkout")

	return agentCheckCmd
}
// runAgentCheck resolves the target checkout, evaluates the diff-based and
// docs checks, prints every violation, and exits non-zero when any were found.
func runAgentCheck(opts *AgentCheckOptions, providedPaths []string) {
	repoRoot, _, _ := resolveAgentLabTarget(opts.Worktree)
	opts.RepoRoot = repoRoot

	result, err := evaluateAgentCheck(opts, providedPaths)
	if err != nil {
		log.Fatalf("Failed to run agent-check: %v", err)
	}

	if len(result.Violations) == 0 && len(result.DocViolations) == 0 {
		log.Info("✅ agent-check found no new violations.")
		return
	}

	// Sort both violation lists so the report order is deterministic.
	// (Previously only code violations were sorted, leaving doc violations
	// in whatever order agentdocs.Validate produced them.)
	sort.Slice(result.Violations, func(i, j int) bool {
		if result.Violations[i].Path != result.Violations[j].Path {
			return result.Violations[i].Path < result.Violations[j].Path
		}
		if result.Violations[i].LineNum != result.Violations[j].LineNum {
			return result.Violations[i].LineNum < result.Violations[j].LineNum
		}
		return result.Violations[i].RuleID < result.Violations[j].RuleID
	})
	sort.Slice(result.DocViolations, func(i, j int) bool {
		if result.DocViolations[i].Path != result.DocViolations[j].Path {
			return result.DocViolations[i].Path < result.DocViolations[j].Path
		}
		return result.DocViolations[i].Message < result.DocViolations[j].Message
	})

	for _, violation := range result.Violations {
		log.Errorf("\n❌ %s:%d [%s]", violation.Path, violation.LineNum, violation.RuleID)
		log.Errorf(" %s", violation.Message)
		log.Errorf(" Added line: %s", strings.TrimSpace(violation.Content))
	}
	for _, violation := range result.DocViolations {
		log.Errorf("\n❌ %s [agent-docs]", violation.Path)
		log.Errorf(" %s", violation.Message)
	}

	// Summary goes to stderr so scripts capturing stdout stay clean.
	fmt.Fprintf(
		os.Stderr,
		"\nFound %d agent-check violation(s) and %d agent-docs violation(s).\n",
		len(result.Violations),
		len(result.DocViolations),
	)
	os.Exit(1)
}
// evaluateAgentCheck collects the configured git diff, extracts its added
// lines, and runs both the diff-based rules and the docs/agent knowledge-base
// validation, returning the combined result.
func evaluateAgentCheck(opts *AgentCheckOptions, providedPaths []string) (*AgentCheckResult, error) {
	diffOutput, err := getAgentCheckDiff(opts, providedPaths)
	if err != nil {
		return nil, err
	}

	addedLines, err := agentcheck.ParseAddedLines(diffOutput)
	if err != nil {
		return nil, err
	}

	// Fall back to discovering the git root when no repo root was resolved
	// ahead of time.
	root := opts.RepoRoot
	if root == "" {
		gitRoot, rootErr := paths.GitRoot()
		if rootErr != nil {
			return nil, fmt.Errorf("determine git root: %w", rootErr)
		}
		root = gitRoot
	}

	return &AgentCheckResult{
		Violations:    agentcheck.CheckAddedLines(addedLines),
		DocViolations: agentdocs.Validate(root),
	}, nil
}
// getAgentCheckDiff runs "git diff" for the configured target and returns the
// raw unified (zero-context) diff text.
//
// stdout and stderr are deliberately kept separate: the returned text is later
// parsed by agentcheck.ParseAddedLines, so git warnings printed to stderr must
// not be interleaved with the diff (CombinedOutput would do exactly that).
func getAgentCheckDiff(opts *AgentCheckOptions, providedPaths []string) (string, error) {
	args := []string{"diff", "--no-color", "--unified=0"}
	if opts.Staged {
		args = append(args, "--cached")
	} else if opts.BaseRef != "" {
		args = append(args, opts.BaseRef)
	} else {
		args = append(args, "HEAD")
	}
	if len(providedPaths) > 0 {
		args = append(args, "--")
		args = append(args, providedPaths...)
	}

	cmd := exec.Command("git", args...)
	if opts.RepoRoot != "" {
		// Run the diff inside the target checkout (e.g. a tracked worktree).
		cmd.Dir = opts.RepoRoot
	}

	output, err := cmd.Output()
	if err != nil {
		var stderr string
		if exitErr, ok := err.(*exec.ExitError); ok {
			stderr = strings.TrimSpace(string(exitErr.Stderr))
		}
		return "", fmt.Errorf("git %s failed: %w\n%s", strings.Join(args, " "), err, stderr)
	}
	return string(output), nil
}

View File

@@ -0,0 +1,32 @@
package cmd
import (
log "github.com/sirupsen/logrus"
"github.com/onyx-dot-app/onyx/tools/ods/internal/agentlab"
"github.com/onyx-dot-app/onyx/tools/ods/internal/paths"
)
// resolveAgentLabTarget maps an optional worktree identifier to a checkout
// path plus its agent-lab manifest. The boolean reports whether a manifest
// was found. Resolution failures are fatal.
func resolveAgentLabTarget(identifier string) (string, agentlab.Manifest, bool) {
	// No explicit target: operate on the current checkout and attach its
	// manifest when this checkout happens to be a tracked worktree.
	if identifier == "" {
		root, rootErr := paths.GitRoot()
		if rootErr != nil {
			log.Fatalf("Failed to determine git root: %v", rootErr)
		}
		m, ok := currentAgentLabManifest(root)
		return root, m, ok
	}

	gitDir, dirErr := agentlab.GetCommonGitDir()
	if dirErr != nil {
		log.Fatalf("Failed to determine git common dir: %v", dirErr)
	}

	m, ok, lookupErr := agentlab.FindByIdentifier(gitDir, identifier)
	if lookupErr != nil {
		log.Fatalf("Failed to resolve worktree %q: %v", identifier, lookupErr)
	}
	if !ok {
		log.Fatalf("No agent-lab worktree found for %q", identifier)
	}
	return m.CheckoutPath, m, true
}

View File

@@ -1,7 +1,6 @@
package cmd
import (
"bufio"
"errors"
"fmt"
"net"
@@ -14,14 +13,16 @@ import (
log "github.com/sirupsen/logrus"
"github.com/spf13/cobra"
"github.com/onyx-dot-app/onyx/tools/ods/internal/paths"
"github.com/onyx-dot-app/onyx/tools/ods/internal/agentlab"
"github.com/onyx-dot-app/onyx/tools/ods/internal/envutil"
)
// NewBackendCommand creates the parent "backend" command with subcommands for
// running backend services.
// BackendOptions holds options shared across backend subcommands.
type BackendOptions struct {
NoEE bool
NoEE bool
Worktree string
}
func NewBackendCommand() *cobra.Command {
@@ -44,6 +45,7 @@ Available subcommands:
}
cmd.PersistentFlags().BoolVar(&opts.NoEE, "no-ee", false, "Disable Enterprise Edition features (enabled by default)")
cmd.PersistentFlags().StringVar(&opts.Worktree, "worktree", "", "tracked agent-lab worktree to run from instead of the current checkout")
cmd.AddCommand(newBackendAPICommand(opts))
cmd.AddCommand(newBackendModelServerCommand(opts))
@@ -62,9 +64,10 @@ func newBackendAPICommand(opts *BackendOptions) *cobra.Command {
Examples:
ods backend api
ods backend api --port 9090
ods backend api --no-ee`,
ods backend api --no-ee
ods backend api --worktree codex/fix/auth-banner-modal`,
Run: func(cmd *cobra.Command, args []string) {
runBackendService("api", "onyx.main:app", port, opts)
runBackendService("api", "onyx.main:app", port, cmd.Flags().Changed("port"), opts)
},
}
@@ -83,9 +86,10 @@ func newBackendModelServerCommand(opts *BackendOptions) *cobra.Command {
Examples:
ods backend model_server
ods backend model_server --port 9001`,
ods backend model_server --port 9001
ods backend model_server --worktree codex/fix/auth-banner-modal`,
Run: func(cmd *cobra.Command, args []string) {
runBackendService("model_server", "model_server.main:app", port, opts)
runBackendService("model_server", "model_server.main:app", port, cmd.Flags().Changed("port"), opts)
},
}
@@ -137,16 +141,25 @@ func resolvePort(port string) string {
return port
}
func runBackendService(name, module, port string, opts *BackendOptions) {
root, err := paths.GitRoot()
if err != nil {
log.Fatalf("Failed to find git root: %v", err)
func runBackendService(name, module, port string, portExplicit bool, opts *BackendOptions) {
root, worktreeManifest, hasWorktreeManifest := resolveAgentLabTarget(opts.Worktree)
if hasWorktreeManifest && !portExplicit {
switch name {
case "api":
port = strconv.Itoa(worktreeManifest.Ports.API)
case "model_server":
port = strconv.Itoa(worktreeManifest.Ports.ModelServer)
}
}
port = resolvePort(port)
envFile := ensureBackendEnvFile(root)
fileVars := loadBackendEnvFile(envFile)
fileVars, err := envutil.LoadFile(envFile)
if err != nil {
log.Fatalf("Failed to load env file %s: %v", envFile, err)
}
eeDefaults := eeEnvDefaults(opts.NoEE)
fileVars = append(fileVars, eeDefaults...)
@@ -162,9 +175,17 @@ func runBackendService(name, module, port string, opts *BackendOptions) {
if !opts.NoEE {
log.Info("Enterprise Edition enabled (use --no-ee to disable)")
}
if hasWorktreeManifest {
log.Infof("agent-lab worktree %s detected: web=%s api=%s", worktreeManifest.Branch, worktreeManifest.URLs.Web, worktreeManifest.URLs.API)
log.Infof("lane=%s base-ref=%s", worktreeManifest.ResolvedLane(), worktreeManifest.BaseRef)
log.Infof("dependency mode=%s search-infra=%s", worktreeManifest.ResolvedDependencies().Mode, worktreeManifest.ResolvedDependencies().SearchInfraMode)
}
log.Debugf("Running in %s: uv %v", backendDir, uvicornArgs)
mergedEnv := mergeEnv(os.Environ(), fileVars)
mergedEnv := envutil.Merge(os.Environ(), fileVars)
if hasWorktreeManifest {
mergedEnv = envutil.ApplyOverrides(mergedEnv, worktreeManifest.RuntimeEnv())
}
log.Debugf("Applied %d env vars from %s (shell takes precedence)", len(fileVars), envFile)
svcCmd := exec.Command("uv", uvicornArgs...)
@@ -185,6 +206,18 @@ func runBackendService(name, module, port string, opts *BackendOptions) {
}
}
// currentAgentLabManifest looks up the agent-lab manifest for the checkout at
// repoRoot. Lookup errors are deliberately swallowed and reported as "no
// manifest" — running outside a tracked worktree is a normal condition, not
// a failure.
func currentAgentLabManifest(repoRoot string) (agentlab.Manifest, bool) {
	commonGitDir, err := agentlab.GetCommonGitDir()
	if err != nil {
		return agentlab.Manifest{}, false
	}
	manifest, found, err := agentlab.FindByRepoRoot(commonGitDir, repoRoot)
	if err != nil {
		return agentlab.Manifest{}, false
	}
	return manifest, found
}
// eeEnvDefaults returns env entries for EE and license enforcement settings.
// These are appended to the file vars so they act as defaults — shell env
// and .env file values still take precedence via envutil.Merge.
@@ -231,59 +264,3 @@ func ensureBackendEnvFile(root string) string {
log.Infof("Created %s from template (review and fill in <REPLACE THIS> values)", envFile)
return envFile
}
// mergeEnv combines shell environment with file-based defaults. Shell values
// take precedence — file entries are only added for keys not already present.
func mergeEnv(shellEnv, fileVars []string) []string {
	// Index the keys already present in the shell environment.
	shellKeys := make(map[string]bool, len(shellEnv))
	for _, entry := range shellEnv {
		if sep := strings.Index(entry, "="); sep > 0 {
			shellKeys[entry[:sep]] = true
		}
	}

	merged := make([]string, len(shellEnv), len(shellEnv)+len(fileVars))
	copy(merged, shellEnv)

	for _, entry := range fileVars {
		sep := strings.Index(entry, "=")
		if sep <= 0 {
			// Entries without a KEY=VALUE shape are ignored, matching the
			// key scan above.
			continue
		}
		key := entry[:sep]
		if shellKeys[key] {
			log.Debugf("Env var %s already set in shell, skipping .env value", key)
			continue
		}
		merged = append(merged, entry)
	}
	return merged
}
// loadBackendEnvFile parses a .env file into KEY=VALUE entries suitable for
// appending to os.Environ(). Blank lines and comments are skipped.
func loadBackendEnvFile(path string) []string {
	f, err := os.Open(path)
	if err != nil {
		log.Fatalf("Failed to open env file %s: %v", path, err)
	}
	defer func() { _ = f.Close() }()

	var envVars []string
	scanner := bufio.NewScanner(f)
	for scanner.Scan() {
		line := strings.TrimSpace(scanner.Text())

		// Skip blank lines and comment lines.
		if line == "" || strings.HasPrefix(line, "#") {
			continue
		}

		sep := strings.Index(line, "=")
		if sep <= 0 {
			continue
		}

		key := strings.TrimSpace(line[:sep])
		// Strip surrounding single/double quotes from the value.
		value := strings.Trim(strings.TrimSpace(line[sep+1:]), `"'`)
		envVars = append(envVars, fmt.Sprintf("%s=%s", key, value))
	}
	if err := scanner.Err(); err != nil {
		log.Fatalf("Failed to read env file %s: %v", path, err)
	}
	return envVars
}

View File

@@ -37,8 +37,6 @@ func NewDesktopCommand() *cobra.Command {
runDesktopScript(args)
},
}
cmd.Flags().SetInterspersed(false)
return cmd
}

View File

@@ -29,8 +29,6 @@ Examples:
// runDevExec executes "devcontainer exec --workspace-folder <root> <command...>".
func runDevExec(command []string) {
checkDevcontainerCLI()
ensureDockerSock()
ensureRemoteUser()
root, err := paths.GitRoot()
if err != nil {

View File

@@ -148,53 +148,10 @@ func worktreeGitMount(root string) (string, bool) {
return mount, true
}
// sshAgentMount returns a --mount flag value that forwards the host's SSH agent
// socket into the container. Returns ("", false) when SSH_AUTH_SOCK is unset or
// the socket is not accessible.
func sshAgentMount() (string, bool) {
	sock := os.Getenv("SSH_AUTH_SOCK")
	if sock == "" {
		log.Debug("SSH_AUTH_SOCK not set — skipping SSH agent forwarding")
		return "", false
	}
	if _, statErr := os.Stat(sock); statErr != nil {
		log.Debugf("SSH_AUTH_SOCK=%s not accessible: %v", sock, statErr)
		return "", false
	}
	log.Debugf("Forwarding SSH agent: %s", sock)
	return fmt.Sprintf("type=bind,source=%s,target=/tmp/ssh-agent.sock", sock), true
}
// ensureRemoteUser sets DEVCONTAINER_REMOTE_USER when rootless Docker is
// detected. Container root maps to the host user in rootless mode, so running
// as root inside the container avoids the UID mismatch on new files.
// Must be called after ensureDockerSock.
func ensureRemoteUser() {
	// Respect an explicit user choice.
	if os.Getenv("DEVCONTAINER_REMOTE_USER") != "" {
		return
	}
	if runtime.GOOS != "linux" {
		return
	}

	// Heuristic: rootless Docker on Linux typically places its socket
	// under $XDG_RUNTIME_DIR. If DOCKER_SOCK was set to a custom path
	// outside XDG_RUNTIME_DIR, set DEVCONTAINER_REMOTE_USER=root manually.
	sock := os.Getenv("DOCKER_SOCK")
	xdg := os.Getenv("XDG_RUNTIME_DIR")
	if xdg == "" || !strings.HasPrefix(sock, xdg) {
		return
	}

	log.Debug("Rootless Docker detected — setting DEVCONTAINER_REMOTE_USER=root")
	if err := os.Setenv("DEVCONTAINER_REMOTE_USER", "root"); err != nil {
		log.Warnf("Failed to set DEVCONTAINER_REMOTE_USER: %v", err)
	}
}
// runDevcontainer executes "devcontainer <action> --workspace-folder <root> [extraArgs...]".
func runDevcontainer(action string, extraArgs []string) {
checkDevcontainerCLI()
ensureDockerSock()
ensureRemoteUser()
root, err := paths.GitRoot()
if err != nil {
@@ -205,9 +162,6 @@ func runDevcontainer(action string, extraArgs []string) {
if mount, ok := worktreeGitMount(root); ok {
args = append(args, "--mount", mount)
}
if mount, ok := sshAgentMount(); ok {
args = append(args, "--mount", mount)
}
args = append(args, extraArgs...)
log.Debugf("Running: devcontainer %v", args)

View File

@@ -0,0 +1,63 @@
package cmd
import (
"fmt"
"os/exec"
"strings"
"github.com/onyx-dot-app/onyx/tools/ods/internal/git"
)
// ghString runs the GitHub CLI with the given arguments and returns trimmed
// stdout. On failure the error wraps the exit error and, when available,
// gh's stderr output.
func ghString(args ...string) (string, error) {
	git.CheckGitHubCLI()
	out, runErr := exec.Command("gh", args...).Output()
	if runErr == nil {
		return strings.TrimSpace(string(out)), nil
	}
	joined := strings.Join(args, " ")
	if exitErr, ok := runErr.(*exec.ExitError); ok {
		return "", fmt.Errorf("gh %s failed: %w: %s", joined, runErr, strings.TrimSpace(string(exitErr.Stderr)))
	}
	return "", fmt.Errorf("gh %s failed: %w", joined, runErr)
}
// resolvePRNumber returns the explicit PR number when one was supplied,
// otherwise asks gh for the PR associated with the current branch.
func resolvePRNumber(explicit string) (string, error) {
	// Return the trimmed value: previously the raw input was passed through,
	// so surrounding whitespace (e.g. from shell expansion) leaked into gh
	// API paths built from this number.
	if trimmed := strings.TrimSpace(explicit); trimmed != "" {
		return trimmed, nil
	}
	return ghString("pr", "view", "--json", "number", "--jq", ".number")
}
// currentRepoSlug returns the "owner/name" slug of the repository gh resolves
// for the current directory.
func currentRepoSlug() (string, error) {
	jq := `.owner.login + "/" + .name`
	return ghString("repo", "view", "--json", "owner,name", "--jq", jq)
}
// upsertIssueComment creates or updates a single marker-identified comment on
// the given PR (issue). Comments whose body starts with marker are treated as
// previous versions: the first match is edited in place; otherwise a new
// comment is posted.
//
// NOTE(review): "gh api" without --paginate returns only the first page of
// comments, so a marker comment beyond that page would be duplicated —
// confirm against expected comment volume.
func upsertIssueComment(repoSlug, prNumber, marker, body string) error {
	commentID, err := ghString(
		"api",
		fmt.Sprintf("repos/%s/issues/%s/comments", repoSlug, prNumber),
		"--jq",
		fmt.Sprintf(".[] | select(.body | startswith(%q)) | .id", marker),
	)
	if err != nil {
		return err
	}
	// The jq filter can match more than one comment (one ID per line); update
	// only the first so we never build a PATCH URL containing a newline.
	if idx := strings.IndexByte(commentID, '\n'); idx >= 0 {
		commentID = strings.TrimSpace(commentID[:idx])
	}
	if commentID != "" {
		_, err := ghString(
			"api",
			"--method", "PATCH",
			fmt.Sprintf("repos/%s/issues/comments/%s", repoSlug, commentID),
			"-f", fmt.Sprintf("body=%s", body),
		)
		return err
	}
	_, err = ghString(
		"api",
		"--method", "POST",
		fmt.Sprintf("repos/%s/issues/%s/comments", repoSlug, prNumber),
		"-f", fmt.Sprintf("body=%s", body),
	)
	return err
}

865
tools/ods/cmd/journey.go Normal file
View File

@@ -0,0 +1,865 @@
package cmd
import (
"encoding/json"
"fmt"
"net/http"
"os"
"os/exec"
"path/filepath"
"sort"
"strings"
"syscall"
"time"
log "github.com/sirupsen/logrus"
"github.com/spf13/cobra"
"github.com/onyx-dot-app/onyx/tools/ods/internal/agentlab"
"github.com/onyx-dot-app/onyx/tools/ods/internal/git"
"github.com/onyx-dot-app/onyx/tools/ods/internal/journey"
"github.com/onyx-dot-app/onyx/tools/ods/internal/paths"
"github.com/onyx-dot-app/onyx/tools/ods/internal/s3"
)
// defaultJourneyHTTPRegion is the AWS region baked into public HTTPS links
// for uploaded journey artifacts.
// NOTE(review): assumes the artifact bucket lives in us-east-2 — confirm this
// matches the bucket's actual region.
const defaultJourneyHTTPRegion = "us-east-2"

// JourneyRunOptions carries the flags for "journey run".
type JourneyRunOptions struct {
	Journey   string // registered journey name to run
	Label     string // artifact label for the capture (e.g. "before", "after")
	Worktree  string // tracked agent-lab worktree; empty = current checkout
	OutputDir string // explicit artifact directory; empty = derived default
	Project   string // Playwright project override; empty = registry value
}

// JourneyCompareOptions carries the flags for "journey compare".
type JourneyCompareOptions struct {
	Journeys       []string // journey names from the repeatable --journey flag
	PlanFile       string   // JSON plan file; empty = repo default when present
	BeforeRef      string   // git ref for the before capture
	AfterRef       string   // git ref for the after capture (no --after-worktree)
	AfterWorktree  string   // existing tracked worktree for the after capture
	DependencyMode string   // dependency mode for temp worktrees
	PR             string   // PR number to upload/comment against; empty = skip
	KeepWorktrees  bool     // keep temporary worktrees after the run
	Bucket         string   // S3 bucket override for uploads
}

// JourneyPublishOptions carries the flags for "journey publish".
type JourneyPublishOptions struct {
	RunDir string // compare run directory containing summary.json
	PR     string // PR number; empty = resolve from current branch
	Bucket string // S3 bucket override for uploads
}

// JourneyCaptureSummary describes the artifacts of one journey capture and is
// serialized into per-capture and per-run summary.json files.
type JourneyCaptureSummary struct {
	Journey      string   `json:"journey"`
	Label        string   `json:"label"`
	Worktree     string   `json:"worktree,omitempty"`
	URL          string   `json:"url"`
	ArtifactDir  string   `json:"artifact_dir"`
	LogPath      string   `json:"log_path"`
	VideoFiles   []string `json:"video_files,omitempty"`
	TraceFiles   []string `json:"trace_files,omitempty"`
	Screenshots  []string `json:"screenshots,omitempty"`
	MetadataJSON []string `json:"metadata_json,omitempty"`
}

// JourneyCompareSummary is the machine-readable result of a compare run,
// written to <run dir>/summary.json and updated after publishing with the S3
// locations.
type JourneyCompareSummary struct {
	GeneratedAt string                  `json:"generated_at"`
	BeforeRef   string                  `json:"before_ref"`
	AfterRef    string                  `json:"after_ref"`
	RunDir      string                  `json:"run_dir"`
	S3Prefix    string                  `json:"s3_prefix,omitempty"`
	S3HTTPBase  string                  `json:"s3_http_base,omitempty"`
	Captures    []JourneyCaptureSummary `json:"captures"`
}

// managedProcess tracks one background service started for a capture run,
// along with the log file receiving its combined output.
type managedProcess struct {
	Name    string    // human-readable service name (api, model_server, web)
	Cmd     *exec.Cmd // started command; Process is signaled on shutdown
	LogPath string    // file receiving the service's stdout+stderr
}
// NewJourneyCommand creates the journey command surface.
func NewJourneyCommand() *cobra.Command {
	root := &cobra.Command{
		Use:   "journey",
		Short: "Capture before/after browser journeys as agent artifacts",
	}
	subcommands := []*cobra.Command{
		newJourneyListCommand(),
		newJourneyRunCommand(),
		newJourneyCompareCommand(),
		newJourneyPublishCommand(),
	}
	for _, sub := range subcommands {
		root.AddCommand(sub)
	}
	return root
}
// newJourneyListCommand wires the "journey list" subcommand.
func newJourneyListCommand() *cobra.Command {
	listCmd := cobra.Command{
		Use:   "list",
		Short: "List registered browser journeys",
		Run: func(_ *cobra.Command, _ []string) {
			runJourneyList()
		},
	}
	return &listCmd
}
// newJourneyRunCommand wires the "journey run" subcommand: one labeled
// capture of a single registered journey against the current checkout or a
// tracked agent-lab worktree.
func newJourneyRunCommand() *cobra.Command {
	opts := &JourneyRunOptions{}
	cmd := &cobra.Command{
		Use:   "run",
		Short: "Run a single registered journey against the current or target worktree",
		Long: `Run one registered journey against the current checkout or a tracked worktree.
This is the default before/after workflow for product changes:
1. capture --label before in the target worktree before editing
2. implement and validate the change in that same worktree
3. capture --label after in that same worktree
Use journey compare only when you need to recover a missed baseline or compare
two explicit revisions after the fact.`,
		Run: func(cmd *cobra.Command, args []string) {
			runJourneyRun(opts)
		},
	}
	cmd.Flags().StringVar(&opts.Journey, "journey", "", "registered journey name to run")
	cmd.Flags().StringVar(&opts.Label, "label", "after", "artifact label for this capture (for example before or after)")
	cmd.Flags().StringVar(&opts.Worktree, "worktree", "", "tracked agent-lab worktree to run from instead of the current checkout")
	cmd.Flags().StringVar(&opts.OutputDir, "output-dir", "", "explicit artifact directory for the capture")
	cmd.Flags().StringVar(&opts.Project, "project", "", "override the Playwright project from the journey registry")
	// --journey has no sensible default; fail early instead of mid-capture.
	_ = cmd.MarkFlagRequired("journey")
	return cmd
}
// newJourneyCompareCommand wires the "journey compare" subcommand: a full
// before/after capture across two revisions, with optional PR publishing.
func newJourneyCompareCommand() *cobra.Command {
	opts := &JourneyCompareOptions{}
	cmd := &cobra.Command{
		Use:   "compare",
		Short: "Capture before and after videos by replaying registered journeys against two revisions",
		Long: `Create or reuse worktrees for the before and after revisions, boot the app in each one,
record the configured journeys, and write a machine-readable summary. If --pr is supplied,
the compare run is also uploaded to S3 and linked from the pull request.
This is the fallback path, not the default workflow. Prefer journey run inside a
single tracked product worktree when you can capture before and after during the
normal edit loop.`,
		Run: func(cmd *cobra.Command, args []string) {
			runJourneyCompare(opts)
		},
	}
	cmd.Flags().StringArrayVar(&opts.Journeys, "journey", nil, "registered journey name to capture (repeatable)")
	cmd.Flags().StringVar(&opts.PlanFile, "plan-file", "", "JSON file containing {\"journeys\":[...]} (defaults to .github/agent-journeys.json when present)")
	cmd.Flags().StringVar(&opts.BeforeRef, "before-ref", "origin/main", "git ref for the before capture")
	cmd.Flags().StringVar(&opts.AfterRef, "after-ref", "HEAD", "git ref for the after capture when --after-worktree is not supplied")
	cmd.Flags().StringVar(&opts.AfterWorktree, "after-worktree", "", "existing tracked worktree to use for the after capture")
	cmd.Flags().StringVar(&opts.DependencyMode, "dependency-mode", string(agentlab.DependencyModeNamespaced), "dependency mode for temporary worktrees: namespaced or shared")
	cmd.Flags().StringVar(&opts.PR, "pr", "", "pull request number to upload/comment against after capture")
	cmd.Flags().StringVar(&opts.Bucket, "bucket", "", "override the S3 bucket used for uploaded journey artifacts")
	cmd.Flags().BoolVar(&opts.KeepWorktrees, "keep-worktrees", false, "keep temporary journey worktrees after the capture run")
	return cmd
}
// newJourneyPublishCommand wires the "journey publish" subcommand: re-upload
// an already-captured compare run and refresh the PR comment.
func newJourneyPublishCommand() *cobra.Command {
	opts := &JourneyPublishOptions{}
	cmd := &cobra.Command{
		Use:   "publish",
		Short: "Upload a previously captured compare run and update the pull request comment",
		Run: func(cmd *cobra.Command, args []string) {
			runJourneyPublish(opts)
		},
	}
	cmd.Flags().StringVar(&opts.RunDir, "run-dir", "", "compare run directory containing summary.json")
	cmd.Flags().StringVar(&opts.PR, "pr", "", "pull request number to publish against")
	cmd.Flags().StringVar(&opts.Bucket, "bucket", "", "override the S3 bucket used for uploaded journey artifacts")
	// Without a run dir there is nothing to publish; require it up front.
	_ = cmd.MarkFlagRequired("run-dir")
	return cmd
}
// runJourneyList prints one tab-separated line per registered journey:
// name, description, Playwright project, and model-server requirement.
func runJourneyList() {
	repoRoot, err := paths.GitRoot()
	if err != nil {
		log.Fatalf("Failed to determine git root: %v", err)
	}
	registry, loadErr := journey.LoadRegistry(repoRoot)
	if loadErr != nil {
		log.Fatalf("Failed to load journey registry: %v", loadErr)
	}
	for _, def := range registry.Journeys {
		fmt.Printf("%s\t%s\tproject=%s\tmodel_server=%t\n", def.Name, def.Description, def.Project, def.RequiresModelServer)
	}
}
// runJourneyRun performs a single labeled capture and writes summary.json
// next to the produced artifacts. All failures are fatal.
func runJourneyRun(opts *JourneyRunOptions) {
	repoRoot, manifest, hasManifest := resolveAgentLabTarget(opts.Worktree)
	harnessRoot, err := resolveJourneyHarnessRoot(repoRoot, manifest, hasManifest)
	if err != nil {
		log.Fatalf("Failed to resolve journey harness root: %v", err)
	}
	capture, err := captureJourney(harnessRoot, repoRoot, manifest, hasManifest, opts.Journey, opts.Label, opts.OutputDir, opts.Project)
	if err != nil {
		log.Fatalf("Journey capture failed: %v", err)
	}
	encoded, err := json.MarshalIndent(capture, "", " ")
	if err != nil {
		log.Fatalf("Failed to encode journey summary: %v", err)
	}
	summaryPath := filepath.Join(capture.ArtifactDir, "summary.json")
	if err := os.WriteFile(summaryPath, encoded, 0644); err != nil {
		log.Fatalf("Failed to write journey summary: %v", err)
	}
	log.Infof("Journey %s (%s) captured to %s", capture.Journey, capture.Label, capture.ArtifactDir)
}
// runJourneyCompare captures the configured journeys against a "before" and
// an "after" revision, writes a machine-readable summary under the run dir,
// and — when --pr is given — uploads the run and updates the PR comment.
// The before side always uses a temporary worktree; the after side uses
// --after-worktree, the current tracked worktree (when --after-ref is HEAD),
// or another temporary worktree, in that order of preference.
func runJourneyCompare(opts *JourneyCompareOptions) {
	repoRoot, err := paths.GitRoot()
	if err != nil {
		log.Fatalf("Failed to determine git root: %v", err)
	}
	definitions, err := resolveJourneyDefinitions(repoRoot, opts.Journeys, opts.PlanFile)
	if err != nil {
		log.Fatalf("Failed to resolve journeys: %v", err)
	}
	currentRoot, currentManifest, hasCurrentManifest := resolveAgentLabTarget("")
	// Refuse to capture HEAD from a dirty, untracked checkout: the "after"
	// capture would not reflect the real patch under review.
	if opts.AfterWorktree == "" && strings.EqualFold(strings.TrimSpace(opts.AfterRef), "HEAD") && !hasCurrentManifest && git.HasUncommittedChanges() {
		log.Fatalf("The current checkout has uncommitted changes, but it is not a tracked agent-lab worktree. Create the product worktree first and rerun with --after-worktree <branch> so the after capture reflects the real patch.")
	}
	_ = currentRoot // only the manifest is used below
	commonGitDir, err := agentlab.GetCommonGitDir()
	if err != nil {
		log.Fatalf("Failed to determine git common dir: %v", err)
	}
	// Run dir is keyed by a UTC timestamp; publishJourneyCompare relies on
	// this basename to group uploads.
	runDir := filepath.Join(agentlab.StateRoot(commonGitDir), "journeys", time.Now().UTC().Format("20060102-150405"))
	if err := os.MkdirAll(runDir, 0755); err != nil {
		log.Fatalf("Failed to create journey run dir: %v", err)
	}
	// Before side: always a fresh temporary worktree at --before-ref.
	beforeTarget, err := createTemporaryJourneyWorktree(opts.BeforeRef, "before", agentlab.DependencyMode(opts.DependencyMode))
	if err != nil {
		log.Fatalf("Failed to create before worktree: %v", err)
	}
	if !opts.KeepWorktrees {
		defer cleanupJourneyTarget(beforeTarget)
	}
	// After side: explicit worktree > current tracked worktree (HEAD) >
	// temporary worktree at --after-ref.
	var afterTarget journeyTarget
	if opts.AfterWorktree != "" {
		afterTarget, err = resolveJourneyTarget(opts.AfterWorktree)
		if err != nil {
			log.Fatalf("Failed to resolve after worktree: %v", err)
		}
		if err := runSelfCommand("worktree", "deps", "up", afterTarget.Identifier); err != nil {
			log.Fatalf("Failed to provision dependencies for %s: %v", afterTarget.Identifier, err)
		}
	} else if strings.EqualFold(strings.TrimSpace(opts.AfterRef), "HEAD") {
		if hasCurrentManifest {
			afterTarget = journeyTarget{
				Identifier: currentManifest.Branch,
				Manifest:   currentManifest,
			}
			if err := runSelfCommand("worktree", "deps", "up", afterTarget.Identifier); err != nil {
				log.Fatalf("Failed to provision dependencies for %s: %v", afterTarget.Identifier, err)
			}
			log.Infof("Using current tracked worktree %s for the after capture", afterTarget.Identifier)
		} else {
			afterTarget, err = createTemporaryJourneyWorktree(opts.AfterRef, "after", agentlab.DependencyMode(opts.DependencyMode))
			if err != nil {
				log.Fatalf("Failed to create after worktree: %v", err)
			}
			if !opts.KeepWorktrees {
				defer cleanupJourneyTarget(afterTarget)
			}
		}
	} else {
		afterTarget, err = createTemporaryJourneyWorktree(opts.AfterRef, "after", agentlab.DependencyMode(opts.DependencyMode))
		if err != nil {
			log.Fatalf("Failed to create after worktree: %v", err)
		}
		if !opts.KeepWorktrees {
			defer cleanupJourneyTarget(afterTarget)
		}
	}
	summary := JourneyCompareSummary{
		GeneratedAt: time.Now().UTC().Format(time.RFC3339),
		BeforeRef:   opts.BeforeRef,
		AfterRef:    opts.AfterRef,
		RunDir:      runDir,
		Captures:    []JourneyCaptureSummary{},
	}
	// Capture the before side first so a failed after capture still leaves a
	// usable baseline on disk.
	beforeCaptures, err := captureJourneySet(beforeTarget, definitions, "before", runDir)
	if err != nil {
		log.Fatalf("Before capture failed: %v", err)
	}
	summary.Captures = append(summary.Captures, beforeCaptures...)
	afterCaptures, err := captureJourneySet(afterTarget, definitions, "after", runDir)
	if err != nil {
		log.Fatalf("After capture failed: %v", err)
	}
	summary.Captures = append(summary.Captures, afterCaptures...)
	writeJourneyCompareSummary(runDir, summary)
	log.Infof("Journey compare summary written to %s", filepath.Join(runDir, "summary.json"))
	if opts.PR != "" {
		prNumber, err := resolvePRNumber(opts.PR)
		if err != nil {
			log.Fatalf("Failed to resolve PR number: %v", err)
		}
		updated, err := publishJourneyCompare(runDir, prNumber, opts.Bucket)
		if err != nil {
			log.Fatalf("Failed to publish journey compare run: %v", err)
		}
		// Re-write the summary so it records the S3 locations as well.
		writeJourneyCompareSummary(runDir, updated)
	}
}
// runJourneyPublish re-uploads an existing compare run, refreshes the PR
// comment, then persists the updated summary (with S3 locations) in place.
func runJourneyPublish(opts *JourneyPublishOptions) {
	prNumber, err := resolvePRNumber(opts.PR)
	if err != nil {
		log.Fatalf("Failed to resolve PR number: %v", err)
	}
	published, publishErr := publishJourneyCompare(opts.RunDir, prNumber, opts.Bucket)
	if publishErr != nil {
		log.Fatalf("Failed to publish journey compare run: %v", publishErr)
	}
	writeJourneyCompareSummary(opts.RunDir, published)
	log.Infof("Published journey compare run from %s", opts.RunDir)
}
// resolveJourneyDefinitions merges explicitly requested journey names with
// those from the plan file (the explicit --plan-file, falling back to the
// repo-default plan when it exists), dedupes while preserving first-seen
// order, and resolves the names against the registry.
func resolveJourneyDefinitions(repoRoot string, requested []string, planFile string) ([]journey.Definition, error) {
	names := append([]string{}, requested...)
	plan := strings.TrimSpace(planFile)
	if plan == "" {
		candidate := filepath.Join(repoRoot, journey.DefaultPlanPath)
		if _, statErr := os.Stat(candidate); statErr == nil {
			plan = candidate
		}
	}
	if plan != "" {
		loaded, err := journey.LoadPlan(plan)
		if err != nil {
			return nil, err
		}
		names = append(names, loaded.Journeys...)
	}
	if len(names) == 0 {
		return nil, fmt.Errorf("no journeys requested; pass --journey or provide %s", journey.DefaultPlanPath)
	}
	seen := make(map[string]bool, len(names))
	unique := make([]string, 0, len(names))
	for _, name := range names {
		if seen[name] {
			continue
		}
		seen[name] = true
		unique = append(unique, name)
	}
	return journey.ResolveDefinitions(repoRoot, unique)
}
// resolveJourneyHarnessRoot picks the checkout that actually contains the
// journey registry. The control checkout the worktree was created from (when
// known) is preferred over the target repo itself.
func resolveJourneyHarnessRoot(targetRepoRoot string, manifest agentlab.Manifest, hasManifest bool) (string, error) {
	var candidates []string
	if hasManifest && manifest.CreatedFromPath != "" {
		candidates = append(candidates, manifest.CreatedFromPath)
	}
	candidates = append(candidates, targetRepoRoot)
	for _, root := range candidates {
		if _, err := os.Stat(filepath.Join(root, journey.RegistryPath)); err == nil {
			return root, nil
		}
	}
	return "", fmt.Errorf("no journey registry found in target repo %s or control checkout %s", targetRepoRoot, manifest.CreatedFromPath)
}
// captureJourney runs one registered journey via Playwright and summarizes
// the artifacts it produced.
//
// harnessRoot is the checkout containing the journey registry and the web
// test harness; targetRepoRoot/manifest describe where the app under test
// lives. The capture directory defaults under the manifest's artifact dir
// (tracked worktrees) or web/output (plain checkouts) unless outputDir is
// given. Returns the artifact summary or the tail of the Playwright log on
// failure.
func captureJourney(harnessRoot, targetRepoRoot string, manifest agentlab.Manifest, hasManifest bool, journeyName, label, outputDir, projectOverride string) (JourneyCaptureSummary, error) {
	definitions, err := journey.ResolveDefinitions(harnessRoot, []string{journeyName})
	if err != nil {
		return JourneyCaptureSummary{}, err
	}
	// Exactly one name was passed, so the resolved slice has one entry.
	definition := definitions[0]
	targetDir := strings.TrimSpace(outputDir)
	if targetDir == "" {
		if hasManifest {
			targetDir = filepath.Join(manifest.ArtifactDir, "journeys", journey.Slug(definition.Name), journey.Slug(label))
		} else {
			targetDir = filepath.Join(targetRepoRoot, "web", "output", "journeys", journey.Slug(definition.Name), journey.Slug(label))
		}
	}
	if err := os.MkdirAll(targetDir, 0755); err != nil {
		return JourneyCaptureSummary{}, fmt.Errorf("create journey artifact dir: %w", err)
	}
	playwrightOutputDir := filepath.Join(targetDir, "playwright")
	logPath := filepath.Join(targetDir, "journey.log")
	projectName := definition.Project
	if strings.TrimSpace(projectOverride) != "" {
		projectName = projectOverride
	}
	// Journey mode and capture directories are communicated to the Playwright
	// config through the environment.
	envOverrides := map[string]string{
		"PLAYWRIGHT_JOURNEY_MODE":        "1",
		"PLAYWRIGHT_JOURNEY_CAPTURE_DIR": targetDir,
		"PLAYWRIGHT_OUTPUT_DIR":          playwrightOutputDir,
	}
	if definition.SkipGlobalSetup {
		envOverrides["PLAYWRIGHT_SKIP_GLOBAL_SETUP"] = "1"
	}
	if hasManifest {
		// Tracked worktrees carry their own runtime env (ports, URLs, …);
		// these override the defaults above on key collision.
		for key, value := range manifest.RuntimeEnv() {
			envOverrides[key] = value
		}
	}
	// Playwright must run from the harness's web directory so its config and
	// node_modules resolve.
	step, passed := runLoggedCommand(
		"journey-"+definition.Name,
		logPath,
		filepath.Join(harnessRoot, "web"),
		envOverrides,
		"npx",
		"playwright", "test", definition.TestPath, "--project", projectName,
	)
	if !passed {
		return JourneyCaptureSummary{}, fmt.Errorf("%s", strings.Join(step.Details, "\n"))
	}
	artifactSummary, err := summarizeJourneyArtifacts(targetDir)
	if err != nil {
		return JourneyCaptureSummary{}, err
	}
	artifactSummary.Journey = definition.Name
	artifactSummary.Label = label
	artifactSummary.ArtifactDir = targetDir
	artifactSummary.LogPath = logPath
	if hasManifest {
		artifactSummary.Worktree = manifest.Branch
		artifactSummary.URL = manifest.URLs.Web
	} else {
		// NOTE(review): envOverrides never sets BASE_URL above, so this is
		// empty for untracked checkouts — confirm whether BASE_URL should be
		// inherited from the process environment instead.
		artifactSummary.URL = envOverrides["BASE_URL"]
	}
	return artifactSummary, nil
}
// journeyTarget is a resolved capture target: a tracked agent-lab worktree
// plus whether this run created it (and should therefore clean it up).
type journeyTarget struct {
	Identifier string            // branch name used to address the worktree
	Manifest   agentlab.Manifest // checkout path, URLs, and runtime env
	Temporary  bool              // true when created just for this compare run
}
// resolveJourneyTarget looks up a tracked worktree by identifier and wraps
// its manifest as a non-temporary capture target.
func resolveJourneyTarget(identifier string) (journeyTarget, error) {
	commonGitDir, err := agentlab.GetCommonGitDir()
	if err != nil {
		return journeyTarget{}, err
	}
	manifest, ok, err := agentlab.FindByIdentifier(commonGitDir, identifier)
	if err != nil {
		return journeyTarget{}, err
	}
	if !ok {
		return journeyTarget{}, fmt.Errorf("no worktree found for %q", identifier)
	}
	return journeyTarget{Identifier: manifest.Branch, Manifest: manifest}, nil
}
// createTemporaryJourneyWorktree creates a throwaway worktree at ref under a
// timestamped codex/journey-* branch, brings its dependencies up, and returns
// it marked Temporary so the caller can clean it up.
func createTemporaryJourneyWorktree(ref, label string, mode agentlab.DependencyMode) (journeyTarget, error) {
	refSlug := journey.Slug(strings.ReplaceAll(ref, "/", "-"))
	branch := fmt.Sprintf("codex/journey-%s-%s-%d", journey.Slug(label), refSlug, time.Now().UTC().UnixNano())
	if err := runSelfCommand("worktree", "create", branch, "--from", ref, "--dependency-mode", string(mode)); err != nil {
		return journeyTarget{}, err
	}
	if err := runSelfCommand("worktree", "deps", "up", branch); err != nil {
		return journeyTarget{}, err
	}
	target, err := resolveJourneyTarget(branch)
	if err != nil {
		return journeyTarget{}, err
	}
	target.Temporary = true
	return target, nil
}
// cleanupJourneyTarget removes the worktree and branch this run created;
// pre-existing (non-temporary) targets are left untouched. Failures are
// logged, not fatal, since cleanup is best-effort.
func cleanupJourneyTarget(target journeyTarget) {
	if !target.Temporary {
		return
	}
	if err := runSelfCommand("worktree", "remove", target.Identifier, "--force", "--drop-deps"); err != nil {
		log.Warnf("Failed to remove temporary worktree %s: %v", target.Identifier, err)
	}
	branchDelete := exec.Command("git", "branch", "-D", target.Identifier)
	if err := branchDelete.Run(); err != nil {
		log.Warnf("Failed to delete temporary branch %s: %v", target.Identifier, err)
	}
}
// captureJourneySet boots the target's services once (model server only when
// some journey needs it), captures every definition under
// runDir/<journey>/<label>, and stops the services on return.
func captureJourneySet(target journeyTarget, definitions []journey.Definition, label, runDir string) ([]JourneyCaptureSummary, error) {
	harnessRoot, err := resolveJourneyHarnessRoot(target.Manifest.CheckoutPath, target.Manifest, true)
	if err != nil {
		return nil, err
	}
	needsModelServer := false
	for _, def := range definitions {
		if def.RequiresModelServer {
			needsModelServer = true
			break
		}
	}
	processes, err := startJourneyServices(target, runDir, needsModelServer)
	if err != nil {
		return nil, err
	}
	defer stopManagedProcesses(processes)
	captures := make([]JourneyCaptureSummary, 0, len(definitions))
	for _, def := range definitions {
		outDir := filepath.Join(runDir, journey.Slug(def.Name), journey.Slug(label))
		capture, captureErr := captureJourney(harnessRoot, target.Manifest.CheckoutPath, target.Manifest, true, def.Name, label, outDir, "")
		if captureErr != nil {
			return nil, captureErr
		}
		captures = append(captures, capture)
	}
	return captures, nil
}
// startJourneyServices launches the app services (api, optionally
// model_server, then web) for a capture target, each as a re-invocation of
// this binary with output redirected to per-service logs under the run dir.
// It blocks until both the API and web health endpoints respond, and stops
// everything it already started on any failure.
func startJourneyServices(target journeyTarget, runDir string, includeModelServer bool) ([]managedProcess, error) {
	logDir := filepath.Join(runDir, "services", journey.Slug(target.Manifest.Branch))
	if err := os.MkdirAll(logDir, 0755); err != nil {
		return nil, fmt.Errorf("create service log dir: %w", err)
	}
	processes := make([]managedProcess, 0, 3)
	apiProcess, err := startManagedProcess(
		"api",
		filepath.Join(logDir, "api.log"),
		"backend", "api", "--worktree", target.Identifier,
	)
	if err != nil {
		return nil, err
	}
	processes = append(processes, apiProcess)
	if includeModelServer {
		modelProcess, err := startManagedProcess(
			"model_server",
			filepath.Join(logDir, "model_server.log"),
			"backend", "model_server", "--worktree", target.Identifier,
		)
		if err != nil {
			// Tear down what already started before propagating the error.
			stopManagedProcesses(processes)
			return nil, err
		}
		processes = append(processes, modelProcess)
	}
	webProcess, err := startManagedProcess(
		"web",
		filepath.Join(logDir, "web.log"),
		"web", "dev", "--worktree", target.Identifier, "--", "--webpack",
	)
	if err != nil {
		stopManagedProcesses(processes)
		return nil, err
	}
	processes = append(processes, webProcess)
	// Wait for the API first, then the web app (which proxies to the API);
	// the web dev server needs the longer budget for its initial build.
	if err := waitForJourneyURL(target.Manifest.URLs.API+"/health", 2*time.Minute, processes...); err != nil {
		stopManagedProcesses(processes)
		return nil, err
	}
	if err := waitForJourneyURL(target.Manifest.URLs.Web+"/api/health", 3*time.Minute, processes...); err != nil {
		stopManagedProcesses(processes)
		return nil, err
	}
	return processes, nil
}
// startManagedProcess launches this same ods binary with args, redirecting
// both output streams to logPath and placing the child in its own process
// group so the whole tree can be signaled later.
func startManagedProcess(name, logPath string, args ...string) (managedProcess, error) {
	self, err := os.Executable()
	if err != nil {
		return managedProcess{}, fmt.Errorf("determine ods executable: %w", err)
	}
	if err := os.MkdirAll(filepath.Dir(logPath), 0755); err != nil {
		return managedProcess{}, fmt.Errorf("create log dir: %w", err)
	}
	sink, err := os.Create(logPath)
	if err != nil {
		return managedProcess{}, fmt.Errorf("create log file: %w", err)
	}
	// The child inherits a duplicate of the descriptor, so closing our handle
	// after Start does not interrupt the child's logging.
	defer sink.Close()
	cmd := exec.Command(self, args...)
	cmd.Stdin = nil
	cmd.Stdout = sink
	cmd.Stderr = sink
	cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}
	if err := cmd.Start(); err != nil {
		return managedProcess{}, fmt.Errorf("start %s: %w", name, err)
	}
	return managedProcess{Name: name, Cmd: cmd, LogPath: logPath}, nil
}
// stopManagedProcesses stops services in reverse start order. Each process
// was started with Setpgid, so the interrupt is delivered to the whole
// process group (the web/api wrappers spawn children that would otherwise
// survive a signal sent only to the leader); after a 10s grace period the
// group is killed and the leader is always reaped so no zombie lingers.
func stopManagedProcesses(processes []managedProcess) {
	for i := len(processes) - 1; i >= 0; i-- {
		process := processes[i]
		if process.Cmd == nil || process.Cmd.Process == nil {
			continue
		}
		pid := process.Cmd.Process.Pid
		// Signal the whole group (negative pid); fall back to the leader
		// alone if the group signal fails for any reason.
		if err := syscall.Kill(-pid, syscall.SIGINT); err != nil {
			_ = process.Cmd.Process.Signal(os.Interrupt)
		}
		done := make(chan struct{})
		go func(cmd *exec.Cmd) {
			_, _ = cmd.Process.Wait()
			close(done)
		}(process.Cmd)
		select {
		case <-done:
		case <-time.After(10 * time.Second):
			_ = syscall.Kill(-pid, syscall.SIGKILL)
			_ = process.Cmd.Process.Kill()
			// Reap the killed leader so it does not remain a zombie.
			<-done
		}
	}
}
// waitForJourneyURL polls url every 3s until it answers with any status below
// 500 (presumably to tolerate auth redirects and 4xx from a booted app —
// confirm), failing fast when a managed process dies and giving up after
// timeout.
func waitForJourneyURL(url string, timeout time.Duration, processes ...managedProcess) error {
	client := &http.Client{Timeout: 5 * time.Second}
	deadline := time.Now().Add(timeout)
	for time.Now().Before(deadline) {
		if err := ensureManagedProcessesRunning(processes); err != nil {
			return fmt.Errorf("while waiting for %s: %w", url, err)
		}
		if resp, getErr := client.Get(url); getErr == nil {
			code := resp.StatusCode
			_ = resp.Body.Close()
			if code >= 200 && code < 500 {
				return nil
			}
		}
		time.Sleep(3 * time.Second)
	}
	// Distinguish "service died" from a plain timeout in the final error.
	if err := ensureManagedProcessesRunning(processes); err != nil {
		return fmt.Errorf("while waiting for %s: %w", url, err)
	}
	return fmt.Errorf("timed out waiting for %s", url)
}
// ensureManagedProcessesRunning verifies every managed service is still alive
// using kill(pid, 0). ESRCH means the process is gone, in which case the
// error includes the tail of its log; EPERM still counts as alive (the
// process exists but we may not signal it).
// NOTE(review): a zombie (exited but unreaped) child still answers kill(0),
// so an early exit may go undetected until the process is reaped — confirm
// this is acceptable for this polling use.
func ensureManagedProcessesRunning(processes []managedProcess) error {
	for _, process := range processes {
		if process.Cmd == nil || process.Cmd.Process == nil {
			continue
		}
		if err := syscall.Kill(process.Cmd.Process.Pid, 0); err != nil {
			if err == syscall.ESRCH {
				return fmt.Errorf("%s exited early\n%s", process.Name, readJourneyLogTail(process.LogPath, 40))
			}
			if err != syscall.EPERM {
				return fmt.Errorf("check %s process health: %w", process.Name, err)
			}
		}
	}
	return nil
}
func readJourneyLogTail(path string, lineCount int) string {
data, err := os.ReadFile(path)
if err != nil {
return fmt.Sprintf("failed to read %s: %v", path, err)
}
trimmed := strings.TrimRight(string(data), "\n")
if trimmed == "" {
return fmt.Sprintf("%s is empty", path)
}
lines := strings.Split(trimmed, "\n")
if len(lines) > lineCount {
lines = lines[len(lines)-lineCount:]
}
return fmt.Sprintf("recent log tail from %s:\n%s", path, strings.Join(lines, "\n"))
}
// summarizeJourneyArtifacts walks the capture directory and buckets artifact
// files by kind (videos, Playwright traces, screenshots, metadata JSON),
// recording paths relative to root in sorted order. summary.json itself is
// excluded from the metadata bucket because it is this tool's own output.
func summarizeJourneyArtifacts(root string) (JourneyCaptureSummary, error) {
	summary := JourneyCaptureSummary{}
	// WalkDir (Go 1.16+) avoids an extra stat per entry compared to Walk.
	err := filepath.WalkDir(root, func(path string, entry os.DirEntry, walkErr error) error {
		if walkErr != nil {
			return walkErr
		}
		if entry.IsDir() {
			return nil
		}
		relative, relErr := filepath.Rel(root, path)
		if relErr != nil {
			return relErr
		}
		switch {
		case strings.HasSuffix(path, ".webm"):
			summary.VideoFiles = append(summary.VideoFiles, relative)
		case strings.HasSuffix(path, "trace.zip"):
			summary.TraceFiles = append(summary.TraceFiles, relative)
		case strings.HasSuffix(path, ".png"):
			summary.Screenshots = append(summary.Screenshots, relative)
		case strings.HasSuffix(path, ".json") && filepath.Base(path) != "summary.json":
			summary.MetadataJSON = append(summary.MetadataJSON, relative)
		}
		return nil
	})
	if err != nil {
		return summary, fmt.Errorf("walk journey artifacts: %w", err)
	}
	sort.Strings(summary.VideoFiles)
	sort.Strings(summary.TraceFiles)
	sort.Strings(summary.Screenshots)
	sort.Strings(summary.MetadataJSON)
	return summary, nil
}
// runSelfCommand re-invokes the current ods binary with args, inheriting this
// process's stdio, and returns the child's run error.
func runSelfCommand(args ...string) error {
	self, err := os.Executable()
	if err != nil {
		return err
	}
	child := exec.Command(self, args...)
	child.Stdin = os.Stdin
	child.Stdout = os.Stdout
	child.Stderr = os.Stderr
	return child.Run()
}
// writeJourneyCompareSummary persists summary as indented JSON at
// runDir/summary.json, exiting the process on any failure.
func writeJourneyCompareSummary(runDir string, summary JourneyCompareSummary) {
	encoded, err := json.MarshalIndent(summary, "", " ")
	if err != nil {
		log.Fatalf("Failed to encode journey compare summary: %v", err)
	}
	target := filepath.Join(runDir, "summary.json")
	if err := os.WriteFile(target, encoded, 0644); err != nil {
		log.Fatalf("Failed to write journey compare summary: %v", err)
	}
}
// publishJourneyCompare uploads a compare run directory to S3 and upserts the
// marker-tagged PR comment linking to the artifacts. It returns the summary
// read from runDir, augmented with the S3 prefix and public HTTP base (the
// caller is responsible for persisting the updated summary).
func publishJourneyCompare(runDir, prNumber, bucketOverride string) (JourneyCompareSummary, error) {
	var summary JourneyCompareSummary
	data, err := os.ReadFile(filepath.Join(runDir, "summary.json"))
	if err != nil {
		return summary, fmt.Errorf("read compare summary: %w", err)
	}
	if err := json.Unmarshal(data, &summary); err != nil {
		return summary, fmt.Errorf("parse compare summary: %w", err)
	}
	bucket := bucketOverride
	if bucket == "" {
		bucket = getS3Bucket()
	}
	// The run dir's basename is its UTC timestamp (see runJourneyCompare), so
	// uploads for the same PR are grouped by capture time.
	timestamp := filepath.Base(runDir)
	s3Prefix := fmt.Sprintf("s3://%s/journeys/pr-%s/%s/", bucket, prNumber, timestamp)
	if err := s3.SyncUp(runDir, s3Prefix, true); err != nil {
		return summary, err
	}
	// NOTE(review): the public URL hardcodes defaultJourneyHTTPRegion; links
	// break if the bucket lives in a different region — confirm.
	httpBase := fmt.Sprintf("https://%s.s3.%s.amazonaws.com/journeys/pr-%s/%s/", bucket, defaultJourneyHTTPRegion, prNumber, timestamp)
	summary.S3Prefix = s3Prefix
	summary.S3HTTPBase = httpBase
	repoSlug, err := currentRepoSlug()
	if err != nil {
		return summary, err
	}
	body := buildJourneyPRComment(summary)
	if err := upsertIssueComment(repoSlug, prNumber, "<!-- agent-journey-report -->", body); err != nil {
		return summary, err
	}
	return summary, nil
}
// buildJourneyPRComment renders the marker-tagged markdown comment body: a
// table pairing each journey's before/after artifacts, with journeys sorted
// by name for stable output.
func buildJourneyPRComment(summary JourneyCompareSummary) string {
	type capturePair struct {
		before *JourneyCaptureSummary
		after  *JourneyCaptureSummary
	}
	pairs := map[string]*capturePair{}
	for i := range summary.Captures {
		capture := &summary.Captures[i]
		entry, ok := pairs[capture.Journey]
		if !ok {
			entry = &capturePair{}
			pairs[capture.Journey] = entry
		}
		if capture.Label == "before" {
			entry.before = capture
		} else if capture.Label == "after" {
			entry.after = capture
		}
	}
	names := make([]string, 0, len(pairs))
	for name := range pairs {
		names = append(names, name)
	}
	sort.Strings(names)
	lines := []string{
		"<!-- agent-journey-report -->",
		"### Agent Journey Report",
		"",
		fmt.Sprintf("Before ref: `%s`", summary.BeforeRef),
		fmt.Sprintf("After ref: `%s`", summary.AfterRef),
		"",
		"| Journey | Before | After |",
		"|---------|--------|-------|",
	}
	for _, name := range names {
		entry := pairs[name]
		beforeCell := journeyLink(summary.RunDir, summary.S3HTTPBase, entry.before)
		afterCell := journeyLink(summary.RunDir, summary.S3HTTPBase, entry.after)
		lines = append(lines, fmt.Sprintf("| `%s` | %s | %s |", name, beforeCell, afterCell))
	}
	return strings.Join(lines, "\n")
}
// journeyLink renders the table cell for one capture: the first video when
// present, else the first screenshot, else a placeholder. Links are built
// from the uploaded HTTP base plus the artifact path relative to the run dir.
func journeyLink(runDir, httpBase string, capture *JourneyCaptureSummary) string {
	if capture == nil {
		return "_not captured_"
	}
	relDir, err := filepath.Rel(runDir, capture.ArtifactDir)
	if err != nil {
		relDir = capture.ArtifactDir
	}
	switch {
	case len(capture.VideoFiles) > 0:
		return fmt.Sprintf("[video](%s%s)", httpBase, pathJoin(relDir, capture.VideoFiles[0]))
	case len(capture.Screenshots) > 0:
		return fmt.Sprintf("[screenshot](%s%s)", httpBase, pathJoin(relDir, capture.Screenshots[0]))
	default:
		return "_no artifact_"
	}
}
// pathJoin joins URL path segments with single slashes, trimming leading and
// trailing slashes from each part and dropping parts that are empty (before
// or after trimming) so no double slashes appear in generated links.
func pathJoin(parts ...string) string {
	clean := make([]string, 0, len(parts))
	for _, part := range parts {
		trimmed := strings.Trim(part, "/")
		// Skip slash-only parts too: previously "/" trimmed to "" but was
		// still appended, producing a double slash in the joined path.
		if trimmed == "" {
			continue
		}
		clean = append(clean, trimmed)
	}
	return strings.Join(clean, "/")
}

289
tools/ods/cmd/pr_checks.go Normal file
View File

@@ -0,0 +1,289 @@
package cmd
import (
"encoding/json"
"fmt"
"os/exec"
"strings"
log "github.com/sirupsen/logrus"
"github.com/spf13/cobra"
"github.com/onyx-dot-app/onyx/tools/ods/internal/git"
)
// PRChecksOptions carries the shared --pr flag for pr-checks subcommands.
type PRChecksOptions struct {
	PR string // pull request number; empty = resolve from the current branch
}

// ghChecksResponse mirrors the GraphQL response shape used by fetchPRChecks:
// the status-check rollup contexts on the PR's latest commit. CheckRun and
// StatusContext fields share one node struct; which subset is populated
// depends on the node's __typename.
type ghChecksResponse struct {
	Data struct {
		Repository struct {
			PullRequest struct {
				Number  int    `json:"number"`
				Title   string `json:"title"`
				URL     string `json:"url"`
				HeadRef string `json:"headRefName"`
				Commits struct {
					Nodes []struct {
						Commit struct {
							StatusCheckRollup struct {
								Contexts struct {
									Nodes []struct {
										// Populated for CheckRun nodes.
										Type         string `json:"__typename"`
										Name         string `json:"name"`
										DisplayTitle string `json:"displayTitle"`
										WorkflowName string `json:"workflowName"`
										Status       string `json:"status"`
										Conclusion   string `json:"conclusion"`
										DetailsURL   string `json:"detailsUrl"`
										// Populated for StatusContext nodes.
										Context     string `json:"context"`
										State       string `json:"state"`
										TargetURL   string `json:"targetUrl"`
										Description string `json:"description"`
									} `json:"nodes"`
								} `json:"contexts"`
							} `json:"statusCheckRollup"`
						} `json:"commit"`
					} `json:"nodes"`
				} `json:"commits"`
			} `json:"pullRequest"`
		} `json:"repository"`
	} `json:"data"`
}
// NewPRChecksCommand creates the pr-checks command surface.
func NewPRChecksCommand() *cobra.Command {
	root := &cobra.Command{
		Use:   "pr-checks",
		Short: "Inspect GitHub PR checks and surface failing runs for remediation",
	}
	for _, sub := range []*cobra.Command{newPRChecksStatusCommand(), newPRChecksDiagnoseCommand()} {
		root.AddCommand(sub)
	}
	return root
}
// newPRChecksStatusCommand wires "pr-checks status".
func newPRChecksStatusCommand() *cobra.Command {
	opts := &PRChecksOptions{}
	statusCmd := cobra.Command{
		Use:   "status",
		Short: "List all status checks for a pull request",
		Run: func(_ *cobra.Command, _ []string) {
			runPRChecksStatus(opts)
		},
	}
	statusCmd.Flags().StringVar(&opts.PR, "pr", "", "pull request number (defaults to the current branch PR)")
	return &statusCmd
}
// newPRChecksDiagnoseCommand wires "pr-checks diagnose".
func newPRChecksDiagnoseCommand() *cobra.Command {
	opts := &PRChecksOptions{}
	diagnoseCmd := cobra.Command{
		Use:   "diagnose",
		Short: "List only failing checks and point to the next remediation command",
		Run: func(_ *cobra.Command, _ []string) {
			runPRChecksDiagnose(opts)
		},
	}
	diagnoseCmd.Flags().StringVar(&opts.PR, "pr", "", "pull request number (defaults to the current branch PR)")
	return &diagnoseCmd
}
// runPRChecksStatus prints a header for the PR followed by one line per
// rollup check: result, display name, kind, and details URL.
func runPRChecksStatus(opts *PRChecksOptions) {
	prNumber, err := resolvePRNumber(opts.PR)
	if err != nil {
		log.Fatalf("Failed to resolve PR number: %v", err)
	}
	response, fetchErr := fetchPRChecks(prNumber)
	if fetchErr != nil {
		log.Fatalf("Failed to fetch PR checks: %v", fetchErr)
	}
	pr := response.Data.Repository.PullRequest
	fmt.Printf("PR #%d %s\n", pr.Number, pr.Title)
	for _, check := range flattenChecks(response) {
		fmt.Printf("[%s] %s (%s) %s\n", check.result(), check.displayName(), check.kind(), check.url())
	}
}
// runPRChecksDiagnose lists only the failing checks on the PR and suggests
// the next remediation command for each: "ods trace" for Playwright failures,
// a gh run-log inspection otherwise.
func runPRChecksDiagnose(opts *PRChecksOptions) {
	prNumber, err := resolvePRNumber(opts.PR)
	if err != nil {
		log.Fatalf("Failed to resolve PR number: %v", err)
	}
	response, fetchErr := fetchPRChecks(prNumber)
	if fetchErr != nil {
		log.Fatalf("Failed to fetch PR checks: %v", fetchErr)
	}
	failing := failingChecks(response)
	if len(failing) == 0 {
		fmt.Printf("No failing checks found on PR #%s\n", prNumber)
		return
	}
	fmt.Printf("Failing checks for PR #%s:\n", prNumber)
	for _, check := range failing {
		fmt.Printf("- %s (%s)\n", check.displayName(), check.url())
		isPlaywright := strings.Contains(strings.ToLower(check.displayName()), "playwright")
		if isPlaywright {
			fmt.Printf(" next: ods trace --pr %s\n", prNumber)
		} else {
			fmt.Printf(" next: gh run view <run-id> --log-failed\n")
		}
	}
}
// fetchPRChecks queries the GitHub GraphQL API (through the gh CLI) for the
// status-check rollup on the head commit of the given pull request in the
// current repository.
func fetchPRChecks(prNumber string) (*ghChecksResponse, error) {
	repoSlug, err := currentRepoSlug()
	if err != nil {
		return nil, err
	}
	slugParts := strings.SplitN(repoSlug, "/", 2)
	if len(slugParts) != 2 {
		return nil, fmt.Errorf("unexpected repo slug %q", repoSlug)
	}
	owner, name := slugParts[0], slugParts[1]
	git.CheckGitHubCLI()
	query := `query($owner:String!, $name:String!, $number:Int!) {
repository(owner:$owner, name:$name) {
pullRequest(number:$number) {
number
title
url
headRefName
commits(last:1) {
nodes {
commit {
statusCheckRollup {
contexts(first:100) {
nodes {
__typename
... on CheckRun {
name
status
conclusion
detailsUrl
}
... on StatusContext {
context
state
targetUrl
description
}
}
}
}
}
}
}
}
}`
	checksCmd := exec.Command(
		"gh", "api", "graphql",
		"-f", "query="+query,
		"-F", "owner="+owner,
		"-F", "name="+name,
		"-F", "number="+prNumber,
	)
	output, err := checksCmd.Output()
	if err != nil {
		// Surface gh's stderr when available so the failure is actionable.
		exitErr, isExit := err.(*exec.ExitError)
		if isExit {
			return nil, fmt.Errorf("gh api graphql failed: %w: %s", err, strings.TrimSpace(string(exitErr.Stderr)))
		}
		return nil, fmt.Errorf("gh api graphql failed: %w", err)
	}
	var parsed ghChecksResponse
	if err := json.Unmarshal(output, &parsed); err != nil {
		return nil, fmt.Errorf("parse PR checks: %w", err)
	}
	return &parsed, nil
}
// flattenedCheck is a uniform view over the two node kinds GitHub returns in
// a statusCheckRollup: CheckRun and StatusContext. Only the fields belonging
// to the node's actual type are populated; the rest stay zero-valued.
type flattenedCheck struct {
	// Type is the GraphQL __typename: "CheckRun", "StatusContext", or empty.
	Type string
	// CheckRun fields.
	Name string
	// DisplayTitle and WorkflowName are not requested by the checks query in
	// fetchPRChecks — presumably populated by another caller; TODO confirm.
	DisplayTitle string
	WorkflowName string
	Status       string
	Conclusion   string
	DetailsURL   string
	// StatusContext fields.
	Context   string
	State     string
	TargetURL string
}
// flattenChecks converts the nested GraphQL response into a flat slice of
// flattenedCheck values. It returns an empty (non-nil) slice when the
// response is nil or the PR carries no commits.
func flattenChecks(response *ghChecksResponse) []flattenedCheck {
	flattened := []flattenedCheck{}
	if response == nil {
		return flattened
	}
	commits := response.Data.Repository.PullRequest.Commits.Nodes
	if len(commits) == 0 {
		return flattened
	}
	for _, node := range commits[0].Commit.StatusCheckRollup.Contexts.Nodes {
		flattened = append(flattened, flattenedCheck{
			Type:         node.Type,
			Name:         node.Name,
			DisplayTitle: node.DisplayTitle,
			WorkflowName: node.WorkflowName,
			Status:       node.Status,
			Conclusion:   node.Conclusion,
			DetailsURL:   node.DetailsURL,
			Context:      node.Context,
			State:        node.State,
			TargetURL:    node.TargetURL,
		})
	}
	return flattened
}
// displayName picks the most human-friendly label available for the check:
// display title, then "workflow / name", then the bare name for check runs;
// the context string for everything else.
func (c flattenedCheck) displayName() string {
	if c.Type != "CheckRun" {
		return c.Context
	}
	if c.DisplayTitle != "" {
		return c.DisplayTitle
	}
	if c.WorkflowName != "" && c.Name != "" {
		return c.WorkflowName + " / " + c.Name
	}
	return c.Name
}
// kind reports the node's GraphQL type name, defaulting to "status" when the
// __typename is absent.
func (c flattenedCheck) kind() string {
	if c.Type != "" {
		return c.Type
	}
	return "status"
}
// result normalizes the check outcome to lower case: a check run's conclusion
// (or its status while still running), otherwise the status context's state.
func (c flattenedCheck) result() string {
	switch {
	case c.Type != "CheckRun":
		return strings.ToLower(c.State)
	case c.Conclusion != "":
		return strings.ToLower(c.Conclusion)
	default:
		return strings.ToLower(c.Status)
	}
}
// url returns the check's details link, falling back to the status context's
// target URL when no details URL exists.
func (c flattenedCheck) url() string {
	if c.DetailsURL == "" {
		return c.TargetURL
	}
	return c.DetailsURL
}
// failingChecks filters the flattened checks down to those whose normalized
// result indicates failure.
//
// Covers CheckRun conclusions (failure, timed_out, cancelled, startup_failure)
// and commit-status states (failure, error). "failed" is retained for
// defensiveness even though it is not a canonical GitHub value.
func failingChecks(response *ghChecksResponse) []flattenedCheck {
	checks := flattenChecks(response)
	failing := make([]flattenedCheck, 0, len(checks))
	for _, check := range checks {
		switch check.result() {
		// "startup_failure" was previously missed, so check runs that never
		// started (e.g. broken workflow files) looked healthy in `diagnose`.
		case "failure", "failed", "timed_out", "cancelled", "error", "startup_failure":
			failing = append(failing, check)
		}
	}
	return failing
}

73
tools/ods/cmd/pr_merge.go Normal file
View File

@@ -0,0 +1,73 @@
package cmd
import (
"os"
"os/exec"
log "github.com/sirupsen/logrus"
"github.com/spf13/cobra"
"github.com/onyx-dot-app/onyx/tools/ods/internal/git"
)
// PRMergeOptions holds the flag values for the pr-merge command.
type PRMergeOptions struct {
	PR           string // pull request number; empty means "current branch PR"
	Auto         bool   // enable auto-merge instead of merging immediately
	DeleteBranch bool   // delete the source branch after merging
	Method       string // merge method: "squash", "merge", or "rebase"
}
// NewPRMergeCommand creates the pr-merge command.
func NewPRMergeCommand() *cobra.Command {
	opts := &PRMergeOptions{}
	mergeCmd := &cobra.Command{
		Use:   "pr-merge",
		Short: "Merge a GitHub pull request through gh with explicit method flags",
		Run: func(cmd *cobra.Command, args []string) {
			runPRMerge(opts)
		},
	}
	flags := mergeCmd.Flags()
	flags.StringVar(&opts.PR, "pr", "", "pull request number (defaults to the current branch PR)")
	flags.BoolVar(&opts.Auto, "auto", false, "enable auto-merge instead of merging immediately")
	flags.BoolVar(&opts.DeleteBranch, "delete-branch", false, "delete the branch after merge")
	flags.StringVar(&opts.Method, "method", "squash", "merge method: squash, merge, or rebase")
	return mergeCmd
}
// runPRMerge shells out to `gh pr merge` with an explicit method flag, wiring
// the child process to the caller's stdio so interactive gh prompts work.
func runPRMerge(opts *PRMergeOptions) {
	git.CheckGitHubCLI()
	prNumber, err := resolvePRNumber(opts.PR)
	if err != nil {
		log.Fatalf("Failed to resolve PR number: %v", err)
	}
	methodFlags := map[string]string{
		"squash": "--squash",
		"merge":  "--merge",
		"rebase": "--rebase",
	}
	methodFlag, known := methodFlags[opts.Method]
	if !known {
		log.Fatalf("Invalid merge method %q: expected squash, merge, or rebase", opts.Method)
	}
	ghArgs := []string{"pr", "merge", prNumber, methodFlag}
	if opts.Auto {
		ghArgs = append(ghArgs, "--auto")
	}
	if opts.DeleteBranch {
		ghArgs = append(ghArgs, "--delete-branch")
	}
	mergeCmd := exec.Command("gh", ghArgs...)
	mergeCmd.Stdout = os.Stdout
	mergeCmd.Stderr = os.Stderr
	mergeCmd.Stdin = os.Stdin
	if err := mergeCmd.Run(); err != nil {
		log.Fatalf("Failed to merge PR #%s: %v", prNumber, err)
	}
}

89
tools/ods/cmd/pr_open.go Normal file
View File

@@ -0,0 +1,89 @@
package cmd
import (
"fmt"
"os"
"os/exec"
"path/filepath"
"regexp"
"strings"
log "github.com/sirupsen/logrus"
"github.com/spf13/cobra"
"github.com/onyx-dot-app/onyx/tools/ods/internal/git"
"github.com/onyx-dot-app/onyx/tools/ods/internal/paths"
)
// conventionalPRTitlePattern matches conventional-commit style PR titles such
// as "feat(web): add banner": a known type, an optional "(scope)", then ": "
// followed by a non-empty subject.
var conventionalPRTitlePattern = regexp.MustCompile(`^(build|chore|ci|docs|feat|fix|perf|refactor|revert|style|test)(\([^)]+\))?: .+`)
// PROpenOptions holds the flag values for the pr-open command.
type PROpenOptions struct {
	Title    string // PR title; empty means "use the latest commit subject"
	Base     string // base branch for the PR (default "main")
	BodyFile string // explicit body file; empty means the repo PR template
	Draft    bool   // open the PR as a draft
}
// NewPROpenCommand creates the pr-open command.
func NewPROpenCommand() *cobra.Command {
	opts := &PROpenOptions{}
	openCmd := &cobra.Command{
		Use:   "pr-open",
		Short: "Open a GitHub pull request using the repo template and a conventional-commit title",
		Run: func(cmd *cobra.Command, args []string) {
			runPROpen(opts)
		},
	}
	flags := openCmd.Flags()
	flags.StringVar(&opts.Title, "title", "", "PR title (defaults to the latest commit subject)")
	flags.StringVar(&opts.Base, "base", "main", "base branch for the PR")
	flags.StringVar(&opts.BodyFile, "body-file", "", "explicit PR body file (defaults to .github/pull_request_template.md)")
	flags.BoolVar(&opts.Draft, "draft", false, "open the PR as a draft")
	return openCmd
}
// runPROpen opens a PR via `gh pr create`, enforcing a conventional-commit
// title and defaulting the body to the repository's PR template.
func runPROpen(opts *PROpenOptions) {
	git.CheckGitHubCLI()
	title := strings.TrimSpace(opts.Title)
	if title == "" {
		// Fall back to the subject of the latest commit.
		subject, err := git.GetCommitMessage("HEAD")
		if err != nil {
			log.Fatalf("Failed to determine PR title from HEAD: %v", err)
		}
		title = subject
	}
	if !conventionalPRTitlePattern.MatchString(title) {
		log.Fatalf("PR title must follow conventional-commit style. Got %q", title)
	}
	bodyFile := strings.TrimSpace(opts.BodyFile)
	if bodyFile == "" {
		repoRoot, err := paths.GitRoot()
		if err != nil {
			log.Fatalf("Failed to determine git root: %v", err)
		}
		bodyFile = filepath.Join(repoRoot, ".github", "pull_request_template.md")
	}
	bodyBytes, err := os.ReadFile(bodyFile)
	if err != nil {
		log.Fatalf("Failed to read PR body file %s: %v", bodyFile, err)
	}
	createArgs := []string{"pr", "create", "--base", opts.Base, "--title", title, "--body", string(bodyBytes)}
	if opts.Draft {
		createArgs = append(createArgs, "--draft")
	}
	createCmd := exec.Command("gh", createArgs...)
	createCmd.Stdout = os.Stdout
	createCmd.Stderr = os.Stderr
	createCmd.Stdin = os.Stdin
	if err := createCmd.Run(); err != nil {
		log.Fatalf("Failed to open PR: %v", err)
	}
	fmt.Printf("Opened PR with title %q\n", title)
}

393
tools/ods/cmd/pr_review.go Normal file
View File

@@ -0,0 +1,393 @@
package cmd
import (
"encoding/json"
"fmt"
"os"
"os/exec"
"path/filepath"
"strings"
log "github.com/sirupsen/logrus"
"github.com/spf13/cobra"
"github.com/onyx-dot-app/onyx/tools/ods/internal/agentlab"
"github.com/onyx-dot-app/onyx/tools/ods/internal/git"
"github.com/onyx-dot-app/onyx/tools/ods/internal/prreview"
)
// PRReviewFetchOptions holds flag values for `pr-review fetch`.
type PRReviewFetchOptions struct {
	PR     string // pull request number; empty means "current branch PR"
	Output string // explicit output path; empty means the default state dir
}

// PRReviewTriageOptions holds flag values for `pr-review triage`.
type PRReviewTriageOptions struct {
	PR     string // pull request number; empty means "current branch PR"
	Output string // explicit output path; empty means the default state dir
}

// PRReviewRespondOptions holds flag values for `pr-review respond` and is
// reused by `pr-review resolve`, which only reads ThreadID.
type PRReviewRespondOptions struct {
	PR        string // pull request number; empty means "current branch PR"
	CommentID int    // top-level inline review comment ID to reply to
	Body      string // reply body to post
	ThreadID  string // optional GraphQL thread ID to resolve after replying
}
// ghReviewResponse mirrors the shape of the GraphQL payload returned by
// `gh api graphql` for the review-threads query built in ghGraphQL.
type ghReviewResponse struct {
	Data struct {
		Repository struct {
			PullRequest struct {
				Number        int    `json:"number"`
				Title         string `json:"title"`
				URL           string `json:"url"`
				ReviewThreads struct {
					Nodes []struct {
						ID         string `json:"id"`
						IsResolved bool   `json:"isResolved"`
						IsOutdated bool   `json:"isOutdated"`
						Path       string `json:"path"`
						Line       int    `json:"line"`
						StartLine  int    `json:"startLine"`
						Comments   struct {
							Nodes []struct {
								DatabaseID int `json:"databaseId"`
								// Tag added for consistency with the sibling
								// fields; decoding previously relied on Go's
								// case-insensitive fallback for "body".
								Body      string `json:"body"`
								URL       string `json:"url"`
								CreatedAt string `json:"createdAt"`
								Author    struct {
									Login string `json:"login"`
								} `json:"author"`
							} `json:"nodes"`
						} `json:"comments"`
					} `json:"nodes"`
				} `json:"reviewThreads"`
			} `json:"pullRequest"`
		} `json:"repository"`
	} `json:"data"`
}
// NewPRReviewCommand creates the pr-review command surface.
func NewPRReviewCommand() *cobra.Command {
	reviewCmd := &cobra.Command{
		Use:   "pr-review",
		Short: "Fetch, triage, and respond to GitHub pull request review threads",
	}
	for _, sub := range []*cobra.Command{
		newPRReviewFetchCommand(),
		newPRReviewTriageCommand(),
		newPRReviewRespondCommand(),
		newPRReviewResolveCommand(),
	} {
		reviewCmd.AddCommand(sub)
	}
	return reviewCmd
}
// newPRReviewFetchCommand builds the `fetch` subcommand, which downloads
// review threads and persists them as local harness state.
func newPRReviewFetchCommand() *cobra.Command {
	opts := &PRReviewFetchOptions{}
	fetchCmd := &cobra.Command{
		Use:   "fetch",
		Short: "Fetch pull request review threads and write them to local harness state",
		Run: func(cmd *cobra.Command, args []string) {
			runPRReviewFetch(opts)
		},
	}
	flags := fetchCmd.Flags()
	flags.StringVar(&opts.PR, "pr", "", "pull request number (defaults to the current branch PR)")
	flags.StringVar(&opts.Output, "output", "", "explicit output path for the fetched review JSON")
	return fetchCmd
}
// newPRReviewTriageCommand builds the `triage` subcommand, which classifies
// unresolved review threads.
func newPRReviewTriageCommand() *cobra.Command {
	opts := &PRReviewTriageOptions{}
	triageCmd := &cobra.Command{
		Use:   "triage",
		Short: "Classify unresolved review threads into actionable, duplicate, outdated, or resolved",
		Run: func(cmd *cobra.Command, args []string) {
			runPRReviewTriage(opts)
		},
	}
	flags := triageCmd.Flags()
	flags.StringVar(&opts.PR, "pr", "", "pull request number (defaults to the current branch PR)")
	flags.StringVar(&opts.Output, "output", "", "explicit output path for the triage JSON")
	return triageCmd
}
// newPRReviewRespondCommand builds the `respond` subcommand, which replies to
// an inline review comment and can optionally resolve its thread.
func newPRReviewRespondCommand() *cobra.Command {
	opts := &PRReviewRespondOptions{}
	respondCmd := &cobra.Command{
		Use:   "respond",
		Short: "Reply to an inline pull request review comment and optionally resolve the thread",
		Run: func(cmd *cobra.Command, args []string) {
			runPRReviewRespond(opts)
		},
	}
	flags := respondCmd.Flags()
	flags.StringVar(&opts.PR, "pr", "", "pull request number (defaults to the current branch PR)")
	flags.IntVar(&opts.CommentID, "comment-id", 0, "top-level pull request review comment ID to reply to")
	flags.StringVar(&opts.ThreadID, "thread-id", "", "GraphQL review thread ID to resolve after replying")
	flags.StringVar(&opts.Body, "body", "", "reply body to post")
	_ = respondCmd.MarkFlagRequired("comment-id")
	_ = respondCmd.MarkFlagRequired("body")
	return respondCmd
}
// newPRReviewResolveCommand builds the `resolve` subcommand, which resolves a
// thread without posting a reply.
func newPRReviewResolveCommand() *cobra.Command {
	opts := &PRReviewRespondOptions{}
	resolveCmd := &cobra.Command{
		Use:   "resolve",
		Short: "Resolve a review thread without posting a reply",
		Run: func(cmd *cobra.Command, args []string) {
			runPRReviewResolve(opts)
		},
	}
	resolveCmd.Flags().StringVar(&opts.ThreadID, "thread-id", "", "GraphQL review thread ID to resolve")
	_ = resolveCmd.MarkFlagRequired("thread-id")
	return resolveCmd
}
// runPRReviewFetch pulls review threads for the target PR and persists them
// as JSON under the agent-lab state directory (or an explicit --output path).
func runPRReviewFetch(opts *PRReviewFetchOptions) {
	prNumber, err := resolvePRNumber(opts.PR)
	if err != nil {
		log.Fatalf("Failed to resolve PR number: %v", err)
	}
	review, err := fetchPRReview(prNumber)
	if err != nil {
		log.Fatalf("Failed to fetch PR review threads: %v", err)
	}
	outputPath, pathErr := reviewOutputPath(prNumber, opts.Output, "threads.json")
	if pathErr != nil {
		log.Fatalf("Failed to determine output path: %v", pathErr)
	}
	writeJSON(outputPath, review)
	log.Infof("Fetched %d review threads into %s", len(review.Threads), outputPath)
}
// runPRReviewTriage fetches review threads, classifies them through the
// prreview package, writes the triage JSON, and prints a per-thread summary.
func runPRReviewTriage(opts *PRReviewTriageOptions) {
	prNumber, err := resolvePRNumber(opts.PR)
	if err != nil {
		log.Fatalf("Failed to resolve PR number: %v", err)
	}
	review, err := fetchPRReview(prNumber)
	if err != nil {
		log.Fatalf("Failed to fetch PR review threads: %v", err)
	}
	triage := prreview.Triage(review)
	outputPath, pathErr := reviewOutputPath(prNumber, opts.Output, "triage.json")
	if pathErr != nil {
		log.Fatalf("Failed to determine output path: %v", pathErr)
	}
	writeJSON(outputPath, triage)
	for _, summary := range triage.Summaries {
		location := triageLineRef(summary.Thread.Path, summary.Thread.Line)
		fmt.Printf("[%s] %s %s %s\n", summary.Category, summary.Source, summary.Thread.ID, location)
		for _, reason := range summary.Reasons {
			fmt.Printf(" - %s\n", reason)
		}
	}
	log.Infof("Wrote PR review triage to %s", outputPath)
}

// triageLineRef formats an optional "path:line" reference for a thread;
// it returns "" when the thread has no path and just the path when the line
// is unknown.
func triageLineRef(path string, line int) string {
	if path == "" {
		return ""
	}
	if line > 0 {
		return fmt.Sprintf("%s:%d", path, line)
	}
	return path
}
// runPRReviewRespond replies to an inline review comment and, when a thread
// ID is supplied, resolves that thread afterwards.
func runPRReviewRespond(opts *PRReviewRespondOptions) {
	prNumber, err := resolvePRNumber(opts.PR)
	if err != nil {
		log.Fatalf("Failed to resolve PR number: %v", err)
	}
	repoSlug, err := currentRepoSlug()
	if err != nil {
		log.Fatalf("Failed to resolve repo slug: %v", err)
	}
	if err := replyToReviewComment(repoSlug, prNumber, opts.CommentID, opts.Body); err != nil {
		log.Fatalf("Failed to reply to review comment: %v", err)
	}
	if strings.TrimSpace(opts.ThreadID) != "" {
		if err := resolveReviewThread(opts.ThreadID); err != nil {
			log.Fatalf("Failed to resolve review thread: %v", err)
		}
	}
	log.Infof("Posted reply to review comment %d on PR #%s", opts.CommentID, prNumber)
}
// runPRReviewResolve resolves a single review thread by its GraphQL ID
// without posting any reply; a failed resolution is fatal.
func runPRReviewResolve(opts *PRReviewRespondOptions) {
	if err := resolveReviewThread(opts.ThreadID); err != nil {
		log.Fatalf("Failed to resolve review thread: %v", err)
	}
	log.Infof("Resolved review thread %s", opts.ThreadID)
}
// fetchPRReview retrieves all review threads for the PR in the current
// repository and converts the raw GraphQL payload into the prreview domain
// model.
func fetchPRReview(prNumber string) (prreview.PullRequest, error) {
	repoSlug, err := currentRepoSlug()
	if err != nil {
		return prreview.PullRequest{}, err
	}
	slugParts := strings.SplitN(repoSlug, "/", 2)
	if len(slugParts) != 2 {
		return prreview.PullRequest{}, fmt.Errorf("unexpected repo slug %q", repoSlug)
	}
	response, err := ghGraphQL(slugParts[0], slugParts[1], prNumber)
	if err != nil {
		return prreview.PullRequest{}, err
	}
	raw := response.Data.Repository.PullRequest
	pr := prreview.PullRequest{
		Number:  raw.Number,
		Title:   raw.Title,
		URL:     raw.URL,
		Threads: []prreview.Thread{},
	}
	for _, rawThread := range raw.ReviewThreads.Nodes {
		thread := prreview.Thread{
			ID:         rawThread.ID,
			IsResolved: rawThread.IsResolved,
			IsOutdated: rawThread.IsOutdated,
			Path:       rawThread.Path,
			Line:       rawThread.Line,
			StartLine:  rawThread.StartLine,
			Comments:   []prreview.Comment{},
		}
		for _, rawComment := range rawThread.Comments.Nodes {
			thread.Comments = append(thread.Comments, prreview.Comment{
				ID:          rawComment.DatabaseID,
				Body:        rawComment.Body,
				AuthorLogin: rawComment.Author.Login,
				URL:         rawComment.URL,
				CreatedAt:   rawComment.CreatedAt,
			})
		}
		pr.Threads = append(pr.Threads, thread)
	}
	return pr, nil
}
// ghGraphQL fetches up to 100 review threads (each with up to 100 comments)
// for the given PR through the gh CLI's GraphQL endpoint.
func ghGraphQL(owner, name, prNumber string) (*ghReviewResponse, error) {
	git.CheckGitHubCLI()
	query := `query($owner:String!, $name:String!, $number:Int!) {
repository(owner:$owner, name:$name) {
pullRequest(number:$number) {
number
title
url
reviewThreads(first:100) {
nodes {
id
isResolved
isOutdated
path
line
startLine
comments(first:100) {
nodes {
databaseId
body
url
createdAt
author {
login
}
}
}
}
}
}
}
}`
	graphqlCmd := exec.Command(
		"gh", "api", "graphql",
		"-f", "query="+query,
		"-F", "owner="+owner,
		"-F", "name="+name,
		"-F", "number="+prNumber,
	)
	output, err := graphqlCmd.Output()
	if err != nil {
		// Include gh's stderr in the error when the process exited non-zero.
		exitErr, isExit := err.(*exec.ExitError)
		if isExit {
			return nil, fmt.Errorf("gh api graphql failed: %w: %s", err, strings.TrimSpace(string(exitErr.Stderr)))
		}
		return nil, fmt.Errorf("gh api graphql failed: %w", err)
	}
	var parsed ghReviewResponse
	if err := json.Unmarshal(output, &parsed); err != nil {
		return nil, fmt.Errorf("parse graphql response: %w", err)
	}
	return &parsed, nil
}
// replyToReviewComment posts a reply to a top-level inline review comment via
// the GitHub REST API (through gh); the response body is discarded.
func replyToReviewComment(repoSlug, prNumber string, commentID int, body string) error {
	endpoint := fmt.Sprintf("repos/%s/pulls/%s/comments/%d/replies", repoSlug, prNumber, commentID)
	_, err := ghString(
		"api",
		"--method", "POST",
		endpoint,
		"-f", "body="+body,
	)
	return err
}
// resolveReviewThread marks a review thread as resolved via the GraphQL
// resolveReviewThread mutation.
func resolveReviewThread(threadID string) error {
	git.CheckGitHubCLI()
	mutation := `mutation($threadId:ID!) {
resolveReviewThread(input:{threadId:$threadId}) {
thread {
id
isResolved
}
}
}`
	resolveCmd := exec.Command(
		"gh", "api", "graphql",
		"-f", "query="+mutation,
		"-F", "threadId="+threadID,
	)
	output, err := resolveCmd.CombinedOutput()
	if err != nil {
		// Combined output captures GraphQL error payloads printed to stderr.
		return fmt.Errorf("resolve review thread: %w: %s", err, strings.TrimSpace(string(output)))
	}
	return nil
}
// reviewOutputPath returns where a review artifact should be written: the
// explicit path when one was given, otherwise fileName inside a per-PR
// directory under the agent-lab state root (created on demand).
func reviewOutputPath(prNumber, explicit, fileName string) (string, error) {
	if strings.TrimSpace(explicit) != "" {
		return explicit, nil
	}
	commonGitDir, err := agentlab.GetCommonGitDir()
	if err != nil {
		return "", err
	}
	stateDir := filepath.Join(agentlab.StateRoot(commonGitDir), "reviews", "pr-"+prNumber)
	if mkErr := os.MkdirAll(stateDir, 0755); mkErr != nil {
		return "", fmt.Errorf("create review state dir: %w", mkErr)
	}
	return filepath.Join(stateDir, fileName), nil
}
// writeJSON pretty-prints value as indented JSON and writes it to path.
// Failures are fatal because callers rely on the artifact existing afterwards.
func writeJSON(path string, value any) {
	data, err := json.MarshalIndent(value, "", " ")
	if err != nil {
		log.Fatalf("Failed to encode JSON for %s: %v", path, err)
	}
	if err := os.WriteFile(path, data, 0644); err != nil {
		log.Fatalf("Failed to write %s: %v", path, err)
	}
}

View File

@@ -41,6 +41,7 @@ func NewRootCommand() *cobra.Command {
cmd.PersistentFlags().BoolVar(&opts.Debug, "debug", false, "run in debug mode")
// Add subcommands
cmd.AddCommand(NewAgentCheckCommand())
cmd.AddCommand(NewBackendCommand())
cmd.AddCommand(NewCheckLazyImportsCommand())
cmd.AddCommand(NewCherryPickCommand())
@@ -48,8 +49,13 @@ func NewRootCommand() *cobra.Command {
cmd.AddCommand(NewDeployCommand())
cmd.AddCommand(NewOpenAPICommand())
cmd.AddCommand(NewComposeCommand())
cmd.AddCommand(NewJourneyCommand())
cmd.AddCommand(NewLogsCommand())
cmd.AddCommand(NewPullCommand())
cmd.AddCommand(NewPRChecksCommand())
cmd.AddCommand(NewPRMergeCommand())
cmd.AddCommand(NewPROpenCommand())
cmd.AddCommand(NewPRReviewCommand())
cmd.AddCommand(NewRunCICommand())
cmd.AddCommand(NewScreenshotDiffCommand())
cmd.AddCommand(NewDesktopCommand())
@@ -58,6 +64,8 @@ func NewRootCommand() *cobra.Command {
cmd.AddCommand(NewLatestStableTagCommand())
cmd.AddCommand(NewWhoisCommand())
cmd.AddCommand(NewTraceCommand())
cmd.AddCommand(NewVerifyCommand())
cmd.AddCommand(NewWorktreeCommand())
return cmd
}

318
tools/ods/cmd/verify.go Normal file
View File

@@ -0,0 +1,318 @@
package cmd
import (
"encoding/json"
"fmt"
"io"
"os"
"os/exec"
"path/filepath"
"time"
log "github.com/sirupsen/logrus"
"github.com/spf13/cobra"
"github.com/onyx-dot-app/onyx/tools/ods/internal/agentlab"
"github.com/onyx-dot-app/onyx/tools/ods/internal/envutil"
)
// VerifyOptions holds the flag values for the verify command.
type VerifyOptions struct {
	BaseRef           string   // ref agent-check diffs against; empty = worktree base ref, then HEAD
	SkipAgentCheck    bool     // skip the diff-based agent-check step
	Worktree          string   // tracked agent-lab worktree to verify instead of the current checkout
	PytestPaths       []string // pytest paths/node ids; empty skips the pytest step
	PlaywrightPaths   []string // Playwright test paths to run
	PlaywrightGrep    string   // grep pattern passed through to Playwright
	PlaywrightProject string   // Playwright project to run
}
// VerifySummary is the machine-readable result of one verify run, written as
// summary.json into the artifact directory.
type VerifySummary struct {
	GeneratedAt string              `json:"generated_at"`       // RFC3339 UTC timestamp of the run
	RepoRoot    string              `json:"repo_root"`          // checkout the steps ran against
	Worktree    *agentlab.Manifest  `json:"worktree,omitempty"` // present only for tracked worktrees
	Steps       []VerifyStepSummary `json:"steps"`              // step results in execution order
}
// VerifyStepSummary records the outcome of a single verification step
// (agent-check, pytest, or playwright).
type VerifyStepSummary struct {
	Name        string   `json:"name"`                   // step identifier
	Status      string   `json:"status"`                 // "passed" or "failed"
	Command     []string `json:"command,omitempty"`      // command that was (or would be) run
	DurationMS  int64    `json:"duration_ms"`            // wall-clock duration
	LogPath     string   `json:"log_path,omitempty"`     // teed stdout/stderr log, when any
	ArtifactDir string   `json:"artifact_dir,omitempty"` // extra artifacts (e.g. Playwright output)
	Details     []string `json:"details,omitempty"`      // errors, violations, or context notes
}
// NewVerifyCommand creates the verify command.
func NewVerifyCommand() *cobra.Command {
	opts := &VerifyOptions{}
	verifyCmd := &cobra.Command{
		Use:   "verify",
		Short: "Run the agent-lab verification ladder and write a machine-readable summary",
		Long: `Run the agent-lab verification ladder for the current checkout.
This command composes the diff-based agent-check with optional pytest and
Playwright execution, then writes a JSON summary into the worktree artifact
directory so agents can inspect the result without re-parsing console output.
Use --worktree to run the same flow against a tracked target worktree from the
agent-lab control checkout.`,
		Run: func(cmd *cobra.Command, args []string) {
			runVerify(opts)
		},
	}
	flags := verifyCmd.Flags()
	flags.StringVar(&opts.BaseRef, "base-ref", "", "git ref to compare against for agent-check (defaults to the worktree base ref or HEAD)")
	flags.BoolVar(&opts.SkipAgentCheck, "skip-agent-check", false, "skip the diff-based agent-check step")
	flags.StringVar(&opts.Worktree, "worktree", "", "tracked agent-lab worktree to verify from instead of the current checkout")
	flags.StringArrayVar(&opts.PytestPaths, "pytest", nil, "pytest path or node id to run (repeatable)")
	flags.StringArrayVar(&opts.PlaywrightPaths, "playwright", nil, "Playwright test path to run (repeatable)")
	flags.StringVar(&opts.PlaywrightGrep, "playwright-grep", "", "grep passed through to Playwright")
	flags.StringVar(&opts.PlaywrightProject, "playwright-project", "", "Playwright project to run")
	return verifyCmd
}
// runVerify executes the verification ladder (agent-check, then optional
// pytest and Playwright) against the current checkout or a tracked worktree.
// The summary JSON is always written — including on the first failing step,
// after which the process exits non-zero.
func runVerify(opts *VerifyOptions) {
	repoRoot, manifest, hasManifest := resolveAgentLabTarget(opts.Worktree)
	commonGitDir, err := agentlab.GetCommonGitDir()
	if err != nil {
		log.Fatalf("Failed to determine git common dir: %v", err)
	}
	runAt := time.Now().UTC()
	artifactRoot := filepath.Join(repoRoot, "web", "output")
	if hasManifest {
		// Tracked worktrees get a timestamped directory under their own
		// artifact root so successive runs never clobber each other.
		artifactRoot = filepath.Join(manifest.ArtifactDir, "verify", runAt.Format("20060102-150405"))
	}
	if err := os.MkdirAll(artifactRoot, 0755); err != nil {
		log.Fatalf("Failed to create verify artifact dir: %v", err)
	}
	summary := VerifySummary{
		GeneratedAt: runAt.Format(time.RFC3339),
		RepoRoot:    repoRoot,
		Steps:       make([]VerifyStepSummary, 0, 3),
	}
	if hasManifest {
		manifestCopy := manifest
		summary.Worktree = &manifestCopy
	}
	// recordStep appends the step result; on failure it flushes the summary
	// and terminates with a non-zero exit so later steps never run.
	recordStep := func(step VerifyStepSummary, passed bool) {
		summary.Steps = append(summary.Steps, step)
		if !passed {
			writeVerifySummary(summary, artifactRoot, commonGitDir, manifest, hasManifest, runAt)
			os.Exit(1)
		}
	}
	if !opts.SkipAgentCheck {
		baseRef := opts.BaseRef
		if baseRef == "" && hasManifest {
			baseRef = manifest.BaseRef
		}
		if baseRef == "" {
			baseRef = "HEAD"
		}
		recordStep(runAgentCheckVerifyStep(repoRoot, opts.Worktree, baseRef))
	}
	if len(opts.PytestPaths) > 0 {
		recordStep(runPytestVerifyStep(repoRoot, artifactRoot, manifest, hasManifest, opts.PytestPaths))
	}
	if len(opts.PlaywrightPaths) > 0 || opts.PlaywrightGrep != "" {
		recordStep(runPlaywrightVerifyStep(repoRoot, artifactRoot, manifest, hasManifest, opts))
	}
	writeVerifySummary(summary, artifactRoot, commonGitDir, manifest, hasManifest, runAt)
	log.Infof("Verification summary written to %s", filepath.Join(artifactRoot, "summary.json"))
}
// runAgentCheckVerifyStep evaluates the diff-based agent-check against
// baseRef and converts the outcome into a step summary plus a pass flag.
func runAgentCheckVerifyStep(repoRoot, worktree, baseRef string) (VerifyStepSummary, bool) {
	startedAt := time.Now()
	checkOpts := &AgentCheckOptions{BaseRef: baseRef, Worktree: worktree, RepoRoot: repoRoot}
	result, err := evaluateAgentCheck(checkOpts, nil)
	command := []string{"ods", "agent-check", "--base-ref", baseRef}
	if worktree != "" {
		command = append(command, "--worktree", worktree)
	}
	step := VerifyStepSummary{
		Name:       "agent-check",
		Command:    command,
		DurationMS: time.Since(startedAt).Milliseconds(),
	}
	if err != nil {
		step.Status = "failed"
		step.Details = []string{err.Error()}
		return step, false
	}
	if len(result.Violations)+len(result.DocViolations) == 0 {
		step.Status = "passed"
		return step, true
	}
	step.Status = "failed"
	for _, violation := range result.Violations {
		step.Details = append(step.Details, fmt.Sprintf("%s:%d [%s] %s", violation.Path, violation.LineNum, violation.RuleID, violation.Message))
	}
	for _, violation := range result.DocViolations {
		step.Details = append(step.Details, fmt.Sprintf("%s [agent-docs] %s", violation.Path, violation.Message))
	}
	return step, false
}
// runPytestVerifyStep runs pytest (via `python -m dotenv` so .vscode/.env is
// loaded) from the backend directory, teeing output to pytest.log.
func runPytestVerifyStep(repoRoot, artifactRoot string, manifest agentlab.Manifest, hasManifest bool, pytestPaths []string) (VerifyStepSummary, bool) {
	pythonExecutable := pythonForRepo(repoRoot)
	pytestArgs := append([]string{"-m", "dotenv", "-f", ".vscode/.env", "run", "--", "pytest"}, pytestPaths...)
	extraEnv := map[string]string{}
	if hasManifest {
		// Inject the worktree's namespaced runtime environment.
		for key, value := range manifest.RuntimeEnv() {
			extraEnv[key] = value
		}
	}
	step, passed := runLoggedCommand(
		"pytest",
		filepath.Join(artifactRoot, "pytest.log"),
		filepath.Join(repoRoot, "backend"),
		extraEnv,
		pythonExecutable,
		pytestArgs...,
	)
	if hasManifest {
		deps := manifest.ResolvedDependencies()
		step.Details = append(step.Details,
			fmt.Sprintf("dependency mode: %s", deps.Mode),
			fmt.Sprintf("search infra: %s", deps.SearchInfraMode),
		)
	}
	return step, passed
}
// runPlaywrightVerifyStep runs `npx playwright test` from the web directory,
// teeing output to playwright.log and annotating the step with worktree
// runtime context when available.
func runPlaywrightVerifyStep(repoRoot, artifactRoot string, manifest agentlab.Manifest, hasManifest bool, opts *VerifyOptions) (VerifyStepSummary, bool) {
	playwrightArgs := append([]string{"playwright", "test"}, opts.PlaywrightPaths...)
	if opts.PlaywrightGrep != "" {
		playwrightArgs = append(playwrightArgs, "--grep", opts.PlaywrightGrep)
	}
	if opts.PlaywrightProject != "" {
		playwrightArgs = append(playwrightArgs, "--project", opts.PlaywrightProject)
	}
	extraEnv := map[string]string{}
	if hasManifest {
		// Inject the worktree's namespaced runtime environment.
		for key, value := range manifest.RuntimeEnv() {
			extraEnv[key] = value
		}
	}
	step, passed := runLoggedCommand(
		"playwright",
		filepath.Join(artifactRoot, "playwright.log"),
		filepath.Join(repoRoot, "web"),
		extraEnv,
		"npx",
		playwrightArgs...,
	)
	step.ArtifactDir = filepath.Join(repoRoot, "web", "output")
	if hasManifest {
		deps := manifest.ResolvedDependencies()
		step.Details = append(step.Details,
			fmt.Sprintf("base url: %s", manifest.URLs.Web),
			fmt.Sprintf("dependency mode: %s", deps.Mode),
			fmt.Sprintf("search infra: %s", deps.SearchInfraMode),
			fmt.Sprintf("reuse Chrome DevTools MCP against %s for interactive browser validation", manifest.URLs.Web),
		)
		step.Details = append(step.Details, manifest.DependencyWarnings()...)
	}
	return step, passed
}
// runLoggedCommand runs executable in workdir with optional environment
// overrides, teeing stdout/stderr to both the console and logPath, and
// returns a populated step summary plus a pass/fail flag.
func runLoggedCommand(name, logPath, workdir string, extraEnv map[string]string, executable string, args ...string) (VerifyStepSummary, bool) {
	startedAt := time.Now()
	// fail builds a setup-failure summary (no Command/LogPath, matching the
	// pre-execution failure shape).
	fail := func(detail string) (VerifyStepSummary, bool) {
		return VerifyStepSummary{
			Name:       name,
			Status:     "failed",
			DurationMS: time.Since(startedAt).Milliseconds(),
			Details:    []string{detail},
		}, false
	}
	if err := os.MkdirAll(filepath.Dir(logPath), 0755); err != nil {
		return fail(fmt.Sprintf("create log dir: %v", err))
	}
	logFile, err := os.Create(logPath)
	if err != nil {
		return fail(fmt.Sprintf("create log file: %v", err))
	}
	defer func() { _ = logFile.Close() }()
	execCmd := exec.Command(executable, args...)
	execCmd.Dir = workdir
	execCmd.Stdout = io.MultiWriter(os.Stdout, logFile)
	execCmd.Stderr = io.MultiWriter(os.Stderr, logFile)
	if len(extraEnv) > 0 {
		execCmd.Env = envutil.ApplyOverrides(os.Environ(), extraEnv)
	}
	step := VerifyStepSummary{
		Name:    name,
		Command: append([]string{executable}, args...),
		LogPath: logPath,
	}
	runErr := execCmd.Run()
	step.DurationMS = time.Since(startedAt).Milliseconds()
	if runErr != nil {
		step.Status = "failed"
		step.Details = []string{runErr.Error()}
		return step, false
	}
	step.Status = "passed"
	return step, true
}
// writeVerifySummary writes summary.json into artifactRoot and, for tracked
// worktrees, records the verification run in the worktree metadata.
// Encode/write failures are fatal; a metadata-update failure only warns so
// the summary artifact itself is never lost.
func writeVerifySummary(summary VerifySummary, artifactRoot, commonGitDir string, manifest agentlab.Manifest, hasManifest bool, runAt time.Time) {
	summaryPath := filepath.Join(artifactRoot, "summary.json")
	data, err := json.MarshalIndent(summary, "", " ")
	if err != nil {
		log.Fatalf("Failed to encode verify summary: %v", err)
	}
	if err := os.WriteFile(summaryPath, data, 0644); err != nil {
		log.Fatalf("Failed to write verify summary: %v", err)
	}
	if hasManifest {
		if err := agentlab.UpdateVerification(commonGitDir, manifest, summaryPath, runAt); err != nil {
			log.Warnf("Failed to update worktree verification metadata: %v", err)
		}
	}
}
// pythonForRepo picks the Python interpreter for a checkout: its own .venv if
// present, otherwise the control checkout's .venv for tracked agent-lab
// worktrees, otherwise whatever "python" resolves to on PATH.
func pythonForRepo(repoRoot string) string {
	localVenv := filepath.Join(repoRoot, ".venv", "bin", "python")
	if _, err := os.Stat(localVenv); err == nil {
		return localVenv
	}
	if manifest, found := currentAgentLabManifest(repoRoot); found {
		sharedVenv := filepath.Join(manifest.CreatedFromPath, ".venv", "bin", "python")
		if _, err := os.Stat(sharedVenv); err == nil {
			return sharedVenv
		}
	}
	return "python"
}

View File

@@ -13,6 +13,7 @@ import (
log "github.com/sirupsen/logrus"
"github.com/spf13/cobra"
"github.com/onyx-dot-app/onyx/tools/ods/internal/envutil"
"github.com/onyx-dot-app/onyx/tools/ods/internal/paths"
)
@@ -22,28 +23,32 @@ type webPackageJSON struct {
// NewWebCommand creates a command that runs npm scripts from the web directory.
func NewWebCommand() *cobra.Command {
var worktree string
cmd := &cobra.Command{
Use: "web <script> [args...]",
Short: "Run web/package.json npm scripts",
Long: webHelpDescription(),
Args: cobra.MinimumNArgs(1),
Args: cobra.MinimumNArgs(1),
ValidArgsFunction: func(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) {
if len(args) > 0 {
return nil, cobra.ShellCompDirectiveNoFileComp
}
return webScriptNames(), cobra.ShellCompDirectiveNoFileComp
},
Run: func(cmd *cobra.Command, args []string) {
runWebScript(args)
},
}
cmd.Flags().SetInterspersed(false)
cmd.Flags().StringVar(&worktree, "worktree", "", "tracked agent-lab worktree to run from instead of the current checkout")
cmd.Run = func(cmd *cobra.Command, args []string) {
runWebScript(args, worktree)
}
return cmd
}
func runWebScript(args []string) {
webDir, err := webDir()
func runWebScript(args []string, worktree string) {
repoRoot, manifest, hasManifest := resolveAgentLabTarget(worktree)
webDir, err := webDirForRoot(repoRoot)
if err != nil {
log.Fatalf("Failed to find web directory: %v", err)
}
@@ -68,6 +73,13 @@ func runWebScript(args []string) {
webCmd.Stderr = os.Stderr
webCmd.Stdin = os.Stdin
if hasManifest {
webCmd.Env = envutil.ApplyOverrides(os.Environ(), manifest.RuntimeEnv())
log.Infof("agent-lab worktree %s detected: web=%s api=%s", manifest.Branch, manifest.URLs.Web, manifest.URLs.API)
log.Infof("lane=%s base-ref=%s", manifest.ResolvedLane(), manifest.BaseRef)
log.Infof("dependency mode=%s search-infra=%s", manifest.ResolvedDependencies().Mode, manifest.ResolvedDependencies().SearchInfraMode)
}
if err := webCmd.Run(); err != nil {
// For wrapped commands, preserve the child process's exit code and
// avoid duplicating already-printed stderr output.
@@ -101,7 +113,8 @@ func webHelpDescription() string {
Examples:
ods web dev
ods web lint
ods web test --watch`
ods web test --watch
ods web dev --worktree codex/fix/auth-banner-modal`
scripts := webScriptNames()
if len(scripts) == 0 {
@@ -112,7 +125,7 @@ Examples:
}
func loadWebScripts() (map[string]string, error) {
webDir, err := webDir()
webDir, err := webDirForRoot("")
if err != nil {
return nil, err
}
@@ -135,10 +148,13 @@ func loadWebScripts() (map[string]string, error) {
return pkg.Scripts, nil
}
func webDir() (string, error) {
root, err := paths.GitRoot()
if err != nil {
return "", err
func webDirForRoot(root string) (string, error) {
if root == "" {
var err error
root, err = paths.GitRoot()
if err != nil {
return "", err
}
}
return filepath.Join(root, "web"), nil
}

626
tools/ods/cmd/worktree.go Normal file
View File

@@ -0,0 +1,626 @@
package cmd
import (
"fmt"
"os"
"os/exec"
"path/filepath"
"strings"
"text/tabwriter"
log "github.com/sirupsen/logrus"
"github.com/spf13/cobra"
"github.com/onyx-dot-app/onyx/tools/ods/internal/agentlab"
"github.com/onyx-dot-app/onyx/tools/ods/internal/paths"
)
// WorktreeCreateOptions holds flag values for `worktree create`.
type WorktreeCreateOptions struct {
	From           string // git ref to branch from; empty = inferred from the branch lane
	Path           string // custom checkout path; empty = the default location
	Bootstrap      bool   // bootstrap env, Python, and web deps after creation
	DependencyMode string // "namespaced" or "shared"
}

// WorktreeRemoveOptions holds flag values for `worktree remove`.
// NOTE(review): field semantics inferred from names — the remove
// implementation is not in view; confirm before relying on them.
type WorktreeRemoveOptions struct {
	Force    bool
	DropDeps bool
}

// WorktreeBootstrapOptions holds flag values for `worktree bootstrap`.
// NOTE(review): mode value sets are defined by the bootstrap implementation,
// which is not in view — confirm there.
type WorktreeBootstrapOptions struct {
	EnvMode    string
	PythonMode string
	WebMode    string
}
// NewWorktreeCommand creates the parent worktree command.
func NewWorktreeCommand() *cobra.Command {
	worktreeCmd := &cobra.Command{
		Use:   "worktree",
		Short: "Manage agent-lab git worktrees and harness metadata",
		Long: `Manage agent-lab git worktrees and the local harness state that makes
them bootable with isolated ports, URLs, and artifact directories.`,
	}
	for _, sub := range []*cobra.Command{
		newWorktreeCreateCommand(),
		newWorktreeBootstrapCommand(),
		newWorktreeDepsCommand(),
		newWorktreeStatusCommand(),
		newWorktreeShowCommand(),
		newWorktreeRemoveCommand(),
	} {
		worktreeCmd.AddCommand(sub)
	}
	return worktreeCmd
}
// newWorktreeCreateCommand builds `worktree create <branch>` with flags for
// the base ref, checkout path, bootstrap toggle, and dependency mode.
func newWorktreeCreateCommand() *cobra.Command {
	opts := &WorktreeCreateOptions{}
	cmd := &cobra.Command{
		Use:   "create <branch>",
		Short: "Create a new agent-lab worktree with isolated runtime metadata",
		Long: `Create a tracked agent-lab worktree and bootstrap its local runtime state.
Branch lanes control the default base ref when --from is not supplied:
codex/lab/<name> -> codex/agent-lab
codex/fix/<name> -> origin/main
codex/feat/<name> -> origin/main
Use conventional branch lanes for product work so the base stays explicit.`,
		Args: cobra.ExactArgs(1),
		Run: func(cmd *cobra.Command, args []string) {
			runWorktreeCreate(args[0], opts)
		},
	}
	cmd.Flags().StringVar(&opts.From, "from", "", "git ref to branch from (defaults are inferred from the branch lane)")
	cmd.Flags().StringVar(&opts.Path, "path", "", "custom checkout path for the new worktree")
	cmd.Flags().BoolVar(&opts.Bootstrap, "bootstrap", true, "bootstrap env, Python, and frontend dependencies for the worktree")
	cmd.Flags().StringVar(&opts.DependencyMode, "dependency-mode", string(agentlab.DependencyModeNamespaced), "dependency mode: namespaced or shared")
	return cmd
}
// newWorktreeBootstrapCommand builds `worktree bootstrap [worktree]`; each
// flag selects a per-area bootstrap mode and defaults to auto.
func newWorktreeBootstrapCommand() *cobra.Command {
	opts := &WorktreeBootstrapOptions{}
	cmd := &cobra.Command{
		Use:   "bootstrap [worktree]",
		Short: "Bootstrap env files and dependencies for an agent-lab worktree",
		Args:  cobra.MaximumNArgs(1),
		Run: func(cmd *cobra.Command, args []string) {
			// An empty identifier selects the current checkout's worktree.
			identifier := ""
			if len(args) == 1 {
				identifier = args[0]
			}
			runWorktreeBootstrap(identifier, opts)
		},
	}
	cmd.Flags().StringVar(&opts.EnvMode, "env-mode", string(agentlab.BootstrapModeAuto), "env bootstrap mode: auto, skip, link, copy")
	cmd.Flags().StringVar(&opts.PythonMode, "python-mode", string(agentlab.BootstrapModeAuto), "Python bootstrap mode: auto, skip, link, copy")
	cmd.Flags().StringVar(&opts.WebMode, "web-mode", string(agentlab.BootstrapModeAuto), "frontend bootstrap mode: auto, skip, clone, copy, npm")
	return cmd
}
// newWorktreeDepsCommand builds the `worktree deps` command group with the
// up/status/reset/down subcommands. All four take one optional positional
// worktree identifier, so the extraction boilerplate is shared in
// optionalWorktreeArg instead of being repeated per subcommand.
func newWorktreeDepsCommand() *cobra.Command {
	cmd := &cobra.Command{
		Use:   "deps",
		Short: "Manage namespaced external dependencies for an agent-lab worktree",
	}
	cmd.AddCommand(&cobra.Command{
		Use:   "up [worktree]",
		Short: "Provision external dependency state for a worktree",
		Args:  cobra.MaximumNArgs(1),
		Run: func(cmd *cobra.Command, args []string) {
			runWorktreeDepsUp(optionalWorktreeArg(args))
		},
	})
	cmd.AddCommand(&cobra.Command{
		Use:   "status [worktree]",
		Short: "Inspect external dependency state for a worktree",
		Args:  cobra.MaximumNArgs(1),
		Run: func(cmd *cobra.Command, args []string) {
			runWorktreeDepsStatus(optionalWorktreeArg(args))
		},
	})
	cmd.AddCommand(&cobra.Command{
		Use:   "reset [worktree]",
		Short: "Reset namespaced external dependency state for a worktree",
		Args:  cobra.MaximumNArgs(1),
		Run: func(cmd *cobra.Command, args []string) {
			runWorktreeDepsReset(optionalWorktreeArg(args))
		},
	})
	cmd.AddCommand(&cobra.Command{
		Use:   "down [worktree]",
		Short: "Tear down namespaced external dependency state for a worktree",
		Args:  cobra.MaximumNArgs(1),
		Run: func(cmd *cobra.Command, args []string) {
			runWorktreeDepsDown(optionalWorktreeArg(args))
		},
	})
	return cmd
}

// optionalWorktreeArg returns the single optional positional argument, or ""
// when the caller did not name a worktree (meaning "use the current one").
func optionalWorktreeArg(args []string) string {
	if len(args) == 0 {
		return ""
	}
	return args[0]
}
// newWorktreeStatusCommand builds the `worktree status` listing command.
// It takes no arguments and no flags.
func newWorktreeStatusCommand() *cobra.Command {
	statusCmd := &cobra.Command{
		Use:   "status",
		Short: "List tracked agent-lab worktrees",
		Run: func(_ *cobra.Command, _ []string) {
			runWorktreeStatus()
		},
	}
	return statusCmd
}
// newWorktreeShowCommand builds `worktree show [worktree]`; the optional
// argument selects a worktree, defaulting to the current checkout's one.
func newWorktreeShowCommand() *cobra.Command {
	showCmd := &cobra.Command{
		Use:   "show [worktree]",
		Short: "Show detailed metadata for an agent-lab worktree",
		Args:  cobra.MaximumNArgs(1),
		Run: func(_ *cobra.Command, args []string) {
			target := ""
			if len(args) > 0 {
				target = args[0]
			}
			runWorktreeShow(target)
		},
	}
	return showCmd
}
// newWorktreeRemoveCommand builds `worktree remove <worktree>` with flags to
// force git removal and to drop namespaced dependencies first.
func newWorktreeRemoveCommand() *cobra.Command {
	opts := &WorktreeRemoveOptions{}
	cmd := &cobra.Command{
		Use:   "remove <worktree>",
		Short: "Remove an agent-lab worktree and its local state",
		Args:  cobra.ExactArgs(1),
		Run: func(cmd *cobra.Command, args []string) {
			runWorktreeRemove(args[0], opts)
		},
	}
	cmd.Flags().BoolVar(&opts.Force, "force", false, "force removal even if git reports uncommitted changes")
	cmd.Flags().BoolVar(&opts.DropDeps, "drop-deps", false, "tear down namespaced dependencies before removing the worktree")
	return cmd
}
// runWorktreeCreate creates a git worktree for branch, allocates isolated
// ports, writes env files and a manifest, optionally bootstraps dependencies,
// provisions external dependency state, and prints a summary plus next-step
// hints. Any failure is fatal.
func runWorktreeCreate(branch string, opts *WorktreeCreateOptions) {
	repoRoot, err := paths.GitRoot()
	if err != nil {
		log.Fatalf("Failed to determine git root: %v", err)
	}
	commonGitDir, err := agentlab.GetCommonGitDir()
	if err != nil {
		log.Fatalf("Failed to determine git common dir: %v", err)
	}
	// Refuse to double-track a branch that already has a worktree.
	if manifest, found, err := agentlab.FindByIdentifier(commonGitDir, branch); err != nil {
		log.Fatalf("Failed to inspect existing worktrees: %v", err)
	} else if found {
		log.Fatalf("Worktree already exists for %s at %s", manifest.Branch, manifest.CheckoutPath)
	}
	// Existing manifests feed the port allocator so ports don't collide.
	manifests, err := agentlab.LoadAll(commonGitDir)
	if err != nil {
		log.Fatalf("Failed to load worktree metadata: %v", err)
	}
	ports, err := agentlab.AllocatePorts(manifests)
	if err != nil {
		log.Fatalf("Failed to allocate worktree ports: %v", err)
	}
	dependencyMode := agentlab.DependencyMode(opts.DependencyMode)
	switch dependencyMode {
	case agentlab.DependencyModeShared, agentlab.DependencyModeNamespaced:
	default:
		log.Fatalf("Invalid dependency mode %q: must be shared or namespaced", opts.DependencyMode)
	}
	checkoutPath := opts.Path
	if checkoutPath == "" {
		checkoutPath = agentlab.DefaultCheckoutPath(repoRoot, branch)
	}
	checkoutPath, err = filepath.Abs(checkoutPath)
	if err != nil {
		log.Fatalf("Failed to resolve checkout path: %v", err)
	}
	if _, err := os.Stat(checkoutPath); err == nil {
		log.Fatalf("Checkout path already exists: %s", checkoutPath)
	}
	// The branch lane picks the base ref when --from was not supplied.
	baseSelection := agentlab.ResolveCreateBaseRef(branch, opts.From, agentlab.GitRefExists)
	manifest := agentlab.BuildManifest(
		repoRoot,
		commonGitDir,
		branch,
		baseSelection.Lane,
		baseSelection.Ref,
		checkoutPath,
		ports,
		dependencyMode,
	)
	// core.hooksPath=/dev/null keeps repo hooks from firing during the add.
	args := []string{"-c", "core.hooksPath=/dev/null", "worktree", "add", "-b", branch, checkoutPath, baseSelection.Ref}
	log.Infof("Creating worktree %s at %s", branch, checkoutPath)
	gitCmd := exec.Command("git", args...)
	gitCmd.Stdout = os.Stdout
	gitCmd.Stderr = os.Stderr
	gitCmd.Stdin = os.Stdin
	if err := gitCmd.Run(); err != nil {
		log.Fatalf("git worktree add failed: %v", err)
	}
	// Record the symlink-resolved path so later lookups match what git sees.
	if resolvedPath, err := filepath.EvalSymlinks(checkoutPath); err == nil {
		manifest.CheckoutPath = resolvedPath
	}
	if err := agentlab.WriteEnvFiles(manifest); err != nil {
		log.Fatalf("Failed to write worktree env files: %v", err)
	}
	if err := agentlab.WriteManifest(commonGitDir, manifest); err != nil {
		log.Fatalf("Failed to write worktree manifest: %v", err)
	}
	if opts.Bootstrap {
		// create always bootstraps in auto mode; `worktree bootstrap` exposes
		// the finer-grained per-area modes.
		bootstrapResult, err := agentlab.Bootstrap(manifest, agentlab.BootstrapOptions{
			EnvMode:    agentlab.BootstrapModeAuto,
			PythonMode: agentlab.BootstrapModeAuto,
			WebMode:    agentlab.BootstrapModeAuto,
		})
		if err != nil {
			log.Fatalf("Failed to bootstrap worktree: %v", err)
		}
		for _, action := range bootstrapResult.Actions {
			fmt.Printf(" bootstrap: %s\n", action)
		}
	}
	manifest, dependencyResult, err := agentlab.ProvisionDependencies(commonGitDir, manifest)
	if err != nil {
		log.Fatalf("Failed to provision worktree dependencies: %v", err)
	}
	for _, action := range dependencyResult.Actions {
		fmt.Printf(" deps: %s\n", action)
	}
	// Human-readable summary of everything that was allocated.
	fmt.Printf("Created agent-lab worktree %s\n", manifest.Branch)
	fmt.Printf(" checkout: %s\n", manifest.CheckoutPath)
	fmt.Printf(" lane: %s\n", manifest.ResolvedLane())
	fmt.Printf(" base ref: %s\n", manifest.BaseRef)
	fmt.Printf(" base selection: %s\n", baseSelection.Reason)
	fmt.Printf(" dependency mode: %s\n", manifest.ResolvedDependencies().Mode)
	if manifest.ResolvedDependencies().Namespace != "" {
		fmt.Printf(" dependency namespace: %s\n", manifest.ResolvedDependencies().Namespace)
	}
	if manifest.ResolvedDependencies().Mode == agentlab.DependencyModeNamespaced {
		fmt.Printf(" postgres database: %s\n", manifest.ResolvedDependencies().PostgresDatabase)
		fmt.Printf(" redis prefix: %s\n", manifest.ResolvedDependencies().RedisPrefix)
		fmt.Printf(" file-store bucket: %s\n", manifest.ResolvedDependencies().FileStoreBucket)
	}
	fmt.Printf(" web url: %s\n", manifest.URLs.Web)
	fmt.Printf(" api url: %s\n", manifest.URLs.API)
	fmt.Printf(" mcp url: %s\n", manifest.URLs.MCP)
	fmt.Printf(" artifacts: %s\n", manifest.ArtifactDir)
	for _, warning := range manifest.DependencyWarnings() {
		fmt.Printf(" note: %s\n", warning)
	}
	fmt.Printf("\nNext steps:\n")
	fmt.Printf(" cd %s\n", manifest.CheckoutPath)
	fmt.Printf(" # Make edits in the worktree itself.\n")
	if manifest.ResolvedLane() == agentlab.WorktreeLaneProduct {
		// Product-lane worktrees are driven from the control checkout.
		fmt.Printf(" # Run harness commands from the control checkout with --worktree %s.\n", manifest.Branch)
		fmt.Printf(" ods verify --worktree %s\n", manifest.Branch)
		fmt.Printf(" ods backend api --worktree %s\n", manifest.Branch)
		fmt.Printf(" ods web dev --worktree %s\n", manifest.Branch)
	} else {
		fmt.Printf(" ods backend api\n")
		fmt.Printf(" ods backend model_server\n")
		fmt.Printf(" ods web dev\n")
		fmt.Printf(" ods verify\n")
	}
}
// runWorktreeBootstrap re-runs env/Python/web bootstrap for a worktree using
// the caller-selected modes and echoes each action the bootstrapper took.
func runWorktreeBootstrap(identifier string, opts *WorktreeBootstrapOptions) {
	manifest := mustResolveWorktree(identifier)
	modes := agentlab.BootstrapOptions{
		EnvMode:    agentlab.BootstrapMode(opts.EnvMode),
		PythonMode: agentlab.BootstrapMode(opts.PythonMode),
		WebMode:    agentlab.BootstrapMode(opts.WebMode),
	}
	result, err := agentlab.Bootstrap(manifest, modes)
	if err != nil {
		log.Fatalf("Failed to bootstrap worktree %s: %v", manifest.Branch, err)
	}
	fmt.Printf("Bootstrapped %s\n", manifest.Branch)
	for _, action := range result.Actions {
		fmt.Printf(" %s\n", action)
	}
}
// runWorktreeDepsUp provisions external dependency state for a worktree and
// echoes each provisioning action plus any warnings from the updated manifest.
func runWorktreeDepsUp(identifier string) {
	commonGitDir, err := agentlab.GetCommonGitDir()
	if err != nil {
		log.Fatalf("Failed to determine git common dir: %v", err)
	}
	manifest := mustResolveWorktree(identifier)
	// ProvisionDependencies returns an updated manifest; keep it so the
	// warnings printed below reflect the post-provision state.
	manifest, result, err := agentlab.ProvisionDependencies(commonGitDir, manifest)
	if err != nil {
		log.Fatalf("Failed to provision dependencies for %s: %v", manifest.Branch, err)
	}
	fmt.Printf("Provisioned dependencies for %s\n", manifest.Branch)
	for _, action := range result.Actions {
		fmt.Printf(" %s\n", action)
	}
	for _, warning := range manifest.DependencyWarnings() {
		fmt.Printf(" note: %s\n", warning)
	}
}
// runWorktreeDepsStatus prints the dependency mode, namespace, and per-store
// readiness/occupancy counters for a worktree, plus any manifest warnings.
func runWorktreeDepsStatus(identifier string) {
	manifest := mustResolveWorktree(identifier)
	status, err := agentlab.InspectDependencies(manifest)
	if err != nil {
		log.Fatalf("Failed to inspect dependencies for %s: %v", manifest.Branch, err)
	}
	fmt.Printf("branch: %s\n", manifest.Branch)
	fmt.Printf("mode: %s\n", status.Mode)
	if status.Namespace != "" {
		fmt.Printf("namespace: %s\n", status.Namespace)
	}
	// Per-store lines are skipped when the identifier is empty — presumably
	// shared mode leaves these unset; confirm against agentlab.
	if status.PostgresDatabase != "" {
		fmt.Printf("postgres database: %s (ready=%t tables=%d)\n", status.PostgresDatabase, status.PostgresReady, status.PostgresTableCount)
	}
	if status.RedisPrefix != "" {
		fmt.Printf("redis prefix: %s (ready=%t keys=%d)\n", status.RedisPrefix, status.RedisReady, status.RedisKeyCount)
	}
	if status.FileStoreBucket != "" {
		fmt.Printf("file-store bucket: %s (ready=%t objects=%d)\n", status.FileStoreBucket, status.FileStoreReady, status.FileStoreObjectCount)
	}
	fmt.Printf("search infra: %s\n", status.SearchInfraMode)
	for _, warning := range manifest.DependencyWarnings() {
		fmt.Printf("note: %s\n", warning)
	}
}
// runWorktreeDepsReset resets the namespaced external dependency state for a
// worktree and echoes each action performed.
func runWorktreeDepsReset(identifier string) {
	gitCommonDir, dirErr := agentlab.GetCommonGitDir()
	if dirErr != nil {
		log.Fatalf("Failed to determine git common dir: %v", dirErr)
	}
	manifest := mustResolveWorktree(identifier)
	manifest, depsResult, resetErr := agentlab.ResetDependencies(gitCommonDir, manifest)
	if resetErr != nil {
		log.Fatalf("Failed to reset dependencies for %s: %v", manifest.Branch, resetErr)
	}
	fmt.Printf("Reset dependencies for %s\n", manifest.Branch)
	for i := range depsResult.Actions {
		fmt.Printf(" %s\n", depsResult.Actions[i])
	}
}
// runWorktreeDepsDown tears down the namespaced external dependency state for
// a worktree and echoes each teardown action.
func runWorktreeDepsDown(identifier string) {
	gitCommonDir, dirErr := agentlab.GetCommonGitDir()
	if dirErr != nil {
		log.Fatalf("Failed to determine git common dir: %v", dirErr)
	}
	manifest := mustResolveWorktree(identifier)
	manifest, teardown, downErr := agentlab.TeardownDependencies(gitCommonDir, manifest)
	if downErr != nil {
		log.Fatalf("Failed to tear down dependencies for %s: %v", manifest.Branch, downErr)
	}
	fmt.Printf("Tore down dependencies for %s\n", manifest.Branch)
	for i := range teardown.Actions {
		fmt.Printf(" %s\n", teardown.Actions[i])
	}
}
// runWorktreeStatus prints a table of all tracked worktrees, marking the one
// that contains the current checkout with "*".
func runWorktreeStatus() {
	commonGitDir, err := agentlab.GetCommonGitDir()
	if err != nil {
		log.Fatalf("Failed to determine git common dir: %v", err)
	}
	// Best effort: a failure here only costs the "current" marker, not the
	// listing itself.
	repoRoot, _ := paths.GitRoot()
	current, _, _ := agentlab.FindByRepoRoot(commonGitDir, repoRoot)
	manifests, err := agentlab.LoadAll(commonGitDir)
	if err != nil {
		log.Fatalf("Failed to load worktree manifests: %v", err)
	}
	if len(manifests) == 0 {
		log.Info("No agent-lab worktrees tracked yet.")
		return
	}
	tw := tabwriter.NewWriter(os.Stdout, 0, 2, 2, ' ', 0)
	if _, err := fmt.Fprintln(tw, "CURRENT\tBRANCH\tLANE\tMODE\tWEB\tAPI\tPATH"); err != nil {
		log.Fatalf("Failed to write worktree header: %v", err)
	}
	for _, manifest := range manifests {
		marker := ""
		// Guard on a non-empty ID so a zero-valued "current" (lookup failed
		// above) can never mark a row.
		if manifest.ID == current.ID && manifest.ID != "" {
			marker = "*"
		}
		if _, err := fmt.Fprintf(
			tw,
			"%s\t%s\t%s\t%s\t%s\t%s\t%s\n",
			marker,
			manifest.Branch,
			manifest.ResolvedLane(),
			manifest.ResolvedDependencies().Mode,
			manifest.URLs.Web,
			manifest.URLs.API,
			manifest.CheckoutPath,
		); err != nil {
			log.Fatalf("Failed to write worktree row for %s: %v", manifest.Branch, err)
		}
	}
	_ = tw.Flush()
}
// runWorktreeShow dumps every manifest field for one worktree in "key: value"
// form — paths, env files, dependency identifiers, URLs, ports, and the last
// verify result when recorded.
func runWorktreeShow(identifier string) {
	manifest := mustResolveWorktree(identifier)
	fmt.Printf("branch: %s\n", manifest.Branch)
	fmt.Printf("id: %s\n", manifest.ID)
	fmt.Printf("lane: %s\n", manifest.ResolvedLane())
	fmt.Printf("checkout: %s\n", manifest.CheckoutPath)
	fmt.Printf("base-ref: %s\n", manifest.BaseRef)
	fmt.Printf("state-dir: %s\n", manifest.StateDir)
	fmt.Printf("artifacts: %s\n", manifest.ArtifactDir)
	fmt.Printf("backend env: %s\n", manifest.EnvFile)
	fmt.Printf("web env: %s\n", manifest.WebEnvFile)
	fmt.Printf("compose project: %s\n", manifest.ComposeProject)
	fmt.Printf("dependency mode: %s\n", manifest.ResolvedDependencies().Mode)
	if manifest.ResolvedDependencies().Namespace != "" {
		fmt.Printf("dependency namespace: %s\n", manifest.ResolvedDependencies().Namespace)
	}
	// Store identifiers are printed together, keyed off the Postgres name.
	if manifest.ResolvedDependencies().PostgresDatabase != "" {
		fmt.Printf("postgres database: %s\n", manifest.ResolvedDependencies().PostgresDatabase)
		fmt.Printf("redis prefix: %s\n", manifest.ResolvedDependencies().RedisPrefix)
		fmt.Printf("file-store bucket: %s\n", manifest.ResolvedDependencies().FileStoreBucket)
	}
	fmt.Printf("search infra: %s\n", manifest.ResolvedDependencies().SearchInfraMode)
	fmt.Printf("web url: %s\n", manifest.URLs.Web)
	fmt.Printf("api url: %s\n", manifest.URLs.API)
	fmt.Printf("mcp url: %s\n", manifest.URLs.MCP)
	fmt.Printf("ports: web=%d api=%d model_server=%d mcp=%d\n", manifest.Ports.Web, manifest.Ports.API, manifest.Ports.ModelServer, manifest.Ports.MCP)
	// Verification fields are optional; only print them when populated.
	if manifest.LastVerifiedAt != "" {
		fmt.Printf("last verified: %s\n", manifest.LastVerifiedAt)
	}
	if manifest.LastVerifySummary != "" {
		fmt.Printf("last summary: %s\n", manifest.LastVerifySummary)
	}
	for _, warning := range manifest.DependencyWarnings() {
		fmt.Printf("note: %s\n", warning)
	}
}
// mustResolveWorktree loads the manifest for the named worktree, or — when
// identifier is empty — for the worktree containing the current checkout.
// Any failure is fatal.
func mustResolveWorktree(identifier string) agentlab.Manifest {
	commonGitDir, err := agentlab.GetCommonGitDir()
	if err != nil {
		log.Fatalf("Failed to determine git common dir: %v", err)
	}
	if identifier == "" {
		repoRoot, err := paths.GitRoot()
		if err != nil {
			log.Fatalf("Failed to determine git root: %v", err)
		}
		manifest, found, err := agentlab.FindByRepoRoot(commonGitDir, repoRoot)
		if err != nil {
			log.Fatalf("Failed to resolve current worktree manifest: %v", err)
		}
		if !found {
			// Fix: this path previously printed `No agent-lab worktree found
			// for ""`, hiding that the lookup was keyed on the current
			// checkout rather than on a name.
			log.Fatalf("No agent-lab worktree found for current checkout %s", repoRoot)
		}
		return manifest
	}
	manifest, found, err := agentlab.FindByIdentifier(commonGitDir, identifier)
	if err != nil {
		log.Fatalf("Failed to resolve worktree manifest: %v", err)
	}
	if !found {
		log.Fatalf("No agent-lab worktree found for %q", identifier)
	}
	return manifest
}
// runWorktreeRemove removes the named worktree: optional dependency teardown,
// `git worktree remove` (with --force when requested and an orphaned-checkout
// fallback), then deletion of the tracked harness state.
func runWorktreeRemove(identifier string, opts *WorktreeRemoveOptions) {
	commonGitDir, err := agentlab.GetCommonGitDir()
	if err != nil {
		log.Fatalf("Failed to determine git common dir: %v", err)
	}
	manifest, found, err := agentlab.FindByIdentifier(commonGitDir, identifier)
	if err != nil {
		log.Fatalf("Failed to resolve worktree: %v", err)
	}
	if !found {
		log.Fatalf("No agent-lab worktree found for %q", identifier)
	}
	if opts.DropDeps {
		// Tear down namespaced dependency state before the checkout goes away.
		var teardownResult *agentlab.DependencyResult
		manifest, teardownResult, err = agentlab.TeardownDependencies(commonGitDir, manifest)
		if err != nil {
			log.Fatalf("Failed to tear down worktree dependencies: %v", err)
		}
		for _, action := range teardownResult.Actions {
			fmt.Printf(" deps: %s\n", action)
		}
	}
	args := []string{"worktree", "remove"}
	if opts.Force {
		args = append(args, "--force")
	}
	args = append(args, manifest.CheckoutPath)
	log.Infof("Removing worktree %s", manifest.Branch)
	gitCmd := exec.Command("git", args...)
	gitCmd.Stdout = os.Stdout
	gitCmd.Stderr = os.Stderr
	gitCmd.Stdin = os.Stdin
	if err := gitCmd.Run(); err != nil {
		// A forced removal can leave a checkout git no longer tracks; fall
		// back to deleting the directory directly in that case.
		if opts.Force && isOrphanedWorktree(manifest.CheckoutPath) {
			log.Warnf("git detached %s but left an orphaned checkout behind; removing %s", manifest.Branch, manifest.CheckoutPath)
			if removeErr := os.RemoveAll(manifest.CheckoutPath); removeErr != nil {
				log.Fatalf("git worktree remove failed: %v (fallback cleanup failed: %v)", err, removeErr)
			}
		} else {
			log.Fatalf("git worktree remove failed: %v", err)
		}
	}
	if err := agentlab.RemoveState(commonGitDir, manifest.ID); err != nil {
		log.Fatalf("Failed to remove worktree state: %v", err)
	}
	fmt.Printf("Removed agent-lab worktree %s\n", manifest.Branch)
	// Without --drop-deps, namespaced state intentionally survives removal.
	if manifest.ResolvedDependencies().Mode == agentlab.DependencyModeNamespaced && !opts.DropDeps {
		fmt.Printf(" note: namespaced Postgres/Redis/MinIO state was left in place. Use `ods worktree deps down %s` before removal if you want cleanup.\n", manifest.Branch)
	}
}
// isOrphanedWorktree reports whether checkoutPath is no longer a usable git
// worktree: git does not list it, the directory is gone, or git commands fail
// inside it. Used to decide whether a failed `git worktree remove` left an
// orphaned checkout that is safe to delete manually.
func isOrphanedWorktree(checkoutPath string) bool {
	// Compare against the porcelain output line-by-line rather than with a
	// raw substring search, so one worktree's path can never false-match in
	// the middle of another line (and the final entry matches even without a
	// trailing newline).
	if output, err := exec.Command("git", "worktree", "list", "--porcelain").Output(); err == nil {
		for _, line := range strings.Split(string(output), "\n") {
			if line == "worktree "+checkoutPath {
				return false
			}
		}
	}
	if _, statErr := os.Stat(checkoutPath); os.IsNotExist(statErr) {
		return true
	}
	// The directory exists but git can no longer operate inside it.
	if statusErr := exec.Command("git", "-C", checkoutPath, "status", "--short").Run(); statusErr != nil {
		return true
	}
	return false
}

View File

@@ -0,0 +1,95 @@
package agentcheck
import (
"bufio"
"fmt"
"path/filepath"
"regexp"
"strings"
)
// hunkHeaderPattern captures the new-file start line from a unified diff hunk
// header such as "@@ -10,2 +11,3 @@".
var hunkHeaderPattern = regexp.MustCompile(`^@@ -\d+(?:,\d+)? \+(\d+)(?:,\d+)? @@`)

// AddedLine is one "+" line from a diff: its repo-relative path, its line
// number in the NEW version of the file, and its content without the "+".
type AddedLine struct {
	Path    string
	LineNum int
	Content string
}

// Violation reports one rule hit on an added line.
type Violation struct {
	RuleID  string
	Path    string
	LineNum int
	Message string
	Content string
}
// ParseAddedLines walks a unified diff and returns every added ("+") line
// together with its file path and its line number in the NEW version of the
// file. Hunk headers seed the line counter; removed lines do not advance it.
func ParseAddedLines(diff string) ([]AddedLine, error) {
	scanner := bufio.NewScanner(strings.NewReader(diff))
	// Diff lines can be very long (e.g. minified assets); allow up to 1 MiB.
	scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024)
	var addedLines []AddedLine
	currentPath := ""
	currentNewLine := 0
	inHunk := false
	for scanner.Scan() {
		line := scanner.Text()
		switch {
		case strings.HasPrefix(line, "+++ "):
			currentPath = normalizeDiffPath(strings.TrimPrefix(line, "+++ "))
			inHunk = false
		case strings.HasPrefix(line, "@@ "):
			match := hunkHeaderPattern.FindStringSubmatch(line)
			if len(match) != 2 {
				return nil, fmt.Errorf("failed to parse hunk header: %s", line)
			}
			var err error
			currentNewLine, err = parseLineNumber(match[1])
			if err != nil {
				return nil, err
			}
			inHunk = true
		case !inHunk || currentPath == "":
			continue
		case strings.HasPrefix(line, `\`):
			// Fix: "\ No newline at end of file" is diff metadata, not file
			// content; it previously fell into the default case and wrongly
			// advanced the new-file line counter.
			continue
		case strings.HasPrefix(line, "+") && !strings.HasPrefix(line, "+++"):
			addedLines = append(addedLines, AddedLine{
				Path:    currentPath,
				LineNum: currentNewLine,
				Content: strings.TrimPrefix(line, "+"),
			})
			currentNewLine++
		case strings.HasPrefix(line, "-") && !strings.HasPrefix(line, "---"):
			// Removed lines exist only in the old file.
			continue
		default:
			// Context line: present in both files, advances the new counter.
			currentNewLine++
		}
	}
	if err := scanner.Err(); err != nil {
		return nil, fmt.Errorf("failed to scan diff: %w", err)
	}
	return addedLines, nil
}
// normalizeDiffPath converts a "+++" header target into a clean repo-relative
// slash path; "/dev/null" (deleted file) maps to "".
func normalizeDiffPath(path string) string {
	trimmed := strings.TrimPrefix(strings.TrimSpace(path), "b/")
	if trimmed == "/dev/null" {
		return ""
	}
	return filepath.ToSlash(trimmed)
}
// parseLineNumber converts the digit string captured from a hunk header into
// an int. Fix: an empty string previously fell through the loop and returned
// (0, nil); it is now rejected like any other non-numeric input.
func parseLineNumber(value string) (int, error) {
	if value == "" {
		return 0, fmt.Errorf("invalid line number: %s", value)
	}
	lineNum := 0
	for _, ch := range value {
		if ch < '0' || ch > '9' {
			return 0, fmt.Errorf("invalid line number: %s", value)
		}
		lineNum = lineNum*10 + int(ch-'0')
	}
	return lineNum, nil
}

View File

@@ -0,0 +1,143 @@
package agentcheck
import (
"reflect"
"testing"
)
// TestParseAddedLines covers a multi-file diff: added-line numbering across
// hunks, removed lines not advancing the counter, and b/ path normalization.
func TestParseAddedLines(t *testing.T) {
	diff := `diff --git a/backend/onyx/server/foo.py b/backend/onyx/server/foo.py
index 1111111..2222222 100644
--- a/backend/onyx/server/foo.py
+++ b/backend/onyx/server/foo.py
@@ -10,1 +11,3 @@
context = old_value
+from fastapi import HTTPException
-raise OldError()
+raise HTTPException(status_code=400, detail="bad")
@@ -20,0 +23,1 @@
+task.delay (payload)
diff --git a/web/src/sections/Foo.tsx b/web/src/sections/Foo.tsx
index 1111111..2222222 100644
--- a/web/src/sections/Foo.tsx
+++ b/web/src/sections/Foo.tsx
@@ -3,0 +4 @@
+import { Thing } from "@/components/Thing";`
	addedLines, err := ParseAddedLines(diff)
	if err != nil {
		t.Fatalf("ParseAddedLines returned error: %v", err)
	}
	if len(addedLines) != 4 {
		t.Fatalf("expected 4 added lines, got %d", len(addedLines))
	}
	// Hunk starts at new line 11; the context line occupies it, so the first
	// added line lands on 12.
	if addedLines[0].Path != "backend/onyx/server/foo.py" || addedLines[0].LineNum != 12 {
		t.Fatalf("unexpected first added line: %+v", addedLines[0])
	}
	if addedLines[2].Path != "backend/onyx/server/foo.py" || addedLines[2].LineNum != 23 {
		t.Fatalf("unexpected third added line: %+v", addedLines[2])
	}
	if addedLines[3].Path != "web/src/sections/Foo.tsx" || addedLines[3].LineNum != 4 {
		t.Fatalf("unexpected final added line: %+v", addedLines[3])
	}
}
// TestParseAddedLinesRejectsMalformedHunkHeader ensures a hunk header that
// does not match "@@ -a,b +c,d @@" fails loudly instead of miscounting lines.
func TestParseAddedLinesRejectsMalformedHunkHeader(t *testing.T) {
	diff := `diff --git a/backend/onyx/server/foo.py b/backend/onyx/server/foo.py
--- a/backend/onyx/server/foo.py
+++ b/backend/onyx/server/foo.py
@@ invalid @@
+raise HTTPException(status_code=400, detail="bad")`
	if _, err := ParseAddedLines(diff); err == nil {
		t.Fatal("expected malformed hunk header to return an error")
	}
}
// TestCheckAddedLinesFindsExpectedViolations pins one hit per default rule,
// in input order, including two hits from the same rule on adjacent lines.
func TestCheckAddedLinesFindsExpectedViolations(t *testing.T) {
	lines := []AddedLine{
		{Path: "backend/onyx/server/foo.py", LineNum: 10, Content: "from fastapi import HTTPException"},
		{Path: "backend/onyx/server/foo.py", LineNum: 11, Content: `raise HTTPException(status_code=400, detail="bad")`},
		{Path: "backend/onyx/server/foo.py", LineNum: 12, Content: "response_model = FooResponse"},
		{Path: "backend/onyx/server/foo.py", LineNum: 13, Content: "my_task.delay (payload)"},
		{Path: "web/src/sections/Foo.tsx", LineNum: 20, Content: `export { Thing } from "@/components/Thing";`},
	}
	violations := CheckAddedLines(lines)
	if len(violations) != 5 {
		t.Fatalf("expected 5 violations, got %d: %+v", len(violations), violations)
	}
	expectedRules := []string{
		"no-new-http-exception",
		"no-new-http-exception",
		"no-new-response-model",
		"no-new-delay",
		"no-new-legacy-component-import",
	}
	for i, expectedRule := range expectedRules {
		if violations[i].RuleID != expectedRule {
			t.Fatalf("expected rule %q at index %d, got %q", expectedRule, i, violations[i].RuleID)
		}
	}
}
// TestCheckAddedLinesIgnoresCommentsStringsAndAllowedScopes covers every
// suppression path: string literals, comments, word-boundary misses, the
// main.py exemption, tests, the model server, and the legacy components tree.
func TestCheckAddedLinesIgnoresCommentsStringsAndAllowedScopes(t *testing.T) {
	lines := []AddedLine{
		{Path: "backend/onyx/server/foo.py", LineNum: 1, Content: `message = "HTTPException"`},
		{Path: "backend/onyx/server/foo.py", LineNum: 2, Content: `detail = "response_model="`},
		{Path: "backend/onyx/server/foo.py", LineNum: 3, Content: `note = ".delay("`},
		{Path: "backend/onyx/server/foo.py", LineNum: 4, Content: `# HTTPException`},
		{Path: "backend/onyx/server/foo.py", LineNum: 5, Content: `handler = HTTPExceptionAlias`},
		{Path: "backend/onyx/main.py", LineNum: 6, Content: `raise HTTPException(status_code=400, detail="bad")`},
		{Path: "backend/tests/unit/test_foo.py", LineNum: 7, Content: `from fastapi import HTTPException`},
		{Path: "backend/model_server/foo.py", LineNum: 8, Content: `task.delay(payload)`},
		{Path: "web/src/sections/Foo.tsx", LineNum: 9, Content: `const path = "@/components/Thing";`},
		{Path: "web/src/sections/Foo.tsx", LineNum: 10, Content: `// import { Thing } from "@/components/Thing";`},
		{Path: "web/src/components/Foo.tsx", LineNum: 11, Content: `import { Bar } from "@/components/Bar";`},
	}
	violations := CheckAddedLines(lines)
	if len(violations) != 0 {
		t.Fatalf("expected no violations, got %+v", violations)
	}
}
// TestCheckAddedLinesWithRulesSupportsCustomRuleSets verifies that a caller
// supplied rule set fully replaces the defaults (the legacy component import
// here goes unflagged) and that violations carry the custom rule metadata.
func TestCheckAddedLinesWithRulesSupportsCustomRuleSets(t *testing.T) {
	lines := []AddedLine{
		{Path: "backend/onyx/server/foo.py", LineNum: 12, Content: "response_model = FooResponse"},
		{Path: "web/src/sections/Foo.tsx", LineNum: 20, Content: `import type { Thing } from "@/components/Thing";`},
	}
	rules := []Rule{
		{
			ID:      "python-response-model-only",
			Message: "response_model is not allowed",
			Scope:   backendProductPythonScope(),
			Match: func(line lineView) bool {
				return responseModelPattern.MatchString(line.CodeSansStrings)
			},
		},
	}
	violations := CheckAddedLinesWithRules(lines, rules)
	expected := []Violation{
		{
			RuleID:  "python-response-model-only",
			Path:    "backend/onyx/server/foo.py",
			LineNum: 12,
			Message: "response_model is not allowed",
			Content: "response_model = FooResponse",
		},
	}
	if !reflect.DeepEqual(expected, violations) {
		t.Fatalf("unexpected violations: %+v", violations)
	}
}

View File

@@ -0,0 +1,101 @@
package agentcheck
import "strings"
// stripLineComment removes any trailing line comment from content, choosing
// the comment marker from the file extension. Unknown extensions pass through
// untouched.
func stripLineComment(path string, content string) string {
	if strings.HasSuffix(path, ".py") {
		return stripCommentMarker(content, "#")
	}
	if isJSLikePath(path) {
		return stripCommentMarker(content, "//")
	}
	return content
}
// isJSLikePath reports whether path names a JavaScript or TypeScript source
// file (.js, .jsx, .ts, .tsx).
func isJSLikePath(path string) bool {
	for _, ext := range []string{".js", ".jsx", ".ts", ".tsx"} {
		if strings.HasSuffix(path, ext) {
			return true
		}
	}
	return false
}
// stripCommentMarker returns line with everything from the first occurrence
// of marker onward removed, ignoring marker text that appears inside a quoted
// string. Quote state tracks ", ', and ` quotes; backslash escapes are
// honored inside non-backtick quotes (the code deliberately skips escape
// handling for backticks).
func stripCommentMarker(line string, marker string) string {
	if marker == "" {
		return line
	}
	var builder strings.Builder
	quote := byte(0) // active quote character; 0 when outside any string
	escaped := false // previous char inside the quote was a backslash
	for i := 0; i < len(line); i++ {
		ch := line[i]
		if quote != 0 {
			// Inside a string: copy everything through, tracking escapes and
			// the closing quote.
			builder.WriteByte(ch)
			if escaped {
				escaped = false
				continue
			}
			if ch == '\\' && quote != '`' {
				escaped = true
				continue
			}
			if ch == quote {
				quote = 0
			}
			continue
		}
		// Only a marker found outside of quotes starts a comment.
		if strings.HasPrefix(line[i:], marker) {
			break
		}
		builder.WriteByte(ch)
		if isQuote(ch) {
			quote = ch
		}
	}
	return builder.String()
}
// stripQuotedStrings blanks out quoted strings in line: the opening quote
// becomes a single space (so tokens on either side stay separated) and the
// string body plus closing quote are dropped. Rule regexes run on the result
// so literal text cannot trigger code-pattern matches.
func stripQuotedStrings(line string) string {
	var builder strings.Builder
	quote := byte(0) // active quote character; 0 when outside any string
	escaped := false // previous char inside the quote was a backslash
	for i := 0; i < len(line); i++ {
		ch := line[i]
		if quote != 0 {
			// Inside a string: emit nothing, just track escapes and the
			// closing quote. Backslash escapes are honored except inside
			// backtick strings.
			if escaped {
				escaped = false
				continue
			}
			if ch == '\\' && quote != '`' {
				escaped = true
				continue
			}
			if ch == quote {
				quote = 0
			}
			continue
		}
		if isQuote(ch) {
			quote = ch
			builder.WriteByte(' ')
			continue
		}
		builder.WriteByte(ch)
	}
	return builder.String()
}
// isQuote reports whether ch opens/closes a string literal in the supported
// languages: double, single, or backtick quote.
func isQuote(ch byte) bool {
	switch ch {
	case '"', '\'', '`':
		return true
	default:
		return false
	}
}

View File

@@ -0,0 +1,170 @@
package agentcheck
import (
"regexp"
"strings"
)
var (
	// httpExceptionPattern flags any bare HTTPException identifier.
	httpExceptionPattern = regexp.MustCompile(`\bHTTPException\b`)
	// responseModelPattern flags FastAPI response_model= keyword usage.
	responseModelPattern = regexp.MustCompile(`\bresponse_model\s*=`)
	// delayCallPattern flags Celery-style .delay( calls, whitespace tolerant.
	delayCallPattern = regexp.MustCompile(`\.\s*delay\s*\(`)
	// componentPathPattern flags module specifiers that resolve into a
	// components/ directory, whether aliased (@/) or relative.
	componentPathPattern = regexp.MustCompile(`["'](?:@/components/|\.\.?/components/|\.\.?/.*/components/)`)
	// importExportPattern matches lines that begin with import or export.
	importExportPattern = regexp.MustCompile(`^\s*(?:import|export)\b`)
)

// Scope decides whether a rule applies to a given repo-relative path.
type Scope func(path string) bool

// Matcher decides whether a single added line violates a rule.
type Matcher func(line lineView) bool

// Rule pairs a scope and matcher with the violation metadata to report.
type Rule struct {
	ID      string
	Message string
	Scope   Scope
	Match   Matcher
}

// lineView is an AddedLine plus derived forms of its content for matchers:
// the normalized path, comment-stripped code, that code with string-literal
// contents blanked, and a whitespace-trimmed variant.
type lineView struct {
	AddedLine
	Path            string
	Code            string
	CodeSansStrings string
	TrimmedCode     string
}
// CheckAddedLines runs the default rule set over the added lines.
func CheckAddedLines(lines []AddedLine) []Violation {
	return CheckAddedLinesWithRules(lines, DefaultRules())
}
// CheckAddedLinesWithRules evaluates every rule against every added line and
// returns one Violation per (line, rule) hit, preserving input order. A nil
// Scope means "all paths"; a nil Match never fires.
func CheckAddedLinesWithRules(lines []AddedLine, rules []Rule) []Violation {
	var found []Violation
	for _, candidate := range lines {
		view := buildLineView(candidate)
		if view.Path == "" {
			// e.g. a "+++ /dev/null" target normalizes to an empty path.
			continue
		}
		for _, rule := range rules {
			inScope := rule.Scope == nil || rule.Scope(view.Path)
			if !inScope || rule.Match == nil {
				continue
			}
			if rule.Match(view) {
				found = append(found, Violation{
					RuleID:  rule.ID,
					Path:    view.Path,
					LineNum: view.LineNum,
					Message: rule.Message,
					Content: view.Content,
				})
			}
		}
	}
	return found
}
// DefaultRules returns a fresh copy of the built-in rule set so callers can
// append to or reorder it without mutating the package-level default.
func DefaultRules() []Rule {
	rules := make([]Rule, len(defaultRules))
	copy(rules, defaultRules)
	return rules
}
// defaultRules is the built-in agent-check rule set. Matchers receive a
// lineView whose CodeSansStrings has comments stripped and string-literal
// contents blanked, so literal text cannot trigger a rule.
var defaultRules = []Rule{
	{
		ID:      "no-new-http-exception",
		Message: "Do not introduce new HTTPException usage in backend product code. Raise OnyxError instead.",
		// backend/onyx/main.py is explicitly exempt from this rule.
		Scope: backendProductPythonScope(exactPath("backend/onyx/main.py")),
		Match: func(line lineView) bool {
			return hasPythonCode(line) && httpExceptionPattern.MatchString(line.CodeSansStrings)
		},
	},
	{
		ID:      "no-new-response-model",
		Message: "Do not introduce response_model on new FastAPI APIs. Type the function directly instead.",
		Scope:   backendProductPythonScope(),
		Match: func(line lineView) bool {
			return hasPythonCode(line) && responseModelPattern.MatchString(line.CodeSansStrings)
		},
	},
	{
		ID:      "no-new-delay",
		Message: "Do not introduce Celery .delay() calls. Use an enqueue path that sets expires= explicitly.",
		Scope:   backendProductPythonScope(),
		Match: func(line lineView) bool {
			return hasPythonCode(line) && delayCallPattern.MatchString(line.CodeSansStrings)
		},
	},
	{
		ID:      "no-new-legacy-component-import",
		Message: "Do not introduce new imports from web/src/components. Prefer Opal or refresh-components.",
		// Applies everywhere under web/src except the legacy tree itself.
		Scope: nonLegacyWebSourceScope(),
		Match: func(line lineView) bool {
			return isLegacyComponentImport(line)
		},
	},
}
// buildLineView derives the matcher-facing views of one added line: the path
// normalized to slashes, the content with any trailing line comment removed,
// that code with string-literal contents blanked, and a trimmed variant.
func buildLineView(line AddedLine) lineView {
	path := normalizeDiffPath(line.Path)
	code := stripLineComment(path, line.Content)
	return lineView{
		AddedLine:       line,
		Path:            path,
		Code:            code,
		CodeSansStrings: stripQuotedStrings(code),
		TrimmedCode:     strings.TrimSpace(code),
	}
}
// backendProductPythonScope scopes a rule to backend product Python files,
// excluding tests, the model server, __pycache__ artifacts, and any extra
// caller-supplied exclusion scopes.
func backendProductPythonScope(excluded ...Scope) Scope {
	return func(path string) bool {
		isBackendPython := strings.HasPrefix(path, "backend/") && strings.HasSuffix(path, ".py")
		if !isBackendPython {
			return false
		}
		for _, prefix := range []string{"backend/tests/", "backend/model_server/"} {
			if strings.HasPrefix(path, prefix) {
				return false
			}
		}
		if strings.Contains(path, "/__pycache__/") {
			return false
		}
		for _, exclude := range excluded {
			if exclude != nil && exclude(path) {
				return false
			}
		}
		return true
	}
}
// nonLegacyWebSourceScope scopes a rule to web/src files outside the legacy
// web/src/components tree.
func nonLegacyWebSourceScope() Scope {
	return func(path string) bool {
		inWebSrc := strings.HasPrefix(path, "web/src/")
		inLegacyComponents := strings.HasPrefix(path, "web/src/components/")
		return inWebSrc && !inLegacyComponents
	}
}
// exactPath builds a Scope matching exactly one repo-relative path.
func exactPath(target string) Scope {
	return func(path string) bool { return path == target }
}
// hasPythonCode reports whether anything remains on the line once comments
// and string-literal contents are stripped — i.e. the line carries real code
// for the Python rules to inspect.
func hasPythonCode(line lineView) bool {
	return strings.TrimSpace(line.CodeSansStrings) != ""
}
// isLegacyComponentImport reports whether the line is an import/export
// statement whose module specifier points into a components/ directory.
func isLegacyComponentImport(line lineView) bool {
	if line.TrimmedCode == "" || !importExportPattern.MatchString(line.TrimmedCode) {
		return false
	}
	// Match against Code (not CodeSansStrings): the module specifier lives
	// inside a string literal and would be blanked out otherwise.
	return componentPathPattern.MatchString(line.Code)
}

View File

@@ -0,0 +1,107 @@
package agentdocs
import (
"fmt"
"os"
"path/filepath"
"regexp"
"strings"
)
// markdownLinkPattern captures the target of inline markdown links: [text](target).
var markdownLinkPattern = regexp.MustCompile(`\[[^\]]+\]\(([^)]+)\)`)

// requiredFiles lists every knowledge-base document that must exist once the
// agent-lab docs tree is present in a checkout.
var requiredFiles = []string{
	"AGENTS.md",
	"docs/agent/README.md",
	"docs/agent/ARCHITECTURE.md",
	"docs/agent/BRANCHING.md",
	"docs/agent/HARNESS.md",
	"docs/agent/GOLDEN_RULES.md",
	"docs/agent/LEGACY_ZONES.md",
	"docs/agent/QUALITY_SCORE.md",
}

// Violation describes one docs problem found under a repo root.
type Violation struct {
	Path    string
	Message string
}
// Validate checks the agent-lab docs tree under root. Repos without
// docs/agent/README.md opt out entirely and return nil. Otherwise it reports
// every missing required file first, then every broken relative markdown link
// inside the required files that are readable.
func Validate(root string) []Violation {
	// Opt-out sentinel: no agent README means this checkout does not carry
	// the knowledge base at all.
	if _, err := os.Stat(filepath.Join(root, filepath.FromSlash("docs/agent/README.md"))); err != nil {
		return nil
	}
	var violations []Violation
	// Pass 1: existence of every required file.
	for _, relPath := range requiredFiles {
		if _, err := os.Stat(filepath.Join(root, filepath.FromSlash(relPath))); err != nil {
			violations = append(violations, Violation{
				Path:    relPath,
				Message: "required agent-lab knowledge-base file is missing",
			})
		}
	}
	// Pass 2: link validation for the files that can be read.
	for _, relPath := range requiredFiles {
		absPath := filepath.Join(root, filepath.FromSlash(relPath))
		content, err := os.ReadFile(absPath)
		if err != nil {
			// Missing/unreadable files were already reported in pass 1.
			continue
		}
		violations = append(violations, validateMarkdownLinks(root, relPath, string(content))...)
	}
	return violations
}
// validateMarkdownLinks returns a Violation for every markdown link in
// content whose target does not exist on disk. External (http/https),
// same-page (#...), and mailto: links are skipped. Relative targets are
// resolved against the directory containing relPath. Optional markdown
// link titles ([text](path "Title")), <...> destination wrappers, and
// #anchors are stripped before the existence check — previously a titled
// link to an existing file was falsely reported as broken.
func validateMarkdownLinks(root string, relPath string, content string) []Violation {
	var violations []Violation
	matches := markdownLinkPattern.FindAllStringSubmatch(content, -1)
	docDir := filepath.Dir(filepath.Join(root, filepath.FromSlash(relPath)))
	for _, match := range matches {
		if len(match) != 2 {
			continue
		}
		target := strings.TrimSpace(match[1])
		if target == "" {
			continue
		}
		if strings.HasPrefix(target, "http://") || strings.HasPrefix(target, "https://") {
			continue
		}
		if strings.HasPrefix(target, "#") || strings.HasPrefix(target, "mailto:") {
			continue
		}
		target = stripLinkDecorations(target)
		target = stripAnchor(target)
		if target == "" {
			// e.g. "<>" or a bare title — nothing checkable left.
			continue
		}
		var absTarget string
		if filepath.IsAbs(target) {
			absTarget = target
		} else {
			absTarget = filepath.Join(docDir, target)
		}
		if _, err := os.Stat(absTarget); err != nil {
			violations = append(violations, Violation{
				Path: relPath,
				Message: fmt.Sprintf(
					"broken markdown link target: %s",
					target,
				),
			})
		}
	}
	return violations
}

// stripLinkDecorations removes an optional markdown link title (everything
// after the first whitespace, as in `path "Title"`) and an optional <...>
// wrapper around the destination.
func stripLinkDecorations(target string) string {
	if idx := strings.IndexAny(target, " \t"); idx >= 0 {
		target = target[:idx]
	}
	if strings.HasPrefix(target, "<") && strings.HasSuffix(target, ">") {
		target = strings.TrimSuffix(strings.TrimPrefix(target, "<"), ">")
	}
	return strings.TrimSpace(target)
}
// stripAnchor returns target with any trailing #fragment removed; a
// target with no "#" is returned unchanged.
func stripAnchor(target string) string {
	idx := strings.Index(target, "#")
	if idx < 0 {
		return target
	}
	return target[:idx]
}

View File

@@ -0,0 +1,61 @@
package agentdocs
import (
"os"
"path/filepath"
"testing"
)
// TestValidateSuccess: a repo with every required agent-lab file and only
// resolvable markdown links yields zero violations.
func TestValidateSuccess(t *testing.T) {
	root := t.TempDir()
	writeFile(t, filepath.Join(root, "AGENTS.md"), `[Agent Docs](./docs/agent/README.md)`)
	// Exercises both a sibling link and a parent-relative link.
	writeFile(t, filepath.Join(root, "docs/agent/README.md"), `[Architecture](./ARCHITECTURE.md)
[Root](../../AGENTS.md)`)
	writeFile(t, filepath.Join(root, "docs/agent/ARCHITECTURE.md"), `ok`)
	writeFile(t, filepath.Join(root, "docs/agent/BRANCHING.md"), `ok`)
	writeFile(t, filepath.Join(root, "docs/agent/HARNESS.md"), `ok`)
	writeFile(t, filepath.Join(root, "docs/agent/GOLDEN_RULES.md"), `ok`)
	writeFile(t, filepath.Join(root, "docs/agent/LEGACY_ZONES.md"), `ok`)
	writeFile(t, filepath.Join(root, "docs/agent/QUALITY_SCORE.md"), `ok`)
	violations := Validate(root)
	if len(violations) != 0 {
		t.Fatalf("expected no violations, got %+v", violations)
	}
}
// TestValidateMissingAndBrokenLinks: missing required files and a broken
// link together produce several violations (exact count not pinned).
func TestValidateMissingAndBrokenLinks(t *testing.T) {
	root := t.TempDir()
	writeFile(t, filepath.Join(root, "AGENTS.md"), `[Agent Docs](./docs/agent/README.md)`)
	writeFile(t, filepath.Join(root, "docs/agent/README.md"), `[Missing](./MISSING.md)`)
	writeFile(t, filepath.Join(root, "docs/agent/ARCHITECTURE.md"), `ok`)
	violations := Validate(root)
	if len(violations) < 2 {
		t.Fatalf("expected multiple violations, got %+v", violations)
	}
}
// TestValidateSkipsReposWithoutAgentLabDocs: without docs/agent/README.md
// the validator opts out entirely and reports nothing.
func TestValidateSkipsReposWithoutAgentLabDocs(t *testing.T) {
	root := t.TempDir()
	writeFile(t, filepath.Join(root, "README.md"), `plain repo`)
	violations := Validate(root)
	if len(violations) != 0 {
		t.Fatalf("expected no violations for repo without agent-lab docs, got %+v", violations)
	}
}
// writeFile is a test helper that writes content to path, creating parent
// directories as needed; any failure aborts the test.
func writeFile(t *testing.T, path string, content string) {
	t.Helper()
	if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
		t.Fatalf("failed to create dir for %s: %v", path, err)
	}
	if err := os.WriteFile(path, []byte(content), 0o644); err != nil {
		t.Fatalf("failed to write %s: %v", path, err)
	}
}

View File

@@ -0,0 +1,585 @@
package agentlab
import (
"crypto/sha256"
"encoding/hex"
"encoding/json"
"errors"
"fmt"
"net"
"os"
"os/exec"
"path/filepath"
"regexp"
"sort"
"strings"
"time"
)
// Filesystem layout and networking defaults for agent-lab worktrees.
const (
	stateDirName     = "onyx-agent-lab" // state root under the common git dir
	worktreesDirName = "worktrees"      // per-worktree state subdirectory
	envFileName      = ".env.agent-lab"     // backend env overrides (under .vscode)
	webEnvFileName   = ".env.web.agent-lab" // web env overrides (under .vscode)
	// Base ports; AllocatePorts scans upward from these at a common offset.
	defaultWebPort   = 3300
	defaultAPIPort   = 8380
	defaultModelPort = 9300
	defaultMCPPort   = 8390
	portSearchWindow    = 400      // max offsets tried when allocating ports
	dockerProjectPrefix = "onyx"   // compose project name prefix
	searchInfraMode     = "shared" // search infra is never namespaced (see DependencyWarnings)
)

// nonAlphaNumPattern matches runs of characters that Slug collapses to "-".
var nonAlphaNumPattern = regexp.MustCompile(`[^a-z0-9]+`)
// DependencyMode controls whether a worktree shares backing services
// (Postgres/Redis/file store) with other worktrees or gets namespaced ones.
type DependencyMode string

const (
	DependencyModeShared     DependencyMode = "shared"
	DependencyModeNamespaced DependencyMode = "namespaced"
)

// WorktreeLane classifies a worktree by branch-name convention; it drives
// base-ref selection in ResolveCreateBaseRef.
type WorktreeLane string

const (
	WorktreeLaneLab     WorktreeLane = "lab"     // harness work (lab/... branches)
	WorktreeLaneProduct WorktreeLane = "product" // conventional-commit product branches
	WorktreeLaneCustom  WorktreeLane = "custom"  // anything else
)

// productBranchPrefixes are the conventional-commit prefixes that mark a
// branch as product work for lane inference.
var productBranchPrefixes = []string{
	"build/",
	"chore/",
	"ci/",
	"docs/",
	"feat/",
	"fix/",
	"perf/",
	"refactor/",
	"revert/",
	"style/",
	"test/",
}
// DependencyConfig describes how a worktree's backing services are
// provisioned; the namespaced fields are populated only in namespaced mode.
type DependencyConfig struct {
	Mode              DependencyMode `json:"mode"`
	Namespace         string         `json:"namespace,omitempty"`         // worktree id used as the namespace
	PostgresDatabase  string         `json:"postgres_database,omitempty"` // per-worktree database name
	RedisPrefix       string         `json:"redis_prefix,omitempty"`      // per-worktree Redis key prefix
	FileStoreBucket   string         `json:"file_store_bucket,omitempty"` // per-worktree file-store bucket
	SearchInfraMode   string         `json:"search_infra_mode"`           // currently always "shared"
	LastProvisionedAt string         `json:"last_provisioned_at,omitempty"`
}

// PortSet holds the four host ports allocated to one worktree.
type PortSet struct {
	Web         int `json:"web"`
	API         int `json:"api"`
	ModelServer int `json:"model_server"`
	MCP         int `json:"mcp"`
}

// URLSet holds the loopback URLs derived from a worktree's ports.
type URLSet struct {
	Web string `json:"web"`
	API string `json:"api"`
	MCP string `json:"mcp"`
}

// Manifest is the persisted record of one agent-lab worktree: identity,
// filesystem layout, compose project, dependency config, ports/URLs, and
// verification bookkeeping. It is serialized as manifest.json under the
// worktree's state dir.
type Manifest struct {
	ID                string           `json:"id"` // slug + short hash, see worktreeID
	Branch            string           `json:"branch"`
	Lane              WorktreeLane     `json:"lane,omitempty"` // empty in older manifests; see ResolvedLane
	BaseRef           string           `json:"base_ref"`
	CreatedFromPath   string           `json:"created_from_path"` // source checkout the worktree was created from
	CheckoutPath      string           `json:"checkout_path"`
	StateDir          string           `json:"state_dir"`
	ArtifactDir       string           `json:"artifact_dir"`
	EnvFile           string           `json:"env_file"`
	WebEnvFile        string           `json:"web_env_file"`
	ComposeProject    string           `json:"compose_project"`
	Dependencies      DependencyConfig `json:"dependencies"`
	Ports             PortSet          `json:"ports"`
	URLs              URLSet           `json:"urls"`
	CreatedAt         time.Time        `json:"created_at"`
	LastVerifiedAt    string           `json:"last_verified_at,omitempty"`
	LastVerifySummary string           `json:"last_verify_summary,omitempty"`
}
// Slug normalizes value into a lowercase, hyphen-separated identifier safe
// for directory and project names. Any run of non-alphanumeric characters
// (including "/" and "_") collapses to a single "-"; leading/trailing
// hyphens are trimmed; an empty result falls back to "worktree".
func Slug(value string) string {
	normalized := strings.ToLower(strings.TrimSpace(value))
	// nonAlphaNumPattern already maps "/" and "_" (and any other
	// separator) to "-", so no per-character ReplaceAll pass is needed.
	normalized = nonAlphaNumPattern.ReplaceAllString(normalized, "-")
	normalized = strings.Trim(normalized, "-")
	if normalized == "" {
		return "worktree"
	}
	return normalized
}
// worktreeID derives a stable id for a branch: its slug plus a short
// sha256 digest of the raw value, so branches that slugify identically
// (e.g. foo_bar vs foo-bar) still get distinct ids.
func worktreeID(value string) string {
	digest := sha256.Sum256([]byte(value))
	return Slug(value) + "-" + hex.EncodeToString(digest[:4])
}
// ComposeProjectName builds the docker compose project name for a
// worktree id, capping the slug portion at 32 characters.
func ComposeProjectName(id string) string {
	slug := Slug(id)
	const maxSlugLen = 32
	if len(slug) > maxSlugLen {
		slug = slug[:maxSlugLen]
	}
	return dockerProjectPrefix + "-" + slug
}
// GetCommonGitDir returns the absolute path of the repository's common git
// directory (shared across all worktrees), via
// `git rev-parse --path-format=absolute --git-common-dir`.
// The git command runs in the process working directory.
func GetCommonGitDir() (string, error) {
	cmd := exec.Command("git", "rev-parse", "--path-format=absolute", "--git-common-dir")
	output, err := cmd.Output()
	if err != nil {
		return "", fmt.Errorf("git rev-parse --git-common-dir failed: %w", err)
	}
	return strings.TrimSpace(string(output)), nil
}
// StateRoot returns the agent-lab state root inside the common git dir.
func StateRoot(commonGitDir string) string {
	return filepath.Join(commonGitDir, stateDirName)
}
// WorktreesRoot returns the directory holding per-worktree state dirs.
func WorktreesRoot(commonGitDir string) string {
	return filepath.Join(StateRoot(commonGitDir), worktreesDirName)
}
// WorktreeStateDir returns the state directory for one worktree id
// (the id is re-slugged defensively).
func WorktreeStateDir(commonGitDir, id string) string {
	return filepath.Join(WorktreesRoot(commonGitDir), Slug(id))
}
// ManifestPath returns the manifest.json path for one worktree id.
func ManifestPath(commonGitDir, id string) string {
	return filepath.Join(WorktreeStateDir(commonGitDir, id), "manifest.json")
}
// DefaultCheckoutPath places a worktree checkout inside a sibling
// "<repo>-worktrees" directory next to the main repo root.
func DefaultCheckoutPath(repoRoot, id string) string {
	siblingRoot := filepath.Join(filepath.Dir(repoRoot), filepath.Base(repoRoot)+"-worktrees")
	return filepath.Join(siblingRoot, worktreeID(id))
}
// NormalizeBranchForLane strips well-known ref/remote/codex prefixes so
// lane inference sees the bare branch name. Prefixes are removed in
// order — refs/heads/, origin/, codex/ — each at most once.
func NormalizeBranchForLane(branch string) string {
	normalized := strings.TrimSpace(branch)
	for _, prefix := range []string{"refs/heads/", "origin/", "codex/"} {
		normalized = strings.TrimPrefix(normalized, prefix)
	}
	return normalized
}
// InferLane classifies a branch name: lab/ branches are harness work,
// conventional-commit prefixes are product work, anything else is custom.
func InferLane(branch string) WorktreeLane {
	name := NormalizeBranchForLane(branch)
	if strings.HasPrefix(name, "lab/") {
		return WorktreeLaneLab
	}
	for _, productPrefix := range productBranchPrefixes {
		if strings.HasPrefix(name, productPrefix) {
			return WorktreeLaneProduct
		}
	}
	return WorktreeLaneCustom
}
// BaseRefSelection is the outcome of choosing a base ref for a new
// worktree branch: the ref to branch from, the inferred lane, and a
// human-readable explanation of the choice.
type BaseRefSelection struct {
	Ref    string
	Lane   WorktreeLane
	Reason string
}
// ResolveCreateBaseRef picks the base ref for a new worktree branch. An
// explicit requested ref always wins. Otherwise the branch's inferred lane
// selects a preference list (agent-lab refs for lab work, main for product
// work); the first existing ref is used, falling back to HEAD with an
// explanatory reason. refExists is injected so callers and tests control
// ref lookup.
func ResolveCreateBaseRef(branch, requested string, refExists func(string) bool) BaseRefSelection {
	lane := InferLane(branch)
	if requested != "" {
		return BaseRefSelection{
			Ref:    requested,
			Lane:   lane,
			Reason: "using explicit --from value",
		}
	}
	var candidates []string
	var laneLabel string
	var fallbackReason string
	switch lane {
	case WorktreeLaneLab:
		candidates = []string{"codex/agent-lab", "agent-lab", "origin/codex/agent-lab", "origin/agent-lab"}
		laneLabel = "lab"
		fallbackReason = "inferred lab lane from branch name, but no agent-lab ref exists locally; falling back to HEAD"
	case WorktreeLaneProduct:
		candidates = []string{"origin/main", "main"}
		laneLabel = "product"
		fallbackReason = "inferred product lane from branch name, but no main ref exists locally; falling back to HEAD"
	default:
		return BaseRefSelection{
			Ref:    "HEAD",
			Lane:   lane,
			Reason: "no lane inferred from branch name; defaulting to HEAD. Prefer codex/lab/... for harness work and codex/fix... or codex/feat... for product work, or pass --from explicitly",
		}
	}
	for _, candidate := range candidates {
		if refExists(candidate) {
			return BaseRefSelection{
				Ref:    candidate,
				Lane:   lane,
				Reason: fmt.Sprintf("inferred %s lane from branch name; using %s as the base ref", laneLabel, candidate),
			}
		}
	}
	return BaseRefSelection{Ref: "HEAD", Lane: lane, Reason: fallbackReason}
}
// GitRefExists reports whether ref resolves in the current repository,
// using `git rev-parse --verify --quiet` in the process working directory.
func GitRefExists(ref string) bool {
	cmd := exec.Command("git", "rev-parse", "--verify", "--quiet", ref)
	return cmd.Run() == nil
}
// BuildManifest assembles the full manifest for a new worktree: identity
// (id derived from the branch), filesystem layout (state/artifact/env
// paths), compose project name, dependency config, and loopback URLs
// derived from the allocated ports. It is pure — it does not touch the
// filesystem; WriteManifest/WriteEnvFiles persist the result.
func BuildManifest(repoRoot, commonGitDir, branch string, lane WorktreeLane, baseRef, checkoutPath string, ports PortSet, dependencyMode DependencyMode) Manifest {
	id := worktreeID(branch)
	stateDir := WorktreeStateDir(commonGitDir, id)
	artifactDir := filepath.Join(stateDir, "artifacts")
	// Env override files live inside the checkout's .vscode directory.
	envDir := filepath.Join(checkoutPath, ".vscode")
	return Manifest{
		ID:              id,
		Branch:          branch,
		Lane:            lane,
		BaseRef:         baseRef,
		CreatedFromPath: repoRoot,
		CheckoutPath:    checkoutPath,
		StateDir:        stateDir,
		ArtifactDir:     artifactDir,
		EnvFile:         filepath.Join(envDir, envFileName),
		WebEnvFile:      filepath.Join(envDir, webEnvFileName),
		ComposeProject:  ComposeProjectName(id),
		Dependencies:    BuildDependencyConfig(branch, dependencyMode),
		Ports:           ports,
		URLs: URLSet{
			Web: fmt.Sprintf("http://127.0.0.1:%d", ports.Web),
			API: fmt.Sprintf("http://127.0.0.1:%d", ports.API),
			MCP: fmt.Sprintf("http://127.0.0.1:%d", ports.MCP),
		},
		CreatedAt: time.Now().UTC(),
	}
}
// ResolvedLane returns the stored lane, inferring it from the branch name
// when the Lane field is empty.
func (m Manifest) ResolvedLane() WorktreeLane {
	if m.Lane != "" {
		return m.Lane
	}
	return InferLane(m.Branch)
}
// BuildDependencyConfig derives dependency settings for a branch. An empty
// mode defaults to shared. Namespaced mode additionally derives a
// per-worktree Postgres database name, Redis key prefix, and file-store
// bucket from the worktree id, each capped at 63 characters.
func BuildDependencyConfig(branch string, mode DependencyMode) DependencyConfig {
	if mode == "" {
		mode = DependencyModeShared
	}
	config := DependencyConfig{
		Mode:            mode,
		SearchInfraMode: searchInfraMode,
	}
	if mode != DependencyModeNamespaced {
		return config
	}
	ns := worktreeID(branch)
	config.Namespace = ns
	config.RedisPrefix = "agentlab:" + ns

	db := "agentlab_" + strings.ReplaceAll(ns, "-", "_")
	if len(db) > 63 { // keep within 63 chars
		db = db[:63]
	}
	config.PostgresDatabase = db

	bucket := "onyx-agentlab-" + ns
	if len(bucket) > 63 { // keep within 63 chars, and never end on a hyphen
		bucket = strings.Trim(bucket[:63], "-")
	}
	config.FileStoreBucket = bucket
	return config
}
// ResolvedDependencies returns the manifest's dependency config, filling
// in defaults for manifests that predate the Mode or SearchInfraMode
// fields.
func (m Manifest) ResolvedDependencies() DependencyConfig {
	if m.Dependencies.Mode == "" {
		return BuildDependencyConfig(m.Branch, DependencyModeShared)
	}
	deps := m.Dependencies
	if deps.SearchInfraMode == "" {
		deps.SearchInfraMode = searchInfraMode
	}
	return deps
}
// RuntimeEnv returns the environment overrides for processes running in
// this worktree: agent-lab metadata, URL/port wiring, and — in namespaced
// dependency mode — per-worktree Postgres/Redis/bucket settings.
func (m Manifest) RuntimeEnv() map[string]string {
	// Resolve once up front; the previous version recomputed the
	// dependency config three times per call.
	deps := m.ResolvedDependencies()
	env := map[string]string{
		"AGENT_LAB_ARTIFACT_DIR":      m.ArtifactDir,
		"AGENT_LAB_DEPENDENCY_MODE":   string(deps.Mode),
		"AGENT_LAB_SEARCH_INFRA_MODE": deps.SearchInfraMode,
		"AGENT_LAB_WORKTREE_ID":       m.ID,
		"AGENT_LAB_WORKTREE_URL":      m.URLs.Web,
		"BASE_URL":                    m.URLs.Web,
		"INTERNAL_URL":                m.URLs.API,
		"MCP_INTERNAL_URL":            m.URLs.MCP,
		"PORT":                        fmt.Sprintf("%d", m.Ports.Web),
		"WEB_DOMAIN":                  m.URLs.Web,
	}
	if deps.Namespace != "" {
		env["AGENT_LAB_NAMESPACE"] = deps.Namespace
	}
	if deps.Mode == DependencyModeNamespaced {
		env["POSTGRES_DB"] = deps.PostgresDatabase
		env["DEFAULT_REDIS_PREFIX"] = deps.RedisPrefix
		env["S3_FILE_STORE_BUCKET_NAME"] = deps.FileStoreBucket
	}
	return env
}
// ShellEnv returns the environment for interactive shells in this
// worktree; currently identical to RuntimeEnv.
func (m Manifest) ShellEnv() map[string]string {
	return m.RuntimeEnv()
}
// DependencyWarnings returns operator-facing caveats about this worktree's
// dependency setup. Today that is only the shared-search-infra warning,
// which applies whenever SearchInfraMode is "shared".
func (m Manifest) DependencyWarnings() []string {
	if m.ResolvedDependencies().SearchInfraMode != searchInfraMode {
		return nil
	}
	return []string{
		"Search infrastructure remains shared across worktrees. OpenSearch/Vespa state is not namespaced or torn down by agent-lab.",
	}
}
// EnvFileContents renders the generated env-override file for the given
// kind: "web" for the web app, anything else for backend services. Both
// variants share the agent-lab metadata header; the web variant adds
// PORT/BASE_URL wiring, while the backend variant adds namespaced
// datastore settings when in namespaced dependency mode. The result ends
// with a trailing newline.
func (m Manifest) EnvFileContents(kind string) string {
	values := m.RuntimeEnv()
	deps := m.ResolvedDependencies()
	var lines []string
	lines = append(lines, "# Generated by `ods worktree create` for agent-lab.")
	lines = append(lines, "# This file only contains worktree-local overrides.")
	lines = append(lines, fmt.Sprintf("AGENT_LAB_WORKTREE_ID=%s", m.ID))
	lines = append(lines, fmt.Sprintf("AGENT_LAB_ARTIFACT_DIR=%s", m.ArtifactDir))
	lines = append(lines, fmt.Sprintf("AGENT_LAB_DEPENDENCY_MODE=%s", deps.Mode))
	lines = append(lines, fmt.Sprintf("AGENT_LAB_SEARCH_INFRA_MODE=%s", deps.SearchInfraMode))
	if deps.Namespace != "" {
		lines = append(lines, fmt.Sprintf("AGENT_LAB_NAMESPACE=%s", deps.Namespace))
	}
	switch kind {
	case "web":
		lines = append(lines, fmt.Sprintf("PORT=%d", m.Ports.Web))
		lines = append(lines, fmt.Sprintf("BASE_URL=%s", values["BASE_URL"]))
		lines = append(lines, fmt.Sprintf("WEB_DOMAIN=%s", values["WEB_DOMAIN"]))
		lines = append(lines, fmt.Sprintf("INTERNAL_URL=%s", values["INTERNAL_URL"]))
		lines = append(lines, fmt.Sprintf("MCP_INTERNAL_URL=%s", values["MCP_INTERNAL_URL"]))
	default:
		lines = append(lines, fmt.Sprintf("WEB_DOMAIN=%s", values["WEB_DOMAIN"]))
		lines = append(lines, fmt.Sprintf("INTERNAL_URL=%s", values["INTERNAL_URL"]))
		lines = append(lines, fmt.Sprintf("MCP_INTERNAL_URL=%s", values["MCP_INTERNAL_URL"]))
		// Backend processes also need the namespaced datastore settings.
		if deps.Mode == DependencyModeNamespaced {
			lines = append(lines, fmt.Sprintf("POSTGRES_DB=%s", deps.PostgresDatabase))
			lines = append(lines, fmt.Sprintf("DEFAULT_REDIS_PREFIX=%s", deps.RedisPrefix))
			lines = append(lines, fmt.Sprintf("S3_FILE_STORE_BUCKET_NAME=%s", deps.FileStoreBucket))
		}
	}
	return strings.Join(lines, "\n") + "\n"
}
// WriteManifest persists the manifest as indented JSON under the worktree
// state dir, creating the state and artifact directories first.
func WriteManifest(commonGitDir string, manifest Manifest) error {
	dirs := []struct{ path, label string }{
		{WorktreeStateDir(commonGitDir, manifest.ID), "create worktree state dir"},
		{manifest.ArtifactDir, "create artifact dir"},
	}
	for _, d := range dirs {
		if err := os.MkdirAll(d.path, 0755); err != nil {
			return fmt.Errorf("%s: %w", d.label, err)
		}
	}
	data, err := json.MarshalIndent(manifest, "", " ")
	if err != nil {
		return fmt.Errorf("marshal manifest: %w", err)
	}
	if err := os.WriteFile(ManifestPath(commonGitDir, manifest.ID), data, 0644); err != nil {
		return fmt.Errorf("write manifest: %w", err)
	}
	return nil
}
// WriteEnvFiles writes the backend and web env-override files into the
// worktree's .vscode directory (both manifest paths share that parent).
func WriteEnvFiles(manifest Manifest) error {
	if err := os.MkdirAll(filepath.Dir(manifest.EnvFile), 0755); err != nil {
		return fmt.Errorf("create env dir: %w", err)
	}
	targets := []struct{ path, kind, label string }{
		{manifest.EnvFile, "backend", "backend"},
		{manifest.WebEnvFile, "web", "web"},
	}
	for _, target := range targets {
		contents := []byte(manifest.EnvFileContents(target.kind))
		if err := os.WriteFile(target.path, contents, 0644); err != nil {
			return fmt.Errorf("write %s env file: %w", target.label, err)
		}
	}
	return nil
}
// LoadAll reads every worktree manifest under the common git dir, sorted
// by branch name. A missing worktrees directory yields (nil, nil); any
// unreadable or unparsable manifest fails the whole load.
func LoadAll(commonGitDir string) ([]Manifest, error) {
	root := WorktreesRoot(commonGitDir)
	entries, err := os.ReadDir(root)
	if errors.Is(err, os.ErrNotExist) {
		return nil, nil
	}
	if err != nil {
		return nil, fmt.Errorf("read worktrees dir: %w", err)
	}
	manifests := make([]Manifest, 0, len(entries))
	for _, entry := range entries {
		if !entry.IsDir() {
			continue
		}
		loaded, err := LoadManifest(filepath.Join(root, entry.Name(), "manifest.json"))
		if err != nil {
			return nil, err
		}
		manifests = append(manifests, loaded)
	}
	sort.Slice(manifests, func(i, j int) bool { return manifests[i].Branch < manifests[j].Branch })
	return manifests, nil
}
// LoadManifest reads and decodes a single manifest.json from path.
func LoadManifest(path string) (Manifest, error) {
	var manifest Manifest
	raw, err := os.ReadFile(path)
	if err != nil {
		return manifest, fmt.Errorf("read manifest %s: %w", path, err)
	}
	if unmarshalErr := json.Unmarshal(raw, &manifest); unmarshalErr != nil {
		return manifest, fmt.Errorf("parse manifest %s: %w", path, unmarshalErr)
	}
	return manifest, nil
}
// FindByRepoRoot returns the manifest whose checkout path equals repoRoot
// after symlink resolution and cleaning, if any.
func FindByRepoRoot(commonGitDir, repoRoot string) (Manifest, bool, error) {
	manifests, err := LoadAll(commonGitDir)
	if err != nil {
		return Manifest{}, false, err
	}
	want := normalizePath(repoRoot)
	for _, candidate := range manifests {
		if normalizePath(candidate.CheckoutPath) == want {
			return candidate, true, nil
		}
	}
	return Manifest{}, false, nil
}
// FindByIdentifier resolves a worktree by exact id, branch name, or
// checkout path (in that priority per manifest). As a convenience it also
// matches by branch slug, but only when exactly one manifest shares that
// slug; an ambiguous slug returns an error asking for a more specific
// identifier.
func FindByIdentifier(commonGitDir, identifier string) (Manifest, bool, error) {
	manifests, err := LoadAll(commonGitDir)
	if err != nil {
		return Manifest{}, false, err
	}
	wantSlug := Slug(identifier)
	wantPath := normalizePath(identifier)
	var bySlug []Manifest
	for _, candidate := range manifests {
		if candidate.ID == wantSlug ||
			candidate.Branch == identifier ||
			normalizePath(candidate.CheckoutPath) == wantPath {
			return candidate, true, nil
		}
		if wantSlug != "" && Slug(candidate.Branch) == wantSlug {
			bySlug = append(bySlug, candidate)
		}
	}
	switch len(bySlug) {
	case 0:
		return Manifest{}, false, nil
	case 1:
		return bySlug[0], true, nil
	default:
		return Manifest{}, false, fmt.Errorf("identifier %q matches multiple worktrees; use the branch, full id, or checkout path", identifier)
	}
}
// RemoveState deletes the worktree's state directory (manifest plus
// artifacts). Removing an already-absent directory is not an error.
func RemoveState(commonGitDir, id string) error {
	err := os.RemoveAll(WorktreeStateDir(commonGitDir, id))
	if err != nil {
		return fmt.Errorf("remove worktree state: %w", err)
	}
	return nil
}
// UpdateVerification records the latest verify run (summary artifact path
// plus UTC RFC3339 timestamp) on the manifest and rewrites it to disk.
func UpdateVerification(commonGitDir string, manifest Manifest, summaryPath string, verifiedAt time.Time) error {
	manifest.LastVerifySummary = summaryPath
	manifest.LastVerifiedAt = verifiedAt.UTC().Format(time.RFC3339)
	return WriteManifest(commonGitDir, manifest)
}
// AllocatePorts picks the first port set (web/api/model/mcp at a common
// offset from the defaults) that is neither reserved by an existing
// manifest nor currently bound on the host.
func AllocatePorts(existing []Manifest) (PortSet, error) {
	reserved := make(map[int]bool)
	for _, m := range existing {
		for _, p := range []int{m.Ports.Web, m.Ports.API, m.Ports.ModelServer, m.Ports.MCP} {
			reserved[p] = true
		}
	}
	for offset := 0; offset < portSearchWindow; offset++ {
		candidate := PortSet{
			Web:         defaultWebPort + offset,
			API:         defaultAPIPort + offset,
			ModelServer: defaultModelPort + offset,
			MCP:         defaultMCPPort + offset,
		}
		anyReserved := reserved[candidate.Web] || reserved[candidate.API] ||
			reserved[candidate.ModelServer] || reserved[candidate.MCP]
		if anyReserved || !portsAvailable(candidate) {
			continue
		}
		return candidate, nil
	}
	return PortSet{}, fmt.Errorf("failed to allocate an available worktree port set after %d attempts", portSearchWindow)
}
// portsAvailable reports whether all four ports in the set can currently
// be bound (best-effort TCP listen probe; inherently racy between the
// probe and actual use).
func portsAvailable(ports PortSet) bool {
	for _, port := range []int{ports.Web, ports.API, ports.ModelServer, ports.MCP} {
		listener, err := net.Listen("tcp", fmt.Sprintf(":%d", port))
		if err != nil {
			return false
		}
		_ = listener.Close()
	}
	return true
}
func normalizePath(path string) string {
clean := filepath.Clean(path)
resolved, err := filepath.EvalSymlinks(clean)
if err == nil {
return filepath.Clean(resolved)
}
return clean
}

View File

@@ -0,0 +1,312 @@
package agentlab
import (
"os"
"path/filepath"
"strings"
"testing"
)
// TestSlug covers separator collapsing, case folding, and the "worktree"
// fallback for inputs that slug to nothing.
func TestSlug(t *testing.T) {
	t.Parallel()
	tests := map[string]string{
		"feat/My Feature": "feat-my-feature",
		"lab/agent_docs":  "lab-agent-docs",
		" ":               "worktree",
	}
	for input, want := range tests {
		input := input
		want := want
		t.Run(input, func(t *testing.T) {
			t.Parallel()
			if got := Slug(input); got != want {
				t.Fatalf("Slug(%q) = %q, want %q", input, got, want)
			}
		})
	}
}
// TestWorktreeIDIsCollisionResistant: branches that slugify identically
// (foo_bar vs foo-bar) must still get distinct ids via the hash suffix.
func TestWorktreeIDIsCollisionResistant(t *testing.T) {
	t.Parallel()
	idOne := worktreeID("feat/foo_bar")
	idTwo := worktreeID("feat/foo-bar")
	if idOne == idTwo {
		t.Fatalf("expected distinct worktree ids, got %q", idOne)
	}
	if !strings.HasPrefix(idOne, "feat-foo-bar-") {
		t.Fatalf("unexpected worktree id format: %s", idOne)
	}
}
// TestInferLane covers all three lanes, including codex/-prefixed branch
// names that must be normalized before classification.
func TestInferLane(t *testing.T) {
	t.Parallel()
	tests := map[string]WorktreeLane{
		"lab/docs":                WorktreeLaneLab,
		"codex/lab/docs":          WorktreeLaneLab,
		"fix/auth-banner-modal":   WorktreeLaneProduct,
		"codex/feat/agent-check":  WorktreeLaneProduct,
		"chore/update-readme":     WorktreeLaneProduct,
		"codex/auth-banner-modal": WorktreeLaneCustom,
		"agent-lab":               WorktreeLaneCustom,
	}
	for branch, want := range tests {
		branch := branch
		want := want
		t.Run(branch, func(t *testing.T) {
			t.Parallel()
			if got := InferLane(branch); got != want {
				t.Fatalf("InferLane(%q) = %q, want %q", branch, got, want)
			}
		})
	}
}
// TestResolveCreateBaseRef drives base-ref selection through a stubbed
// refExists where only codex/agent-lab and origin/main exist, covering
// product, lab, explicit --from, and custom/HEAD fallback paths.
func TestResolveCreateBaseRef(t *testing.T) {
	t.Parallel()
	refExists := func(ref string) bool {
		switch ref {
		case "codex/agent-lab", "origin/main":
			return true
		default:
			return false
		}
	}
	product := ResolveCreateBaseRef("codex/fix/auth-banner-modal", "", refExists)
	if product.Ref != "origin/main" || product.Lane != WorktreeLaneProduct {
		t.Fatalf("unexpected product base selection: %+v", product)
	}
	lab := ResolveCreateBaseRef("codex/lab/bootstrap-docs", "", refExists)
	if lab.Ref != "codex/agent-lab" || lab.Lane != WorktreeLaneLab {
		t.Fatalf("unexpected lab base selection: %+v", lab)
	}
	// An explicit --from always wins regardless of lane.
	explicit := ResolveCreateBaseRef("codex/auth-banner-modal", "origin/release", refExists)
	if explicit.Ref != "origin/release" || explicit.Lane != WorktreeLaneCustom {
		t.Fatalf("unexpected explicit base selection: %+v", explicit)
	}
	custom := ResolveCreateBaseRef("codex/auth-banner-modal", "", refExists)
	if custom.Ref != "HEAD" || custom.Lane != WorktreeLaneCustom {
		t.Fatalf("unexpected custom base selection: %+v", custom)
	}
}
// TestBuildManifest checks the derived manifest fields — id, web URL,
// compose project, shell env wiring, and namespaced dependency values —
// for a namespaced product worktree.
func TestBuildManifest(t *testing.T) {
	t.Parallel()
	ports := PortSet{Web: 3301, API: 8381, ModelServer: 9301, MCP: 8391}
	manifest := BuildManifest(
		"/repo/main",
		"/repo/.git",
		"feat/agent-harness",
		WorktreeLaneProduct,
		"origin/main",
		"/worktrees/feat-agent-harness",
		ports,
		DependencyModeNamespaced,
	)
	if manifest.ID != worktreeID("feat/agent-harness") {
		t.Fatalf("unexpected manifest id: %s", manifest.ID)
	}
	if manifest.URLs.Web != "http://127.0.0.1:3301" {
		t.Fatalf("unexpected web url: %s", manifest.URLs.Web)
	}
	if manifest.ComposeProject != "onyx-"+worktreeID("feat/agent-harness") {
		t.Fatalf("unexpected compose project: %s", manifest.ComposeProject)
	}
	if got := manifest.ShellEnv()["INTERNAL_URL"]; got != "http://127.0.0.1:8381" {
		t.Fatalf("unexpected INTERNAL_URL: %s", got)
	}
	if got := manifest.ResolvedDependencies().PostgresDatabase; got != "agentlab_"+strings.ReplaceAll(worktreeID("feat/agent-harness"), "-", "_") {
		t.Fatalf("unexpected postgres database: %s", got)
	}
	if got := manifest.RuntimeEnv()["DEFAULT_REDIS_PREFIX"]; got != "agentlab:"+worktreeID("feat/agent-harness") {
		t.Fatalf("unexpected redis prefix: %s", got)
	}
}
// TestWriteManifestAndLoadAll round-trips one manifest through
// WriteManifest and LoadAll using a temp dir as the common git dir.
func TestWriteManifestAndLoadAll(t *testing.T) {
	t.Parallel()
	commonGitDir := t.TempDir()
	manifest := BuildManifest(
		"/repo/main",
		commonGitDir,
		"lab/docs",
		WorktreeLaneLab,
		"HEAD",
		"/repo-worktrees/lab-docs",
		PortSet{Web: 3302, API: 8382, ModelServer: 9302, MCP: 8392},
		DependencyModeShared,
	)
	if err := WriteManifest(commonGitDir, manifest); err != nil {
		t.Fatalf("WriteManifest() error = %v", err)
	}
	manifests, err := LoadAll(commonGitDir)
	if err != nil {
		t.Fatalf("LoadAll() error = %v", err)
	}
	if len(manifests) != 1 {
		t.Fatalf("LoadAll() length = %d, want 1", len(manifests))
	}
	if manifests[0].Branch != manifest.Branch {
		t.Fatalf("unexpected branch: %s", manifests[0].Branch)
	}
}
// TestWriteEnvFiles: both env files are written, and the backend file
// carries the namespaced Postgres/Redis/bucket entries.
func TestWriteEnvFiles(t *testing.T) {
	t.Parallel()
	root := t.TempDir()
	manifest := BuildManifest(
		"/repo/main",
		filepath.Join(root, ".git"),
		"feat/env",
		WorktreeLaneProduct,
		"HEAD",
		root,
		PortSet{Web: 3303, API: 8383, ModelServer: 9303, MCP: 8393},
		DependencyModeNamespaced,
	)
	if err := WriteEnvFiles(manifest); err != nil {
		t.Fatalf("WriteEnvFiles() error = %v", err)
	}
	for _, path := range []string{manifest.EnvFile, manifest.WebEnvFile} {
		if _, err := os.Stat(path); err != nil {
			t.Fatalf("expected env file %s to exist: %v", path, err)
		}
	}
	backendEnv, err := os.ReadFile(manifest.EnvFile)
	if err != nil {
		t.Fatalf("read backend env file: %v", err)
	}
	if !containsAll(
		string(backendEnv),
		"POSTGRES_DB=agentlab_"+strings.ReplaceAll(worktreeID("feat/env"), "-", "_"),
		"DEFAULT_REDIS_PREFIX=agentlab:"+worktreeID("feat/env"),
		"S3_FILE_STORE_BUCKET_NAME=onyx-agentlab-"+worktreeID("feat/env"),
	) {
		t.Fatalf("backend env file missing dependency namespace entries: %s", string(backendEnv))
	}
}
// TestFindByIdentifierRejectsAmbiguousSlug: two branches that slugify to
// the same value must make a slug-only lookup fail instead of guessing.
func TestFindByIdentifierRejectsAmbiguousSlug(t *testing.T) {
	t.Parallel()
	commonGitDir := t.TempDir()
	manifests := []Manifest{
		BuildManifest(
			"/repo/main",
			commonGitDir,
			"feat/foo_bar",
			WorktreeLaneProduct,
			"HEAD",
			"/repo-worktrees/"+worktreeID("feat/foo_bar"),
			PortSet{Web: 3302, API: 8382, ModelServer: 9302, MCP: 8392},
			DependencyModeNamespaced,
		),
		BuildManifest(
			"/repo/main",
			commonGitDir,
			"feat/foo-bar",
			WorktreeLaneProduct,
			"HEAD",
			"/repo-worktrees/"+worktreeID("feat/foo-bar"),
			PortSet{Web: 3303, API: 8383, ModelServer: 9303, MCP: 8393},
			DependencyModeNamespaced,
		),
	}
	for _, manifest := range manifests {
		if err := WriteManifest(commonGitDir, manifest); err != nil {
			t.Fatalf("WriteManifest() error = %v", err)
		}
	}
	if _, found, err := FindByIdentifier(commonGitDir, "feat-foo-bar"); err == nil || found {
		t.Fatalf("expected ambiguous slug lookup to fail, found=%t err=%v", found, err)
	}
}
// TestBootstrapLinksAndClonesFromSource exercises a full Bootstrap run
// against a synthetic source checkout: env files and .venv are symlinked,
// while web/node_modules is cloned into the new checkout.
func TestBootstrapLinksAndClonesFromSource(t *testing.T) {
	t.Parallel()
	sourceRoot := t.TempDir()
	checkoutRoot := t.TempDir()
	commonGitDir := filepath.Join(sourceRoot, ".git")
	// Seed the source checkout with the assets Bootstrap provisions from.
	writeTestFile(t, filepath.Join(sourceRoot, ".vscode", ".env"), "OPENAI_API_KEY=test\n")
	writeTestFile(t, filepath.Join(sourceRoot, ".vscode", ".env.web"), "AUTH_TYPE=basic\n")
	writeTestFile(t, filepath.Join(sourceRoot, ".venv", "bin", "python"), "#!/bin/sh\n")
	writeTestFile(t, filepath.Join(sourceRoot, "web", "node_modules", ".bin", "next"), "#!/bin/sh\n")
	manifest := BuildManifest(
		sourceRoot,
		commonGitDir,
		"feat/bootstrap",
		WorktreeLaneProduct,
		"HEAD",
		checkoutRoot,
		PortSet{Web: 3305, API: 8385, ModelServer: 9305, MCP: 8395},
		DependencyModeNamespaced,
	)
	result, err := Bootstrap(manifest, BootstrapOptions{
		EnvMode:    BootstrapModeLink,
		PythonMode: BootstrapModeLink,
		WebMode:    BootstrapModeClone,
	})
	if err != nil {
		t.Fatalf("Bootstrap() error = %v", err)
	}
	if len(result.Actions) == 0 {
		t.Fatal("expected bootstrap actions to be recorded")
	}
	if target, err := os.Readlink(filepath.Join(checkoutRoot, ".vscode", ".env")); err != nil || target == "" {
		t.Fatalf("expected .vscode/.env symlink, err=%v target=%q", err, target)
	}
	if target, err := os.Readlink(filepath.Join(checkoutRoot, ".venv")); err != nil || target == "" {
		t.Fatalf("expected .venv symlink, err=%v target=%q", err, target)
	}
	if _, err := os.Stat(filepath.Join(checkoutRoot, "web", "node_modules", ".bin", "next")); err != nil {
		t.Fatalf("expected cloned node_modules marker: %v", err)
	}
	if _, err := os.Lstat(filepath.Join(checkoutRoot, "web", "node_modules")); err != nil {
		t.Fatalf("expected node_modules to exist: %v", err)
	}
}
// writeTestFile is a test helper that writes content to path, creating
// parent directories as needed; any failure aborts the test.
func writeTestFile(t *testing.T, path string, content string) {
	t.Helper()
	if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
		t.Fatalf("mkdir %s: %v", filepath.Dir(path), err)
	}
	if err := os.WriteFile(path, []byte(content), 0o644); err != nil {
		t.Fatalf("write %s: %v", path, err)
	}
}
// containsAll reports whether value contains every one of parts; an empty
// parts list vacuously yields true.
func containsAll(value string, parts ...string) bool {
	for _, fragment := range parts {
		if !strings.Contains(value, fragment) {
			return false
		}
	}
	return true
}

View File

@@ -0,0 +1,233 @@
package agentlab
import (
"fmt"
"os"
"os/exec"
"path/filepath"
"runtime"
)
// BootstrapMode selects how a bootstrap step provisions an asset into a
// new worktree checkout.
type BootstrapMode string

const (
	BootstrapModeAuto  BootstrapMode = "auto"  // pick a mode per asset (link, or clone/npm for web)
	BootstrapModeSkip  BootstrapMode = "skip"  // leave the asset alone
	BootstrapModeLink  BootstrapMode = "link"  // symlink from the source checkout
	BootstrapModeCopy  BootstrapMode = "copy"  // copy from the source checkout
	BootstrapModeClone BootstrapMode = "clone" // clone directory trees from the source checkout
	BootstrapModeNPM   BootstrapMode = "npm"   // install web deps via npm ci
)

// BootstrapOptions selects a mode for each bootstrap step.
type BootstrapOptions struct {
	EnvMode    BootstrapMode // .vscode/.env and .vscode/.env.web
	PythonMode BootstrapMode // .venv
	WebMode    BootstrapMode // web/node_modules
}

// BootstrapResult records human-readable descriptions of what Bootstrap did.
type BootstrapResult struct {
	Actions []string
}
// Bootstrap provisions a fresh worktree checkout from its source checkout
// in three steps — env files, the Python venv, and web node_modules —
// honoring the per-step modes in opts. It stops at the first failing step.
func Bootstrap(manifest Manifest, opts BootstrapOptions) (*BootstrapResult, error) {
	result := &BootstrapResult{}
	steps := []struct {
		run  func(Manifest, BootstrapMode, *BootstrapResult) error
		mode BootstrapMode
	}{
		{bootstrapEnvFiles, opts.EnvMode},
		{bootstrapPython, opts.PythonMode},
		{bootstrapWeb, opts.WebMode},
	}
	for _, step := range steps {
		if err := step.run(manifest, step.mode, result); err != nil {
			return nil, err
		}
	}
	return result, nil
}
// bootstrapEnvFiles provisions .vscode/.env and .vscode/.env.web into the
// new checkout from the source checkout. Missing source files are skipped
// silently; existing targets are kept. Auto mode resolves to link; copy
// and clone both copy the file.
func bootstrapEnvFiles(manifest Manifest, mode BootstrapMode, result *BootstrapResult) error {
	if mode == BootstrapModeSkip {
		return nil
	}
	vscodeDir := filepath.Join(manifest.CheckoutPath, ".vscode")
	if err := os.MkdirAll(vscodeDir, 0755); err != nil {
		return fmt.Errorf("create .vscode dir: %w", err)
	}
	sources := []struct {
		source string
		target string
		label  string
	}{
		{
			source: filepath.Join(manifest.CreatedFromPath, ".vscode", ".env"),
			target: filepath.Join(manifest.CheckoutPath, ".vscode", ".env"),
			label:  ".vscode/.env",
		},
		{
			source: filepath.Join(manifest.CreatedFromPath, ".vscode", ".env.web"),
			target: filepath.Join(manifest.CheckoutPath, ".vscode", ".env.web"),
			label:  ".vscode/.env.web",
		},
	}
	for _, item := range sources {
		// Skip env files the source checkout doesn't have.
		if _, err := os.Stat(item.source); err != nil {
			continue
		}
		// Lstat so an existing symlink target also counts as present.
		if _, err := os.Lstat(item.target); err == nil {
			result.Actions = append(result.Actions, fmt.Sprintf("kept existing %s", item.label))
			continue
		}
		currentMode := mode
		if currentMode == BootstrapModeAuto {
			currentMode = BootstrapModeLink
		}
		switch currentMode {
		case BootstrapModeLink:
			if err := os.Symlink(item.source, item.target); err != nil {
				return fmt.Errorf("symlink %s: %w", item.label, err)
			}
			result.Actions = append(result.Actions, fmt.Sprintf("linked %s from source checkout", item.label))
		case BootstrapModeCopy, BootstrapModeClone:
			if err := copyFile(item.source, item.target); err != nil {
				return fmt.Errorf("copy %s: %w", item.label, err)
			}
			result.Actions = append(result.Actions, fmt.Sprintf("copied %s from source checkout", item.label))
		default:
			return fmt.Errorf("unsupported env bootstrap mode: %s", currentMode)
		}
	}
	return nil
}
// bootstrapPython provisions .venv in the new checkout: an existing venv
// is kept, otherwise the source checkout's venv is linked (default/auto)
// or cloned. A missing source venv is recorded but is not an error.
func bootstrapPython(manifest Manifest, mode BootstrapMode, result *BootstrapResult) error {
	if mode == BootstrapModeSkip {
		return nil
	}
	source := filepath.Join(manifest.CreatedFromPath, ".venv")
	target := filepath.Join(manifest.CheckoutPath, ".venv")
	if _, err := os.Stat(target); err == nil {
		result.Actions = append(result.Actions, "kept existing .venv")
		return nil
	}
	if _, err := os.Stat(source); err != nil {
		result.Actions = append(result.Actions, "source .venv missing; backend bootstrap deferred")
		return nil
	}
	effective := mode
	if effective == BootstrapModeAuto {
		effective = BootstrapModeLink
	}
	switch effective {
	case BootstrapModeLink:
		if err := os.Symlink(source, target); err != nil {
			return fmt.Errorf("symlink .venv: %w", err)
		}
		result.Actions = append(result.Actions, "linked shared .venv from source checkout")
		return nil
	case BootstrapModeCopy, BootstrapModeClone:
		if err := cloneDirectory(source, target); err != nil {
			return fmt.Errorf("clone .venv: %w", err)
		}
		result.Actions = append(result.Actions, "cloned .venv from source checkout")
		return nil
	default:
		return fmt.Errorf("unsupported python bootstrap mode: %s", effective)
	}
}
func bootstrapWeb(manifest Manifest, mode BootstrapMode, result *BootstrapResult) error {
if mode == BootstrapModeSkip {
return nil
}
sourceModules := filepath.Join(manifest.CreatedFromPath, "web", "node_modules")
targetModules := filepath.Join(manifest.CheckoutPath, "web", "node_modules")
if _, err := os.Lstat(targetModules); err == nil {
result.Actions = append(result.Actions, "kept existing web/node_modules")
return nil
}
currentMode := mode
if currentMode == BootstrapModeAuto {
if _, err := os.Stat(sourceModules); err == nil {
currentMode = BootstrapModeClone
} else {
currentMode = BootstrapModeNPM
}
}
switch currentMode {
case BootstrapModeClone, BootstrapModeCopy:
if _, err := os.Stat(sourceModules); err != nil {
webDir := filepath.Join(manifest.CheckoutPath, "web")
cmd := exec.Command("npm", "ci", "--prefer-offline", "--no-audit")
cmd.Dir = webDir
cmd.Stdout = os.Stdout
cmd.Stderr = os.Stderr
cmd.Stdin = os.Stdin
if err := cmd.Run(); err != nil {
return fmt.Errorf("npm ci: %w", err)
}
result.Actions = append(result.Actions, "installed web/node_modules with npm ci")
return nil
}
if err := cloneDirectory(sourceModules, targetModules); err != nil {
return fmt.Errorf("clone web/node_modules: %w", err)
}
result.Actions = append(result.Actions, "cloned local web/node_modules into worktree")
return nil
case BootstrapModeNPM:
webDir := filepath.Join(manifest.CheckoutPath, "web")
cmd := exec.Command("npm", "ci", "--prefer-offline", "--no-audit")
cmd.Dir = webDir
cmd.Stdout = os.Stdout
cmd.Stderr = os.Stderr
cmd.Stdin = os.Stdin
if err := cmd.Run(); err != nil {
return fmt.Errorf("npm ci: %w", err)
}
result.Actions = append(result.Actions, "installed web/node_modules with npm ci")
default:
return fmt.Errorf("unsupported web bootstrap mode: %s", currentMode)
}
return nil
}
func cloneDirectory(source, target string) error {
if err := os.MkdirAll(filepath.Dir(target), 0755); err != nil {
return fmt.Errorf("create parent dir for %s: %w", target, err)
}
if runtime.GOOS == "darwin" {
cmd := exec.Command("cp", "-R", "-c", source, target)
if err := cmd.Run(); err == nil {
return nil
}
}
if runtime.GOOS != "windows" {
cmd := exec.Command("cp", "-R", source, target)
if err := cmd.Run(); err == nil {
return nil
}
}
return fmt.Errorf("no supported directory clone strategy succeeded for %s", source)
}
func copyFile(source, target string) error {
data, err := os.ReadFile(source)
if err != nil {
return err
}
return os.WriteFile(target, data, 0644)
}

View File

@@ -0,0 +1,252 @@
package agentlab
import (
"bytes"
"encoding/json"
"fmt"
"os"
"os/exec"
"path/filepath"
"runtime"
"strings"
"time"
"github.com/onyx-dot-app/onyx/tools/ods/internal/alembic"
"github.com/onyx-dot-app/onyx/tools/ods/internal/envutil"
)
// DependencyResult records the human-readable actions a dependency operation
// performed, for display to the user.
type DependencyResult struct {
	Actions []string
}

// DependencyStatus is a JSON-serializable health snapshot of a worktree's
// backing services. For namespaced mode the counts and readiness flags are
// populated by the embedded dependency_status.py probe (whose output keys
// mirror these json tags); shared mode reports everything ready without
// probing.
type DependencyStatus struct {
	Mode                 DependencyMode `json:"mode"`
	Namespace            string         `json:"namespace,omitempty"`
	PostgresDatabase     string         `json:"postgres_database,omitempty"`
	PostgresReady        bool           `json:"postgres_ready"`
	PostgresTableCount   int            `json:"postgres_table_count,omitempty"`
	RedisPrefix          string         `json:"redis_prefix,omitempty"`
	RedisReady           bool           `json:"redis_ready"`
	RedisKeyCount        int            `json:"redis_key_count,omitempty"`
	FileStoreBucket      string         `json:"file_store_bucket,omitempty"`
	FileStoreReady       bool           `json:"file_store_ready"`
	FileStoreObjectCount int            `json:"file_store_object_count,omitempty"`
	SearchInfraMode      string         `json:"search_infra_mode"`
}
// ProvisionDependencies prepares the backing services (Postgres, Redis,
// S3/MinIO) for a worktree according to its resolved dependency mode, then
// persists the updated manifest. Shared mode reuses the developer's existing
// services untouched; namespaced mode creates a dedicated database and bucket
// and migrates the database to head. Search infrastructure is always shared.
func ProvisionDependencies(commonGitDir string, manifest Manifest) (Manifest, *DependencyResult, error) {
	deps := manifest.ResolvedDependencies()
	result := &DependencyResult{}
	switch deps.Mode {
	case DependencyModeShared:
		result.Actions = append(result.Actions, "using shared Postgres, Redis, and MinIO state")
	case DependencyModeNamespaced:
		// Create the namespaced database if it does not exist yet (idempotent).
		if _, err := runPythonScript(manifest, "ensure_database.py"); err != nil {
			return manifest, nil, fmt.Errorf("ensure PostgreSQL database %s: %w", deps.PostgresDatabase, err)
		}
		result.Actions = append(result.Actions, fmt.Sprintf("ensured PostgreSQL database %s", deps.PostgresDatabase))
		envMap, err := runtimeEnvMap(manifest)
		if err != nil {
			return manifest, nil, err
		}
		// Bring the (possibly fresh) database up to the latest schema.
		if err := alembic.UpgradeWithEnv("head", alembic.SchemaDefault, envMap); err != nil {
			return manifest, nil, fmt.Errorf("migrate namespaced database %s: %w", deps.PostgresDatabase, err)
		}
		result.Actions = append(result.Actions, fmt.Sprintf("migrated PostgreSQL database %s", deps.PostgresDatabase))
		if _, err := runPythonScript(manifest, "ensure_bucket.py"); err != nil {
			return manifest, nil, fmt.Errorf("ensure file-store bucket %s: %w", deps.FileStoreBucket, err)
		}
		result.Actions = append(result.Actions, fmt.Sprintf("ensured file-store bucket %s", deps.FileStoreBucket))
		// Redis needs no upfront provisioning; the key prefix alone
		// namespaces the worktree's data.
		result.Actions = append(result.Actions, fmt.Sprintf("reserved Redis prefix %s", deps.RedisPrefix))
	default:
		return manifest, nil, fmt.Errorf("unsupported dependency mode: %s", deps.Mode)
	}
	result.Actions = append(result.Actions, "search infrastructure remains shared-only")
	// Record what was provisioned (and when) in the worktree manifest.
	manifest.Dependencies = deps
	manifest.Dependencies.LastProvisionedAt = time.Now().UTC().Format(time.RFC3339)
	if err := WriteManifest(commonGitDir, manifest); err != nil {
		return manifest, nil, err
	}
	return manifest, result, nil
}
// InspectDependencies returns a health snapshot of the worktree's backing
// services. Shared mode is reported ready without probing (those services are
// managed outside agent-lab); namespaced mode shells out to the embedded
// dependency_status.py probe and parses its JSON output.
func InspectDependencies(manifest Manifest) (*DependencyStatus, error) {
	deps := manifest.ResolvedDependencies()
	status := &DependencyStatus{
		Mode:             deps.Mode,
		Namespace:        deps.Namespace,
		PostgresDatabase: deps.PostgresDatabase,
		RedisPrefix:      deps.RedisPrefix,
		FileStoreBucket:  deps.FileStoreBucket,
		SearchInfraMode:  deps.SearchInfraMode,
	}
	if deps.Mode == DependencyModeShared {
		status.PostgresReady = true
		status.RedisReady = true
		status.FileStoreReady = true
		return status, nil
	}
	output, err := runPythonScript(manifest, "dependency_status.py")
	if err != nil {
		return nil, fmt.Errorf("inspect namespaced dependencies: %w", err)
	}
	// The probe prints one JSON object whose keys match DependencyStatus's
	// json tags; unmarshal over the pre-filled struct.
	if err := json.Unmarshal([]byte(output), status); err != nil {
		return nil, fmt.Errorf("parse dependency status: %w", err)
	}
	return status, nil
}
// ResetDependencies wipes a worktree's namespaced state back to a blank
// slate: the database is dropped and recreated (then re-migrated to head),
// the Redis prefix is cleared, and the file-store bucket is emptied but kept.
// Shared mode is a no-op. The manifest's provisioning timestamp is refreshed.
func ResetDependencies(commonGitDir string, manifest Manifest) (Manifest, *DependencyResult, error) {
	deps := manifest.ResolvedDependencies()
	result := &DependencyResult{}
	if deps.Mode == DependencyModeShared {
		result.Actions = append(result.Actions, "shared dependency mode selected; reset is a no-op")
		return manifest, result, nil
	}
	// The script performs the drop/recreate, Redis clear, and bucket empty.
	if _, err := runPythonScript(manifest, "reset_dependencies.py"); err != nil {
		return manifest, nil, fmt.Errorf("reset namespaced dependencies: %w", err)
	}
	result.Actions = append(result.Actions, fmt.Sprintf("dropped and recreated PostgreSQL database %s", deps.PostgresDatabase))
	result.Actions = append(result.Actions, fmt.Sprintf("cleared Redis prefix %s", deps.RedisPrefix))
	result.Actions = append(result.Actions, fmt.Sprintf("emptied file-store bucket %s", deps.FileStoreBucket))
	envMap, err := runtimeEnvMap(manifest)
	if err != nil {
		return manifest, nil, err
	}
	// The fresh database is empty; bring it back to the latest schema.
	if err := alembic.UpgradeWithEnv("head", alembic.SchemaDefault, envMap); err != nil {
		return manifest, nil, fmt.Errorf("re-migrate namespaced database %s: %w", deps.PostgresDatabase, err)
	}
	result.Actions = append(result.Actions, fmt.Sprintf("re-migrated PostgreSQL database %s", deps.PostgresDatabase))
	result.Actions = append(result.Actions, "search infrastructure remains shared-only and was not reset")
	manifest.Dependencies = deps
	manifest.Dependencies.LastProvisionedAt = time.Now().UTC().Format(time.RFC3339)
	if err := WriteManifest(commonGitDir, manifest); err != nil {
		return manifest, nil, err
	}
	return manifest, result, nil
}
// TeardownDependencies permanently removes a worktree's namespaced state:
// the database is dropped, the Redis prefix cleared, and the file-store
// bucket deleted. Shared mode is a no-op. The manifest's provisioning
// timestamp is blanked to mark the worktree as unprovisioned.
func TeardownDependencies(commonGitDir string, manifest Manifest) (Manifest, *DependencyResult, error) {
	deps := manifest.ResolvedDependencies()
	result := &DependencyResult{}
	if deps.Mode == DependencyModeShared {
		result.Actions = append(result.Actions, "shared dependency mode selected; teardown is a no-op")
		return manifest, result, nil
	}
	if _, err := runPythonScript(manifest, "teardown_dependencies.py"); err != nil {
		return manifest, nil, fmt.Errorf("tear down namespaced dependencies: %w", err)
	}
	result.Actions = append(result.Actions, fmt.Sprintf("dropped PostgreSQL database %s", deps.PostgresDatabase))
	result.Actions = append(result.Actions, fmt.Sprintf("cleared Redis prefix %s", deps.RedisPrefix))
	result.Actions = append(result.Actions, fmt.Sprintf("deleted file-store bucket %s", deps.FileStoreBucket))
	result.Actions = append(result.Actions, "search infrastructure remains shared-only and was not torn down")
	manifest.Dependencies = deps
	// Empty timestamp signals "never provisioned" to later inspections.
	manifest.Dependencies.LastProvisionedAt = ""
	if err := WriteManifest(commonGitDir, manifest); err != nil {
		return manifest, nil, err
	}
	return manifest, result, nil
}
// runtimeEnvMap assembles the environment for runtime commands: defaults from
// the repo's .vscode/.env file (when present), overlaid by the manifest's own
// runtime environment, which always wins on conflicting keys.
func runtimeEnvMap(manifest Manifest) (map[string]string, error) {
	merged := make(map[string]string)
	envFile := filepath.Join(runtimeRepoRoot(manifest), ".vscode", ".env")
	if _, statErr := os.Stat(envFile); statErr == nil {
		entries, err := envutil.LoadFile(envFile)
		if err != nil {
			return nil, err
		}
		for _, entry := range entries {
			key, value, ok := strings.Cut(entry, "=")
			if ok && key != "" {
				merged[key] = value
			}
		}
	}
	// Manifest-provided values override file defaults.
	for key, value := range manifest.RuntimeEnv() {
		merged[key] = value
	}
	return merged, nil
}
// runPythonScript executes one of the embedded helper scripts with the
// worktree's (or source checkout's) Python interpreter, from the backend/
// directory, with the runtime env overrides applied. On success it returns
// trimmed stdout. On failure the error message is, in order of preference:
// trimmed stderr, trimmed stdout, or the raw exec error.
func runPythonScript(manifest Manifest, scriptName string) (string, error) {
	pythonBinary, err := findPythonBinary(manifest)
	if err != nil {
		return "", err
	}
	code, err := loadPythonScript(scriptName)
	if err != nil {
		return "", err
	}
	envMap, err := runtimeEnvMap(manifest)
	if err != nil {
		return "", err
	}
	// Run the script source via -c so nothing has to exist on disk.
	cmd := exec.Command(pythonBinary, "-c", code)
	cmd.Dir = filepath.Join(runtimeRepoRoot(manifest), "backend")
	cmd.Env = envutil.ApplyOverrides(os.Environ(), envMap)
	var stdout bytes.Buffer
	var stderr bytes.Buffer
	cmd.Stdout = &stdout
	cmd.Stderr = &stderr
	if err := cmd.Run(); err != nil {
		// Prefer the script's own diagnostics over Go's exit-status error.
		message := strings.TrimSpace(stderr.String())
		if message == "" {
			message = strings.TrimSpace(stdout.String())
		}
		if message == "" {
			message = err.Error()
		}
		return "", fmt.Errorf("%s", message)
	}
	return strings.TrimSpace(stdout.String()), nil
}
// findPythonBinary locates a usable Python interpreter, preferring the
// worktree's own .venv and falling back to the source checkout's .venv.
// The interpreter's relative path inside a venv differs on Windows.
func findPythonBinary(manifest Manifest) (string, error) {
	interpreter := filepath.Join(".venv", "bin", "python")
	if runtime.GOOS == "windows" {
		interpreter = filepath.Join(".venv", "Scripts", "python.exe")
	}
	for _, root := range []string{manifest.CheckoutPath, manifest.CreatedFromPath} {
		candidate := filepath.Join(root, interpreter)
		if _, err := os.Stat(candidate); err == nil {
			return candidate, nil
		}
	}
	return "", fmt.Errorf("could not find a Python interpreter in %s/.venv or %s/.venv", manifest.CheckoutPath, manifest.CreatedFromPath)
}
// runtimeRepoRoot picks the repo root to run backend tooling from: the
// worktree checkout when it contains a backend/ directory, otherwise the
// source checkout the worktree was created from.
func runtimeRepoRoot(manifest Manifest) string {
	if manifest.CheckoutPath == "" {
		return manifest.CreatedFromPath
	}
	if _, err := os.Stat(filepath.Join(manifest.CheckoutPath, "backend")); err != nil {
		return manifest.CreatedFromPath
	}
	return manifest.CheckoutPath
}

View File

@@ -0,0 +1,17 @@
package agentlab
import (
"embed"
"fmt"
)
// pythonScripts bundles the helper scripts under scripts/ into the binary so
// they can be executed without relying on files existing on disk.
//
//go:embed scripts/*.py
var pythonScripts embed.FS

// loadPythonScript returns the embedded Python source for the given file name
// (e.g. "ensure_database.py"), or an error if no such script was embedded.
func loadPythonScript(name string) (string, error) {
	data, err := pythonScripts.ReadFile("scripts/" + name)
	if err != nil {
		return "", fmt.Errorf("load python script %s: %w", name, err)
	}
	return string(data), nil
}

View File

@@ -0,0 +1,90 @@
# Probe the health of a worktree's namespaced dependencies (Postgres, Redis,
# S3/MinIO) and print a single JSON report to stdout. All connection settings
# arrive via environment variables injected by the Go caller (runPythonScript);
# any connection failure raises and is surfaced through stderr.
import json
import os

import boto3
import psycopg2
import urllib3
from botocore.config import Config
from botocore.exceptions import ClientError
from redis import Redis

# --- PostgreSQL: connect to the namespaced database and count public tables.
db_name = os.environ["POSTGRES_DB"]
host = os.environ.get("POSTGRES_HOST", "localhost")
port = os.environ.get("POSTGRES_PORT", "5432")
user = os.environ.get("POSTGRES_USER", "postgres")
password = os.environ.get("POSTGRES_PASSWORD", "password")
conn = psycopg2.connect(
    host=host, port=port, user=user, password=password, dbname=db_name
)
with conn.cursor() as cur:
    cur.execute(
        "SELECT COUNT(*) FROM information_schema.tables WHERE table_schema = 'public'"
    )
    table_count = int(cur.fetchone()[0])
conn.close()

# --- Redis: count keys under the worktree's namespace prefix using SCAN
# (incremental, non-blocking) rather than KEYS.
redis_prefix = os.environ["DEFAULT_REDIS_PREFIX"]
bucket = os.environ["S3_FILE_STORE_BUCKET_NAME"]
redis_client = Redis(
    host=os.environ.get("REDIS_HOST", "localhost"),
    port=int(os.environ.get("REDIS_PORT", "6379")),
    db=int(os.environ.get("REDIS_DB_NUMBER", "0")),
    password=os.environ.get("REDIS_PASSWORD") or None,
    ssl=os.environ.get("REDIS_SSL", "").lower() == "true",
    ssl_cert_reqs="none" if os.environ.get("REDIS_SSL", "").lower() == "true" else None,
)
redis_key_count = 0
for _ in redis_client.scan_iter(match=f"{redis_prefix}:*", count=1000):
    redis_key_count += 1

# --- S3/MinIO: build a client. A custom endpoint forces SigV4 and path-style
# addressing (MinIO-compatible); SSL verification can be disabled for local
# self-signed setups.
endpoint = os.environ.get("S3_ENDPOINT_URL") or None
access_key = os.environ.get("S3_AWS_ACCESS_KEY_ID") or None
secret_key = os.environ.get("S3_AWS_SECRET_ACCESS_KEY") or None
region = os.environ.get("AWS_REGION_NAME") or "us-east-1"
verify_ssl = os.environ.get("S3_VERIFY_SSL", "false").lower() == "true"
kwargs = {"service_name": "s3", "region_name": region}
if endpoint:
    kwargs["endpoint_url"] = endpoint
    kwargs["config"] = Config(signature_version="s3v4", s3={"addressing_style": "path"})
    if not verify_ssl:
        urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
        kwargs["verify"] = False
if access_key and secret_key:
    kwargs["aws_access_key_id"] = access_key
    kwargs["aws_secret_access_key"] = secret_key
s3_client = boto3.client(**kwargs)

# The bucket is reported unhealthy only when HEAD/LIST fail with a client
# error (e.g. missing bucket or bad credentials).
bucket_ready = True
bucket_object_count = 0
try:
    s3_client.head_bucket(Bucket=bucket)
    paginator = s3_client.get_paginator("list_objects_v2")
    for page in paginator.paginate(Bucket=bucket):
        bucket_object_count += len(page.get("Contents", []))
except ClientError:
    bucket_ready = False

# Emit a JSON object whose keys mirror the Go DependencyStatus struct tags.
# postgres_ready/redis_ready are True here because a failure above would have
# raised before reaching this point.
print(
    json.dumps(
        {
            "mode": os.environ["AGENT_LAB_DEPENDENCY_MODE"],
            "namespace": os.environ.get("AGENT_LAB_NAMESPACE", ""),
            "postgres_database": db_name,
            "postgres_ready": True,
            "postgres_table_count": table_count,
            "redis_prefix": redis_prefix,
            "redis_ready": True,
            "redis_key_count": redis_key_count,
            "file_store_bucket": bucket,
            "file_store_ready": bucket_ready,
            "file_store_object_count": bucket_object_count,
            "search_infra_mode": os.environ.get(
                "AGENT_LAB_SEARCH_INFRA_MODE", "shared"
            ),
        }
    )
)

View File

@@ -0,0 +1,40 @@
# Idempotently create the worktree's file-store bucket. Connection settings
# come from environment variables set by the Go caller; prints the bucket
# name on success.
import os

import boto3
import urllib3
from botocore.config import Config
from botocore.exceptions import ClientError

bucket = os.environ["S3_FILE_STORE_BUCKET_NAME"]
endpoint = os.environ.get("S3_ENDPOINT_URL") or None
access_key = os.environ.get("S3_AWS_ACCESS_KEY_ID") or None
secret_key = os.environ.get("S3_AWS_SECRET_ACCESS_KEY") or None
region = os.environ.get("AWS_REGION_NAME") or "us-east-1"
verify_ssl = os.environ.get("S3_VERIFY_SSL", "false").lower() == "true"
kwargs = {"service_name": "s3", "region_name": region}
if endpoint:
    # A custom endpoint forces SigV4 and path-style addressing (MinIO-compatible).
    kwargs["endpoint_url"] = endpoint
    kwargs["config"] = Config(signature_version="s3v4", s3={"addressing_style": "path"})
    if not verify_ssl:
        urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
        kwargs["verify"] = False
if access_key and secret_key:
    kwargs["aws_access_key_id"] = access_key
    kwargs["aws_secret_access_key"] = secret_key
client = boto3.client(**kwargs)
try:
    client.head_bucket(Bucket=bucket)
except ClientError as exc:
    # 404 = bucket missing, 403 = no access; anything else is unexpected.
    # NOTE(review): a 403 can also mean the bucket exists under another
    # account, in which case create_bucket below will fail — confirm this is
    # acceptable for shared S3 deployments.
    status = exc.response.get("ResponseMetadata", {}).get("HTTPStatusCode")
    if status not in (403, 404):
        raise
    if endpoint or region == "us-east-1":
        # us-east-1 (and MinIO-style endpoints) reject a LocationConstraint.
        client.create_bucket(Bucket=bucket)
    else:
        client.create_bucket(
            Bucket=bucket, CreateBucketConfiguration={"LocationConstraint": region}
        )
print(bucket)

View File

@@ -0,0 +1,23 @@
# Idempotently create the worktree's namespaced PostgreSQL database, connecting
# through the admin database. Prints the target database name on success.
import os

import psycopg2
from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT

host = os.environ.get("POSTGRES_HOST", "localhost")
port = os.environ.get("POSTGRES_PORT", "5432")
user = os.environ.get("POSTGRES_USER", "postgres")
password = os.environ.get("POSTGRES_PASSWORD", "password")
target_db = os.environ["POSTGRES_DB"]
admin_db = os.environ.get("AGENT_LAB_POSTGRES_ADMIN_DB", "postgres")
# CREATE DATABASE cannot run inside a transaction block, hence autocommit.
conn = psycopg2.connect(
    host=host, port=port, user=user, password=password, dbname=admin_db
)
conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
with conn.cursor() as cur:
    cur.execute("SELECT 1 FROM pg_database WHERE datname = %s", (target_db,))
    if cur.fetchone() is None:
        # Placeholders only work for values, not identifiers, so the name is
        # interpolated; it comes from tool-controlled env and the double
        # quotes keep it a single identifier.
        cur.execute(f'CREATE DATABASE "{target_db}"')
conn.close()
print(target_db)

View File

@@ -0,0 +1,67 @@
# Reset a worktree's namespaced dependencies to a blank slate: drop and
# recreate the PostgreSQL database, delete every Redis key under the namespace
# prefix, and empty (but keep) the file-store bucket. The Go caller re-runs
# migrations afterwards.
import os

import boto3
import psycopg2
import urllib3
from botocore.config import Config
from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT
from redis import Redis

# --- PostgreSQL: drop and recreate the namespaced database.
host = os.environ.get("POSTGRES_HOST", "localhost")
port = os.environ.get("POSTGRES_PORT", "5432")
user = os.environ.get("POSTGRES_USER", "postgres")
password = os.environ.get("POSTGRES_PASSWORD", "password")
target_db = os.environ["POSTGRES_DB"]
admin_db = os.environ.get("AGENT_LAB_POSTGRES_ADMIN_DB", "postgres")
# DROP/CREATE DATABASE cannot run inside a transaction block, hence autocommit.
conn = psycopg2.connect(
    host=host, port=port, user=user, password=password, dbname=admin_db
)
conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
with conn.cursor() as cur:
    # Terminate any open connections first; DROP DATABASE fails otherwise.
    cur.execute(
        "SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE datname = %s AND pid <> pg_backend_pid()",
        (target_db,),
    )
    cur.execute(f'DROP DATABASE IF EXISTS "{target_db}"')
    cur.execute(f'CREATE DATABASE "{target_db}"')
conn.close()

# --- Redis: delete every key under the namespace prefix (SCAN, not KEYS).
redis_prefix = os.environ["DEFAULT_REDIS_PREFIX"]
redis_client = Redis(
    host=os.environ.get("REDIS_HOST", "localhost"),
    port=int(os.environ.get("REDIS_PORT", "6379")),
    db=int(os.environ.get("REDIS_DB_NUMBER", "0")),
    password=os.environ.get("REDIS_PASSWORD") or None,
    ssl=os.environ.get("REDIS_SSL", "").lower() == "true",
    ssl_cert_reqs="none" if os.environ.get("REDIS_SSL", "").lower() == "true" else None,
)
keys = list(redis_client.scan_iter(match=f"{redis_prefix}:*", count=1000))
if keys:
    redis_client.delete(*keys)

# --- S3/MinIO: delete every object but keep the bucket itself.
bucket = os.environ["S3_FILE_STORE_BUCKET_NAME"]
endpoint = os.environ.get("S3_ENDPOINT_URL") or None
access_key = os.environ.get("S3_AWS_ACCESS_KEY_ID") or None
secret_key = os.environ.get("S3_AWS_SECRET_ACCESS_KEY") or None
region = os.environ.get("AWS_REGION_NAME") or "us-east-1"
verify_ssl = os.environ.get("S3_VERIFY_SSL", "false").lower() == "true"
kwargs = {"service_name": "s3", "region_name": region}
if endpoint:
    # A custom endpoint forces SigV4 and path-style addressing (MinIO-compatible).
    kwargs["endpoint_url"] = endpoint
    kwargs["config"] = Config(signature_version="s3v4", s3={"addressing_style": "path"})
    if not verify_ssl:
        urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
        kwargs["verify"] = False
if access_key and secret_key:
    kwargs["aws_access_key_id"] = access_key
    kwargs["aws_secret_access_key"] = secret_key
s3_client = boto3.client(**kwargs)
# NOTE(review): unlike teardown_dependencies.py, a missing bucket here raises
# a ClientError instead of being tolerated — confirm reset is only ever run
# after the bucket was provisioned.
paginator = s3_client.get_paginator("list_objects_v2")
for page in paginator.paginate(Bucket=bucket):
    objects = [{"Key": item["Key"]} for item in page.get("Contents", [])]
    if objects:
        s3_client.delete_objects(Bucket=bucket, Delete={"Objects": objects})

View File

@@ -0,0 +1,73 @@
# Permanently tear down a worktree's namespaced dependencies: drop the
# PostgreSQL database, delete every Redis key under the namespace prefix, and
# delete the file-store bucket (objects first, then the bucket itself).
import os

import boto3
import psycopg2
import urllib3
from botocore.config import Config
from botocore.exceptions import ClientError
from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT
from redis import Redis

# --- PostgreSQL: drop the namespaced database.
host = os.environ.get("POSTGRES_HOST", "localhost")
port = os.environ.get("POSTGRES_PORT", "5432")
user = os.environ.get("POSTGRES_USER", "postgres")
password = os.environ.get("POSTGRES_PASSWORD", "password")
target_db = os.environ["POSTGRES_DB"]
admin_db = os.environ.get("AGENT_LAB_POSTGRES_ADMIN_DB", "postgres")
# DROP DATABASE cannot run inside a transaction block, hence autocommit.
conn = psycopg2.connect(
    host=host, port=port, user=user, password=password, dbname=admin_db
)
conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
with conn.cursor() as cur:
    # Terminate any open connections first; DROP DATABASE fails otherwise.
    cur.execute(
        "SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE datname = %s AND pid <> pg_backend_pid()",
        (target_db,),
    )
    cur.execute(f'DROP DATABASE IF EXISTS "{target_db}"')
conn.close()

# --- Redis: delete every key under the namespace prefix (SCAN, not KEYS).
redis_prefix = os.environ["DEFAULT_REDIS_PREFIX"]
redis_client = Redis(
    host=os.environ.get("REDIS_HOST", "localhost"),
    port=int(os.environ.get("REDIS_PORT", "6379")),
    db=int(os.environ.get("REDIS_DB_NUMBER", "0")),
    password=os.environ.get("REDIS_PASSWORD") or None,
    ssl=os.environ.get("REDIS_SSL", "").lower() == "true",
    ssl_cert_reqs="none" if os.environ.get("REDIS_SSL", "").lower() == "true" else None,
)
keys = list(redis_client.scan_iter(match=f"{redis_prefix}:*", count=1000))
if keys:
    redis_client.delete(*keys)

# --- S3/MinIO: empty then delete the bucket. A bucket that is already gone
# (or inaccessible) is tolerated so teardown stays idempotent.
bucket = os.environ["S3_FILE_STORE_BUCKET_NAME"]
endpoint = os.environ.get("S3_ENDPOINT_URL") or None
access_key = os.environ.get("S3_AWS_ACCESS_KEY_ID") or None
secret_key = os.environ.get("S3_AWS_SECRET_ACCESS_KEY") or None
region = os.environ.get("AWS_REGION_NAME") or "us-east-1"
verify_ssl = os.environ.get("S3_VERIFY_SSL", "false").lower() == "true"
kwargs = {"service_name": "s3", "region_name": region}
if endpoint:
    # A custom endpoint forces SigV4 and path-style addressing (MinIO-compatible).
    kwargs["endpoint_url"] = endpoint
    kwargs["config"] = Config(signature_version="s3v4", s3={"addressing_style": "path"})
    if not verify_ssl:
        urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
        kwargs["verify"] = False
if access_key and secret_key:
    kwargs["aws_access_key_id"] = access_key
    kwargs["aws_secret_access_key"] = secret_key
s3_client = boto3.client(**kwargs)
try:
    # The bucket must be empty before delete_bucket will succeed.
    paginator = s3_client.get_paginator("list_objects_v2")
    for page in paginator.paginate(Bucket=bucket):
        objects = [{"Key": item["Key"]} for item in page.get("Contents", [])]
        if objects:
            s3_client.delete_objects(Bucket=bucket, Delete={"Objects": objects})
    s3_client.delete_bucket(Bucket=bucket)
except ClientError as exc:
    status = exc.response.get("ResponseMetadata", {}).get("HTTPStatusCode")
    if status not in (403, 404):
        raise

View File

@@ -53,12 +53,17 @@ func FindAlembicBinary() (string, error) {
// otherwise it will attempt to run via docker exec on a container
// that has alembic installed (e.g., api_server).
func Run(args []string, schema Schema) error {
return RunWithEnv(args, schema, nil)
}
// RunWithEnv executes an alembic command with explicit environment overrides.
func RunWithEnv(args []string, schema Schema, extraEnv map[string]string) error {
// Check if we need to run via docker exec
if shouldUseDockerExec() {
return runViaDockerExec(args, schema)
return runViaDockerExec(args, schema, extraEnv)
}
return runLocally(args, schema)
return runLocally(args, schema, extraEnv)
}
// shouldUseDockerExec determines if we should run alembic via docker exec.
@@ -79,7 +84,7 @@ func shouldUseDockerExec() bool {
}
// runLocally runs alembic on the local machine.
func runLocally(args []string, schema Schema) error {
func runLocally(args []string, schema Schema, extraEnv map[string]string) error {
backendDir, err := paths.BackendDir()
if err != nil {
return fmt.Errorf("failed to find backend directory: %w", err)
@@ -104,13 +109,13 @@ func runLocally(args []string, schema Schema) error {
cmd.Stdin = os.Stdin
// Pass through POSTGRES_* environment variables
cmd.Env = buildAlembicEnv()
cmd.Env = buildAlembicEnv(extraEnv)
return cmd.Run()
}
// runViaDockerExec runs alembic inside a Docker container that has network access.
func runViaDockerExec(args []string, schema Schema) error {
func runViaDockerExec(args []string, schema Schema, extraEnv map[string]string) error {
// Find a container with alembic installed (api_server)
container, err := findAlembicContainer()
if err != nil {
@@ -136,7 +141,11 @@ func runViaDockerExec(args []string, schema Schema) error {
// Run alembic inside the container
// The container should have the correct env vars and network access
dockerArgs := []string{"exec", "-i", container, "alembic"}
dockerArgs := []string{"exec", "-i"}
for key, value := range extraEnv {
dockerArgs = append(dockerArgs, "-e", fmt.Sprintf("%s=%s", key, value))
}
dockerArgs = append(dockerArgs, container, "alembic")
dockerArgs = append(dockerArgs, alembicArgs...)
cmd := exec.Command("docker", dockerArgs...)
@@ -158,7 +167,7 @@ var alembicContainerNames = []string{
// It inherits the current environment and ensures POSTGRES_* variables are set.
// If POSTGRES_HOST is not explicitly set, it attempts to detect the PostgreSQL
// container IP address automatically.
func buildAlembicEnv() []string {
func buildAlembicEnv(extraEnv map[string]string) []string {
env := os.Environ()
// Get postgres config (which reads from env with defaults)
@@ -188,6 +197,10 @@ func buildAlembicEnv() []string {
}
}
for key, value := range extraEnv {
env = append(env, fmt.Sprintf("%s=%s", key, value))
}
return env
}
@@ -238,6 +251,14 @@ func Upgrade(revision string, schema Schema) error {
return Run([]string{"upgrade", revision}, schema)
}
// UpgradeWithEnv runs alembic upgrade with explicit environment overrides.
func UpgradeWithEnv(revision string, schema Schema, extraEnv map[string]string) error {
if revision == "" {
revision = "head"
}
return RunWithEnv([]string{"upgrade", revision}, schema, extraEnv)
}
// Downgrade runs alembic downgrade to the specified revision.
func Downgrade(revision string, schema Schema) error {
return Run([]string{"downgrade", revision}, schema)

View File

@@ -0,0 +1,105 @@
package envutil
import (
"bufio"
"fmt"
"os"
"sort"
"strings"
)
// LoadFile parses a .env-style file into KEY=VALUE entries suitable for
// appending to os.Environ(). Blank lines and comments are skipped.
func LoadFile(path string) ([]string, error) {
f, err := os.Open(path)
if err != nil {
return nil, fmt.Errorf("open env file %s: %w", path, err)
}
defer func() { _ = f.Close() }()
var envVars []string
scanner := bufio.NewScanner(f)
for scanner.Scan() {
line := strings.TrimSpace(scanner.Text())
if line == "" || strings.HasPrefix(line, "#") {
continue
}
if idx := strings.Index(line, "="); idx > 0 {
key := strings.TrimSpace(line[:idx])
value := strings.TrimSpace(line[idx+1:])
value = strings.Trim(value, `"'`)
envVars = append(envVars, fmt.Sprintf("%s=%s", key, value))
}
}
if err := scanner.Err(); err != nil {
return nil, fmt.Errorf("read env file %s: %w", path, err)
}
return envVars, nil
}
// Merge combines shell environment with file-based defaults. Shell values
// take precedence: a file entry is appended only when its key does not
// already appear in the shell environment.
func Merge(shellEnv, fileVars []string) []string {
	keyOf := func(entry string) (string, bool) {
		idx := strings.Index(entry, "=")
		if idx <= 0 {
			return "", false
		}
		return entry[:idx], true
	}

	seen := make(map[string]bool, len(shellEnv))
	for _, entry := range shellEnv {
		if key, ok := keyOf(entry); ok {
			seen[key] = true
		}
	}

	merged := append(make([]string, 0, len(shellEnv)+len(fileVars)), shellEnv...)
	for _, entry := range fileVars {
		if key, ok := keyOf(entry); ok && !seen[key] {
			merged = append(merged, entry)
		}
	}
	return merged
}
// ApplyOverrides replaces or appends KEY=VALUE entries in env with the
// provided overrides: existing entries whose key is overridden are dropped,
// then the overrides are appended in stable (sorted) order, so each
// overridden key appears exactly once in the result.
func ApplyOverrides(env []string, overrides map[string]string) []string {
	if len(overrides) == 0 {
		return env
	}
	kept := make([]string, 0, len(env)+len(overrides))
	for _, entry := range env {
		if idx := strings.Index(entry, "="); idx > 0 {
			if _, overridden := overrides[entry[:idx]]; overridden {
				continue // replaced by the appended override below
			}
		}
		kept = append(kept, entry)
	}
	return append(kept, MapToEnvEntries(overrides)...)
}
// MapToEnvEntries converts a string map into KEY=VALUE entries ordered by
// key, so the output is deterministic across runs (map iteration is not).
func MapToEnvEntries(values map[string]string) []string {
	keys := make([]string, 0, len(values))
	for key := range values {
		keys = append(keys, key)
	}
	// Sort the keys (not the joined entries) so ordering is purely by key.
	sort.Strings(keys)
	entries := make([]string, 0, len(values))
	for _, key := range keys {
		entries = append(entries, key+"="+values[key])
	}
	return entries
}

View File

@@ -0,0 +1,122 @@
package journey
import (
"encoding/json"
"fmt"
"os"
"path/filepath"
"strings"
)
// Paths, relative to the repo root, of the journey registry (the full catalog
// of e2e journeys) and the default per-PR journey plan consumed by CI.
const (
	RegistryPath    = "web/tests/e2e/journeys/registry.json"
	DefaultPlanPath = ".github/agent-journeys.json"
)

// Definition describes one end-to-end journey in the registry: which spec
// file runs it, under which Playwright project, and what setup it requires.
type Definition struct {
	Name                string `json:"name"`
	Description         string `json:"description"`
	TestPath            string `json:"test_path"`
	Project             string `json:"project"`
	RequiresModelServer bool   `json:"requires_model_server"`
	SkipGlobalSetup     bool   `json:"skip_global_setup"`
}

// Registry is the parsed registry.json: the catalog of all known journeys.
type Registry struct {
	Journeys []Definition `json:"journeys"`
}

// Plan is a selection of journey names to run, e.g. from agent-journeys.json.
type Plan struct {
	Journeys []string `json:"journeys"`
}
// LoadRegistry reads and validates the journey registry at RegistryPath under
// the given repo root. An empty registry is an error, and every journey must
// carry a name, a test path, and a Playwright project.
func LoadRegistry(repoRoot string) (Registry, error) {
	var registry Registry
	raw, err := os.ReadFile(filepath.Join(repoRoot, RegistryPath))
	if err != nil {
		return registry, fmt.Errorf("read journey registry: %w", err)
	}
	if err := json.Unmarshal(raw, &registry); err != nil {
		return registry, fmt.Errorf("parse journey registry: %w", err)
	}
	if len(registry.Journeys) == 0 {
		return registry, fmt.Errorf("journey registry is empty")
	}
	for _, entry := range registry.Journeys {
		switch {
		case strings.TrimSpace(entry.Name) == "":
			return registry, fmt.Errorf("journey registry contains an entry with an empty name")
		case strings.TrimSpace(entry.TestPath) == "":
			return registry, fmt.Errorf("journey %q is missing test_path", entry.Name)
		case strings.TrimSpace(entry.Project) == "":
			return registry, fmt.Errorf("journey %q is missing project", entry.Name)
		}
	}
	return registry, nil
}
// LoadPlan reads a journey plan file and validates that it names at least
// one journey to run.
func LoadPlan(planPath string) (Plan, error) {
	var plan Plan
	raw, err := os.ReadFile(planPath)
	if err != nil {
		return plan, fmt.Errorf("read journey plan: %w", err)
	}
	if err := json.Unmarshal(raw, &plan); err != nil {
		return plan, fmt.Errorf("parse journey plan: %w", err)
	}
	if len(plan.Journeys) == 0 {
		return plan, fmt.Errorf("journey plan contains no journeys")
	}
	return plan, nil
}
// ResolveDefinitions maps journey names to their registry definitions,
// preserving the requested order. An unknown name is an error.
func ResolveDefinitions(repoRoot string, names []string) ([]Definition, error) {
	registry, err := LoadRegistry(repoRoot)
	if err != nil {
		return nil, err
	}
	index := make(map[string]Definition, len(registry.Journeys))
	for _, def := range registry.Journeys {
		index[def.Name] = def
	}
	resolved := make([]Definition, 0, len(names))
	for _, name := range names {
		def, found := index[name]
		if !found {
			return nil, fmt.Errorf("unknown journey %q", name)
		}
		resolved = append(resolved, def)
	}
	return resolved, nil
}
// Slug normalizes an arbitrary string into a lowercase, dash-separated
// identifier containing only [a-z0-9-]: runs of non-alphanumeric characters
// collapse to a single dash, and leading/trailing dashes are trimmed. It
// never returns an empty string, falling back to "journey".
func Slug(value string) string {
	lowered := strings.ToLower(strings.TrimSpace(value))
	lowered = strings.ReplaceAll(lowered, "/", "-")

	var out []rune
	pendingRun := false // true while inside a run of separator characters
	for _, r := range lowered {
		isAlnum := (r >= 'a' && r <= 'z') || (r >= '0' && r <= '9')
		switch {
		case isAlnum:
			out = append(out, r)
			pendingRun = false
		case !pendingRun:
			out = append(out, '-')
			pendingRun = true
		}
	}

	slug := strings.Trim(string(out), "-")
	if slug == "" {
		return "journey"
	}
	return slug
}

View File

@@ -0,0 +1,59 @@
package journey
import (
"os"
"path/filepath"
"testing"
)
// TestResolveDefinitions verifies that a journey listed in registry.json can
// be resolved by name with its fields intact. (The stray t.Helper() call was
// removed: marking a Test function itself as a helper misattributes failure
// line numbers to its caller.)
func TestResolveDefinitions(t *testing.T) {
	root := t.TempDir()
	registryDir := filepath.Join(root, "web", "tests", "e2e", "journeys")
	if err := os.MkdirAll(registryDir, 0755); err != nil {
		t.Fatalf("mkdir: %v", err)
	}
	// Minimal but valid registry containing a single journey.
	if err := os.WriteFile(filepath.Join(registryDir, "registry.json"), []byte(`{
"journeys": [
{
"name": "auth-landing",
"description": "test",
"test_path": "tests/e2e/journeys/auth_landing.spec.ts",
"project": "journey",
"requires_model_server": false,
"skip_global_setup": true
}
]
}`), 0644); err != nil {
		t.Fatalf("write registry: %v", err)
	}
	definitions, err := ResolveDefinitions(root, []string{"auth-landing"})
	if err != nil {
		t.Fatalf("resolve definitions: %v", err)
	}
	if len(definitions) != 1 {
		t.Fatalf("expected 1 definition, got %d", len(definitions))
	}
	if definitions[0].Project != "journey" {
		t.Fatalf("expected project journey, got %q", definitions[0].Project)
	}
}
// TestLoadPlanRequiresJourneys verifies that a plan file naming at least one
// journey loads successfully and round-trips its contents. (The stray
// t.Helper() call was removed: marking a Test function itself as a helper
// misattributes failure line numbers to its caller.)
func TestLoadPlanRequiresJourneys(t *testing.T) {
	path := filepath.Join(t.TempDir(), "journeys.json")
	if err := os.WriteFile(path, []byte(`{"journeys":["auth-landing"]}`), 0644); err != nil {
		t.Fatalf("write plan: %v", err)
	}
	plan, err := LoadPlan(path)
	if err != nil {
		t.Fatalf("load plan: %v", err)
	}
	if len(plan.Journeys) != 1 || plan.Journeys[0] != "auth-landing" {
		t.Fatalf("unexpected plan contents: %+v", plan)
	}
}

View File

@@ -0,0 +1,147 @@
package prreview
import (
"fmt"
"regexp"
"sort"
"strings"
)
// Source identifies who authored a review thread: a human, one of the known
// AI reviewers, or a generic bot.
type Source string

const (
	SourceHuman    Source = "human"
	SourceCodex    Source = "codex"
	SourceGreptile Source = "greptile"
	SourceCubic    Source = "cubic"
	SourceBot      Source = "bot"
)

// Comment is a single review comment within a thread.
type Comment struct {
	ID          int    `json:"id"`
	Body        string `json:"body"`
	AuthorLogin string `json:"author_login"`
	URL         string `json:"url,omitempty"`
	CreatedAt   string `json:"created_at,omitempty"`
}

// Thread is one review conversation, optionally anchored to a file location.
type Thread struct {
	ID         string    `json:"id"`
	IsResolved bool      `json:"is_resolved"`
	IsOutdated bool      `json:"is_outdated"`
	Path       string    `json:"path,omitempty"`
	Line       int       `json:"line,omitempty"`
	StartLine  int       `json:"start_line,omitempty"`
	Comments   []Comment `json:"comments"`
}

// PullRequest is the triage input: a PR plus its review threads.
type PullRequest struct {
	Number  int      `json:"number"`
	Title   string   `json:"title"`
	URL     string   `json:"url,omitempty"`
	Threads []Thread `json:"threads"`
}

// ThreadSummary is the triage verdict for one thread: who raised it, which
// category it falls into, and why.
type ThreadSummary struct {
	Thread      Thread   `json:"thread"`
	Source      Source   `json:"source"`
	Category    string   `json:"category"`
	DuplicateOf string   `json:"duplicate_of,omitempty"`
	Reasons     []string `json:"reasons,omitempty"`
}

// TriageResult pairs the input PR with per-thread summaries.
type TriageResult struct {
	PullRequest PullRequest `json:"pull_request"`
	Summaries   []ThreadSummary `json:"summaries"`
}

// nonAlphaNum matches runs of characters outside [a-z0-9]; compiled once at
// package init. Not referenced in this excerpt — presumably used by
// normalization/dedup helpers later in the file (verify before removing).
var nonAlphaNum = regexp.MustCompile(`[^a-z0-9]+`)
// ClassifySource maps a GitHub login to the review source it represents.
// Matching is case-insensitive; tool-specific markers are checked before the
// generic "bot" fallback, and anything unmatched is treated as a human.
func ClassifySource(login string) Source {
	lower := strings.ToLower(strings.TrimSpace(login))
	if strings.Contains(lower, "codex") {
		return SourceCodex
	}
	if strings.Contains(lower, "greptile") {
		return SourceGreptile
	}
	if strings.Contains(lower, "cubic") {
		return SourceCubic
	}
	if strings.HasSuffix(lower, "[bot]") || strings.Contains(lower, "bot") {
		return SourceBot
	}
	return SourceHuman
}
// Triage classifies every review thread on a pull request into one of four
// categories: "resolved", "outdated", "duplicate", or "actionable". The first
// actionable thread seen for a given path/line/body key claims it; later
// actionable threads with the same key are marked duplicates of it. The
// summaries are sorted by category, then source, then thread ID for stable
// output.
func Triage(pr PullRequest) TriageResult {
	firstSeen := make(map[string]string)
	out := make([]ThreadSummary, 0, len(pr.Threads))

	for _, th := range pr.Threads {
		// Source is taken from the thread's first comment author.
		src := SourceHuman
		if len(th.Comments) != 0 {
			src = ClassifySource(th.Comments[0].AuthorLogin)
		}

		s := ThreadSummary{Thread: th, Source: src, Category: "actionable"}
		switch {
		case th.IsResolved:
			s.Category = "resolved"
			s.Reasons = append(s.Reasons, "thread already resolved")
		case th.IsOutdated:
			s.Category = "outdated"
			s.Reasons = append(s.Reasons, "thread marked outdated by GitHub")
		}

		// Duplicate detection only applies to threads still actionable.
		if s.Category == "actionable" {
			key := duplicateKey(th)
			if prior, dup := firstSeen[key]; dup {
				s.Category = "duplicate"
				s.DuplicateOf = prior
				s.Reasons = append(s.Reasons, fmt.Sprintf("duplicates %s", prior))
			} else {
				firstSeen[key] = th.ID
			}
		}

		switch {
		case s.Category != "actionable":
			// Non-actionable threads already carry their reason.
		case src == SourceHuman:
			s.Reasons = append(s.Reasons, "human review requires explicit response or fix")
		default:
			s.Reasons = append(s.Reasons, fmt.Sprintf("%s-generated review comment", src))
		}

		out = append(out, s)
	}

	sort.Slice(out, func(a, b int) bool {
		left, right := out[a], out[b]
		if left.Category != right.Category {
			return left.Category < right.Category
		}
		if left.Source != right.Source {
			return left.Source < right.Source
		}
		return left.Thread.ID < right.Thread.ID
	})

	return TriageResult{PullRequest: pr, Summaries: out}
}
// duplicateKey builds the identity used for duplicate detection: the thread's
// path and line, plus the normalized body of its first comment when present,
// joined with "::".
func duplicateKey(thread Thread) string {
	key := fmt.Sprintf("%s::%d", thread.Path, thread.Line)
	if len(thread.Comments) != 0 {
		key = key + "::" + normalizeBody(thread.Comments[0].Body)
	}
	return key
}
// normalizeBody canonicalizes a comment body for duplicate comparison:
// lower-cased, every character outside [a-z0-9] treated as a separator, and
// runs of separators collapsed to single spaces.
func normalizeBody(body string) string {
	lowered := strings.ToLower(strings.TrimSpace(body))
	cleaned := strings.Map(func(r rune) rune {
		if (r >= 'a' && r <= 'z') || (r >= '0' && r <= '9') {
			return r
		}
		return ' '
	}, lowered)
	return strings.Join(strings.Fields(cleaned), " ")
}

View File

@@ -0,0 +1,61 @@
package prreview
import "testing"
// TestClassifySource covers each recognized reviewer-login pattern plus the
// human fallback.
//
// Note: the stray t.Helper() call was removed — it marks the calling function
// as a helper and belongs only in shared helpers, not in Test functions.
func TestClassifySource(t *testing.T) {
	cases := map[string]Source{
		"openai-codex-reviewer[bot]": SourceCodex,
		"greptile-ai[bot]":           SourceGreptile,
		"cubic-review[bot]":          SourceCubic,
		"renovate[bot]":              SourceBot,
		"human-user":                 SourceHuman,
	}
	for login, expected := range cases {
		if actual := ClassifySource(login); actual != expected {
			t.Fatalf("classify %q: expected %s, got %s", login, expected, actual)
		}
	}
}
// TestTriageMarksDuplicates verifies that two unresolved threads anchored to
// the same file and line with near-identical bodies (differing only in
// punctuation) are collapsed: the later thread is categorized "duplicate"
// rather than "actionable".
//
// Note: the stray t.Helper() call was removed — it marks the calling function
// as a helper and belongs only in shared helpers, not in Test functions.
func TestTriageMarksDuplicates(t *testing.T) {
	result := Triage(PullRequest{
		Number: 42,
		Threads: []Thread{
			{
				ID:   "thread-1",
				Path: "web/src/foo.tsx",
				Line: 10,
				Comments: []Comment{
					{ID: 1, AuthorLogin: "greptile-ai[bot]", Body: "Handle null values here."},
				},
			},
			{
				ID:   "thread-2",
				Path: "web/src/foo.tsx",
				Line: 10,
				Comments: []Comment{
					{ID: 2, AuthorLogin: "openai-codex-reviewer[bot]", Body: "Handle null values here"},
				},
			},
		},
	})
	if len(result.Summaries) != 2 {
		t.Fatalf("expected 2 summaries, got %d", len(result.Summaries))
	}
	var duplicateFound bool
	for _, summary := range result.Summaries {
		if summary.Thread.ID == "thread-2" && summary.Category == "duplicate" {
			duplicateFound = true
		}
	}
	if !duplicateFound {
		t.Fatalf("expected duplicate thread to be detected: %+v", result.Summaries)
	}
}

16
uv.lock generated
View File

@@ -4511,7 +4511,7 @@ dev = [
{ name = "matplotlib", specifier = "==3.10.8" },
{ name = "mypy", specifier = "==1.13.0" },
{ name = "mypy-extensions", specifier = "==1.0.0" },
{ name = "onyx-devtools", specifier = "==0.7.5" },
{ name = "onyx-devtools", specifier = "==0.7.4" },
{ name = "openapi-generator-cli", specifier = "==7.17.0" },
{ name = "pandas-stubs", specifier = "~=2.3.3" },
{ name = "pre-commit", specifier = "==3.2.2" },
@@ -4554,19 +4554,19 @@ model-server = [
[[package]]
name = "onyx-devtools"
version = "0.7.5"
version = "0.7.4"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "fastapi" },
{ name = "openapi-generator-cli" },
]
wheels = [
{ url = "https://files.pythonhosted.org/packages/cb/f8/844e34f5126ae40fff0d012bba0b28f031f8871062759bb3789eae4f5e0a/onyx_devtools-0.7.5-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:b3cd434c722ae48a1f651748a9f094711b29d1a9f37fbbadef3144f2cdb0f16d", size = 4238900, upload-time = "2026-04-10T07:02:16.382Z" },
{ url = "https://files.pythonhosted.org/packages/2d/97/d1db725f900b199fa3f7a7a7c9b51ae75d4b18755c924f00f06a7703e552/onyx_devtools-0.7.5-py3-none-macosx_11_0_arm64.whl", hash = "sha256:c50e3d76d4f8cc4faa6250e758d42f0249067f0e17bc82b99c6c00dd48114393", size = 3913672, upload-time = "2026-04-10T07:02:17.46Z" },
{ url = "https://files.pythonhosted.org/packages/31/83/e11bedb0a1321b63c844a418be1990c172ed363c6ee612978c3a38df71f1/onyx_devtools-0.7.5-py3-none-manylinux_2_17_aarch64.whl", hash = "sha256:ec01aeaaa14854b0933bb85bbfc51184599d3dbf1c0097ff59c1c72db8222a5a", size = 3779585, upload-time = "2026-04-10T07:02:16.31Z" },
{ url = "https://files.pythonhosted.org/packages/b3/85/128d25cd35c1adc436dcff9ab4f2c20cf29528d09415280c1230ff0ca993/onyx_devtools-0.7.5-py3-none-manylinux_2_17_x86_64.whl", hash = "sha256:586d50ecb6dcea95611135e4cd4529ebedd8ab84a41b1adf3be1280a48dc52af", size = 4201962, upload-time = "2026-04-10T07:02:14.466Z" },
{ url = "https://files.pythonhosted.org/packages/99/5d/83c80f918b399fea998cd41bfe90bda733eda77e133ca4dc1e9ce18a9b4a/onyx_devtools-0.7.5-py3-none-win_amd64.whl", hash = "sha256:c45d80f0093ba738120b77c4c0bde13843e33d786ae8608eb10490f06183d89b", size = 4320088, upload-time = "2026-04-10T07:02:17.09Z" },
{ url = "https://files.pythonhosted.org/packages/26/bf/b9c85cc61981bd71c0f1cbb50192763b11788a7c8636b1e01f750251c92c/onyx_devtools-0.7.5-py3-none-win_arm64.whl", hash = "sha256:9852a7cc29939371e016b794f2cffdb88680280d857d24c191c5188884416a3d", size = 3858839, upload-time = "2026-04-10T07:02:20.098Z" },
{ url = "https://files.pythonhosted.org/packages/cc/3f/584bb003333b6e6d632b06bbf99d410c7a71adde1711076fd44fe88d966d/onyx_devtools-0.7.4-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:6c51d9199ff8ff8fe64a3cfcf77f8170508722b33a1de54c5474be0447b7afa8", size = 4237700, upload-time = "2026-04-09T21:28:20.694Z" },
{ url = "https://files.pythonhosted.org/packages/0a/04/8c28522d51a66b1bdc997a1c72821122eab23f048459646c6ee62a39f6eb/onyx_devtools-0.7.4-py3-none-macosx_11_0_arm64.whl", hash = "sha256:f64a4cec6d3616b9ca7354e326994882c9ff2cb3f9fc9a44e55f0eb6a6ff1c1c", size = 3912751, upload-time = "2026-04-09T21:28:23.079Z" },
{ url = "https://files.pythonhosted.org/packages/8c/e6/ae60307cc50064dacb58e003c9a367d5c85118fd89a597abf3de5fd66f0a/onyx_devtools-0.7.4-py3-none-manylinux_2_17_aarch64.whl", hash = "sha256:31c7cecaaa329e3f6d53864290bc53fd0b823453c6cfdb8be7931a8925f5c075", size = 3778188, upload-time = "2026-04-09T21:28:23.14Z" },
{ url = "https://files.pythonhosted.org/packages/f1/d1/5a2789efac7d8f19d30d4d8da1862dd10a16b65d8c9b200542a959094a17/onyx_devtools-0.7.4-py3-none-manylinux_2_17_x86_64.whl", hash = "sha256:4c44e3c21253ea92127af483155190c14426c729d93e244aedc33875f74d3514", size = 4200526, upload-time = "2026-04-09T21:28:23.711Z" },
{ url = "https://files.pythonhosted.org/packages/0a/40/56a467eaa7b78411971898191cf0dc3ee49b7f448d1cfe76cd432f6458d3/onyx_devtools-0.7.4-py3-none-win_amd64.whl", hash = "sha256:6fa2b63b702bc5ecbeed5f9eadec57d61ac5c4a646cf5fbd66ee340f53b7d81c", size = 4319090, upload-time = "2026-04-09T21:28:23.26Z" },
{ url = "https://files.pythonhosted.org/packages/fa/ef/c866fa8ce1f75e1ac67bc239e767b8944cb1a12a44950986ce57e06db17f/onyx_devtools-0.7.4-py3-none-win_arm64.whl", hash = "sha256:c84cbe6a85474dc9f005f079796cf031e80c4249897432ad9f370cd27f72970a", size = 3857229, upload-time = "2026-04-09T21:28:23.484Z" },
]
[[package]]

View File

@@ -1,46 +0,0 @@
import React from "react";
import type { Meta, StoryObj } from "@storybook/react";
import { Divider } from "@opal/components/divider/components";
// Storybook metadata: registers the Divider stories under opal/components
// with autodocs enabled.
const meta: Meta<typeof Divider> = {
  title: "opal/components/Divider",
  component: Divider,
  tags: ["autodocs"],
};

export default meta;

type Story = StoryObj<typeof Divider>;

// Bare horizontal rule with no label or description.
export const Plain: Story = {
  render: () => <Divider />,
};

// Label rendered to the left of the rule.
export const WithTitle: Story = {
  render: () => <Divider title="Section" />,
};

// Helper text rendered below the rule.
export const WithDescription: Story = {
  render: () => (
    <Divider description="Additional configuration options for power users." />
  ),
};

// Collapsible section, closed on first render (defaultOpen={false}).
export const Foldable: Story = {
  render: () => (
    <Divider title="Advanced Options" foldable defaultOpen={false}>
      <div style={{ padding: "0.5rem 0" }}>
        <p>This content is revealed when the divider is expanded.</p>
      </div>
    </Divider>
  ),
};

// Collapsible section, open on first render.
export const FoldableDefaultOpen: Story = {
  render: () => (
    <Divider title="Details" foldable defaultOpen>
      <div style={{ padding: "0.5rem 0" }}>
        <p>This starts open by default.</p>
      </div>
    </Divider>
  ),
};

View File

@@ -1,62 +0,0 @@
# Divider
**Import:** `import { Divider } from "@opal/components";`
A horizontal rule that optionally displays a title, description, or foldable content section.
## Props
The component uses a discriminated union with four variants. `title` and `description` are mutually exclusive; `foldable` requires `title`.
### Bare divider
No props — renders a plain horizontal line.
### Titled divider
| Prop | Type | Default | Description |
|---|---|---|---|
| `title` | `string \| RichStr` | **(required)** | Label to the left of the line |
### Described divider
| Prop | Type | Default | Description |
|---|---|---|---|
| `description` | `string \| RichStr` | **(required)** | Text below the line |
### Foldable divider
| Prop | Type | Default | Description |
|---|---|---|---|
| `title` | `string \| RichStr` | **(required)** | Label to the left of the line |
| `foldable` | `true` | **(required)** | Enables fold/expand behavior |
| `open` | `boolean` | — | Controlled open state |
| `defaultOpen` | `boolean` | `false` | Uncontrolled initial open state |
| `onOpenChange` | `(open: boolean) => void` | — | Callback when toggled |
| `children` | `ReactNode` | — | Content revealed when open |
## Usage Examples
```tsx
import { Divider } from "@opal/components";
// Plain line
<Divider />
// With title
<Divider title="Advanced" />
// With description
<Divider description="Additional configuration options." />
// Foldable
<Divider title="Advanced Options" foldable>
<p>Hidden content here</p>
</Divider>
// Controlled foldable
const [open, setOpen] = useState(false);
<Divider title="Details" foldable open={open} onOpenChange={setOpen}>
<p>Controlled content</p>
</Divider>
```

View File

@@ -1,163 +0,0 @@
"use client";
import "@opal/components/divider/styles.css";
import { useState, useCallback } from "react";
import type { RichStr } from "@opal/types";
import { Button, Text } from "@opal/components";
import { SvgChevronRight } from "@opal/icons";
import { Interactive } from "@opal/core";
// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------
/**
 * Fields reserved for the foldable variant. Declared `never` here so the
 * non-foldable members of the union reject fold-related props at compile
 * time.
 */
interface DividerNeverFields {
  open?: never;
  defaultOpen?: never;
  onOpenChange?: never;
  children?: never;
}

/** Plain line — no title, no description. */
interface DividerBareProps extends DividerNeverFields {
  title?: never;
  description?: never;
  foldable?: false;
  ref?: React.Ref<HTMLDivElement>;
}

/** Line with a title to the left. */
interface DividerTitledProps extends DividerNeverFields {
  title: string | RichStr;
  description?: never;
  foldable?: false;
  ref?: React.Ref<HTMLDivElement>;
}

/** Line with a description below. */
interface DividerDescribedProps extends DividerNeverFields {
  title?: never;
  /** Description rendered below the divider line. */
  description: string | RichStr;
  foldable?: false;
  ref?: React.Ref<HTMLDivElement>;
}

/** Foldable — requires title, reveals children. */
interface DividerFoldableProps {
  /** Title is required when foldable. */
  title: string | RichStr;
  foldable: true;
  description?: never;
  /** Controlled open state. */
  open?: boolean;
  /** Uncontrolled default open state. */
  defaultOpen?: boolean;
  /** Callback when open state changes. */
  onOpenChange?: (open: boolean) => void;
  /** Content revealed when open. */
  children?: React.ReactNode;
  ref?: React.Ref<HTMLDivElement>;
}

/**
 * Discriminated union over the four divider variants. `title` and
 * `description` are mutually exclusive, and fold-related props are only
 * accepted when `foldable` is `true`.
 */
type DividerProps =
  | DividerBareProps
  | DividerTitledProps
  | DividerDescribedProps
  | DividerFoldableProps;
// ---------------------------------------------------------------------------
// Divider
// ---------------------------------------------------------------------------
function Divider(props: DividerProps) {
if (props.foldable) {
return <FoldableDivider {...props} />;
}
const { ref } = props;
const title = "title" in props ? props.title : undefined;
const description = "description" in props ? props.description : undefined;
return (
<div ref={ref} className="opal-divider">
<div className="opal-divider-row">
{title && (
<div className="opal-divider-title">
<Text font="secondary-body" color="text-03" nowrap>
{title}
</Text>
</div>
)}
<div className="opal-divider-line" />
</div>
{description && (
<div className="opal-divider-description">
<Text font="secondary-body" color="text-03">
{description}
</Text>
</div>
)}
</div>
);
}
// ---------------------------------------------------------------------------
// FoldableDivider (internal)
// ---------------------------------------------------------------------------
/**
 * Internal implementation of the foldable divider variant.
 *
 * Supports controlled (`open` + `onOpenChange`) and uncontrolled
 * (`defaultOpen`) usage; children are only mounted while open.
 *
 * Fix: `ref` is declared on DividerFoldableProps and spread into this
 * component by Divider, but was previously never destructured — so it was
 * silently dropped for the foldable variant. It is now forwarded to the
 * `.opal-divider` element, matching the non-foldable branch.
 */
function FoldableDivider({
  title,
  open: controlledOpen,
  defaultOpen = false,
  onOpenChange,
  children,
  ref,
}: DividerFoldableProps) {
  const [internalOpen, setInternalOpen] = useState(defaultOpen);
  const isControlled = controlledOpen !== undefined;
  const isOpen = isControlled ? controlledOpen : internalOpen;

  const toggle = useCallback(() => {
    const next = !isOpen;
    // Only track state locally in uncontrolled mode; controlled callers own it.
    if (!isControlled) setInternalOpen(next);
    onOpenChange?.(next);
  }, [isOpen, isControlled, onOpenChange]);

  return (
    <>
      <Interactive.Stateless
        variant="default"
        prominence="tertiary"
        interaction={isOpen ? "hover" : "rest"}
        onClick={toggle}
      >
        <Interactive.Container
          roundingVariant="sm"
          heightVariant="fit"
          widthVariant="full"
        >
          <div ref={ref} className="opal-divider">
            <div className="opal-divider-row">
              <div className="opal-divider-title">
                <Text font="secondary-body" color="inherit" nowrap>
                  {title}
                </Text>
              </div>
              <div className="opal-divider-line" />
              <div className="opal-divider-chevron" data-open={isOpen}>
                {/* NOTE(review): a Button nested inside the clickable
                    Interactive wrapper is a nested-interactive a11y smell —
                    confirm this Button is purely decorative. */}
                <Button
                  icon={SvgChevronRight}
                  size="sm"
                  prominence="tertiary"
                />
              </div>
            </div>
          </div>
        </Interactive.Container>
      </Interactive.Stateless>
      {isOpen && children}
    </>
  );
}
export { Divider, type DividerProps };

View File

@@ -1,38 +0,0 @@
/* ---------------------------------------------------------------------------
Divider
A horizontal rule with optional title, foldable chevron, or description.
--------------------------------------------------------------------------- */
/* Outer wrapper: stacks the rule row and the optional description. */
.opal-divider {
  @apply flex flex-col w-full;
  padding: 0.25rem 0.5rem;
  gap: 0.75rem;
}

/* Row holding the optional title, the 1px line, and the chevron. */
.opal-divider-row {
  @apply flex flex-row items-center w-full;
  gap: 2px;
  padding: 0px;
}

/* Title label to the left of the line. */
.opal-divider-title {
  @apply flex flex-col justify-center;
  padding: 0px 2px;
}

/* The rule itself: fills the remaining row width at 1px height. */
.opal-divider-line {
  @apply flex-1 h-px bg-border-01;
}

/* Helper text rendered below the line. */
.opal-divider-description {
  padding: 0px 2px;
}

/* Chevron container; the rotation below animates via this transition. */
.opal-divider-chevron {
  @apply transition-transform duration-200 ease-in-out;
}

/* Rotated 90° while the foldable divider is open (data-open set in JSX). */
.opal-divider-chevron[data-open="true"] {
  transform: rotate(90deg);
}

View File

@@ -54,12 +54,6 @@ export {
type TagColor,
} from "@opal/components/tag/components";
/* Divider */
export {
Divider,
type DividerProps,
} from "@opal/components/divider/components";
/* Card */
export {
Card,

View File

@@ -10,7 +10,7 @@ const SvgAnthropic = ({ size, ...props }: IconProps) => (
>
<path
d="M36.1779 9.78003H29.1432L41.9653 42.2095H49L36.1779 9.78003ZM15.8221 9.78003L3 42.2095H10.1844L12.8286 35.4243H26.2495L28.8438 42.2095H36.0282L23.2061 9.78003H15.8221ZM15.1236 29.3874L19.5141 18.0121L23.9046 29.3874H15.1236Z"
fill="var(--text-05)"
fill="currentColor"
/>
</svg>
);

View File

@@ -12,7 +12,7 @@ const SvgAws = ({ size, ...props }: IconProps) => (
<title>AWS</title>
<path
d="M14.6195 23.2934C14.6195 23.9333 14.7233 24.4522 14.8443 24.8326C14.9827 25.2131 15.1556 25.6282 15.3978 26.0778C15.4842 26.2162 15.5188 26.3546 15.5188 26.4756C15.5188 26.6486 15.4151 26.8215 15.1902 26.9945L14.1007 27.7208C13.945 27.8246 13.7894 27.8765 13.651 27.8765C13.4781 27.8765 13.3051 27.79 13.1322 27.6344C12.89 27.3749 12.6825 27.0982 12.5096 26.8215C12.3366 26.5275 12.1637 26.1989 11.9734 25.8011C10.6245 27.3922 8.92958 28.1878 6.88881 28.1878C5.43606 28.1878 4.27731 27.7727 3.42988 26.9426C2.58244 26.1124 2.15007 25.0056 2.15007 23.622C2.15007 22.152 2.66891 20.9586 3.72389 20.0593C4.77886 19.16 6.17973 18.7103 7.96108 18.7103C8.54909 18.7103 9.15441 18.7622 9.79431 18.8487C10.4342 18.9352 11.0914 19.0735 11.7832 19.2292V17.9667C11.7832 16.6523 11.5065 15.7356 10.9703 15.1995C10.4169 14.6634 9.483 14.404 8.15132 14.404C7.546 14.404 6.9234 14.4731 6.28349 14.6288C5.64359 14.7844 5.02098 14.9747 4.41567 15.2168C4.13896 15.3379 3.93142 15.407 3.81036 15.4416C3.6893 15.4762 3.60282 15.4935 3.53364 15.4935C3.29152 15.4935 3.17046 15.3206 3.17046 14.9574V14.1099C3.17046 13.8332 3.20505 13.6257 3.29152 13.5046C3.37799 13.3836 3.53364 13.2625 3.77577 13.1414C4.38108 12.8301 5.10746 12.5707 5.9549 12.3632C6.80233 12.1384 7.70165 12.0346 8.65286 12.0346C10.7109 12.0346 12.2156 12.5015 13.1841 13.4355C14.1353 14.3694 14.6195 15.7875 14.6195 17.6899V23.2934ZM7.63248 25.9222C8.2032 25.9222 8.79122 25.8184 9.41383 25.6109C10.0364 25.4034 10.5899 25.0229 11.0568 24.504C11.3335 24.1754 11.5411 23.8122 11.6448 23.3972C11.7486 22.9821 11.8178 22.4806 11.8178 21.8925V21.1662C11.3162 21.0451 10.7801 20.9413 10.2267 20.8722C9.67325 20.803 9.13711 20.7684 8.60098 20.7684C7.44224 20.7684 6.5948 20.9932 6.02407 21.4602C5.45335 21.9271 5.17664 22.5843 5.17664 23.4491C5.17664 24.2619 5.38417 24.8672 5.81654 25.2823C6.23161 25.7147 6.83692 25.9222 7.63248 25.9222ZM21.5201 27.79C21.2088 27.79 21.0012 27.7381 20.8629 27.6171C20.7245 27.5133 20.6035 27.2712 20.4997 
26.9426L16.4355 13.5738C16.3317 13.2279 16.2798 13.0031 16.2798 12.882C16.2798 12.6053 16.4182 12.4497 16.6949 12.4497H18.3897C18.7183 12.4497 18.9432 12.5015 19.0642 12.6226C19.2026 12.7264 19.3064 12.9685 19.4101 13.2971L22.3156 24.7462L25.0136 13.2971C25.1001 12.9512 25.2038 12.7264 25.3422 12.6226C25.4806 12.5188 25.7227 12.4497 26.034 12.4497H27.4176C27.7462 12.4497 27.971 12.5015 28.1093 12.6226C28.2477 12.7264 28.3688 12.9685 28.4379 13.2971L31.1705 24.8845L34.1625 13.2971C34.2662 12.9512 34.3873 12.7264 34.5084 12.6226C34.6467 12.5188 34.8716 12.4497 35.1829 12.4497H36.7913C37.068 12.4497 37.2236 12.588 37.2236 12.882C37.2236 12.9685 37.2063 13.055 37.189 13.1587C37.1717 13.2625 37.1372 13.4009 37.068 13.5911L32.9 26.9599C32.7962 27.3058 32.6751 27.5306 32.5368 27.6344C32.3984 27.7381 32.1736 27.8073 31.8796 27.8073H30.3922C30.0636 27.8073 29.8388 27.7554 29.7004 27.6344C29.5621 27.5133 29.441 27.2885 29.3719 26.9426L26.6912 15.7875L24.0278 26.9253C23.9413 27.2712 23.8376 27.496 23.6992 27.6171C23.5609 27.7381 23.3187 27.79 23.0074 27.79H21.5201ZM43.7437 28.257C42.8444 28.257 41.9451 28.1532 41.0803 27.9457C40.2156 27.7381 39.5411 27.5133 39.0914 27.2539C38.8147 27.0982 38.6245 26.9253 38.5553 26.7696C38.4861 26.614 38.4515 26.441 38.4515 26.2854V25.4034C38.4515 25.0402 38.5899 24.8672 38.8493 24.8672C38.9531 24.8672 39.0569 24.8845 39.1606 24.9191C39.2644 24.9537 39.42 25.0229 39.593 25.0921C40.181 25.3515 40.8209 25.559 41.4954 25.6974C42.1872 25.8357 42.8617 25.9049 43.5535 25.9049C44.643 25.9049 45.4905 25.7147 46.0785 25.3342C46.6665 24.9537 46.9778 24.4003 46.9778 23.6912C46.9778 23.2069 46.8222 22.8092 46.5109 22.4806C46.1996 22.152 45.6115 21.858 44.7641 21.5812L42.2564 20.803C40.9939 20.4052 40.0599 19.8172 39.4892 19.0389C38.9185 18.278 38.6245 17.4305 38.6245 16.5312C38.6245 15.8048 38.7801 15.1649 39.0914 14.6115C39.4027 14.0581 39.8178 13.5738 40.3367 13.1933C40.8555 12.7956 41.4435 12.5015 42.1353 12.294C42.8271 12.0865 43.5535 12 44.3144 
12C44.6949 12 45.0927 12.0173 45.4732 12.0692C45.871 12.1211 46.2341 12.1902 46.5973 12.2594C46.9432 12.3459 47.2718 12.4324 47.5831 12.5361C47.8944 12.6399 48.1366 12.7437 48.3095 12.8474C48.5516 12.9858 48.7246 13.1242 48.8283 13.2798C48.9321 13.4182 48.984 13.6084 48.984 13.8505V14.6634C48.984 15.0266 48.8456 15.2168 48.5862 15.2168C48.4479 15.2168 48.223 15.1476 47.929 15.0093C46.9432 14.5596 45.8364 14.3348 44.6084 14.3348C43.6227 14.3348 42.8444 14.4904 42.3083 14.819C41.7721 15.1476 41.4954 15.6492 41.4954 16.3583C41.4954 16.8425 41.6684 17.2576 42.0142 17.5862C42.3601 17.9148 43 18.2434 43.9167 18.5374L46.3725 19.3156C47.6177 19.7134 48.517 20.2668 49.0532 20.9759C49.5893 21.685 49.8487 22.4979 49.8487 23.3972C49.8487 24.1408 49.6931 24.8153 49.3991 25.4034C49.0878 25.9914 48.6727 26.5102 48.1366 26.9253C47.6004 27.3577 46.9605 27.669 46.2168 27.8938C45.4386 28.1359 44.6257 28.257 43.7437 28.257Z"
className="fill-[#252F3E] dark:fill-text-05"
fill="#252F3E"
/>
<path
fillRule="evenodd"

View File

@@ -1,25 +0,0 @@
import type { IconProps } from "@opal/types";
/** Cohere brand logo, rendered as a square of `size` with fixed brand-color fills. */
const SvgCohere = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 52 52"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    {...props}
  >
    <path
      d="M18.256 30.224C19.4293 30.224 21.776 30.1653 25.0613 28.816C28.8747 27.232 36.384 24.416 41.84 21.4827C45.6533 19.4293 47.296 16.7307 47.296 13.0933C47.296 8.10667 43.248 4 38.2027 4H17.0827C9.86667 4 4 9.86667 4 17.0827C4 24.2987 9.51467 30.224 18.256 30.224Z"
      fill="#39594D"
    />
    <path
      d="M21.8347 39.2C21.8347 35.68 23.9467 32.4533 27.232 31.104L33.8613 28.3467C40.608 25.5893 48 30.5173 48 37.792C48 43.424 43.424 48 37.792 48H30.576C25.7653 48 21.8347 44.0693 21.8347 39.2Z"
      fill="#D18EE2"
    />
    <path
      d="M11.568 31.9253C7.40267 31.9253 4 35.328 4 39.4933V40.4907C4 44.5973 7.40267 48 11.568 48C15.7333 48 19.136 44.5973 19.136 40.432V39.4347C19.0773 35.328 15.7333 31.9253 11.568 31.9253Z"
      fill="#FF7759"
    />
  </svg>
);
export default SvgCohere;

View File

@@ -3,7 +3,6 @@ export { default as SvgAws } from "@opal/logos/aws";
export { default as SvgAzure } from "@opal/logos/azure";
export { default as SvgBifrost } from "@opal/logos/bifrost";
export { default as SvgClaude } from "@opal/logos/claude";
export { default as SvgCohere } from "@opal/logos/cohere";
export { default as SvgDeepseek } from "@opal/logos/deepseek";
export { default as SvgDiscord } from "@opal/logos/discord";
export { default as SvgGemini } from "@opal/logos/gemini";
@@ -12,7 +11,6 @@ export { default as SvgLitellm } from "@opal/logos/litellm";
export { default as SvgLmStudio } from "@opal/logos/lm-studio";
export { default as SvgMicrosoft } from "@opal/logos/microsoft";
export { default as SvgMistral } from "@opal/logos/mistral";
export { default as SvgNomic } from "@opal/logos/nomic";
export { default as SvgOllama } from "@opal/logos/ollama";
export { default as SvgOnyxLogo } from "@opal/logos/onyx-logo";
export { default as SvgOnyxLogoTyped } from "@opal/logos/onyx-logo-typed";
@@ -21,4 +19,3 @@ export { default as SvgOpenai } from "@opal/logos/openai";
export { default as SvgOpenrouter } from "@opal/logos/openrouter";
export { default as SvgQwen } from "@opal/logos/qwen";
export { default as SvgSlack } from "@opal/logos/slack";
export { default as SvgVoyage } from "@opal/logos/voyage";

View File

@@ -1,21 +0,0 @@
import type { IconProps } from "@opal/types";
/**
 * Nomic brand logo, rendered as a square of `size`.
 *
 * NOTE(review): fills use `var(--text-05)` while at least one sibling logo in
 * this set uses `currentColor` — confirm which theming convention this icon
 * family should follow.
 */
const SvgNomic = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 52 52"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    {...props}
  >
    <path
      d="M35.858 6.31995H46V45.6709H35.6146C32.0852 36.8676 25.1481 27.7804 15.7363 24.8189V6.31995H25.4726C26.5274 12.7296 30.1618 18.3744 35.858 21.6546V6.31995Z"
      fill="var(--text-05)"
    />
    <path
      d="M15.7363 24.8189V45.6709H6L6 30.0927C9.05968 27.6167 11.9635 25.8737 15.7363 24.8189Z"
      fill="var(--text-05)"
    />
  </svg>
);
export default SvgNomic;

File diff suppressed because one or more lines are too long

Some files were not shown because too many files have changed in this diff Show More