Compare commits

..

4 Commits

Author SHA1 Message Date
Wenxi Onyx
4f793ff870 Merge remote-tracking branch 'origin/main' into codex/agent-lab 2026-04-09 16:15:03 -07:00
Jamison Lahman
4a96ef13d7 chore(devtools): devcontainer allows go and rust repos (#10041) 2026-04-09 15:46:50 -07:00
Wenxi Onyx
55f570261f Merge remote-tracking branch 'origin/main' into codex/agent-lab 2026-04-09 15:07:50 -07:00
Wenxi Onyx
289a7b807e agent lab init 2026-04-09 15:07:02 -07:00
68 changed files with 7477 additions and 526 deletions

View File

@@ -10,6 +10,7 @@
"source=${localEnv:HOME}/.gitconfig,target=/home/dev/.gitconfig.host,type=bind,readonly",
"source=${localEnv:HOME}/.ssh,target=/home/dev/.ssh.host,type=bind,readonly",
"source=${localEnv:HOME}/.config/nvim,target=/home/dev/.config/nvim.host,type=bind,readonly",
"source=onyx-devcontainer-cache,target=/home/dev/.cache,type=volume",
"source=onyx-devcontainer-local,target=/home/dev/.local,type=volume"
],
"remoteUser": "dev",

View File

@@ -5,7 +5,7 @@ set -euo pipefail
# bind-mounted files are accessible without running as root.
#
# Standard Docker: Workspace is owned by the host user's UID (e.g. 1000).
# We remap dev to that UID fast and seamless.
# We remap dev to that UID -- fast and seamless.
#
# Rootless Docker: Workspace appears as root-owned (UID 0) inside the
# container due to user-namespace mapping. We can't remap
@@ -23,9 +23,10 @@ DEV_GID=$(id -g "$TARGET_USER")
DEV_HOME=/home/"$TARGET_USER"
# Ensure directories that tools expect exist under ~dev.
# ~/.local is a named Docker volume ensure subdirs exist and are owned by dev.
# ~/.local and ~/.cache are named Docker volumes -- ensure they are owned by dev.
mkdir -p "$DEV_HOME"/.local/state "$DEV_HOME"/.local/share
chown -R "$TARGET_USER":"$TARGET_USER" "$DEV_HOME"/.local
chown -R "$TARGET_USER":"$TARGET_USER" "$DEV_HOME"/.cache
# Copy host configs mounted as *.host into their real locations.
# This gives the dev user owned copies without touching host originals.
@@ -41,7 +42,7 @@ if [ -d "$DEV_HOME/.config/nvim.host" ]; then
chown -R "$TARGET_USER":"$TARGET_USER" "$DEV_HOME/.config/nvim"
fi
# Already matching nothing to do.
# Already matching -- nothing to do.
if [ "$WS_UID" = "$DEV_UID" ] && [ "$WS_GID" = "$DEV_GID" ]; then
exit 0
fi

View File

@@ -24,7 +24,7 @@ fi
ipset create allowed-domains hash:net || true
ipset flush allowed-domains
# Fetch GitHub IP ranges (IPv4 only ipset hash:net and iptables are IPv4)
# Fetch GitHub IP ranges (IPv4 only -- ipset hash:net and iptables are IPv4)
GITHUB_IPS=$(curl -s https://api.github.com/meta | jq -r '.api[]' 2>/dev/null | grep -v ':' || echo "")
for ip in $GITHUB_IPS; do
if ! ipset add allowed-domains "$ip" -exist 2>&1; then
@@ -42,6 +42,9 @@ ALLOWED_DOMAINS=(
"update.code.visualstudio.com"
"pypi.org"
"files.pythonhosted.org"
"go.dev"
"storage.googleapis.com"
"static.rust-lang.org"
)
for domain in "${ALLOWED_DOMAINS[@]}"; do

416
AGENTS.md
View File

@@ -1,361 +1,55 @@
# PROJECT KNOWLEDGE BASE
This file provides guidance to AI agents when working with code in this repository.
## KEY NOTES
- If you run into any missing python dependency errors, try running your command with `source .venv/bin/activate` \
to activate the python venv.
- To make tests work, check the `.env` file at the root of the project to find an OpenAI key.
- If using `playwright` to explore the frontend, you can usually log in with username `a@example.com` and password
`a`. The app can be accessed at `http://localhost:3000`.
- You should assume that all Onyx services are running. To verify, you can check the `backend/log` directory to
make sure we see logs coming out from the relevant service.
- To connect to the Postgres database, use: `docker exec -it onyx-relational_db-1 psql -U postgres -c "<SQL>"`
- When making calls to the backend, always go through the frontend. E.g. make a call to `http://localhost:3000/api/persona` not `http://localhost:8080/api/persona`
- Put ALL db operations under the `backend/onyx/db` / `backend/ee/onyx/db` directories. Don't run queries
outside of those directories.
## Project Overview
**Onyx** (formerly Danswer) is an open-source Gen-AI and Enterprise Search platform that connects to company documents, apps, and people. It features a modular architecture with both Community Edition (MIT licensed) and Enterprise Edition offerings.
### Background Workers (Celery)
Onyx uses Celery for asynchronous task processing with multiple specialized workers:
#### Worker Types
1. **Primary Worker** (`celery_app.py`)
- Coordinates core background tasks and system-wide operations
- Handles connector management, document sync, pruning, and periodic checks
- Runs with 4 threads concurrency
- Tasks: connector deletion, vespa sync, pruning, LLM model updates, user file sync
2. **Docfetching Worker** (`docfetching`)
- Fetches documents from external data sources (connectors)
- Spawns docprocessing tasks for each document batch
- Implements watchdog monitoring for stuck connectors
- Configurable concurrency (default from env)
3. **Docprocessing Worker** (`docprocessing`)
- Processes fetched documents through the indexing pipeline:
- Upserts documents to PostgreSQL
- Chunks documents and adds contextual information
- Embeds chunks via model server
- Writes chunks to Vespa vector database
- Updates document metadata
- Configurable concurrency (default from env)
4. **Light Worker** (`light`)
- Handles lightweight, fast operations
- Tasks: vespa operations, document permissions sync, external group sync
- Higher concurrency for quick tasks
5. **Heavy Worker** (`heavy`)
- Handles resource-intensive operations
- Primary task: document pruning operations
- Runs with 4 threads concurrency
6. **KG Processing Worker** (`kg_processing`)
- Handles Knowledge Graph processing and clustering
- Builds relationships between documents
- Runs clustering algorithms
- Configurable concurrency
7. **Monitoring Worker** (`monitoring`)
- System health monitoring and metrics collection
- Monitors Celery queues, process memory, and system status
- Single thread (monitoring doesn't need parallelism)
- Cloud-specific monitoring tasks
8. **User File Processing Worker** (`user_file_processing`)
- Processes user-uploaded files
- Handles user file indexing and project synchronization
- Configurable concurrency
9. **Beat Worker** (`beat`)
- Celery's scheduler for periodic tasks
- Uses DynamicTenantScheduler for multi-tenant support
- Schedules tasks like:
- Indexing checks (every 15 seconds)
- Connector deletion checks (every 20 seconds)
- Vespa sync checks (every 20 seconds)
- Pruning checks (every 20 seconds)
- KG processing (every 60 seconds)
- Monitoring tasks (every 5 minutes)
- Cleanup tasks (hourly)
#### Key Features
- **Thread-based Workers**: All workers use thread pools (not processes) for stability
- **Tenant Awareness**: Multi-tenant support with per-tenant task isolation. There is a
middleware layer that automatically finds the appropriate tenant ID when sending tasks
via Celery Beat.
- **Task Prioritization**: High, Medium, Low priority queues
- **Monitoring**: Built-in heartbeat and liveness checking
- **Failure Handling**: Automatic retry and failure recovery mechanisms
- **Redis Coordination**: Inter-process communication via Redis
- **PostgreSQL State**: Task state and metadata stored in PostgreSQL
#### Important Notes
**Defining Tasks**:
- Always use `@shared_task` rather than `@celery_app`
- Put tasks under `background/celery/tasks/` or `ee/background/celery/tasks`
- Never enqueue a task without an expiration. Always supply `expires=` when
sending tasks, either from the beat schedule or directly from another task. It
should never be acceptable to submit code which enqueues tasks without an
expiration, as doing so can lead to unbounded task queue growth.
**Defining APIs**:
When creating new FastAPI APIs, do NOT use the `response_model` field. Instead, just type the
function.
**Testing Updates**:
If you make any updates to a celery worker and you want to test these changes, you will need
to ask me to restart the celery worker. There is no auto-restart on code-change mechanism.
**Task Time Limits**:
Since all tasks are executed in thread pools, the time limit features of Celery are silently
disabled and won't work. Timeout logic must be implemented within the task itself.
### Code Quality
```bash
# Install and run pre-commit hooks
pre-commit install
pre-commit run --all-files
```
NOTE: Always make sure everything is strictly typed (both in Python and Typescript).
## Architecture Overview
### Technology Stack
- **Backend**: Python 3.11, FastAPI, SQLAlchemy, Alembic, Celery
- **Frontend**: Next.js 15+, React 18, TypeScript, Tailwind CSS
- **Database**: PostgreSQL with Redis caching
- **Search**: Vespa vector database
- **Auth**: OAuth2, SAML, multi-provider support
- **AI/ML**: LangChain, LiteLLM, multiple embedding models
### Directory Structure
```
backend/
├── onyx/
│ ├── auth/ # Authentication & authorization
│ ├── chat/ # Chat functionality & LLM interactions
│ ├── connectors/ # Data source connectors
│ ├── db/ # Database models & operations
│ ├── document_index/ # Vespa integration
│ ├── federated_connectors/ # External search connectors
│ ├── llm/ # LLM provider integrations
│ └── server/ # API endpoints & routers
├── ee/ # Enterprise Edition features
├── alembic/ # Database migrations
└── tests/ # Test suites
web/
├── src/app/ # Next.js app router pages
├── src/components/ # Reusable React components
└── src/lib/ # Utilities & business logic
```
## Frontend Standards
Frontend standards for the `web/` and `desktop/` projects live in `web/AGENTS.md`.
## Database & Migrations
### Running Migrations
```bash
# Standard migrations
alembic upgrade head
# Multi-tenant (Enterprise)
alembic -n schema_private upgrade head
```
### Creating Migrations
```bash
# Create migration
alembic revision -m "description"
# Multi-tenant migration
alembic -n schema_private revision -m "description"
```
Write the migration manually and place it in the file that alembic creates when running the above command.
## Testing Strategy
First, you must activate the virtual environment with `source .venv/bin/activate`.
There are 4 main types of tests within Onyx:
### Unit Tests
These should not assume any Onyx/external services are available to be called.
Interactions with the outside world should be mocked using `unittest.mock`. Generally, only
write these for complex, isolated modules e.g. `citation_processing.py`.
To run them:
```bash
pytest -xv backend/tests/unit
```
### External Dependency Unit Tests
These tests assume that all external dependencies of Onyx are available and callable (e.g. Postgres, Redis,
MinIO/S3, Vespa are running + OpenAI can be called + any request to the internet is fine + etc.).
However, the actual Onyx containers are not running and with these tests we call the function to test directly.
We can also mock components/calls at will.
The goal with these tests is to minimize mocking while giving some flexibility to mock things that are flaky,
need strictly controlled behavior, or need to have their internal behavior validated (e.g. verify a function is called
with certain args, something that would be impossible with proper integration tests).
A great example of this type of test is `backend/tests/external_dependency_unit/connectors/confluence/test_confluence_group_sync.py`.
To run them:
```bash
python -m dotenv -f .vscode/.env run -- pytest backend/tests/external_dependency_unit
```
### Integration Tests
Standard integration tests. Every test in `backend/tests/integration` runs against a real Onyx deployment. We cannot
mock anything in these tests. Prefer writing integration tests (or External Dependency Unit Tests if mocking/internal
verification is necessary) over any other type of test.
Tests are parallelized at a directory level.
When writing integration tests, make sure to check the root `conftest.py` for useful fixtures + the `backend/tests/integration/common_utils` directory for utilities. Prefer (if one exists), calling the appropriate Manager
class in the utils over directly calling the APIs with a library like `requests`. Prefer using fixtures rather than
calling the utilities directly (e.g. do NOT create admin users with
`admin_user = UserManager.create(name="admin_user")`, instead use the `admin_user` fixture).
A great example of this type of test is `backend/tests/integration/tests/streaming_endpoints/test_chat_stream.py`.
To run them:
```bash
python -m dotenv -f .vscode/.env run -- pytest backend/tests/integration
```
### Playwright (E2E) Tests
These tests are an even more complete version of the Integration Tests mentioned above: all services of Onyx
are running, _including_ the Web Server.
Use these tests for anything that requires significant frontend <-> backend coordination.
Tests are located at `web/tests/e2e`. Tests are written in TypeScript.
To run them:
```bash
npx playwright test <TEST_NAME>
```
For shared fixtures, best practices, and detailed guidance, see `backend/tests/README.md`.
## Logs
When (1) writing integration tests or (2) doing live tests (e.g. curl / playwright) you can get access
to logs via the `backend/log/<service_name>_debug.log` file. All Onyx services (api_server, web_server, celery_X)
will be tailing their logs to this file.
## Security Considerations
- Never commit API keys or secrets to repository
- Use encrypted credential storage for connector credentials
- Follow RBAC patterns for new features
- Implement proper input validation with Pydantic models
- Use parameterized queries to prevent SQL injection
## AI/LLM Integration
- Multiple LLM providers supported via LiteLLM
- Configurable models per feature (chat, search, embeddings)
- Streaming support for real-time responses
- Token management and rate limiting
- Custom prompts and agent actions
## Creating a Plan
When creating a plan in the `plans` directory, make sure to include at least these elements:
**Issues to Address**
What the change is meant to do.
**Important Notes**
Things you come across in your research that are important to the implementation.
**Implementation strategy**
How you are going to make the changes happen. High level approach.
**Tests**
What unit (use rarely), external dependency unit, integration, and playwright tests you plan to write to
verify the correct behavior. Don't overtest. Usually, a given change only needs one type of test.
Do NOT include these: _Timeline_, _Rollback plan_
This is a minimal list - feel free to include more. Do NOT write code as part of your plan.
Keep it high level. You can reference certain files or functions though.
Before writing your plan, make sure to do research. Explore the relevant sections in the codebase.
## Error Handling
**Always raise `OnyxError` from `onyx.error_handling.exceptions` instead of `HTTPException`.
Never hardcode status codes or use `starlette.status` / `fastapi.status` constants directly.**
A global FastAPI exception handler converts `OnyxError` into a JSON response with the standard
`{"error_code": "...", "detail": "..."}` shape. This eliminates boilerplate and keeps error
handling consistent across the entire backend.
```python
from onyx.error_handling.error_codes import OnyxErrorCode
from onyx.error_handling.exceptions import OnyxError
# ✅ Good
raise OnyxError(OnyxErrorCode.NOT_FOUND, "Session not found")
# ✅ Good — no extra message needed
raise OnyxError(OnyxErrorCode.UNAUTHENTICATED)
# ✅ Good — upstream service with dynamic status code
raise OnyxError(OnyxErrorCode.BAD_GATEWAY, detail, status_code_override=upstream_status)
# ❌ Bad — using HTTPException directly
raise HTTPException(status_code=404, detail="Session not found")
# ❌ Bad — starlette constant
raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Access denied")
```
Available error codes are defined in `backend/onyx/error_handling/error_codes.py`. If a new error
category is needed, add it there first — do not invent ad-hoc codes.
**Upstream service errors:** When forwarding errors from an upstream service where the HTTP
status code is dynamic (comes from the upstream response), use `status_code_override`:
```python
raise OnyxError(OnyxErrorCode.BAD_GATEWAY, detail, status_code_override=e.response.status_code)
```
## Best Practices
In addition to the other content in this file, best practices for contributing
to the codebase can be found in the "Engineering Best Practices" section of
`CONTRIBUTING.md`. Understand its contents and follow them.
# Project Knowledge Base
This file is the entrypoint for agents working in this repository. Keep it small.
## Start Here
- General development workflow and repo conventions: [CONTRIBUTING.md](./CONTRIBUTING.md)
- Frontend standards for `web/` and `desktop/`: [web/AGENTS.md](./web/AGENTS.md)
- Backend testing strategy and commands: [backend/tests/README.md](./backend/tests/README.md)
- Celery worker and task guidance: [backend/onyx/background/celery/README.md](./backend/onyx/background/celery/README.md)
- Backend API error-handling rules: [backend/onyx/error_handling/README.md](./backend/onyx/error_handling/README.md)
- Plan-writing guidance: [plans/README.md](./plans/README.md)
## Agent-Lab Docs
When working on `agent-lab` or on tasks explicitly about agent-engineering, use:
- [docs/agent/README.md](./docs/agent/README.md)
These docs are the system of record for the `agent-lab` workflow.
## Universal Notes
- For non-trivial work, create the target worktree first and keep the edit, test, and PR loop
inside that worktree. Do not prototype in one checkout and copy the patch into another unless
you are explicitly debugging the harness itself.
- Use `ods worktree create` for harness-managed worktrees. Do not use raw `git worktree add` when
you want the `agent-lab` workflow, because it will skip the manifest, env overlays, dependency
bootstrap, and lane-aware base-ref selection.
- When a change needs browser proof, use the harness journey flow instead of ad hoc screen capture:
record `before` in the target worktree before making the change, then record `after` in that
same worktree after validation. Use `ods journey compare` only when you need to recover a missed
baseline or compare two explicit revisions after the fact.
- After opening a PR, treat review feedback and failing checks as part of the same loop:
use `ods pr-review ...` for GitHub review threads and `ods pr-checks diagnose` plus `ods trace`
for failing Playwright runs.
- PR titles and commit messages should use conventional-commit style such as `fix: ...` or
`feat: ...`. Do not use `[codex]` prefixes in this repo.
- If Python dependencies appear missing, activate the root venv with `source .venv/bin/activate`.
- To make tests work, check the root `.env` file for an OpenAI key.
- If using Playwright to explore the frontend, you can usually log in with username `a@example.com`
and password `a` at `http://localhost:3000`.
- Assume Onyx services are already running unless the task indicates otherwise. Check `backend/log`
if you need to verify service activity.
- When making backend calls in local development flows, go through the frontend proxy:
`http://localhost:3000/api/...`, not `http://localhost:8080/...`.
- Put DB operations under `backend/onyx/db/` or `backend/ee/onyx/db/`. Do not add ad hoc DB access
elsewhere.
## How To Use This File
- Use this file as a map, not a manual.
- Follow the nearest authoritative doc for the subsystem you are changing.
- If a repeated rule matters enough to teach every future agent, document it near the code it
governs or encode it mechanically.

View File

@@ -12,7 +12,7 @@ founders@onyx.app for more information. Please visit https://github.com/onyx-dot
ARG ENABLE_CRAFT=false
# DO_NOT_TRACK is used to disable telemetry for Unstructured
ENV ONYX_RUNNING_IN_DOCKER="true" \
ENV DANSWER_RUNNING_IN_DOCKER="true" \
DO_NOT_TRACK="true" \
PLAYWRIGHT_BROWSERS_PATH="/app/.cache/ms-playwright"

View File

@@ -1,7 +1,7 @@
# Base stage with dependencies
FROM python:3.11.7-slim-bookworm AS base
ENV ONYX_RUNNING_IN_DOCKER="true" \
ENV DANSWER_RUNNING_IN_DOCKER="true" \
HF_HOME=/app/.cache/huggingface
COPY --from=ghcr.io/astral-sh/uv:0.9.9 /uv /uvx /bin/

View File

@@ -0,0 +1,37 @@
# Celery Development Notes
This document is the local reference for Celery worker structure and task-writing rules in Onyx.
## Worker Types
Onyx uses multiple specialized workers:
1. `primary`: coordinates core background tasks and system-wide operations.
2. `docfetching`: fetches documents from connectors and schedules downstream work.
3. `docprocessing`: runs the indexing pipeline for fetched documents.
4. `light`: handles lightweight and fast operations.
5. `heavy`: handles more resource-intensive operations.
6. `kg_processing`: runs knowledge-graph processing and clustering.
7. `monitoring`: collects health and system metrics.
8. `user_file_processing`: processes user-uploaded files.
9. `beat`: schedules periodic work.
For actual implementation details, inspect:
- `backend/onyx/background/celery/apps/`
- `backend/onyx/background/celery/configs/`
- `backend/onyx/background/celery/tasks/`
## Task Rules
- Always use `@shared_task` rather than `@celery_app`.
- Put tasks under `background/celery/tasks/` or `ee/background/celery/tasks/`.
- Never enqueue a task without `expires=`. This is a hard requirement because stale queued work can
accumulate without bound.
- Do not rely on Celery time-limit enforcement. These workers run in thread pools, so timeout logic
must be implemented inside the task itself.
## Testing Note
If you change Celery worker code and want to validate it against a running local worker, the worker
usually needs to be restarted manually. There is no general auto-restart on code change.

View File

@@ -5,7 +5,7 @@ from logging.handlers import RotatingFileHandler
import psutil
from onyx.utils.platform import is_running_in_container
from onyx.utils.logger import is_running_in_container
from onyx.utils.logger import setup_logger
# Regular application logger

View File

@@ -42,7 +42,7 @@ from onyx.db.models import UserGroup
from onyx.db.search_settings import get_active_search_settings_list
from onyx.redis.redis_pool import get_redis_client
from onyx.redis.redis_pool import redis_lock_dump
from onyx.utils.platform import is_running_in_container
from onyx.utils.logger import is_running_in_container
from onyx.utils.telemetry import optional_telemetry
from onyx.utils.telemetry import RecordType
from shared_configs.configs import MULTI_TENANT

View File

@@ -0,0 +1,47 @@
# Error Handling
This directory is the local source of truth for backend API error handling.
## Primary Rule
Raise `OnyxError` from `onyx.error_handling.exceptions` instead of `HTTPException`.
The global FastAPI exception handler converts `OnyxError` into the standard JSON shape:
```json
{"error_code": "...", "detail": "..."}
```
This keeps API behavior consistent and avoids repetitive route-level boilerplate.
## Examples
```python
from onyx.error_handling.error_codes import OnyxErrorCode
from onyx.error_handling.exceptions import OnyxError
# Good
raise OnyxError(OnyxErrorCode.NOT_FOUND, "Session not found")
# Good
raise OnyxError(OnyxErrorCode.UNAUTHENTICATED)
# Good: preserve a dynamic upstream status code
raise OnyxError(
OnyxErrorCode.BAD_GATEWAY,
detail,
status_code_override=e.response.status_code,
)
```
Avoid:
```python
raise HTTPException(status_code=404, detail="Session not found")
```
## Notes
- Available error codes are defined in `backend/onyx/error_handling/error_codes.py`.
- If a new error category is needed, add it there first rather than inventing ad hoc strings.
- When forwarding upstream service failures with dynamic status codes, use `status_code_override`.

View File

@@ -6,7 +6,6 @@ from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy.orm import Session
from onyx import __version__ as onyx_version
from onyx.utils.platform import is_running_in_container
from onyx.auth.permissions import require_permission
from onyx.auth.users import is_user_admin
from onyx.configs.app_configs import DEFAULT_USER_FILE_MAX_UPLOAD_SIZE_MB
@@ -112,7 +111,6 @@ def fetch_settings(
if DISABLE_VECTOR_DB
else DEFAULT_FILE_TOKEN_COUNT_THRESHOLD_K_VECTOR_DB
),
is_containerized=is_running_in_container(),
)

View File

@@ -131,7 +131,3 @@ class UserSettings(Settings):
else DEFAULT_FILE_TOKEN_COUNT_THRESHOLD_K_VECTOR_DB
)
)
# True when the backend is running inside a container (Docker/Podman).
# The frontend uses this to default local-service URLs (e.g. Ollama,
# LM Studio) to host.docker.internal instead of localhost.
is_containerized: bool = False

View File

@@ -169,7 +169,11 @@ def get_standard_formatter() -> ColoredFormatter:
)
from onyx.utils.platform import is_running_in_container # noqa: F401
# Name of the environment variable the Docker images set to flag
# containerized execution.
DANSWER_DOCKER_ENV_STR = "DANSWER_RUNNING_IN_DOCKER"


def is_running_in_container() -> bool:
    """Return True iff DANSWER_RUNNING_IN_DOCKER equals the string "true"."""
    return os.environ.get(DANSWER_DOCKER_ENV_STR) == "true"
def setup_logger(

View File

@@ -1,25 +0,0 @@
import logging
import os

logger = logging.getLogger(__name__)

_ONYX_DOCKER_ENV_STR = "ONYX_RUNNING_IN_DOCKER"
_DANSWER_DOCKER_ENV_STR = "DANSWER_RUNNING_IN_DOCKER"


def is_running_in_container() -> bool:
    """Return True when the process appears to run inside a container.

    The check consults ``ONYX_RUNNING_IN_DOCKER`` first. If it is unset,
    the deprecated ``DANSWER_RUNNING_IN_DOCKER`` variable is consulted as a
    fallback, logging a deprecation warning. When neither variable is set,
    the function reports False.
    """
    for env_name, is_deprecated in (
        (_ONYX_DOCKER_ENV_STR, False),
        (_DANSWER_DOCKER_ENV_STR, True),
    ):
        value = os.getenv(env_name)
        if value is None:
            continue
        if is_deprecated:
            # Keep accepting the legacy variable for now, but nudge users
            # toward the new name before support is removed.
            logger.warning(
                "%s is deprecated and will be ignored in a future release. "
                "Use %s instead.",
                _DANSWER_DOCKER_ENV_STR,
                _ONYX_DOCKER_ENV_STR,
            )
        return value == "true"
    return False

View File

@@ -45,6 +45,15 @@ npx playwright test <TEST_NAME>
Shared fixtures live in `backend/tests/conftest.py`. Test subdirectories can define
their own `conftest.py` for directory-scoped fixtures.
## Additional Onyx-Specific Guidance
- Activate the root venv first with `source .venv/bin/activate`.
- For many product changes in this repo, prefer integration tests or external dependency unit tests
over isolated unit tests.
- When writing integration tests, check `backend/tests/integration/common_utils/` and the root
`conftest.py` for fixtures and managers before inventing new helpers.
- Prefer existing fixtures over constructing users or entities manually inside tests.
## Running Tests Repeatedly (`pytest-repeat`)
Use `pytest-repeat` to catch flaky tests by running them multiple times:

View File

@@ -0,0 +1,89 @@
# Initial Architecture Map
Status: provisional baseline. This is a routing map for agents, not a complete design spec for
every subsystem. Update it as the repo becomes more explicit.
## Top-Level Surfaces
The repository is easiest to reason about as seven main surfaces:
| Surface | Primary Paths | Purpose |
| --- | --- | --- |
| Backend product logic | `backend/onyx/`, `backend/ee/onyx/` | Core auth, chat, search, indexing, connectors, API, and enterprise extensions |
| Data and persistence | `backend/onyx/db/`, `backend/ee/onyx/db/`, `backend/alembic/` | DB models, data access logic, and schema migrations |
| Frontend product surfaces | `web/src/app/`, `web/src/sections/`, `web/src/layouts/` | Next.js routes, screens, and feature-level UI composition |
| Frontend design system and shared UI | `web/lib/opal/`, `web/src/refresh-components/` | Preferred primitives for new UI work |
| Devtools and local developer workflows | `tools/ods/`, `cli/` | Repo automation, CI helpers, visual regression tooling, and CLI integrations |
| Agent-facing platform work | `backend/onyx/server/features/build/`, `backend/onyx/mcp_server/`, `backend/onyx/deep_research/`, `backend/onyx/agents/` | Sandbox runtime, MCP tool surface, agent orchestration, and research workflows |
| Agent-lab harness state | shared git metadata under `$(git rev-parse --git-common-dir)/onyx-agent-lab/` | Local worktree manifests, ports, env overlays, and verification artifacts for agentized development |
## Backend Map
Use these paths as the first stop when routing backend changes:
| Area | Paths | Notes |
| --- | --- | --- |
| Authentication and access control | `backend/onyx/auth/`, `backend/onyx/access/`, `backend/ee/onyx/access/` | User identity, auth flows, permissions |
| Chat and answer generation | `backend/onyx/chat/`, `backend/onyx/server/query_and_chat/` | Chat loop, message processing, streaming |
| Retrieval and tools | `backend/onyx/tools/`, `backend/onyx/context/`, `backend/onyx/mcp_server/` | Search tools, web tools, context assembly, MCP exposure |
| Connectors and indexing | `backend/onyx/connectors/`, `backend/onyx/document_index/`, `backend/onyx/background/` | Source sync, indexing, pruning, permissions sync |
| LLM and prompt infrastructure | `backend/onyx/llm/`, `backend/onyx/prompts/`, `backend/ee/onyx/prompts/` | Provider integrations and prompting |
| Server APIs and feature entrypoints | `backend/onyx/server/`, `backend/ee/onyx/server/` | FastAPI routes and product feature APIs |
| Agent and build platform | `backend/onyx/server/features/build/`, `backend/onyx/agents/`, `backend/onyx/deep_research/` | Sandboxes, agent runtimes, orchestration, long-running research |
| Persistence | `backend/onyx/db/`, `backend/ee/onyx/db/` | Put DB operations here, not in route handlers or feature modules |
## Frontend Map
For frontend work, route changes by intent first, then by component maturity:
| Intent | Preferred Paths | Notes |
| --- | --- | --- |
| Next.js route/page work | `web/src/app/` | App Router pages and page-local wiring |
| Feature composition | `web/src/sections/`, `web/src/layouts/` | Preferred place for reusable feature-level assemblies |
| New shared UI primitives | `web/lib/opal/`, `web/src/refresh-components/` | Default targets for new reusable UI |
| Legacy shared UI | `web/src/components/` | Avoid for new work unless forced by the local surface |
| Frontend business logic | `web/src/lib/`, `web/src/hooks/`, `web/src/interfaces/` | Utilities, hooks, typed interfaces |
Important frontend rule already established in [web/AGENTS.md](../../web/AGENTS.md):
- Do not use `web/src/components/` for new component work.
## Existing Hard Constraints
These rules already exist and should be treated as architectural boundaries:
- Backend errors should raise `OnyxError`, not `HTTPException`.
- DB operations belong under `backend/onyx/db/` or `backend/ee/onyx/db/`.
- New FastAPI APIs should not use `response_model`.
- Celery tasks should use `@shared_task`.
- Enqueued Celery tasks must include `expires=`.
- Backend calls in local/manual flows should go through `http://localhost:3000/api/...`.
## Change Routing Heuristics
Use these heuristics before editing:
1. If the task changes persistence semantics, start in the DB layer and migrations.
2. If the task changes user-visible UI, find the route in `web/src/app/`, then move downward into
`sections`, `layouts`, and preferred shared UI.
3. If the task spans product behavior and background execution, inspect both the API entrypoint and
the relevant Celery path.
4. If the task concerns agentization, build, or local execution, check whether
`backend/onyx/server/features/build/` or `tools/ods/` is the better home before creating a new
subsystem.
5. If the task needs isolated local boot, browser validation, or per-change artifacts, check
[HARNESS.md](./HARNESS.md) before inventing another ad hoc runner.
6. If the change touches a historically messy area, consult [LEGACY_ZONES.md](./LEGACY_ZONES.md)
before adding more local patterns.
## Test Routing
Onyx already has a clear testing ladder:
- `backend/tests/unit/`: isolated logic only
- `backend/tests/external_dependency_unit/`: real infra, direct function calls, selective mocking
- `backend/tests/integration/`: real deployment, no mocking
- `web/tests/e2e/`: full frontend-backend coordination
Prefer the lowest layer that still validates the real behavior. For many product changes in this
repo, that means integration or Playwright rather than unit tests.

147
docs/agent/BRANCHING.md Normal file
View File

@@ -0,0 +1,147 @@
# Branching Model for `agent-lab`
This is the branching policy for `agent-lab`. It is intentionally separate from the default
workflow on `main`.
This document explains how to use a long-running `agent-lab` branch without making `main`
implicitly depend on lab-only agent-engineering changes.
## Goals
- Keep `main` stable and consensus-driven.
- Allow opt-in agent-engineering improvements to live on `agent-lab`.
- Let engineers and agents use `agent-lab` as a control checkout for worktree-based development.
- Ensure product PRs to `main` originate from `main`-based branches, not from `agent-lab`.
## Branch Roles
| Branch | Purpose |
| --- | --- |
| `main` | Shipping branch and team default |
| `codex/agent-lab` | Long-running control checkout containing the harness and agent-engineering improvements |
| `codex/lab/<name>` | Short-lived branch for `agent-lab`-only tooling, docs, or workflow work |
| `codex/fix/<name>`, `codex/feat/<name>`, etc. | Short-lived product branch cut from `origin/main` and managed by the `agent-lab` control checkout |
## Core Rule
`main` must never depend on `agent-lab`.
That means:
- `codex/agent-lab` may contain extra tooling, docs, checks, and workflow changes.
- Product branches may be managed by the `agent-lab` control checkout, but they must still be based
on `origin/main`.
- A PR to `main` should come from a `main`-based product branch, not from `codex/agent-lab`.
## Preferred Workflow
### Lab-Only Work
Use this for agent-engineering docs, harnesses, optional checks, or tooling that should remain on
`agent-lab` for now.
1. Branch from `codex/agent-lab` into `codex/lab/<name>`.
For local isolation, create the branch via `ods worktree create codex/lab/<name>`.
2. Make the lab-only changes.
3. Open the PR back into `codex/agent-lab`.
4. Do not open these changes directly to `main` unless the team later agrees to upstream them.
### Product Feature Work
Use this when you want to fix a product bug or build a shipping feature for `main`.
1. Stay in the `codex/agent-lab` control checkout.
2. Create a product worktree from `origin/main`, using a conventional branch lane such as:
- `ods worktree create codex/fix/<name>`
- `ods worktree create codex/feat/<name>`
3. Make the code changes inside that worktree checkout.
4. Run harness commands from the control checkout against the tracked worktree:
- `ods agent-check --worktree codex/fix/<name>`
- `ods verify --worktree codex/fix/<name>`
- `ods backend api --worktree codex/fix/<name>`
- `ods web dev --worktree codex/fix/<name>`
5. If the change needs browser proof, record a before/after journey:
- before editing: `ods journey run --worktree codex/fix/<name> --journey <name> --label before`
- after validating the fix: `ods journey run --worktree codex/fix/<name> --journey <name> --label after`
- use `ods journey compare` only when the initial `before` capture was missed and a recovery
baseline is needed later
- after the PR exists, publish the artifact directory you captured or the fallback compare run
with `ods journey publish --run-dir <dir> --pr <number>`
6. Commit, push, and open the PR from the product worktree checkout itself.
Prefer `ods pr-open` so the repo template and conventional-commit title check stay in the same
control plane.
7. Open the PR directly from that product branch to `main`.
8. After the PR is open, use:
- `ods pr-review triage --pr <number>`
- `ods pr-checks diagnose --pr <number>`
- `ods pr-review respond --comment-id ... --thread-id ... --body ...`
## Commit Hygiene Rules
This workflow only works if commits are separated cleanly.
Agents and humans should:
- keep lab-only workflow changes in separate commits from product logic
- avoid mixing refactors, harness changes, and feature behavior in one commit
- use conventional-commit messages and PR titles
- prefer multiple small commits over one large mixed commit
Good split:
- `docs(agent-lab): clarify control-checkout workflow`
- `fix: suppress logged-out modal on fresh unauthenticated load`
- `test: add regression coverage for auth-page logout modal`
Bad split:
- `misc: update agent docs, add lint, change connector UI, fix API`
## Guidance for Agents
When an agent is working on product code, it should assume:
1. The product branch should be created from `origin/main`, not from `codex/agent-lab`.
2. The `codex/agent-lab` checkout is the control plane for `ods` commands until the harness is
upstreamed more broadly.
3. The code change itself should still be made and committed inside the target product worktree.
4. A PR to `main` should use a conventional-commit title such as `fix: ...` or `feat: ...`.
If a product bug is discovered while editing on `codex/agent-lab`, treat that as exploration.
Restart the real fix in a fresh `main`-based product worktree and port only the minimal product
patch there.
## What Should Usually Stay on `agent-lab`
These are usually lab-only unless explicitly approved for upstreaming:
- branch-specific workflow docs
- harness-only `ods` commands
- non-consensus lint rules
- agent harness scripts
- opt-in automation for review or promotion
- branch-specific AGENTS guidance
## What Can Be Promoted to `main`
These can be promoted once they stand on their own:
- product feature code
- product tests
- bug fixes
- low-controversy lint rules with team agreement
- small devtools improvements that are useful outside `agent-lab`
## Review Standard
If opening a PR to `main` from the `agent-lab` control workflow:
- make sure the PR branch itself is based on `origin/main`
- use a conventional-commit title
- mention any control-plane validation that was run with `ods ... --worktree <branch>`
- attach journey artifacts when browser behavior changed
- treat review-thread replies and failing checks as part of the same agent loop, not as a separate
manual phase
This keeps the product branch reviewable without forcing reviewers to understand the entire
`agent-lab` branch.

View File

@@ -0,0 +1,73 @@
# Golden Rules
These are the current rules for the `agent-lab` workflow. The long-term goal is to move the useful
ones from prose into shared checks, scripts, or tests where appropriate.
Some of these are already documented elsewhere in the repo as project standards. In this file,
they should be treated as the active rules for work done on `agent-lab`.
## Current Rules
### Backend
1. Raise `OnyxError` instead of `HTTPException`.
2. Put DB operations under `backend/onyx/db/` or `backend/ee/onyx/db/`.
3. Use `@shared_task` for Celery tasks.
4. Never enqueue a Celery task without `expires=`.
5. Do not use FastAPI `response_model` on new APIs.
6. Keep Python strictly typed.
### Frontend
1. Prefer `web/lib/opal/` and `web/src/refresh-components/` for new shared UI.
2. Do not add new shared components under `web/src/components/`.
3. Route backend calls through the frontend `/api/...` surface in local and test flows.
4. Keep TypeScript strictly typed.
### Workflow
1. Start in a tracked worktree created by `ods worktree create`. Do not use raw `git worktree add`
for harness-managed work.
2. For harness work, use `codex/lab/...` branches based on `codex/agent-lab`. For product work,
use conventional branches such as `codex/fix/...` or `codex/feat/...` based on `origin/main`.
3. Make edits inside the target worktree. Copying a patch from another checkout is only acceptable
when debugging the harness itself.
4. Prefer integration or external-dependency-unit tests over unit tests when validating real Onyx
behavior.
5. When a repeated review comment appears, convert it into repo-local documentation or a mechanical
check.
6. For browser-visible changes, prefer a registered `ods journey` capture over an ad hoc manual
recording. The before/after artifacts should live with the PR loop.
7. Use `ods pr-review` to fetch and triage GitHub review threads instead of relying on memory or
the web UI alone. Reply and resolve from the same workflow when confidence is high.
8. Use `ods pr-checks diagnose` to detect failing GitHub checks and point to the next remediation
   command. For Playwright failures, pair it with `ods trace`.
9. PR titles and commit messages should use conventional-commit style such as `fix: ...` or
   `feat: ...`. Never use `[codex]` prefixes in this repo.
10. When touching legacy areas, leave the area more explicit than you found it: better naming,
    better boundaries, or a follow-up cleanup note.
## Mechanical Checks
These are strong candidates for `ods agent-check` or dedicated linters:
| Check | Why it matters |
| --- | --- |
| Ban `HTTPException` in backend product code | Keeps API error handling consistent |
| Ban direct DB mutations outside DB directories | Preserves layering |
| Detect task enqueue calls missing `expires=` | Prevents queue growth and stale work |
| Detect new imports from `web/src/components/` in non-legacy code | Prevents further UI drift |
| Detect direct calls to backend ports in tests/scripts where frontend proxy should be used | Preserves realistic request paths |
| Detect missing docs/agent references for new repo-level rules | Prevents knowledge from staying only in chat |
## Rule Promotion Policy
Promote a rule from prose into enforcement when at least one is true:
- it has been violated more than once
- a violation is expensive to detect late
- the remediation is mechanical
- the error message can teach the correct pattern succinctly
Agents work better with fast, local, actionable failures than with broad stylistic feedback after a
PR is opened.

267
docs/agent/HARNESS.md Normal file
View File

@@ -0,0 +1,267 @@
# Worktree Harness
This document defines the `agent-lab` harness model for doing end-to-end work on `onyx`.
The goal is to make one agent capable of taking one isolated change from edit to verification
without depending on human memory for ports, paths, or validation steps.
## Principles
These decisions follow the same principles described in OpenAI's
[Harness engineering](https://openai.com/index/harness-engineering/) and
[Unlocking the Codex harness](https://openai.com/index/unlocking-the-codex-harness/) articles:
- each task should run in its own git worktree
- the app should be bootable per worktree
- browser state should be directly legible to the agent
- logs, traces, and test artifacts should be attached to the same worktree lifecycle
- repository docs plus local metadata should be the system of record, not chat memory
## Current Harness Surface
The first `agent-lab` harness layer lives in `tools/ods/`.
Implemented command surfaces:
- `ods worktree create <branch>`: creates a git worktree plus local agent metadata
- `ods worktree deps up|status|reset|down`: provisions and manages namespaced external state
- `ods worktree status`: lists tracked worktrees and their URLs
- `ods worktree show [worktree]`: prints the manifest for one worktree
- `ods worktree remove <worktree>`: removes the worktree and local harness state
- `ods journey list|run|compare|publish`: records registered browser journeys, including local
before/after video artifacts and optional PR publication
- `ods pr-review fetch|triage|respond|resolve`: turns GitHub review threads into a local
machine-readable loop
- `ods pr-checks status|diagnose`: makes failing GitHub checks queryable from the same control
plane
- `ods verify`: runs the agent verification ladder and writes a machine-readable summary
- `ods agent-check`: runs diff-based architectural and doc checks
## Required Workflow
This is the required `agent-lab` workflow going forward:
1. Create the target worktree first with `ods worktree create`.
2. Make the code changes inside that worktree.
3. Run verification against that same worktree.
4. Open the PR from that same worktree.
Do not implement a change in one checkout and then rsync or patch it into another checkout just to
test it. That is only acceptable when explicitly debugging the harness itself.
Also do not use raw `git worktree add` for harness-managed work. `ods worktree create` is the
authoritative entrypoint because it disables repo hooks during checkout, writes the local manifest,
bootstraps env/runtime dependencies, provisions namespaced state, and records the worktree lane and
base ref.
## Control Checkout Model
Right now the harness code itself lives on `codex/agent-lab`, not on plain `main`.
That means the `codex/agent-lab` checkout acts as the control plane:
- lab worktrees such as `codex/lab/...` are based on `codex/agent-lab`
- product worktrees such as `codex/fix/...` or `codex/feat/...` are based on `origin/main`
- the `agent-lab` checkout can still manage those product worktrees via `--worktree`
flags on `ods backend`, `ods web`, `ods verify`, and `ods agent-check`
This lets us use the harness to manage a `main`-based product branch before the harness itself has
been upstreamed to `main`.
## Worktree Metadata
Each `agent-lab` worktree gets a local manifest stored under the shared git metadata directory:
```text
$(git rev-parse --git-common-dir)/onyx-agent-lab/worktrees/<id>/
```
The manifest tracks:
- branch name
- checkout path
- base ref used when the branch was created
- dependency mode and namespace-derived external dependency settings
- reserved ports for web, API, model server, and MCP
- browser-facing URLs
- generated env overlay file paths
- artifact directory
- last verification summary
This state is local runtime metadata. It is intentionally not checked into the repo.
## Boot Model
The current harness boot model isolates the mutable application processes and can also isolate the
mutable non-search data plane.
Per worktree:
- Next.js dev server gets its own `PORT`
- browser-facing base URL is unique
- backend API port is unique
- model server port is unique
- MCP port reservation exists for future worktree-local MCP runtime use
- artifacts are written to a worktree-specific directory
Today this is enough to make the app bootable per worktree without requiring a fully duplicated
dependency container stack for every task.
Important boundary:
- isolated today: app processes, ports, URLs, local artifacts, worktree-local dependency installs,
PostgreSQL database, Redis key prefix, and MinIO file-store bucket when the worktree runs in
`namespaced` dependency mode
- shared today: OpenSearch/Vespa and the rest of the local dependency stack started via docker
compose
This means a normal `agent-lab` worktree can run against:
- a dedicated Postgres database on the shared local Postgres server
- a dedicated Redis namespace on the shared local Redis instance
- a dedicated MinIO file-store bucket on the shared local object store
OpenSearch/Vespa remain shared-only by design on this branch. The harness should never imply
otherwise.
This is a deliberate brownfield adaptation of the OpenAI articles' worktree-per-task model:
keep the common path mechanically isolated where the repo already supports it, and explicitly mark
the high-complexity surfaces that remain shared.
## Dependency Modes
`agent-lab` currently supports two dependency modes:
- `namespaced`: default mode for agent feature work. Creates one Postgres database, one Redis
prefix, and one MinIO bucket per worktree.
- `shared`: reuse the existing local DB/Redis/MinIO state when full isolation is unnecessary.
The worktree manifest is the source of truth for the selected mode and the derived namespace values.
Search infrastructure policy:
- OpenSearch/Vespa are always shared
- there is no current plan to add namespaced or per-worktree search stacks on `agent-lab`
- tasks that mutate search/index infrastructure should be treated as higher-risk and validated with
extra care because the harness does not isolate that surface
## Backend and Web Integration
When `ods backend ...` or `ods web ...` runs inside a tracked `agent-lab` worktree, it should
derive runtime settings from the worktree manifest automatically.
Current behavior:
- `ods backend api` defaults to the reserved worktree API port
- `ods backend model_server` defaults to the reserved worktree model-server port
- `ods web dev` gets the reserved worktree web port plus `BASE_URL`, `WEB_DOMAIN`,
`INTERNAL_URL`, and `MCP_INTERNAL_URL`
- backend and web commands also inherit the manifest's dependency namespace env overrides
- generated `.vscode/.env.agent-lab` and `.vscode/.env.web.agent-lab` files mirror those values
- `ods worktree bootstrap` prepares the worktree to run by linking env files, linking or cloning
the Python runtime, and preparing `web/node_modules`
- `ods worktree deps up` provisions namespaced Postgres/Redis/MinIO state when needed
- `ods backend ... --worktree <id>` and `ods web ... --worktree <id>` let the `agent-lab`
control checkout run app processes against a tracked target worktree
This makes the standard dev commands work in an isolated way without inventing a second startup
surface just for agents.
## Browser Validation
Use two browser surfaces with different jobs:
- Chrome DevTools MCP for exploratory validation, DOM snapshots, navigation, and interactive bug
reproduction
- Playwright for codified end-to-end verification, screenshots, and retained traces
- `ods journey run` for the default article-style loop inside one worktree: capture `before` before
the fix, then capture `after` after the fix and publish the resulting artifacts to the PR when
needed
- `ods journey compare` as the fallback path when the agent missed the initial `before` capture or
needs a strict baseline-vs-branch comparison after the fact
Important detail:
- The default path should not launch two worktrees just to prove a normal UI bug fix. Use one
tracked product worktree, start the app in that worktree, and record `before` and `after` from
that same environment.
- If the fix is still uncommitted, always capture from the tracked target worktree, not from a
temporary `HEAD` checkout.
- `ods journey compare` is reserved for recovery or explicit revision comparison, not as the
standard path for every PR.
The worktree manifest's `web` URL is the source of truth for both.
If an agent needs to inspect live UI behavior while iterating, it should prefer Chrome DevTools MCP
against the worktree URL. If the behavior needs to become a repeatable regression check, encode it
as Playwright coverage under `web/tests/e2e/`.
## Verification Ladder
The expected verification sequence for a worktree is:
1. `ods agent-check`
2. targeted backend tests when backend behavior changed
3. targeted Playwright runs when UI or frontend-backend flows changed
4. `ods journey run --label before` before the code change, then `ods journey run --label after`
after the change when the PR needs durable browser proof
5. screenshot and trace review when UI validation fails
`ods verify` is the first unified entrypoint for this ladder. It writes a JSON summary into the
worktree artifact directory so later agent runs can inspect prior results directly.
For product worktrees based on `main`, the intended control-plane usage is:
1. from `codex/agent-lab`, run `ods worktree create codex/fix/<name>`
2. edit inside the created `main`-based checkout
3. from `codex/agent-lab`, run `ods verify --worktree codex/fix/<name>`
4. if live processes are needed, run `ods backend ... --worktree codex/fix/<name>` and
`ods web ... --worktree codex/fix/<name>`
5. commit, push, and open the PR from the product worktree checkout itself
## Artifacts
Per-worktree artifacts are written under the local harness state directory, not into chat.
Current artifact classes:
- verification summaries
- pytest logs
- Playwright logs
- journey screenshots, videos, traces, and compare summaries
- PR review thread snapshots and triage outputs
- dependency namespace metadata in the local manifest
Existing repo outputs are still relevant:
- Playwright traces and screenshots under `web/output/`
- screenshot diff reports from `ods screenshot-diff`
- CI trace retrieval from `ods trace`
## Known Gaps
This is the initial harness layer, not the finished system.
Still missing:
- one-command `up/down` orchestration for all local processes
- worktree-local observability stack for logs, metrics, and traces
- worktree-local MCP server runtime wiring
- automatic promotion tooling from `agent-lab` feature branches to `main`
- recurring doc-gardening and cleanup agents
- resumable long-running task server for local development tasks
Resolved in the current harness layer:
- fresh-worktree bootstrap for `.venv`, `.vscode/.env*`, and `web/node_modules`
- namespaced isolation for Postgres, Redis, and MinIO on a per-worktree basis
- registered before/after browser journeys with durable artifact directories
- GitHub review-thread fetch/triage/respond tooling
- GitHub failing-check diagnosis from the same `ods` control plane
Non-goals on this branch:
- OpenSearch/Vespa namespacing
- per-worktree vector/search stacks
Those are the next places to invest if we want to match the article more closely.

View File

@@ -0,0 +1,87 @@
# Legacy Zones
Status: initial classification. This file exists to stop agents from treating every existing
pattern in the repository as equally desirable precedent.
## Zone Types
| Zone | Meaning | Edit Policy |
| --- | --- | --- |
| `strict` | Preferred surface for new work | Freely extend, but keep boundaries explicit and add tests |
| `transition` | Actively evolving surface with mixed patterns | Prefer local consistency, avoid introducing new abstractions casually |
| `legacy-adapter` | Known historical surface or deprecated pattern area | Avoid new dependencies on it; prefer facades, wrappers, or migrations away |
| `frozen` | Only touch for bug fixes, security, or explicitly scoped work | Do not expand the pattern set |
## Initial Classification
### Strict
These are good default targets for new investment:
- `backend/onyx/db/`
- `backend/ee/onyx/db/`
- `backend/onyx/error_handling/`
- `backend/onyx/mcp_server/`
- `backend/onyx/server/features/build/`
- `tools/ods/`
- `web/lib/opal/`
- `web/src/refresh-components/`
- `web/src/layouts/`
- `web/src/sections/cards/`
### Transition
These areas are important and active, but they mix styles, eras, and responsibilities:
- `backend/onyx/server/`
- `backend/ee/onyx/server/`
- `backend/onyx/chat/`
- `backend/onyx/tools/`
- `backend/onyx/agents/`
- `backend/onyx/deep_research/`
- `web/src/app/`
- `web/src/sections/`
- `web/src/lib/`
Edit guidance:
- prefer incremental refactors over sweeping rewrites
- keep changes local when the area lacks clear boundaries
- add tests before extracting new shared abstractions
### Legacy-Adapter
These areas should not be treated as default precedent for new work:
- `web/src/components/`
- `backend/model_server/legacy/`
Edit guidance:
- do not add fresh reusable components or helper patterns here
- if a task requires touching these areas, prefer introducing an adapter in a stricter surface
- if you must extend a legacy file, keep the blast radius small and document follow-up cleanup
### Frozen
No repo-wide frozen zones are declared yet beyond files or subsystems that are clearly deprecated on
their face. Add explicit entries here rather than relying on tribal knowledge.
## Brownfield Rules
When a task lands in a non-strict zone:
1. Identify whether the task is fixing behavior, adding capability, or migrating structure.
2. Avoid copying local patterns into stricter parts of the codebase.
3. If an unsafe pattern is unavoidable, isolate it behind a typed boundary.
4. Record newly discovered smells in [GOLDEN_RULES.md](./GOLDEN_RULES.md) or a follow-on
execution plan.
## Promotion Criteria
A transition area can move toward `strict` when:
- its dependency boundaries are easy to explain
- new code has a preferred home
- tests are reliable enough for agents to use as feedback loops
- recurring review comments have been turned into written or mechanical rules

View File

@@ -0,0 +1,48 @@
# Quality Score Baseline
This file is an intentionally rough baseline for how legible the repository is to coding agents.
It is not a product quality report. It is a scorecard for agent development ergonomics.
## Scoring Rubric
Each area is scored from `0` to `5` on four dimensions:
- `Legibility`: how easy it is to discover the right files and concepts
- `Boundaries`: how clearly dependency and ownership seams are defined
- `Verification`: how available and reliable the feedback loops are
- `Agent ergonomics`: how likely an agent is to make a correct change without human rescue
Overall score is directional, not mathematically precise.
## Initial Baseline
| Area | Legibility | Boundaries | Verification | Agent ergonomics | Overall | Notes |
| --- | --- | --- | --- | --- | --- | --- |
| Backend core (`backend/onyx/`, `backend/ee/onyx/`) | 3 | 3 | 4 | 3 | 3.25 | Strong test surface, but top-level routing docs are thin |
| Persistence (`backend/onyx/db/`, migrations) | 4 | 4 | 3 | 4 | 3.75 | Clearer than most areas because path-level rules already exist |
| Frontend modern surfaces (`web/src/app/`, `sections`, `opal`, `refresh-components`) | 3 | 3 | 3 | 3 | 3.0 | Direction exists, but mixed generations still leak across boundaries |
| Frontend legacy shared UI (`web/src/components/`) | 1 | 1 | 2 | 1 | 1.25 | Explicitly deprecated, but still present and easy for agents to cargo-cult |
| Agent platform and build sandbox (`backend/onyx/server/features/build/`) | 3 | 4 | 3 | 4 | 3.5 | Good substrate for agentization, but not yet aimed at repo development workflows |
| MCP, CLI, and devtools (`backend/onyx/mcp_server/`, `cli/`, `tools/ods/`) | 4 | 4 | 4 | 4 | 4.0 | `agent-check`, worktree manifests, `ods verify`, `ods journey`, and PR review/check tooling give this surface a real control plane |
| Repo-level docs and plans | 4 | 3 | 4 | 4 | 3.75 | `docs/agent/` now describes the journey/review/check loop directly, though subsystem coverage is still uneven |
## Biggest Gaps
1. Repo-level architecture knowledge is still thinner than the runtime and workflow docs.
2. Brownfield and legacy zones are not explicitly flagged enough for agents.
3. Important engineering rules still outnumber the mechanical checks that enforce them.
4. The worktree harness does not yet include a local observability stack or one-command process orchestration.
## Near-Term Targets
The next improvements should aim to move these areas:
- Repo-level docs and plans: `3.75 -> 4.0`
- Frontend legacy safety: `1.25 -> 2.5`
- Backend core agent ergonomics: `3.0 -> 4.0`
- Worktree observability and runtime automation: `2.5 -> 4.0`
## Update Policy
When a new check, map, or workflow materially improves agent behavior, update this scorecard and
note what changed. If a score changes, the adjacent notes should explain why.

68
docs/agent/README.md Normal file
View File

@@ -0,0 +1,68 @@
# Agent Engineering Docs
This directory is the knowledge base for the `agent-lab` workflow around making development of
`onyx` itself more agentized.
The goal is not to replace the root [AGENTS.md](../../AGENTS.md).
The goal is to keep architecture maps, unsafe-zone notes, quality signals, and follow-on
execution plans in a form that coding agents can discover and update.
On `agent-lab`, this directory is the system of record for agent-engineering workflow.
## Principles
- Keep the entrypoint small. The root `AGENTS.md` should point here; it should not become a
growing encyclopedia.
- Create the target worktree first. The intended workflow is one task, one tracked worktree, one
verification loop, and one PR from that same checkout.
- Keep artifacts with the workflow. Browser videos, traces, review summaries, and check triage
should be produced by harness commands and stored as machine-readable outputs, not recreated
from chat memory.
- Prefer maps over manuals. Agents need navigable pointers to the right subsystem, not a giant
blob of undifferentiated instructions.
- Encode recurring judgment into the repo. If a rule matters often, document it here and then
promote it into a check, linter, test, or script.
- Distinguish legacy from greenfield. Agents will copy the patterns they see. If an area is
historically messy, we need to say so explicitly.
- Version decisions with the code. If a design choice matters for future changes, it should live
in-repo rather than in chat or memory.
## Documents
- [ARCHITECTURE.md](./ARCHITECTURE.md): top-level codebase map and change-routing guidance.
- [BRANCHING.md](./BRANCHING.md): branch model for long-running `agent-lab` development and
promotion of product-only changes to `main`.
- [HARNESS.md](./HARNESS.md): worktree runtime model, verification ladder, and browser/tooling
expectations.
- [LEGACY_ZONES.md](./LEGACY_ZONES.md): edit policy for strict, transitional, and legacy areas.
- [GOLDEN_RULES.md](./GOLDEN_RULES.md): active rules for `agent-lab` and promotion targets for
mechanical enforcement.
- [QUALITY_SCORE.md](./QUALITY_SCORE.md): baseline legibility and maintainability assessment for
agent work.
## Operating Model
Use this directory for information that should change how future agents work in the `agent-lab`
workflow:
- architecture maps
- dependency and layering rules
- "do not extend this pattern" warnings
- safe extension points
- recurring cleanup policies
- harness/runtime behavior for worktree-based development
- before/after browser journeys and PR artifact publication
- GitHub review and failing-check control loops
- quality scorecards
- active execution plans for agent-engineering improvements
Current workflow split:
- `codex/agent-lab` is the control checkout for the harness itself.
- `codex/lab/<name>` branches are for harness/docs/tooling work based on `codex/agent-lab`.
- `codex/fix/<name>`, `codex/feat/<name>`, and similar conventional product branches should be
created from `origin/main`, even when they are managed from the `agent-lab` control checkout.
- PR titles and commit messages should use conventional-commit style, never `[codex]` prefixes.
Do not turn this into a dumping ground. If something is local to one feature, keep it with that
feature. This directory is for `agent-lab`-level agent-development guidance.

View File

@@ -28,11 +28,11 @@ Some commands require external tools to be installed and configured:
- **uv** - Required for `backend` commands
- Install from [docs.astral.sh/uv](https://docs.astral.sh/uv/)
- **GitHub CLI** (`gh`) - Required for `run-ci`, `cherry-pick`, and `trace` commands
- **GitHub CLI** (`gh`) - Required for `run-ci`, `cherry-pick`, `trace`, `pr-review`, and `pr-checks` commands
- Install from [cli.github.com](https://cli.github.com/)
- Authenticate with `gh auth login`
- **AWS CLI** - Required for `screenshot-diff` commands (S3 baseline sync)
- **AWS CLI** - Required for `screenshot-diff` commands and `journey publish` (S3 artifact sync)
- Install from [aws.amazon.com/cli](https://aws.amazon.com/cli/)
- Authenticate with `aws sso login` or `aws configure`
@@ -196,11 +196,19 @@ ods backend <subcommand>
| Flag | Default | Description |
|------|---------|-------------|
| `--no-ee` | `false` | Disable Enterprise Edition features (enabled by default) |
| `--worktree` | current checkout | Run the command against a tracked agent-lab worktree |
| `--port` | `8080` (api) / `9000` (model_server) | Port to listen on |
Shell environment takes precedence over `.env` file values, so inline overrides
work as expected (e.g. `S3_ENDPOINT_URL=foo ods backend api`).
When run inside a tracked `agent-lab` worktree, `ods backend api` and
`ods backend model_server` will automatically use that worktree's reserved
ports unless you override them explicitly with `--port`.
The same command can also be launched from the `codex/agent-lab` control
checkout against another tracked worktree via `--worktree <branch>`.
**Examples:**
```shell
@@ -218,6 +226,9 @@ ods backend model_server
# Start the model server on a custom port
ods backend model_server --port 9001
# Run the API server for a tracked product worktree from the control checkout
ods backend api --worktree codex/fix/auth-banner-modal
```
### `web` - Run Frontend Scripts
@@ -231,6 +242,14 @@ ods web <script> [args...]
Script names are available via shell completion (for supported shells via
`ods completion`), and are read from `web/package.json`.
When run inside a tracked `agent-lab` worktree, `ods web ...` automatically
injects the worktree's `PORT`, `BASE_URL`, `WEB_DOMAIN`, `INTERNAL_URL`, and
`MCP_INTERNAL_URL` so the Next.js dev server boots against the right isolated
stack.
From the `codex/agent-lab` control checkout, `--worktree <branch>` applies the
same wiring to a tracked target worktree.
**Examples:**
```shell
@@ -242,6 +261,162 @@ ods web lint
# Forward extra args to the script
ods web test --watch
# Run the Next.js dev server for a tracked product worktree
ods web dev --worktree codex/fix/auth-banner-modal
```
### `worktree` - Manage Agent-Lab Worktrees
Create and manage local git worktrees for agentized development. Each tracked
worktree gets:
- a reserved port bundle for web, API, model server, and MCP
- an explicit dependency mode for local external state
- generated `.vscode/.env.agent-lab` and `.vscode/.env.web.agent-lab` files
- a local artifact directory for verification logs and summaries
- a manifest stored under the shared git metadata directory
- bootstrap support for env files, Python runtime, and frontend dependencies
`ods worktree create` is the authoritative entrypoint for this workflow. Do not
use raw `git worktree add` when you want the `agent-lab` harness, because you
will skip the manifest, env overlays, dependency bootstrap, and lane-aware base
selection.
```shell
ods worktree <subcommand>
```
**Subcommands:**
- `create <branch>` - Create a worktree and manifest
- `bootstrap [worktree]` - Prepare env files and dependencies for a worktree
- `deps up|status|reset|down [worktree]` - Provision and manage namespaced external state
- `status` - List tracked worktrees and URLs
- `show [worktree]` - Show detailed metadata for one worktree
- `remove <worktree>` - Remove a worktree and its local state
`ods worktree create` bootstraps new worktrees by default. The current bootstrap
behavior is:
- link `.vscode/.env` and `.vscode/.env.web` from the source checkout when present
- link the source checkout's `.venv` when present
- clone `web/node_modules` into the worktree when present, falling back to
`npm ci --prefer-offline --no-audit`
Current isolation boundary:
- worktree-local: web/API/model-server ports, URLs, env overlays, artifact dirs
- namespaced when `--dependency-mode namespaced` is used: PostgreSQL database,
Redis prefix, and MinIO file-store bucket
- always shared: OpenSearch/Vespa and the rest of the docker-compose dependency stack
`namespaced` is the default dependency mode on `agent-lab`. `shared` is still
available for lighter-weight work that does not need isolated DB/Redis/MinIO
state.
Branch lanes:
- `codex/lab/<name>` worktrees are treated as harness work and default to
`codex/agent-lab` as the base ref
- `codex/fix/<name>`, `codex/feat/<name>`, and other conventional product lanes
default to `origin/main` as the base ref
- branches that do not encode a lane fall back to `HEAD`; use `--from` or a
clearer branch name when the base matters
Control-plane note:
- the harness lives on `codex/agent-lab`
- product worktrees can still be based on `origin/main`
- run `ods backend`, `ods web`, `ods verify`, and `ods agent-check` with
`--worktree <branch>` from the control checkout when the target worktree does
not carry the harness code itself
Search/vector note:
- OpenSearch/Vespa stay shared-only
- this branch intentionally does not implement namespaced or per-worktree search stacks
- tasks that touch search/index infrastructure should assume a shared surface
**Examples:**
```shell
# Create a product bugfix worktree from main
ods worktree create codex/fix/auth-banner-modal
# Create a lab-only worktree from agent-lab
ods worktree create codex/lab/browser-validation
# Reuse the shared DB/Redis/MinIO state for a lighter-weight task
ods worktree create codex/fix/ui-polish --dependency-mode shared
# Re-bootstrap an existing worktree
ods worktree bootstrap codex/fix/auth-banner-modal
# Inspect the current worktree's namespaced dependency state
ods worktree deps status
# Reset the current worktree's Postgres/Redis/MinIO namespace
ods worktree deps reset
# See tracked worktrees
ods worktree status
# Show the current worktree manifest
ods worktree show
# Remove a worktree when finished
ods worktree remove codex/fix/auth-banner-modal
# Remove a worktree and tear down its namespaced dependencies
ods worktree remove codex/fix/auth-banner-modal --drop-deps
```
### `verify` - Run the Agent-Lab Verification Ladder
Run a unified verification flow for the current checkout. `ods verify` is the
first worktree-aware entrypoint that combines:
- `agent-check`
- optional targeted pytest execution
- optional targeted Playwright execution
- machine-readable verification summaries written to the worktree artifact dir
```shell
ods verify
```
Useful flags:
| Flag | Description |
|------|-------------|
| `--base-ref <ref>` | Ref to compare against for `agent-check` |
| `--skip-agent-check` | Skip the diff-based rules step |
| `--worktree <id>` | Run verification against a tracked worktree from the control checkout |
| `--pytest <path>` | Run a specific pytest path or node id (repeatable) |
| `--playwright <path>` | Run a specific Playwright test path (repeatable) |
| `--playwright-grep <expr>` | Pass `--grep` through to Playwright |
| `--playwright-project <name>` | Limit Playwright to one project |
Examples:
```shell
# Run just the diff-based checks
ods verify
# Validate a backend change with one focused integration target
ods verify --pytest backend/tests/integration/tests/streaming_endpoints/test_chat_stream.py
# Validate a UI change with one Playwright suite
ods verify --playwright tests/e2e/chat/welcome_page.spec.ts --playwright-project admin
# Run both backend and UI checks
ods verify \
--pytest backend/tests/integration/tests/streaming_endpoints/test_chat_stream.py \
--playwright tests/e2e/admin/default-agent.spec.ts
# Verify a tracked product worktree from the control checkout
ods verify --worktree codex/fix/auth-banner-modal
```
### `dev` - Devcontainer Management
@@ -325,6 +500,56 @@ Check that specified modules are only lazily imported (used for keeping backend
ods check-lazy-imports
```
### `agent-check` - Check New Agent-Safety Violations
Run a small set of diff-based checks aimed at keeping new changes agent-friendly
without failing on historical debt already present in the repository.
This command is part of the expected workflow on `agent-lab`. It is not necessarily a repo-wide
mandatory gate on `main`.
```shell
ods agent-check
```
Current checks flag newly added:
- `HTTPException` usage in backend product code
- `response_model=` on backend APIs
- Celery `.delay()` calls
- imports from `web/src/components/` outside the legacy component tree
The command also validates the `docs/agent/` knowledge base by checking that
required files exist and that local markdown links in that surface resolve
correctly.
Useful flags:
| Flag | Description |
|------|-------------|
| `--staged` | Check the staged diff instead of the working tree |
| `--base-ref <ref>` | Diff against a git ref other than `HEAD` |
| `--worktree <id>` | Check a tracked worktree from the control checkout |
Examples:
```shell
# Check working tree changes
ods agent-check
# Check only staged changes
ods agent-check --staged
# Compare the branch against main
ods agent-check --base-ref origin/main
# Limit the diff to specific paths
ods agent-check web/src backend/onyx/server/features/build
# Run against a tracked product worktree from the control checkout
ods agent-check --worktree codex/fix/auth-banner-modal --base-ref origin/main
```
### `run-ci` - Run CI on Fork PRs
Pull requests from forks don't automatically trigger GitHub Actions for security reasons.
@@ -516,6 +741,148 @@ ods trace --project admin
ods trace --list
```
### `journey` - Capture Before/After Browser Journeys
Run a registered Playwright journey with video capture. The default workflow is
to record `before` and `after` inside the same tracked worktree as the change.
`journey compare` remains available as a recovery path when you need to compare
two explicit revisions/worktrees after the fact.
Registered journeys live in `web/tests/e2e/journeys/registry.json`.
An optional `.github/agent-journeys.json` file can list journeys for a PR:
```json
{
"journeys": ["auth-landing"]
}
```
```shell
ods journey <subcommand>
```
**Subcommands:**
- `list` - Show registered journeys
- `run` - Run one journey against the current or target worktree
- `compare` - Capture `before` and `after` artifacts across two revisions/worktrees when a missed baseline must be recovered
- `publish` - Upload a compare run to S3 and upsert the PR comment
**Examples:**
```shell
# List journey definitions
ods journey list
# Capture before in the tracked product worktree before editing
ods journey run --worktree codex/fix/auth-banner-modal --journey auth-landing --label before
# Capture after in that same worktree after validating the fix
ods journey run --worktree codex/fix/auth-banner-modal --journey auth-landing --label after
# Recover a missed baseline later by comparing origin/main to a tracked product worktree
ods journey compare \
--journey auth-landing \
--after-worktree codex/fix/auth-banner-modal
# Publish an existing compare run to PR #10007
ods journey publish \
--run-dir .git/onyx-agent-lab/journeys/20260408-123000 \
--pr 10007
```
`journey run` writes a `summary.json` into the capture directory. `journey compare`
writes a `summary.json` into its run directory and, when `--pr` is supplied,
uploads that directory to S3 and upserts a PR comment with before/after links.
### `pr-review` - Fetch and Respond to GitHub Review Threads
Treat PR review comments as a local machine-readable workflow instead of relying
on the GitHub UI alone.
```shell
ods pr-review <subcommand>
```
**Subcommands:**
- `fetch` - Download review threads into local harness state
- `triage` - Classify threads as actionable, duplicate, outdated, or resolved
- `respond` - Reply to an inline review comment and optionally resolve its thread
- `resolve` - Resolve a review thread without posting a reply
**Examples:**
```shell
# Fetch review threads for the current branch PR
ods pr-review fetch
# Triage review threads for a specific PR
ods pr-review triage --pr 10007
# Reply to a top-level review comment and resolve the thread
ods pr-review respond \
--pr 10007 \
--comment-id 2512997464 \
--thread-id PRRT_kwDO... \
--body "Fixed in the latest patch. Added a regression journey as well."
```
Fetched and triaged review data is written under the local harness state
directory:
```text
$(git rev-parse --git-common-dir)/onyx-agent-lab/reviews/pr-<number>/
```
### `pr-checks` - Diagnose Failing GitHub Checks
Inspect the latest checks on a PR and surface the failing ones with the next
recommended remediation command.
```shell
ods pr-checks <subcommand>
```
**Subcommands:**
- `status` - list all checks for the PR
- `diagnose` - list only failing checks and point to the next step
**Examples:**
```shell
# Show all checks on the current branch PR
ods pr-checks status
# Show only failing checks and the next remediation command
ods pr-checks diagnose --pr 10007
```
`pr-checks diagnose` is especially useful after pushing a fix or after replying
to review comments. For Playwright failures it points directly at `ods trace`.
### `pr-open` - Open a PR With the Repo Template
Create a pull request through `gh` while enforcing a conventional-commit title.
If `--title` is omitted, `ods` uses the latest commit subject. The PR body
defaults to `.github/pull_request_template.md`. PRs are ready-for-review by
default; use `--draft` only when you explicitly need that state.
```shell
ods pr-open
ods pr-open --title "fix: suppress logged-out modal on fresh auth load"
```
### `pr-merge` - Merge a PR Through `gh`
Merge or auto-merge a pull request with an explicit merge method.
```shell
ods pr-merge --pr 10007 --method squash
ods pr-merge --pr 10007 --method squash --auto --delete-branch
```
### Testing Changes Locally (Dry Run)
Both `run-ci` and `cherry-pick` support `--dry-run` to test without making remote changes:

View File

@@ -0,0 +1,161 @@
package cmd
import (
"fmt"
"os"
"os/exec"
"sort"
"strings"
log "github.com/sirupsen/logrus"
"github.com/spf13/cobra"
"github.com/onyx-dot-app/onyx/tools/ods/internal/agentcheck"
"github.com/onyx-dot-app/onyx/tools/ods/internal/agentdocs"
"github.com/onyx-dot-app/onyx/tools/ods/internal/paths"
)
// AgentCheckOptions holds the flag values and the resolved checkout root for
// one agent-check invocation.
type AgentCheckOptions struct {
	Staged bool // diff the staged index (--cached) instead of the working tree
	BaseRef string // git ref to diff against instead of HEAD
	Worktree string // tracked agent-lab worktree identifier; empty = current checkout
	RepoRoot string // resolved checkout path the diff runs in; set by runAgentCheck
}

// AgentCheckResult aggregates violations from the diff-based line rules and
// from the docs/agent knowledge-base validation.
type AgentCheckResult struct {
	Violations []agentcheck.Violation
	DocViolations []agentdocs.Violation
}
// NewAgentCheckCommand creates the agent-check command.
func NewAgentCheckCommand() *cobra.Command {
	options := &AgentCheckOptions{}

	command := &cobra.Command{
		Use:   "agent-check [paths...]",
		Short: "Run diff-based checks for agent-safe changes",
		Long: `Run diff-based checks for agent-safe changes.
This command inspects added lines in the current git diff and flags a small set
of newly introduced repo-level violations without failing on historical debt.
By default it compares the working tree against HEAD. Use --staged to inspect
the staged diff instead, or --base-ref to compare against a different ref.
Use --worktree to run the same check against a tracked target worktree from the
agent-lab control checkout.
Examples:
ods agent-check
ods agent-check --staged
ods agent-check --base-ref origin/main
ods agent-check --worktree codex/fix/auth-banner-modal --base-ref origin/main
ods agent-check web/src backend/onyx/server/features/build`,
		Run: func(_ *cobra.Command, args []string) {
			runAgentCheck(options, args)
		},
	}

	// Flag values are written straight into the shared options struct.
	command.Flags().BoolVar(&options.Staged, "staged", false, "check staged changes instead of the working tree")
	command.Flags().StringVar(&options.BaseRef, "base-ref", "", "git ref to diff against instead of HEAD")
	command.Flags().StringVar(&options.Worktree, "worktree", "", "tracked agent-lab worktree to check instead of the current checkout")
	return command
}
// runAgentCheck resolves the target checkout, runs the diff-based rules plus
// the docs/agent validation, prints every violation, and exits non-zero when
// any were found.
func runAgentCheck(opts *AgentCheckOptions, providedPaths []string) {
	checkoutRoot, _, _ := resolveAgentLabTarget(opts.Worktree)
	opts.RepoRoot = checkoutRoot

	result, err := evaluateAgentCheck(opts, providedPaths)
	if err != nil {
		log.Fatalf("Failed to run agent-check: %v", err)
	}

	if len(result.Violations)+len(result.DocViolations) == 0 {
		log.Info("✅ agent-check found no new violations.")
		return
	}

	// Deterministic report order: path, then line number, then rule id.
	less := func(i, j int) bool {
		a, b := result.Violations[i], result.Violations[j]
		if a.Path != b.Path {
			return a.Path < b.Path
		}
		if a.LineNum != b.LineNum {
			return a.LineNum < b.LineNum
		}
		return a.RuleID < b.RuleID
	}
	sort.Slice(result.Violations, less)

	for _, v := range result.Violations {
		log.Errorf("\n❌ %s:%d [%s]", v.Path, v.LineNum, v.RuleID)
		log.Errorf(" %s", v.Message)
		log.Errorf(" Added line: %s", strings.TrimSpace(v.Content))
	}
	for _, v := range result.DocViolations {
		log.Errorf("\n❌ %s [agent-docs]", v.Path)
		log.Errorf(" %s", v.Message)
	}

	fmt.Fprintf(
		os.Stderr,
		"\nFound %d agent-check violation(s) and %d agent-docs violation(s).\n",
		len(result.Violations),
		len(result.DocViolations),
	)
	os.Exit(1)
}
// evaluateAgentCheck gathers the configured diff, extracts its added lines,
// and runs both the line-based rules and the docs/agent knowledge-base
// validation, returning the combined result.
func evaluateAgentCheck(opts *AgentCheckOptions, providedPaths []string) (*AgentCheckResult, error) {
	diffOutput, err := getAgentCheckDiff(opts, providedPaths)
	if err != nil {
		return nil, err
	}
	addedLines, err := agentcheck.ParseAddedLines(diffOutput)
	if err != nil {
		return nil, err
	}

	// Fall back to the current checkout's git root when no repo root was
	// resolved by the caller.
	root := opts.RepoRoot
	if root == "" {
		gitRoot, rootErr := paths.GitRoot()
		if rootErr != nil {
			return nil, fmt.Errorf("determine git root: %w", rootErr)
		}
		root = gitRoot
	}

	return &AgentCheckResult{
		Violations:    agentcheck.CheckAddedLines(addedLines),
		DocViolations: agentdocs.Validate(root),
	}, nil
}
// getAgentCheckDiff produces the unified diff (zero context lines) whose
// added lines the agent-check rules scan.
//
// The diff target is selected by the options: --staged inspects the index,
// --base-ref diffs against an arbitrary ref, and the default compares the
// working tree with HEAD. Optional path arguments restrict the diff.
//
// Only stdout is returned as the diff text: stderr is captured separately so
// git warnings can never leak into the input of agentcheck.ParseAddedLines.
// On failure the error includes git's stderr for diagnosis.
func getAgentCheckDiff(opts *AgentCheckOptions, providedPaths []string) (string, error) {
	args := []string{"diff", "--no-color", "--unified=0"}
	if opts.Staged {
		args = append(args, "--cached")
	} else if opts.BaseRef != "" {
		args = append(args, opts.BaseRef)
	} else {
		args = append(args, "HEAD")
	}
	if len(providedPaths) > 0 {
		args = append(args, "--")
		args = append(args, providedPaths...)
	}

	cmd := exec.Command("git", args...)
	if opts.RepoRoot != "" {
		// Run inside the resolved checkout so --worktree targets are diffed
		// in their own directory.
		cmd.Dir = opts.RepoRoot
	}
	output, err := cmd.Output()
	if err != nil {
		var stderr string
		if exitErr, ok := err.(*exec.ExitError); ok {
			stderr = string(exitErr.Stderr)
		}
		return "", fmt.Errorf("git %s failed: %w\n%s", strings.Join(args, " "), err, stderr)
	}
	return string(output), nil
}

View File

@@ -0,0 +1,32 @@
package cmd
import (
log "github.com/sirupsen/logrus"
"github.com/onyx-dot-app/onyx/tools/ods/internal/agentlab"
"github.com/onyx-dot-app/onyx/tools/ods/internal/paths"
)
// resolveAgentLabTarget maps an optional worktree identifier to a checkout
// path plus its agent-lab manifest (when one is tracked).
//
// An empty identifier means "the current checkout": the git root is returned
// and a best-effort manifest lookup is performed for it. A non-empty
// identifier must resolve to a tracked worktree; any failure is fatal.
func resolveAgentLabTarget(identifier string) (string, agentlab.Manifest, bool) {
	if identifier != "" {
		gitDir, err := agentlab.GetCommonGitDir()
		if err != nil {
			log.Fatalf("Failed to determine git common dir: %v", err)
		}
		manifest, found, err := agentlab.FindByIdentifier(gitDir, identifier)
		if err != nil {
			log.Fatalf("Failed to resolve worktree %q: %v", identifier, err)
		}
		if !found {
			log.Fatalf("No agent-lab worktree found for %q", identifier)
		}
		return manifest.CheckoutPath, manifest, true
	}

	root, err := paths.GitRoot()
	if err != nil {
		log.Fatalf("Failed to determine git root: %v", err)
	}
	manifest, found := currentAgentLabManifest(root)
	return root, manifest, found
}

View File

@@ -1,7 +1,6 @@
package cmd
import (
"bufio"
"errors"
"fmt"
"net"
@@ -14,14 +13,16 @@ import (
log "github.com/sirupsen/logrus"
"github.com/spf13/cobra"
"github.com/onyx-dot-app/onyx/tools/ods/internal/paths"
"github.com/onyx-dot-app/onyx/tools/ods/internal/agentlab"
"github.com/onyx-dot-app/onyx/tools/ods/internal/envutil"
)
// NewBackendCommand creates the parent "backend" command with subcommands for
// running backend services.
// BackendOptions holds options shared across backend subcommands.
type BackendOptions struct {
NoEE bool
NoEE bool
Worktree string
}
func NewBackendCommand() *cobra.Command {
@@ -44,6 +45,7 @@ Available subcommands:
}
cmd.PersistentFlags().BoolVar(&opts.NoEE, "no-ee", false, "Disable Enterprise Edition features (enabled by default)")
cmd.PersistentFlags().StringVar(&opts.Worktree, "worktree", "", "tracked agent-lab worktree to run from instead of the current checkout")
cmd.AddCommand(newBackendAPICommand(opts))
cmd.AddCommand(newBackendModelServerCommand(opts))
@@ -62,9 +64,10 @@ func newBackendAPICommand(opts *BackendOptions) *cobra.Command {
Examples:
ods backend api
ods backend api --port 9090
ods backend api --no-ee`,
ods backend api --no-ee
ods backend api --worktree codex/fix/auth-banner-modal`,
Run: func(cmd *cobra.Command, args []string) {
runBackendService("api", "onyx.main:app", port, opts)
runBackendService("api", "onyx.main:app", port, cmd.Flags().Changed("port"), opts)
},
}
@@ -83,9 +86,10 @@ func newBackendModelServerCommand(opts *BackendOptions) *cobra.Command {
Examples:
ods backend model_server
ods backend model_server --port 9001`,
ods backend model_server --port 9001
ods backend model_server --worktree codex/fix/auth-banner-modal`,
Run: func(cmd *cobra.Command, args []string) {
runBackendService("model_server", "model_server.main:app", port, opts)
runBackendService("model_server", "model_server.main:app", port, cmd.Flags().Changed("port"), opts)
},
}
@@ -137,16 +141,25 @@ func resolvePort(port string) string {
return port
}
func runBackendService(name, module, port string, opts *BackendOptions) {
root, err := paths.GitRoot()
if err != nil {
log.Fatalf("Failed to find git root: %v", err)
func runBackendService(name, module, port string, portExplicit bool, opts *BackendOptions) {
root, worktreeManifest, hasWorktreeManifest := resolveAgentLabTarget(opts.Worktree)
if hasWorktreeManifest && !portExplicit {
switch name {
case "api":
port = strconv.Itoa(worktreeManifest.Ports.API)
case "model_server":
port = strconv.Itoa(worktreeManifest.Ports.ModelServer)
}
}
port = resolvePort(port)
envFile := ensureBackendEnvFile(root)
fileVars := loadBackendEnvFile(envFile)
fileVars, err := envutil.LoadFile(envFile)
if err != nil {
log.Fatalf("Failed to load env file %s: %v", envFile, err)
}
eeDefaults := eeEnvDefaults(opts.NoEE)
fileVars = append(fileVars, eeDefaults...)
@@ -162,9 +175,17 @@ func runBackendService(name, module, port string, opts *BackendOptions) {
if !opts.NoEE {
log.Info("Enterprise Edition enabled (use --no-ee to disable)")
}
if hasWorktreeManifest {
log.Infof("agent-lab worktree %s detected: web=%s api=%s", worktreeManifest.Branch, worktreeManifest.URLs.Web, worktreeManifest.URLs.API)
log.Infof("lane=%s base-ref=%s", worktreeManifest.ResolvedLane(), worktreeManifest.BaseRef)
log.Infof("dependency mode=%s search-infra=%s", worktreeManifest.ResolvedDependencies().Mode, worktreeManifest.ResolvedDependencies().SearchInfraMode)
}
log.Debugf("Running in %s: uv %v", backendDir, uvicornArgs)
mergedEnv := mergeEnv(os.Environ(), fileVars)
mergedEnv := envutil.Merge(os.Environ(), fileVars)
if hasWorktreeManifest {
mergedEnv = envutil.ApplyOverrides(mergedEnv, worktreeManifest.RuntimeEnv())
}
log.Debugf("Applied %d env vars from %s (shell takes precedence)", len(fileVars), envFile)
svcCmd := exec.Command("uv", uvicornArgs...)
@@ -185,6 +206,18 @@ func runBackendService(name, module, port string, opts *BackendOptions) {
}
}
// currentAgentLabManifest looks up the tracked agent-lab manifest for the
// given checkout path. Lookup failures are deliberately swallowed and
// reported as "not tracked" (false), since running outside a tracked
// worktree is a normal, supported case.
func currentAgentLabManifest(repoRoot string) (agentlab.Manifest, bool) {
	commonGitDir, err := agentlab.GetCommonGitDir()
	if err != nil {
		return agentlab.Manifest{}, false
	}
	manifest, found, err := agentlab.FindByRepoRoot(commonGitDir, repoRoot)
	if err != nil {
		return agentlab.Manifest{}, false
	}
	return manifest, found
}
// eeEnvDefaults returns env entries for EE and license enforcement settings.
// These are appended to the file vars so they act as defaults — shell env
// and .env file values still take precedence via mergeEnv.
@@ -231,59 +264,3 @@ func ensureBackendEnvFile(root string) string {
log.Infof("Created %s from template (review and fill in <REPLACE THIS> values)", envFile)
return envFile
}
// mergeEnv layers file-based defaults under the shell environment: every
// shell entry is kept as-is, and a file entry is appended only when its key
// is not already exported by the shell (shell values win).
func mergeEnv(shellEnv, fileVars []string) []string {
	shellKeys := make(map[string]bool, len(shellEnv))
	for _, kv := range shellEnv {
		if eq := strings.Index(kv, "="); eq > 0 {
			shellKeys[kv[:eq]] = true
		}
	}

	result := append(make([]string, 0, len(shellEnv)+len(fileVars)), shellEnv...)
	for _, kv := range fileVars {
		eq := strings.Index(kv, "=")
		if eq <= 0 {
			continue
		}
		if key := kv[:eq]; shellKeys[key] {
			log.Debugf("Env var %s already set in shell, skipping .env value", key)
		} else {
			result = append(result, kv)
		}
	}
	return result
}
// loadBackendEnvFile reads a .env file into KEY=VALUE entries suitable for
// appending to os.Environ(). Blank lines and # comments are ignored; keys and
// values are whitespace-trimmed and surrounding single/double quotes are
// stripped from values. Any I/O error is fatal.
func loadBackendEnvFile(path string) []string {
	file, err := os.Open(path)
	if err != nil {
		log.Fatalf("Failed to open env file %s: %v", path, err)
	}
	defer func() { _ = file.Close() }()

	var entries []string
	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		line := strings.TrimSpace(scanner.Text())
		if line == "" || strings.HasPrefix(line, "#") {
			continue
		}
		eq := strings.Index(line, "=")
		if eq <= 0 {
			// Lines without a KEY=VALUE shape are silently skipped.
			continue
		}
		key := strings.TrimSpace(line[:eq])
		value := strings.Trim(strings.TrimSpace(line[eq+1:]), `"'`)
		entries = append(entries, fmt.Sprintf("%s=%s", key, value))
	}
	if err := scanner.Err(); err != nil {
		log.Fatalf("Failed to read env file %s: %v", path, err)
	}
	return entries
}

View File

@@ -37,8 +37,6 @@ func NewDesktopCommand() *cobra.Command {
runDesktopScript(args)
},
}
cmd.Flags().SetInterspersed(false)
return cmd
}

View File

@@ -0,0 +1,63 @@
package cmd
import (
"fmt"
"os/exec"
"strings"
"github.com/onyx-dot-app/onyx/tools/ods/internal/git"
)
// ghString runs a gh command and returns its trimmed stdout. When gh exits
// non-zero, the returned error includes gh's stderr for context.
func ghString(args ...string) (string, error) {
	git.CheckGitHubCLI()
	stdout, err := exec.Command("gh", args...).Output()
	if err == nil {
		return strings.TrimSpace(string(stdout)), nil
	}
	joined := strings.Join(args, " ")
	if exitErr, ok := err.(*exec.ExitError); ok {
		return "", fmt.Errorf("gh %s failed: %w: %s", joined, err, strings.TrimSpace(string(exitErr.Stderr)))
	}
	return "", fmt.Errorf("gh %s failed: %w", joined, err)
}
// resolvePRNumber returns the PR number to operate on. A non-blank explicit
// value wins and is returned trimmed (so it is safe to splice into REST API
// paths); otherwise the PR associated with the current branch is looked up
// via gh.
func resolvePRNumber(explicit string) (string, error) {
	// Bug fix: the original checked the trimmed value but returned the raw
	// string, so a padded "--pr ' 10007'" leaked whitespace into API paths.
	trimmed := strings.TrimSpace(explicit)
	if trimmed != "" {
		return trimmed, nil
	}
	return ghString("pr", "view", "--json", "number", "--jq", ".number")
}
// currentRepoSlug returns the "owner/name" slug of the repository gh
// resolves for the current directory.
func currentRepoSlug() (string, error) {
	slug, err := ghString("repo", "view", "--json", "owner,name", "--jq", `.owner.login + "/" + .name`)
	return slug, err
}
// upsertIssueComment creates or updates a single marker-identified comment on
// a PR (issues API). The first existing comment whose body starts with marker
// is edited in place; when none exists a new comment is posted.
func upsertIssueComment(repoSlug, prNumber, marker, body string) error {
	// --paginate walks every page of comments; without it a marker comment
	// beyond the first page would be missed and a duplicate posted.
	commentID, err := ghString(
		"api",
		"--paginate",
		fmt.Sprintf("repos/%s/issues/%s/comments", repoSlug, prNumber),
		"--jq",
		fmt.Sprintf(".[] | select(.body | startswith(%q)) | .id", marker),
	)
	if err != nil {
		return err
	}
	// The jq filter emits one ID per matching comment, newline-separated.
	// Update only the first match so the PATCH URL stays well-formed.
	if idx := strings.IndexByte(commentID, '\n'); idx >= 0 {
		commentID = strings.TrimSpace(commentID[:idx])
	}
	if commentID != "" {
		_, err := ghString(
			"api",
			"--method", "PATCH",
			fmt.Sprintf("repos/%s/issues/comments/%s", repoSlug, commentID),
			"-f", fmt.Sprintf("body=%s", body),
		)
		return err
	}
	_, err = ghString(
		"api",
		"--method", "POST",
		fmt.Sprintf("repos/%s/issues/%s/comments", repoSlug, prNumber),
		"-f", fmt.Sprintf("body=%s", body),
	)
	return err
}

865
tools/ods/cmd/journey.go Normal file
View File

@@ -0,0 +1,865 @@
package cmd
import (
"encoding/json"
"fmt"
"net/http"
"os"
"os/exec"
"path/filepath"
"sort"
"strings"
"syscall"
"time"
log "github.com/sirupsen/logrus"
"github.com/spf13/cobra"
"github.com/onyx-dot-app/onyx/tools/ods/internal/agentlab"
"github.com/onyx-dot-app/onyx/tools/ods/internal/git"
"github.com/onyx-dot-app/onyx/tools/ods/internal/journey"
"github.com/onyx-dot-app/onyx/tools/ods/internal/paths"
"github.com/onyx-dot-app/onyx/tools/ods/internal/s3"
)
// defaultJourneyHTTPRegion is the AWS region used when composing public HTTP
// links to uploaded journey artifacts.
// NOTE(review): presumably this must match the artifact bucket's region —
// confirm against the S3 upload path.
const defaultJourneyHTTPRegion = "us-east-2"

// JourneyRunOptions carries the flag values for "journey run".
type JourneyRunOptions struct {
	Journey string // registered journey name to run
	Label string // artifact label for the capture (e.g. "before" or "after")
	Worktree string // tracked agent-lab worktree to run from; empty = current checkout
	OutputDir string // explicit artifact directory override
	Project string // Playwright project override from the journey registry
}

// JourneyCompareOptions carries the flag values for "journey compare".
type JourneyCompareOptions struct {
	Journeys []string // journey names to capture (repeatable --journey flag)
	PlanFile string // JSON plan file listing journeys
	BeforeRef string // git ref for the "before" capture
	AfterRef string // git ref for the "after" capture when no worktree is given
	AfterWorktree string // existing tracked worktree used for the "after" capture
	DependencyMode string // dependency mode for temporary worktrees: namespaced or shared
	PR string // PR number to upload/comment against after capture
	KeepWorktrees bool // keep temporary journey worktrees after the run
	Bucket string // S3 bucket override for uploaded artifacts
}

// JourneyPublishOptions carries the flag values for "journey publish".
type JourneyPublishOptions struct {
	RunDir string // compare run directory containing summary.json
	PR string // PR number to publish against
	Bucket string // S3 bucket override for uploaded artifacts
}

// JourneyCaptureSummary describes one journey capture and the artifact files
// it produced; it is serialized as summary.json in the artifact directory.
type JourneyCaptureSummary struct {
	Journey string `json:"journey"`
	Label string `json:"label"`
	Worktree string `json:"worktree,omitempty"`
	URL string `json:"url"`
	ArtifactDir string `json:"artifact_dir"`
	LogPath string `json:"log_path"`
	VideoFiles []string `json:"video_files,omitempty"`
	TraceFiles []string `json:"trace_files,omitempty"`
	Screenshots []string `json:"screenshots,omitempty"`
	MetadataJSON []string `json:"metadata_json,omitempty"`
}

// JourneyCompareSummary is the machine-readable record of a compare run,
// written to the run directory and optionally uploaded to S3.
type JourneyCompareSummary struct {
	GeneratedAt string `json:"generated_at"`
	BeforeRef string `json:"before_ref"`
	AfterRef string `json:"after_ref"`
	RunDir string `json:"run_dir"`
	S3Prefix string `json:"s3_prefix,omitempty"`
	S3HTTPBase string `json:"s3_http_base,omitempty"`
	Captures []JourneyCaptureSummary `json:"captures"`
}

// managedProcess tracks one background service started for a capture run,
// together with the file its output is logged to.
type managedProcess struct {
	Name string
	Cmd *exec.Cmd
	LogPath string
}
// NewJourneyCommand creates the journey command surface.
func NewJourneyCommand() *cobra.Command {
	root := &cobra.Command{
		Use:   "journey",
		Short: "Capture before/after browser journeys as agent artifacts",
	}
	for _, sub := range []*cobra.Command{
		newJourneyListCommand(),
		newJourneyRunCommand(),
		newJourneyCompareCommand(),
		newJourneyPublishCommand(),
	} {
		root.AddCommand(sub)
	}
	return root
}
// newJourneyListCommand builds the "journey list" subcommand.
func newJourneyListCommand() *cobra.Command {
	listCmd := &cobra.Command{
		Use:   "list",
		Short: "List registered browser journeys",
		Run: func(_ *cobra.Command, _ []string) {
			runJourneyList()
		},
	}
	return listCmd
}
// newJourneyRunCommand builds the "journey run" subcommand.
func newJourneyRunCommand() *cobra.Command {
	runOpts := &JourneyRunOptions{}

	runCmd := &cobra.Command{
		Use:   "run",
		Short: "Run a single registered journey against the current or target worktree",
		Long: `Run one registered journey against the current checkout or a tracked worktree.
This is the default before/after workflow for product changes:
1. capture --label before in the target worktree before editing
2. implement and validate the change in that same worktree
3. capture --label after in that same worktree
Use journey compare only when you need to recover a missed baseline or compare
two explicit revisions after the fact.`,
		Run: func(_ *cobra.Command, _ []string) {
			runJourneyRun(runOpts)
		},
	}

	flags := runCmd.Flags()
	flags.StringVar(&runOpts.Journey, "journey", "", "registered journey name to run")
	flags.StringVar(&runOpts.Label, "label", "after", "artifact label for this capture (for example before or after)")
	flags.StringVar(&runOpts.Worktree, "worktree", "", "tracked agent-lab worktree to run from instead of the current checkout")
	flags.StringVar(&runOpts.OutputDir, "output-dir", "", "explicit artifact directory for the capture")
	flags.StringVar(&runOpts.Project, "project", "", "override the Playwright project from the journey registry")
	_ = runCmd.MarkFlagRequired("journey")
	return runCmd
}
// newJourneyCompareCommand builds the "journey compare" subcommand.
func newJourneyCompareCommand() *cobra.Command {
	compareOpts := &JourneyCompareOptions{}

	compareCmd := &cobra.Command{
		Use:   "compare",
		Short: "Capture before and after videos by replaying registered journeys against two revisions",
		Long: `Create or reuse worktrees for the before and after revisions, boot the app in each one,
record the configured journeys, and write a machine-readable summary. If --pr is supplied,
the compare run is also uploaded to S3 and linked from the pull request.
This is the fallback path, not the default workflow. Prefer journey run inside a
single tracked product worktree when you can capture before and after during the
normal edit loop.`,
		Run: func(_ *cobra.Command, _ []string) {
			runJourneyCompare(compareOpts)
		},
	}

	flags := compareCmd.Flags()
	flags.StringArrayVar(&compareOpts.Journeys, "journey", nil, "registered journey name to capture (repeatable)")
	flags.StringVar(&compareOpts.PlanFile, "plan-file", "", "JSON file containing {\"journeys\":[...]} (defaults to .github/agent-journeys.json when present)")
	flags.StringVar(&compareOpts.BeforeRef, "before-ref", "origin/main", "git ref for the before capture")
	flags.StringVar(&compareOpts.AfterRef, "after-ref", "HEAD", "git ref for the after capture when --after-worktree is not supplied")
	flags.StringVar(&compareOpts.AfterWorktree, "after-worktree", "", "existing tracked worktree to use for the after capture")
	flags.StringVar(&compareOpts.DependencyMode, "dependency-mode", string(agentlab.DependencyModeNamespaced), "dependency mode for temporary worktrees: namespaced or shared")
	flags.StringVar(&compareOpts.PR, "pr", "", "pull request number to upload/comment against after capture")
	flags.StringVar(&compareOpts.Bucket, "bucket", "", "override the S3 bucket used for uploaded journey artifacts")
	flags.BoolVar(&compareOpts.KeepWorktrees, "keep-worktrees", false, "keep temporary journey worktrees after the capture run")
	return compareCmd
}
// newJourneyPublishCommand builds the "journey publish" subcommand.
func newJourneyPublishCommand() *cobra.Command {
	publishOpts := &JourneyPublishOptions{}

	publishCmd := &cobra.Command{
		Use:   "publish",
		Short: "Upload a previously captured compare run and update the pull request comment",
		Run: func(_ *cobra.Command, _ []string) {
			runJourneyPublish(publishOpts)
		},
	}

	flags := publishCmd.Flags()
	flags.StringVar(&publishOpts.RunDir, "run-dir", "", "compare run directory containing summary.json")
	flags.StringVar(&publishOpts.PR, "pr", "", "pull request number to publish against")
	flags.StringVar(&publishOpts.Bucket, "bucket", "", "override the S3 bucket used for uploaded journey artifacts")
	_ = publishCmd.MarkFlagRequired("run-dir")
	return publishCmd
}
// runJourneyList prints every registered journey as one tab-separated line:
// name, description, Playwright project, and whether a model server is needed.
func runJourneyList() {
	root, err := paths.GitRoot()
	if err != nil {
		log.Fatalf("Failed to determine git root: %v", err)
	}
	reg, err := journey.LoadRegistry(root)
	if err != nil {
		log.Fatalf("Failed to load journey registry: %v", err)
	}
	for _, def := range reg.Journeys {
		fmt.Printf("%s\t%s\tproject=%s\tmodel_server=%t\n", def.Name, def.Description, def.Project, def.RequiresModelServer)
	}
}
func runJourneyRun(opts *JourneyRunOptions) {
repoRoot, manifest, hasManifest := resolveAgentLabTarget(opts.Worktree)
harnessRoot, err := resolveJourneyHarnessRoot(repoRoot, manifest, hasManifest)
if err != nil {
log.Fatalf("Failed to resolve journey harness root: %v", err)
}
capture, err := captureJourney(harnessRoot, repoRoot, manifest, hasManifest, opts.Journey, opts.Label, opts.OutputDir, opts.Project)
if err != nil {
log.Fatalf("Journey capture failed: %v", err)
}
summaryPath := filepath.Join(capture.ArtifactDir, "summary.json")
data, err := json.MarshalIndent(capture, "", " ")
if err != nil {
log.Fatalf("Failed to encode journey summary: %v", err)
}
if err := os.WriteFile(summaryPath, data, 0644); err != nil {
log.Fatalf("Failed to write journey summary: %v", err)
}
log.Infof("Journey %s (%s) captured to %s", capture.Journey, capture.Label, capture.ArtifactDir)
}
// runJourneyCompare captures each requested journey on a "before" checkout
// (--before-ref) and an "after" checkout (--after-worktree, the current
// tracked worktree when --after-ref is HEAD, or a temporary worktree at
// --after-ref), then writes a combined summary.json under the run dir.
// With --pr the run is also uploaded and the PR comment refreshed.
// Exits via log.Fatalf on any failure.
func runJourneyCompare(opts *JourneyCompareOptions) {
	repoRoot, err := paths.GitRoot()
	if err != nil {
		log.Fatalf("Failed to determine git root: %v", err)
	}
	definitions, err := resolveJourneyDefinitions(repoRoot, opts.Journeys, opts.PlanFile)
	if err != nil {
		log.Fatalf("Failed to resolve journeys: %v", err)
	}
	currentRoot, currentManifest, hasCurrentManifest := resolveAgentLabTarget("")
	// Refuse to capture "after" from a dirty checkout that is not a tracked
	// worktree: the capture would run from a temporary worktree at HEAD and
	// silently drop the uncommitted changes.
	if opts.AfterWorktree == "" && strings.EqualFold(strings.TrimSpace(opts.AfterRef), "HEAD") && !hasCurrentManifest && git.HasUncommittedChanges() {
		log.Fatalf("The current checkout has uncommitted changes, but it is not a tracked agent-lab worktree. Create the product worktree first and rerun with --after-worktree <branch> so the after capture reflects the real patch.")
	}
	_ = currentRoot // only the manifest is used below
	commonGitDir, err := agentlab.GetCommonGitDir()
	if err != nil {
		log.Fatalf("Failed to determine git common dir: %v", err)
	}
	// One UTC-timestamped directory per compare run.
	runDir := filepath.Join(agentlab.StateRoot(commonGitDir), "journeys", time.Now().UTC().Format("20060102-150405"))
	if err := os.MkdirAll(runDir, 0755); err != nil {
		log.Fatalf("Failed to create journey run dir: %v", err)
	}
	// The "before" side always runs in a fresh temporary worktree.
	beforeTarget, err := createTemporaryJourneyWorktree(opts.BeforeRef, "before", agentlab.DependencyMode(opts.DependencyMode))
	if err != nil {
		log.Fatalf("Failed to create before worktree: %v", err)
	}
	if !opts.KeepWorktrees {
		defer cleanupJourneyTarget(beforeTarget)
	}
	var afterTarget journeyTarget
	if opts.AfterWorktree != "" {
		// Explicit after worktree: reuse it, but make sure its deps are up.
		afterTarget, err = resolveJourneyTarget(opts.AfterWorktree)
		if err != nil {
			log.Fatalf("Failed to resolve after worktree: %v", err)
		}
		if err := runSelfCommand("worktree", "deps", "up", afterTarget.Identifier); err != nil {
			log.Fatalf("Failed to provision dependencies for %s: %v", afterTarget.Identifier, err)
		}
	} else if strings.EqualFold(strings.TrimSpace(opts.AfterRef), "HEAD") {
		if hasCurrentManifest {
			// HEAD of a tracked worktree: capture in place.
			afterTarget = journeyTarget{
				Identifier: currentManifest.Branch,
				Manifest:   currentManifest,
			}
			if err := runSelfCommand("worktree", "deps", "up", afterTarget.Identifier); err != nil {
				log.Fatalf("Failed to provision dependencies for %s: %v", afterTarget.Identifier, err)
			}
			log.Infof("Using current tracked worktree %s for the after capture", afterTarget.Identifier)
		} else {
			// Clean, untracked checkout: capture HEAD from a temp worktree.
			afterTarget, err = createTemporaryJourneyWorktree(opts.AfterRef, "after", agentlab.DependencyMode(opts.DependencyMode))
			if err != nil {
				log.Fatalf("Failed to create after worktree: %v", err)
			}
			if !opts.KeepWorktrees {
				defer cleanupJourneyTarget(afterTarget)
			}
		}
	} else {
		// Arbitrary after ref: always a temp worktree.
		afterTarget, err = createTemporaryJourneyWorktree(opts.AfterRef, "after", agentlab.DependencyMode(opts.DependencyMode))
		if err != nil {
			log.Fatalf("Failed to create after worktree: %v", err)
		}
		if !opts.KeepWorktrees {
			defer cleanupJourneyTarget(afterTarget)
		}
	}
	summary := JourneyCompareSummary{
		GeneratedAt: time.Now().UTC().Format(time.RFC3339),
		BeforeRef:   opts.BeforeRef,
		AfterRef:    opts.AfterRef,
		RunDir:      runDir,
		Captures:    []JourneyCaptureSummary{},
	}
	beforeCaptures, err := captureJourneySet(beforeTarget, definitions, "before", runDir)
	if err != nil {
		log.Fatalf("Before capture failed: %v", err)
	}
	summary.Captures = append(summary.Captures, beforeCaptures...)
	afterCaptures, err := captureJourneySet(afterTarget, definitions, "after", runDir)
	if err != nil {
		log.Fatalf("After capture failed: %v", err)
	}
	summary.Captures = append(summary.Captures, afterCaptures...)
	writeJourneyCompareSummary(runDir, summary)
	log.Infof("Journey compare summary written to %s", filepath.Join(runDir, "summary.json"))
	if opts.PR != "" {
		prNumber, err := resolvePRNumber(opts.PR)
		if err != nil {
			log.Fatalf("Failed to resolve PR number: %v", err)
		}
		// Publishing annotates the summary with S3 locations; persist again.
		updated, err := publishJourneyCompare(runDir, prNumber, opts.Bucket)
		if err != nil {
			log.Fatalf("Failed to publish journey compare run: %v", err)
		}
		writeJourneyCompareSummary(runDir, updated)
	}
}
// runJourneyPublish re-publishes an existing compare run: uploads the run
// directory, refreshes the PR comment, then persists the annotated summary.
func runJourneyPublish(opts *JourneyPublishOptions) {
	prNumber, err := resolvePRNumber(opts.PR)
	if err != nil {
		log.Fatalf("Failed to resolve PR number: %v", err)
	}
	updated, publishErr := publishJourneyCompare(opts.RunDir, prNumber, opts.Bucket)
	if publishErr != nil {
		log.Fatalf("Failed to publish journey compare run: %v", publishErr)
	}
	writeJourneyCompareSummary(opts.RunDir, updated)
	log.Infof("Published journey compare run from %s", opts.RunDir)
}
// resolveJourneyDefinitions merges explicitly requested journeys with the plan
// file (explicit --plan-file, or the default plan when present on disk),
// de-duplicates while preserving first-seen order, and resolves the resulting
// names against the registry.
func resolveJourneyDefinitions(repoRoot string, requested []string, planFile string) ([]journey.Definition, error) {
	names := append([]string{}, requested...)
	plan := strings.TrimSpace(planFile)
	if plan == "" {
		candidate := filepath.Join(repoRoot, journey.DefaultPlanPath)
		if _, statErr := os.Stat(candidate); statErr == nil {
			plan = candidate
		}
	}
	if plan != "" {
		loaded, err := journey.LoadPlan(plan)
		if err != nil {
			return nil, err
		}
		names = append(names, loaded.Journeys...)
	}
	if len(names) == 0 {
		return nil, fmt.Errorf("no journeys requested; pass --journey or provide %s", journey.DefaultPlanPath)
	}
	seen := make(map[string]bool, len(names))
	unique := make([]string, 0, len(names))
	for _, name := range names {
		if seen[name] {
			continue
		}
		seen[name] = true
		unique = append(unique, name)
	}
	return journey.ResolveDefinitions(repoRoot, unique)
}

// resolveJourneyHarnessRoot picks the checkout that actually contains the
// journey registry: the control checkout the worktree was created from is
// preferred when available, falling back to the target repo itself.
func resolveJourneyHarnessRoot(targetRepoRoot string, manifest agentlab.Manifest, hasManifest bool) (string, error) {
	candidates := make([]string, 0, 2)
	if hasManifest && manifest.CreatedFromPath != "" {
		candidates = append(candidates, manifest.CreatedFromPath)
	}
	candidates = append(candidates, targetRepoRoot)
	for _, root := range candidates {
		if _, err := os.Stat(filepath.Join(root, journey.RegistryPath)); err == nil {
			return root, nil
		}
	}
	return "", fmt.Errorf("no journey registry found in target repo %s or control checkout %s", targetRepoRoot, manifest.CreatedFromPath)
}
// captureJourney runs a single journey's Playwright spec and collects its
// artifacts (videos, traces, screenshots, metadata) under an output dir.
//
// harnessRoot is the checkout containing the journey registry and the web
// harness; targetRepoRoot is the checkout being exercised. manifest and
// hasManifest carry agent-lab worktree metadata when the target is tracked.
// projectOverride, when non-blank, replaces the definition's Playwright
// project. Returns a summary of the captured artifacts.
func captureJourney(harnessRoot, targetRepoRoot string, manifest agentlab.Manifest, hasManifest bool, journeyName, label, outputDir, projectOverride string) (JourneyCaptureSummary, error) {
	definitions, err := journey.ResolveDefinitions(harnessRoot, []string{journeyName})
	if err != nil {
		return JourneyCaptureSummary{}, err
	}
	definition := definitions[0]
	// Default artifact location: the worktree's artifact dir when tracked,
	// otherwise web/output inside the target repo.
	targetDir := strings.TrimSpace(outputDir)
	if targetDir == "" {
		if hasManifest {
			targetDir = filepath.Join(manifest.ArtifactDir, "journeys", journey.Slug(definition.Name), journey.Slug(label))
		} else {
			targetDir = filepath.Join(targetRepoRoot, "web", "output", "journeys", journey.Slug(definition.Name), journey.Slug(label))
		}
	}
	if err := os.MkdirAll(targetDir, 0755); err != nil {
		return JourneyCaptureSummary{}, fmt.Errorf("create journey artifact dir: %w", err)
	}
	playwrightOutputDir := filepath.Join(targetDir, "playwright")
	logPath := filepath.Join(targetDir, "journey.log")
	projectName := definition.Project
	if strings.TrimSpace(projectOverride) != "" {
		projectName = projectOverride
	}
	// Environment consumed by the Playwright harness; for tracked worktrees
	// the manifest's runtime env is layered on top (and may override these).
	envOverrides := map[string]string{
		"PLAYWRIGHT_JOURNEY_MODE":        "1",
		"PLAYWRIGHT_JOURNEY_CAPTURE_DIR": targetDir,
		"PLAYWRIGHT_OUTPUT_DIR":          playwrightOutputDir,
	}
	if definition.SkipGlobalSetup {
		envOverrides["PLAYWRIGHT_SKIP_GLOBAL_SETUP"] = "1"
	}
	if hasManifest {
		for key, value := range manifest.RuntimeEnv() {
			envOverrides[key] = value
		}
	}
	step, passed := runLoggedCommand(
		"journey-"+definition.Name,
		logPath,
		filepath.Join(harnessRoot, "web"),
		envOverrides,
		"npx",
		"playwright", "test", definition.TestPath, "--project", projectName,
	)
	if !passed {
		return JourneyCaptureSummary{}, fmt.Errorf("%s", strings.Join(step.Details, "\n"))
	}
	artifactSummary, err := summarizeJourneyArtifacts(targetDir)
	if err != nil {
		return JourneyCaptureSummary{}, err
	}
	artifactSummary.Journey = definition.Name
	artifactSummary.Label = label
	artifactSummary.ArtifactDir = targetDir
	artifactSummary.LogPath = logPath
	if hasManifest {
		artifactSummary.Worktree = manifest.Branch
		artifactSummary.URL = manifest.URLs.Web
	} else {
		// NOTE(review): BASE_URL is never set in envOverrides above, so this
		// is empty unless manifest.RuntimeEnv / the caller provides it —
		// confirm intent for the untracked-checkout path.
		artifactSummary.URL = envOverrides["BASE_URL"]
	}
	return artifactSummary, nil
}

// journeyTarget identifies a worktree that a journey capture runs against.
type journeyTarget struct {
	Identifier string            // branch name passed to self-invoked worktree commands
	Manifest   agentlab.Manifest // agent-lab metadata for the worktree
	Temporary  bool              // created just for this run; eligible for cleanup
}
// resolveJourneyTarget looks up a tracked agent-lab worktree by identifier
// (e.g. branch name) and wraps it as a journey target.
func resolveJourneyTarget(identifier string) (journeyTarget, error) {
	commonGitDir, err := agentlab.GetCommonGitDir()
	if err != nil {
		return journeyTarget{}, err
	}
	manifest, found, err := agentlab.FindByIdentifier(commonGitDir, identifier)
	if err != nil {
		return journeyTarget{}, err
	}
	if !found {
		return journeyTarget{}, fmt.Errorf("no worktree found for %q", identifier)
	}
	return journeyTarget{Identifier: manifest.Branch, Manifest: manifest}, nil
}

// createTemporaryJourneyWorktree creates a throwaway worktree at ref via
// self-invocation of the worktree commands, provisions its dependencies,
// and returns it marked Temporary so cleanupJourneyTarget will remove it.
func createTemporaryJourneyWorktree(ref, label string, mode agentlab.DependencyMode) (journeyTarget, error) {
	// Nanosecond suffix keeps repeated or concurrent runs from colliding.
	branch := fmt.Sprintf("codex/journey-%s-%s-%d", journey.Slug(label), journey.Slug(strings.ReplaceAll(ref, "/", "-")), time.Now().UTC().UnixNano())
	if err := runSelfCommand("worktree", "create", branch, "--from", ref, "--dependency-mode", string(mode)); err != nil {
		return journeyTarget{}, err
	}
	if err := runSelfCommand("worktree", "deps", "up", branch); err != nil {
		return journeyTarget{}, err
	}
	target, err := resolveJourneyTarget(branch)
	if err != nil {
		return journeyTarget{}, err
	}
	target.Temporary = true
	return target, nil
}

// cleanupJourneyTarget removes a temporary worktree and deletes its branch.
// Failures are logged as warnings only (best effort); non-temporary targets
// are left untouched.
func cleanupJourneyTarget(target journeyTarget) {
	if !target.Temporary {
		return
	}
	if err := runSelfCommand("worktree", "remove", target.Identifier, "--force", "--drop-deps"); err != nil {
		log.Warnf("Failed to remove temporary worktree %s: %v", target.Identifier, err)
	}
	if err := exec.Command("git", "branch", "-D", target.Identifier).Run(); err != nil {
		log.Warnf("Failed to delete temporary branch %s: %v", target.Identifier, err)
	}
}
// captureJourneySet starts the services a journey batch needs (API, web, and
// optionally the model server), captures each journey once under
// <runDir>/<journey>/<label>, and stops the services before returning.
func captureJourneySet(target journeyTarget, definitions []journey.Definition, label, runDir string) ([]JourneyCaptureSummary, error) {
	harnessRoot, err := resolveJourneyHarnessRoot(target.Manifest.CheckoutPath, target.Manifest, true)
	if err != nil {
		return nil, err
	}
	// Start the model server only if at least one journey needs it.
	requiresModelServer := false
	for _, definition := range definitions {
		if definition.RequiresModelServer {
			requiresModelServer = true
			break
		}
	}
	processes, err := startJourneyServices(target, runDir, requiresModelServer)
	if err != nil {
		return nil, err
	}
	defer stopManagedProcesses(processes)
	captures := make([]JourneyCaptureSummary, 0, len(definitions))
	for _, definition := range definitions {
		outputDir := filepath.Join(runDir, journey.Slug(definition.Name), journey.Slug(label))
		capture, err := captureJourney(harnessRoot, target.Manifest.CheckoutPath, target.Manifest, true, definition.Name, label, outputDir, "")
		if err != nil {
			return nil, err
		}
		captures = append(captures, capture)
	}
	return captures, nil
}

// startJourneyServices launches the backend API, optionally the model server,
// and the web dev server for the target worktree as background processes
// (logs under <runDir>/services/<branch>), then blocks until the API and web
// health endpoints respond. On any failure everything already started is
// stopped before returning.
func startJourneyServices(target journeyTarget, runDir string, includeModelServer bool) ([]managedProcess, error) {
	logDir := filepath.Join(runDir, "services", journey.Slug(target.Manifest.Branch))
	if err := os.MkdirAll(logDir, 0755); err != nil {
		return nil, fmt.Errorf("create service log dir: %w", err)
	}
	processes := make([]managedProcess, 0, 3)
	apiProcess, err := startManagedProcess(
		"api",
		filepath.Join(logDir, "api.log"),
		"backend", "api", "--worktree", target.Identifier,
	)
	if err != nil {
		return nil, err
	}
	processes = append(processes, apiProcess)
	if includeModelServer {
		modelProcess, err := startManagedProcess(
			"model_server",
			filepath.Join(logDir, "model_server.log"),
			"backend", "model_server", "--worktree", target.Identifier,
		)
		if err != nil {
			stopManagedProcesses(processes)
			return nil, err
		}
		processes = append(processes, modelProcess)
	}
	webProcess, err := startManagedProcess(
		"web",
		filepath.Join(logDir, "web.log"),
		"web", "dev", "--worktree", target.Identifier, "--", "--webpack",
	)
	if err != nil {
		stopManagedProcesses(processes)
		return nil, err
	}
	processes = append(processes, webProcess)
	// Readiness gates: API first (2 min budget), then the web server (3 min).
	if err := waitForJourneyURL(target.Manifest.URLs.API+"/health", 2*time.Minute, processes...); err != nil {
		stopManagedProcesses(processes)
		return nil, err
	}
	if err := waitForJourneyURL(target.Manifest.URLs.Web+"/api/health", 3*time.Minute, processes...); err != nil {
		stopManagedProcesses(processes)
		return nil, err
	}
	return processes, nil
}
// startManagedProcess re-invokes the current executable with args as a
// background process whose stdout and stderr both go to logPath. The child
// is placed in its own process group (Setpgid) rather than sharing ours.
func startManagedProcess(name, logPath string, args ...string) (managedProcess, error) {
	executable, err := os.Executable()
	if err != nil {
		return managedProcess{}, fmt.Errorf("determine ods executable: %w", err)
	}
	if err := os.MkdirAll(filepath.Dir(logPath), 0755); err != nil {
		return managedProcess{}, fmt.Errorf("create log dir: %w", err)
	}
	logFile, err := os.Create(logPath)
	if err != nil {
		return managedProcess{}, fmt.Errorf("create log file: %w", err)
	}
	cmd := exec.Command(executable, args...)
	cmd.Stdout = logFile
	cmd.Stderr = logFile
	cmd.Stdin = nil
	cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}
	if err := cmd.Start(); err != nil {
		_ = logFile.Close()
		return managedProcess{}, fmt.Errorf("start %s: %w", name, err)
	}
	// Safe to close our handle right away: after Start the child holds its
	// own descriptor to the log file.
	_ = logFile.Close()
	return managedProcess{Name: name, Cmd: cmd, LogPath: logPath}, nil
}
// stopManagedProcesses shuts down managed background services in reverse
// start order. Each process was started with Setpgid (see startManagedProcess),
// so signals target the whole process group (-pid) to also reach children the
// service spawned; previously only the group leader was signalled, which left
// descendants running. If the group signal fails, fall back to signalling the
// leader alone. Each process gets 10 seconds after SIGINT before being killed.
func stopManagedProcesses(processes []managedProcess) {
	for i := len(processes) - 1; i >= 0; i-- {
		process := processes[i]
		if process.Cmd == nil || process.Cmd.Process == nil {
			continue
		}
		pid := process.Cmd.Process.Pid
		// Negative pid addresses the process group created via Setpgid.
		if err := syscall.Kill(-pid, syscall.SIGINT); err != nil {
			_ = process.Cmd.Process.Signal(os.Interrupt)
		}
		done := make(chan struct{})
		go func(cmd *exec.Cmd) {
			_, _ = cmd.Process.Wait()
			close(done)
		}(process.Cmd)
		select {
		case <-done:
		case <-time.After(10 * time.Second):
			// Graceful window elapsed; force-kill the whole group.
			if err := syscall.Kill(-pid, syscall.SIGKILL); err != nil {
				_ = process.Cmd.Process.Kill()
			}
		}
	}
}
// waitForJourneyURL polls url (every 3s, 5s per-request timeout) until it
// answers with a status below 500 or the deadline passes, verifying between
// attempts that none of the managed services has died.
func waitForJourneyURL(url string, timeout time.Duration, processes ...managedProcess) error {
	client := &http.Client{Timeout: 5 * time.Second}
	// probe reports whether the endpoint currently answers with a non-5xx status.
	probe := func() bool {
		resp, err := client.Get(url)
		if err != nil {
			return false
		}
		defer resp.Body.Close()
		return resp.StatusCode >= 200 && resp.StatusCode < 500
	}
	deadline := time.Now().Add(timeout)
	for time.Now().Before(deadline) {
		if err := ensureManagedProcessesRunning(processes); err != nil {
			return fmt.Errorf("while waiting for %s: %w", url, err)
		}
		if probe() {
			return nil
		}
		time.Sleep(3 * time.Second)
	}
	// Final health check so a crashed service is reported instead of a
	// generic timeout.
	if err := ensureManagedProcessesRunning(processes); err != nil {
		return fmt.Errorf("while waiting for %s: %w", url, err)
	}
	return fmt.Errorf("timed out waiting for %s", url)
}
// ensureManagedProcessesRunning verifies every managed process is still alive
// using the signal-0 probe (kill(pid, 0)). ESRCH means the process exited
// early — the returned error then carries a tail of its log; EPERM means the
// process exists but is not signallable, which still counts as running. Any
// other errno is reported as a health-check failure.
func ensureManagedProcessesRunning(processes []managedProcess) error {
	for _, process := range processes {
		if process.Cmd == nil || process.Cmd.Process == nil {
			continue
		}
		if err := syscall.Kill(process.Cmd.Process.Pid, 0); err != nil {
			if err == syscall.ESRCH {
				return fmt.Errorf("%s exited early\n%s", process.Name, readJourneyLogTail(process.LogPath, 40))
			}
			if err != syscall.EPERM {
				return fmt.Errorf("check %s process health: %w", process.Name, err)
			}
		}
	}
	return nil
}
func readJourneyLogTail(path string, lineCount int) string {
data, err := os.ReadFile(path)
if err != nil {
return fmt.Sprintf("failed to read %s: %v", path, err)
}
trimmed := strings.TrimRight(string(data), "\n")
if trimmed == "" {
return fmt.Sprintf("%s is empty", path)
}
lines := strings.Split(trimmed, "\n")
if len(lines) > lineCount {
lines = lines[len(lines)-lineCount:]
}
return fmt.Sprintf("recent log tail from %s:\n%s", path, strings.Join(lines, "\n"))
}
// summarizeJourneyArtifacts walks the captured artifact tree rooted at root
// and buckets files into videos (.webm), traces (trace.zip), screenshots
// (.png), and metadata JSON (excluding summary.json itself). All paths are
// reported relative to root and sorted for stable output.
func summarizeJourneyArtifacts(root string) (JourneyCaptureSummary, error) {
	summary := JourneyCaptureSummary{}
	visit := func(path string, info os.FileInfo, walkErr error) error {
		if walkErr != nil {
			return walkErr
		}
		if info.IsDir() {
			return nil
		}
		relative, relErr := filepath.Rel(root, path)
		if relErr != nil {
			return relErr
		}
		if strings.HasSuffix(path, ".webm") {
			summary.VideoFiles = append(summary.VideoFiles, relative)
		} else if strings.HasSuffix(path, "trace.zip") {
			summary.TraceFiles = append(summary.TraceFiles, relative)
		} else if strings.HasSuffix(path, ".png") {
			summary.Screenshots = append(summary.Screenshots, relative)
		} else if strings.HasSuffix(path, ".json") && filepath.Base(path) != "summary.json" {
			summary.MetadataJSON = append(summary.MetadataJSON, relative)
		}
		return nil
	}
	if err := filepath.Walk(root, visit); err != nil {
		return summary, fmt.Errorf("walk journey artifacts: %w", err)
	}
	for _, bucket := range [][]string{summary.VideoFiles, summary.TraceFiles, summary.Screenshots, summary.MetadataJSON} {
		sort.Strings(bucket)
	}
	return summary, nil
}
// runSelfCommand re-invokes the current executable with the given arguments,
// wiring up the caller's stdio, and returns the command's error (if any).
func runSelfCommand(args ...string) error {
	self, err := os.Executable()
	if err != nil {
		return err
	}
	command := exec.Command(self, args...)
	command.Stdin = os.Stdin
	command.Stdout = os.Stdout
	command.Stderr = os.Stderr
	return command.Run()
}

// writeJourneyCompareSummary serializes summary to <runDir>/summary.json,
// exiting the process on any encoding or write failure.
func writeJourneyCompareSummary(runDir string, summary JourneyCompareSummary) {
	encoded, err := json.MarshalIndent(summary, "", " ")
	if err != nil {
		log.Fatalf("Failed to encode journey compare summary: %v", err)
	}
	target := filepath.Join(runDir, "summary.json")
	if err := os.WriteFile(target, encoded, 0644); err != nil {
		log.Fatalf("Failed to write journey compare summary: %v", err)
	}
}
// publishJourneyCompare uploads a captured compare run (everything under
// runDir) to S3 under journeys/pr-<pr>/<timestamp>/, annotates the summary
// with the S3 prefix and public HTTP base, and upserts the journey report
// comment on the pull request. The annotated summary is returned; callers
// are responsible for persisting it back to disk.
func publishJourneyCompare(runDir, prNumber, bucketOverride string) (JourneyCompareSummary, error) {
	var summary JourneyCompareSummary
	data, err := os.ReadFile(filepath.Join(runDir, "summary.json"))
	if err != nil {
		return summary, fmt.Errorf("read compare summary: %w", err)
	}
	if err := json.Unmarshal(data, &summary); err != nil {
		return summary, fmt.Errorf("parse compare summary: %w", err)
	}
	bucket := bucketOverride
	if bucket == "" {
		bucket = getS3Bucket()
	}
	// The run dir's basename is the timestamp chosen by runJourneyCompare.
	timestamp := filepath.Base(runDir)
	s3Prefix := fmt.Sprintf("s3://%s/journeys/pr-%s/%s/", bucket, prNumber, timestamp)
	if err := s3.SyncUp(runDir, s3Prefix, true); err != nil {
		return summary, err
	}
	httpBase := fmt.Sprintf("https://%s.s3.%s.amazonaws.com/journeys/pr-%s/%s/", bucket, defaultJourneyHTTPRegion, prNumber, timestamp)
	summary.S3Prefix = s3Prefix
	summary.S3HTTPBase = httpBase
	repoSlug, err := currentRepoSlug()
	if err != nil {
		return summary, err
	}
	body := buildJourneyPRComment(summary)
	// The HTML marker lets an existing report comment be found and replaced
	// instead of posting a new one each run.
	if err := upsertIssueComment(repoSlug, prNumber, "<!-- agent-journey-report -->", body); err != nil {
		return summary, err
	}
	return summary, nil
}
// buildJourneyPRComment renders the markdown PR comment for a compare run:
// a table with one row per journey and before/after artifact links. The
// leading HTML marker identifies the comment for later upserts.
func buildJourneyPRComment(summary JourneyCompareSummary) string {
	type capturePair struct {
		before *JourneyCaptureSummary
		after  *JourneyCaptureSummary
	}
	// Pair up the before/after captures for each journey name.
	byJourney := map[string]*capturePair{}
	for i := range summary.Captures {
		capture := &summary.Captures[i]
		pair := byJourney[capture.Journey]
		if pair == nil {
			pair = &capturePair{}
			byJourney[capture.Journey] = pair
		}
		switch capture.Label {
		case "before":
			pair.before = capture
		case "after":
			pair.after = capture
		}
	}
	// Sort names so the comment is deterministic across runs.
	names := make([]string, 0, len(byJourney))
	for name := range byJourney {
		names = append(names, name)
	}
	sort.Strings(names)
	lines := []string{
		"<!-- agent-journey-report -->",
		"### Agent Journey Report",
		"",
		fmt.Sprintf("Before ref: `%s`", summary.BeforeRef),
		fmt.Sprintf("After ref: `%s`", summary.AfterRef),
		"",
		"| Journey | Before | After |",
		"|---------|--------|-------|",
	}
	for _, name := range names {
		pair := byJourney[name]
		before := journeyLink(summary.RunDir, summary.S3HTTPBase, pair.before)
		after := journeyLink(summary.RunDir, summary.S3HTTPBase, pair.after)
		lines = append(lines, fmt.Sprintf("| `%s` | %s | %s |", name, before, after))
	}
	return strings.Join(lines, "\n")
}

// journeyLink renders a markdown link to the best artifact for one capture:
// the first video if any, else the first screenshot, else a placeholder.
// Artifact paths are made relative to the run dir so they resolve under
// httpBase (the uploaded S3 location).
func journeyLink(runDir, httpBase string, capture *JourneyCaptureSummary) string {
	if capture == nil {
		return "_not captured_"
	}
	artifactDir, err := filepath.Rel(runDir, capture.ArtifactDir)
	if err != nil {
		// Fall back to the absolute dir; the link may not resolve, but the
		// comment still renders.
		artifactDir = capture.ArtifactDir
	}
	if len(capture.VideoFiles) > 0 {
		return fmt.Sprintf("[video](%s%s)", httpBase, pathJoin(artifactDir, capture.VideoFiles[0]))
	}
	if len(capture.Screenshots) > 0 {
		return fmt.Sprintf("[screenshot](%s%s)", httpBase, pathJoin(artifactDir, capture.Screenshots[0]))
	}
	return "_no artifact_"
}
// pathJoin joins URL path segments with single slashes, trimming leading and
// trailing slashes from each segment and dropping segments that are empty —
// including segments that consist only of slashes. (Previously a segment like
// "/" passed the pre-trim empty check, trimmed down to "", and was still
// appended, producing doubled slashes in the joined result.)
func pathJoin(parts ...string) string {
	clean := make([]string, 0, len(parts))
	for _, part := range parts {
		trimmed := strings.Trim(part, "/")
		if trimmed == "" {
			continue
		}
		clean = append(clean, trimmed)
	}
	return strings.Join(clean, "/")
}

289
tools/ods/cmd/pr_checks.go Normal file
View File

@@ -0,0 +1,289 @@
package cmd
import (
"encoding/json"
"fmt"
"os/exec"
"strings"
log "github.com/sirupsen/logrus"
"github.com/spf13/cobra"
"github.com/onyx-dot-app/onyx/tools/ods/internal/git"
)
// PRChecksOptions carries the flag values shared by the pr-checks subcommands.
type PRChecksOptions struct {
	PR string // pull request number; empty means "derive from the current branch"
}

// ghChecksResponse mirrors the GraphQL payload returned by `gh api graphql`
// for the PR checks query: the status-check rollup contexts on the PR's
// latest commit. Context nodes are a union of CheckRun and StatusContext,
// so each node declares the fields of both; Type (__typename) says which
// set is populated.
type ghChecksResponse struct {
	Data struct {
		Repository struct {
			PullRequest struct {
				Number  int    `json:"number"`
				Title   string `json:"title"`
				URL     string `json:"url"`
				HeadRef string `json:"headRefName"`
				Commits struct {
					Nodes []struct {
						Commit struct {
							StatusCheckRollup struct {
								Contexts struct {
									Nodes []struct {
										Type         string `json:"__typename"`
										Name         string `json:"name"`
										DisplayTitle string `json:"displayTitle"`
										WorkflowName string `json:"workflowName"`
										Status       string `json:"status"`
										Conclusion   string `json:"conclusion"`
										DetailsURL   string `json:"detailsUrl"`
										Context      string `json:"context"`
										State        string `json:"state"`
										TargetURL    string `json:"targetUrl"`
										Description  string `json:"description"`
									} `json:"nodes"`
								} `json:"contexts"`
							} `json:"statusCheckRollup"`
						} `json:"commit"`
					} `json:"nodes"`
				} `json:"commits"`
			} `json:"pullRequest"`
		} `json:"repository"`
	} `json:"data"`
}
// NewPRChecksCommand creates the pr-checks command surface.
func NewPRChecksCommand() *cobra.Command {
	root := &cobra.Command{
		Use:   "pr-checks",
		Short: "Inspect GitHub PR checks and surface failing runs for remediation",
	}
	root.AddCommand(newPRChecksStatusCommand(), newPRChecksDiagnoseCommand())
	return root
}

// newPRChecksStatusCommand builds `pr-checks status`, which lists every check.
func newPRChecksStatusCommand() *cobra.Command {
	opts := &PRChecksOptions{}
	cmd := &cobra.Command{
		Use:   "status",
		Short: "List all status checks for a pull request",
		Run: func(_ *cobra.Command, _ []string) {
			runPRChecksStatus(opts)
		},
	}
	cmd.Flags().StringVar(&opts.PR, "pr", "", "pull request number (defaults to the current branch PR)")
	return cmd
}

// newPRChecksDiagnoseCommand builds `pr-checks diagnose`, which lists only
// failing checks together with a suggested next command.
func newPRChecksDiagnoseCommand() *cobra.Command {
	opts := &PRChecksOptions{}
	cmd := &cobra.Command{
		Use:   "diagnose",
		Short: "List only failing checks and point to the next remediation command",
		Run: func(_ *cobra.Command, _ []string) {
			runPRChecksDiagnose(opts)
		},
	}
	cmd.Flags().StringVar(&opts.PR, "pr", "", "pull request number (defaults to the current branch PR)")
	return cmd
}
// runPRChecksStatus prints every check on the PR with its result, display
// name, kind, and details URL.
func runPRChecksStatus(opts *PRChecksOptions) {
	prNumber, err := resolvePRNumber(opts.PR)
	if err != nil {
		log.Fatalf("Failed to resolve PR number: %v", err)
	}
	response, err := fetchPRChecks(prNumber)
	if err != nil {
		log.Fatalf("Failed to fetch PR checks: %v", err)
	}
	pr := response.Data.Repository.PullRequest
	fmt.Printf("PR #%d %s\n", pr.Number, pr.Title)
	for _, check := range flattenChecks(response) {
		fmt.Printf("[%s] %s (%s) %s\n", check.result(), check.displayName(), check.kind(), check.url())
	}
}

// runPRChecksDiagnose prints only the failing checks and, for each, the most
// likely remediation command (Playwright failures point at `ods trace`).
func runPRChecksDiagnose(opts *PRChecksOptions) {
	prNumber, err := resolvePRNumber(opts.PR)
	if err != nil {
		log.Fatalf("Failed to resolve PR number: %v", err)
	}
	response, err := fetchPRChecks(prNumber)
	if err != nil {
		log.Fatalf("Failed to fetch PR checks: %v", err)
	}
	failing := failingChecks(response)
	if len(failing) == 0 {
		fmt.Printf("No failing checks found on PR #%s\n", prNumber)
		return
	}
	fmt.Printf("Failing checks for PR #%s:\n", prNumber)
	for _, check := range failing {
		fmt.Printf("- %s (%s)\n", check.displayName(), check.url())
		isPlaywright := strings.Contains(strings.ToLower(check.displayName()), "playwright")
		if isPlaywright {
			fmt.Printf(" next: ods trace --pr %s\n", prNumber)
		} else {
			fmt.Printf(" next: gh run view <run-id> --log-failed\n")
		}
	}
}
// fetchPRChecks queries the GitHub GraphQL API (via the gh CLI) for the
// status-check rollup on the latest commit of the given PR in the current
// repository and decodes the response. Requires an authenticated gh
// installation (verified by git.CheckGitHubCLI).
func fetchPRChecks(prNumber string) (*ghChecksResponse, error) {
	repoSlug, err := currentRepoSlug()
	if err != nil {
		return nil, err
	}
	// repoSlug is "owner/name"; split it for the GraphQL variables.
	parts := strings.SplitN(repoSlug, "/", 2)
	if len(parts) != 2 {
		return nil, fmt.Errorf("unexpected repo slug %q", repoSlug)
	}
	git.CheckGitHubCLI()
	// Inline fragments pick the relevant fields from the CheckRun /
	// StatusContext union; only the newest commit's rollup is requested.
	query := `query($owner:String!, $name:String!, $number:Int!) {
repository(owner:$owner, name:$name) {
pullRequest(number:$number) {
number
title
url
headRefName
commits(last:1) {
nodes {
commit {
statusCheckRollup {
contexts(first:100) {
nodes {
__typename
... on CheckRun {
name
status
conclusion
detailsUrl
}
... on StatusContext {
context
state
targetUrl
description
}
}
}
}
}
}
}
}
}`
	cmd := exec.Command(
		"gh", "api", "graphql",
		"-f", "query="+query,
		"-F", "owner="+parts[0],
		"-F", "name="+parts[1],
		"-F", "number="+prNumber,
	)
	output, err := cmd.Output()
	if err != nil {
		// Surface gh's stderr when available; it usually explains the failure.
		if exitErr, ok := err.(*exec.ExitError); ok {
			return nil, fmt.Errorf("gh api graphql failed: %w: %s", err, strings.TrimSpace(string(exitErr.Stderr)))
		}
		return nil, fmt.Errorf("gh api graphql failed: %w", err)
	}
	var response ghChecksResponse
	if err := json.Unmarshal(output, &response); err != nil {
		return nil, fmt.Errorf("parse PR checks: %w", err)
	}
	return &response, nil
}
// flattenedCheck is a flat, union-free view of one status-check rollup
// context node: CheckRun fields and StatusContext fields side by side, with
// Type (the GraphQL __typename) indicating which set is meaningful.
type flattenedCheck struct {
	Type         string // "__typename": e.g. "CheckRun" or "StatusContext"
	Name         string
	DisplayTitle string
	WorkflowName string
	Status       string
	Conclusion   string
	DetailsURL   string
	Context      string
	State        string
	TargetURL    string
}

// flattenChecks converts the nested GraphQL response into a flat slice of
// checks from the PR's latest commit. Returns an empty (non-nil) slice when
// the response is nil or carries no commits.
func flattenChecks(response *ghChecksResponse) []flattenedCheck {
	result := []flattenedCheck{}
	if response == nil || len(response.Data.Repository.PullRequest.Commits.Nodes) == 0 {
		return result
	}
	for _, node := range response.Data.Repository.PullRequest.Commits.Nodes[0].Commit.StatusCheckRollup.Contexts.Nodes {
		result = append(result, flattenedCheck{
			Type:         node.Type,
			Name:         node.Name,
			DisplayTitle: node.DisplayTitle,
			WorkflowName: node.WorkflowName,
			Status:       node.Status,
			Conclusion:   node.Conclusion,
			DetailsURL:   node.DetailsURL,
			Context:      node.Context,
			State:        node.State,
			TargetURL:    node.TargetURL,
		})
	}
	return result
}
// displayName returns the human-readable name for a check. CheckRun entries
// prefer the display title, then "workflow / name", then the bare name;
// every other kind uses its status-context string.
func (c flattenedCheck) displayName() string {
	if c.Type != "CheckRun" {
		return c.Context
	}
	if c.DisplayTitle != "" {
		return c.DisplayTitle
	}
	if c.WorkflowName != "" && c.Name != "" {
		return c.WorkflowName + " / " + c.Name
	}
	return c.Name
}

// kind reports the GraphQL typename of the check, defaulting to "status"
// when no typename is present.
func (c flattenedCheck) kind() string {
	if c.Type == "" {
		return "status"
	}
	return c.Type
}

// result returns the lower-cased outcome: conclusion (or the in-progress
// status) for check runs, state for everything else.
func (c flattenedCheck) result() string {
	if c.Type != "CheckRun" {
		return strings.ToLower(c.State)
	}
	if c.Conclusion != "" {
		return strings.ToLower(c.Conclusion)
	}
	return strings.ToLower(c.Status)
}

// url prefers the check-run details URL, falling back to the status target URL.
func (c flattenedCheck) url() string {
	if c.DetailsURL == "" {
		return c.TargetURL
	}
	return c.DetailsURL
}

// failingChecks filters the flattened checks down to those whose result
// indicates a failure.
func failingChecks(response *ghChecksResponse) []flattenedCheck {
	failureResults := map[string]bool{
		"failure":   true,
		"failed":    true,
		"timed_out": true,
		"cancelled": true,
		"error":     true,
	}
	checks := flattenChecks(response)
	failing := make([]flattenedCheck, 0, len(checks))
	for _, check := range checks {
		if failureResults[check.result()] {
			failing = append(failing, check)
		}
	}
	return failing
}

73
tools/ods/cmd/pr_merge.go Normal file
View File

@@ -0,0 +1,73 @@
package cmd
import (
"os"
"os/exec"
log "github.com/sirupsen/logrus"
"github.com/spf13/cobra"
"github.com/onyx-dot-app/onyx/tools/ods/internal/git"
)
// PRMergeOptions carries the pr-merge command's flag values.
type PRMergeOptions struct {
	PR           string // pull request number; empty derives from current branch
	Auto         bool   // enable auto-merge instead of merging immediately
	DeleteBranch bool   // delete the source branch after merge
	Method       string // merge method: "squash", "merge", or "rebase"
}
// NewPRMergeCommand creates the pr-merge command.
func NewPRMergeCommand() *cobra.Command {
	opts := &PRMergeOptions{}
	cmd := &cobra.Command{
		Use:   "pr-merge",
		Short: "Merge a GitHub pull request through gh with explicit method flags",
		Run: func(_ *cobra.Command, _ []string) {
			runPRMerge(opts)
		},
	}
	flags := cmd.Flags()
	flags.StringVar(&opts.PR, "pr", "", "pull request number (defaults to the current branch PR)")
	flags.BoolVar(&opts.Auto, "auto", false, "enable auto-merge instead of merging immediately")
	flags.BoolVar(&opts.DeleteBranch, "delete-branch", false, "delete the branch after merge")
	flags.StringVar(&opts.Method, "method", "squash", "merge method: squash, merge, or rebase")
	return cmd
}
// runPRMerge merges a PR via the gh CLI using the selected merge method,
// optionally enabling auto-merge and/or deleting the source branch.
// Exits via log.Fatalf on an invalid method or a failed gh invocation.
func runPRMerge(opts *PRMergeOptions) {
	git.CheckGitHubCLI()
	prNumber, err := resolvePRNumber(opts.PR)
	if err != nil {
		log.Fatalf("Failed to resolve PR number: %v", err)
	}
	// Map the method name to gh's flag; anything else is rejected.
	methodFlags := map[string]string{
		"squash": "--squash",
		"merge":  "--merge",
		"rebase": "--rebase",
	}
	methodFlag, ok := methodFlags[opts.Method]
	if !ok {
		log.Fatalf("Invalid merge method %q: expected squash, merge, or rebase", opts.Method)
	}
	args := []string{"pr", "merge", prNumber, methodFlag}
	if opts.Auto {
		args = append(args, "--auto")
	}
	if opts.DeleteBranch {
		args = append(args, "--delete-branch")
	}
	merge := exec.Command("gh", args...)
	merge.Stdin = os.Stdin
	merge.Stdout = os.Stdout
	merge.Stderr = os.Stderr
	if err := merge.Run(); err != nil {
		log.Fatalf("Failed to merge PR #%s: %v", prNumber, err)
	}
}

89
tools/ods/cmd/pr_open.go Normal file
View File

@@ -0,0 +1,89 @@
package cmd
import (
"fmt"
"os"
"os/exec"
"path/filepath"
"regexp"
"strings"
log "github.com/sirupsen/logrus"
"github.com/spf13/cobra"
"github.com/onyx-dot-app/onyx/tools/ods/internal/git"
"github.com/onyx-dot-app/onyx/tools/ods/internal/paths"
)
// conventionalPRTitlePattern validates conventional-commit style PR titles:
// a known type, an optional (scope), then ": " and a non-empty subject.
var conventionalPRTitlePattern = regexp.MustCompile(`^(build|chore|ci|docs|feat|fix|perf|refactor|revert|style|test)(\([^)]+\))?: .+`)
// PROpenOptions carries the pr-open command's flag values.
type PROpenOptions struct {
	Title    string // PR title; empty defaults to the latest commit subject
	Base     string // base branch for the PR
	BodyFile string // PR body file; empty defaults to the repo's PR template
	Draft    bool   // open the PR as a draft
}
// NewPROpenCommand creates the pr-open command.
func NewPROpenCommand() *cobra.Command {
	opts := &PROpenOptions{}
	cmd := &cobra.Command{
		Use:   "pr-open",
		Short: "Open a GitHub pull request using the repo template and a conventional-commit title",
		Run: func(_ *cobra.Command, _ []string) {
			runPROpen(opts)
		},
	}
	flags := cmd.Flags()
	flags.StringVar(&opts.Title, "title", "", "PR title (defaults to the latest commit subject)")
	flags.StringVar(&opts.Base, "base", "main", "base branch for the PR")
	flags.StringVar(&opts.BodyFile, "body-file", "", "explicit PR body file (defaults to .github/pull_request_template.md)")
	flags.BoolVar(&opts.Draft, "draft", false, "open the PR as a draft")
	return cmd
}
// runPROpen opens a pull request via the gh CLI. The title defaults to the
// HEAD commit message and must match the conventional-commit pattern; the
// body defaults to the repository's pull_request_template.md.
func runPROpen(opts *PROpenOptions) {
	git.CheckGitHubCLI()
	title := strings.TrimSpace(opts.Title)
	if title == "" {
		// NOTE(review): assumes GetCommitMessage returns only the subject
		// line of HEAD — confirm it does not include the commit body.
		subject, err := git.GetCommitMessage("HEAD")
		if err != nil {
			log.Fatalf("Failed to determine PR title from HEAD: %v", err)
		}
		title = subject
	}
	if !conventionalPRTitlePattern.MatchString(title) {
		log.Fatalf("PR title must follow conventional-commit style. Got %q", title)
	}
	bodyFile := strings.TrimSpace(opts.BodyFile)
	if bodyFile == "" {
		repoRoot, err := paths.GitRoot()
		if err != nil {
			log.Fatalf("Failed to determine git root: %v", err)
		}
		bodyFile = filepath.Join(repoRoot, ".github", "pull_request_template.md")
	}
	bodyBytes, err := os.ReadFile(bodyFile)
	if err != nil {
		log.Fatalf("Failed to read PR body file %s: %v", bodyFile, err)
	}
	args := []string{"pr", "create", "--base", opts.Base, "--title", title, "--body", string(bodyBytes)}
	if opts.Draft {
		args = append(args, "--draft")
	}
	cmd := exec.Command("gh", args...)
	cmd.Stdout = os.Stdout
	cmd.Stderr = os.Stderr
	cmd.Stdin = os.Stdin
	if err := cmd.Run(); err != nil {
		log.Fatalf("Failed to open PR: %v", err)
	}
	fmt.Printf("Opened PR with title %q\n", title)
}

393
tools/ods/cmd/pr_review.go Normal file
View File

@@ -0,0 +1,393 @@
package cmd
import (
"encoding/json"
"fmt"
"os"
"os/exec"
"path/filepath"
"strings"
log "github.com/sirupsen/logrus"
"github.com/spf13/cobra"
"github.com/onyx-dot-app/onyx/tools/ods/internal/agentlab"
"github.com/onyx-dot-app/onyx/tools/ods/internal/git"
"github.com/onyx-dot-app/onyx/tools/ods/internal/prreview"
)
// PRReviewFetchOptions carries flags for `pr-review fetch`.
type PRReviewFetchOptions struct {
	PR     string // pull request number; empty derives from the current branch
	Output string // explicit output path for the fetched review JSON
}

// PRReviewTriageOptions carries flags for `pr-review triage`.
type PRReviewTriageOptions struct {
	PR     string // pull request number; empty derives from the current branch
	Output string // explicit output path (presumably the triage result — mirrors fetch)
}

// PRReviewRespondOptions carries flags for `pr-review respond`.
type PRReviewRespondOptions struct {
	PR        string // pull request number; empty derives from the current branch
	CommentID int    // database ID of the review comment being responded to
	Body      string // reply body text
	ThreadID  string // GraphQL node ID of the review thread
}
type ghReviewResponse struct {
Data struct {
Repository struct {
PullRequest struct {
Number int `json:"number"`
Title string `json:"title"`
URL string `json:"url"`
ReviewThreads struct {
Nodes []struct {
ID string `json:"id"`
IsResolved bool `json:"isResolved"`
IsOutdated bool `json:"isOutdated"`
Path string `json:"path"`
Line int `json:"line"`
StartLine int `json:"startLine"`
Comments struct {
Nodes []struct {
DatabaseID int `json:"databaseId"`
Body string
URL string `json:"url"`
CreatedAt string `json:"createdAt"`
Author struct {
Login string `json:"login"`
} `json:"author"`
} `json:"nodes"`
} `json:"comments"`
} `json:"nodes"`
} `json:"reviewThreads"`
} `json:"pullRequest"`
} `json:"repository"`
} `json:"data"`
}
// NewPRReviewCommand wires up the pr-review command surface and its subcommands.
func NewPRReviewCommand() *cobra.Command {
	root := &cobra.Command{
		Use:   "pr-review",
		Short: "Fetch, triage, and respond to GitHub pull request review threads",
	}
	for _, sub := range []*cobra.Command{
		newPRReviewFetchCommand(),
		newPRReviewTriageCommand(),
		newPRReviewRespondCommand(),
		newPRReviewResolveCommand(),
	} {
		root.AddCommand(sub)
	}
	return root
}
// newPRReviewFetchCommand builds the `pr-review fetch` subcommand.
func newPRReviewFetchCommand() *cobra.Command {
	opts := &PRReviewFetchOptions{}
	fetchCmd := &cobra.Command{
		Use:   "fetch",
		Short: "Fetch pull request review threads and write them to local harness state",
		Run: func(_ *cobra.Command, _ []string) {
			runPRReviewFetch(opts)
		},
	}
	flags := fetchCmd.Flags()
	flags.StringVar(&opts.PR, "pr", "", "pull request number (defaults to the current branch PR)")
	flags.StringVar(&opts.Output, "output", "", "explicit output path for the fetched review JSON")
	return fetchCmd
}
// newPRReviewTriageCommand builds the `pr-review triage` subcommand.
func newPRReviewTriageCommand() *cobra.Command {
	opts := &PRReviewTriageOptions{}
	triageCmd := &cobra.Command{
		Use:   "triage",
		Short: "Classify unresolved review threads into actionable, duplicate, outdated, or resolved",
		Run: func(_ *cobra.Command, _ []string) {
			runPRReviewTriage(opts)
		},
	}
	flags := triageCmd.Flags()
	flags.StringVar(&opts.PR, "pr", "", "pull request number (defaults to the current branch PR)")
	flags.StringVar(&opts.Output, "output", "", "explicit output path for the triage JSON")
	return triageCmd
}
// newPRReviewRespondCommand builds the `pr-review respond` subcommand.
// --comment-id and --body are mandatory; --thread-id additionally resolves
// the thread after the reply is posted.
func newPRReviewRespondCommand() *cobra.Command {
	opts := &PRReviewRespondOptions{}
	respondCmd := &cobra.Command{
		Use:   "respond",
		Short: "Reply to an inline pull request review comment and optionally resolve the thread",
		Run: func(_ *cobra.Command, _ []string) {
			runPRReviewRespond(opts)
		},
	}
	flags := respondCmd.Flags()
	flags.StringVar(&opts.PR, "pr", "", "pull request number (defaults to the current branch PR)")
	flags.IntVar(&opts.CommentID, "comment-id", 0, "top-level pull request review comment ID to reply to")
	flags.StringVar(&opts.ThreadID, "thread-id", "", "GraphQL review thread ID to resolve after replying")
	flags.StringVar(&opts.Body, "body", "", "reply body to post")
	for _, required := range []string{"comment-id", "body"} {
		_ = respondCmd.MarkFlagRequired(required)
	}
	return respondCmd
}
// newPRReviewResolveCommand builds the `pr-review resolve` subcommand, which
// resolves a thread by its GraphQL ID without posting any reply.
func newPRReviewResolveCommand() *cobra.Command {
	opts := &PRReviewRespondOptions{}
	resolveCmd := &cobra.Command{
		Use:   "resolve",
		Short: "Resolve a review thread without posting a reply",
		Run: func(_ *cobra.Command, _ []string) {
			runPRReviewResolve(opts)
		},
	}
	resolveCmd.Flags().StringVar(&opts.ThreadID, "thread-id", "", "GraphQL review thread ID to resolve")
	_ = resolveCmd.MarkFlagRequired("thread-id")
	return resolveCmd
}
// runPRReviewFetch downloads review threads for the resolved PR and persists
// them as JSON in the harness state directory (or the explicit --output path).
func runPRReviewFetch(opts *PRReviewFetchOptions) {
	number, err := resolvePRNumber(opts.PR)
	if err != nil {
		log.Fatalf("Failed to resolve PR number: %v", err)
	}
	pullRequest, err := fetchPRReview(number)
	if err != nil {
		log.Fatalf("Failed to fetch PR review threads: %v", err)
	}
	destination, err := reviewOutputPath(number, opts.Output, "threads.json")
	if err != nil {
		log.Fatalf("Failed to determine output path: %v", err)
	}
	writeJSON(destination, pullRequest)
	log.Infof("Fetched %d review threads into %s", len(pullRequest.Threads), destination)
}
// runPRReviewTriage fetches review threads for the resolved PR, classifies
// them via prreview.Triage, writes the result as JSON, and prints a
// human-readable summary line per thread to stdout.
func runPRReviewTriage(opts *PRReviewTriageOptions) {
	prNumber, err := resolvePRNumber(opts.PR)
	if err != nil {
		log.Fatalf("Failed to resolve PR number: %v", err)
	}
	review, err := fetchPRReview(prNumber)
	if err != nil {
		log.Fatalf("Failed to fetch PR review threads: %v", err)
	}
	triage := prreview.Triage(review)
	outputPath, err := reviewOutputPath(prNumber, opts.Output, "triage.json")
	if err != nil {
		log.Fatalf("Failed to determine output path: %v", err)
	}
	writeJSON(outputPath, triage)
	for _, summary := range triage.Summaries {
		// Build a "path" or "path:line" reference when the thread is anchored
		// to a file; threads without a path print an empty reference.
		lineRef := ""
		if summary.Thread.Path != "" {
			lineRef = summary.Thread.Path
			if summary.Thread.Line > 0 {
				lineRef = fmt.Sprintf("%s:%d", lineRef, summary.Thread.Line)
			}
		}
		fmt.Printf("[%s] %s %s %s\n", summary.Category, summary.Source, summary.Thread.ID, lineRef)
		for _, reason := range summary.Reasons {
			fmt.Printf(" - %s\n", reason)
		}
	}
	log.Infof("Wrote PR review triage to %s", outputPath)
}
// runPRReviewRespond posts a reply to an inline review comment and, when a
// thread ID was supplied, resolves that thread afterwards.
func runPRReviewRespond(opts *PRReviewRespondOptions) {
	number, err := resolvePRNumber(opts.PR)
	if err != nil {
		log.Fatalf("Failed to resolve PR number: %v", err)
	}
	slug, err := currentRepoSlug()
	if err != nil {
		log.Fatalf("Failed to resolve repo slug: %v", err)
	}
	if replyErr := replyToReviewComment(slug, number, opts.CommentID, opts.Body); replyErr != nil {
		log.Fatalf("Failed to reply to review comment: %v", replyErr)
	}
	threadID := strings.TrimSpace(opts.ThreadID)
	if threadID != "" {
		if resolveErr := resolveReviewThread(opts.ThreadID); resolveErr != nil {
			log.Fatalf("Failed to resolve review thread: %v", resolveErr)
		}
	}
	log.Infof("Posted reply to review comment %d on PR #%s", opts.CommentID, number)
}
// runPRReviewResolve resolves a single review thread by its GraphQL ID.
func runPRReviewResolve(options *PRReviewRespondOptions) {
	err := resolveReviewThread(options.ThreadID)
	if err != nil {
		log.Fatalf("Failed to resolve review thread: %v", err)
	}
	log.Infof("Resolved review thread %s", options.ThreadID)
}
// fetchPRReview pulls all review threads for the given PR number via the
// GitHub GraphQL API and converts the raw response into the internal
// prreview model.
func fetchPRReview(prNumber string) (prreview.PullRequest, error) {
	repoSlug, err := currentRepoSlug()
	if err != nil {
		return prreview.PullRequest{}, err
	}
	// repoSlug is expected to be "owner/name"; reject anything else up front.
	parts := strings.SplitN(repoSlug, "/", 2)
	if len(parts) != 2 {
		return prreview.PullRequest{}, fmt.Errorf("unexpected repo slug %q", repoSlug)
	}
	response, err := ghGraphQL(parts[0], parts[1], prNumber)
	if err != nil {
		return prreview.PullRequest{}, err
	}
	// Slices are initialized to empty (not nil) so downstream JSON output
	// serializes as [] rather than null.
	pr := prreview.PullRequest{
		Number:  response.Data.Repository.PullRequest.Number,
		Title:   response.Data.Repository.PullRequest.Title,
		URL:     response.Data.Repository.PullRequest.URL,
		Threads: []prreview.Thread{},
	}
	for _, thread := range response.Data.Repository.PullRequest.ReviewThreads.Nodes {
		item := prreview.Thread{
			ID:         thread.ID,
			IsResolved: thread.IsResolved,
			IsOutdated: thread.IsOutdated,
			Path:       thread.Path,
			Line:       thread.Line,
			StartLine:  thread.StartLine,
			Comments:   []prreview.Comment{},
		}
		for _, comment := range thread.Comments.Nodes {
			item.Comments = append(item.Comments, prreview.Comment{
				ID:          comment.DatabaseID,
				Body:        comment.Body,
				AuthorLogin: comment.Author.Login,
				URL:         comment.URL,
				CreatedAt:   comment.CreatedAt,
			})
		}
		pr.Threads = append(pr.Threads, item)
	}
	return pr, nil
}
// ghGraphQL runs the review-thread GraphQL query against the given repository
// via the gh CLI and decodes the response. It fetches at most the first 100
// threads with at most 100 comments each (no pagination).
func ghGraphQL(owner, name, prNumber string) (*ghReviewResponse, error) {
	git.CheckGitHubCLI()
	query := `query($owner:String!, $name:String!, $number:Int!) {
repository(owner:$owner, name:$name) {
pullRequest(number:$number) {
number
title
url
reviewThreads(first:100) {
nodes {
id
isResolved
isOutdated
path
line
startLine
comments(first:100) {
nodes {
databaseId
body
url
createdAt
author {
login
}
}
}
}
}
}
}
}`
	// -F sends typed variables; gh presumably coerces the numeric prNumber
	// string into the Int! the query declares — TODO confirm against gh docs.
	cmd := exec.Command(
		"gh", "api", "graphql",
		"-f", "query="+query,
		"-F", "owner="+owner,
		"-F", "name="+name,
		"-F", "number="+prNumber,
	)
	output, err := cmd.Output()
	if err != nil {
		// cmd.Output captures stderr on ExitError; surface it for debugging.
		if exitErr, ok := err.(*exec.ExitError); ok {
			return nil, fmt.Errorf("gh api graphql failed: %w: %s", err, strings.TrimSpace(string(exitErr.Stderr)))
		}
		return nil, fmt.Errorf("gh api graphql failed: %w", err)
	}
	var response ghReviewResponse
	if err := json.Unmarshal(output, &response); err != nil {
		return nil, fmt.Errorf("parse graphql response: %w", err)
	}
	return &response, nil
}
// replyToReviewComment posts a reply to a top-level inline review comment
// through the gh REST API "replies" endpoint.
func replyToReviewComment(repoSlug, prNumber string, commentID int, body string) error {
	endpoint := fmt.Sprintf("repos/%s/pulls/%s/comments/%d/replies", repoSlug, prNumber, commentID)
	_, err := ghString(
		"api",
		"--method", "POST",
		endpoint,
		"-f", "body="+body,
	)
	return err
}
// resolveReviewThread marks a review thread as resolved via the GraphQL
// resolveReviewThread mutation, using the gh CLI.
func resolveReviewThread(threadID string) error {
	git.CheckGitHubCLI()
	mutation := `mutation($threadId:ID!) {
resolveReviewThread(input:{threadId:$threadId}) {
thread {
id
isResolved
}
}
}`
	cmd := exec.Command(
		"gh", "api", "graphql",
		"-f", "query="+mutation,
		"-F", "threadId="+threadID,
	)
	// CombinedOutput folds stdout and stderr together so the error message
	// carries whatever gh printed.
	if output, err := cmd.CombinedOutput(); err != nil {
		return fmt.Errorf("resolve review thread: %w: %s", err, strings.TrimSpace(string(output)))
	}
	return nil
}
// reviewOutputPath decides where a review artifact file should be written.
// A non-blank explicit path wins unchanged; otherwise a per-PR directory is
// created under the agent-lab state root and fileName is placed inside it.
func reviewOutputPath(prNumber, explicit, fileName string) (string, error) {
	if strings.TrimSpace(explicit) != "" {
		return explicit, nil
	}
	commonGitDir, err := agentlab.GetCommonGitDir()
	if err != nil {
		return "", err
	}
	prDir := filepath.Join(agentlab.StateRoot(commonGitDir), "reviews", "pr-"+prNumber)
	if mkErr := os.MkdirAll(prDir, 0755); mkErr != nil {
		return "", fmt.Errorf("create review state dir: %w", mkErr)
	}
	return filepath.Join(prDir, fileName), nil
}
// writeJSON pretty-prints value as indented JSON into path, terminating the
// process with a fatal log on any encode or write failure.
func writeJSON(path string, value any) {
	encoded, err := json.MarshalIndent(value, "", " ")
	if err != nil {
		log.Fatalf("Failed to encode JSON for %s: %v", path, err)
	}
	if writeErr := os.WriteFile(path, encoded, 0644); writeErr != nil {
		log.Fatalf("Failed to write %s: %v", path, writeErr)
	}
}

View File

@@ -41,6 +41,7 @@ func NewRootCommand() *cobra.Command {
cmd.PersistentFlags().BoolVar(&opts.Debug, "debug", false, "run in debug mode")
// Add subcommands
cmd.AddCommand(NewAgentCheckCommand())
cmd.AddCommand(NewBackendCommand())
cmd.AddCommand(NewCheckLazyImportsCommand())
cmd.AddCommand(NewCherryPickCommand())
@@ -48,8 +49,13 @@ func NewRootCommand() *cobra.Command {
cmd.AddCommand(NewDeployCommand())
cmd.AddCommand(NewOpenAPICommand())
cmd.AddCommand(NewComposeCommand())
cmd.AddCommand(NewJourneyCommand())
cmd.AddCommand(NewLogsCommand())
cmd.AddCommand(NewPullCommand())
cmd.AddCommand(NewPRChecksCommand())
cmd.AddCommand(NewPRMergeCommand())
cmd.AddCommand(NewPROpenCommand())
cmd.AddCommand(NewPRReviewCommand())
cmd.AddCommand(NewRunCICommand())
cmd.AddCommand(NewScreenshotDiffCommand())
cmd.AddCommand(NewDesktopCommand())
@@ -58,6 +64,8 @@ func NewRootCommand() *cobra.Command {
cmd.AddCommand(NewLatestStableTagCommand())
cmd.AddCommand(NewWhoisCommand())
cmd.AddCommand(NewTraceCommand())
cmd.AddCommand(NewVerifyCommand())
cmd.AddCommand(NewWorktreeCommand())
return cmd
}

318
tools/ods/cmd/verify.go Normal file
View File

@@ -0,0 +1,318 @@
package cmd
import (
"encoding/json"
"fmt"
"io"
"os"
"os/exec"
"path/filepath"
"time"
log "github.com/sirupsen/logrus"
"github.com/spf13/cobra"
"github.com/onyx-dot-app/onyx/tools/ods/internal/agentlab"
"github.com/onyx-dot-app/onyx/tools/ods/internal/envutil"
)
// VerifyOptions holds flag values for the `verify` command.
type VerifyOptions struct {
	BaseRef           string   // git ref agent-check diffs against; empty falls back to the worktree base ref, then HEAD
	SkipAgentCheck    bool     // skip the diff-based agent-check step entirely
	Worktree          string   // tracked agent-lab worktree to verify instead of the current checkout
	PytestPaths       []string // pytest paths/node ids to run; empty skips the pytest step
	PlaywrightPaths   []string // Playwright test paths to run
	PlaywrightGrep    string   // --grep value passed through to Playwright; non-empty also triggers the step
	PlaywrightProject string   // --project value passed through to Playwright
}
// VerifySummary is the machine-readable result of one `verify` run, written
// as summary.json into the artifact directory.
type VerifySummary struct {
	GeneratedAt string              `json:"generated_at"` // RFC3339 UTC timestamp of the run
	RepoRoot    string              `json:"repo_root"`    // checkout the steps ran against
	Worktree    *agentlab.Manifest  `json:"worktree,omitempty"` // set only when verifying a tracked agent-lab worktree
	Steps       []VerifyStepSummary `json:"steps"`        // executed steps, in order; stops after the first failure
}
// VerifyStepSummary records the outcome of a single verification step.
type VerifyStepSummary struct {
	Name        string   `json:"name"`                  // step identifier: "agent-check", "pytest", or "playwright"
	Status      string   `json:"status"`                // "passed" or "failed"
	Command     []string `json:"command,omitempty"`     // command line the step ran (or would run)
	DurationMS  int64    `json:"duration_ms"`           // wall-clock duration of the step
	LogPath     string   `json:"log_path,omitempty"`    // path to the captured stdout/stderr log, if any
	ArtifactDir string   `json:"artifact_dir,omitempty"` // extra artifact directory the step produced
	Details     []string `json:"details,omitempty"`     // human-readable notes, violations, or error text
}
// NewVerifyCommand creates the verify command.
func NewVerifyCommand() *cobra.Command {
	opts := &VerifyOptions{}
	cmd := &cobra.Command{
		Use:   "verify",
		Short: "Run the agent-lab verification ladder and write a machine-readable summary",
		Long: `Run the agent-lab verification ladder for the current checkout.
This command composes the diff-based agent-check with optional pytest and
Playwright execution, then writes a JSON summary into the worktree artifact
directory so agents can inspect the result without re-parsing console output.
Use --worktree to run the same flow against a tracked target worktree from the
agent-lab control checkout.`,
		Run: func(cmd *cobra.Command, args []string) {
			runVerify(opts)
		},
	}
	// Repeatable flags (StringArrayVar) accumulate one entry per occurrence.
	cmd.Flags().StringVar(&opts.BaseRef, "base-ref", "", "git ref to compare against for agent-check (defaults to the worktree base ref or HEAD)")
	cmd.Flags().BoolVar(&opts.SkipAgentCheck, "skip-agent-check", false, "skip the diff-based agent-check step")
	cmd.Flags().StringVar(&opts.Worktree, "worktree", "", "tracked agent-lab worktree to verify from instead of the current checkout")
	cmd.Flags().StringArrayVar(&opts.PytestPaths, "pytest", nil, "pytest path or node id to run (repeatable)")
	cmd.Flags().StringArrayVar(&opts.PlaywrightPaths, "playwright", nil, "Playwright test path to run (repeatable)")
	cmd.Flags().StringVar(&opts.PlaywrightGrep, "playwright-grep", "", "grep passed through to Playwright")
	cmd.Flags().StringVar(&opts.PlaywrightProject, "playwright-project", "", "Playwright project to run")
	return cmd
}
// runVerify executes the verification ladder (agent-check, then pytest, then
// Playwright — each optional) and writes summary.json into the artifact root.
// The ladder short-circuits: the first failing step still gets recorded in the
// summary before the process exits 1.
func runVerify(opts *VerifyOptions) {
	repoRoot, manifest, hasManifest := resolveAgentLabTarget(opts.Worktree)
	commonGitDir, err := agentlab.GetCommonGitDir()
	if err != nil {
		log.Fatalf("Failed to determine git common dir: %v", err)
	}
	runAt := time.Now().UTC()
	// Plain checkouts write into web/output; tracked worktrees get a
	// timestamped subdirectory under their own artifact dir.
	artifactRoot := filepath.Join(repoRoot, "web", "output")
	if hasManifest {
		artifactRoot = filepath.Join(manifest.ArtifactDir, "verify", runAt.Format("20060102-150405"))
	}
	if err := os.MkdirAll(artifactRoot, 0755); err != nil {
		log.Fatalf("Failed to create verify artifact dir: %v", err)
	}
	summary := VerifySummary{
		GeneratedAt: runAt.Format(time.RFC3339),
		RepoRoot:    repoRoot,
		Steps:       make([]VerifyStepSummary, 0, 3),
	}
	if hasManifest {
		// Copy before taking the address so the summary owns its own value.
		manifestCopy := manifest
		summary.Worktree = &manifestCopy
	}
	if !opts.SkipAgentCheck {
		// Base ref precedence: explicit flag > worktree manifest > HEAD.
		baseRef := opts.BaseRef
		if baseRef == "" && hasManifest {
			baseRef = manifest.BaseRef
		}
		if baseRef == "" {
			baseRef = "HEAD"
		}
		step, passed := runAgentCheckVerifyStep(repoRoot, opts.Worktree, baseRef)
		summary.Steps = append(summary.Steps, step)
		if !passed {
			writeVerifySummary(summary, artifactRoot, commonGitDir, manifest, hasManifest, runAt)
			os.Exit(1)
		}
	}
	if len(opts.PytestPaths) > 0 {
		step, passed := runPytestVerifyStep(repoRoot, artifactRoot, manifest, hasManifest, opts.PytestPaths)
		summary.Steps = append(summary.Steps, step)
		if !passed {
			writeVerifySummary(summary, artifactRoot, commonGitDir, manifest, hasManifest, runAt)
			os.Exit(1)
		}
	}
	// A grep alone is enough to trigger Playwright, even without explicit paths.
	if len(opts.PlaywrightPaths) > 0 || opts.PlaywrightGrep != "" {
		step, passed := runPlaywrightVerifyStep(repoRoot, artifactRoot, manifest, hasManifest, opts)
		summary.Steps = append(summary.Steps, step)
		if !passed {
			writeVerifySummary(summary, artifactRoot, commonGitDir, manifest, hasManifest, runAt)
			os.Exit(1)
		}
	}
	writeVerifySummary(summary, artifactRoot, commonGitDir, manifest, hasManifest, runAt)
	log.Infof("Verification summary written to %s", filepath.Join(artifactRoot, "summary.json"))
}
// runAgentCheckVerifyStep runs the diff-based agent-check in-process and
// translates its result into a VerifyStepSummary. Returns the step plus a
// pass/fail flag; any violations are flattened into Details.
func runAgentCheckVerifyStep(repoRoot, worktree, baseRef string) (VerifyStepSummary, bool) {
	startedAt := time.Now()
	opts := &AgentCheckOptions{BaseRef: baseRef, Worktree: worktree, RepoRoot: repoRoot}
	result, err := evaluateAgentCheck(opts, nil)
	// Command is recorded for reproducibility; the check itself ran in-process.
	step := VerifyStepSummary{
		Name:       "agent-check",
		Command:    []string{"ods", "agent-check", "--base-ref", baseRef},
		DurationMS: time.Since(startedAt).Milliseconds(),
	}
	if worktree != "" {
		step.Command = append(step.Command, "--worktree", worktree)
	}
	if err != nil {
		step.Status = "failed"
		step.Details = []string{err.Error()}
		return step, false
	}
	if len(result.Violations) == 0 && len(result.DocViolations) == 0 {
		step.Status = "passed"
		return step, true
	}
	step.Status = "failed"
	for _, violation := range result.Violations {
		step.Details = append(step.Details, fmt.Sprintf("%s:%d [%s] %s", violation.Path, violation.LineNum, violation.RuleID, violation.Message))
	}
	for _, violation := range result.DocViolations {
		step.Details = append(step.Details, fmt.Sprintf("%s [agent-docs] %s", violation.Path, violation.Message))
	}
	return step, false
}
// runPytestVerifyStep runs pytest from the backend directory via
// `python -m dotenv -f .vscode/.env run -- pytest <paths...>`, logging output
// to pytest.log under the artifact root. Worktree manifests overlay their
// runtime env and add dependency-mode details to the step.
func runPytestVerifyStep(repoRoot, artifactRoot string, manifest agentlab.Manifest, hasManifest bool, pytestPaths []string) (VerifyStepSummary, bool) {
	pythonExecutable := pythonForRepo(repoRoot)
	args := append([]string{"-m", "dotenv", "-f", ".vscode/.env", "run", "--", "pytest"}, pytestPaths...)
	extraEnv := map[string]string{}
	if hasManifest {
		for key, value := range manifest.RuntimeEnv() {
			extraEnv[key] = value
		}
	}
	step, passed := runLoggedCommand(
		"pytest",
		filepath.Join(artifactRoot, "pytest.log"),
		filepath.Join(repoRoot, "backend"),
		extraEnv,
		pythonExecutable,
		args...,
	)
	if hasManifest {
		step.Details = append(step.Details, fmt.Sprintf("dependency mode: %s", manifest.ResolvedDependencies().Mode))
		step.Details = append(step.Details, fmt.Sprintf("search infra: %s", manifest.ResolvedDependencies().SearchInfraMode))
	}
	return step, passed
}
// runPlaywrightVerifyStep runs `npx playwright test` from the web directory
// with optional --grep/--project filters, logging to playwright.log under the
// artifact root. Worktree manifests overlay their runtime env and annotate
// the step with URLs, dependency info, and any dependency warnings.
func runPlaywrightVerifyStep(repoRoot, artifactRoot string, manifest agentlab.Manifest, hasManifest bool, opts *VerifyOptions) (VerifyStepSummary, bool) {
	args := []string{"playwright", "test"}
	args = append(args, opts.PlaywrightPaths...)
	if opts.PlaywrightGrep != "" {
		args = append(args, "--grep", opts.PlaywrightGrep)
	}
	if opts.PlaywrightProject != "" {
		args = append(args, "--project", opts.PlaywrightProject)
	}
	extraEnv := map[string]string{}
	if hasManifest {
		for key, value := range manifest.RuntimeEnv() {
			extraEnv[key] = value
		}
	}
	step, passed := runLoggedCommand(
		"playwright",
		filepath.Join(artifactRoot, "playwright.log"),
		filepath.Join(repoRoot, "web"),
		extraEnv,
		"npx",
		args...,
	)
	// Playwright writes its own artifacts under web/output regardless of passing.
	step.ArtifactDir = filepath.Join(repoRoot, "web", "output")
	if hasManifest {
		step.Details = append(step.Details, fmt.Sprintf("base url: %s", manifest.URLs.Web))
		step.Details = append(step.Details, fmt.Sprintf("dependency mode: %s", manifest.ResolvedDependencies().Mode))
		step.Details = append(step.Details, fmt.Sprintf("search infra: %s", manifest.ResolvedDependencies().SearchInfraMode))
		step.Details = append(step.Details, fmt.Sprintf("reuse Chrome DevTools MCP against %s for interactive browser validation", manifest.URLs.Web))
		step.Details = append(step.Details, manifest.DependencyWarnings()...)
	}
	return step, passed
}
// runLoggedCommand runs executable in workdir, teeing stdout/stderr to both
// the console and logPath, and returns a step summary plus pass/fail. extraEnv
// entries override the inherited environment. Duration includes log setup.
func runLoggedCommand(name, logPath, workdir string, extraEnv map[string]string, executable string, args ...string) (VerifyStepSummary, bool) {
	startedAt := time.Now()
	if err := os.MkdirAll(filepath.Dir(logPath), 0755); err != nil {
		return VerifyStepSummary{
			Name:       name,
			Status:     "failed",
			DurationMS: time.Since(startedAt).Milliseconds(),
			Details:    []string{fmt.Sprintf("create log dir: %v", err)},
		}, false
	}
	logFile, err := os.Create(logPath)
	if err != nil {
		return VerifyStepSummary{
			Name:       name,
			Status:     "failed",
			DurationMS: time.Since(startedAt).Milliseconds(),
			Details:    []string{fmt.Sprintf("create log file: %v", err)},
		}, false
	}
	// Best-effort close; write errors at close time are intentionally ignored.
	defer func() { _ = logFile.Close() }()
	cmd := exec.Command(executable, args...)
	cmd.Dir = workdir
	cmd.Stdout = io.MultiWriter(os.Stdout, logFile)
	cmd.Stderr = io.MultiWriter(os.Stderr, logFile)
	if len(extraEnv) > 0 {
		cmd.Env = envutil.ApplyOverrides(os.Environ(), extraEnv)
	}
	step := VerifyStepSummary{
		Name:       name,
		Command:    append([]string{executable}, args...),
		LogPath:    logPath,
		DurationMS: 0,
	}
	err = cmd.Run()
	// Measured after the run so the summary reflects actual wall-clock time.
	step.DurationMS = time.Since(startedAt).Milliseconds()
	if err != nil {
		step.Status = "failed"
		step.Details = []string{err.Error()}
		return step, false
	}
	step.Status = "passed"
	return step, true
}
// writeVerifySummary serializes the summary to summary.json under artifactRoot
// and, for tracked worktrees, records the verification in the worktree
// metadata. Metadata update failures only warn; the summary file is the
// source of truth.
func writeVerifySummary(summary VerifySummary, artifactRoot, commonGitDir string, manifest agentlab.Manifest, hasManifest bool, runAt time.Time) {
	summaryPath := filepath.Join(artifactRoot, "summary.json")
	data, err := json.MarshalIndent(summary, "", " ")
	if err != nil {
		log.Fatalf("Failed to encode verify summary: %v", err)
	}
	if err := os.WriteFile(summaryPath, data, 0644); err != nil {
		log.Fatalf("Failed to write verify summary: %v", err)
	}
	if hasManifest {
		if err := agentlab.UpdateVerification(commonGitDir, manifest, summaryPath, runAt); err != nil {
			log.Warnf("Failed to update worktree verification metadata: %v", err)
		}
	}
}
// pythonForRepo picks the Python interpreter for a checkout: the checkout's
// own .venv if present, else the creating checkout's .venv (for agent-lab
// worktrees), else whatever "python" resolves to on PATH.
func pythonForRepo(repoRoot string) string {
	venvPython := filepath.Join(repoRoot, ".venv", "bin", "python")
	if _, statErr := os.Stat(venvPython); statErr == nil {
		return venvPython
	}
	manifest, found := currentAgentLabManifest(repoRoot)
	if found {
		shared := filepath.Join(manifest.CreatedFromPath, ".venv", "bin", "python")
		if _, statErr := os.Stat(shared); statErr == nil {
			return shared
		}
	}
	return "python"
}

View File

@@ -13,6 +13,7 @@ import (
log "github.com/sirupsen/logrus"
"github.com/spf13/cobra"
"github.com/onyx-dot-app/onyx/tools/ods/internal/envutil"
"github.com/onyx-dot-app/onyx/tools/ods/internal/paths"
)
@@ -22,28 +23,32 @@ type webPackageJSON struct {
// NewWebCommand creates a command that runs npm scripts from the web directory.
func NewWebCommand() *cobra.Command {
var worktree string
cmd := &cobra.Command{
Use: "web <script> [args...]",
Short: "Run web/package.json npm scripts",
Long: webHelpDescription(),
Args: cobra.MinimumNArgs(1),
Args: cobra.MinimumNArgs(1),
ValidArgsFunction: func(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) {
if len(args) > 0 {
return nil, cobra.ShellCompDirectiveNoFileComp
}
return webScriptNames(), cobra.ShellCompDirectiveNoFileComp
},
Run: func(cmd *cobra.Command, args []string) {
runWebScript(args)
},
}
cmd.Flags().SetInterspersed(false)
cmd.Flags().StringVar(&worktree, "worktree", "", "tracked agent-lab worktree to run from instead of the current checkout")
cmd.Run = func(cmd *cobra.Command, args []string) {
runWebScript(args, worktree)
}
return cmd
}
func runWebScript(args []string) {
webDir, err := webDir()
func runWebScript(args []string, worktree string) {
repoRoot, manifest, hasManifest := resolveAgentLabTarget(worktree)
webDir, err := webDirForRoot(repoRoot)
if err != nil {
log.Fatalf("Failed to find web directory: %v", err)
}
@@ -68,6 +73,13 @@ func runWebScript(args []string) {
webCmd.Stderr = os.Stderr
webCmd.Stdin = os.Stdin
if hasManifest {
webCmd.Env = envutil.ApplyOverrides(os.Environ(), manifest.RuntimeEnv())
log.Infof("agent-lab worktree %s detected: web=%s api=%s", manifest.Branch, manifest.URLs.Web, manifest.URLs.API)
log.Infof("lane=%s base-ref=%s", manifest.ResolvedLane(), manifest.BaseRef)
log.Infof("dependency mode=%s search-infra=%s", manifest.ResolvedDependencies().Mode, manifest.ResolvedDependencies().SearchInfraMode)
}
if err := webCmd.Run(); err != nil {
// For wrapped commands, preserve the child process's exit code and
// avoid duplicating already-printed stderr output.
@@ -101,7 +113,8 @@ func webHelpDescription() string {
Examples:
ods web dev
ods web lint
ods web test --watch`
ods web test --watch
ods web dev --worktree codex/fix/auth-banner-modal`
scripts := webScriptNames()
if len(scripts) == 0 {
@@ -112,7 +125,7 @@ Examples:
}
func loadWebScripts() (map[string]string, error) {
webDir, err := webDir()
webDir, err := webDirForRoot("")
if err != nil {
return nil, err
}
@@ -135,10 +148,13 @@ func loadWebScripts() (map[string]string, error) {
return pkg.Scripts, nil
}
func webDir() (string, error) {
root, err := paths.GitRoot()
if err != nil {
return "", err
func webDirForRoot(root string) (string, error) {
if root == "" {
var err error
root, err = paths.GitRoot()
if err != nil {
return "", err
}
}
return filepath.Join(root, "web"), nil
}

626
tools/ods/cmd/worktree.go Normal file
View File

@@ -0,0 +1,626 @@
package cmd
import (
"fmt"
"os"
"os/exec"
"path/filepath"
"strings"
"text/tabwriter"
log "github.com/sirupsen/logrus"
"github.com/spf13/cobra"
"github.com/onyx-dot-app/onyx/tools/ods/internal/agentlab"
"github.com/onyx-dot-app/onyx/tools/ods/internal/paths"
)
// WorktreeCreateOptions holds flag values for `worktree create`.
type WorktreeCreateOptions struct {
	From           string // git ref to branch from; empty infers the default from the branch lane
	Path           string // custom checkout path; empty uses the default layout
	Bootstrap      bool   // bootstrap env, Python, and frontend deps after creation
	DependencyMode string // "namespaced" or "shared" external dependency mode
}
// WorktreeRemoveOptions holds flag values for `worktree remove`.
type WorktreeRemoveOptions struct {
	Force    bool // remove even if git reports uncommitted changes
	DropDeps bool // tear down namespaced dependencies before removal
}
// WorktreeBootstrapOptions holds flag values for `worktree bootstrap`.
// Each field is parsed into an agentlab.BootstrapMode.
type WorktreeBootstrapOptions struct {
	EnvMode    string // env bootstrap mode: auto, skip, link, copy
	PythonMode string // Python bootstrap mode: auto, skip, link, copy
	WebMode    string // frontend bootstrap mode: auto, skip, clone, copy, npm
}
// NewWorktreeCommand creates the parent worktree command and attaches all of
// its subcommands.
func NewWorktreeCommand() *cobra.Command {
	root := &cobra.Command{
		Use:   "worktree",
		Short: "Manage agent-lab git worktrees and harness metadata",
		Long: `Manage agent-lab git worktrees and the local harness state that makes
them bootable with isolated ports, URLs, and artifact directories.`,
	}
	for _, sub := range []*cobra.Command{
		newWorktreeCreateCommand(),
		newWorktreeBootstrapCommand(),
		newWorktreeDepsCommand(),
		newWorktreeStatusCommand(),
		newWorktreeShowCommand(),
		newWorktreeRemoveCommand(),
	} {
		root.AddCommand(sub)
	}
	return root
}
// newWorktreeCreateCommand builds the `worktree create <branch>` subcommand.
func newWorktreeCreateCommand() *cobra.Command {
	opts := &WorktreeCreateOptions{}
	cmd := &cobra.Command{
		Use:   "create <branch>",
		Short: "Create a new agent-lab worktree with isolated runtime metadata",
		Long: `Create a tracked agent-lab worktree and bootstrap its local runtime state.
Branch lanes control the default base ref when --from is not supplied:
codex/lab/<name> -> codex/agent-lab
codex/fix/<name> -> origin/main
codex/feat/<name> -> origin/main
Use conventional branch lanes for product work so the base stays explicit.`,
		Args: cobra.ExactArgs(1),
		Run: func(cmd *cobra.Command, args []string) {
			runWorktreeCreate(args[0], opts)
		},
	}
	cmd.Flags().StringVar(&opts.From, "from", "", "git ref to branch from (defaults are inferred from the branch lane)")
	cmd.Flags().StringVar(&opts.Path, "path", "", "custom checkout path for the new worktree")
	// Bootstrap defaults to true; pass --bootstrap=false to opt out.
	cmd.Flags().BoolVar(&opts.Bootstrap, "bootstrap", true, "bootstrap env, Python, and frontend dependencies for the worktree")
	cmd.Flags().StringVar(&opts.DependencyMode, "dependency-mode", string(agentlab.DependencyModeNamespaced), "dependency mode: namespaced or shared")
	return cmd
}
// newWorktreeBootstrapCommand builds the `worktree bootstrap [worktree]`
// subcommand; omitting the argument targets the current worktree.
func newWorktreeBootstrapCommand() *cobra.Command {
	opts := &WorktreeBootstrapOptions{}
	bootstrapCmd := &cobra.Command{
		Use:   "bootstrap [worktree]",
		Short: "Bootstrap env files and dependencies for an agent-lab worktree",
		Args:  cobra.MaximumNArgs(1),
		Run: func(_ *cobra.Command, args []string) {
			target := ""
			if len(args) > 0 {
				target = args[0]
			}
			runWorktreeBootstrap(target, opts)
		},
	}
	flags := bootstrapCmd.Flags()
	flags.StringVar(&opts.EnvMode, "env-mode", string(agentlab.BootstrapModeAuto), "env bootstrap mode: auto, skip, link, copy")
	flags.StringVar(&opts.PythonMode, "python-mode", string(agentlab.BootstrapModeAuto), "Python bootstrap mode: auto, skip, link, copy")
	flags.StringVar(&opts.WebMode, "web-mode", string(agentlab.BootstrapModeAuto), "frontend bootstrap mode: auto, skip, clone, copy, npm")
	return bootstrapCmd
}
// newWorktreeDepsCommand builds the `worktree deps` command group
// (up/status/reset/down). Every subcommand takes an optional worktree
// identifier; omitting it targets the current worktree.
func newWorktreeDepsCommand() *cobra.Command {
	cmd := &cobra.Command{
		Use:   "deps",
		Short: "Manage namespaced external dependencies for an agent-lab worktree",
	}
	// subcommand factors out the identical identifier-extraction boilerplate
	// that each of the four subcommands previously duplicated.
	subcommand := func(use, short string, run func(identifier string)) *cobra.Command {
		return &cobra.Command{
			Use:   use,
			Short: short,
			Args:  cobra.MaximumNArgs(1),
			Run: func(cmd *cobra.Command, args []string) {
				identifier := ""
				if len(args) == 1 {
					identifier = args[0]
				}
				run(identifier)
			},
		}
	}
	cmd.AddCommand(subcommand("up [worktree]", "Provision external dependency state for a worktree", runWorktreeDepsUp))
	cmd.AddCommand(subcommand("status [worktree]", "Inspect external dependency state for a worktree", runWorktreeDepsStatus))
	cmd.AddCommand(subcommand("reset [worktree]", "Reset namespaced external dependency state for a worktree", runWorktreeDepsReset))
	cmd.AddCommand(subcommand("down [worktree]", "Tear down namespaced external dependency state for a worktree", runWorktreeDepsDown))
	return cmd
}
// newWorktreeStatusCommand builds the `worktree status` subcommand.
func newWorktreeStatusCommand() *cobra.Command {
	statusCmd := &cobra.Command{
		Use:   "status",
		Short: "List tracked agent-lab worktrees",
		Run: func(_ *cobra.Command, _ []string) {
			runWorktreeStatus()
		},
	}
	return statusCmd
}
// newWorktreeShowCommand builds the `worktree show [worktree]` subcommand;
// omitting the argument targets the current worktree.
func newWorktreeShowCommand() *cobra.Command {
	return &cobra.Command{
		Use:   "show [worktree]",
		Short: "Show detailed metadata for an agent-lab worktree",
		Args:  cobra.MaximumNArgs(1),
		Run: func(_ *cobra.Command, args []string) {
			target := ""
			if len(args) > 0 {
				target = args[0]
			}
			runWorktreeShow(target)
		},
	}
}
// newWorktreeRemoveCommand builds the `worktree remove <worktree>` subcommand.
func newWorktreeRemoveCommand() *cobra.Command {
	opts := &WorktreeRemoveOptions{}
	cmd := &cobra.Command{
		Use:   "remove <worktree>",
		Short: "Remove an agent-lab worktree and its local state",
		Args:  cobra.ExactArgs(1),
		Run: func(cmd *cobra.Command, args []string) {
			runWorktreeRemove(args[0], opts)
		},
	}
	cmd.Flags().BoolVar(&opts.Force, "force", false, "force removal even if git reports uncommitted changes")
	cmd.Flags().BoolVar(&opts.DropDeps, "drop-deps", false, "tear down namespaced dependencies before removing the worktree")
	return cmd
}
// runWorktreeCreate creates a tracked agent-lab worktree: it allocates ports,
// runs `git worktree add`, writes env files and the manifest, optionally
// bootstraps dependencies, provisions external dependency state, then prints
// a summary with next steps. Any failure exits the process fatally.
func runWorktreeCreate(branch string, opts *WorktreeCreateOptions) {
	repoRoot, err := paths.GitRoot()
	if err != nil {
		log.Fatalf("Failed to determine git root: %v", err)
	}
	commonGitDir, err := agentlab.GetCommonGitDir()
	if err != nil {
		log.Fatalf("Failed to determine git common dir: %v", err)
	}
	// Refuse to create a second worktree for a branch that is already tracked.
	if manifest, found, err := agentlab.FindByIdentifier(commonGitDir, branch); err != nil {
		log.Fatalf("Failed to inspect existing worktrees: %v", err)
	} else if found {
		log.Fatalf("Worktree already exists for %s at %s", manifest.Branch, manifest.CheckoutPath)
	}
	// Port allocation considers all existing manifests to avoid collisions.
	manifests, err := agentlab.LoadAll(commonGitDir)
	if err != nil {
		log.Fatalf("Failed to load worktree metadata: %v", err)
	}
	ports, err := agentlab.AllocatePorts(manifests)
	if err != nil {
		log.Fatalf("Failed to allocate worktree ports: %v", err)
	}
	dependencyMode := agentlab.DependencyMode(opts.DependencyMode)
	switch dependencyMode {
	case agentlab.DependencyModeShared, agentlab.DependencyModeNamespaced:
	default:
		log.Fatalf("Invalid dependency mode %q: must be shared or namespaced", opts.DependencyMode)
	}
	checkoutPath := opts.Path
	if checkoutPath == "" {
		checkoutPath = agentlab.DefaultCheckoutPath(repoRoot, branch)
	}
	checkoutPath, err = filepath.Abs(checkoutPath)
	if err != nil {
		log.Fatalf("Failed to resolve checkout path: %v", err)
	}
	if _, err := os.Stat(checkoutPath); err == nil {
		log.Fatalf("Checkout path already exists: %s", checkoutPath)
	}
	// Base ref defaults are lane-derived (see the create command's Long help).
	baseSelection := agentlab.ResolveCreateBaseRef(branch, opts.From, agentlab.GitRefExists)
	manifest := agentlab.BuildManifest(
		repoRoot,
		commonGitDir,
		branch,
		baseSelection.Lane,
		baseSelection.Ref,
		checkoutPath,
		ports,
		dependencyMode,
	)
	// hooksPath=/dev/null disables repo hooks during worktree creation.
	args := []string{"-c", "core.hooksPath=/dev/null", "worktree", "add", "-b", branch, checkoutPath, baseSelection.Ref}
	log.Infof("Creating worktree %s at %s", branch, checkoutPath)
	gitCmd := exec.Command("git", args...)
	gitCmd.Stdout = os.Stdout
	gitCmd.Stderr = os.Stderr
	gitCmd.Stdin = os.Stdin
	if err := gitCmd.Run(); err != nil {
		log.Fatalf("git worktree add failed: %v", err)
	}
	// Store the symlink-resolved path so later lookups match git's view.
	if resolvedPath, err := filepath.EvalSymlinks(checkoutPath); err == nil {
		manifest.CheckoutPath = resolvedPath
	}
	if err := agentlab.WriteEnvFiles(manifest); err != nil {
		log.Fatalf("Failed to write worktree env files: %v", err)
	}
	if err := agentlab.WriteManifest(commonGitDir, manifest); err != nil {
		log.Fatalf("Failed to write worktree manifest: %v", err)
	}
	if opts.Bootstrap {
		bootstrapResult, err := agentlab.Bootstrap(manifest, agentlab.BootstrapOptions{
			EnvMode:    agentlab.BootstrapModeAuto,
			PythonMode: agentlab.BootstrapModeAuto,
			WebMode:    agentlab.BootstrapModeAuto,
		})
		if err != nil {
			log.Fatalf("Failed to bootstrap worktree: %v", err)
		}
		for _, action := range bootstrapResult.Actions {
			fmt.Printf(" bootstrap: %s\n", action)
		}
	}
	manifest, dependencyResult, err := agentlab.ProvisionDependencies(commonGitDir, manifest)
	if err != nil {
		log.Fatalf("Failed to provision worktree dependencies: %v", err)
	}
	for _, action := range dependencyResult.Actions {
		fmt.Printf(" deps: %s\n", action)
	}
	// Human-readable summary of everything that was set up.
	fmt.Printf("Created agent-lab worktree %s\n", manifest.Branch)
	fmt.Printf(" checkout: %s\n", manifest.CheckoutPath)
	fmt.Printf(" lane: %s\n", manifest.ResolvedLane())
	fmt.Printf(" base ref: %s\n", manifest.BaseRef)
	fmt.Printf(" base selection: %s\n", baseSelection.Reason)
	fmt.Printf(" dependency mode: %s\n", manifest.ResolvedDependencies().Mode)
	if manifest.ResolvedDependencies().Namespace != "" {
		fmt.Printf(" dependency namespace: %s\n", manifest.ResolvedDependencies().Namespace)
	}
	if manifest.ResolvedDependencies().Mode == agentlab.DependencyModeNamespaced {
		fmt.Printf(" postgres database: %s\n", manifest.ResolvedDependencies().PostgresDatabase)
		fmt.Printf(" redis prefix: %s\n", manifest.ResolvedDependencies().RedisPrefix)
		fmt.Printf(" file-store bucket: %s\n", manifest.ResolvedDependencies().FileStoreBucket)
	}
	fmt.Printf(" web url: %s\n", manifest.URLs.Web)
	fmt.Printf(" api url: %s\n", manifest.URLs.API)
	fmt.Printf(" mcp url: %s\n", manifest.URLs.MCP)
	fmt.Printf(" artifacts: %s\n", manifest.ArtifactDir)
	for _, warning := range manifest.DependencyWarnings() {
		fmt.Printf(" note: %s\n", warning)
	}
	// Next-step hints differ by lane: product-lane worktrees are driven from
	// the control checkout via --worktree; others run harness commands locally.
	fmt.Printf("\nNext steps:\n")
	fmt.Printf(" cd %s\n", manifest.CheckoutPath)
	fmt.Printf(" # Make edits in the worktree itself.\n")
	if manifest.ResolvedLane() == agentlab.WorktreeLaneProduct {
		fmt.Printf(" # Run harness commands from the control checkout with --worktree %s.\n", manifest.Branch)
		fmt.Printf(" ods verify --worktree %s\n", manifest.Branch)
		fmt.Printf(" ods backend api --worktree %s\n", manifest.Branch)
		fmt.Printf(" ods web dev --worktree %s\n", manifest.Branch)
	} else {
		fmt.Printf(" ods backend api\n")
		fmt.Printf(" ods backend model_server\n")
		fmt.Printf(" ods web dev\n")
		fmt.Printf(" ods verify\n")
	}
}
// runWorktreeBootstrap re-runs the env/python/web bootstrap steps for an
// existing agent-lab worktree (resolved by branch, id, path, or — when the
// identifier is empty — the current checkout) and prints each action taken.
// Any failure is fatal.
func runWorktreeBootstrap(identifier string, opts *WorktreeBootstrapOptions) {
	target := mustResolveWorktree(identifier)
	bootstrapOpts := agentlab.BootstrapOptions{
		EnvMode:    agentlab.BootstrapMode(opts.EnvMode),
		PythonMode: agentlab.BootstrapMode(opts.PythonMode),
		WebMode:    agentlab.BootstrapMode(opts.WebMode),
	}
	result, err := agentlab.Bootstrap(target, bootstrapOpts)
	if err != nil {
		log.Fatalf("Failed to bootstrap worktree %s: %v", target.Branch, err)
	}
	fmt.Printf("Bootstrapped %s\n", target.Branch)
	for _, step := range result.Actions {
		fmt.Printf("  %s\n", step)
	}
}
// runWorktreeDepsUp runs agentlab.ProvisionDependencies for the resolved
// worktree, then prints the provisioning actions and any manifest warnings.
// Any failure is fatal.
func runWorktreeDepsUp(identifier string) {
	gitDir, err := agentlab.GetCommonGitDir()
	if err != nil {
		log.Fatalf("Failed to determine git common dir: %v", err)
	}
	target := mustResolveWorktree(identifier)
	target, provisioned, err := agentlab.ProvisionDependencies(gitDir, target)
	if err != nil {
		log.Fatalf("Failed to provision dependencies for %s: %v", target.Branch, err)
	}
	fmt.Printf("Provisioned dependencies for %s\n", target.Branch)
	for _, step := range provisioned.Actions {
		fmt.Printf("  %s\n", step)
	}
	for _, note := range target.DependencyWarnings() {
		fmt.Printf("  note: %s\n", note)
	}
}
// runWorktreeDepsStatus prints a dependency report for the resolved
// worktree: mode, namespace, readiness/usage counters per backing service,
// the search-infra mode, and any manifest warnings. Lines whose backing
// field is empty are omitted entirely.
func runWorktreeDepsStatus(identifier string) {
	manifest := mustResolveWorktree(identifier)
	status, err := agentlab.InspectDependencies(manifest)
	if err != nil {
		log.Fatalf("Failed to inspect dependencies for %s: %v", manifest.Branch, err)
	}
	fmt.Printf("branch: %s\n", manifest.Branch)
	fmt.Printf("mode: %s\n", status.Mode)
	// Empty fields suppress their lines (e.g. when nothing is namespaced).
	if status.Namespace != "" {
		fmt.Printf("namespace: %s\n", status.Namespace)
	}
	if status.PostgresDatabase != "" {
		fmt.Printf("postgres database: %s (ready=%t tables=%d)\n", status.PostgresDatabase, status.PostgresReady, status.PostgresTableCount)
	}
	if status.RedisPrefix != "" {
		fmt.Printf("redis prefix: %s (ready=%t keys=%d)\n", status.RedisPrefix, status.RedisReady, status.RedisKeyCount)
	}
	if status.FileStoreBucket != "" {
		fmt.Printf("file-store bucket: %s (ready=%t objects=%d)\n", status.FileStoreBucket, status.FileStoreReady, status.FileStoreObjectCount)
	}
	fmt.Printf("search infra: %s\n", status.SearchInfraMode)
	for _, warning := range manifest.DependencyWarnings() {
		fmt.Printf("note: %s\n", warning)
	}
}
// runWorktreeDepsReset runs agentlab.ResetDependencies for the resolved
// worktree and prints the actions it performed. Any failure is fatal.
func runWorktreeDepsReset(identifier string) {
	gitDir, err := agentlab.GetCommonGitDir()
	if err != nil {
		log.Fatalf("Failed to determine git common dir: %v", err)
	}
	target := mustResolveWorktree(identifier)
	target, resetResult, err := agentlab.ResetDependencies(gitDir, target)
	if err != nil {
		log.Fatalf("Failed to reset dependencies for %s: %v", target.Branch, err)
	}
	fmt.Printf("Reset dependencies for %s\n", target.Branch)
	for _, step := range resetResult.Actions {
		fmt.Printf("  %s\n", step)
	}
}
// runWorktreeDepsDown runs agentlab.TeardownDependencies for the resolved
// worktree and prints the actions it performed. Any failure is fatal.
func runWorktreeDepsDown(identifier string) {
	gitDir, err := agentlab.GetCommonGitDir()
	if err != nil {
		log.Fatalf("Failed to determine git common dir: %v", err)
	}
	target := mustResolveWorktree(identifier)
	target, teardown, err := agentlab.TeardownDependencies(gitDir, target)
	if err != nil {
		log.Fatalf("Failed to tear down dependencies for %s: %v", target.Branch, err)
	}
	fmt.Printf("Tore down dependencies for %s\n", target.Branch)
	for _, step := range teardown.Actions {
		fmt.Printf("  %s\n", step)
	}
}
// runWorktreeStatus prints a table of all tracked agent-lab worktrees,
// marking the row of the worktree the command was run from with "*".
func runWorktreeStatus() {
	commonGitDir, err := agentlab.GetCommonGitDir()
	if err != nil {
		log.Fatalf("Failed to determine git common dir: %v", err)
	}
	// Best-effort resolution of the current checkout: errors are deliberately
	// ignored and leave `current` zero-valued, which the ID != "" guard in
	// the loop below turns into "no row marked".
	repoRoot, _ := paths.GitRoot()
	current, _, _ := agentlab.FindByRepoRoot(commonGitDir, repoRoot)
	manifests, err := agentlab.LoadAll(commonGitDir)
	if err != nil {
		log.Fatalf("Failed to load worktree manifests: %v", err)
	}
	if len(manifests) == 0 {
		log.Info("No agent-lab worktrees tracked yet.")
		return
	}
	tw := tabwriter.NewWriter(os.Stdout, 0, 2, 2, ' ', 0)
	if _, err := fmt.Fprintln(tw, "CURRENT\tBRANCH\tLANE\tMODE\tWEB\tAPI\tPATH"); err != nil {
		log.Fatalf("Failed to write worktree header: %v", err)
	}
	for _, manifest := range manifests {
		marker := ""
		// Require a non-empty ID so a zero-valued `current` (lookup failed or
		// run outside any tracked worktree) can never match a real row.
		if manifest.ID == current.ID && manifest.ID != "" {
			marker = "*"
		}
		if _, err := fmt.Fprintf(
			tw,
			"%s\t%s\t%s\t%s\t%s\t%s\t%s\n",
			marker,
			manifest.Branch,
			manifest.ResolvedLane(),
			manifest.ResolvedDependencies().Mode,
			manifest.URLs.Web,
			manifest.URLs.API,
			manifest.CheckoutPath,
		); err != nil {
			log.Fatalf("Failed to write worktree row for %s: %v", manifest.Branch, err)
		}
	}
	// Flush errors are ignored: row/header writes above already Fatalf'd on
	// any underlying write failure.
	_ = tw.Flush()
}
// runWorktreeShow dumps every field of the resolved worktree's manifest in
// key: value form — identity, paths, dependency wiring, service URLs/ports,
// and the most recent verification record if one exists.
func runWorktreeShow(identifier string) {
	manifest := mustResolveWorktree(identifier)
	fmt.Printf("branch: %s\n", manifest.Branch)
	fmt.Printf("id: %s\n", manifest.ID)
	fmt.Printf("lane: %s\n", manifest.ResolvedLane())
	fmt.Printf("checkout: %s\n", manifest.CheckoutPath)
	fmt.Printf("base-ref: %s\n", manifest.BaseRef)
	fmt.Printf("state-dir: %s\n", manifest.StateDir)
	fmt.Printf("artifacts: %s\n", manifest.ArtifactDir)
	fmt.Printf("backend env: %s\n", manifest.EnvFile)
	fmt.Printf("web env: %s\n", manifest.WebEnvFile)
	fmt.Printf("compose project: %s\n", manifest.ComposeProject)
	fmt.Printf("dependency mode: %s\n", manifest.ResolvedDependencies().Mode)
	// Namespace/database lines only appear for namespaced dependency configs,
	// where these fields are populated.
	if manifest.ResolvedDependencies().Namespace != "" {
		fmt.Printf("dependency namespace: %s\n", manifest.ResolvedDependencies().Namespace)
	}
	if manifest.ResolvedDependencies().PostgresDatabase != "" {
		fmt.Printf("postgres database: %s\n", manifest.ResolvedDependencies().PostgresDatabase)
		fmt.Printf("redis prefix: %s\n", manifest.ResolvedDependencies().RedisPrefix)
		fmt.Printf("file-store bucket: %s\n", manifest.ResolvedDependencies().FileStoreBucket)
	}
	fmt.Printf("search infra: %s\n", manifest.ResolvedDependencies().SearchInfraMode)
	fmt.Printf("web url: %s\n", manifest.URLs.Web)
	fmt.Printf("api url: %s\n", manifest.URLs.API)
	fmt.Printf("mcp url: %s\n", manifest.URLs.MCP)
	fmt.Printf("ports: web=%d api=%d model_server=%d mcp=%d\n", manifest.Ports.Web, manifest.Ports.API, manifest.Ports.ModelServer, manifest.Ports.MCP)
	if manifest.LastVerifiedAt != "" {
		fmt.Printf("last verified: %s\n", manifest.LastVerifiedAt)
	}
	if manifest.LastVerifySummary != "" {
		fmt.Printf("last summary: %s\n", manifest.LastVerifySummary)
	}
	for _, warning := range manifest.DependencyWarnings() {
		fmt.Printf("note: %s\n", warning)
	}
}
// mustResolveWorktree loads the manifest for the given identifier (branch,
// id, or checkout path). With an empty identifier it resolves the manifest
// for the current checkout instead. Any failure, including "not found", is
// fatal via log.Fatalf.
func mustResolveWorktree(identifier string) agentlab.Manifest {
	commonGitDir, err := agentlab.GetCommonGitDir()
	if err != nil {
		log.Fatalf("Failed to determine git common dir: %v", err)
	}
	if identifier == "" {
		repoRoot, err := paths.GitRoot()
		if err != nil {
			log.Fatalf("Failed to determine git root: %v", err)
		}
		manifest, found, err := agentlab.FindByRepoRoot(commonGitDir, repoRoot)
		if err != nil {
			log.Fatalf("Failed to resolve current worktree manifest: %v", err)
		}
		if !found {
			// Fix: the original interpolated the (empty) identifier here,
			// producing the useless message `No agent-lab worktree found for ""`.
			// Report the checkout path that actually failed to resolve.
			log.Fatalf("No agent-lab worktree found for current checkout %q", repoRoot)
		}
		return manifest
	}
	manifest, found, err := agentlab.FindByIdentifier(commonGitDir, identifier)
	if err != nil {
		log.Fatalf("Failed to resolve worktree manifest: %v", err)
	}
	if !found {
		log.Fatalf("No agent-lab worktree found for %q", identifier)
	}
	return manifest
}
// runWorktreeRemove deletes an agent-lab worktree: optionally tears down its
// namespaced dependencies first (--drop-deps), then runs `git worktree
// remove` (with --force when requested), and finally drops the tracked
// manifest state. If a forced git removal fails but leaves an orphaned
// checkout behind, the directory is removed directly as a fallback.
func runWorktreeRemove(identifier string, opts *WorktreeRemoveOptions) {
	commonGitDir, err := agentlab.GetCommonGitDir()
	if err != nil {
		log.Fatalf("Failed to determine git common dir: %v", err)
	}
	manifest, found, err := agentlab.FindByIdentifier(commonGitDir, identifier)
	if err != nil {
		log.Fatalf("Failed to resolve worktree: %v", err)
	}
	if !found {
		log.Fatalf("No agent-lab worktree found for %q", identifier)
	}
	// Dependency teardown must happen while the manifest still exists, so it
	// runs before the git removal below.
	if opts.DropDeps {
		var teardownResult *agentlab.DependencyResult
		manifest, teardownResult, err = agentlab.TeardownDependencies(commonGitDir, manifest)
		if err != nil {
			log.Fatalf("Failed to tear down worktree dependencies: %v", err)
		}
		for _, action := range teardownResult.Actions {
			fmt.Printf("  deps: %s\n", action)
		}
	}
	args := []string{"worktree", "remove"}
	if opts.Force {
		args = append(args, "--force")
	}
	args = append(args, manifest.CheckoutPath)
	log.Infof("Removing worktree %s", manifest.Branch)
	// Wire the child to our stdio so git prompts/progress reach the user.
	gitCmd := exec.Command("git", args...)
	gitCmd.Stdout = os.Stdout
	gitCmd.Stderr = os.Stderr
	gitCmd.Stdin = os.Stdin
	if err := gitCmd.Run(); err != nil {
		// Forced removals can detach the worktree from git yet fail to delete
		// the directory; detect that and clean up the leftover checkout.
		if opts.Force && isOrphanedWorktree(manifest.CheckoutPath) {
			log.Warnf("git detached %s but left an orphaned checkout behind; removing %s", manifest.Branch, manifest.CheckoutPath)
			if removeErr := os.RemoveAll(manifest.CheckoutPath); removeErr != nil {
				log.Fatalf("git worktree remove failed: %v (fallback cleanup failed: %v)", err, removeErr)
			}
		} else {
			log.Fatalf("git worktree remove failed: %v", err)
		}
	}
	if err := agentlab.RemoveState(commonGitDir, manifest.ID); err != nil {
		log.Fatalf("Failed to remove worktree state: %v", err)
	}
	fmt.Printf("Removed agent-lab worktree %s\n", manifest.Branch)
	// Without --drop-deps, namespaced backing state intentionally survives.
	if manifest.ResolvedDependencies().Mode == agentlab.DependencyModeNamespaced && !opts.DropDeps {
		fmt.Printf("  note: namespaced Postgres/Redis/MinIO state was left in place. Use `ods worktree deps down %s` before removal if you want cleanup.\n", manifest.Branch)
	}
}
// isOrphanedWorktree reports whether checkoutPath is no longer a usable git
// worktree: it is absent from `git worktree list --porcelain`, missing on
// disk, or its checkout is one git can no longer run `status` in.
func isOrphanedWorktree(checkoutPath string) bool {
	output, err := exec.Command("git", "worktree", "list", "--porcelain").Output()
	if err == nil {
		// Fix: compare whole porcelain lines instead of a raw substring
		// search. The original required a trailing "\n" (missing a match on a
		// final unterminated line) and could in principle match inside other
		// porcelain content; anchoring to the "worktree " line prefix and an
		// exact path remainder removes both issues.
		for _, line := range strings.Split(string(output), "\n") {
			if strings.HasPrefix(line, "worktree ") && strings.TrimPrefix(line, "worktree ") == checkoutPath {
				return false // still registered with git
			}
		}
	}
	if _, statErr := os.Stat(checkoutPath); os.IsNotExist(statErr) {
		return true
	}
	// Registered-or-not aside, a checkout git cannot even `status` is orphaned.
	if statusErr := exec.Command("git", "-C", checkoutPath, "status", "--short").Run(); statusErr != nil {
		return true
	}
	return false
}

View File

@@ -0,0 +1,95 @@
package agentcheck
import (
"bufio"
"fmt"
"path/filepath"
"regexp"
"strings"
)
// hunkHeaderPattern captures the new-file start line from a unified-diff hunk
// header, e.g. `@@ -10,2 +11,3 @@` captures "11".
var hunkHeaderPattern = regexp.MustCompile(`^@@ -\d+(?:,\d+)? \+(\d+)(?:,\d+)? @@`)

// AddedLine is one "+" line from a diff: the new-file path, the line number
// it occupies in the new file, and its content with the leading "+" removed.
type AddedLine struct {
	Path    string
	LineNum int
	Content string
}

// Violation records one rule match against one added line.
type Violation struct {
	RuleID  string
	Path    string
	LineNum int
	Message string
	Content string
}
// ParseAddedLines walks a unified diff and returns every added ("+") line
// together with the path and new-file line number it lands on. Lines outside
// a hunk, deletions, and /dev/null targets are skipped. A malformed hunk
// header or a scanner failure returns an error.
func ParseAddedLines(diff string) ([]AddedLine, error) {
	scanner := bufio.NewScanner(strings.NewReader(diff))
	// Allow very long diff lines (up to 1 MiB).
	scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024)
	var added []AddedLine
	path := ""
	newLineNum := 0
	inHunk := false
	for scanner.Scan() {
		text := scanner.Text()
		if strings.HasPrefix(text, "+++ ") {
			// New file header: remember the target path, reset hunk state.
			path = normalizeDiffPath(strings.TrimPrefix(text, "+++ "))
			inHunk = false
			continue
		}
		if strings.HasPrefix(text, "@@ ") {
			groups := hunkHeaderPattern.FindStringSubmatch(text)
			if len(groups) != 2 {
				return nil, fmt.Errorf("failed to parse hunk header: %s", text)
			}
			start, err := parseLineNumber(groups[1])
			if err != nil {
				return nil, err
			}
			newLineNum = start
			inHunk = true
			continue
		}
		if !inHunk || path == "" {
			continue
		}
		if strings.HasPrefix(text, "+") && !strings.HasPrefix(text, "+++") {
			added = append(added, AddedLine{
				Path:    path,
				LineNum: newLineNum,
				Content: strings.TrimPrefix(text, "+"),
			})
			newLineNum++
			continue
		}
		if strings.HasPrefix(text, "-") && !strings.HasPrefix(text, "---") {
			// Deletions do not exist in the new file; no counter advance.
			continue
		}
		// Context (and any other) lines occupy a new-file line.
		newLineNum++
	}
	if err := scanner.Err(); err != nil {
		return nil, fmt.Errorf("failed to scan diff: %w", err)
	}
	return added, nil
}
// normalizeDiffPath turns a diff header path such as "b/foo/bar.py" into a
// clean, slash-separated repo-relative path; "/dev/null" maps to "".
func normalizeDiffPath(path string) string {
	trimmed := strings.TrimPrefix(strings.TrimSpace(path), "b/")
	if trimmed == "/dev/null" {
		return ""
	}
	return filepath.ToSlash(trimmed)
}
// parseLineNumber converts a decimal digit string to an int. Any non-digit
// character — including an empty string — is an error. (The hunk-header
// regexp feeds this a \d+ capture, so the error paths are defensive.)
func parseLineNumber(value string) (int, error) {
	// Fix: the original returned (0, nil) for "", silently yielding a bogus
	// line number 0; treat empty input as malformed instead.
	if value == "" {
		return 0, fmt.Errorf("invalid line number: %s", value)
	}
	lineNum := 0
	for _, ch := range value {
		if ch < '0' || ch > '9' {
			return 0, fmt.Errorf("invalid line number: %s", value)
		}
		lineNum = lineNum*10 + int(ch-'0')
	}
	return lineNum, nil
}

View File

@@ -0,0 +1,143 @@
package agentcheck
import (
"reflect"
"testing"
)
// TestParseAddedLines feeds a two-file diff through ParseAddedLines and spot
// checks that added lines get the right path and new-file line number:
// deletions don't advance the counter, and multiple hunks/files are tracked.
func TestParseAddedLines(t *testing.T) {
	diff := `diff --git a/backend/onyx/server/foo.py b/backend/onyx/server/foo.py
index 1111111..2222222 100644
--- a/backend/onyx/server/foo.py
+++ b/backend/onyx/server/foo.py
@@ -10,1 +11,3 @@
context = old_value
+from fastapi import HTTPException
-raise OldError()
+raise HTTPException(status_code=400, detail="bad")
@@ -20,0 +23,1 @@
+task.delay (payload)
diff --git a/web/src/sections/Foo.tsx b/web/src/sections/Foo.tsx
index 1111111..2222222 100644
--- a/web/src/sections/Foo.tsx
+++ b/web/src/sections/Foo.tsx
@@ -3,0 +4 @@
+import { Thing } from "@/components/Thing";`
	addedLines, err := ParseAddedLines(diff)
	if err != nil {
		t.Fatalf("ParseAddedLines returned error: %v", err)
	}
	if len(addedLines) != 4 {
		t.Fatalf("expected 4 added lines, got %d", len(addedLines))
	}
	// Hunk starts at new line 11; the context line consumes it, so the first
	// added line is 12.
	if addedLines[0].Path != "backend/onyx/server/foo.py" || addedLines[0].LineNum != 12 {
		t.Fatalf("unexpected first added line: %+v", addedLines[0])
	}
	if addedLines[2].Path != "backend/onyx/server/foo.py" || addedLines[2].LineNum != 23 {
		t.Fatalf("unexpected third added line: %+v", addedLines[2])
	}
	// Second file: hunk header with no count on the new side ("+4").
	if addedLines[3].Path != "web/src/sections/Foo.tsx" || addedLines[3].LineNum != 4 {
		t.Fatalf("unexpected final added line: %+v", addedLines[3])
	}
}
// TestParseAddedLinesRejectsMalformedHunkHeader: a hunk header that does not
// match the `@@ -l,s +l,s @@` shape must surface as a parse error.
func TestParseAddedLinesRejectsMalformedHunkHeader(t *testing.T) {
	diff := `diff --git a/backend/onyx/server/foo.py b/backend/onyx/server/foo.py
--- a/backend/onyx/server/foo.py
+++ b/backend/onyx/server/foo.py
@@ invalid @@
+raise HTTPException(status_code=400, detail="bad")`
	if _, err := ParseAddedLines(diff); err == nil {
		t.Fatal("expected malformed hunk header to return an error")
	}
}
// TestCheckAddedLinesFindsExpectedViolations: each default rule fires on a
// representative added line, and violations come back in input order.
func TestCheckAddedLinesFindsExpectedViolations(t *testing.T) {
	lines := []AddedLine{
		{Path: "backend/onyx/server/foo.py", LineNum: 10, Content: "from fastapi import HTTPException"},
		{Path: "backend/onyx/server/foo.py", LineNum: 11, Content: `raise HTTPException(status_code=400, detail="bad")`},
		{Path: "backend/onyx/server/foo.py", LineNum: 12, Content: "response_model = FooResponse"},
		{Path: "backend/onyx/server/foo.py", LineNum: 13, Content: "my_task.delay (payload)"},
		{Path: "web/src/sections/Foo.tsx", LineNum: 20, Content: `export { Thing } from "@/components/Thing";`},
	}
	violations := CheckAddedLines(lines)
	if len(violations) != 5 {
		t.Fatalf("expected 5 violations, got %d: %+v", len(violations), violations)
	}
	// One violation per line, in the same order the lines were supplied.
	expectedRules := []string{
		"no-new-http-exception",
		"no-new-http-exception",
		"no-new-response-model",
		"no-new-delay",
		"no-new-legacy-component-import",
	}
	for i, expectedRule := range expectedRules {
		if violations[i].RuleID != expectedRule {
			t.Fatalf("expected rule %q at index %d, got %q", expectedRule, i, violations[i].RuleID)
		}
	}
}
// TestCheckAddedLinesIgnoresCommentsStringsAndAllowedScopes: banned tokens
// inside string literals or comments, longer identifiers that merely contain
// a banned word, and files in exempt scopes (main.py, tests, model_server,
// the legacy components tree itself) must all produce zero violations.
func TestCheckAddedLinesIgnoresCommentsStringsAndAllowedScopes(t *testing.T) {
	lines := []AddedLine{
		{Path: "backend/onyx/server/foo.py", LineNum: 1, Content: `message = "HTTPException"`},
		{Path: "backend/onyx/server/foo.py", LineNum: 2, Content: `detail = "response_model="`},
		{Path: "backend/onyx/server/foo.py", LineNum: 3, Content: `note = ".delay("`},
		{Path: "backend/onyx/server/foo.py", LineNum: 4, Content: `# HTTPException`},
		{Path: "backend/onyx/server/foo.py", LineNum: 5, Content: `handler = HTTPExceptionAlias`},
		{Path: "backend/onyx/main.py", LineNum: 6, Content: `raise HTTPException(status_code=400, detail="bad")`},
		{Path: "backend/tests/unit/test_foo.py", LineNum: 7, Content: `from fastapi import HTTPException`},
		{Path: "backend/model_server/foo.py", LineNum: 8, Content: `task.delay(payload)`},
		{Path: "web/src/sections/Foo.tsx", LineNum: 9, Content: `const path = "@/components/Thing";`},
		{Path: "web/src/sections/Foo.tsx", LineNum: 10, Content: `// import { Thing } from "@/components/Thing";`},
		{Path: "web/src/components/Foo.tsx", LineNum: 11, Content: `import { Bar } from "@/components/Bar";`},
	}
	violations := CheckAddedLines(lines)
	if len(violations) != 0 {
		t.Fatalf("expected no violations, got %+v", violations)
	}
}
// TestCheckAddedLinesWithRulesSupportsCustomRuleSets: a caller-supplied rule
// set fully replaces the defaults — only the custom rule fires, and the
// returned Violation carries the rule's id, message, and line metadata.
func TestCheckAddedLinesWithRulesSupportsCustomRuleSets(t *testing.T) {
	lines := []AddedLine{
		{Path: "backend/onyx/server/foo.py", LineNum: 12, Content: "response_model = FooResponse"},
		{Path: "web/src/sections/Foo.tsx", LineNum: 20, Content: `import type { Thing } from "@/components/Thing";`},
	}
	rules := []Rule{
		{
			ID:      "python-response-model-only",
			Message: "response_model is not allowed",
			Scope:   backendProductPythonScope(),
			Match: func(line lineView) bool {
				return responseModelPattern.MatchString(line.CodeSansStrings)
			},
		},
	}
	violations := CheckAddedLinesWithRules(lines, rules)
	// The .tsx line would trip the default legacy-import rule, but that rule
	// is not part of this custom set, so only the Python line is reported.
	expected := []Violation{
		{
			RuleID:  "python-response-model-only",
			Path:    "backend/onyx/server/foo.py",
			LineNum: 12,
			Message: "response_model is not allowed",
			Content: "response_model = FooResponse",
		},
	}
	if !reflect.DeepEqual(expected, violations) {
		t.Fatalf("unexpected violations: %+v", violations)
	}
}

View File

@@ -0,0 +1,101 @@
package agentcheck
import "strings"
// stripLineComment removes a trailing line comment from content using the
// comment syntax implied by the file extension ("#" for Python, "//" for
// JS-like files). Unknown extensions pass through untouched.
func stripLineComment(path string, content string) string {
	if strings.HasSuffix(path, ".py") {
		return stripCommentMarker(content, "#")
	}
	if isJSLikePath(path) {
		return stripCommentMarker(content, "//")
	}
	return content
}
// isJSLikePath reports whether path names a JS/TS source file, i.e. one that
// uses "//" line comments.
func isJSLikePath(path string) bool {
	for _, ext := range []string{".js", ".jsx", ".ts", ".tsx"} {
		if strings.HasSuffix(path, ext) {
			return true
		}
	}
	return false
}
// stripCommentMarker returns line truncated at the first occurrence of
// marker that sits outside any string literal. Quote state is tracked for
// double, single, and backtick quotes; backslash escaping is honored inside
// quotes except backtick-quoted text (raw/template-literal style, where a
// backslash is not an escape for the closing quote).
func stripCommentMarker(line string, marker string) string {
	if marker == "" {
		return line
	}
	var builder strings.Builder
	quote := byte(0)  // currently open quote char; 0 when outside a string
	escaped := false  // previous char was a backslash inside a quoted string
	for i := 0; i < len(line); i++ {
		ch := line[i]
		if quote != 0 {
			// Inside a string: copy everything verbatim; markers here are
			// literal text, not comments.
			builder.WriteByte(ch)
			if escaped {
				escaped = false
				continue
			}
			if ch == '\\' && quote != '`' {
				escaped = true
				continue
			}
			if ch == quote {
				quote = 0
			}
			continue
		}
		// Outside strings: the first marker ends the code portion.
		if strings.HasPrefix(line[i:], marker) {
			break
		}
		builder.WriteByte(ch)
		if isQuote(ch) {
			quote = ch
		}
	}
	return builder.String()
}
// stripQuotedStrings blanks out every string literal in line: each literal
// (opening quote, contents, closing quote) collapses to a single space, so
// token patterns matched afterwards cannot fire on text inside strings.
// Escaping rules mirror stripCommentMarker: backslash escapes apply inside
// single/double quotes but not inside backticks.
func stripQuotedStrings(line string) string {
	var builder strings.Builder
	quote := byte(0)  // currently open quote char; 0 when outside a string
	escaped := false  // previous char was a backslash inside a quoted string
	for i := 0; i < len(line); i++ {
		ch := line[i]
		if quote != 0 {
			// Inside a string: emit nothing, just track escape/close state.
			if escaped {
				escaped = false
				continue
			}
			if ch == '\\' && quote != '`' {
				escaped = true
				continue
			}
			if ch == quote {
				quote = 0
			}
			continue
		}
		if isQuote(ch) {
			// Replace the whole upcoming literal with one space placeholder.
			quote = ch
			builder.WriteByte(' ')
			continue
		}
		builder.WriteByte(ch)
	}
	return builder.String()
}
// isQuote reports whether ch opens/closes a string literal in the supported
// languages: double quote, single quote, or backtick.
func isQuote(ch byte) bool {
	switch ch {
	case '"', '\'', '`':
		return true
	default:
		return false
	}
}

View File

@@ -0,0 +1,170 @@
package agentcheck
import (
"regexp"
"strings"
)
var (
	// httpExceptionPattern matches a bare HTTPException identifier.
	httpExceptionPattern = regexp.MustCompile(`\bHTTPException\b`)
	// responseModelPattern matches FastAPI's response_model= keyword usage.
	responseModelPattern = regexp.MustCompile(`\bresponse_model\s*=`)
	// delayCallPattern matches Celery-style ".delay(" calls, tolerating
	// whitespace around the dot and before the paren.
	delayCallPattern = regexp.MustCompile(`\.\s*delay\s*\(`)
	// componentPathPattern matches import paths that reach into a
	// components/ directory, via the "@/" alias or relative paths.
	componentPathPattern = regexp.MustCompile(`["'](?:@/components/|\.\.?/components/|\.\.?/.*/components/)`)
	// importExportPattern matches lines beginning with import or export.
	importExportPattern = regexp.MustCompile(`^\s*(?:import|export)\b`)
)

// Scope decides whether a rule applies to a repo-relative path.
type Scope func(path string) bool

// Matcher decides whether a prepared line view violates a rule.
type Matcher func(line lineView) bool

// Rule pairs a path Scope with a line Matcher and the message reported when
// both hit.
type Rule struct {
	ID      string
	Message string
	Scope   Scope
	Match   Matcher
}

// lineView is an AddedLine plus derived views matchers work on: the
// normalized path, the comment-stripped code, that code with string literals
// blanked out, and a whitespace-trimmed form.
type lineView struct {
	AddedLine
	Path            string
	Code            string
	CodeSansStrings string
	TrimmedCode     string
}
// CheckAddedLines evaluates the added lines against the built-in default
// rule set and returns all violations found.
func CheckAddedLines(lines []AddedLine) []Violation {
	rules := DefaultRules()
	return CheckAddedLinesWithRules(lines, rules)
}
// CheckAddedLinesWithRules evaluates every added line against the supplied
// rule set, producing one Violation per (line, matching rule) pair, in input
// order. Lines whose path normalizes to "" (e.g. /dev/null) are skipped, as
// are rules whose Scope rejects the path or whose Match is nil.
func CheckAddedLinesWithRules(lines []AddedLine, rules []Rule) []Violation {
	var found []Violation
	for _, added := range lines {
		view := buildLineView(added)
		if view.Path == "" {
			continue
		}
		for _, rule := range rules {
			inScope := rule.Scope == nil || rule.Scope(view.Path)
			if !inScope || rule.Match == nil {
				continue
			}
			if !rule.Match(view) {
				continue
			}
			found = append(found, Violation{
				RuleID:  rule.ID,
				Path:    view.Path,
				LineNum: view.LineNum,
				Message: rule.Message,
				Content: view.Content,
			})
		}
	}
	return found
}
// DefaultRules returns a fresh copy of the built-in rule set so callers can
// append to or reorder it without mutating the package-level defaults.
func DefaultRules() []Rule {
	rules := make([]Rule, len(defaultRules))
	copy(rules, defaultRules)
	return rules
}
// defaultRules is the built-in agent-check policy: no new HTTPException or
// response_model= usage in backend product Python, no new Celery .delay()
// calls, and no new imports from the legacy web components tree. Matchers
// run on the comment/string-stripped views, so mentions inside comments or
// string literals do not trip the rules.
var defaultRules = []Rule{
	{
		ID:      "no-new-http-exception",
		Message: "Do not introduce new HTTPException usage in backend product code. Raise OnyxError instead.",
		// backend/onyx/main.py is explicitly exempt from this rule.
		Scope: backendProductPythonScope(exactPath("backend/onyx/main.py")),
		Match: func(line lineView) bool {
			return hasPythonCode(line) && httpExceptionPattern.MatchString(line.CodeSansStrings)
		},
	},
	{
		ID:      "no-new-response-model",
		Message: "Do not introduce response_model on new FastAPI APIs. Type the function directly instead.",
		Scope:   backendProductPythonScope(),
		Match: func(line lineView) bool {
			return hasPythonCode(line) && responseModelPattern.MatchString(line.CodeSansStrings)
		},
	},
	{
		ID:      "no-new-delay",
		Message: "Do not introduce Celery .delay() calls. Use an enqueue path that sets expires= explicitly.",
		Scope:   backendProductPythonScope(),
		Match: func(line lineView) bool {
			return hasPythonCode(line) && delayCallPattern.MatchString(line.CodeSansStrings)
		},
	},
	{
		ID:      "no-new-legacy-component-import",
		Message: "Do not introduce new imports from web/src/components. Prefer Opal or refresh-components.",
		Scope:   nonLegacyWebSourceScope(),
		Match: func(line lineView) bool {
			return isLegacyComponentImport(line)
		},
	},
}
// buildLineView precomputes the derived views rule matchers inspect: the
// normalized path, the comment-stripped code, that code with string literals
// blanked out, and a whitespace-trimmed form.
func buildLineView(line AddedLine) lineView {
	cleanPath := normalizeDiffPath(line.Path)
	codeOnly := stripLineComment(cleanPath, line.Content)
	return lineView{
		AddedLine:       line,
		Path:            cleanPath,
		Code:            codeOnly,
		CodeSansStrings: stripQuotedStrings(codeOnly),
		TrimmedCode:     strings.TrimSpace(codeOnly),
	}
}
// backendProductPythonScope builds a Scope matching backend product Python
// files. Tests, the model server, and __pycache__ artifacts are always
// excluded; callers may pass additional exclusion Scopes.
func backendProductPythonScope(excluded ...Scope) Scope {
	return func(path string) bool {
		isBackendPython := strings.HasPrefix(path, "backend/") && strings.HasSuffix(path, ".py")
		if !isBackendPython {
			return false
		}
		switch {
		case strings.HasPrefix(path, "backend/tests/"),
			strings.HasPrefix(path, "backend/model_server/"),
			strings.Contains(path, "/__pycache__/"):
			return false
		}
		for _, skip := range excluded {
			if skip != nil && skip(path) {
				return false
			}
		}
		return true
	}
}
// nonLegacyWebSourceScope builds a Scope matching files under web/src/
// except the legacy web/src/components/ tree itself.
func nonLegacyWebSourceScope() Scope {
	return func(path string) bool {
		inWebSrc := strings.HasPrefix(path, "web/src/")
		inLegacyTree := strings.HasPrefix(path, "web/src/components/")
		return inWebSrc && !inLegacyTree
	}
}
// exactPath builds a Scope that matches a single literal repo path.
func exactPath(target string) Scope {
	return func(path string) bool { return path == target }
}
// hasPythonCode reports whether anything remains on the line after comments
// and string literals have been stripped.
func hasPythonCode(line lineView) bool {
	remaining := strings.TrimSpace(line.CodeSansStrings)
	return remaining != ""
}
// isLegacyComponentImport reports whether the line is an import/export
// statement whose module path reaches into a components/ directory. The
// path check runs on Code (comments stripped, strings kept) because the
// quoted module path is what must be inspected.
func isLegacyComponentImport(line lineView) bool {
	if line.TrimmedCode == "" || !importExportPattern.MatchString(line.TrimmedCode) {
		return false
	}
	return componentPathPattern.MatchString(line.Code)
}

View File

@@ -0,0 +1,107 @@
package agentdocs
import (
"fmt"
"os"
"path/filepath"
"regexp"
"strings"
)
// markdownLinkPattern captures the target of inline markdown links:
// [text](target).
var markdownLinkPattern = regexp.MustCompile(`\[[^\]]+\]\(([^)]+)\)`)

// requiredFiles is the agent-lab knowledge base every participating repo
// must ship, relative to the repo root.
var requiredFiles = []string{
	"AGENTS.md",
	"docs/agent/README.md",
	"docs/agent/ARCHITECTURE.md",
	"docs/agent/BRANCHING.md",
	"docs/agent/HARNESS.md",
	"docs/agent/GOLDEN_RULES.md",
	"docs/agent/LEGACY_ZONES.md",
	"docs/agent/QUALITY_SCORE.md",
}

// Violation names a file plus a human-readable problem with it.
type Violation struct {
	Path    string
	Message string
}
// Validate checks the agent-lab knowledge base rooted at root: every
// required file must exist and its relative markdown links must resolve.
// Repos without docs/agent/README.md are treated as not participating and
// produce no violations.
func Validate(root string) []Violation {
	sentinel := filepath.Join(root, filepath.FromSlash("docs/agent/README.md"))
	if _, err := os.Stat(sentinel); err != nil {
		return nil
	}
	var violations []Violation
	// Pass 1: report required files that are missing outright.
	for _, relPath := range requiredFiles {
		if _, err := os.Stat(filepath.Join(root, filepath.FromSlash(relPath))); err != nil {
			violations = append(violations, Violation{
				Path:    relPath,
				Message: "required agent-lab knowledge-base file is missing",
			})
		}
	}
	// Pass 2: link-check every required file that is readable.
	for _, relPath := range requiredFiles {
		raw, err := os.ReadFile(filepath.Join(root, filepath.FromSlash(relPath)))
		if err != nil {
			continue
		}
		violations = append(violations, validateMarkdownLinks(root, relPath, string(raw))...)
	}
	return violations
}
// validateMarkdownLinks reports one violation for every markdown link in
// content whose file target does not exist on disk. External (http/https),
// fragment-only, and mailto links are skipped; "#anchor" suffixes are
// stripped before resolving, and relative targets resolve against the
// document's own directory.
func validateMarkdownLinks(root string, relPath string, content string) []Violation {
	var violations []Violation
	baseDir := filepath.Dir(filepath.Join(root, filepath.FromSlash(relPath)))
	for _, match := range markdownLinkPattern.FindAllStringSubmatch(content, -1) {
		if len(match) != 2 {
			continue
		}
		target := strings.TrimSpace(match[1])
		switch {
		case target == "",
			strings.HasPrefix(target, "http://"),
			strings.HasPrefix(target, "https://"),
			strings.HasPrefix(target, "#"),
			strings.HasPrefix(target, "mailto:"):
			continue
		}
		target = stripAnchor(target)
		resolved := target
		if !filepath.IsAbs(target) {
			resolved = filepath.Join(baseDir, target)
		}
		if _, err := os.Stat(resolved); err != nil {
			violations = append(violations, Violation{
				Path: relPath,
				Message: fmt.Sprintf(
					"broken markdown link target: %s",
					target,
				),
			})
		}
	}
	return violations
}
// stripAnchor drops a trailing "#fragment" from a markdown link target,
// returning the target unchanged when it has no fragment.
func stripAnchor(target string) string {
	if i := strings.IndexByte(target, '#'); i >= 0 {
		return target[:i]
	}
	return target
}

View File

@@ -0,0 +1,61 @@
package agentdocs
import (
"os"
"path/filepath"
"testing"
)
// TestValidateSuccess: a repo with the full knowledge base and only working
// relative links (including a parent-directory link) yields no violations.
func TestValidateSuccess(t *testing.T) {
	root := t.TempDir()
	writeFile(t, filepath.Join(root, "AGENTS.md"), `[Agent Docs](./docs/agent/README.md)`)
	writeFile(t, filepath.Join(root, "docs/agent/README.md"), `[Architecture](./ARCHITECTURE.md)
[Root](../../AGENTS.md)`)
	writeFile(t, filepath.Join(root, "docs/agent/ARCHITECTURE.md"), `ok`)
	writeFile(t, filepath.Join(root, "docs/agent/BRANCHING.md"), `ok`)
	writeFile(t, filepath.Join(root, "docs/agent/HARNESS.md"), `ok`)
	writeFile(t, filepath.Join(root, "docs/agent/GOLDEN_RULES.md"), `ok`)
	writeFile(t, filepath.Join(root, "docs/agent/LEGACY_ZONES.md"), `ok`)
	writeFile(t, filepath.Join(root, "docs/agent/QUALITY_SCORE.md"), `ok`)
	violations := Validate(root)
	if len(violations) != 0 {
		t.Fatalf("expected no violations, got %+v", violations)
	}
}
// TestValidateMissingAndBrokenLinks: with most required files absent and a
// dangling link in README.md, Validate must report multiple violations
// (missing-file entries plus the broken link).
func TestValidateMissingAndBrokenLinks(t *testing.T) {
	root := t.TempDir()
	writeFile(t, filepath.Join(root, "AGENTS.md"), `[Agent Docs](./docs/agent/README.md)`)
	writeFile(t, filepath.Join(root, "docs/agent/README.md"), `[Missing](./MISSING.md)`)
	writeFile(t, filepath.Join(root, "docs/agent/ARCHITECTURE.md"), `ok`)
	violations := Validate(root)
	if len(violations) < 2 {
		t.Fatalf("expected multiple violations, got %+v", violations)
	}
}
// TestValidateSkipsReposWithoutAgentLabDocs: absence of docs/agent/README.md
// (the opt-in sentinel) means the repo is not participating, so Validate
// returns nothing rather than flagging every required file as missing.
func TestValidateSkipsReposWithoutAgentLabDocs(t *testing.T) {
	root := t.TempDir()
	writeFile(t, filepath.Join(root, "README.md"), `plain repo`)
	violations := Validate(root)
	if len(violations) != 0 {
		t.Fatalf("expected no violations for repo without agent-lab docs, got %+v", violations)
	}
}
// writeFile creates path (and any missing parent directories) with the given
// content, failing the calling test immediately on any error.
func writeFile(t *testing.T, path string, content string) {
	t.Helper()
	parent := filepath.Dir(path)
	if err := os.MkdirAll(parent, 0o755); err != nil {
		t.Fatalf("failed to create dir for %s: %v", path, err)
	}
	if err := os.WriteFile(path, []byte(content), 0o644); err != nil {
		t.Fatalf("failed to write %s: %v", path, err)
	}
}

View File

@@ -0,0 +1,585 @@
package agentlab
import (
"crypto/sha256"
"encoding/hex"
"encoding/json"
"errors"
"fmt"
"net"
"os"
"os/exec"
"path/filepath"
"regexp"
"sort"
"strings"
"time"
)
const (
	// State lives directly under the repo's common git dir so every worktree
	// shares one state root.
	stateDirName     = "onyx-agent-lab"
	worktreesDirName = "worktrees"
	// Per-worktree env file names, written into the checkout (see BuildManifest).
	envFileName    = ".env.agent-lab"
	webEnvFileName = ".env.web.agent-lab"
	// Base ports for each service. NOTE(review): portSearchWindow presumably
	// bounds how far above these bases the allocator probes for a free port —
	// confirm against the port-allocation code (not visible here).
	defaultWebPort   = 3300
	defaultAPIPort   = 8380
	defaultModelPort = 9300
	defaultMCPPort   = 8390
	portSearchWindow = 400
	// Prefix for docker-compose project names (see ComposeProjectName).
	dockerProjectPrefix = "onyx"
	// Search infrastructure is always shared across worktrees in this version.
	searchInfraMode = "shared"
)

// nonAlphaNumPattern collapses runs of non-[a-z0-9] characters when slugging.
var nonAlphaNumPattern = regexp.MustCompile(`[^a-z0-9]+`)

// DependencyMode selects how a worktree's backing services are isolated.
type DependencyMode string

const (
	// DependencyModeShared reuses the repo's shared services.
	DependencyModeShared DependencyMode = "shared"
	// DependencyModeNamespaced gives the worktree its own Postgres database,
	// Redis key prefix, and file-store bucket (see BuildDependencyConfig).
	DependencyModeNamespaced DependencyMode = "namespaced"
)

// WorktreeLane classifies the kind of work a worktree branch carries.
type WorktreeLane string

const (
	WorktreeLaneLab     WorktreeLane = "lab"
	WorktreeLaneProduct WorktreeLane = "product"
	WorktreeLaneCustom  WorktreeLane = "custom"
)

// productBranchPrefixes are the conventional-commit-style branch prefixes
// that mark a branch as product work (see InferLane).
var productBranchPrefixes = []string{
	"build/",
	"chore/",
	"ci/",
	"docs/",
	"feat/",
	"fix/",
	"perf/",
	"refactor/",
	"revert/",
	"style/",
	"test/",
}

// DependencyConfig records how a worktree's dependencies are provisioned;
// the namespaced fields are empty in shared mode.
type DependencyConfig struct {
	Mode              DependencyMode `json:"mode"`
	Namespace         string         `json:"namespace,omitempty"`
	PostgresDatabase  string         `json:"postgres_database,omitempty"`
	RedisPrefix       string         `json:"redis_prefix,omitempty"`
	FileStoreBucket   string         `json:"file_store_bucket,omitempty"`
	SearchInfraMode   string         `json:"search_infra_mode"`
	LastProvisionedAt string         `json:"last_provisioned_at,omitempty"`
}

// PortSet holds the local port assigned to each worktree service.
type PortSet struct {
	Web         int `json:"web"`
	API         int `json:"api"`
	ModelServer int `json:"model_server"`
	MCP         int `json:"mcp"`
}

// URLSet holds the loopback URL for each exposed service.
type URLSet struct {
	Web string `json:"web"`
	API string `json:"api"`
	MCP string `json:"mcp"`
}

// Manifest is the persisted record of one agent-lab worktree: its identity,
// where it is checked out, where its state/artifact/env files live, and how
// its services and dependencies are wired.
type Manifest struct {
	ID                string           `json:"id"`
	Branch            string           `json:"branch"`
	Lane              WorktreeLane     `json:"lane,omitempty"`
	BaseRef           string           `json:"base_ref"`
	CreatedFromPath   string           `json:"created_from_path"`
	CheckoutPath      string           `json:"checkout_path"`
	StateDir          string           `json:"state_dir"`
	ArtifactDir       string           `json:"artifact_dir"`
	EnvFile           string           `json:"env_file"`
	WebEnvFile        string           `json:"web_env_file"`
	ComposeProject    string           `json:"compose_project"`
	Dependencies      DependencyConfig `json:"dependencies"`
	Ports             PortSet          `json:"ports"`
	URLs              URLSet           `json:"urls"`
	CreatedAt         time.Time        `json:"created_at"`
	LastVerifiedAt    string           `json:"last_verified_at,omitempty"`
	LastVerifySummary string           `json:"last_verify_summary,omitempty"`
}
// Slug normalizes value into a lowercase, dash-separated identifier: slashes
// and underscores become dashes, any other non-alphanumeric runs collapse to
// a single dash, and leading/trailing dashes are trimmed. An empty result
// falls back to "worktree".
func Slug(value string) string {
	lowered := strings.ToLower(strings.TrimSpace(value))
	lowered = strings.ReplaceAll(lowered, "/", "-")
	lowered = strings.ReplaceAll(lowered, "_", "-")
	slug := strings.Trim(nonAlphaNumPattern.ReplaceAllString(lowered, "-"), "-")
	if slug == "" {
		return "worktree"
	}
	return slug
}
// worktreeID derives a stable, human-readable id: the slug of value plus a
// short (4-byte) sha256 fingerprint of the raw value to avoid slug
// collisions between similar branch names.
func worktreeID(value string) string {
	digest := sha256.Sum256([]byte(value))
	return fmt.Sprintf("%s-%s", Slug(value), hex.EncodeToString(digest[:4]))
}
// ComposeProjectName builds the docker-compose project name for a worktree
// id, capping the slug at 32 characters to keep compose resource names short.
func ComposeProjectName(id string) string {
	slug := Slug(id)
	const maxSlugLen = 32
	if len(slug) > maxSlugLen {
		slug = slug[:maxSlugLen]
	}
	return dockerProjectPrefix + "-" + slug
}
// GetCommonGitDir returns the absolute path of the repository's common git
// directory, which is shared by the main checkout and all of its worktrees.
func GetCommonGitDir() (string, error) {
	out, err := exec.Command("git", "rev-parse", "--path-format=absolute", "--git-common-dir").Output()
	if err != nil {
		return "", fmt.Errorf("git rev-parse --git-common-dir failed: %w", err)
	}
	return strings.TrimSpace(string(out)), nil
}
// StateRoot returns the directory holding all agent-lab state for the repo,
// placed under the common git dir so every worktree shares it.
func StateRoot(commonGitDir string) string {
	return filepath.Join(commonGitDir, stateDirName)
}
// WorktreesRoot returns the directory holding per-worktree state dirs.
func WorktreesRoot(commonGitDir string) string {
	return filepath.Join(StateRoot(commonGitDir), worktreesDirName)
}
// WorktreeStateDir returns the state directory for one worktree id; the id
// is re-slugged so arbitrary identifiers still yield a safe directory name.
func WorktreeStateDir(commonGitDir, id string) string {
	return filepath.Join(WorktreesRoot(commonGitDir), Slug(id))
}
// ManifestPath returns where the manifest.json for a worktree id is stored.
func ManifestPath(commonGitDir, id string) string {
	return filepath.Join(WorktreeStateDir(commonGitDir, id), "manifest.json")
}
// DefaultCheckoutPath places a new worktree checkout in a sibling
// "<repo>-worktrees" directory next to the main repo, named by the stable
// worktree id derived from the identifier.
func DefaultCheckoutPath(repoRoot, id string) string {
	container := filepath.Join(filepath.Dir(repoRoot), filepath.Base(repoRoot)+"-worktrees")
	return filepath.Join(container, worktreeID(id))
}
// NormalizeBranchForLane strips well-known ref prefixes — refs/heads/, then
// origin/, then codex/ (each at most once, in that order) — so lane
// inference sees the bare branch name.
func NormalizeBranchForLane(branch string) string {
	name := strings.TrimSpace(branch)
	for _, prefix := range []string{"refs/heads/", "origin/", "codex/"} {
		name = strings.TrimPrefix(name, prefix)
	}
	return name
}
// InferLane classifies a branch by its normalized name: lab/ branches are
// harness (lab) work, conventional-commit prefixes (feat/, fix/, ...) are
// product work, and everything else is custom.
func InferLane(branch string) WorktreeLane {
	name := NormalizeBranchForLane(branch)
	if strings.HasPrefix(name, "lab/") {
		return WorktreeLaneLab
	}
	for _, productPrefix := range productBranchPrefixes {
		if strings.HasPrefix(name, productPrefix) {
			return WorktreeLaneProduct
		}
	}
	return WorktreeLaneCustom
}
// BaseRefSelection is the outcome of choosing a base ref for a new worktree:
// the ref to branch from, the lane inferred from the branch name, and a
// human-readable explanation of why that ref was picked.
type BaseRefSelection struct {
	Ref    string
	Lane   WorktreeLane
	Reason string
}
// ResolveCreateBaseRef picks the base ref for a new worktree branch. An
// explicit `requested` ref always wins. Otherwise the lane inferred from the
// branch name selects a preferred ref — agent-lab refs for lab work,
// origin/main (then main) for product work — checked for existence via the
// injected refExists, with HEAD as the fallback. Injection keeps this
// function free of direct git calls and unit-testable.
func ResolveCreateBaseRef(branch, requested string, refExists func(string) bool) BaseRefSelection {
	lane := InferLane(branch)
	if requested != "" {
		return BaseRefSelection{
			Ref:    requested,
			Lane:   lane,
			Reason: "using explicit --from value",
		}
	}
	switch lane {
	case WorktreeLaneLab:
		// Local refs are preferred over their origin/ counterparts.
		for _, candidate := range []string{"codex/agent-lab", "agent-lab", "origin/codex/agent-lab", "origin/agent-lab"} {
			if refExists(candidate) {
				return BaseRefSelection{
					Ref:    candidate,
					Lane:   lane,
					Reason: fmt.Sprintf("inferred lab lane from branch name; using %s as the base ref", candidate),
				}
			}
		}
		return BaseRefSelection{
			Ref:    "HEAD",
			Lane:   lane,
			Reason: "inferred lab lane from branch name, but no agent-lab ref exists locally; falling back to HEAD",
		}
	case WorktreeLaneProduct:
		// origin/main first, so product branches start from the freshest
		// fetched main rather than a possibly stale local main.
		for _, candidate := range []string{"origin/main", "main"} {
			if refExists(candidate) {
				return BaseRefSelection{
					Ref:    candidate,
					Lane:   lane,
					Reason: fmt.Sprintf("inferred product lane from branch name; using %s as the base ref", candidate),
				}
			}
		}
		return BaseRefSelection{
			Ref:    "HEAD",
			Lane:   lane,
			Reason: "inferred product lane from branch name, but no main ref exists locally; falling back to HEAD",
		}
	default:
		return BaseRefSelection{
			Ref:    "HEAD",
			Lane:   lane,
			Reason: "no lane inferred from branch name; defaulting to HEAD. Prefer codex/lab/... for harness work and codex/fix... or codex/feat... for product work, or pass --from explicitly",
		}
	}
}
// GitRefExists reports whether ref resolves in the current repository,
// using `git rev-parse --verify --quiet`.
func GitRefExists(ref string) bool {
	err := exec.Command("git", "rev-parse", "--verify", "--quiet", ref).Run()
	return err == nil
}
// BuildManifest assembles a worktree manifest from the creation inputs:
// identity derived from the branch name, filesystem layout (state, artifact,
// and env-file locations), localhost URLs derived from the allocated ports,
// and the dependency configuration for the chosen mode. Env files live under
// the checkout's .vscode directory.
func BuildManifest(repoRoot, commonGitDir, branch string, lane WorktreeLane, baseRef, checkoutPath string, ports PortSet, dependencyMode DependencyMode) Manifest {
	id := worktreeID(branch)
	stateDir := WorktreeStateDir(commonGitDir, id)
	artifactDir := filepath.Join(stateDir, "artifacts")
	envDir := filepath.Join(checkoutPath, ".vscode")
	return Manifest{
		ID: id,
		Branch: branch,
		Lane: lane,
		BaseRef: baseRef,
		CreatedFromPath: repoRoot,
		CheckoutPath: checkoutPath,
		StateDir: stateDir,
		ArtifactDir: artifactDir,
		EnvFile: filepath.Join(envDir, envFileName),
		WebEnvFile: filepath.Join(envDir, webEnvFileName),
		ComposeProject: ComposeProjectName(id),
		Dependencies: BuildDependencyConfig(branch, dependencyMode),
		Ports: ports,
		// All services are exposed on loopback only.
		URLs: URLSet{
			Web: fmt.Sprintf("http://127.0.0.1:%d", ports.Web),
			API: fmt.Sprintf("http://127.0.0.1:%d", ports.API),
			MCP: fmt.Sprintf("http://127.0.0.1:%d", ports.MCP),
		},
		CreatedAt: time.Now().UTC(),
	}
}
// ResolvedLane returns the lane recorded on the manifest, inferring it from
// the branch name for manifests that never recorded one.
func (m Manifest) ResolvedLane() WorktreeLane {
	if lane := m.Lane; lane != "" {
		return lane
	}
	return InferLane(m.Branch)
}
// BuildDependencyConfig derives per-worktree dependency settings. An empty
// mode defaults to shared. Shared mode carries only the mode and search-infra
// mode; namespaced mode additionally derives a Postgres database name, Redis
// key prefix, and file-store bucket name from the worktree id. Names are
// truncated to 63 characters (presumably the Postgres identifier / S3 bucket
// length limits — confirm), and the bucket is trimmed of leading/trailing
// hyphens after truncation.
func BuildDependencyConfig(branch string, mode DependencyMode) DependencyConfig {
	if mode == "" {
		mode = DependencyModeShared
	}
	config := DependencyConfig{
		Mode: mode,
		SearchInfraMode: searchInfraMode,
	}
	if mode != DependencyModeNamespaced {
		return config
	}
	namespace := worktreeID(branch)
	// Postgres identifiers cannot contain hyphens without quoting.
	dbSuffix := strings.ReplaceAll(namespace, "-", "_")
	database := fmt.Sprintf("agentlab_%s", dbSuffix)
	if len(database) > 63 {
		database = database[:63]
	}
	bucket := fmt.Sprintf("onyx-agentlab-%s", namespace)
	if len(bucket) > 63 {
		bucket = bucket[:63]
		// Truncation may leave a trailing hyphen; strip it.
		bucket = strings.Trim(bucket, "-")
	}
	config.Namespace = namespace
	config.PostgresDatabase = database
	config.RedisPrefix = fmt.Sprintf("agentlab:%s", namespace)
	config.FileStoreBucket = bucket
	return config
}
// ResolvedDependencies returns the manifest's dependency configuration,
// rebuilding a shared-mode default for legacy manifests without one and
// backfilling a missing search-infra mode.
func (m Manifest) ResolvedDependencies() DependencyConfig {
	if m.Dependencies.Mode == "" {
		return BuildDependencyConfig(m.Branch, DependencyModeShared)
	}
	deps := m.Dependencies
	if deps.SearchInfraMode == "" {
		deps.SearchInfraMode = searchInfraMode
	}
	return deps
}
// RuntimeEnv builds the environment-variable overrides for processes running
// inside this worktree: agent-lab bookkeeping variables, the worktree's web/
// API/MCP URLs and web port, plus — in namespaced dependency mode — the
// Postgres database, Redis prefix, and file-store bucket overrides.
func (m Manifest) RuntimeEnv() map[string]string {
	env := map[string]string{
		"AGENT_LAB_ARTIFACT_DIR": m.ArtifactDir,
		"AGENT_LAB_DEPENDENCY_MODE": string(m.ResolvedDependencies().Mode),
		"AGENT_LAB_SEARCH_INFRA_MODE": m.ResolvedDependencies().SearchInfraMode,
		"AGENT_LAB_WORKTREE_ID": m.ID,
		"AGENT_LAB_WORKTREE_URL": m.URLs.Web,
		"BASE_URL": m.URLs.Web,
		"INTERNAL_URL": m.URLs.API,
		"MCP_INTERNAL_URL": m.URLs.MCP,
		"PORT": fmt.Sprintf("%d", m.Ports.Web),
		"WEB_DOMAIN": m.URLs.Web,
	}
	deps := m.ResolvedDependencies()
	if deps.Namespace != "" {
		env["AGENT_LAB_NAMESPACE"] = deps.Namespace
	}
	// Only namespaced mode overrides the shared backing-store settings.
	if deps.Mode == DependencyModeNamespaced {
		env["POSTGRES_DB"] = deps.PostgresDatabase
		env["DEFAULT_REDIS_PREFIX"] = deps.RedisPrefix
		env["S3_FILE_STORE_BUCKET_NAME"] = deps.FileStoreBucket
	}
	return env
}
// ShellEnv returns the variables to export into interactive shells opened in
// this worktree; currently identical to the runtime environment.
func (m Manifest) ShellEnv() map[string]string {
	env := m.RuntimeEnv()
	return env
}
// DependencyWarnings returns user-facing caveats about dependency isolation.
// Search infrastructure is never namespaced by agent-lab, so a warning is
// emitted whenever the resolved config carries the shared search-infra mode.
func (m Manifest) DependencyWarnings() []string {
	if m.ResolvedDependencies().SearchInfraMode != searchInfraMode {
		return nil
	}
	return []string{
		"Search infrastructure remains shared across worktrees. OpenSearch/Vespa state is not namespaced or torn down by agent-lab.",
	}
}
// EnvFileContents renders the worktree-local env-file text for the given kind
// ("web" for the frontend file; anything else produces the backend file).
// Both variants share the agent-lab bookkeeping lines; the web file adds the
// PORT and BASE_URL, while the backend file adds the namespaced data-store
// overrides when namespaced dependency mode is active. The result always ends
// with a trailing newline.
func (m Manifest) EnvFileContents(kind string) string {
	values := m.RuntimeEnv()
	deps := m.ResolvedDependencies()
	var lines []string
	lines = append(lines, "# Generated by `ods worktree create` for agent-lab.")
	lines = append(lines, "# This file only contains worktree-local overrides.")
	lines = append(lines, fmt.Sprintf("AGENT_LAB_WORKTREE_ID=%s", m.ID))
	lines = append(lines, fmt.Sprintf("AGENT_LAB_ARTIFACT_DIR=%s", m.ArtifactDir))
	lines = append(lines, fmt.Sprintf("AGENT_LAB_DEPENDENCY_MODE=%s", deps.Mode))
	lines = append(lines, fmt.Sprintf("AGENT_LAB_SEARCH_INFRA_MODE=%s", deps.SearchInfraMode))
	if deps.Namespace != "" {
		lines = append(lines, fmt.Sprintf("AGENT_LAB_NAMESPACE=%s", deps.Namespace))
	}
	switch kind {
	case "web":
		lines = append(lines, fmt.Sprintf("PORT=%d", m.Ports.Web))
		lines = append(lines, fmt.Sprintf("BASE_URL=%s", values["BASE_URL"]))
		lines = append(lines, fmt.Sprintf("WEB_DOMAIN=%s", values["WEB_DOMAIN"]))
		lines = append(lines, fmt.Sprintf("INTERNAL_URL=%s", values["INTERNAL_URL"]))
		lines = append(lines, fmt.Sprintf("MCP_INTERNAL_URL=%s", values["MCP_INTERNAL_URL"]))
	default:
		lines = append(lines, fmt.Sprintf("WEB_DOMAIN=%s", values["WEB_DOMAIN"]))
		lines = append(lines, fmt.Sprintf("INTERNAL_URL=%s", values["INTERNAL_URL"]))
		lines = append(lines, fmt.Sprintf("MCP_INTERNAL_URL=%s", values["MCP_INTERNAL_URL"]))
		if deps.Mode == DependencyModeNamespaced {
			lines = append(lines, fmt.Sprintf("POSTGRES_DB=%s", deps.PostgresDatabase))
			lines = append(lines, fmt.Sprintf("DEFAULT_REDIS_PREFIX=%s", deps.RedisPrefix))
			lines = append(lines, fmt.Sprintf("S3_FILE_STORE_BUCKET_NAME=%s", deps.FileStoreBucket))
		}
	}
	return strings.Join(lines, "\n") + "\n"
}
// WriteManifest persists the manifest as indented JSON under the worktree's
// state directory, creating the state and artifact directories first so a
// fresh worktree can be written in one call.
func WriteManifest(commonGitDir string, manifest Manifest) error {
	stateDir := WorktreeStateDir(commonGitDir, manifest.ID)
	if err := os.MkdirAll(stateDir, 0755); err != nil {
		return fmt.Errorf("create worktree state dir: %w", err)
	}
	if err := os.MkdirAll(manifest.ArtifactDir, 0755); err != nil {
		return fmt.Errorf("create artifact dir: %w", err)
	}
	data, err := json.MarshalIndent(manifest, "", " ")
	if err != nil {
		return fmt.Errorf("marshal manifest: %w", err)
	}
	if err := os.WriteFile(ManifestPath(commonGitDir, manifest.ID), data, 0644); err != nil {
		return fmt.Errorf("write manifest: %w", err)
	}
	return nil
}
// WriteEnvFiles materializes the backend and web env-override files for the
// worktree, creating the containing .vscode directory if needed.
func WriteEnvFiles(manifest Manifest) error {
	if err := os.MkdirAll(filepath.Dir(manifest.EnvFile), 0755); err != nil {
		return fmt.Errorf("create env dir: %w", err)
	}
	targets := []struct {
		path string
		kind string
	}{
		{path: manifest.EnvFile, kind: "backend"},
		{path: manifest.WebEnvFile, kind: "web"},
	}
	for _, target := range targets {
		contents := []byte(manifest.EnvFileContents(target.kind))
		if err := os.WriteFile(target.path, contents, 0644); err != nil {
			return fmt.Errorf("write %s env file: %w", target.kind, err)
		}
	}
	return nil
}
// LoadAll reads every manifest under the worktrees root, sorted by branch
// name. A missing worktrees directory yields (nil, nil) rather than an error;
// any unreadable or unparsable manifest aborts the whole load.
func LoadAll(commonGitDir string) ([]Manifest, error) {
	worktreesRoot := WorktreesRoot(commonGitDir)
	entries, err := os.ReadDir(worktreesRoot)
	if err != nil {
		// No worktrees have been created yet; treat as empty, not an error.
		if errors.Is(err, os.ErrNotExist) {
			return nil, nil
		}
		return nil, fmt.Errorf("read worktrees dir: %w", err)
	}
	manifests := make([]Manifest, 0, len(entries))
	for _, entry := range entries {
		// Each worktree owns one subdirectory; skip stray files.
		if !entry.IsDir() {
			continue
		}
		manifest, err := LoadManifest(filepath.Join(worktreesRoot, entry.Name(), "manifest.json"))
		if err != nil {
			return nil, err
		}
		manifests = append(manifests, manifest)
	}
	sort.Slice(manifests, func(i, j int) bool {
		return manifests[i].Branch < manifests[j].Branch
	})
	return manifests, nil
}
// LoadManifest reads and decodes a single manifest.json from path.
func LoadManifest(path string) (Manifest, error) {
	var loaded Manifest
	raw, err := os.ReadFile(path)
	if err != nil {
		return loaded, fmt.Errorf("read manifest %s: %w", path, err)
	}
	if err = json.Unmarshal(raw, &loaded); err != nil {
		return loaded, fmt.Errorf("parse manifest %s: %w", path, err)
	}
	return loaded, nil
}
// FindByRepoRoot looks up the manifest whose checkout path equals repoRoot
// after path cleaning and symlink resolution. The boolean reports whether a
// match was found.
func FindByRepoRoot(commonGitDir, repoRoot string) (Manifest, bool, error) {
	manifests, err := LoadAll(commonGitDir)
	if err != nil {
		return Manifest{}, false, err
	}
	want := normalizePath(repoRoot)
	for _, candidate := range manifests {
		if normalizePath(candidate.CheckoutPath) != want {
			continue
		}
		return candidate, true, nil
	}
	return Manifest{}, false, nil
}
// FindByIdentifier resolves a user-supplied identifier to a manifest. Exact
// matches on id, branch, or (normalized) checkout path win immediately.
// Otherwise the identifier's slug is compared against each branch's slug;
// exactly one slug match resolves, while multiple slug matches return an
// error asking for a more specific identifier.
func FindByIdentifier(commonGitDir, identifier string) (Manifest, bool, error) {
	manifests, err := LoadAll(commonGitDir)
	if err != nil {
		return Manifest{}, false, err
	}
	slug := Slug(identifier)
	cleanIdentifier := normalizePath(identifier)
	var slugMatches []Manifest
	for _, manifest := range manifests {
		switch {
		case manifest.ID == slug:
			return manifest, true, nil
		case manifest.Branch == identifier:
			return manifest, true, nil
		case normalizePath(manifest.CheckoutPath) == cleanIdentifier:
			return manifest, true, nil
		case slug != "" && Slug(manifest.Branch) == slug:
			// Slugging is lossy (e.g. "_" and "-" collide), so collect all
			// candidates and only accept an unambiguous single match below.
			slugMatches = append(slugMatches, manifest)
		}
	}
	if len(slugMatches) == 1 {
		return slugMatches[0], true, nil
	}
	if len(slugMatches) > 1 {
		return Manifest{}, false, fmt.Errorf("identifier %q matches multiple worktrees; use the branch, full id, or checkout path", identifier)
	}
	return Manifest{}, false, nil
}
// RemoveState deletes the worktree's on-disk state directory (manifest and
// artifacts included). Already-absent state is not an error.
func RemoveState(commonGitDir, id string) error {
	err := os.RemoveAll(WorktreeStateDir(commonGitDir, id))
	if err != nil {
		return fmt.Errorf("remove worktree state: %w", err)
	}
	return nil
}
// UpdateVerification records the latest verification summary path and UTC
// RFC 3339 timestamp on the manifest, then persists it.
func UpdateVerification(commonGitDir string, manifest Manifest, summaryPath string, verifiedAt time.Time) error {
	stamp := verifiedAt.UTC().Format(time.RFC3339)
	manifest.LastVerifySummary = summaryPath
	manifest.LastVerifiedAt = stamp
	return WriteManifest(commonGitDir, manifest)
}
// AllocatePorts picks a port set for a new worktree. All four defaults are
// shifted by the same offset, skipping offsets that collide with ports
// already reserved by existing manifests or that fail a live bind probe.
// Fails after portSearchWindow offsets have been exhausted.
func AllocatePorts(existing []Manifest) (PortSet, error) {
	// Ports claimed by existing worktrees, whether or not currently bound.
	reserved := make(map[int]bool)
	for _, manifest := range existing {
		reserved[manifest.Ports.Web] = true
		reserved[manifest.Ports.API] = true
		reserved[manifest.Ports.ModelServer] = true
		reserved[manifest.Ports.MCP] = true
	}
	for offset := 0; offset < portSearchWindow; offset++ {
		ports := PortSet{
			Web: defaultWebPort + offset,
			API: defaultAPIPort + offset,
			ModelServer: defaultModelPort + offset,
			MCP: defaultMCPPort + offset,
		}
		if reserved[ports.Web] || reserved[ports.API] || reserved[ports.ModelServer] || reserved[ports.MCP] {
			continue
		}
		// Probe the OS too; a reservation map can't see unrelated processes.
		if portsAvailable(ports) {
			return ports, nil
		}
	}
	return PortSet{}, fmt.Errorf("failed to allocate an available worktree port set after %d attempts", portSearchWindow)
}
// portsAvailable reports whether every port in the set can currently be bound
// over TCP; each probe listener is closed immediately after the check.
func portsAvailable(ports PortSet) bool {
	for _, port := range []int{ports.Web, ports.API, ports.ModelServer, ports.MCP} {
		listener, err := net.Listen("tcp", fmt.Sprintf(":%d", port))
		if err != nil {
			return false
		}
		_ = listener.Close()
	}
	return true
}
func normalizePath(path string) string {
clean := filepath.Clean(path)
resolved, err := filepath.EvalSymlinks(clean)
if err == nil {
return filepath.Clean(resolved)
}
return clean
}

View File

@@ -0,0 +1,312 @@
package agentlab
import (
"os"
"path/filepath"
"strings"
"testing"
)
// TestSlug checks branch-name slugging, including a whitespace-only input
// falling back to the "worktree" placeholder.
func TestSlug(t *testing.T) {
	t.Parallel()
	tests := map[string]string{
		"feat/My Feature": "feat-my-feature",
		"lab/agent_docs": "lab-agent-docs",
		" ": "worktree",
	}
	for input, want := range tests {
		input := input
		want := want
		t.Run(input, func(t *testing.T) {
			t.Parallel()
			if got := Slug(input); got != want {
				t.Fatalf("Slug(%q) = %q, want %q", input, got, want)
			}
		})
	}
}
// TestWorktreeIDIsCollisionResistant verifies that branches whose slugs
// collide ("foo_bar" vs "foo-bar") still get distinct worktree ids, and that
// the id keeps a readable slug prefix.
func TestWorktreeIDIsCollisionResistant(t *testing.T) {
	t.Parallel()
	idOne := worktreeID("feat/foo_bar")
	idTwo := worktreeID("feat/foo-bar")
	if idOne == idTwo {
		t.Fatalf("expected distinct worktree ids, got %q", idOne)
	}
	if !strings.HasPrefix(idOne, "feat-foo-bar-") {
		t.Fatalf("unexpected worktree id format: %s", idOne)
	}
}
// TestInferLane covers lane classification for lab, product, and custom
// branch names, with and without the codex/ prefix.
func TestInferLane(t *testing.T) {
	t.Parallel()
	tests := map[string]WorktreeLane{
		"lab/docs": WorktreeLaneLab,
		"codex/lab/docs": WorktreeLaneLab,
		"fix/auth-banner-modal": WorktreeLaneProduct,
		"codex/feat/agent-check": WorktreeLaneProduct,
		"chore/update-readme": WorktreeLaneProduct,
		"codex/auth-banner-modal": WorktreeLaneCustom,
		"agent-lab": WorktreeLaneCustom,
	}
	for branch, want := range tests {
		branch := branch
		want := want
		t.Run(branch, func(t *testing.T) {
			t.Parallel()
			if got := InferLane(branch); got != want {
				t.Fatalf("InferLane(%q) = %q, want %q", branch, got, want)
			}
		})
	}
}
// TestResolveCreateBaseRef exercises base-ref selection for product, lab,
// explicit --from, and custom-lane fallback cases using a stubbed refExists.
func TestResolveCreateBaseRef(t *testing.T) {
	t.Parallel()
	// Pretend only the local lab ref and remote main exist.
	refExists := func(ref string) bool {
		switch ref {
		case "codex/agent-lab", "origin/main":
			return true
		default:
			return false
		}
	}
	product := ResolveCreateBaseRef("codex/fix/auth-banner-modal", "", refExists)
	if product.Ref != "origin/main" || product.Lane != WorktreeLaneProduct {
		t.Fatalf("unexpected product base selection: %+v", product)
	}
	lab := ResolveCreateBaseRef("codex/lab/bootstrap-docs", "", refExists)
	if lab.Ref != "codex/agent-lab" || lab.Lane != WorktreeLaneLab {
		t.Fatalf("unexpected lab base selection: %+v", lab)
	}
	explicit := ResolveCreateBaseRef("codex/auth-banner-modal", "origin/release", refExists)
	if explicit.Ref != "origin/release" || explicit.Lane != WorktreeLaneCustom {
		t.Fatalf("unexpected explicit base selection: %+v", explicit)
	}
	custom := ResolveCreateBaseRef("codex/auth-banner-modal", "", refExists)
	if custom.Ref != "HEAD" || custom.Lane != WorktreeLaneCustom {
		t.Fatalf("unexpected custom base selection: %+v", custom)
	}
}
// TestBuildManifest checks derived manifest fields: id, URLs, compose project
// name, runtime env, and namespaced Postgres/Redis settings.
func TestBuildManifest(t *testing.T) {
	t.Parallel()
	ports := PortSet{Web: 3301, API: 8381, ModelServer: 9301, MCP: 8391}
	manifest := BuildManifest(
		"/repo/main",
		"/repo/.git",
		"feat/agent-harness",
		WorktreeLaneProduct,
		"origin/main",
		"/worktrees/feat-agent-harness",
		ports,
		DependencyModeNamespaced,
	)
	if manifest.ID != worktreeID("feat/agent-harness") {
		t.Fatalf("unexpected manifest id: %s", manifest.ID)
	}
	if manifest.URLs.Web != "http://127.0.0.1:3301" {
		t.Fatalf("unexpected web url: %s", manifest.URLs.Web)
	}
	if manifest.ComposeProject != "onyx-"+worktreeID("feat/agent-harness") {
		t.Fatalf("unexpected compose project: %s", manifest.ComposeProject)
	}
	if got := manifest.ShellEnv()["INTERNAL_URL"]; got != "http://127.0.0.1:8381" {
		t.Fatalf("unexpected INTERNAL_URL: %s", got)
	}
	if got := manifest.ResolvedDependencies().PostgresDatabase; got != "agentlab_"+strings.ReplaceAll(worktreeID("feat/agent-harness"), "-", "_") {
		t.Fatalf("unexpected postgres database: %s", got)
	}
	if got := manifest.RuntimeEnv()["DEFAULT_REDIS_PREFIX"]; got != "agentlab:"+worktreeID("feat/agent-harness") {
		t.Fatalf("unexpected redis prefix: %s", got)
	}
}
// TestWriteManifestAndLoadAll round-trips a manifest through WriteManifest
// and LoadAll using a temp directory as the common git dir.
func TestWriteManifestAndLoadAll(t *testing.T) {
	t.Parallel()
	commonGitDir := t.TempDir()
	manifest := BuildManifest(
		"/repo/main",
		commonGitDir,
		"lab/docs",
		WorktreeLaneLab,
		"HEAD",
		"/repo-worktrees/lab-docs",
		PortSet{Web: 3302, API: 8382, ModelServer: 9302, MCP: 8392},
		DependencyModeShared,
	)
	if err := WriteManifest(commonGitDir, manifest); err != nil {
		t.Fatalf("WriteManifest() error = %v", err)
	}
	manifests, err := LoadAll(commonGitDir)
	if err != nil {
		t.Fatalf("LoadAll() error = %v", err)
	}
	if len(manifests) != 1 {
		t.Fatalf("LoadAll() length = %d, want 1", len(manifests))
	}
	if manifests[0].Branch != manifest.Branch {
		t.Fatalf("unexpected branch: %s", manifests[0].Branch)
	}
}
// TestWriteEnvFiles verifies both env files are written and that the backend
// file carries the namespaced Postgres/Redis/bucket overrides.
func TestWriteEnvFiles(t *testing.T) {
	t.Parallel()
	root := t.TempDir()
	manifest := BuildManifest(
		"/repo/main",
		filepath.Join(root, ".git"),
		"feat/env",
		WorktreeLaneProduct,
		"HEAD",
		root,
		PortSet{Web: 3303, API: 8383, ModelServer: 9303, MCP: 8393},
		DependencyModeNamespaced,
	)
	if err := WriteEnvFiles(manifest); err != nil {
		t.Fatalf("WriteEnvFiles() error = %v", err)
	}
	for _, path := range []string{manifest.EnvFile, manifest.WebEnvFile} {
		if _, err := os.Stat(path); err != nil {
			t.Fatalf("expected env file %s to exist: %v", path, err)
		}
	}
	backendEnv, err := os.ReadFile(manifest.EnvFile)
	if err != nil {
		t.Fatalf("read backend env file: %v", err)
	}
	if !containsAll(
		string(backendEnv),
		"POSTGRES_DB=agentlab_"+strings.ReplaceAll(worktreeID("feat/env"), "-", "_"),
		"DEFAULT_REDIS_PREFIX=agentlab:"+worktreeID("feat/env"),
		"S3_FILE_STORE_BUCKET_NAME=onyx-agentlab-"+worktreeID("feat/env"),
	) {
		t.Fatalf("backend env file missing dependency namespace entries: %s", string(backendEnv))
	}
}
// TestFindByIdentifierRejectsAmbiguousSlug writes two manifests whose
// branches slug to the same value and asserts lookup by that slug errors
// instead of silently picking one.
func TestFindByIdentifierRejectsAmbiguousSlug(t *testing.T) {
	t.Parallel()
	commonGitDir := t.TempDir()
	manifests := []Manifest{
		BuildManifest(
			"/repo/main",
			commonGitDir,
			"feat/foo_bar",
			WorktreeLaneProduct,
			"HEAD",
			"/repo-worktrees/"+worktreeID("feat/foo_bar"),
			PortSet{Web: 3302, API: 8382, ModelServer: 9302, MCP: 8392},
			DependencyModeNamespaced,
		),
		BuildManifest(
			"/repo/main",
			commonGitDir,
			"feat/foo-bar",
			WorktreeLaneProduct,
			"HEAD",
			"/repo-worktrees/"+worktreeID("feat/foo-bar"),
			PortSet{Web: 3303, API: 8383, ModelServer: 9303, MCP: 8393},
			DependencyModeNamespaced,
		),
	}
	for _, manifest := range manifests {
		if err := WriteManifest(commonGitDir, manifest); err != nil {
			t.Fatalf("WriteManifest() error = %v", err)
		}
	}
	if _, found, err := FindByIdentifier(commonGitDir, "feat-foo-bar"); err == nil || found {
		t.Fatalf("expected ambiguous slug lookup to fail, found=%t err=%v", found, err)
	}
}
// TestBootstrapLinksAndClonesFromSource sets up a fake source checkout with
// env files, a .venv, and node_modules, then verifies Bootstrap links the env
// files and .venv and clones node_modules into the new checkout.
func TestBootstrapLinksAndClonesFromSource(t *testing.T) {
	t.Parallel()
	sourceRoot := t.TempDir()
	checkoutRoot := t.TempDir()
	commonGitDir := filepath.Join(sourceRoot, ".git")
	writeTestFile(t, filepath.Join(sourceRoot, ".vscode", ".env"), "OPENAI_API_KEY=test\n")
	writeTestFile(t, filepath.Join(sourceRoot, ".vscode", ".env.web"), "AUTH_TYPE=basic\n")
	writeTestFile(t, filepath.Join(sourceRoot, ".venv", "bin", "python"), "#!/bin/sh\n")
	writeTestFile(t, filepath.Join(sourceRoot, "web", "node_modules", ".bin", "next"), "#!/bin/sh\n")
	manifest := BuildManifest(
		sourceRoot,
		commonGitDir,
		"feat/bootstrap",
		WorktreeLaneProduct,
		"HEAD",
		checkoutRoot,
		PortSet{Web: 3305, API: 8385, ModelServer: 9305, MCP: 8395},
		DependencyModeNamespaced,
	)
	result, err := Bootstrap(manifest, BootstrapOptions{
		EnvMode: BootstrapModeLink,
		PythonMode: BootstrapModeLink,
		WebMode: BootstrapModeClone,
	})
	if err != nil {
		t.Fatalf("Bootstrap() error = %v", err)
	}
	if len(result.Actions) == 0 {
		t.Fatal("expected bootstrap actions to be recorded")
	}
	if target, err := os.Readlink(filepath.Join(checkoutRoot, ".vscode", ".env")); err != nil || target == "" {
		t.Fatalf("expected .vscode/.env symlink, err=%v target=%q", err, target)
	}
	if target, err := os.Readlink(filepath.Join(checkoutRoot, ".venv")); err != nil || target == "" {
		t.Fatalf("expected .venv symlink, err=%v target=%q", err, target)
	}
	if _, err := os.Stat(filepath.Join(checkoutRoot, "web", "node_modules", ".bin", "next")); err != nil {
		t.Fatalf("expected cloned node_modules marker: %v", err)
	}
	if _, err := os.Lstat(filepath.Join(checkoutRoot, "web", "node_modules")); err != nil {
		t.Fatalf("expected node_modules to exist: %v", err)
	}
}
// writeTestFile creates path (and any missing parent directories) with the
// given content, failing the test on any error.
func writeTestFile(t *testing.T, path string, content string) {
	t.Helper()
	dir := filepath.Dir(path)
	if err := os.MkdirAll(dir, 0o755); err != nil {
		t.Fatalf("mkdir %s: %v", dir, err)
	}
	if err := os.WriteFile(path, []byte(content), 0o644); err != nil {
		t.Fatalf("write %s: %v", path, err)
	}
}
// containsAll reports whether value contains every element of parts as a
// substring.
func containsAll(value string, parts ...string) bool {
	for _, needle := range parts {
		if strings.Contains(value, needle) {
			continue
		}
		return false
	}
	return true
}

View File

@@ -0,0 +1,233 @@
package agentlab
import (
"fmt"
"os"
"os/exec"
"path/filepath"
"runtime"
)
// BootstrapMode selects how a bootstrap step provisions an asset in the new
// checkout (see the constants below for the supported strategies).
type BootstrapMode string
const (
	// BootstrapModeAuto lets each step pick a sensible strategy.
	BootstrapModeAuto BootstrapMode = "auto"
	// BootstrapModeSkip leaves the asset untouched.
	BootstrapModeSkip BootstrapMode = "skip"
	// BootstrapModeLink symlinks the asset from the source checkout.
	BootstrapModeLink BootstrapMode = "link"
	// BootstrapModeCopy copies the asset from the source checkout.
	BootstrapModeCopy BootstrapMode = "copy"
	// BootstrapModeClone clones the asset (copy-on-write where supported).
	BootstrapModeClone BootstrapMode = "clone"
	// BootstrapModeNPM installs web dependencies with `npm ci`.
	BootstrapModeNPM BootstrapMode = "npm"
)
// BootstrapOptions holds the per-asset modes for a bootstrap run.
type BootstrapOptions struct {
	// EnvMode controls the .vscode env files.
	EnvMode BootstrapMode
	// PythonMode controls the .venv.
	PythonMode BootstrapMode
	// WebMode controls web/node_modules.
	WebMode BootstrapMode
}
// BootstrapResult records the human-readable actions a bootstrap run took.
type BootstrapResult struct {
	Actions []string
}
// Bootstrap prepares a fresh worktree checkout by provisioning env files, the
// Python virtualenv, and web node_modules per the requested modes, returning
// the actions taken. The first failing step aborts the run.
func Bootstrap(manifest Manifest, opts BootstrapOptions) (*BootstrapResult, error) {
	result := &BootstrapResult{}
	steps := []func() error{
		func() error { return bootstrapEnvFiles(manifest, opts.EnvMode, result) },
		func() error { return bootstrapPython(manifest, opts.PythonMode, result) },
		func() error { return bootstrapWeb(manifest, opts.WebMode, result) },
	}
	for _, step := range steps {
		if err := step(); err != nil {
			return nil, err
		}
	}
	return result, nil
}
// bootstrapEnvFiles provisions .vscode/.env and .vscode/.env.web in the new
// checkout from the source checkout's copies. Missing sources are skipped
// silently; existing targets (including symlinks) are kept. Auto mode
// resolves to link; copy and clone both copy the file contents.
func bootstrapEnvFiles(manifest Manifest, mode BootstrapMode, result *BootstrapResult) error {
	if mode == BootstrapModeSkip {
		return nil
	}
	vscodeDir := filepath.Join(manifest.CheckoutPath, ".vscode")
	if err := os.MkdirAll(vscodeDir, 0755); err != nil {
		return fmt.Errorf("create .vscode dir: %w", err)
	}
	sources := []struct {
		source string
		target string
		label string
	}{
		{
			source: filepath.Join(manifest.CreatedFromPath, ".vscode", ".env"),
			target: filepath.Join(manifest.CheckoutPath, ".vscode", ".env"),
			label: ".vscode/.env",
		},
		{
			source: filepath.Join(manifest.CreatedFromPath, ".vscode", ".env.web"),
			target: filepath.Join(manifest.CheckoutPath, ".vscode", ".env.web"),
			label: ".vscode/.env.web",
		},
	}
	for _, item := range sources {
		// Nothing to provision if the source checkout lacks this file.
		if _, err := os.Stat(item.source); err != nil {
			continue
		}
		// Lstat so an existing symlink counts as present even if dangling.
		if _, err := os.Lstat(item.target); err == nil {
			result.Actions = append(result.Actions, fmt.Sprintf("kept existing %s", item.label))
			continue
		}
		currentMode := mode
		if currentMode == BootstrapModeAuto {
			currentMode = BootstrapModeLink
		}
		switch currentMode {
		case BootstrapModeLink:
			if err := os.Symlink(item.source, item.target); err != nil {
				return fmt.Errorf("symlink %s: %w", item.label, err)
			}
			result.Actions = append(result.Actions, fmt.Sprintf("linked %s from source checkout", item.label))
		case BootstrapModeCopy, BootstrapModeClone:
			if err := copyFile(item.source, item.target); err != nil {
				return fmt.Errorf("copy %s: %w", item.label, err)
			}
			result.Actions = append(result.Actions, fmt.Sprintf("copied %s from source checkout", item.label))
		default:
			return fmt.Errorf("unsupported env bootstrap mode: %s", currentMode)
		}
	}
	return nil
}
// bootstrapPython provisions the checkout's .venv from the source checkout's.
// An existing target .venv is kept; a missing source .venv defers backend
// setup without failing. Auto mode resolves to link; copy and clone both do
// a directory clone.
func bootstrapPython(manifest Manifest, mode BootstrapMode, result *BootstrapResult) error {
	if mode == BootstrapModeSkip {
		return nil
	}
	sourceVenv := filepath.Join(manifest.CreatedFromPath, ".venv")
	targetVenv := filepath.Join(manifest.CheckoutPath, ".venv")
	if _, err := os.Stat(targetVenv); err == nil {
		result.Actions = append(result.Actions, "kept existing .venv")
		return nil
	}
	if _, err := os.Stat(sourceVenv); err != nil {
		// Best-effort: record the gap instead of failing bootstrap.
		result.Actions = append(result.Actions, "source .venv missing; backend bootstrap deferred")
		return nil
	}
	currentMode := mode
	if currentMode == BootstrapModeAuto {
		currentMode = BootstrapModeLink
	}
	switch currentMode {
	case BootstrapModeLink:
		if err := os.Symlink(sourceVenv, targetVenv); err != nil {
			return fmt.Errorf("symlink .venv: %w", err)
		}
		result.Actions = append(result.Actions, "linked shared .venv from source checkout")
	case BootstrapModeCopy, BootstrapModeClone:
		if err := cloneDirectory(sourceVenv, targetVenv); err != nil {
			return fmt.Errorf("clone .venv: %w", err)
		}
		result.Actions = append(result.Actions, "cloned .venv from source checkout")
	default:
		return fmt.Errorf("unsupported python bootstrap mode: %s", currentMode)
	}
	return nil
}
// bootstrapWeb provisions web/node_modules for the new checkout. An existing
// target is kept. Auto mode clones the source checkout's node_modules when
// present and otherwise installs with `npm ci`; clone/copy modes also fall
// back to `npm ci` when the source has nothing to clone.
func bootstrapWeb(manifest Manifest, mode BootstrapMode, result *BootstrapResult) error {
	if mode == BootstrapModeSkip {
		return nil
	}
	sourceModules := filepath.Join(manifest.CreatedFromPath, "web", "node_modules")
	targetModules := filepath.Join(manifest.CheckoutPath, "web", "node_modules")
	// Lstat so an existing symlink counts as present even if dangling.
	if _, err := os.Lstat(targetModules); err == nil {
		result.Actions = append(result.Actions, "kept existing web/node_modules")
		return nil
	}
	currentMode := mode
	if currentMode == BootstrapModeAuto {
		if _, err := os.Stat(sourceModules); err == nil {
			currentMode = BootstrapModeClone
		} else {
			currentMode = BootstrapModeNPM
		}
	}
	switch currentMode {
	case BootstrapModeClone, BootstrapModeCopy:
		if _, err := os.Stat(sourceModules); err != nil {
			// Nothing to clone from; install from the lockfile instead.
			return npmInstallWeb(manifest, result)
		}
		if err := cloneDirectory(sourceModules, targetModules); err != nil {
			return fmt.Errorf("clone web/node_modules: %w", err)
		}
		result.Actions = append(result.Actions, "cloned local web/node_modules into worktree")
		return nil
	case BootstrapModeNPM:
		return npmInstallWeb(manifest, result)
	default:
		return fmt.Errorf("unsupported web bootstrap mode: %s", currentMode)
	}
}

// npmInstallWeb runs `npm ci` in the checkout's web directory, streaming
// output to the caller's terminal, and records the action on success.
func npmInstallWeb(manifest Manifest, result *BootstrapResult) error {
	cmd := exec.Command("npm", "ci", "--prefer-offline", "--no-audit")
	cmd.Dir = filepath.Join(manifest.CheckoutPath, "web")
	cmd.Stdout = os.Stdout
	cmd.Stderr = os.Stderr
	cmd.Stdin = os.Stdin
	if err := cmd.Run(); err != nil {
		return fmt.Errorf("npm ci: %w", err)
	}
	result.Actions = append(result.Actions, "installed web/node_modules with npm ci")
	return nil
}
func cloneDirectory(source, target string) error {
if err := os.MkdirAll(filepath.Dir(target), 0755); err != nil {
return fmt.Errorf("create parent dir for %s: %w", target, err)
}
if runtime.GOOS == "darwin" {
cmd := exec.Command("cp", "-R", "-c", source, target)
if err := cmd.Run(); err == nil {
return nil
}
}
if runtime.GOOS != "windows" {
cmd := exec.Command("cp", "-R", source, target)
if err := cmd.Run(); err == nil {
return nil
}
}
return fmt.Errorf("no supported directory clone strategy succeeded for %s", source)
}
func copyFile(source, target string) error {
data, err := os.ReadFile(source)
if err != nil {
return err
}
return os.WriteFile(target, data, 0644)
}

View File

@@ -0,0 +1,252 @@
package agentlab
import (
"bytes"
"encoding/json"
"fmt"
"os"
"os/exec"
"path/filepath"
"runtime"
"strings"
"time"
"github.com/onyx-dot-app/onyx/tools/ods/internal/alembic"
"github.com/onyx-dot-app/onyx/tools/ods/internal/envutil"
)
// DependencyResult records the human-readable actions a dependency operation
// (provision, reset, teardown) performed.
type DependencyResult struct {
	Actions []string
}
// DependencyStatus is the JSON-serializable health/usage snapshot of a
// worktree's dependencies. In namespaced mode it is populated by the embedded
// dependency_status.py script; in shared mode the Ready flags are simply set.
type DependencyStatus struct {
	Mode DependencyMode `json:"mode"`
	Namespace string `json:"namespace,omitempty"`
	PostgresDatabase string `json:"postgres_database,omitempty"`
	PostgresReady bool `json:"postgres_ready"`
	PostgresTableCount int `json:"postgres_table_count,omitempty"`
	RedisPrefix string `json:"redis_prefix,omitempty"`
	RedisReady bool `json:"redis_ready"`
	RedisKeyCount int `json:"redis_key_count,omitempty"`
	FileStoreBucket string `json:"file_store_bucket,omitempty"`
	FileStoreReady bool `json:"file_store_ready"`
	FileStoreObjectCount int `json:"file_store_object_count,omitempty"`
	SearchInfraMode string `json:"search_infra_mode"`
}
// ProvisionDependencies sets up the worktree's backing stores. Shared mode is
// a no-op beyond recording the fact; namespaced mode creates the Postgres
// database via an embedded Python script, runs alembic migrations against it,
// and ensures the file-store bucket. On success the manifest's dependency
// config and provision timestamp are persisted, and the updated manifest is
// returned alongside the actions taken.
func ProvisionDependencies(commonGitDir string, manifest Manifest) (Manifest, *DependencyResult, error) {
	deps := manifest.ResolvedDependencies()
	result := &DependencyResult{}
	switch deps.Mode {
	case DependencyModeShared:
		result.Actions = append(result.Actions, "using shared Postgres, Redis, and MinIO state")
	case DependencyModeNamespaced:
		if _, err := runPythonScript(manifest, "ensure_database.py"); err != nil {
			return manifest, nil, fmt.Errorf("ensure PostgreSQL database %s: %w", deps.PostgresDatabase, err)
		}
		result.Actions = append(result.Actions, fmt.Sprintf("ensured PostgreSQL database %s", deps.PostgresDatabase))
		// Migrations run with the worktree's env so POSTGRES_DB points at the
		// namespaced database.
		envMap, err := runtimeEnvMap(manifest)
		if err != nil {
			return manifest, nil, err
		}
		if err := alembic.UpgradeWithEnv("head", alembic.SchemaDefault, envMap); err != nil {
			return manifest, nil, fmt.Errorf("migrate namespaced database %s: %w", deps.PostgresDatabase, err)
		}
		result.Actions = append(result.Actions, fmt.Sprintf("migrated PostgreSQL database %s", deps.PostgresDatabase))
		if _, err := runPythonScript(manifest, "ensure_bucket.py"); err != nil {
			return manifest, nil, fmt.Errorf("ensure file-store bucket %s: %w", deps.FileStoreBucket, err)
		}
		result.Actions = append(result.Actions, fmt.Sprintf("ensured file-store bucket %s", deps.FileStoreBucket))
		// Redis needs no setup; the prefix is applied by the app at runtime.
		result.Actions = append(result.Actions, fmt.Sprintf("reserved Redis prefix %s", deps.RedisPrefix))
	default:
		return manifest, nil, fmt.Errorf("unsupported dependency mode: %s", deps.Mode)
	}
	result.Actions = append(result.Actions, "search infrastructure remains shared-only")
	manifest.Dependencies = deps
	manifest.Dependencies.LastProvisionedAt = time.Now().UTC().Format(time.RFC3339)
	if err := WriteManifest(commonGitDir, manifest); err != nil {
		return manifest, nil, err
	}
	return manifest, result, nil
}
// InspectDependencies reports the current state of the worktree's backing
// stores. Shared mode is assumed healthy without probing; namespaced mode
// delegates to the embedded dependency_status.py script and parses its JSON
// output over the pre-filled status struct.
func InspectDependencies(manifest Manifest) (*DependencyStatus, error) {
	deps := manifest.ResolvedDependencies()
	status := &DependencyStatus{
		Mode: deps.Mode,
		Namespace: deps.Namespace,
		PostgresDatabase: deps.PostgresDatabase,
		RedisPrefix: deps.RedisPrefix,
		FileStoreBucket: deps.FileStoreBucket,
		SearchInfraMode: deps.SearchInfraMode,
	}
	if deps.Mode == DependencyModeShared {
		// Shared infrastructure is managed outside agent-lab; report ready.
		status.PostgresReady = true
		status.RedisReady = true
		status.FileStoreReady = true
		return status, nil
	}
	output, err := runPythonScript(manifest, "dependency_status.py")
	if err != nil {
		return nil, fmt.Errorf("inspect namespaced dependencies: %w", err)
	}
	if err := json.Unmarshal([]byte(output), status); err != nil {
		return nil, fmt.Errorf("parse dependency status: %w", err)
	}
	return status, nil
}
// ResetDependencies wipes and re-provisions the worktree's namespaced backing
// stores: the embedded reset script drops/recreates the database, clears the
// Redis prefix, and empties the bucket, after which migrations are re-run.
// Shared mode is a no-op. The refreshed provision timestamp is persisted.
func ResetDependencies(commonGitDir string, manifest Manifest) (Manifest, *DependencyResult, error) {
	deps := manifest.ResolvedDependencies()
	result := &DependencyResult{}
	if deps.Mode == DependencyModeShared {
		result.Actions = append(result.Actions, "shared dependency mode selected; reset is a no-op")
		return manifest, result, nil
	}
	if _, err := runPythonScript(manifest, "reset_dependencies.py"); err != nil {
		return manifest, nil, fmt.Errorf("reset namespaced dependencies: %w", err)
	}
	result.Actions = append(result.Actions, fmt.Sprintf("dropped and recreated PostgreSQL database %s", deps.PostgresDatabase))
	result.Actions = append(result.Actions, fmt.Sprintf("cleared Redis prefix %s", deps.RedisPrefix))
	result.Actions = append(result.Actions, fmt.Sprintf("emptied file-store bucket %s", deps.FileStoreBucket))
	envMap, err := runtimeEnvMap(manifest)
	if err != nil {
		return manifest, nil, err
	}
	if err := alembic.UpgradeWithEnv("head", alembic.SchemaDefault, envMap); err != nil {
		return manifest, nil, fmt.Errorf("re-migrate namespaced database %s: %w", deps.PostgresDatabase, err)
	}
	result.Actions = append(result.Actions, fmt.Sprintf("re-migrated PostgreSQL database %s", deps.PostgresDatabase))
	result.Actions = append(result.Actions, "search infrastructure remains shared-only and was not reset")
	manifest.Dependencies = deps
	manifest.Dependencies.LastProvisionedAt = time.Now().UTC().Format(time.RFC3339)
	if err := WriteManifest(commonGitDir, manifest); err != nil {
		return manifest, nil, err
	}
	return manifest, result, nil
}
// TeardownDependencies permanently removes the worktree's namespaced backing
// stores (database, Redis prefix, bucket) via the embedded teardown script
// and clears the provision timestamp. Shared mode is a no-op.
func TeardownDependencies(commonGitDir string, manifest Manifest) (Manifest, *DependencyResult, error) {
	deps := manifest.ResolvedDependencies()
	result := &DependencyResult{}
	if deps.Mode == DependencyModeShared {
		result.Actions = append(result.Actions, "shared dependency mode selected; teardown is a no-op")
		return manifest, result, nil
	}
	if _, err := runPythonScript(manifest, "teardown_dependencies.py"); err != nil {
		return manifest, nil, fmt.Errorf("tear down namespaced dependencies: %w", err)
	}
	result.Actions = append(result.Actions, fmt.Sprintf("dropped PostgreSQL database %s", deps.PostgresDatabase))
	result.Actions = append(result.Actions, fmt.Sprintf("cleared Redis prefix %s", deps.RedisPrefix))
	result.Actions = append(result.Actions, fmt.Sprintf("deleted file-store bucket %s", deps.FileStoreBucket))
	result.Actions = append(result.Actions, "search infrastructure remains shared-only and was not torn down")
	manifest.Dependencies = deps
	// Empty timestamp marks the namespace as no longer provisioned.
	manifest.Dependencies.LastProvisionedAt = ""
	if err := WriteManifest(commonGitDir, manifest); err != nil {
		return manifest, nil, err
	}
	return manifest, result, nil
}
// runtimeEnvMap builds the environment for scripts run inside the worktree:
// values from the repo's .vscode/.env file (when present), overlaid by the
// manifest's runtime env so worktree-specific overrides always win.
func runtimeEnvMap(manifest Manifest) (map[string]string, error) {
	envMap := make(map[string]string)
	repoRoot := runtimeRepoRoot(manifest)
	backendEnvPath := filepath.Join(repoRoot, ".vscode", ".env")
	if _, err := os.Stat(backendEnvPath); err == nil {
		fileVars, err := envutil.LoadFile(backendEnvPath)
		if err != nil {
			return nil, err
		}
		for _, entry := range fileVars {
			// Entries are KEY=VALUE strings; split on the first '='.
			if idx := strings.Index(entry, "="); idx > 0 {
				envMap[entry[:idx]] = entry[idx+1:]
			}
		}
	}
	// Manifest values take precedence over the env file.
	for key, value := range manifest.RuntimeEnv() {
		envMap[key] = value
	}
	return envMap, nil
}
// runPythonScript executes one of the embedded helper scripts with the
// worktree's Python interpreter (`python -c <code>`), from the repo's backend
// directory, with the worktree's runtime env applied. It returns trimmed
// stdout; on failure the error carries stderr (or stdout, or the exec error)
// so the caller sees the script's own message.
func runPythonScript(manifest Manifest, scriptName string) (string, error) {
	pythonBinary, err := findPythonBinary(manifest)
	if err != nil {
		return "", err
	}
	code, err := loadPythonScript(scriptName)
	if err != nil {
		return "", err
	}
	envMap, err := runtimeEnvMap(manifest)
	if err != nil {
		return "", err
	}
	cmd := exec.Command(pythonBinary, "-c", code)
	cmd.Dir = filepath.Join(runtimeRepoRoot(manifest), "backend")
	cmd.Env = envutil.ApplyOverrides(os.Environ(), envMap)
	var stdout bytes.Buffer
	var stderr bytes.Buffer
	cmd.Stdout = &stdout
	cmd.Stderr = &stderr
	if err := cmd.Run(); err != nil {
		// Prefer the script's own diagnostics over Go's exit-status error.
		message := strings.TrimSpace(stderr.String())
		if message == "" {
			message = strings.TrimSpace(stdout.String())
		}
		if message == "" {
			message = err.Error()
		}
		return "", fmt.Errorf("%s", message)
	}
	return strings.TrimSpace(stdout.String()), nil
}
// findPythonBinary locates a Python interpreter in a .venv, preferring the
// worktree checkout's own venv and falling back to the source checkout's.
// Windows venvs use Scripts\python.exe; elsewhere bin/python.
func findPythonBinary(manifest Manifest) (string, error) {
	venvSuffix := []string{".venv", "bin", "python"}
	if runtime.GOOS == "windows" {
		venvSuffix = []string{".venv", "Scripts", "python.exe"}
	}
	for _, root := range []string{manifest.CheckoutPath, manifest.CreatedFromPath} {
		candidate := filepath.Join(append([]string{root}, venvSuffix...)...)
		if _, err := os.Stat(candidate); err == nil {
			return candidate, nil
		}
	}
	return "", fmt.Errorf("could not find a Python interpreter in %s/.venv or %s/.venv", manifest.CheckoutPath, manifest.CreatedFromPath)
}
// runtimeRepoRoot picks the repo root for runtime operations: the checkout
// path when it looks like a full repo (has a backend/ directory), otherwise
// the path the workspace was created from.
func runtimeRepoRoot(manifest Manifest) string {
	checkout := manifest.CheckoutPath
	if checkout == "" {
		return manifest.CreatedFromPath
	}
	if _, err := os.Stat(filepath.Join(checkout, "backend")); err != nil {
		return manifest.CreatedFromPath
	}
	return checkout
}

View File

@@ -0,0 +1,17 @@
package agentlab
import (
"embed"
"fmt"
)
// pythonScripts embeds every helper under scripts/ into the binary so the
// CLI can run them without needing this package's source on disk.
//go:embed scripts/*.py
var pythonScripts embed.FS

// loadPythonScript returns the embedded source of the named script (the
// name is resolved relative to the scripts/ directory, e.g. "status.py").
func loadPythonScript(name string) (string, error) {
	data, err := pythonScripts.ReadFile("scripts/" + name)
	if err != nil {
		return "", fmt.Errorf("load python script %s: %w", name, err)
	}
	return string(data), nil
}

View File

@@ -0,0 +1,90 @@
# Report the health of an agent-lab namespace's provisioned dependencies
# (Postgres database, Redis key prefix, S3 file-store bucket) as a single
# JSON object on stdout; consumed by the Go CLI via runPythonScript.
import json
import os
import boto3
import psycopg2
import urllib3
from botocore.config import Config
from botocore.exceptions import ClientError
from redis import Redis

# --- Postgres: connect to the namespace database and count public tables. ---
db_name = os.environ["POSTGRES_DB"]
host = os.environ.get("POSTGRES_HOST", "localhost")
port = os.environ.get("POSTGRES_PORT", "5432")
user = os.environ.get("POSTGRES_USER", "postgres")
password = os.environ.get("POSTGRES_PASSWORD", "password")
conn = psycopg2.connect(
    host=host, port=port, user=user, password=password, dbname=db_name
)
with conn.cursor() as cur:
    cur.execute(
        "SELECT COUNT(*) FROM information_schema.tables WHERE table_schema = 'public'"
    )
    table_count = int(cur.fetchone()[0])
conn.close()

# --- Redis: count keys under the namespace prefix. ---
redis_prefix = os.environ["DEFAULT_REDIS_PREFIX"]
bucket = os.environ["S3_FILE_STORE_BUCKET_NAME"]
redis_client = Redis(
    host=os.environ.get("REDIS_HOST", "localhost"),
    port=int(os.environ.get("REDIS_PORT", "6379")),
    db=int(os.environ.get("REDIS_DB_NUMBER", "0")),
    password=os.environ.get("REDIS_PASSWORD") or None,
    ssl=os.environ.get("REDIS_SSL", "").lower() == "true",
    ssl_cert_reqs="none" if os.environ.get("REDIS_SSL", "").lower() == "true" else None,
)
redis_key_count = 0
# SCAN-based iteration avoids blocking Redis the way KEYS would.
for _ in redis_client.scan_iter(match=f"{redis_prefix}:*", count=1000):
    redis_key_count += 1

# --- S3/MinIO: check the bucket exists and count its objects. ---
endpoint = os.environ.get("S3_ENDPOINT_URL") or None
access_key = os.environ.get("S3_AWS_ACCESS_KEY_ID") or None
secret_key = os.environ.get("S3_AWS_SECRET_ACCESS_KEY") or None
region = os.environ.get("AWS_REGION_NAME") or "us-east-1"
verify_ssl = os.environ.get("S3_VERIFY_SSL", "false").lower() == "true"
kwargs = {"service_name": "s3", "region_name": region}
if endpoint:
    # Custom endpoints (e.g. MinIO) need path-style addressing and sigv4.
    kwargs["endpoint_url"] = endpoint
    kwargs["config"] = Config(signature_version="s3v4", s3={"addressing_style": "path"})
if not verify_ssl:
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
    kwargs["verify"] = False
if access_key and secret_key:
    kwargs["aws_access_key_id"] = access_key
    kwargs["aws_secret_access_key"] = secret_key
s3_client = boto3.client(**kwargs)
bucket_ready = True
bucket_object_count = 0
try:
    s3_client.head_bucket(Bucket=bucket)
    paginator = s3_client.get_paginator("list_objects_v2")
    for page in paginator.paginate(Bucket=bucket):
        bucket_object_count += len(page.get("Contents", []))
except ClientError:
    # A missing bucket or access failure is reported as not-ready, not a crash.
    bucket_ready = False

# Emit the status document the Go side parses.
print(
    json.dumps(
        {
            "mode": os.environ["AGENT_LAB_DEPENDENCY_MODE"],
            "namespace": os.environ.get("AGENT_LAB_NAMESPACE", ""),
            "postgres_database": db_name,
            "postgres_ready": True,
            "postgres_table_count": table_count,
            "redis_prefix": redis_prefix,
            "redis_ready": True,
            "redis_key_count": redis_key_count,
            "file_store_bucket": bucket,
            "file_store_ready": bucket_ready,
            "file_store_object_count": bucket_object_count,
            "search_infra_mode": os.environ.get(
                "AGENT_LAB_SEARCH_INFRA_MODE", "shared"
            ),
        }
    )
)

View File

@@ -0,0 +1,40 @@
"""Ensure the agent-lab file-store bucket exists, creating it when missing."""
import os
import boto3
import urllib3
from botocore.config import Config
from botocore.exceptions import ClientError

bucket = os.environ["S3_FILE_STORE_BUCKET_NAME"]
endpoint = os.environ.get("S3_ENDPOINT_URL") or None
access_key = os.environ.get("S3_AWS_ACCESS_KEY_ID") or None
secret_key = os.environ.get("S3_AWS_SECRET_ACCESS_KEY") or None
region = os.environ.get("AWS_REGION_NAME") or "us-east-1"
verify_ssl = os.environ.get("S3_VERIFY_SSL", "false").lower() == "true"

client_kwargs = {"service_name": "s3", "region_name": region}
if endpoint:
    # Custom endpoints (e.g. MinIO) need path-style addressing and sigv4.
    client_kwargs["endpoint_url"] = endpoint
    client_kwargs["config"] = Config(
        signature_version="s3v4", s3={"addressing_style": "path"}
    )
if not verify_ssl:
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
    client_kwargs["verify"] = False
if access_key and secret_key:
    client_kwargs["aws_access_key_id"] = access_key
    client_kwargs["aws_secret_access_key"] = secret_key

client = boto3.client(**client_kwargs)
try:
    client.head_bucket(Bucket=bucket)
except ClientError as exc:
    http_status = exc.response.get("ResponseMetadata", {}).get("HTTPStatusCode")
    if http_status not in (403, 404):
        raise
    # us-east-1 (and custom endpoints) reject an explicit LocationConstraint,
    # so only send one for other AWS regions.
    if endpoint or region == "us-east-1":
        client.create_bucket(Bucket=bucket)
    else:
        client.create_bucket(
            Bucket=bucket, CreateBucketConfiguration={"LocationConstraint": region}
        )
print(bucket)

View File

@@ -0,0 +1,23 @@
"""Ensure the target Postgres database exists, creating it when missing."""
import os
import psycopg2
from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT

# Shared-server connection settings; the admin database is only used to
# issue CREATE DATABASE.
pg_settings = {
    "host": os.environ.get("POSTGRES_HOST", "localhost"),
    "port": os.environ.get("POSTGRES_PORT", "5432"),
    "user": os.environ.get("POSTGRES_USER", "postgres"),
    "password": os.environ.get("POSTGRES_PASSWORD", "password"),
}
target_db = os.environ["POSTGRES_DB"]
admin_db = os.environ.get("AGENT_LAB_POSTGRES_ADMIN_DB", "postgres")

conn = psycopg2.connect(dbname=admin_db, **pg_settings)
# CREATE DATABASE cannot run inside a transaction block.
conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
with conn.cursor() as cur:
    cur.execute("SELECT 1 FROM pg_database WHERE datname = %s", (target_db,))
    if cur.fetchone() is None:
        cur.execute(f'CREATE DATABASE "{target_db}"')
conn.close()
print(target_db)

View File

@@ -0,0 +1,67 @@
# Reset an agent-lab namespace to a clean slate: drop and recreate the
# Postgres database, delete Redis keys under the namespace prefix, and empty
# (but keep) the file-store bucket.
import os
import boto3
import psycopg2
import urllib3
from botocore.config import Config
from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT
from redis import Redis

# --- Postgres: drop and recreate the namespace database. ---
host = os.environ.get("POSTGRES_HOST", "localhost")
port = os.environ.get("POSTGRES_PORT", "5432")
user = os.environ.get("POSTGRES_USER", "postgres")
password = os.environ.get("POSTGRES_PASSWORD", "password")
target_db = os.environ["POSTGRES_DB"]
admin_db = os.environ.get("AGENT_LAB_POSTGRES_ADMIN_DB", "postgres")
conn = psycopg2.connect(
    host=host, port=port, user=user, password=password, dbname=admin_db
)
# DROP/CREATE DATABASE cannot run inside a transaction block.
conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
with conn.cursor() as cur:
    # Kick out any open sessions first; DROP DATABASE fails while in use.
    cur.execute(
        "SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE datname = %s AND pid <> pg_backend_pid()",
        (target_db,),
    )
    cur.execute(f'DROP DATABASE IF EXISTS "{target_db}"')
    cur.execute(f'CREATE DATABASE "{target_db}"')
conn.close()

# --- Redis: delete every key under the namespace prefix. ---
redis_prefix = os.environ["DEFAULT_REDIS_PREFIX"]
redis_client = Redis(
    host=os.environ.get("REDIS_HOST", "localhost"),
    port=int(os.environ.get("REDIS_PORT", "6379")),
    db=int(os.environ.get("REDIS_DB_NUMBER", "0")),
    password=os.environ.get("REDIS_PASSWORD") or None,
    ssl=os.environ.get("REDIS_SSL", "").lower() == "true",
    ssl_cert_reqs="none" if os.environ.get("REDIS_SSL", "").lower() == "true" else None,
)
keys = list(redis_client.scan_iter(match=f"{redis_prefix}:*", count=1000))
if keys:
    redis_client.delete(*keys)

# --- S3/MinIO: delete all objects, keeping the bucket itself. ---
bucket = os.environ["S3_FILE_STORE_BUCKET_NAME"]
endpoint = os.environ.get("S3_ENDPOINT_URL") or None
access_key = os.environ.get("S3_AWS_ACCESS_KEY_ID") or None
secret_key = os.environ.get("S3_AWS_SECRET_ACCESS_KEY") or None
region = os.environ.get("AWS_REGION_NAME") or "us-east-1"
verify_ssl = os.environ.get("S3_VERIFY_SSL", "false").lower() == "true"
kwargs = {"service_name": "s3", "region_name": region}
if endpoint:
    # Custom endpoints (e.g. MinIO) need path-style addressing and sigv4.
    kwargs["endpoint_url"] = endpoint
    kwargs["config"] = Config(signature_version="s3v4", s3={"addressing_style": "path"})
if not verify_ssl:
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
    kwargs["verify"] = False
if access_key and secret_key:
    kwargs["aws_access_key_id"] = access_key
    kwargs["aws_secret_access_key"] = secret_key
s3_client = boto3.client(**kwargs)
# NOTE(review): unlike the teardown script, this listing is not wrapped in a
# ClientError guard — it assumes the bucket already exists. Confirm reset is
# never run before provisioning.
paginator = s3_client.get_paginator("list_objects_v2")
for page in paginator.paginate(Bucket=bucket):
    objects = [{"Key": item["Key"]} for item in page.get("Contents", [])]
    if objects:
        s3_client.delete_objects(Bucket=bucket, Delete={"Objects": objects})

View File

@@ -0,0 +1,73 @@
# Tear down an agent-lab namespace's dependencies entirely: drop the Postgres
# database, delete Redis keys under the namespace prefix, and empty then
# delete the file-store bucket. Missing resources are tolerated.
import os
import boto3
import psycopg2
import urllib3
from botocore.config import Config
from botocore.exceptions import ClientError
from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT
from redis import Redis

# --- Postgres: drop the namespace database (no recreate). ---
host = os.environ.get("POSTGRES_HOST", "localhost")
port = os.environ.get("POSTGRES_PORT", "5432")
user = os.environ.get("POSTGRES_USER", "postgres")
password = os.environ.get("POSTGRES_PASSWORD", "password")
target_db = os.environ["POSTGRES_DB"]
admin_db = os.environ.get("AGENT_LAB_POSTGRES_ADMIN_DB", "postgres")
conn = psycopg2.connect(
    host=host, port=port, user=user, password=password, dbname=admin_db
)
# DROP DATABASE cannot run inside a transaction block.
conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
with conn.cursor() as cur:
    # Kick out any open sessions first; DROP DATABASE fails while in use.
    cur.execute(
        "SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE datname = %s AND pid <> pg_backend_pid()",
        (target_db,),
    )
    cur.execute(f'DROP DATABASE IF EXISTS "{target_db}"')
conn.close()

# --- Redis: delete every key under the namespace prefix. ---
redis_prefix = os.environ["DEFAULT_REDIS_PREFIX"]
redis_client = Redis(
    host=os.environ.get("REDIS_HOST", "localhost"),
    port=int(os.environ.get("REDIS_PORT", "6379")),
    db=int(os.environ.get("REDIS_DB_NUMBER", "0")),
    password=os.environ.get("REDIS_PASSWORD") or None,
    ssl=os.environ.get("REDIS_SSL", "").lower() == "true",
    ssl_cert_reqs="none" if os.environ.get("REDIS_SSL", "").lower() == "true" else None,
)
keys = list(redis_client.scan_iter(match=f"{redis_prefix}:*", count=1000))
if keys:
    redis_client.delete(*keys)

# --- S3/MinIO: empty the bucket, then delete it. ---
bucket = os.environ["S3_FILE_STORE_BUCKET_NAME"]
endpoint = os.environ.get("S3_ENDPOINT_URL") or None
access_key = os.environ.get("S3_AWS_ACCESS_KEY_ID") or None
secret_key = os.environ.get("S3_AWS_SECRET_ACCESS_KEY") or None
region = os.environ.get("AWS_REGION_NAME") or "us-east-1"
verify_ssl = os.environ.get("S3_VERIFY_SSL", "false").lower() == "true"
kwargs = {"service_name": "s3", "region_name": region}
if endpoint:
    # Custom endpoints (e.g. MinIO) need path-style addressing and sigv4.
    kwargs["endpoint_url"] = endpoint
    kwargs["config"] = Config(signature_version="s3v4", s3={"addressing_style": "path"})
if not verify_ssl:
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
    kwargs["verify"] = False
if access_key and secret_key:
    kwargs["aws_access_key_id"] = access_key
    kwargs["aws_secret_access_key"] = secret_key
s3_client = boto3.client(**kwargs)
try:
    paginator = s3_client.get_paginator("list_objects_v2")
    for page in paginator.paginate(Bucket=bucket):
        objects = [{"Key": item["Key"]} for item in page.get("Contents", [])]
        if objects:
            s3_client.delete_objects(Bucket=bucket, Delete={"Objects": objects})
    s3_client.delete_bucket(Bucket=bucket)
except ClientError as exc:
    # A bucket that is already gone (404) or inaccessible (403) is fine for
    # teardown; anything else is a real failure.
    status = exc.response.get("ResponseMetadata", {}).get("HTTPStatusCode")
    if status not in (403, 404):
        raise

View File

@@ -53,12 +53,17 @@ func FindAlembicBinary() (string, error) {
// otherwise it will attempt to run via docker exec on a container
// that has alembic installed (e.g., api_server).
func Run(args []string, schema Schema) error {
return RunWithEnv(args, schema, nil)
}
// RunWithEnv executes an alembic command with explicit environment overrides.
func RunWithEnv(args []string, schema Schema, extraEnv map[string]string) error {
// Check if we need to run via docker exec
if shouldUseDockerExec() {
return runViaDockerExec(args, schema)
return runViaDockerExec(args, schema, extraEnv)
}
return runLocally(args, schema)
return runLocally(args, schema, extraEnv)
}
// shouldUseDockerExec determines if we should run alembic via docker exec.
@@ -79,7 +84,7 @@ func shouldUseDockerExec() bool {
}
// runLocally runs alembic on the local machine.
func runLocally(args []string, schema Schema) error {
func runLocally(args []string, schema Schema, extraEnv map[string]string) error {
backendDir, err := paths.BackendDir()
if err != nil {
return fmt.Errorf("failed to find backend directory: %w", err)
@@ -104,13 +109,13 @@ func runLocally(args []string, schema Schema) error {
cmd.Stdin = os.Stdin
// Pass through POSTGRES_* environment variables
cmd.Env = buildAlembicEnv()
cmd.Env = buildAlembicEnv(extraEnv)
return cmd.Run()
}
// runViaDockerExec runs alembic inside a Docker container that has network access.
func runViaDockerExec(args []string, schema Schema) error {
func runViaDockerExec(args []string, schema Schema, extraEnv map[string]string) error {
// Find a container with alembic installed (api_server)
container, err := findAlembicContainer()
if err != nil {
@@ -136,7 +141,11 @@ func runViaDockerExec(args []string, schema Schema) error {
// Run alembic inside the container
// The container should have the correct env vars and network access
dockerArgs := []string{"exec", "-i", container, "alembic"}
dockerArgs := []string{"exec", "-i"}
for key, value := range extraEnv {
dockerArgs = append(dockerArgs, "-e", fmt.Sprintf("%s=%s", key, value))
}
dockerArgs = append(dockerArgs, container, "alembic")
dockerArgs = append(dockerArgs, alembicArgs...)
cmd := exec.Command("docker", dockerArgs...)
@@ -158,7 +167,7 @@ var alembicContainerNames = []string{
// It inherits the current environment and ensures POSTGRES_* variables are set.
// If POSTGRES_HOST is not explicitly set, it attempts to detect the PostgreSQL
// container IP address automatically.
func buildAlembicEnv() []string {
func buildAlembicEnv(extraEnv map[string]string) []string {
env := os.Environ()
// Get postgres config (which reads from env with defaults)
@@ -188,6 +197,10 @@ func buildAlembicEnv() []string {
}
}
for key, value := range extraEnv {
env = append(env, fmt.Sprintf("%s=%s", key, value))
}
return env
}
@@ -238,6 +251,14 @@ func Upgrade(revision string, schema Schema) error {
return Run([]string{"upgrade", revision}, schema)
}
// UpgradeWithEnv runs alembic upgrade with explicit environment overrides.
func UpgradeWithEnv(revision string, schema Schema, extraEnv map[string]string) error {
if revision == "" {
revision = "head"
}
return RunWithEnv([]string{"upgrade", revision}, schema, extraEnv)
}
// Downgrade runs alembic downgrade to the specified revision.
func Downgrade(revision string, schema Schema) error {
return Run([]string{"downgrade", revision}, schema)

View File

@@ -0,0 +1,105 @@
package envutil
import (
"bufio"
"fmt"
"os"
"sort"
"strings"
)
// LoadFile parses a .env-style file into KEY=VALUE entries suitable for
// appending to os.Environ(). Blank lines and comments are skipped.
func LoadFile(path string) ([]string, error) {
f, err := os.Open(path)
if err != nil {
return nil, fmt.Errorf("open env file %s: %w", path, err)
}
defer func() { _ = f.Close() }()
var envVars []string
scanner := bufio.NewScanner(f)
for scanner.Scan() {
line := strings.TrimSpace(scanner.Text())
if line == "" || strings.HasPrefix(line, "#") {
continue
}
if idx := strings.Index(line, "="); idx > 0 {
key := strings.TrimSpace(line[:idx])
value := strings.TrimSpace(line[idx+1:])
value = strings.Trim(value, `"'`)
envVars = append(envVars, fmt.Sprintf("%s=%s", key, value))
}
}
if err := scanner.Err(); err != nil {
return nil, fmt.Errorf("read env file %s: %w", path, err)
}
return envVars, nil
}
// Merge combines shell environment with file-based defaults. Shell values
// take precedence: a file entry is appended only when its key is absent
// from shellEnv.
func Merge(shellEnv, fileVars []string) []string {
	seen := make(map[string]bool, len(shellEnv))
	for _, pair := range shellEnv {
		if eq := strings.Index(pair, "="); eq > 0 {
			seen[pair[:eq]] = true
		}
	}

	out := append(make([]string, 0, len(shellEnv)+len(fileVars)), shellEnv...)
	for _, pair := range fileVars {
		eq := strings.Index(pair, "=")
		if eq <= 0 {
			continue
		}
		if !seen[pair[:eq]] {
			out = append(out, pair)
		}
	}
	return out
}
// ApplyOverrides replaces or appends KEY=VALUE entries in env with the
// provided overrides. Existing entries for overridden keys are dropped, and
// the override values are appended in sorted key order, so the result holds
// at most one entry per overridden key.
func ApplyOverrides(env []string, overrides map[string]string) []string {
	if len(overrides) == 0 {
		return env
	}

	result := make([]string, 0, len(env)+len(overrides))
	for _, pair := range env {
		if eq := strings.Index(pair, "="); eq > 0 {
			if _, replaced := overrides[pair[:eq]]; replaced {
				continue
			}
		}
		result = append(result, pair)
	}

	// Append overrides in sorted key order for deterministic output.
	keys := make([]string, 0, len(overrides))
	for key := range overrides {
		keys = append(keys, key)
	}
	sort.Strings(keys)
	for _, key := range keys {
		result = append(result, fmt.Sprintf("%s=%s", key, overrides[key]))
	}
	return result
}
// MapToEnvEntries converts a string map into KEY=VALUE entries in stable
// (sorted-by-key) order.
func MapToEnvEntries(values map[string]string) []string {
	entries := make([]string, 0, len(values))
	for key := range values {
		entries = append(entries, key)
	}
	sort.Strings(entries)
	// Rewrite each sorted key in place as its KEY=VALUE form.
	for i, key := range entries {
		entries[i] = fmt.Sprintf("%s=%s", key, values[key])
	}
	return entries
}

View File

@@ -0,0 +1,122 @@
package journey
import (
"encoding/json"
"fmt"
"os"
"path/filepath"
"strings"
)
const (
	// RegistryPath is the repo-relative location of the journey registry.
	RegistryPath = "web/tests/e2e/journeys/registry.json"
	// DefaultPlanPath is the repo-relative default plan file consumed by CI.
	DefaultPlanPath = ".github/agent-journeys.json"
)

// Definition describes a single runnable journey from the registry.
type Definition struct {
	Name        string `json:"name"`
	Description string `json:"description"`
	TestPath    string `json:"test_path"`
	Project     string `json:"project"`
	RequiresModelServer bool `json:"requires_model_server"`
	SkipGlobalSetup     bool `json:"skip_global_setup"`
}

// Registry is the parsed registry.json payload.
type Registry struct {
	Journeys []Definition `json:"journeys"`
}

// Plan lists the journey names selected for a particular run.
type Plan struct {
	Journeys []string `json:"journeys"`
}
// LoadRegistry reads and validates the journey registry under repoRoot.
// It fails when the file is missing/unparseable, when the registry is
// empty, or when any entry lacks a name, test_path, or project.
func LoadRegistry(repoRoot string) (Registry, error) {
	var registry Registry

	raw, err := os.ReadFile(filepath.Join(repoRoot, RegistryPath))
	if err != nil {
		return registry, fmt.Errorf("read journey registry: %w", err)
	}
	if err := json.Unmarshal(raw, &registry); err != nil {
		return registry, fmt.Errorf("parse journey registry: %w", err)
	}
	if len(registry.Journeys) == 0 {
		return registry, fmt.Errorf("journey registry is empty")
	}

	for _, entry := range registry.Journeys {
		switch {
		case strings.TrimSpace(entry.Name) == "":
			return registry, fmt.Errorf("journey registry contains an entry with an empty name")
		case strings.TrimSpace(entry.TestPath) == "":
			return registry, fmt.Errorf("journey %q is missing test_path", entry.Name)
		case strings.TrimSpace(entry.Project) == "":
			return registry, fmt.Errorf("journey %q is missing project", entry.Name)
		}
	}
	return registry, nil
}
// LoadPlan reads a plan file and requires it to name at least one journey.
func LoadPlan(planPath string) (Plan, error) {
	var plan Plan

	raw, err := os.ReadFile(planPath)
	if err != nil {
		return plan, fmt.Errorf("read journey plan: %w", err)
	}
	if err := json.Unmarshal(raw, &plan); err != nil {
		return plan, fmt.Errorf("parse journey plan: %w", err)
	}
	if len(plan.Journeys) == 0 {
		return plan, fmt.Errorf("journey plan contains no journeys")
	}
	return plan, nil
}
// ResolveDefinitions maps journey names to their registry definitions,
// preserving the order of names. An unknown name is an error.
func ResolveDefinitions(repoRoot string, names []string) ([]Definition, error) {
	registry, err := LoadRegistry(repoRoot)
	if err != nil {
		return nil, err
	}

	index := make(map[string]Definition, len(registry.Journeys))
	for _, def := range registry.Journeys {
		index[def.Name] = def
	}

	resolved := make([]Definition, 0, len(names))
	for _, name := range names {
		def, found := index[name]
		if !found {
			return nil, fmt.Errorf("unknown journey %q", name)
		}
		resolved = append(resolved, def)
	}
	return resolved, nil
}
// Slug converts an arbitrary string into a lowercase, dash-separated
// identifier: runs of non-alphanumeric characters collapse to a single
// dash, leading/trailing dashes are removed, and an empty result falls
// back to "journey".
func Slug(value string) string {
	lowered := strings.ReplaceAll(strings.ToLower(strings.TrimSpace(value)), "/", "-")
	isAlnum := func(r rune) bool {
		return (r >= 'a' && r <= 'z') || (r >= '0' && r <= '9')
	}
	// Splitting on non-alphanumeric runs and rejoining with "-" both
	// collapses separators and trims them from the ends.
	parts := strings.FieldsFunc(lowered, func(r rune) bool { return !isAlnum(r) })
	cleaned := strings.Join(parts, "-")
	if cleaned == "" {
		return "journey"
	}
	return cleaned
}

View File

@@ -0,0 +1,59 @@
package journey
import (
"os"
"path/filepath"
"testing"
)
// TestResolveDefinitions verifies that a journey name resolves against a
// registry.json placed under the expected repo-relative path.
func TestResolveDefinitions(t *testing.T) {
	// NOTE: t.Helper() was removed here — it only makes sense inside helper
	// functions, not inside the Test function itself, where it is a no-op
	// that misleads readers about failure attribution.
	root := t.TempDir()
	registryDir := filepath.Join(root, "web", "tests", "e2e", "journeys")
	if err := os.MkdirAll(registryDir, 0755); err != nil {
		t.Fatalf("mkdir: %v", err)
	}
	if err := os.WriteFile(filepath.Join(registryDir, "registry.json"), []byte(`{
  "journeys": [
    {
      "name": "auth-landing",
      "description": "test",
      "test_path": "tests/e2e/journeys/auth_landing.spec.ts",
      "project": "journey",
      "requires_model_server": false,
      "skip_global_setup": true
    }
  ]
}`), 0644); err != nil {
		t.Fatalf("write registry: %v", err)
	}
	definitions, err := ResolveDefinitions(root, []string{"auth-landing"})
	if err != nil {
		t.Fatalf("resolve definitions: %v", err)
	}
	if len(definitions) != 1 {
		t.Fatalf("expected 1 definition, got %d", len(definitions))
	}
	if definitions[0].Project != "journey" {
		t.Fatalf("expected project journey, got %q", definitions[0].Project)
	}
}
// TestLoadPlanRequiresJourneys verifies that a well-formed plan file loads
// and retains its journey list.
func TestLoadPlanRequiresJourneys(t *testing.T) {
	// NOTE: t.Helper() was removed here — calling it in a Test function
	// (rather than a shared helper) is a misleading no-op.
	path := filepath.Join(t.TempDir(), "journeys.json")
	if err := os.WriteFile(path, []byte(`{"journeys":["auth-landing"]}`), 0644); err != nil {
		t.Fatalf("write plan: %v", err)
	}
	plan, err := LoadPlan(path)
	if err != nil {
		t.Fatalf("load plan: %v", err)
	}
	if len(plan.Journeys) != 1 || plan.Journeys[0] != "auth-landing" {
		t.Fatalf("unexpected plan contents: %+v", plan)
	}
}

View File

@@ -0,0 +1,147 @@
package prreview
import (
"fmt"
"regexp"
"sort"
"strings"
)
// Source identifies who authored a review thread's first comment.
type Source string

const (
	SourceHuman    Source = "human"
	SourceCodex    Source = "codex"
	SourceGreptile Source = "greptile"
	SourceCubic    Source = "cubic"
	SourceBot      Source = "bot"
)

// Comment is a single review comment within a thread.
type Comment struct {
	ID          int    `json:"id"`
	Body        string `json:"body"`
	AuthorLogin string `json:"author_login"`
	URL         string `json:"url,omitempty"`
	CreatedAt   string `json:"created_at,omitempty"`
}

// Thread is one review conversation, optionally anchored to a file/line.
type Thread struct {
	ID         string    `json:"id"`
	IsResolved bool      `json:"is_resolved"`
	IsOutdated bool      `json:"is_outdated"`
	Path       string    `json:"path,omitempty"`
	Line       int       `json:"line,omitempty"`
	StartLine  int       `json:"start_line,omitempty"`
	Comments   []Comment `json:"comments"`
}

// PullRequest bundles a PR with its review threads.
type PullRequest struct {
	Number  int      `json:"number"`
	Title   string   `json:"title"`
	URL     string   `json:"url,omitempty"`
	Threads []Thread `json:"threads"`
}

// ThreadSummary is the triage verdict for one thread.
type ThreadSummary struct {
	Thread      Thread   `json:"thread"`
	Source      Source   `json:"source"`
	Category    string   `json:"category"`
	DuplicateOf string   `json:"duplicate_of,omitempty"`
	Reasons     []string `json:"reasons,omitempty"`
}

// TriageResult is the complete triage output for a pull request.
type TriageResult struct {
	PullRequest PullRequest     `json:"pull_request"`
	Summaries   []ThreadSummary `json:"summaries"`
}

// nonAlphaNum matches the character runs stripped during body normalization.
var nonAlphaNum = regexp.MustCompile(`[^a-z0-9]+`)

// ClassifySource buckets a reviewer login by substring match, checking the
// known review tools first and falling back to a generic bot check, then
// human. The probe order matters: e.g. "greptile-ai[bot]" must classify as
// greptile, not generic bot.
func ClassifySource(login string) Source {
	normalized := strings.ToLower(strings.TrimSpace(login))
	probes := []struct {
		needle string
		source Source
	}{
		{"codex", SourceCodex},
		{"greptile", SourceGreptile},
		{"cubic", SourceCubic},
	}
	for _, probe := range probes {
		if strings.Contains(normalized, probe.needle) {
			return probe.source
		}
	}
	if strings.HasSuffix(normalized, "[bot]") || strings.Contains(normalized, "bot") {
		return SourceBot
	}
	return SourceHuman
}
// Triage classifies every thread of a pull request. Resolved and outdated
// threads are filed as such; remaining ("actionable") threads are checked
// for duplicates of an earlier actionable thread with the same path, line,
// and normalized first-comment body. The output is sorted by category,
// then source, then thread ID.
func Triage(pr PullRequest) TriageResult {
	summaries := make([]ThreadSummary, 0, len(pr.Threads))
	// Maps a duplicate key to the ID of the first actionable thread seen
	// with that key.
	firstActionable := map[string]string{}

	for _, thread := range pr.Threads {
		source := SourceHuman
		if len(thread.Comments) > 0 {
			source = ClassifySource(thread.Comments[0].AuthorLogin)
		}

		summary := ThreadSummary{Thread: thread, Source: source, Category: "actionable"}
		switch {
		case thread.IsResolved:
			summary.Category = "resolved"
			summary.Reasons = append(summary.Reasons, "thread already resolved")
		case thread.IsOutdated:
			summary.Category = "outdated"
			summary.Reasons = append(summary.Reasons, "thread marked outdated by GitHub")
		}

		if summary.Category == "actionable" {
			key := duplicateKey(thread)
			if original, dup := firstActionable[key]; dup {
				summary.Category = "duplicate"
				summary.DuplicateOf = original
				summary.Reasons = append(summary.Reasons, fmt.Sprintf("duplicates %s", original))
			} else {
				firstActionable[key] = thread.ID
			}
		}

		if summary.Category == "actionable" {
			if source == SourceHuman {
				summary.Reasons = append(summary.Reasons, "human review requires explicit response or fix")
			} else {
				summary.Reasons = append(summary.Reasons, fmt.Sprintf("%s-generated review comment", source))
			}
		}

		summaries = append(summaries, summary)
	}

	sort.Slice(summaries, func(i, j int) bool {
		if summaries[i].Category != summaries[j].Category {
			return summaries[i].Category < summaries[j].Category
		}
		if summaries[i].Source != summaries[j].Source {
			return summaries[i].Source < summaries[j].Source
		}
		return summaries[i].Thread.ID < summaries[j].Thread.ID
	})

	return TriageResult{PullRequest: pr, Summaries: summaries}
}
// duplicateKey derives the identity used for duplicate detection: the
// thread's path and line, plus the normalized first-comment body when one
// exists, joined with "::".
func duplicateKey(thread Thread) string {
	key := thread.Path + "::" + fmt.Sprintf("%d", thread.Line)
	if len(thread.Comments) > 0 {
		key += "::" + normalizeBody(thread.Comments[0].Body)
	}
	return key
}
// normalizeBody canonicalizes a comment body for comparison: lowercase,
// non-alphanumeric runs replaced by spaces, and whitespace collapsed to
// single spaces.
func normalizeBody(body string) string {
	collapsed := nonAlphaNum.ReplaceAllString(strings.ToLower(strings.TrimSpace(body)), " ")
	return strings.Join(strings.Fields(collapsed), " ")
}

View File

@@ -0,0 +1,61 @@
package prreview
import "testing"
// TestClassifySource checks login-to-source classification for each known
// review tool, a generic bot, and a human fallback.
func TestClassifySource(t *testing.T) {
	// NOTE: t.Helper() was removed here — it belongs in helper functions,
	// not Test functions, where it is a misleading no-op.
	cases := map[string]Source{
		"openai-codex-reviewer[bot]": SourceCodex,
		"greptile-ai[bot]":           SourceGreptile,
		"cubic-review[bot]":          SourceCubic,
		"renovate[bot]":              SourceBot,
		"human-user":                 SourceHuman,
	}
	for login, expected := range cases {
		// Errorf (not Fatalf) so every misclassification is reported in one run.
		if actual := ClassifySource(login); actual != expected {
			t.Errorf("classify %q: expected %s, got %s", login, expected, actual)
		}
	}
}
// TestTriageMarksDuplicates verifies that two threads on the same file/line
// whose first comments normalize to the same text are deduplicated, with
// the later one marked as a duplicate.
func TestTriageMarksDuplicates(t *testing.T) {
	// NOTE: t.Helper() was removed here — calling it in a Test function
	// (rather than a shared helper) is a misleading no-op.
	result := Triage(PullRequest{
		Number: 42,
		Threads: []Thread{
			{
				ID:   "thread-1",
				Path: "web/src/foo.tsx",
				Line: 10,
				Comments: []Comment{
					{ID: 1, AuthorLogin: "greptile-ai[bot]", Body: "Handle null values here."},
				},
			},
			{
				ID:   "thread-2",
				Path: "web/src/foo.tsx",
				Line: 10,
				Comments: []Comment{
					// Same text modulo punctuation — must normalize equal.
					{ID: 2, AuthorLogin: "openai-codex-reviewer[bot]", Body: "Handle null values here"},
				},
			},
		},
	})
	if len(result.Summaries) != 2 {
		t.Fatalf("expected 2 summaries, got %d", len(result.Summaries))
	}
	var duplicateFound bool
	for _, summary := range result.Summaries {
		if summary.Thread.ID == "thread-2" && summary.Category == "duplicate" {
			duplicateFound = true
		}
	}
	if !duplicateFound {
		t.Fatalf("expected duplicate thread to be detected: %+v", result.Summaries)
	}
}

View File

@@ -3,8 +3,13 @@ import * as dotenv from "dotenv";
dotenv.config({ path: ".vscode/.env" });
const journeyMode = process.env.PLAYWRIGHT_JOURNEY_MODE === "1";
export default defineConfig({
globalSetup: require.resolve("./tests/e2e/global-setup"),
globalSetup:
process.env.PLAYWRIGHT_SKIP_GLOBAL_SETUP === "1"
? undefined
: require.resolve("./tests/e2e/global-setup"),
timeout: 100000, // 100 seconds timeout
expect: {
timeout: 15000, // 15 seconds timeout for all assertions to reduce flakiness
@@ -26,12 +31,12 @@ export default defineConfig({
reporter: [["list"]],
// Only run Playwright tests from tests/e2e directory (ignore Jest tests in src/)
testMatch: /.*\/tests\/e2e\/.*\.spec\.ts/,
outputDir: "output/playwright",
outputDir: process.env.PLAYWRIGHT_OUTPUT_DIR || "output/playwright",
use: {
// Base URL for the application, can be overridden via BASE_URL environment variable
baseURL: process.env.BASE_URL || "http://localhost:3000",
// Capture trace on failure
trace: "retain-on-failure",
trace: journeyMode ? "on" : "retain-on-failure",
},
projects: [
{
@@ -65,5 +70,15 @@ export default defineConfig({
},
grep: /@lite/,
},
{
name: "journey",
use: {
...devices["Desktop Chrome"],
viewport: { width: 1280, height: 720 },
video: "on",
},
grep: /@journey/,
workers: 1,
},
],
});

View File

@@ -75,10 +75,6 @@ export interface Settings {
// Factory defaults for the restore button.
default_user_file_max_upload_size_mb?: number;
default_file_token_count_threshold_k?: number;
// True when the backend runs inside a container (Docker/Podman).
// Used to default local-service URLs to host.docker.internal.
is_containerized?: boolean;
}
export enum NotificationType {

View File

@@ -0,0 +1,106 @@
import React from "react";
import { render, screen, waitFor } from "@tests/setup/test-utils";
import { RedirectError } from "@/lib/fetcher";
import AppHealthBanner from "./AppHealthBanner";
// Controllable stand-ins for the component's data sources. Each test sets
// these before render to simulate a particular auth state.
const mockLogout = jest.fn();
const mockUseSWR = jest.fn();
const mockUseCurrentUser = jest.fn();
const mockUsePathname = jest.fn();

// jest.mock factories are hoisted above the imports by Jest, so they must
// only reference the mock fns lazily (inside the returned functions).
jest.mock("swr", () => ({
  __esModule: true,
  ...jest.requireActual("swr"),
  default: (...args: unknown[]) => mockUseSWR(...args),
}));
jest.mock("next/navigation", () => ({
  usePathname: () => mockUsePathname(),
  useRouter: () => ({
    push: jest.fn(),
  }),
}));
jest.mock("@/hooks/useCurrentUser", () => ({
  useCurrentUser: () => mockUseCurrentUser(),
}));
jest.mock("@/lib/user", () => ({
  logout: (...args: unknown[]) => mockLogout(...args),
  refreshToken: jest.fn(),
}));

describe("AppHealthBanner logout handling", () => {
  beforeEach(() => {
    // Baseline: no SWR error, no current user, sitting on an auth page.
    jest.clearAllMocks();
    mockLogout.mockResolvedValue(undefined);
    mockUseSWR.mockReturnValue({ error: undefined });
    mockUseCurrentUser.mockReturnValue({
      user: undefined,
      mutateUser: jest.fn(),
      userError: undefined,
    });
    mockUsePathname.mockReturnValue("/auth/login");
  });

  it("does not show the logged-out modal or call logout on auth pages after a 403", async () => {
    // A 403 while on /auth/* should be ignored — the user was never logged in.
    mockUseCurrentUser.mockReturnValue({
      user: undefined,
      mutateUser: jest.fn(),
      userError: {
        status: 403,
      },
    });
    render(<AppHealthBanner />);
    await waitFor(() => {
      expect(mockLogout).not.toHaveBeenCalled();
    });
    expect(
      screen.queryByText(/you have been logged out/i)
    ).not.toBeInTheDocument();
  });

  it("does not show the logged-out modal on a fresh unauthenticated load", async () => {
    // An auth redirect on first load (no user ever seen) must stay silent.
    mockUsePathname.mockReturnValue("/");
    mockUseSWR.mockReturnValue({
      error: new RedirectError("auth redirect", 403, {}),
    });
    render(<AppHealthBanner />);
    await waitFor(() => {
      expect(mockLogout).not.toHaveBeenCalled();
    });
    expect(
      screen.queryByText(/you have been logged out/i)
    ).not.toBeInTheDocument();
  });

  it("shows the logged-out modal after a 403 when a user was previously loaded", async () => {
    // Once a user has been seen, a 403 on an app page means a real session
    // loss: logout is called and the modal appears.
    mockUsePathname.mockReturnValue("/chat");
    mockUseCurrentUser.mockReturnValue({
      user: {
        id: "user-1",
        email: "a@example.com",
      },
      mutateUser: jest.fn(),
      userError: {
        status: 403,
      },
    });
    render(<AppHealthBanner />);
    await waitFor(() => {
      expect(mockLogout).toHaveBeenCalled();
    });
    expect(
      await screen.findByText(/you have been logged out/i)
    ).toBeInTheDocument();
  });
});

View File

@@ -4,7 +4,7 @@ import { errorHandlingFetcher, RedirectError } from "@/lib/fetcher";
import useSWR from "swr";
import { SWR_KEYS } from "@/lib/swr-keys";
import Modal from "@/refresh-components/Modal";
import { useCallback, useEffect, useState, useRef } from "react";
import { useCallback, useEffect, useRef, useState } from "react";
import { getSecondsUntilExpiration } from "@/lib/time";
import { refreshToken } from "@/lib/user";
import { NEXT_PUBLIC_CUSTOM_REFRESH_URL } from "@/lib/constants";
@@ -24,19 +24,51 @@ export default function AppHealthBanner() {
const pathname = usePathname();
const expirationTimeoutRef = useRef<NodeJS.Timeout | null>(null);
const refreshIntervalRef = useRef<NodeJS.Timer | null>(null);
const pathnameRef = useRef<string | null>(pathname);
const hasSeenAuthenticatedUserRef = useRef(false);
const isAuthPage = pathname?.startsWith("/auth") ?? false;
const { user, mutateUser, userError } = useCurrentUser();
pathnameRef.current = pathname;
if (user) {
hasSeenAuthenticatedUserRef.current = true;
}
const maybeShowLoggedOutModal = useCallback(() => {
const currentPath = pathnameRef.current;
if (
!hasSeenAuthenticatedUserRef.current ||
!currentPath ||
currentPath.startsWith("/auth")
) {
return;
}
setShowLoggedOutModal(true);
}, []);
// Handle 403 errors from the /api/me endpoint.
// Skip entirely on auth pages — the user isn't logged in yet, so there's
// nothing to "log out" of and hitting /auth/logout just creates noise.
useEffect(() => {
if (userError && userError.status === 403 && !pathname?.includes("/auth")) {
logout().then(() => {
setShowLoggedOutModal(true);
if (
userError &&
userError.status === 403 &&
hasSeenAuthenticatedUserRef.current &&
pathname &&
!isAuthPage
) {
let cancelled = false;
logout().finally(() => {
if (!cancelled) {
maybeShowLoggedOutModal();
}
});
return () => {
cancelled = true;
};
}
}, [userError, pathname]);
}, [userError, pathname, isAuthPage, maybeShowLoggedOutModal]);
// Function to handle the "Log in" button click
function handleLogin() {
@@ -67,13 +99,10 @@ export default function AppHealthBanner() {
const timeUntilExpire = (secondsUntilExpiration + 10) * 1000;
expirationTimeoutRef.current = setTimeout(() => {
setExpired(true);
if (!pathname?.includes("/auth")) {
setShowLoggedOutModal(true);
}
maybeShowLoggedOutModal();
}, timeUntilExpire);
},
[pathname]
[maybeShowLoggedOutModal]
);
// Clean up any timeouts/intervals when component unmounts
@@ -187,6 +216,12 @@ export default function AppHealthBanner() {
}
}, [user, setupExpirationTimeout, mutateUser]);
useEffect(() => {
if (error instanceof RedirectError || expired) {
maybeShowLoggedOutModal();
}
}, [error, expired, maybeShowLoggedOutModal]);
// Logged out modal
if (showLoggedOutModal) {
return (
@@ -211,9 +246,6 @@ export default function AppHealthBanner() {
}
if (error instanceof RedirectError || expired) {
if (!pathname?.includes("/auth")) {
setShowLoggedOutModal(true);
}
return null;
} else {
return (

View File

@@ -26,7 +26,8 @@ import {
import { fetchModels } from "@/lib/llmConfig/svc";
import { toast } from "@/hooks/useToast";
import { refreshLlmProviderCaches } from "@/lib/llmConfig/cache";
import { useSettingsContext } from "@/providers/SettingsProvider";
const DEFAULT_API_BASE = "http://localhost:1234";
interface LMStudioModalValues extends BaseLLMModalValues {
api_base: string;
@@ -108,10 +109,6 @@ export default function LMStudioModal({
}: LLMProviderFormProps) {
const isOnboarding = variant === "onboarding";
const { mutate } = useSWRConfig();
const { settings } = useSettingsContext();
const defaultApiBase = settings.is_containerized
? "http://host.docker.internal:1234"
: "http://localhost:1234";
const onClose = () => onOpenChange?.(false);
@@ -121,7 +118,7 @@ export default function LMStudioModal({
LLMProviderName.LM_STUDIO,
existingLlmProvider
),
api_base: existingLlmProvider?.api_base ?? defaultApiBase,
api_base: existingLlmProvider?.api_base ?? DEFAULT_API_BASE,
custom_config: {
LM_STUDIO_API_KEY: existingLlmProvider?.custom_config?.LM_STUDIO_API_KEY,
},

View File

@@ -30,7 +30,8 @@ import { Card } from "@opal/components";
import { toast } from "@/hooks/useToast";
import { refreshLlmProviderCaches } from "@/lib/llmConfig/cache";
import InputTypeInField from "@/refresh-components/form/InputTypeInField";
import { useSettingsContext } from "@/providers/SettingsProvider";
const DEFAULT_API_BASE = "http://127.0.0.1:11434";
const CLOUD_API_BASE = "https://ollama.com";
enum Tab {
@@ -155,10 +156,6 @@ export default function OllamaModal({
}: LLMProviderFormProps) {
const isOnboarding = variant === "onboarding";
const { mutate } = useSWRConfig();
const { settings } = useSettingsContext();
const defaultApiBase = settings.is_containerized
? "http://host.docker.internal:11434"
: "http://127.0.0.1:11434";
const apiKey = existingLlmProvider?.custom_config?.OLLAMA_API_KEY;
const defaultTab =
existingLlmProvider && !!apiKey ? Tab.TAB_CLOUD : Tab.TAB_SELF_HOSTED;
@@ -172,7 +169,7 @@ export default function OllamaModal({
LLMProviderName.OLLAMA_CHAT,
existingLlmProvider
),
api_base: existingLlmProvider?.api_base ?? defaultApiBase,
api_base: existingLlmProvider?.api_base ?? DEFAULT_API_BASE,
custom_config: {
OLLAMA_API_KEY: apiKey,
},

View File

@@ -0,0 +1,42 @@
import { test, expect } from "@playwright/test";
import { captureJourneyCheckpoint } from "@tests/e2e/utils/journey";
import { logPageState } from "@tests/e2e/utils/pageStateLogger";
// Journey test: a brand-new (cookie-less) visitor should land on the auth
// UI cleanly, without a stale "logged out" modal appearing on first load.
test.describe("Journey: auth landing", () => {
  test.beforeEach(async ({ page }) => {
    // Start from a truly fresh session -- no auth cookies carried over.
    await page.context().clearCookies();
  });
  test("Fresh auth landing is clean @journey", async ({ page }) => {
    await page.goto("/", { waitUntil: "domcontentloaded" });
    // Unauthenticated visits should redirect to the login/signup route.
    await expect
      .poll(() => page.url(), { timeout: 60000 })
      .toMatch(/\/auth\/(login|signup)(\?.*)?$/);
    // Wait until recognizable auth-page copy has actually rendered.
    await expect
      .poll(async () => (await page.locator("body").innerText()).trim(), {
        timeout: 60000,
      })
      .toMatch(
        /Create account|Create Account|Already have an account|New to Onyx\?|Sign In/i
      );
    // Short settle window so a late-appearing logged-out modal is caught.
    await page.waitForTimeout(1000);
    const loggedOutModal = page.getByText("You Have Been Logged Out", {
      exact: true,
    });
    // Emit structured page state for CI log triage before asserting.
    console.log(
      `[journey-auth-landing] ${JSON.stringify({
        url: page.url(),
        loggedOutModalVisible: (await loggedOutModal.count()) > 0,
      })}`
    );
    await logPageState(page, "journey auth landing");
    await captureJourneyCheckpoint(page, "auth-landing");
    // Core assertions: no spurious logout modal, and auth copy is present.
    await expect(loggedOutModal).toHaveCount(0);
    await expect(page.locator("body")).toContainText(
      /New to Onyx\?|Create an Account|Sign In/
    );
  });
});

View File

@@ -0,0 +1,12 @@
{
"journeys": [
{
"name": "auth-landing",
"description": "Fresh unauthenticated load lands on the auth UI without showing a logged-out modal.",
"test_path": "tests/e2e/journeys/auth_landing.spec.ts",
"project": "journey",
"requires_model_server": false,
"skip_global_setup": true
}
]
}

View File

@@ -0,0 +1,51 @@
import { mkdir, writeFile } from "fs/promises";
import path from "path";
import type { Page } from "@playwright/test";
// Resolves the directory for journey capture artifacts, or null when the
// PLAYWRIGHT_JOURNEY_CAPTURE_DIR env var is unset or empty (capture is
// opt-in and disabled by default).
function captureDir(): string | null {
  const configured = process.env.PLAYWRIGHT_JOURNEY_CAPTURE_DIR;
  return configured ? configured : null;
}
// Normalizes an arbitrary checkpoint name into a filesystem-friendly slug:
// lowercased, runs of non-alphanumeric characters collapsed to single
// dashes, leading/trailing dashes stripped. May return "" if nothing is left.
function slug(value: string): string {
  const lowered = value.trim().toLowerCase();
  const dashed = lowered.replace(/[^a-z0-9]+/g, "-");
  return dashed.replace(/^-+/, "").replace(/-+$/, "");
}
/**
 * Persists a named checkpoint of the current journey test: a full-page
 * screenshot (`<slug>.png`) plus a small JSON metadata file (`<slug>.json`
 * with checkpoint name, page url, page title, and an ISO timestamp).
 *
 * No-op unless PLAYWRIGHT_JOURNEY_CAPTURE_DIR is set (see captureDir()).
 *
 * @param page - Playwright page to capture.
 * @param name - Human-readable checkpoint name; slugified for filenames.
 */
export async function captureJourneyCheckpoint(
  page: Page,
  name: string
): Promise<void> {
  const dir = captureDir();
  if (!dir) {
    // Capture is opt-in; silently skip when no target directory is set.
    return;
  }
  // Fall back to a generic name when slugification strips everything.
  const checkpoint = slug(name) || "checkpoint";
  await mkdir(dir, { recursive: true });
  const screenshotPath = path.join(dir, `${checkpoint}.png`);
  const metadataPath = path.join(dir, `${checkpoint}.json`);
  // Gather metadata first (page.title() is async) so both file writes can
  // then run concurrently -- they are independent of each other.
  const metadata = {
    checkpoint,
    url: page.url(),
    title: await page.title(),
    captured_at: new Date().toISOString(),
  };
  await Promise.all([
    page.screenshot({ path: screenshotPath, fullPage: true }),
    writeFile(metadataPath, JSON.stringify(metadata, null, 2)),
  ]);
}