Compare commits

..

3 Commits

Author SHA1 Message Date
Wenxi Onyx
4f793ff870 Merge remote-tracking branch 'origin/main' into codex/agent-lab 2026-04-09 16:15:03 -07:00
Wenxi Onyx
55f570261f Merge remote-tracking branch 'origin/main' into codex/agent-lab 2026-04-09 15:07:50 -07:00
Wenxi Onyx
289a7b807e agent lab init 2026-04-09 15:07:02 -07:00
132 changed files with 7877 additions and 3391 deletions

View File

@@ -1,6 +1,7 @@
FROM ubuntu:26.04@sha256:cc925e589b7543b910fea57a240468940003fbfc0515245a495dd0ad8fe7cef1
RUN apt-get update && apt-get install -y --no-install-recommends \
acl \
curl \
fd-find \
fzf \

View File

@@ -14,6 +14,12 @@ A containerized development environment for working on Onyx.
## Usage
### VS Code
1. Install the [Dev Containers extension](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-containers)
2. Open this repo in VS Code
3. "Reopen in Container" when prompted
### CLI (`ods dev`)
The [`ods` devtools CLI](../tools/ods/README.md) provides workspace-aware wrappers
@@ -33,8 +39,25 @@ ods dev exec npm test
ods dev stop
```
If you don't have `ods` installed, use the `devcontainer` CLI directly:
```bash
npm install -g @devcontainers/cli
devcontainer up --workspace-folder .
devcontainer exec --workspace-folder . zsh
```
## Restarting the container
### VS Code
Open the Command Palette (`Ctrl+Shift+P` / `Cmd+Shift+P`) and run:
- **Dev Containers: Reopen in Container** — restarts the container without rebuilding
### CLI
```bash
# Restart the container
ods dev restart
@@ -43,6 +66,12 @@ ods dev restart
ods dev rebuild
```
Or without `ods`:
```bash
devcontainer up --workspace-folder . --remove-existing-container
```
## Image
The devcontainer uses a prebuilt image published to `onyxdotapp/onyx-devcontainer`.
@@ -59,19 +88,15 @@ The `devcontainer` target is defined in `docker-bake.hcl` at the repo root.
## User & permissions
The container runs as the `dev` user by default (`remoteUser` in devcontainer.json).
An init script (`init-dev-user.sh`) runs at container start to ensure the active
user has read/write access to the bind-mounted workspace:
An init script (`init-dev-user.sh`) runs at container start to ensure `dev` has
read/write access to the bind-mounted workspace:
- **Standard Docker** — `dev`'s UID/GID is remapped to match the workspace owner,
so file permissions work seamlessly.
- **Rootless Docker** — The workspace appears as root-owned (UID 0) inside the
container due to user-namespace mapping. `ods dev up` auto-detects rootless Docker
and sets `DEVCONTAINER_REMOTE_USER=root` so the container runs as root — which
maps back to your host user via the user namespace. New files are owned by your
host UID and no ACL workarounds are needed.
To override the auto-detection, set `DEVCONTAINER_REMOTE_USER` before running
`ods dev up`.
container due to user-namespace mapping. The init script grants `dev` access via
POSIX ACLs (`setfacl`), which adds a few seconds to the first container start on
large repos.
## Docker socket
@@ -84,7 +109,9 @@ from inside. `ods dev` auto-detects the socket path and sets `DOCKER_SOCK`:
| macOS (Docker Desktop) | `~/.docker/run/docker.sock` |
| Linux (standard Docker) | `/var/run/docker.sock` |
To override, set `DOCKER_SOCK` before running `ods dev up`.
To override, set `DOCKER_SOCK` before running `ods dev up`. When using the
VS Code extension or `devcontainer` CLI directly (without `ods`), you must set
`DOCKER_SOCK` yourself.
## Firewall

View File

@@ -7,15 +7,13 @@
"source=${localEnv:HOME}/.claude,target=/home/dev/.claude,type=bind",
"source=${localEnv:HOME}/.claude.json,target=/home/dev/.claude.json,type=bind",
"source=${localEnv:HOME}/.zshrc,target=/home/dev/.zshrc.host,type=bind,readonly",
"source=${localEnv:HOME}/.gitconfig,target=/home/dev/.gitconfig,type=bind,readonly",
"source=${localEnv:HOME}/.config/nvim,target=/home/dev/.config/nvim,type=bind,readonly",
"source=${localEnv:HOME}/.gitconfig,target=/home/dev/.gitconfig.host,type=bind,readonly",
"source=${localEnv:HOME}/.ssh,target=/home/dev/.ssh.host,type=bind,readonly",
"source=${localEnv:HOME}/.config/nvim,target=/home/dev/.config/nvim.host,type=bind,readonly",
"source=onyx-devcontainer-cache,target=/home/dev/.cache,type=volume",
"source=onyx-devcontainer-local,target=/home/dev/.local,type=volume"
],
"containerEnv": {
"SSH_AUTH_SOCK": "/tmp/ssh-agent.sock"
},
"remoteUser": "${localEnv:DEVCONTAINER_REMOTE_USER:dev}",
"remoteUser": "dev",
"updateRemoteUserUID": false,
"workspaceMount": "source=${localWorkspaceFolder},target=/workspace,type=bind,consistency=delegated",
"workspaceFolder": "/workspace",

View File

@@ -8,68 +8,38 @@ set -euo pipefail
# We remap dev to that UID -- fast and seamless.
#
# Rootless Docker: Workspace appears as root-owned (UID 0) inside the
# container due to user-namespace mapping. Requires
# DEVCONTAINER_REMOTE_USER=root (set automatically by
# ods dev up). Container root IS the host user, so
# bind-mounts and named volumes are symlinked into /root.
# container due to user-namespace mapping. We can't remap
# dev to UID 0 (that's root), so we grant access with
# POSIX ACLs instead.
WORKSPACE=/workspace
TARGET_USER=dev
REMOTE_USER="${SUDO_USER:-$TARGET_USER}"
WS_UID=$(stat -c '%u' "$WORKSPACE")
WS_GID=$(stat -c '%g' "$WORKSPACE")
DEV_UID=$(id -u "$TARGET_USER")
DEV_GID=$(id -g "$TARGET_USER")
# devcontainer.json bind-mounts and named volumes target /home/dev regardless
# of remoteUser. When running as root ($HOME=/root), Phase 1 bridges the gap
# with symlinks from ACTIVE_HOME → MOUNT_HOME.
MOUNT_HOME=/home/"$TARGET_USER"
DEV_HOME=/home/"$TARGET_USER"
if [ "$REMOTE_USER" = "root" ]; then
ACTIVE_HOME="/root"
else
ACTIVE_HOME="$MOUNT_HOME"
# Ensure directories that tools expect exist under ~dev.
# ~/.local and ~/.cache are named Docker volumes -- ensure they are owned by dev.
mkdir -p "$DEV_HOME"/.local/state "$DEV_HOME"/.local/share
chown -R "$TARGET_USER":"$TARGET_USER" "$DEV_HOME"/.local
chown -R "$TARGET_USER":"$TARGET_USER" "$DEV_HOME"/.cache
# Copy host configs mounted as *.host into their real locations.
# This gives the dev user owned copies without touching host originals.
if [ -d "$DEV_HOME/.ssh.host" ]; then
cp -a "$DEV_HOME/.ssh.host" "$DEV_HOME/.ssh"
chmod 700 "$DEV_HOME/.ssh"
chmod 600 "$DEV_HOME"/.ssh/id_* 2>/dev/null || true
chown -R "$TARGET_USER":"$TARGET_USER" "$DEV_HOME/.ssh"
fi
# ── Phase 1: home directory setup ───────────────────────────────────
# ~/.local and ~/.cache are named Docker volumes mounted under MOUNT_HOME.
mkdir -p "$MOUNT_HOME"/.local/state "$MOUNT_HOME"/.local/share
# When running as root, symlink bind-mounts and named volumes into /root
# so that $HOME-relative tools (Claude Code, git, etc.) find them.
if [ "$ACTIVE_HOME" != "$MOUNT_HOME" ]; then
for item in .claude .cache .local; do
[ -d "$MOUNT_HOME/$item" ] || continue
if [ -e "$ACTIVE_HOME/$item" ] && [ ! -L "$ACTIVE_HOME/$item" ]; then
echo "warning: replacing $ACTIVE_HOME/$item with symlink to $MOUNT_HOME/$item" >&2
rm -rf "$ACTIVE_HOME/$item"
fi
ln -sfn "$MOUNT_HOME/$item" "$ACTIVE_HOME/$item"
done
# Symlink files (not directories).
for file in .claude.json .gitconfig .zshrc.host; do
[ -f "$MOUNT_HOME/$file" ] && ln -sf "$MOUNT_HOME/$file" "$ACTIVE_HOME/$file"
done
# Nested mount: .config/nvim
if [ -d "$MOUNT_HOME/.config/nvim" ]; then
mkdir -p "$ACTIVE_HOME/.config"
if [ -e "$ACTIVE_HOME/.config/nvim" ] && [ ! -L "$ACTIVE_HOME/.config/nvim" ]; then
echo "warning: replacing $ACTIVE_HOME/.config/nvim with symlink" >&2
rm -rf "$ACTIVE_HOME/.config/nvim"
fi
ln -sfn "$MOUNT_HOME/.config/nvim" "$ACTIVE_HOME/.config/nvim"
fi
fi
# ── Phase 2: workspace access ───────────────────────────────────────
# Root always has workspace access; Phase 1 handled home setup.
if [ "$REMOTE_USER" = "root" ]; then
exit 0
if [ -d "$DEV_HOME/.config/nvim.host" ]; then
mkdir -p "$DEV_HOME/.config"
cp -a "$DEV_HOME/.config/nvim.host" "$DEV_HOME/.config/nvim"
chown -R "$TARGET_USER":"$TARGET_USER" "$DEV_HOME/.config/nvim"
fi
# Already matching -- nothing to do.
@@ -91,17 +61,45 @@ if [ "$WS_UID" != "0" ]; then
echo "warning: failed to remap $TARGET_USER UID to $WS_UID" >&2
fi
fi
if ! chown -R "$TARGET_USER":"$TARGET_USER" "$MOUNT_HOME" 2>&1; then
echo "warning: failed to chown $MOUNT_HOME" >&2
if ! chown -R "$TARGET_USER":"$TARGET_USER" /home/"$TARGET_USER" 2>&1; then
echo "warning: failed to chown /home/$TARGET_USER" >&2
fi
else
# ── Rootless Docker ──────────────────────────────────────────────
# Workspace is root-owned (UID 0) due to user-namespace mapping.
# The supported path is remoteUser=root (set DEVCONTAINER_REMOTE_USER=root),
# which is handled above. If we reach here, the user is running as dev
# under rootless Docker without the override.
echo "error: rootless Docker detected but remoteUser is not root." >&2
echo " Set DEVCONTAINER_REMOTE_USER=root before starting the container," >&2
echo " or use 'ods dev up' which sets it automatically." >&2
exit 1
# Workspace is root-owned inside the container. Grant dev access
# via POSIX ACLs (preserves ownership, works across the namespace
# boundary).
if command -v setfacl &>/dev/null; then
setfacl -Rm "u:${TARGET_USER}:rwX" "$WORKSPACE"
setfacl -Rdm "u:${TARGET_USER}:rwX" "$WORKSPACE" # default ACL for new files
# Git refuses to operate in repos owned by a different UID.
# Host gitconfig is mounted readonly as ~/.gitconfig.host.
# Create a real ~/.gitconfig that includes it plus container overrides.
printf '[include]\n\tpath = %s/.gitconfig.host\n[safe]\n\tdirectory = %s\n' \
"$DEV_HOME" "$WORKSPACE" > "$DEV_HOME/.gitconfig"
chown "$TARGET_USER":"$TARGET_USER" "$DEV_HOME/.gitconfig"
# If this is a worktree, the main .git dir is bind-mounted at its
# host absolute path. Grant dev access so git operations work.
GIT_COMMON_DIR=$(git -C "$WORKSPACE" rev-parse --git-common-dir 2>/dev/null || true)
if [ -n "$GIT_COMMON_DIR" ] && [ "$GIT_COMMON_DIR" != "$WORKSPACE/.git" ]; then
[ ! -d "$GIT_COMMON_DIR" ] && GIT_COMMON_DIR="$WORKSPACE/$GIT_COMMON_DIR"
if [ -d "$GIT_COMMON_DIR" ]; then
setfacl -Rm "u:${TARGET_USER}:rwX" "$GIT_COMMON_DIR"
setfacl -Rdm "u:${TARGET_USER}:rwX" "$GIT_COMMON_DIR"
git config -f "$DEV_HOME/.gitconfig" --add safe.directory "$(dirname "$GIT_COMMON_DIR")"
fi
fi
# Also fix bind-mounted dirs under ~dev that appear root-owned.
for dir in /home/"$TARGET_USER"/.claude; do
[ -d "$dir" ] && setfacl -Rm "u:${TARGET_USER}:rwX" "$dir" && setfacl -Rdm "u:${TARGET_USER}:rwX" "$dir"
done
[ -f /home/"$TARGET_USER"/.claude.json ] && \
setfacl -m "u:${TARGET_USER}:rw" /home/"$TARGET_USER"/.claude.json
else
echo "warning: setfacl not found; dev user may not have write access to workspace" >&2
echo " install the 'acl' package or set remoteUser to root" >&2
fi
fi

416
AGENTS.md
View File

@@ -1,361 +1,55 @@
# PROJECT KNOWLEDGE BASE
This file provides guidance to AI agents when working with code in this repository.
## KEY NOTES
- If you run into any missing python dependency errors, try running your command with `source .venv/bin/activate` \
to activate the python venv.
- To make tests work, check the `.env` file at the root of the project to find an OpenAI key.
- If using `playwright` to explore the frontend, you can usually log in with username `a@example.com` and password
`a`. The app can be accessed at `http://localhost:3000`.
- You should assume that all Onyx services are running. To verify, you can check the `backend/log` directory to
make sure we see logs coming out from the relevant service.
- To connect to the Postgres database, use: `docker exec -it onyx-relational_db-1 psql -U postgres -c "<SQL>"`
- When making calls to the backend, always go through the frontend. E.g. make a call to `http://localhost:3000/api/persona` not `http://localhost:8080/api/persona`
- Put ALL db operations under the `backend/onyx/db` / `backend/ee/onyx/db` directories. Don't run queries
outside of those directories.
## Project Overview
**Onyx** (formerly Danswer) is an open-source Gen-AI and Enterprise Search platform that connects to company documents, apps, and people. It features a modular architecture with both Community Edition (MIT licensed) and Enterprise Edition offerings.
### Background Workers (Celery)
Onyx uses Celery for asynchronous task processing with multiple specialized workers:
#### Worker Types
1. **Primary Worker** (`celery_app.py`)
- Coordinates core background tasks and system-wide operations
- Handles connector management, document sync, pruning, and periodic checks
- Runs with 4 threads concurrency
- Tasks: connector deletion, vespa sync, pruning, LLM model updates, user file sync
2. **Docfetching Worker** (`docfetching`)
- Fetches documents from external data sources (connectors)
- Spawns docprocessing tasks for each document batch
- Implements watchdog monitoring for stuck connectors
- Configurable concurrency (default from env)
3. **Docprocessing Worker** (`docprocessing`)
- Processes fetched documents through the indexing pipeline:
- Upserts documents to PostgreSQL
- Chunks documents and adds contextual information
- Embeds chunks via model server
- Writes chunks to Vespa vector database
- Updates document metadata
- Configurable concurrency (default from env)
4. **Light Worker** (`light`)
- Handles lightweight, fast operations
- Tasks: vespa operations, document permissions sync, external group sync
- Higher concurrency for quick tasks
5. **Heavy Worker** (`heavy`)
- Handles resource-intensive operations
- Primary task: document pruning operations
- Runs with 4 threads concurrency
6. **KG Processing Worker** (`kg_processing`)
- Handles Knowledge Graph processing and clustering
- Builds relationships between documents
- Runs clustering algorithms
- Configurable concurrency
7. **Monitoring Worker** (`monitoring`)
- System health monitoring and metrics collection
- Monitors Celery queues, process memory, and system status
- Single thread (monitoring doesn't need parallelism)
- Cloud-specific monitoring tasks
8. **User File Processing Worker** (`user_file_processing`)
- Processes user-uploaded files
- Handles user file indexing and project synchronization
- Configurable concurrency
9. **Beat Worker** (`beat`)
- Celery's scheduler for periodic tasks
- Uses DynamicTenantScheduler for multi-tenant support
- Schedules tasks like:
- Indexing checks (every 15 seconds)
- Connector deletion checks (every 20 seconds)
- Vespa sync checks (every 20 seconds)
- Pruning checks (every 20 seconds)
- KG processing (every 60 seconds)
- Monitoring tasks (every 5 minutes)
- Cleanup tasks (hourly)
#### Key Features
- **Thread-based Workers**: All workers use thread pools (not processes) for stability
- **Tenant Awareness**: Multi-tenant support with per-tenant task isolation. There is a
middleware layer that automatically finds the appropriate tenant ID when sending tasks
via Celery Beat.
- **Task Prioritization**: High, Medium, Low priority queues
- **Monitoring**: Built-in heartbeat and liveness checking
- **Failure Handling**: Automatic retry and failure recovery mechanisms
- **Redis Coordination**: Inter-process communication via Redis
- **PostgreSQL State**: Task state and metadata stored in PostgreSQL
#### Important Notes
**Defining Tasks**:
- Always use `@shared_task` rather than `@celery_app`
- Put tasks under `background/celery/tasks/` or `ee/background/celery/tasks`
- Never enqueue a task without an expiration. Always supply `expires=` when
sending tasks, either from the beat schedule or directly from another task. It
should never be acceptable to submit code which enqueues tasks without an
expiration, as doing so can lead to unbounded task queue growth.
**Defining APIs**:
When creating new FastAPI APIs, do NOT use the `response_model` field. Instead, just type the
function.
**Testing Updates**:
If you make any updates to a celery worker and you want to test these changes, you will need
to ask me to restart the celery worker. There is no auto-restart on code-change mechanism.
**Task Time Limits**:
Since all tasks are executed in thread pools, the time limit features of Celery are silently
disabled and won't work. Timeout logic must be implemented within the task itself.
### Code Quality
```bash
# Install and run pre-commit hooks
pre-commit install
pre-commit run --all-files
```
NOTE: Always make sure everything is strictly typed (both in Python and Typescript).
## Architecture Overview
### Technology Stack
- **Backend**: Python 3.11, FastAPI, SQLAlchemy, Alembic, Celery
- **Frontend**: Next.js 15+, React 18, TypeScript, Tailwind CSS
- **Database**: PostgreSQL with Redis caching
- **Search**: Vespa vector database
- **Auth**: OAuth2, SAML, multi-provider support
- **AI/ML**: LangChain, LiteLLM, multiple embedding models
### Directory Structure
```
backend/
├── onyx/
│ ├── auth/ # Authentication & authorization
│ ├── chat/ # Chat functionality & LLM interactions
│ ├── connectors/ # Data source connectors
│ ├── db/ # Database models & operations
│ ├── document_index/ # Vespa integration
│ ├── federated_connectors/ # External search connectors
│ ├── llm/ # LLM provider integrations
│ └── server/ # API endpoints & routers
├── ee/ # Enterprise Edition features
├── alembic/ # Database migrations
└── tests/ # Test suites
web/
├── src/app/ # Next.js app router pages
├── src/components/ # Reusable React components
└── src/lib/ # Utilities & business logic
```
## Frontend Standards
Frontend standards for the `web/` and `desktop/` projects live in `web/AGENTS.md`.
## Database & Migrations
### Running Migrations
```bash
# Standard migrations
alembic upgrade head
# Multi-tenant (Enterprise)
alembic -n schema_private upgrade head
```
### Creating Migrations
```bash
# Create migration
alembic revision -m "description"
# Multi-tenant migration
alembic -n schema_private revision -m "description"
```
Write the migration manually and place it in the file that alembic creates when running the above command.
## Testing Strategy
First, you must activate the virtual environment with `source .venv/bin/activate`.
There are 4 main types of tests within Onyx:
### Unit Tests
These should not assume any Onyx/external services are available to be called.
Interactions with the outside world should be mocked using `unittest.mock`. Generally, only
write these for complex, isolated modules e.g. `citation_processing.py`.
To run them:
```bash
pytest -xv backend/tests/unit
```
### External Dependency Unit Tests
These tests assume that all external dependencies of Onyx are available and callable (e.g. Postgres, Redis,
MinIO/S3, Vespa are running + OpenAI can be called + any request to the internet is fine + etc.).
However, the actual Onyx containers are not running and with these tests we call the function to test directly.
We can also mock components/calls at will.
The goal with these tests is to minimize mocking while giving some flexibility to mock things that are flaky,
need strictly controlled behavior, or need to have their internal behavior validated (e.g. verify a function is called
with certain args, something that would be impossible with proper integration tests).
A great example of this type of test is `backend/tests/external_dependency_unit/connectors/confluence/test_confluence_group_sync.py`.
To run them:
```bash
python -m dotenv -f .vscode/.env run -- pytest backend/tests/external_dependency_unit
```
### Integration Tests
Standard integration tests. Every test in `backend/tests/integration` runs against a real Onyx deployment. We cannot
mock anything in these tests. Prefer writing integration tests (or External Dependency Unit Tests if mocking/internal
verification is necessary) over any other type of test.
Tests are parallelized at a directory level.
When writing integration tests, make sure to check the root `conftest.py` for useful fixtures + the `backend/tests/integration/common_utils` directory for utilities. Prefer calling the appropriate Manager
class in the utils (if one exists) over directly calling the APIs with a library like `requests`. Prefer using fixtures rather than
calling the utilities directly (e.g. do NOT create admin users with
`admin_user = UserManager.create(name="admin_user")`, instead use the `admin_user` fixture).
A great example of this type of test is `backend/tests/integration/tests/streaming_endpoints/test_chat_stream.py`.
To run them:
```bash
python -m dotenv -f .vscode/.env run -- pytest backend/tests/integration
```
### Playwright (E2E) Tests
These tests are an even more complete version of the Integration Tests mentioned above. All services of Onyx are
running, _including_ the Web Server.
Use these tests for anything that requires significant frontend <-> backend coordination.
Tests are located at `web/tests/e2e`. Tests are written in TypeScript.
To run them:
```bash
npx playwright test <TEST_NAME>
```
For shared fixtures, best practices, and detailed guidance, see `backend/tests/README.md`.
## Logs
When (1) writing integration tests or (2) doing live tests (e.g. curl / playwright) you can get access
to logs via the `backend/log/<service_name>_debug.log` file. All Onyx services (api_server, web_server, celery_X)
will be tailing their logs to this file.
## Security Considerations
- Never commit API keys or secrets to repository
- Use encrypted credential storage for connector credentials
- Follow RBAC patterns for new features
- Implement proper input validation with Pydantic models
- Use parameterized queries to prevent SQL injection
## AI/LLM Integration
- Multiple LLM providers supported via LiteLLM
- Configurable models per feature (chat, search, embeddings)
- Streaming support for real-time responses
- Token management and rate limiting
- Custom prompts and agent actions
## Creating a Plan
When creating a plan in the `plans` directory, make sure to include at least these elements:
**Issues to Address**
What the change is meant to do.
**Important Notes**
Things you come across in your research that are important to the implementation.
**Implementation strategy**
How you are going to make the changes happen. High level approach.
**Tests**
What unit (use rarely), external dependency unit, integration, and playwright tests you plan to write to
verify the correct behavior. Don't overtest. Usually, a given change only needs one type of test.
Do NOT include these: _Timeline_, _Rollback plan_
This is a minimal list - feel free to include more. Do NOT write code as part of your plan.
Keep it high level. You can reference certain files or functions though.
Before writing your plan, make sure to do research. Explore the relevant sections in the codebase.
## Error Handling
**Always raise `OnyxError` from `onyx.error_handling.exceptions` instead of `HTTPException`.
Never hardcode status codes or use `starlette.status` / `fastapi.status` constants directly.**
A global FastAPI exception handler converts `OnyxError` into a JSON response with the standard
`{"error_code": "...", "detail": "..."}` shape. This eliminates boilerplate and keeps error
handling consistent across the entire backend.
```python
from onyx.error_handling.error_codes import OnyxErrorCode
from onyx.error_handling.exceptions import OnyxError
# ✅ Good
raise OnyxError(OnyxErrorCode.NOT_FOUND, "Session not found")
# ✅ Good — no extra message needed
raise OnyxError(OnyxErrorCode.UNAUTHENTICATED)
# ✅ Good — upstream service with dynamic status code
raise OnyxError(OnyxErrorCode.BAD_GATEWAY, detail, status_code_override=upstream_status)
# ❌ Bad — using HTTPException directly
raise HTTPException(status_code=404, detail="Session not found")
# ❌ Bad — starlette constant
raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Access denied")
```
Available error codes are defined in `backend/onyx/error_handling/error_codes.py`. If a new error
category is needed, add it there first — do not invent ad-hoc codes.
**Upstream service errors:** When forwarding errors from an upstream service where the HTTP
status code is dynamic (comes from the upstream response), use `status_code_override`:
```python
raise OnyxError(OnyxErrorCode.BAD_GATEWAY, detail, status_code_override=e.response.status_code)
```
## Best Practices
In addition to the other content in this file, best practices for contributing
to the codebase can be found in the "Engineering Best Practices" section of
`CONTRIBUTING.md`. Understand its contents and follow them.
# Project Knowledge Base
This file is the entrypoint for agents working in this repository. Keep it small.
## Start Here
- General development workflow and repo conventions: [CONTRIBUTING.md](./CONTRIBUTING.md)
- Frontend standards for `web/` and `desktop/`: [web/AGENTS.md](./web/AGENTS.md)
- Backend testing strategy and commands: [backend/tests/README.md](./backend/tests/README.md)
- Celery worker and task guidance: [backend/onyx/background/celery/README.md](./backend/onyx/background/celery/README.md)
- Backend API error-handling rules: [backend/onyx/error_handling/README.md](./backend/onyx/error_handling/README.md)
- Plan-writing guidance: [plans/README.md](./plans/README.md)
## Agent-Lab Docs
When working on `agent-lab` or on tasks explicitly about agent-engineering, use:
- [docs/agent/README.md](./docs/agent/README.md)
These docs are the system of record for the `agent-lab` workflow.
## Universal Notes
- For non-trivial work, create the target worktree first and keep the edit, test, and PR loop
inside that worktree. Do not prototype in one checkout and copy the patch into another unless
you are explicitly debugging the harness itself.
- Use `ods worktree create` for harness-managed worktrees. Do not use raw `git worktree add` when
you want the `agent-lab` workflow, because it will skip the manifest, env overlays, dependency
bootstrap, and lane-aware base-ref selection.
- When a change needs browser proof, use the harness journey flow instead of ad hoc screen capture:
record `before` in the target worktree before making the change, then record `after` in that
same worktree after validation. Use `ods journey compare` only when you need to recover a missed
baseline or compare two explicit revisions after the fact.
- After opening a PR, treat review feedback and failing checks as part of the same loop:
use `ods pr-review ...` for GitHub review threads and `ods pr-checks diagnose` plus `ods trace`
for failing Playwright runs.
- PR titles and commit messages should use conventional-commit style such as `fix: ...` or
`feat: ...`. Do not use `[codex]` prefixes in this repo.
- If Python dependencies appear missing, activate the root venv with `source .venv/bin/activate`.
- To make tests work, check the root `.env` file for an OpenAI key.
- If using Playwright to explore the frontend, you can usually log in with username `a@example.com`
and password `a` at `http://localhost:3000`.
- Assume Onyx services are already running unless the task indicates otherwise. Check `backend/log`
if you need to verify service activity.
- When making backend calls in local development flows, go through the frontend proxy:
`http://localhost:3000/api/...`, not `http://localhost:8080/...`.
- Put DB operations under `backend/onyx/db/` or `backend/ee/onyx/db/`. Do not add ad hoc DB access
elsewhere.
## How To Use This File
- Use this file as a map, not a manual.
- Follow the nearest authoritative doc for the subsystem you are changing.
- If a repeated rule matters enough to teach every future agent, document it near the code it
governs or encode it mechanically.

View File

@@ -13,7 +13,6 @@ from ee.onyx.server.license.models import LicenseSource
from onyx.auth.schemas import UserRole
from onyx.cache.factory import get_cache_backend
from onyx.configs.constants import ANONYMOUS_USER_EMAIL
from onyx.db.enums import AccountType
from onyx.db.models import License
from onyx.db.models import User
from onyx.utils.logger import setup_logger
@@ -108,13 +107,12 @@ def get_used_seats(tenant_id: str | None = None) -> int:
Get current seat usage directly from database.
For multi-tenant: counts users in UserTenantMapping for this tenant.
For self-hosted: counts all active users.
For self-hosted: counts all active users (excludes EXT_PERM_USER role
and the anonymous system user).
Only human accounts count toward seat limits.
SERVICE_ACCOUNT (API key dummy users), EXT_PERM_USER, and the
anonymous system user are excluded. BOT (Slack users) ARE counted
because they represent real humans and get upgraded to STANDARD
when they log in via web.
TODO: Exclude API key dummy users from seat counting. API keys create
users with emails like `__DANSWER_API_KEY_*` that should not count toward
seat limits. See: https://linear.app/onyx-app/issue/ENG-3518
"""
if MULTI_TENANT:
from ee.onyx.server.tenants.user_mapping import get_tenant_count
@@ -131,7 +129,6 @@ def get_used_seats(tenant_id: str | None = None) -> int:
User.is_active == True, # type: ignore # noqa: E712
User.role != UserRole.EXT_PERM_USER,
User.email != ANONYMOUS_USER_EMAIL, # type: ignore
User.account_type != AccountType.SERVICE_ACCOUNT,
)
)
return result.scalar() or 0

View File

@@ -11,8 +11,6 @@ require a valid SCIM bearer token.
from __future__ import annotations
import hashlib
import struct
from uuid import UUID
from fastapi import APIRouter
@@ -24,7 +22,6 @@ from fastapi import Response
from fastapi.responses import JSONResponse
from fastapi_users.password import PasswordHelper
from sqlalchemy import func
from sqlalchemy import text
from sqlalchemy.exc import IntegrityError
from sqlalchemy.orm import Session
@@ -68,25 +65,12 @@ from onyx.db.permissions import recompute_user_permissions__no_commit
from onyx.db.users import assign_user_to_default_groups__no_commit
from onyx.utils.logger import setup_logger
from onyx.utils.variable_functionality import fetch_ee_implementation_or_noop
from shared_configs.contextvars import get_current_tenant_id
logger = setup_logger()
# Group names reserved for system default groups (seeded by migration).
_RESERVED_GROUP_NAMES = frozenset({"Admin", "Basic"})
# Namespace prefix for the seat-allocation advisory lock. Hashed together
# with the tenant ID so the lock is scoped per-tenant (unrelated tenants
# never block each other) and cannot collide with unrelated advisory locks.
_SEAT_LOCK_NAMESPACE = "onyx_scim_seat_lock"
def _seat_lock_id_for_tenant(tenant_id: str) -> int:
"""Derive a stable 64-bit signed int lock id for this tenant's seat lock."""
digest = hashlib.sha256(f"{_SEAT_LOCK_NAMESPACE}:{tenant_id}".encode()).digest()
# pg_advisory_xact_lock takes a signed 8-byte int; unpack as such.
return struct.unpack("q", digest[:8])[0]
class ScimJSONResponse(JSONResponse):
"""JSONResponse with Content-Type: application/scim+json (RFC 7644 §3.1)."""
@@ -225,37 +209,12 @@ def _apply_exclusions(
def _check_seat_availability(dal: ScimDAL) -> str | None:
"""Return an error message if seat limit is reached, else None.
Acquires a transaction-scoped advisory lock so that concurrent
SCIM requests are serialized. IdPs like Okta send provisioning
requests in parallel batches — without serialization the check is
vulnerable to a TOCTOU race where N concurrent requests each see
"seats available", all insert, and the tenant ends up over its
seat limit.
The lock is held until the caller's next COMMIT or ROLLBACK, which
means the seat count cannot change between the check here and the
subsequent INSERT/UPDATE. Each call site in this module follows
the pattern: _check_seat_availability → write → dal.commit()
(which releases the lock for the next waiting request).
"""
"""Return an error message if seat limit is reached, else None."""
check_fn = fetch_ee_implementation_or_noop(
"onyx.db.license", "check_seat_availability", None
)
if check_fn is None:
return None
# Transaction-scoped advisory lock — released on dal.commit() / dal.rollback().
# The lock id is derived from the tenant so unrelated tenants never block
# each other, and from a namespace string so it cannot collide with
# unrelated advisory locks elsewhere in the codebase.
lock_id = _seat_lock_id_for_tenant(get_current_tenant_id())
dal.session.execute(
text("SELECT pg_advisory_xact_lock(:lock_id)"),
{"lock_id": lock_id},
)
result = check_fn(dal.session, seats_needed=1)
if not result.available:
return result.error_message or "Seat limit reached"

View File

@@ -0,0 +1,37 @@
# Celery Development Notes
This document is the local reference for Celery worker structure and task-writing rules in Onyx.
## Worker Types
Onyx uses multiple specialized workers:
1. `primary`: coordinates core background tasks and system-wide operations.
2. `docfetching`: fetches documents from connectors and schedules downstream work.
3. `docprocessing`: runs the indexing pipeline for fetched documents.
4. `light`: handles lightweight and fast operations.
5. `heavy`: handles more resource-intensive operations.
6. `kg_processing`: runs knowledge-graph processing and clustering.
7. `monitoring`: collects health and system metrics.
8. `user_file_processing`: processes user-uploaded files.
9. `beat`: schedules periodic work.
For actual implementation details, inspect:
- `backend/onyx/background/celery/apps/`
- `backend/onyx/background/celery/configs/`
- `backend/onyx/background/celery/tasks/`
## Task Rules
- Always decorate tasks with `@shared_task` rather than `@celery_app.task`.
- Put tasks under `background/celery/tasks/` or `ee/background/celery/tasks/`.
- Never enqueue a task without `expires=`. This is a hard requirement because stale queued work can
accumulate without bound.
- Do not rely on Celery time-limit enforcement. These workers run in thread pools, so timeout logic
must be implemented inside the task itself.
## Testing Note
If you change Celery worker code and want to validate it against a running local worker, the worker
usually needs to be restarted manually. There is no general auto-restart on code change.

View File

@@ -818,10 +818,7 @@ def translate_history_to_llm_format(
)
]
# Add image parts. Each image is preceded by a text tag
# carrying its file_id so the LLM can reference the image by
# ID when calling tools like generate_image (which expects
# reference_image_file_ids to edit a specific image).
# Add image parts
for img_file in msg.image_files:
if img_file.file_type == ChatFileType.IMAGE:
try:
@@ -829,12 +826,6 @@ def translate_history_to_llm_format(
base64_data = img_file.to_base64()
image_url = f"data:{image_type};base64,{base64_data}"
content_parts.append(
TextContentPart(
type="text",
text=f"[attached image — file_id: {img_file.file_id}]",
)
)
image_part = ImageContentPart(
type="image_url",
image_url=ImageUrlDetail(

View File

@@ -42,9 +42,6 @@ from onyx.connectors.google_drive.file_retrieval import (
get_all_files_in_my_drive_and_shared,
)
from onyx.connectors.google_drive.file_retrieval import get_external_access_for_folder
from onyx.connectors.google_drive.file_retrieval import (
get_files_by_web_view_links_batch,
)
from onyx.connectors.google_drive.file_retrieval import get_files_in_shared_drive
from onyx.connectors.google_drive.file_retrieval import get_folder_metadata
from onyx.connectors.google_drive.file_retrieval import get_root_folder_id
@@ -73,13 +70,11 @@ from onyx.connectors.interfaces import CheckpointedConnectorWithPermSync
from onyx.connectors.interfaces import CheckpointOutput
from onyx.connectors.interfaces import GenerateSlimDocumentOutput
from onyx.connectors.interfaces import NormalizationResult
from onyx.connectors.interfaces import Resolver
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.interfaces import SlimConnectorWithPermSync
from onyx.connectors.models import ConnectorFailure
from onyx.connectors.models import ConnectorMissingCredentialError
from onyx.connectors.models import Document
from onyx.connectors.models import DocumentFailure
from onyx.connectors.models import EntityFailure
from onyx.connectors.models import HierarchyNode
from onyx.connectors.models import SlimDocument
@@ -207,9 +202,7 @@ class DriveIdStatus(Enum):
class GoogleDriveConnector(
SlimConnectorWithPermSync,
CheckpointedConnectorWithPermSync[GoogleDriveCheckpoint],
Resolver,
SlimConnectorWithPermSync, CheckpointedConnectorWithPermSync[GoogleDriveCheckpoint]
):
def __init__(
self,
@@ -1672,82 +1665,6 @@ class GoogleDriveConnector(
start, end, checkpoint, include_permissions=True
)
@override
def resolve_errors(
    self,
    errors: list[ConnectorFailure],
    include_permissions: bool = False,
) -> Generator[Document | ConnectorFailure | HierarchyNode, None, None]:
    """Re-fetch the documents behind previously recorded failures.

    Batch-fetches each failure's document by its document_id (used here as
    the file's webViewLink), yields a fresh ConnectorFailure for anything
    that still cannot be retrieved, yields ancestor hierarchy nodes for the
    recovered files, and finally yields the re-converted Documents (or
    conversion failures).
    """
    if self._creds is None or self._primary_admin_email is None:
        raise RuntimeError(
            "Credentials missing, should not call this method before calling load_credentials"
        )
    logger.info(f"Resolving {len(errors)} errors")
    # Only failures that recorded a failed_document can be retried; their
    # document ids are passed to the webViewLink batch lookup below.
    doc_ids = [
        failure.failed_document.document_id
        for failure in errors
        if failure.failed_document
    ]
    service = get_drive_service(self.creds, self.primary_admin_email)
    # Permission fields are needed when syncing permissions, and also when
    # exclude_domain_link_only is set (that filter inspects permissions).
    field_type = (
        DriveFileFieldType.WITH_PERMISSIONS
        if include_permissions or self.exclude_domain_link_only
        else DriveFileFieldType.STANDARD
    )
    batch_result = get_files_by_web_view_links_batch(service, doc_ids, field_type)
    # Files that failed again are surfaced as new ConnectorFailures so the
    # caller can replace the old failure records.
    for doc_id, error in batch_result.errors.items():
        yield ConnectorFailure(
            failed_document=DocumentFailure(
                document_id=doc_id,
                document_link=doc_id,
            ),
            failure_message=f"Failed to retrieve file during error resolution: {error}",
            exception=error,
        )
    permission_sync_context = (
        PermissionSyncContext(
            primary_admin_email=self.primary_admin_email,
            google_domain=self.google_domain,
        )
        if include_permissions
        else None
    )
    retrieved_files = [
        RetrievedDriveFile(
            drive_file=file,
            user_email=self.primary_admin_email,
            completion_stage=DriveRetrievalStage.DONE,
        )
        for file in batch_result.files.values()
    ]
    # Emit hierarchy nodes for the ancestors of the recovered files; fresh
    # sets are used since no prior walk state exists for this resolution pass.
    yield from self._get_new_ancestors_for_files(
        files=retrieved_files,
        seen_hierarchy_node_raw_ids=ThreadSafeSet(),
        fully_walked_hierarchy_node_raw_ids=ThreadSafeSet(),
        permission_sync_context=permission_sync_context,
        add_prefix=True,
    )
    # Convert recovered files to Documents in parallel; conversion may itself
    # yield a ConnectorFailure or None (filtered out below).
    func_with_args = [
        (
            self._convert_retrieved_file_to_document,
            (rf, permission_sync_context),
        )
        for rf in retrieved_files
    ]
    results = cast(
        list[Document | ConnectorFailure | None],
        run_functions_tuples_in_parallel(func_with_args, max_workers=8),
    )
    for result in results:
        if result is not None:
            yield result
def _extract_slim_docs_from_google_drive(
self,
checkpoint: GoogleDriveCheckpoint,

View File

@@ -9,7 +9,6 @@ from urllib.parse import urlparse
from googleapiclient.discovery import Resource # type: ignore
from googleapiclient.errors import HttpError # type: ignore
from googleapiclient.http import BatchHttpRequest # type: ignore
from onyx.access.models import ExternalAccess
from onyx.connectors.google_drive.constants import DRIVE_FOLDER_TYPE
@@ -61,8 +60,6 @@ SLIM_FILE_FIELDS = (
)
FOLDER_FIELDS = "nextPageToken, files(id, name, permissions, modifiedTime, webViewLink, shortcutDetails)"
MAX_BATCH_SIZE = 100
HIERARCHY_FIELDS = "id, name, parents, webViewLink, mimeType, driveId"
HIERARCHY_FIELDS_WITH_PERMISSIONS = (
@@ -219,7 +216,7 @@ def get_external_access_for_folder(
def _get_fields_for_file_type(field_type: DriveFileFieldType) -> str:
"""Get the appropriate fields string for files().list() based on the field type enum."""
"""Get the appropriate fields string based on the field type enum"""
if field_type == DriveFileFieldType.SLIM:
return SLIM_FILE_FIELDS
elif field_type == DriveFileFieldType.WITH_PERMISSIONS:
@@ -228,25 +225,6 @@ def _get_fields_for_file_type(field_type: DriveFileFieldType) -> str:
return FILE_FIELDS
def _extract_single_file_fields(list_fields: str) -> str:
"""Convert a files().list() fields string to one suitable for files().get().
List fields look like "nextPageToken, files(field1, field2, ...)"
Single-file fields should be just "field1, field2, ..."
"""
start = list_fields.find("files(")
if start == -1:
return list_fields
inner_start = start + len("files(")
inner_end = list_fields.rfind(")")
return list_fields[inner_start:inner_end]
def _get_single_file_fields(field_type: DriveFileFieldType) -> str:
    """Get the appropriate fields string for files().get() based on the field type enum."""
    list_form = _get_fields_for_file_type(field_type)
    return _extract_single_file_fields(list_form)
def _get_files_in_parent(
service: Resource,
parent_id: str,
@@ -558,74 +536,3 @@ def get_file_by_web_view_link(
)
.execute()
)
class BatchRetrievalResult:
    """Result of a batch file retrieval, separating successes from errors."""

    def __init__(self) -> None:
        # Successful retrievals, keyed by the batch request id (the file's
        # webViewLink at the call sites in this module).
        self.files: dict[str, GoogleDriveFileType] = {}
        # Per-file failures, keyed the same way as `files`.
        self.errors: dict[str, Exception] = {}
def get_files_by_web_view_links_batch(
    service: GoogleDriveService,
    web_view_links: list[str],
    field_type: DriveFileFieldType,
) -> BatchRetrievalResult:
    """Retrieve multiple Google Drive files by webViewLink using the batch API.

    Returns a BatchRetrievalResult containing successful file retrievals
    and errors for any files that could not be fetched.
    Automatically splits into chunks of MAX_BATCH_SIZE.
    """
    fields = _get_single_file_fields(field_type)
    # Common case: everything fits into a single batch request.
    if len(web_view_links) <= MAX_BATCH_SIZE:
        return _get_files_by_web_view_links_batch(service, web_view_links, fields)
    # Otherwise issue one batch per MAX_BATCH_SIZE-sized slice and merge.
    merged = BatchRetrievalResult()
    for offset in range(0, len(web_view_links), MAX_BATCH_SIZE):
        part = _get_files_by_web_view_links_batch(
            service, web_view_links[offset : offset + MAX_BATCH_SIZE], fields
        )
        merged.files.update(part.files)
        merged.errors.update(part.errors)
    return merged
def _get_files_by_web_view_links_batch(
    service: GoogleDriveService,
    web_view_links: list[str],
    fields: str,
) -> BatchRetrievalResult:
    """Single-batch implementation.

    Issues one Google batch HTTP request containing a files().get() call per
    webViewLink. Per-file failures are recorded in the result rather than
    raised, so one bad link does not abort the whole batch.
    """
    result = BatchRetrievalResult()

    def callback(
        request_id: str,
        response: GoogleDriveFileType,
        exception: Exception | None,
    ) -> None:
        # Invoked once per sub-request; request_id is the webViewLink the
        # request was registered under via batch.add() below.
        if exception:
            logger.warning(f"Error retrieving file {request_id}: {exception}")
            result.errors[request_id] = exception
        else:
            result.files[request_id] = response

    batch = cast(BatchHttpRequest, service.new_batch_http_request(callback=callback))
    for web_view_link in web_view_links:
        try:
            file_id = _extract_file_id_from_web_view_link(web_view_link)
            request = service.files().get(
                fileId=file_id,
                supportsAllDrives=True,
                fields=fields,
            )
            batch.add(request, request_id=web_view_link)
        except ValueError as e:
            # Malformed link — record the parse failure and keep going.
            logger.warning(f"Failed to extract file ID from {web_view_link}: {e}")
            result.errors[web_view_link] = e
    batch.execute()
    return result

View File

@@ -298,22 +298,6 @@ class CheckpointedConnectorWithPermSync(CheckpointedConnector[CT]):
raise NotImplementedError
class Resolver(BaseConnector):
    """Interface for connectors that can re-fetch documents which previously
    failed, given their recorded ConnectorFailures."""

    @abc.abstractmethod
    def resolve_errors(
        self,
        errors: list[ConnectorFailure],
        include_permissions: bool = False,
    ) -> Generator[Document | ConnectorFailure | HierarchyNode, None, None]:
        """Attempts to yield back ALL the documents described by the errors, no checkpointing.

        Caller's responsibility is to delete the old ConnectorFailures and replace with the new ones.
        If include_permissions is True, the documents will have permissions synced.
        May also yield HierarchyNode objects for ancestor folders of resolved documents.
        """
        raise NotImplementedError
class HierarchyConnector(BaseConnector):
@abc.abstractmethod
def load_hierarchy(

View File

@@ -60,10 +60,8 @@ logger = setup_logger()
ONE_HOUR = 3600
_MAX_RESULTS_FETCH_IDS = 5000
_MAX_RESULTS_FETCH_IDS = 5000 # 5000
_JIRA_FULL_PAGE_SIZE = 50
# https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-issues/
_JIRA_BULK_FETCH_LIMIT = 100
# Constants for Jira field names
_FIELD_REPORTER = "reporter"
@@ -257,13 +255,15 @@ def _bulk_fetch_request(
return resp.json()["issues"]
def _bulk_fetch_batch(
jira_client: JIRA, issue_ids: list[str], fields: str | None
) -> list[dict[str, Any]]:
"""Fetch a single batch (must be <= _JIRA_BULK_FETCH_LIMIT).
On JSONDecodeError, recursively bisects until it succeeds or reaches size 1."""
def bulk_fetch_issues(
jira_client: JIRA, issue_ids: list[str], fields: str | None = None
) -> list[Issue]:
# TODO(evan): move away from this jira library if they continue to not support
# the endpoints we need. Using private fields is not ideal, but
# is likely fine for now since we pin the library version
try:
return _bulk_fetch_request(jira_client, issue_ids, fields)
raw_issues = _bulk_fetch_request(jira_client, issue_ids, fields)
except requests.exceptions.JSONDecodeError:
if len(issue_ids) <= 1:
logger.exception(
@@ -277,25 +277,12 @@ def _bulk_fetch_batch(
f"Jira bulk-fetch JSON decode failed for batch of {len(issue_ids)} issues. "
f"Splitting into sub-batches of {mid} and {len(issue_ids) - mid}."
)
left = _bulk_fetch_batch(jira_client, issue_ids[:mid], fields)
right = _bulk_fetch_batch(jira_client, issue_ids[mid:], fields)
left = bulk_fetch_issues(jira_client, issue_ids[:mid], fields)
right = bulk_fetch_issues(jira_client, issue_ids[mid:], fields)
return left + right
def bulk_fetch_issues(
jira_client: JIRA, issue_ids: list[str], fields: str | None = None
) -> list[Issue]:
# TODO(evan): move away from this jira library if they continue to not support
# the endpoints we need. Using private fields is not ideal, but
# is likely fine for now since we pin the library version
raw_issues: list[dict[str, Any]] = []
for batch in chunked(issue_ids, _JIRA_BULK_FETCH_LIMIT):
try:
raw_issues.extend(_bulk_fetch_batch(jira_client, list(batch), fields))
except Exception as e:
logger.error(f"Error fetching issues: {e}")
raise
except Exception as e:
logger.error(f"Error fetching issues: {e}")
raise
return [
Issue(jira_client._options, jira_client._session, raw=issue)

View File

@@ -1,4 +1,3 @@
from dataclasses import dataclass
from datetime import datetime
from typing import TypedDict
@@ -7,14 +6,6 @@ from pydantic import BaseModel
from onyx.onyxbot.slack.models import ChannelType
@dataclass(frozen=True)
class DirectThreadFetch:
    """Request to fetch a Slack thread directly by channel and timestamp."""

    # Slack channel id the thread lives in (e.g. "C097NBWMY8Y").
    channel_id: str
    # Thread timestamp in Slack "ts" format (e.g. "1775491616.524769").
    thread_ts: str
class ChannelMetadata(TypedDict):
"""Type definition for cached channel metadata."""

View File

@@ -19,7 +19,6 @@ from onyx.configs.chat_configs import DOC_TIME_DECAY
from onyx.connectors.models import IndexingDocument
from onyx.connectors.models import TextSection
from onyx.context.search.federated.models import ChannelMetadata
from onyx.context.search.federated.models import DirectThreadFetch
from onyx.context.search.federated.models import SlackMessage
from onyx.context.search.federated.slack_search_utils import ALL_CHANNEL_TYPES
from onyx.context.search.federated.slack_search_utils import build_channel_query_filter
@@ -50,6 +49,7 @@ from onyx.server.federated.models import FederatedConnectorDetail
from onyx.utils.logger import setup_logger
from onyx.utils.threadpool_concurrency import run_functions_tuples_in_parallel
from onyx.utils.timing import log_function_time
from shared_configs.configs import DOC_EMBEDDING_CONTEXT_SIZE
logger = setup_logger()
@@ -58,6 +58,7 @@ HIGHLIGHT_END_CHAR = "\ue001"
CHANNEL_METADATA_CACHE_TTL = 60 * 60 * 24 # 24 hours
USER_PROFILE_CACHE_TTL = 60 * 60 * 24 # 24 hours
SLACK_THREAD_CONTEXT_WINDOW = 3 # Number of messages before matched message to include
CHANNEL_METADATA_MAX_RETRIES = 3 # Maximum retry attempts for channel metadata fetching
CHANNEL_METADATA_RETRY_DELAY = 1 # Initial retry delay in seconds (exponential backoff)
@@ -420,94 +421,6 @@ class SlackQueryResult(BaseModel):
filtered_channels: list[str] # Channels filtered out during this query
def _fetch_thread_from_url(
    thread_fetch: DirectThreadFetch,
    access_token: str,
    channel_metadata_dict: dict[str, ChannelMetadata] | None = None,
) -> SlackQueryResult:
    """Fetch a thread directly from a Slack URL via conversations.replies.

    Returns a SlackQueryResult with a single SlackMessage covering the whole
    thread, or an empty result when the fetch fails or yields no messages
    (errors are logged, never raised).
    """
    channel_id = thread_fetch.channel_id
    thread_ts = thread_fetch.thread_ts
    slack_client = WebClient(token=access_token)
    try:
        response = slack_client.conversations_replies(
            channel=channel_id,
            ts=thread_ts,
        )
        response.validate()
        messages: list[dict[str, Any]] = response.get("messages", [])
    except SlackApiError as e:
        logger.warning(
            f"Failed to fetch thread from URL (channel={channel_id}, ts={thread_ts}): {e}"
        )
        return SlackQueryResult(messages=[], filtered_channels=[])
    if not messages:
        logger.warning(
            f"No messages found for URL override (channel={channel_id}, ts={thread_ts})"
        )
        return SlackQueryResult(messages=[], filtered_channels=[])
    # Build thread text from all messages
    thread_text = _build_thread_text(messages, access_token, None, slack_client)
    # Get channel name from metadata cache or API
    channel_name = "unknown"
    if channel_metadata_dict and channel_id in channel_metadata_dict:
        channel_name = channel_metadata_dict[channel_id].get("name", "unknown")
    else:
        try:
            ch_response = slack_client.conversations_info(channel=channel_id)
            ch_response.validate()
            channel_info: dict[str, Any] = ch_response.get("channel", {})
            channel_name = channel_info.get("name", "unknown")
        except SlackApiError:
            # Best effort — fall back to "unknown" if the lookup fails.
            pass
    # Build the SlackMessage
    parent_msg = messages[0]
    message_ts = parent_msg.get("ts", thread_ts)
    username = parent_msg.get("user", "unknown_user")
    parent_text = parent_msg.get("text", "")
    # One-line preview of the parent message, used in the semantic identifier.
    snippet = (
        parent_text[:50].rstrip() + "..." if len(parent_text) > 50 else parent_text
    ).replace("\n", " ")
    doc_time = datetime.fromtimestamp(float(message_ts))
    decay_factor = DOC_TIME_DECAY
    # Recency bias decays with thread age but is floored at 0.75.
    doc_age_years = (datetime.now() - doc_time).total_seconds() / (365 * 24 * 60 * 60)
    recency_bias = max(1 / (1 + decay_factor * doc_age_years), 0.75)
    permalink = (
        f"https://slack.com/archives/{channel_id}/p{message_ts.replace('.', '')}"
    )
    slack_message = SlackMessage(
        document_id=f"{channel_id}_{message_ts}",
        channel_id=channel_id,
        message_id=message_ts,
        thread_id=None,  # Prevent double-enrichment in thread context fetch
        link=permalink,
        metadata={
            "channel": channel_name,
            "time": doc_time.isoformat(),
        },
        timestamp=doc_time,
        recency_bias=recency_bias,
        semantic_identifier=f"{username} in #{channel_name}: {snippet}",
        text=thread_text,
        highlighted_texts=set(),
        slack_score=100000.0,  # High priority — user explicitly asked for this thread
    )
    logger.info(
        f"URL override: fetched thread from channel={channel_id}, ts={thread_ts}, {len(messages)} messages"
    )
    return SlackQueryResult(messages=[slack_message], filtered_channels=[])
def query_slack(
query_string: str,
access_token: str,
@@ -519,6 +432,7 @@ def query_slack(
available_channels: list[str] | None = None,
channel_metadata_dict: dict[str, ChannelMetadata] | None = None,
) -> SlackQueryResult:
# Check if query has channel override (user specified channels in query)
has_channel_override = query_string.startswith("__CHANNEL_OVERRIDE__")
@@ -748,6 +662,7 @@ def _fetch_thread_context(
"""
channel_id = message.channel_id
thread_id = message.thread_id
message_id = message.message_id
# If not a thread, return original text as success
if thread_id is None:
@@ -780,37 +695,62 @@ def _fetch_thread_context(
if len(messages) <= 1:
return ThreadContextResult.success(message.text)
# Build thread text from thread starter + all replies
thread_text = _build_thread_text(messages, access_token, team_id, slack_client)
# Build thread text from thread starter + context window around matched message
thread_text = _build_thread_text(
messages, message_id, thread_id, access_token, team_id, slack_client
)
return ThreadContextResult.success(thread_text)
def _build_thread_text(
messages: list[dict[str, Any]],
message_id: str,
thread_id: str,
access_token: str,
team_id: str | None,
slack_client: WebClient,
) -> str:
"""Build thread text including all replies.
Includes the thread parent message followed by all replies in order.
"""
"""Build the thread text from messages."""
msg_text = messages[0].get("text", "")
msg_sender = messages[0].get("user", "")
thread_text = f"<@{msg_sender}>: {msg_text}"
# All messages after index 0 are replies
replies = messages[1:]
if not replies:
return thread_text
logger.debug(f"Thread {messages[0].get('ts')}: {len(replies)} replies included")
thread_text += "\n\nReplies:"
if thread_id == message_id:
message_id_idx = 0
else:
message_id_idx = next(
(i for i, msg in enumerate(messages) if msg.get("ts") == message_id), 0
)
if not message_id_idx:
return thread_text
for msg in replies:
start_idx = max(1, message_id_idx - SLACK_THREAD_CONTEXT_WINDOW)
if start_idx > 1:
thread_text += "\n..."
for i in range(start_idx, message_id_idx):
msg_text = messages[i].get("text", "")
msg_sender = messages[i].get("user", "")
thread_text += f"\n\n<@{msg_sender}>: {msg_text}"
msg_text = messages[message_id_idx].get("text", "")
msg_sender = messages[message_id_idx].get("user", "")
thread_text += f"\n\n<@{msg_sender}>: {msg_text}"
# Add following replies
len_replies = 0
for msg in messages[message_id_idx + 1 :]:
msg_text = msg.get("text", "")
msg_sender = msg.get("user", "")
thread_text += f"\n\n<@{msg_sender}>: {msg_text}"
reply = f"\n\n<@{msg_sender}>: {msg_text}"
thread_text += reply
len_replies += len(reply)
if len_replies >= DOC_EMBEDDING_CONTEXT_SIZE * 4:
thread_text += "\n..."
break
# Replace user IDs with names using cached lookups
userids: set[str] = set(re.findall(r"<@([A-Z0-9]+)>", thread_text))
@@ -1036,16 +976,7 @@ def slack_retrieval(
# Query slack with entity filtering
llm = get_default_llm()
query_items = build_slack_queries(query, llm, entities, available_channels)
# Partition into direct thread fetches and search query strings
direct_fetches: list[DirectThreadFetch] = []
query_strings: list[str] = []
for item in query_items:
if isinstance(item, DirectThreadFetch):
direct_fetches.append(item)
else:
query_strings.append(item)
query_strings = build_slack_queries(query, llm, entities, available_channels)
# Determine filtering based on entities OR context (bot)
include_dm = False
@@ -1062,16 +993,8 @@ def slack_retrieval(
f"Private channel context: will only allow messages from {allowed_private_channel} + public channels"
)
# Build search tasks — direct thread fetches + keyword searches
search_tasks: list[tuple] = [
(
_fetch_thread_from_url,
(fetch, access_token, channel_metadata_dict),
)
for fetch in direct_fetches
]
search_tasks.extend(
# Build search tasks
search_tasks = [
(
query_slack,
(
@@ -1087,7 +1010,7 @@ def slack_retrieval(
),
)
for query_string in query_strings
)
]
# If include_dm is True AND we're not already searching all channels,
# add additional searches without channel filters.

View File

@@ -10,7 +10,6 @@ from pydantic import ValidationError
from onyx.configs.app_configs import MAX_SLACK_QUERY_EXPANSIONS
from onyx.context.search.federated.models import ChannelMetadata
from onyx.context.search.federated.models import DirectThreadFetch
from onyx.context.search.models import ChunkIndexRequest
from onyx.federated_connectors.slack.models import SlackEntities
from onyx.llm.interfaces import LLM
@@ -639,38 +638,12 @@ def expand_query_with_llm(query_text: str, llm: LLM) -> list[str]:
return [query_text]
SLACK_URL_PATTERN = re.compile(
    r"https?://[a-z0-9-]+\.slack\.com/archives/([A-Z0-9]+)/p(\d{16})"
)


def extract_slack_message_urls(
    query_text: str,
) -> list[tuple[str, str]]:
    """Extract Slack message URLs from query text.

    Parses URLs like:
    https://onyx-company.slack.com/archives/C097NBWMY8Y/p1775491616524769

    Returns list of (channel_id, thread_ts) tuples.
    The 16-digit timestamp is converted to Slack ts format (with dot).
    """
    extracted: list[tuple[str, str]] = []
    for channel_id, raw_ts in SLACK_URL_PATTERN.findall(query_text):
        # Convert p1775491616524769 -> 1775491616.524769
        extracted.append((channel_id, f"{raw_ts[:10]}.{raw_ts[10:]}"))
    return extracted
def build_slack_queries(
query: ChunkIndexRequest,
llm: LLM,
entities: dict[str, Any] | None = None,
available_channels: list[str] | None = None,
) -> list[str | DirectThreadFetch]:
) -> list[str]:
"""Build Slack query strings with date filtering and query expansion."""
default_search_days = 30
if entities:
@@ -695,15 +668,6 @@ def build_slack_queries(
cutoff_date = datetime.now(timezone.utc) - timedelta(days=days_back)
time_filter = f" after:{cutoff_date.strftime('%Y-%m-%d')}"
# Check for Slack message URLs — if found, add direct fetch requests
url_fetches: list[DirectThreadFetch] = []
slack_urls = extract_slack_message_urls(query.query)
for channel_id, thread_ts in slack_urls:
url_fetches.append(
DirectThreadFetch(channel_id=channel_id, thread_ts=thread_ts)
)
logger.info(f"Detected Slack URL: channel={channel_id}, ts={thread_ts}")
# ALWAYS extract channel references from the query (not just for recency queries)
channel_references = extract_channel_references_from_query(query.query)
@@ -720,9 +684,7 @@ def build_slack_queries(
# If valid channels detected, use ONLY those channels with NO keywords
# Return query with ONLY time filter + channel filter (no keywords)
return url_fetches + [
build_channel_override_query(channel_references, time_filter)
]
return [build_channel_override_query(channel_references, time_filter)]
except ValueError as e:
# If validation fails, log the error and continue with normal flow
logger.warning(f"Channel reference validation failed: {e}")
@@ -740,8 +702,7 @@ def build_slack_queries(
rephrased_queries = expand_query_with_llm(query.query, llm)
# Build final query strings with time filters
search_queries = [
return [
rephrased_query.strip() + time_filter
for rephrased_query in rephrased_queries[:MAX_SLACK_QUERY_EXPANSIONS]
]
return url_fetches + search_queries

View File

@@ -0,0 +1,47 @@
# Error Handling
This directory is the local source of truth for backend API error handling.
## Primary Rule
Raise `OnyxError` from `onyx.error_handling.exceptions` instead of `HTTPException`.
The global FastAPI exception handler converts `OnyxError` into the standard JSON shape:
```json
{"error_code": "...", "detail": "..."}
```
This keeps API behavior consistent and avoids repetitive route-level boilerplate.
## Examples
```python
from onyx.error_handling.error_codes import OnyxErrorCode
from onyx.error_handling.exceptions import OnyxError
# Good
raise OnyxError(OnyxErrorCode.NOT_FOUND, "Session not found")
# Good
raise OnyxError(OnyxErrorCode.UNAUTHENTICATED)
# Good: preserve a dynamic upstream status code
raise OnyxError(
OnyxErrorCode.BAD_GATEWAY,
detail,
status_code_override=e.response.status_code,
)
```
Avoid:
```python
raise HTTPException(status_code=404, detail="Session not found")
```
## Notes
- Available error codes are defined in `backend/onyx/error_handling/error_codes.py`.
- If a new error category is needed, add it there first rather than inventing ad hoc strings.
- When forwarding upstream service failures with dynamic status codes, use `status_code_override`.

View File

@@ -64,20 +64,9 @@ IMPORTANT: each call to this tool is independent. Variables from previous calls
GENERATE_IMAGE_GUIDANCE = """
## generate_image
NEVER use generate_image unless the user specifically requests an image or asks to
edit/modify an existing image in the conversation.
To edit, modify, restyle, or create a variation of an image already in the
conversation, put that image's file_id in `reference_image_file_ids`. File IDs come
from two places, and both can be passed the same way:
- Images the user attached to a message carry a `[attached image — file_id: <id>]`
tag immediately before the image content. Copy the id out of that tag.
- Images produced by previous `generate_image` calls have their file_id in that
call's tool response JSON.
Only pass file_ids that actually appear in the conversation — never invent or guess
one. Leave `reference_image_file_ids` unset for a brand-new generation that doesn't
edit any existing image (for example when the user attached an image for context but
asked for a completely unrelated new picture). The first file_id in the list is the
primary edit source; any later file_ids are additional reference context.
NEVER use generate_image unless the user specifically requests an image.
For edits/variations of a previously generated image, pass `reference_image_file_ids` with
the `file_id` values returned by earlier `generate_image` tool results.
""".lstrip()
MEMORY_GUIDANCE = """

View File

@@ -96,32 +96,6 @@ def _truncate_description(description: str | None, max_length: int = 500) -> str
return description[: max_length - 3] + "..."
# TODO: Replace mask-comparison approach with an explicit Unset sentinel from the
# frontend indicating whether each credential field was actually modified. The current
# approach is brittle (e.g. short credentials produce a fixed-length mask that could
# collide) and mutates request values, which is surprising. The frontend should signal
# "unchanged" vs "new value" directly rather than relying on masked-string equality.
def _restore_masked_oauth_credentials(
    request_client_id: str | None,
    request_client_secret: str | None,
    existing_client: OAuthClientInformationFull,
) -> tuple[str | None, str | None]:
    """If the frontend sent back masked credentials, restore the real stored values."""
    stored_id = existing_client.client_id
    stored_secret = existing_client.client_secret
    # A request value equal to the mask of the stored value means the user
    # left the field untouched — swap the real credential back in.
    if request_client_id and stored_id and request_client_id == mask_string(stored_id):
        request_client_id = stored_id
    if (
        request_client_secret
        and stored_secret
        and request_client_secret == mask_string(stored_secret)
    ):
        request_client_secret = stored_secret
    return request_client_id, request_client_secret
router = APIRouter(prefix="/mcp")
admin_router = APIRouter(prefix="/admin/mcp")
STATE_TTL_SECONDS = 60 * 5 # 5 minutes
@@ -418,26 +392,6 @@ async def _connect_oauth(
detail=f"Server was configured with authentication type {auth_type_str}",
)
# If the frontend sent back masked credentials (unchanged by the user),
# restore the real stored values so we don't overwrite them with masks.
if mcp_server.admin_connection_config:
existing_data = extract_connection_data(
mcp_server.admin_connection_config, apply_mask=False
)
existing_client_raw = existing_data.get(MCPOAuthKeys.CLIENT_INFO.value)
if existing_client_raw:
existing_client = OAuthClientInformationFull.model_validate(
existing_client_raw
)
(
request.oauth_client_id,
request.oauth_client_secret,
) = _restore_masked_oauth_credentials(
request.oauth_client_id,
request.oauth_client_secret,
existing_client,
)
# Create admin config with client info if provided
config_data = MCPConnectionData(headers={})
if request.oauth_client_id and request.oauth_client_secret:
@@ -1402,19 +1356,6 @@ def _upsert_mcp_server(
if client_info_raw:
client_info = OAuthClientInformationFull.model_validate(client_info_raw)
# If the frontend sent back masked credentials (unchanged by the user),
# restore the real stored values so the comparison below sees no change
# and the credentials aren't overwritten with masked strings.
if client_info and request.auth_type == MCPAuthenticationType.OAUTH:
(
request.oauth_client_id,
request.oauth_client_secret,
) = _restore_masked_oauth_credentials(
request.oauth_client_id,
request.oauth_client_secret,
client_info,
)
changing_connection_config = (
not mcp_server.admin_connection_config
or (

View File

@@ -111,43 +111,6 @@ def _mask_string(value: str) -> str:
return value[:4] + "****" + value[-4:]
def _resolve_api_key(
    api_key: str | None,
    provider_name: str | None,
    api_base: str | None,
    db_session: Session,
) -> str | None:
    """Return the real API key for model-fetch endpoints.

    When editing an existing provider the form value is masked (e.g.
    ``sk-a****b1c2``). If *provider_name* is supplied we can look up
    the unmasked key from the database so the external request succeeds.

    The stored key is only returned when the request's *api_base*
    matches the value stored in the database.
    """
    if not provider_name:
        return api_key

    provider = fetch_existing_llm_provider(name=provider_name, db_session=db_session)
    if not provider or not provider.api_key:
        return api_key

    def _normalized(base: str | None) -> str:
        # Strip whitespace and trailing slashes so cosmetic URL differences
        # don't cause a false mismatch.
        return (base or "").strip().rstrip("/")

    if _normalized(provider.api_base) != _normalized(api_base):
        return api_key

    stored_key = provider.api_key.get_value(apply_mask=False)
    # Only resolve when the incoming value is the masked form of the
    # stored key — i.e. the user hasn't typed a new key.
    if api_key and api_key == _mask_string(stored_key):
        return stored_key
    return api_key
def _sync_fetched_models(
db_session: Session,
provider_name: str,
@@ -1211,17 +1174,16 @@ def get_ollama_available_models(
return sorted_results
def _get_openrouter_models_response(api_base: str, api_key: str | None) -> dict:
def _get_openrouter_models_response(api_base: str, api_key: str) -> dict:
"""Perform GET to OpenRouter /models and return parsed JSON."""
cleaned_api_base = api_base.strip().rstrip("/")
url = f"{cleaned_api_base}/models"
headers: dict[str, str] = {
headers = {
"Authorization": f"Bearer {api_key}",
# Optional headers recommended by OpenRouter for attribution
"HTTP-Referer": "https://onyx.app",
"X-Title": "Onyx",
}
if api_key:
headers["Authorization"] = f"Bearer {api_key}"
try:
response = httpx.get(url, headers=headers, timeout=10.0)
response.raise_for_status()
@@ -1244,12 +1206,8 @@ def get_openrouter_available_models(
Parses id, name (display), context_length, and architecture.input_modalities.
"""
api_key = _resolve_api_key(
request.api_key, request.provider_name, request.api_base, db_session
)
response_json = _get_openrouter_models_response(
api_base=request.api_base, api_key=api_key
api_base=request.api_base, api_key=request.api_key
)
data = response_json.get("data", [])
@@ -1342,18 +1300,13 @@ def get_lm_studio_available_models(
# If provider_name is given and the api_key hasn't been changed by the user,
# fall back to the stored API key from the database (the form value is masked).
# Only do so when the api_base matches what is stored.
api_key = request.api_key
if request.provider_name and not request.api_key_changed:
existing_provider = fetch_existing_llm_provider(
name=request.provider_name, db_session=db_session
)
if existing_provider and existing_provider.custom_config:
stored_base = (existing_provider.api_base or "").strip().rstrip("/")
if stored_base == cleaned_api_base:
api_key = existing_provider.custom_config.get(
LM_STUDIO_API_KEY_CONFIG_KEY
)
api_key = existing_provider.custom_config.get(LM_STUDIO_API_KEY_CONFIG_KEY)
url = f"{cleaned_api_base}/api/v1/models"
headers: dict[str, str] = {}
@@ -1437,12 +1390,8 @@ def get_litellm_available_models(
db_session: Session = Depends(get_session),
) -> list[LitellmFinalModelResponse]:
"""Fetch available models from Litellm proxy /v1/models endpoint."""
api_key = _resolve_api_key(
request.api_key, request.provider_name, request.api_base, db_session
)
response_json = _get_litellm_models_response(
api_key=api_key, api_base=request.api_base
api_key=request.api_key, api_base=request.api_base
)
models = response_json.get("data", [])
@@ -1499,7 +1448,7 @@ def get_litellm_available_models(
return sorted_results
def _get_litellm_models_response(api_key: str | None, api_base: str) -> dict:
def _get_litellm_models_response(api_key: str, api_base: str) -> dict:
"""Perform GET to Litellm proxy /api/v1/models and return parsed JSON."""
cleaned_api_base = api_base.strip().rstrip("/")
url = f"{cleaned_api_base}/v1/models"
@@ -1574,12 +1523,8 @@ def get_bifrost_available_models(
db_session: Session = Depends(get_session),
) -> list[BifrostFinalModelResponse]:
"""Fetch available models from Bifrost gateway /v1/models endpoint."""
api_key = _resolve_api_key(
request.api_key, request.provider_name, request.api_base, db_session
)
response_json = _get_bifrost_models_response(
api_base=request.api_base, api_key=api_key
api_base=request.api_base, api_key=request.api_key
)
models = response_json.get("data", [])
@@ -1668,12 +1613,8 @@ def get_openai_compatible_server_available_models(
db_session: Session = Depends(get_session),
) -> list[OpenAICompatibleFinalModelResponse]:
"""Fetch available models from a generic OpenAI-compatible /v1/models endpoint."""
api_key = _resolve_api_key(
request.api_key, request.provider_name, request.api_base, db_session
)
response_json = _get_openai_compatible_server_response(
api_base=request.api_base, api_key=api_key
api_base=request.api_base, api_key=request.api_key
)
models = response_json.get("data", [])

View File

@@ -208,6 +208,12 @@ class PythonToolOverrideKwargs(BaseModel):
chat_files: list[ChatFile] = []
class ImageGenerationToolOverrideKwargs(BaseModel):
"""Override kwargs for image generation tool calls."""
recent_generated_image_file_ids: list[str] = []
class SearchToolRunContext(BaseModel):
emitter: Emitter

View File

@@ -26,6 +26,7 @@ from onyx.server.query_and_chat.streaming_models import ImageGenerationToolHeart
from onyx.server.query_and_chat.streaming_models import ImageGenerationToolStart
from onyx.server.query_and_chat.streaming_models import Packet
from onyx.tools.interface import Tool
from onyx.tools.models import ImageGenerationToolOverrideKwargs
from onyx.tools.models import ToolCallException
from onyx.tools.models import ToolExecutionException
from onyx.tools.models import ToolResponse
@@ -47,16 +48,9 @@ PROMPT_FIELD = "prompt"
REFERENCE_IMAGE_FILE_IDS_FIELD = "reference_image_file_ids"
class ImageGenerationTool(Tool[None]):
class ImageGenerationTool(Tool[ImageGenerationToolOverrideKwargs | None]):
NAME = "generate_image"
DESCRIPTION = (
"Generate a new image from a prompt, or edit/modify existing images"
" from this conversation. To edit existing images — whether the user"
" attached them or they were produced by a previous generate_image"
" call — pass their file_id values in `reference_image_file_ids`."
" Do not use unless the user specifically requests an image or asks"
" to edit an image."
)
DESCRIPTION = "Generate an image based on a prompt. Do not use unless the user specifically requests an image."
DISPLAY_NAME = "Image Generation"
def __init__(
@@ -148,14 +142,8 @@ class ImageGenerationTool(Tool[None]):
REFERENCE_IMAGE_FILE_IDS_FIELD: {
"type": "array",
"description": (
"Optional list of image file_id values to edit/modify/use as reference."
" Accepts file_ids from two sources, with the same mechanics for both:"
" (1) images the user attached to a user message — their file_id appears"
" in the tag `[attached image — file_id: <id>]` right before the image"
" in that message; (2) images returned by previous generate_image tool"
" calls — their file_id appears in that call's response JSON. Leave"
" unset/empty for a brand-new generation unrelated to any existing image."
" The first file_id in the list is treated as the primary edit source."
"Optional image file IDs to use as reference context for edits/variations. "
"Use the file_id values returned by previous generate_image calls."
),
"items": {
"type": "string",
@@ -266,31 +254,41 @@ class ImageGenerationTool(Tool[None]):
def _resolve_reference_image_file_ids(
self,
llm_kwargs: dict[str, Any],
override_kwargs: ImageGenerationToolOverrideKwargs | None,
) -> list[str]:
raw_reference_ids = llm_kwargs.get(REFERENCE_IMAGE_FILE_IDS_FIELD)
if raw_reference_ids is None:
# No references requested — plain generation.
return []
if not isinstance(raw_reference_ids, list) or not all(
isinstance(file_id, str) for file_id in raw_reference_ids
if raw_reference_ids is not None:
if not isinstance(raw_reference_ids, list) or not all(
isinstance(file_id, str) for file_id in raw_reference_ids
):
raise ToolCallException(
message=(
f"Invalid {REFERENCE_IMAGE_FILE_IDS_FIELD}: expected array of strings, got {type(raw_reference_ids)}"
),
llm_facing_message=(
f"The '{REFERENCE_IMAGE_FILE_IDS_FIELD}' field must be an array of file_id strings."
),
)
reference_image_file_ids = [
file_id.strip() for file_id in raw_reference_ids if file_id.strip()
]
elif (
override_kwargs
and override_kwargs.recent_generated_image_file_ids
and self.img_provider.supports_reference_images
):
raise ToolCallException(
message=(
f"Invalid {REFERENCE_IMAGE_FILE_IDS_FIELD}: expected array of strings, got {type(raw_reference_ids)}"
),
llm_facing_message=(
f"The '{REFERENCE_IMAGE_FILE_IDS_FIELD}' field must be an array of file_id strings."
),
)
# If no explicit reference was provided, default to the most recently generated image.
reference_image_file_ids = [
override_kwargs.recent_generated_image_file_ids[-1]
]
else:
reference_image_file_ids = []
# Deduplicate while preserving order (first occurrence wins, so the
# LLM's intended "primary edit source" stays at index 0).
# Deduplicate while preserving order.
deduped_reference_image_ids: list[str] = []
seen_ids: set[str] = set()
for file_id in raw_reference_ids:
file_id = file_id.strip()
if not file_id or file_id in seen_ids:
for file_id in reference_image_file_ids:
if file_id in seen_ids:
continue
seen_ids.add(file_id)
deduped_reference_image_ids.append(file_id)
@@ -304,14 +302,14 @@ class ImageGenerationTool(Tool[None]):
f"Reference images requested but provider '{self.provider}' does not support image-editing context."
),
llm_facing_message=(
"This image provider does not support editing from existing images. "
"This image provider does not support editing from previous image context. "
"Try text-only generation, or switch to a provider/model that supports image edits."
),
)
max_reference_images = self.img_provider.max_reference_images
if max_reference_images > 0:
return deduped_reference_image_ids[:max_reference_images]
return deduped_reference_image_ids[-max_reference_images:]
return deduped_reference_image_ids
def _load_reference_images(
@@ -360,7 +358,7 @@ class ImageGenerationTool(Tool[None]):
def run(
self,
placement: Placement,
override_kwargs: None = None, # noqa: ARG002
override_kwargs: ImageGenerationToolOverrideKwargs | None = None,
**llm_kwargs: Any,
) -> ToolResponse:
if PROMPT_FIELD not in llm_kwargs:
@@ -375,6 +373,7 @@ class ImageGenerationTool(Tool[None]):
shape = ImageShape(llm_kwargs.get("shape", ImageShape.SQUARE.value))
reference_image_file_ids = self._resolve_reference_image_file_ids(
llm_kwargs=llm_kwargs,
override_kwargs=override_kwargs,
)
reference_images = self._load_reference_images(reference_image_file_ids)

View File

@@ -1,3 +1,4 @@
import json
import traceback
from collections import defaultdict
from typing import Any
@@ -13,6 +14,7 @@ from onyx.server.query_and_chat.streaming_models import SectionEnd
from onyx.tools.interface import Tool
from onyx.tools.models import ChatFile
from onyx.tools.models import ChatMinimalTextMessage
from onyx.tools.models import ImageGenerationToolOverrideKwargs
from onyx.tools.models import OpenURLToolOverrideKwargs
from onyx.tools.models import ParallelToolCallResponse
from onyx.tools.models import PythonToolOverrideKwargs
@@ -22,6 +24,9 @@ from onyx.tools.models import ToolCallKickoff
from onyx.tools.models import ToolExecutionException
from onyx.tools.models import ToolResponse
from onyx.tools.models import WebSearchToolOverrideKwargs
from onyx.tools.tool_implementations.images.image_generation_tool import (
ImageGenerationTool,
)
from onyx.tools.tool_implementations.memory.memory_tool import MemoryTool
from onyx.tools.tool_implementations.memory.memory_tool import MemoryToolOverrideKwargs
from onyx.tools.tool_implementations.open_url.open_url_tool import OpenURLTool
@@ -105,6 +110,63 @@ def _merge_tool_calls(tool_calls: list[ToolCallKickoff]) -> list[ToolCallKickoff
return merged_calls
def _extract_image_file_ids_from_tool_response_message(
message: str,
) -> list[str]:
try:
parsed_message = json.loads(message)
except json.JSONDecodeError:
return []
parsed_items: list[Any] = (
parsed_message if isinstance(parsed_message, list) else [parsed_message]
)
file_ids: list[str] = []
for item in parsed_items:
if not isinstance(item, dict):
continue
file_id = item.get("file_id")
if isinstance(file_id, str):
file_ids.append(file_id)
return file_ids
def _extract_recent_generated_image_file_ids(
message_history: list[ChatMessageSimple],
) -> list[str]:
tool_name_by_tool_call_id: dict[str, str] = {}
recent_image_file_ids: list[str] = []
seen_file_ids: set[str] = set()
for message in message_history:
if message.message_type == MessageType.ASSISTANT and message.tool_calls:
for tool_call in message.tool_calls:
tool_name_by_tool_call_id[tool_call.tool_call_id] = tool_call.tool_name
continue
if (
message.message_type != MessageType.TOOL_CALL_RESPONSE
or not message.tool_call_id
):
continue
tool_name = tool_name_by_tool_call_id.get(message.tool_call_id)
if tool_name != ImageGenerationTool.NAME:
continue
for file_id in _extract_image_file_ids_from_tool_response_message(
message.message
):
if file_id in seen_file_ids:
continue
seen_file_ids.add(file_id)
recent_image_file_ids.append(file_id)
return recent_image_file_ids
def _safe_run_single_tool(
tool: Tool,
tool_call: ToolCallKickoff,
@@ -324,6 +386,9 @@ def run_tool_calls(
url_to_citation: dict[str, int] = {
url: citation_num for citation_num, url in citation_mapping.items()
}
recent_generated_image_file_ids = _extract_recent_generated_image_file_ids(
message_history
)
# Prepare all tool calls with their override_kwargs
# Each tool gets a unique starting citation number to avoid conflicts when running in parallel
@@ -340,6 +405,7 @@ def run_tool_calls(
| WebSearchToolOverrideKwargs
| OpenURLToolOverrideKwargs
| PythonToolOverrideKwargs
| ImageGenerationToolOverrideKwargs
| MemoryToolOverrideKwargs
| None
) = None
@@ -388,6 +454,10 @@ def run_tool_calls(
override_kwargs = PythonToolOverrideKwargs(
chat_files=chat_files or [],
)
elif isinstance(tool, ImageGenerationTool):
override_kwargs = ImageGenerationToolOverrideKwargs(
recent_generated_image_file_ids=recent_generated_image_file_ids
)
elif isinstance(tool, MemoryTool):
override_kwargs = MemoryToolOverrideKwargs(
user_name=(

View File

@@ -254,7 +254,7 @@ oauthlib==3.2.2
# via
# kubernetes
# requests-oauthlib
onyx-devtools==0.7.5
onyx-devtools==0.7.4
openai==2.14.0
# via
# litellm

View File

@@ -45,6 +45,15 @@ npx playwright test <TEST_NAME>
Shared fixtures live in `backend/tests/conftest.py`. Test subdirectories can define
their own `conftest.py` for directory-scoped fixtures.
## Additional Onyx-Specific Guidance
- Activate the root venv first with `source .venv/bin/activate`.
- For many product changes in this repo, prefer integration tests or external dependency unit tests
over isolated unit tests.
- When writing integration tests, check `backend/tests/integration/common_utils/` and the root
`conftest.py` for fixtures and managers before inventing new helpers.
- Prefer existing fixtures over constructing users or entities manually inside tests.
## Running Tests Repeatedly (`pytest-repeat`)
Use `pytest-repeat` to catch flaky tests by running them multiple times:

View File

@@ -1,239 +0,0 @@
"""Tests for GoogleDriveConnector.resolve_errors against real Google Drive."""
import json
import os
from collections.abc import Callable
from unittest.mock import patch
from onyx.connectors.google_drive.connector import GoogleDriveConnector
from onyx.connectors.models import ConnectorFailure
from onyx.connectors.models import Document
from onyx.connectors.models import DocumentFailure
from onyx.connectors.models import HierarchyNode
from tests.daily.connectors.google_drive.consts_and_utils import ADMIN_EMAIL
from tests.daily.connectors.google_drive.consts_and_utils import (
ALL_EXPECTED_HIERARCHY_NODES,
)
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_1_ID
from tests.daily.connectors.google_drive.consts_and_utils import SHARED_DRIVE_1_ID
_DRIVE_ID_MAPPING_PATH = os.path.join(
os.path.dirname(__file__), "drive_id_mapping.json"
)
def _load_web_view_links(file_ids: list[int]) -> list[str]:
with open(_DRIVE_ID_MAPPING_PATH) as f:
mapping: dict[str, str] = json.load(f)
return [mapping[str(fid)] for fid in file_ids]
def _build_failures(web_view_links: list[str]) -> list[ConnectorFailure]:
return [
ConnectorFailure(
failed_document=DocumentFailure(
document_id=link,
document_link=link,
),
failure_message=f"Synthetic failure for {link}",
)
for link in web_view_links
]
@patch("onyx.file_processing.extract_file_text.get_unstructured_api_key")
def test_resolve_single_file(
mock_api_key: None, # noqa: ARG001
google_drive_service_acct_connector_factory: Callable[..., GoogleDriveConnector],
) -> None:
"""Resolve a single known file and verify we get back exactly one Document."""
connector = google_drive_service_acct_connector_factory(
primary_admin_email=ADMIN_EMAIL,
include_shared_drives=True,
shared_drive_urls=None,
include_my_drives=True,
my_drive_emails=None,
shared_folder_urls=None,
include_files_shared_with_me=False,
)
web_view_links = _load_web_view_links([0])
failures = _build_failures(web_view_links)
results = list(connector.resolve_errors(failures))
docs = [r for r in results if isinstance(r, Document)]
new_failures = [r for r in results if isinstance(r, ConnectorFailure)]
hierarchy_nodes = [r for r in results if isinstance(r, HierarchyNode)]
assert len(docs) == 1
assert len(new_failures) == 0
assert docs[0].semantic_identifier == "file_0.txt"
# Should yield at least one hierarchy node (the file's parent folder chain)
assert len(hierarchy_nodes) > 0
@patch("onyx.file_processing.extract_file_text.get_unstructured_api_key")
def test_resolve_multiple_files(
mock_api_key: None, # noqa: ARG001
google_drive_service_acct_connector_factory: Callable[..., GoogleDriveConnector],
) -> None:
"""Resolve multiple files across different folders via batch API."""
connector = google_drive_service_acct_connector_factory(
primary_admin_email=ADMIN_EMAIL,
include_shared_drives=True,
shared_drive_urls=None,
include_my_drives=True,
my_drive_emails=None,
shared_folder_urls=None,
include_files_shared_with_me=False,
)
# Pick files from different folders: admin files (0-4), shared drive 1 (20-24), folder_2 (45-49)
file_ids = [0, 1, 20, 21, 45]
web_view_links = _load_web_view_links(file_ids)
failures = _build_failures(web_view_links)
results = list(connector.resolve_errors(failures))
docs = [r for r in results if isinstance(r, Document)]
new_failures = [r for r in results if isinstance(r, ConnectorFailure)]
hierarchy_nodes = [r for r in results if isinstance(r, HierarchyNode)]
assert len(new_failures) == 0
retrieved_names = {doc.semantic_identifier for doc in docs}
expected_names = {f"file_{fid}.txt" for fid in file_ids}
assert expected_names == retrieved_names
# Files span multiple folders, so we should get hierarchy nodes
assert len(hierarchy_nodes) > 0
@patch("onyx.file_processing.extract_file_text.get_unstructured_api_key")
def test_resolve_hierarchy_nodes_are_valid(
mock_api_key: None, # noqa: ARG001
google_drive_service_acct_connector_factory: Callable[..., GoogleDriveConnector],
) -> None:
"""Verify that hierarchy nodes from resolve_errors match expected structure."""
connector = google_drive_service_acct_connector_factory(
primary_admin_email=ADMIN_EMAIL,
include_shared_drives=True,
shared_drive_urls=None,
include_my_drives=True,
my_drive_emails=None,
shared_folder_urls=None,
include_files_shared_with_me=False,
)
# File in folder_1 (inside shared_drive_1) — should walk up to shared_drive_1 root
web_view_links = _load_web_view_links([25])
failures = _build_failures(web_view_links)
results = list(connector.resolve_errors(failures))
hierarchy_nodes = [r for r in results if isinstance(r, HierarchyNode)]
node_ids = {node.raw_node_id for node in hierarchy_nodes}
# File 25 is in folder_1 which is inside shared_drive_1.
# The parent walk must yield at least these two ancestors.
assert (
FOLDER_1_ID in node_ids
), f"Expected folder_1 ({FOLDER_1_ID}) in hierarchy nodes, got: {node_ids}"
assert (
SHARED_DRIVE_1_ID in node_ids
), f"Expected shared_drive_1 ({SHARED_DRIVE_1_ID}) in hierarchy nodes, got: {node_ids}"
for node in hierarchy_nodes:
if node.raw_node_id not in ALL_EXPECTED_HIERARCHY_NODES:
continue
expected = ALL_EXPECTED_HIERARCHY_NODES[node.raw_node_id]
assert node.display_name == expected.display_name, (
f"Display name mismatch for {node.raw_node_id}: "
f"expected '{expected.display_name}', got '{node.display_name}'"
)
assert node.node_type == expected.node_type, (
f"Node type mismatch for {node.raw_node_id}: "
f"expected '{expected.node_type}', got '{node.node_type}'"
)
@patch("onyx.file_processing.extract_file_text.get_unstructured_api_key")
def test_resolve_with_invalid_link(
mock_api_key: None, # noqa: ARG001
google_drive_service_acct_connector_factory: Callable[..., GoogleDriveConnector],
) -> None:
"""Resolve with a mix of valid and invalid links — invalid ones yield ConnectorFailure."""
connector = google_drive_service_acct_connector_factory(
primary_admin_email=ADMIN_EMAIL,
include_shared_drives=True,
shared_drive_urls=None,
include_my_drives=True,
my_drive_emails=None,
shared_folder_urls=None,
include_files_shared_with_me=False,
)
valid_links = _load_web_view_links([0])
invalid_link = "https://drive.google.com/file/d/NONEXISTENT_FILE_ID_12345"
failures = _build_failures(valid_links + [invalid_link])
results = list(connector.resolve_errors(failures))
docs = [r for r in results if isinstance(r, Document)]
new_failures = [r for r in results if isinstance(r, ConnectorFailure)]
assert len(docs) == 1
assert docs[0].semantic_identifier == "file_0.txt"
assert len(new_failures) == 1
assert new_failures[0].failed_document is not None
assert new_failures[0].failed_document.document_id == invalid_link
@patch("onyx.file_processing.extract_file_text.get_unstructured_api_key")
def test_resolve_empty_errors(
mock_api_key: None, # noqa: ARG001
google_drive_service_acct_connector_factory: Callable[..., GoogleDriveConnector],
) -> None:
"""Resolving an empty error list should yield nothing."""
connector = google_drive_service_acct_connector_factory(
primary_admin_email=ADMIN_EMAIL,
include_shared_drives=True,
shared_drive_urls=None,
include_my_drives=True,
my_drive_emails=None,
shared_folder_urls=None,
include_files_shared_with_me=False,
)
results = list(connector.resolve_errors([]))
assert len(results) == 0
@patch("onyx.file_processing.extract_file_text.get_unstructured_api_key")
def test_resolve_entity_failures_are_skipped(
mock_api_key: None, # noqa: ARG001
google_drive_service_acct_connector_factory: Callable[..., GoogleDriveConnector],
) -> None:
"""Entity failures (not document failures) should be skipped by resolve_errors."""
from onyx.connectors.models import EntityFailure
connector = google_drive_service_acct_connector_factory(
primary_admin_email=ADMIN_EMAIL,
include_shared_drives=True,
shared_drive_urls=None,
include_my_drives=True,
my_drive_emails=None,
shared_folder_urls=None,
include_files_shared_with_me=False,
)
entity_failure = ConnectorFailure(
failed_entity=EntityFailure(entity_id="some_stage"),
failure_message="retrieval failure",
)
results = list(connector.resolve_errors([entity_failure]))
assert len(results) == 0

View File

@@ -9,7 +9,6 @@ from unittest.mock import patch
from ee.onyx.db.license import check_seat_availability
from ee.onyx.db.license import delete_license
from ee.onyx.db.license import get_license
from ee.onyx.db.license import get_used_seats
from ee.onyx.db.license import upsert_license
from ee.onyx.server.license.models import LicenseMetadata
from ee.onyx.server.license.models import LicenseSource
@@ -215,43 +214,3 @@ class TestCheckSeatAvailabilityMultiTenant:
assert result.available is False
assert result.error_message is not None
mock_tenant_count.assert_called_once_with("tenant-abc")
class TestGetUsedSeatsAccountTypeFiltering:
"""Verify get_used_seats query excludes SERVICE_ACCOUNT but includes BOT."""
@patch("ee.onyx.db.license.MULTI_TENANT", False)
@patch("onyx.db.engine.sql_engine.get_session_with_current_tenant")
def test_excludes_service_accounts(self, mock_get_session: MagicMock) -> None:
"""SERVICE_ACCOUNT users should not count toward seats."""
mock_session = MagicMock()
mock_get_session.return_value.__enter__ = MagicMock(return_value=mock_session)
mock_get_session.return_value.__exit__ = MagicMock(return_value=False)
mock_session.execute.return_value.scalar.return_value = 5
result = get_used_seats()
assert result == 5
# Inspect the compiled query to verify account_type filter
call_args = mock_session.execute.call_args
query = call_args[0][0]
compiled = str(query.compile(compile_kwargs={"literal_binds": True}))
assert "SERVICE_ACCOUNT" in compiled
# BOT should NOT be excluded
assert "BOT" not in compiled
@patch("ee.onyx.db.license.MULTI_TENANT", False)
@patch("onyx.db.engine.sql_engine.get_session_with_current_tenant")
def test_still_excludes_ext_perm_user(self, mock_get_session: MagicMock) -> None:
"""EXT_PERM_USER exclusion should still be present."""
mock_session = MagicMock()
mock_get_session.return_value.__enter__ = MagicMock(return_value=mock_session)
mock_get_session.return_value.__exit__ = MagicMock(return_value=False)
mock_session.execute.return_value.scalar.return_value = 3
get_used_seats()
call_args = mock_session.execute.call_args
query = call_args[0][0]
compiled = str(query.compile(compile_kwargs={"literal_binds": True}))
assert "EXT_PERM_USER" in compiled

View File

@@ -6,7 +6,6 @@ import requests
from jira import JIRA
from jira.resources import Issue
from onyx.connectors.jira.connector import _JIRA_BULK_FETCH_LIMIT
from onyx.connectors.jira.connector import bulk_fetch_issues
@@ -146,29 +145,3 @@ def test_bulk_fetch_recursive_splitting_raises_on_bad_issue() -> None:
with pytest.raises(requests.exceptions.JSONDecodeError):
bulk_fetch_issues(client, ["1", "2", bad_id, "3", "4", "5"])
def test_bulk_fetch_respects_api_batch_limit() -> None:
"""Requests to the bulkfetch endpoint never exceed _JIRA_BULK_FETCH_LIMIT IDs."""
client = _mock_jira_client()
total_issues = _JIRA_BULK_FETCH_LIMIT * 3 + 7
all_ids = [str(i) for i in range(total_issues)]
batch_sizes: list[int] = []
def _post_side_effect(url: str, json: dict[str, Any]) -> MagicMock: # noqa: ARG001
ids = json["issueIdsOrKeys"]
batch_sizes.append(len(ids))
resp = MagicMock()
resp.json.return_value = {"issues": [_make_raw_issue(i) for i in ids]}
return resp
client._session.post.side_effect = _post_side_effect
result = bulk_fetch_issues(client, all_ids)
assert len(result) == total_issues
# keeping this hardcoded because it's the documented limit
# https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-issues/
assert all(size <= 100 for size in batch_sizes)
assert len(batch_sizes) == 4

View File

@@ -1,67 +0,0 @@
"""Tests for _build_thread_text function."""
from unittest.mock import MagicMock
from unittest.mock import patch
from onyx.context.search.federated.slack_search import _build_thread_text
def _make_msg(user: str, text: str, ts: str) -> dict[str, str]:
return {"user": user, "text": text, "ts": ts}
class TestBuildThreadText:
"""Verify _build_thread_text includes full thread replies up to cap."""
@patch("onyx.context.search.federated.slack_search.batch_get_user_profiles")
def test_includes_all_replies(self, mock_profiles: MagicMock) -> None:
"""All replies within cap are included in output."""
mock_profiles.return_value = {}
messages = [
_make_msg("U1", "parent msg", "1000.0"),
_make_msg("U2", "reply 1", "1001.0"),
_make_msg("U3", "reply 2", "1002.0"),
_make_msg("U4", "reply 3", "1003.0"),
]
result = _build_thread_text(messages, "token", "T123", MagicMock())
assert "parent msg" in result
assert "reply 1" in result
assert "reply 2" in result
assert "reply 3" in result
assert "..." not in result
@patch("onyx.context.search.federated.slack_search.batch_get_user_profiles")
def test_non_thread_returns_parent_only(self, mock_profiles: MagicMock) -> None:
"""Single message (no replies) returns just the parent text."""
mock_profiles.return_value = {}
messages = [_make_msg("U1", "just a message", "1000.0")]
result = _build_thread_text(messages, "token", "T123", MagicMock())
assert "just a message" in result
assert "Replies:" not in result
@patch("onyx.context.search.federated.slack_search.batch_get_user_profiles")
def test_parent_always_first(self, mock_profiles: MagicMock) -> None:
"""Thread parent message is always the first line of output."""
mock_profiles.return_value = {}
messages = [
_make_msg("U1", "I am the parent", "1000.0"),
_make_msg("U2", "I am a reply", "1001.0"),
]
result = _build_thread_text(messages, "token", "T123", MagicMock())
parent_pos = result.index("I am the parent")
reply_pos = result.index("I am a reply")
assert parent_pos < reply_pos
@patch("onyx.context.search.federated.slack_search.batch_get_user_profiles")
def test_user_profiles_resolved(self, mock_profiles: MagicMock) -> None:
"""User IDs in thread text are replaced with display names."""
mock_profiles.return_value = {"U1": "Alice", "U2": "Bob"}
messages = [
_make_msg("U1", "hello", "1000.0"),
_make_msg("U2", "world", "1001.0"),
]
result = _build_thread_text(messages, "token", "T123", MagicMock())
assert "Alice" in result
assert "Bob" in result
assert "<@U1>" not in result
assert "<@U2>" not in result

View File

@@ -1,108 +0,0 @@
"""Tests for Slack URL parsing and direct thread fetch via URL override."""
from unittest.mock import MagicMock
from unittest.mock import patch
from onyx.context.search.federated.models import DirectThreadFetch
from onyx.context.search.federated.slack_search import _fetch_thread_from_url
from onyx.context.search.federated.slack_search_utils import extract_slack_message_urls
class TestExtractSlackMessageUrls:
"""Verify URL parsing extracts channel_id and timestamp correctly."""
def test_standard_url(self) -> None:
query = "summarize https://mycompany.slack.com/archives/C097NBWMY8Y/p1775491616524769"
results = extract_slack_message_urls(query)
assert len(results) == 1
assert results[0] == ("C097NBWMY8Y", "1775491616.524769")
def test_multiple_urls(self) -> None:
query = (
"compare https://co.slack.com/archives/C111/p1234567890123456 "
"and https://co.slack.com/archives/C222/p9876543210987654"
)
results = extract_slack_message_urls(query)
assert len(results) == 2
assert results[0] == ("C111", "1234567890.123456")
assert results[1] == ("C222", "9876543210.987654")
def test_no_urls(self) -> None:
query = "what happened in #general last week?"
results = extract_slack_message_urls(query)
assert len(results) == 0
def test_non_slack_url_ignored(self) -> None:
query = "check https://google.com/archives/C111/p1234567890123456"
results = extract_slack_message_urls(query)
assert len(results) == 0
def test_timestamp_conversion(self) -> None:
"""p prefix removed, dot inserted after 10th digit."""
query = "https://x.slack.com/archives/CABC123/p1775491616524769"
results = extract_slack_message_urls(query)
channel_id, ts = results[0]
assert channel_id == "CABC123"
assert ts == "1775491616.524769"
assert not ts.startswith("p")
assert "." in ts
class TestFetchThreadFromUrl:
"""Verify _fetch_thread_from_url calls conversations.replies and returns SlackMessage."""
@patch("onyx.context.search.federated.slack_search._build_thread_text")
@patch("onyx.context.search.federated.slack_search.WebClient")
def test_successful_fetch(
self, mock_webclient_cls: MagicMock, mock_build_thread: MagicMock
) -> None:
mock_client = MagicMock()
mock_webclient_cls.return_value = mock_client
# Mock conversations_replies
mock_response = MagicMock()
mock_response.get.return_value = [
{"user": "U1", "text": "parent", "ts": "1775491616.524769"},
{"user": "U2", "text": "reply 1", "ts": "1775491617.000000"},
{"user": "U3", "text": "reply 2", "ts": "1775491618.000000"},
]
mock_client.conversations_replies.return_value = mock_response
# Mock channel info
mock_ch_response = MagicMock()
mock_ch_response.get.return_value = {"name": "general"}
mock_client.conversations_info.return_value = mock_ch_response
mock_build_thread.return_value = (
"U1: parent\n\nReplies:\n\nU2: reply 1\n\nU3: reply 2"
)
fetch = DirectThreadFetch(
channel_id="C097NBWMY8Y", thread_ts="1775491616.524769"
)
result = _fetch_thread_from_url(fetch, "xoxp-token")
assert len(result.messages) == 1
msg = result.messages[0]
assert msg.channel_id == "C097NBWMY8Y"
assert msg.thread_id is None # Prevents double-enrichment
assert msg.slack_score == 100000.0
assert "parent" in msg.text
mock_client.conversations_replies.assert_called_once_with(
channel="C097NBWMY8Y", ts="1775491616.524769"
)
@patch("onyx.context.search.federated.slack_search.WebClient")
def test_api_error_returns_empty(self, mock_webclient_cls: MagicMock) -> None:
from slack_sdk.errors import SlackApiError
mock_client = MagicMock()
mock_webclient_cls.return_value = mock_client
mock_client.conversations_replies.side_effect = SlackApiError(
message="channel_not_found",
response=MagicMock(status_code=404),
)
fetch = DirectThreadFetch(channel_id="CBAD", thread_ts="1234567890.123456")
result = _fetch_thread_from_url(fetch, "xoxp-token")
assert len(result.messages) == 0

View File

@@ -505,7 +505,6 @@ class TestGetLMStudioAvailableModels:
mock_session = MagicMock()
mock_provider = MagicMock()
mock_provider.api_base = "http://localhost:1234"
mock_provider.custom_config = {"LM_STUDIO_API_KEY": "stored-secret"}
response = {

View File

@@ -2,7 +2,6 @@
from __future__ import annotations
from typing import Any
from unittest.mock import MagicMock
from unittest.mock import patch
from uuid import uuid4
@@ -10,9 +9,7 @@ from uuid import uuid4
from fastapi import Response
from sqlalchemy.exc import IntegrityError
from ee.onyx.server.scim.api import _check_seat_availability
from ee.onyx.server.scim.api import _scim_name_to_str
from ee.onyx.server.scim.api import _seat_lock_id_for_tenant
from ee.onyx.server.scim.api import create_user
from ee.onyx.server.scim.api import delete_user
from ee.onyx.server.scim.api import get_user
@@ -744,80 +741,3 @@ class TestEmailCasePreservation:
resource = parse_scim_user(result)
assert resource.userName == "Alice@Example.COM"
assert resource.emails[0].value == "Alice@Example.COM"
class TestSeatLock:
    """Tests for the advisory lock in _check_seat_availability."""

    @patch("ee.onyx.server.scim.api.get_current_tenant_id", return_value="tenant_abc")
    def test_acquires_advisory_lock_before_checking(
        self,
        _mock_tenant: MagicMock,
        mock_dal: MagicMock,
    ) -> None:
        """The advisory lock must be acquired before the seat check runs."""
        events: list[str] = []

        def record_execute(stmt: Any, _params: Any = None) -> None:
            # Only the advisory-lock statement matters for ordering.
            if "pg_advisory_xact_lock" in str(stmt):
                events.append("lock")

        mock_dal.session.execute.side_effect = record_execute

        with patch(
            "ee.onyx.server.scim.api.fetch_ee_implementation_or_noop"
        ) as mock_fetch:
            seat_result = MagicMock()
            seat_result.available = True
            seat_check = MagicMock(return_value=seat_result)
            mock_fetch.return_value = seat_check

            def record_check(*_args: Any, **_kwargs: Any) -> Any:
                events.append("check")
                return seat_result

            seat_check.side_effect = record_check

            _check_seat_availability(mock_dal)

        assert events == ["lock", "check"]

    @patch("ee.onyx.server.scim.api.get_current_tenant_id", return_value="tenant_xyz")
    def test_lock_uses_tenant_scoped_key(
        self,
        _mock_tenant: MagicMock,
        mock_dal: MagicMock,
    ) -> None:
        """The lock id must be derived from the tenant via _seat_lock_id_for_tenant."""
        seat_result = MagicMock()
        seat_result.available = True
        seat_check = MagicMock(return_value=seat_result)

        with patch(
            "ee.onyx.server.scim.api.fetch_ee_implementation_or_noop",
            return_value=seat_check,
        ):
            _check_seat_availability(mock_dal)

        mock_dal.session.execute.assert_called_once()
        bound_params = mock_dal.session.execute.call_args[0][1]
        assert bound_params["lock_id"] == _seat_lock_id_for_tenant("tenant_xyz")

    def test_seat_lock_id_is_stable_and_tenant_scoped(self) -> None:
        """Lock id must be deterministic and differ across tenants."""
        first_id = _seat_lock_id_for_tenant("t1")
        assert first_id == _seat_lock_id_for_tenant("t1")
        assert first_id != _seat_lock_id_for_tenant("t2")

    def test_no_lock_when_ee_absent(
        self,
        mock_dal: MagicMock,
    ) -> None:
        """No advisory lock should be acquired when the EE check is absent."""
        with patch(
            "ee.onyx.server.scim.api.fetch_ee_implementation_or_noop",
            return_value=None,
        ):
            assert _check_seat_availability(mock_dal) is None
        mock_dal.session.execute.assert_not_called()

View File

@@ -1,115 +0,0 @@
"""Tests for ``ImageGenerationTool._resolve_reference_image_file_ids``.
The resolver turns the LLM's ``reference_image_file_ids`` argument into a
cleaned list of file IDs to hand to ``_load_reference_images``. It trusts
the LLM's picks — the LLM can only see file IDs that actually appear in
the conversation (via ``[attached image — file_id: <id>]`` tags on user
messages and the JSON returned by prior generate_image calls), so we
don't re-validate against an allow-list in the tool itself.
"""
from unittest.mock import MagicMock
from unittest.mock import patch
import pytest
from onyx.tools.models import ToolCallException
from onyx.tools.tool_implementations.images.image_generation_tool import (
ImageGenerationTool,
)
from onyx.tools.tool_implementations.images.image_generation_tool import (
REFERENCE_IMAGE_FILE_IDS_FIELD,
)
def _make_tool(
    supports_reference_images: bool = True,
    max_reference_images: int = 16,
) -> ImageGenerationTool:
    """Construct a tool with a mock provider so no credentials/network are needed."""
    # Build the fake provider up front, then splice it in via the patch.
    fake_provider = MagicMock()
    fake_provider.supports_reference_images = supports_reference_images
    fake_provider.max_reference_images = max_reference_images

    with patch(
        "onyx.tools.tool_implementations.images.image_generation_tool.get_image_generation_provider"
    ) as mock_get_provider:
        mock_get_provider.return_value = fake_provider
        return ImageGenerationTool(
            image_generation_credentials=MagicMock(),
            tool_id=1,
            emitter=MagicMock(),
            model="gpt-image-1",
            provider="openai",
        )
class TestResolveReferenceImageFileIds:
    """Behavior of the reference-image id resolver for every input shape."""

    def test_unset_returns_empty_plain_generation(self) -> None:
        tool = _make_tool()
        assert tool._resolve_reference_image_file_ids(llm_kwargs={}) == []

    def test_empty_list_is_treated_like_unset(self) -> None:
        tool = _make_tool()
        resolved = tool._resolve_reference_image_file_ids(
            llm_kwargs={REFERENCE_IMAGE_FILE_IDS_FIELD: []},
        )
        assert resolved == []

    def test_passes_llm_supplied_ids_through(self) -> None:
        tool = _make_tool()
        resolved = tool._resolve_reference_image_file_ids(
            llm_kwargs={REFERENCE_IMAGE_FILE_IDS_FIELD: ["upload-1", "gen-1"]},
        )
        # Order preserved — first entry is the primary edit source.
        assert resolved == ["upload-1", "gen-1"]

    def test_invalid_shape_raises(self) -> None:
        tool = _make_tool()
        bad_kwargs = {REFERENCE_IMAGE_FILE_IDS_FIELD: "not-a-list"}
        with pytest.raises(ToolCallException):
            tool._resolve_reference_image_file_ids(llm_kwargs=bad_kwargs)

    def test_non_string_element_raises(self) -> None:
        tool = _make_tool()
        bad_kwargs = {REFERENCE_IMAGE_FILE_IDS_FIELD: ["ok", 123]}
        with pytest.raises(ToolCallException):
            tool._resolve_reference_image_file_ids(llm_kwargs=bad_kwargs)

    def test_deduplicates_preserving_first_occurrence(self) -> None:
        tool = _make_tool()
        resolved = tool._resolve_reference_image_file_ids(
            llm_kwargs={
                REFERENCE_IMAGE_FILE_IDS_FIELD: ["gen-1", "gen-2", "gen-1"]
            },
        )
        assert resolved == ["gen-1", "gen-2"]

    def test_strips_whitespace_and_skips_empty_strings(self) -> None:
        tool = _make_tool()
        resolved = tool._resolve_reference_image_file_ids(
            llm_kwargs={
                REFERENCE_IMAGE_FILE_IDS_FIELD: [" gen-1 ", "", " "]
            },
        )
        assert resolved == ["gen-1"]

    def test_provider_without_reference_support_raises(self) -> None:
        tool = _make_tool(supports_reference_images=False)
        with pytest.raises(ToolCallException):
            tool._resolve_reference_image_file_ids(
                llm_kwargs={REFERENCE_IMAGE_FILE_IDS_FIELD: ["gen-1"]},
            )

    def test_truncates_to_provider_max_preserving_head(self) -> None:
        """When the LLM lists more images than the provider allows, keep the
        HEAD of the list (the primary edit source + earliest extras) rather
        than the tail, since the LLM put the most important one first."""
        tool = _make_tool(max_reference_images=2)
        resolved = tool._resolve_reference_image_file_ids(
            llm_kwargs={
                REFERENCE_IMAGE_FILE_IDS_FIELD: ["a", "b", "c", "d"]
            },
        )
        assert resolved == ["a", "b"]

View File

@@ -1,5 +1,10 @@
from onyx.chat.models import ChatMessageSimple
from onyx.chat.models import ToolCallSimple
from onyx.configs.constants import MessageType
from onyx.server.query_and_chat.placement import Placement
from onyx.tools.models import ToolCallKickoff
from onyx.tools.tool_runner import _extract_image_file_ids_from_tool_response_message
from onyx.tools.tool_runner import _extract_recent_generated_image_file_ids
from onyx.tools.tool_runner import _merge_tool_calls
@@ -308,3 +313,61 @@ class TestMergeToolCalls:
# String should be converted to list item
assert result[0].tool_args["queries"] == ["single_query", "q2"]
class TestImageHistoryExtraction:
    """Extraction of generated-image file ids from chat history."""

    def test_extracts_image_file_ids_from_json_response(self) -> None:
        payload = '[{"file_id":"img-1","revised_prompt":"v1"},{"file_id":"img-2","revised_prompt":"v2"}]'
        extracted = _extract_image_file_ids_from_tool_response_message(payload)
        assert extracted == ["img-1", "img-2"]

    def test_extracts_recent_generated_image_ids_from_history(self) -> None:
        # An assistant turn that invoked generate_image, followed by its response.
        assistant_turn = ChatMessageSimple(
            message="",
            token_count=1,
            message_type=MessageType.ASSISTANT,
            tool_calls=[
                ToolCallSimple(
                    tool_call_id="call_1",
                    tool_name="generate_image",
                    tool_arguments={"prompt": "test"},
                    token_count=1,
                )
            ],
        )
        tool_response = ChatMessageSimple(
            message='[{"file_id":"img-1","revised_prompt":"r1"}]',
            token_count=1,
            message_type=MessageType.TOOL_CALL_RESPONSE,
            tool_call_id="call_1",
        )
        assert _extract_recent_generated_image_file_ids(
            [assistant_turn, tool_response]
        ) == ["img-1"]

    def test_ignores_non_image_tool_responses(self) -> None:
        # Same shape of history, but the tool call is web_search, not
        # generate_image — its response must not be treated as image output.
        assistant_turn = ChatMessageSimple(
            message="",
            token_count=1,
            message_type=MessageType.ASSISTANT,
            tool_calls=[
                ToolCallSimple(
                    tool_call_id="call_1",
                    tool_name="web_search",
                    tool_arguments={"queries": ["q"]},
                    token_count=1,
                )
            ],
        )
        tool_response = ChatMessageSimple(
            message='[{"file_id":"img-1","revised_prompt":"r1"}]',
            token_count=1,
            message_type=MessageType.TOOL_CALL_RESPONSE,
            tool_call_id="call_1",
        )
        assert (
            _extract_recent_generated_image_file_ids([assistant_turn, tool_response])
            == []
        )

View File

@@ -1,17 +1,3 @@
# OAuth callback page must be served by the web server (Next.js),
# not the MCP server. Exact match takes priority over the regex below.
location = /mcp/oauth/callback {
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_set_header X-Forwarded-Host $host;
proxy_set_header X-Forwarded-Port $server_port;
proxy_set_header Host $host;
proxy_http_version 1.1;
proxy_redirect off;
proxy_pass http://web_server;
}
# MCP Server - Model Context Protocol for LLM integrations
# Match /mcp, /mcp/, or /mcp/* but NOT /mcpserver, /mcpapi, etc.
location ~ ^/mcp(/.*)?$ {

View File

@@ -5,7 +5,7 @@ home: https://www.onyx.app/
sources:
- "https://github.com/onyx-dot-app/onyx"
type: application
version: 0.4.43
version: 0.4.41
appVersion: latest
annotations:
category: Productivity

View File

@@ -1,349 +0,0 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": { "type": "grafana", "uid": "-- Grafana --" },
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 1,
"id": null,
"links": [],
"liveNow": true,
"panels": [
{
"title": "Client-Side Search Latency (P50 / P95 / P99)",
"description": "End-to-end latency as measured by the Python client, including network round-trip and serialization overhead.",
"type": "timeseries",
"gridPos": { "h": 10, "w": 12, "x": 0, "y": 0 },
"id": 1,
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisLabel": "seconds",
"axisPlacement": "auto",
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"lineInterpolation": "smooth",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": { "type": "linear" },
"showPoints": "never",
"spanNulls": false,
"stacking": { "group": "A", "mode": "none" },
"thresholdsStyle": { "mode": "dashed" }
},
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "green", "value": null },
{ "color": "yellow", "value": 0.5 },
{ "color": "red", "value": 2.0 }
]
},
"unit": "s",
"min": 0
},
"overrides": []
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"expr": "histogram_quantile(0.5, sum by (le) (rate(onyx_opensearch_search_client_duration_seconds_bucket[5m])))",
"legendFormat": "P50",
"refId": "A"
},
{
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"expr": "histogram_quantile(0.95, sum by (le) (rate(onyx_opensearch_search_client_duration_seconds_bucket[5m])))",
"legendFormat": "P95",
"refId": "B"
},
{
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"expr": "histogram_quantile(0.99, sum by (le) (rate(onyx_opensearch_search_client_duration_seconds_bucket[5m])))",
"legendFormat": "P99",
"refId": "C"
}
]
},
{
"title": "Server-Side Search Latency (P50 / P95 / P99)",
"description": "OpenSearch server-side execution time from the 'took' field in the response. Does not include network or client-side overhead.",
"type": "timeseries",
"gridPos": { "h": 10, "w": 12, "x": 12, "y": 0 },
"id": 2,
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisLabel": "seconds",
"axisPlacement": "auto",
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"lineInterpolation": "smooth",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": { "type": "linear" },
"showPoints": "never",
"spanNulls": false,
"stacking": { "group": "A", "mode": "none" },
"thresholdsStyle": { "mode": "dashed" }
},
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "green", "value": null },
{ "color": "yellow", "value": 0.5 },
{ "color": "red", "value": 2.0 }
]
},
"unit": "s",
"min": 0
},
"overrides": []
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"expr": "histogram_quantile(0.5, sum by (le) (rate(onyx_opensearch_search_server_duration_seconds_bucket[5m])))",
"legendFormat": "P50",
"refId": "A"
},
{
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"expr": "histogram_quantile(0.95, sum by (le) (rate(onyx_opensearch_search_server_duration_seconds_bucket[5m])))",
"legendFormat": "P95",
"refId": "B"
},
{
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"expr": "histogram_quantile(0.99, sum by (le) (rate(onyx_opensearch_search_server_duration_seconds_bucket[5m])))",
"legendFormat": "P99",
"refId": "C"
}
]
},
{
"title": "Client-Side Latency by Search Type (P95)",
"description": "P95 client-side latency broken down by search type (hybrid, keyword, semantic, random, doc_id_retrieval).",
"type": "timeseries",
"gridPos": { "h": 10, "w": 12, "x": 0, "y": 10 },
"id": 3,
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisLabel": "seconds",
"axisPlacement": "auto",
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"lineInterpolation": "smooth",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": { "type": "linear" },
"showPoints": "never",
"spanNulls": false,
"stacking": { "group": "A", "mode": "none" },
"thresholdsStyle": { "mode": "off" }
},
"unit": "s",
"min": 0
},
"overrides": []
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"expr": "histogram_quantile(0.95, sum by (search_type, le) (rate(onyx_opensearch_search_client_duration_seconds_bucket[5m])))",
"legendFormat": "{{ search_type }}",
"refId": "A"
}
]
},
{
"title": "Search Throughput by Type",
"description": "Searches per second broken down by search type.",
"type": "timeseries",
"gridPos": { "h": 10, "w": 12, "x": 12, "y": 10 },
"id": 4,
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisLabel": "searches/s",
"axisPlacement": "auto",
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"lineInterpolation": "smooth",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": { "type": "linear" },
"showPoints": "never",
"spanNulls": false,
"stacking": { "group": "A", "mode": "normal" },
"thresholdsStyle": { "mode": "off" }
},
"unit": "ops",
"min": 0
},
"overrides": []
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"expr": "sum by (search_type) (rate(onyx_opensearch_search_total[5m]))",
"legendFormat": "{{ search_type }}",
"refId": "A"
}
]
},
{
"title": "Concurrent Searches In Progress",
"description": "Number of OpenSearch searches currently in flight, broken down by search type. Summed across all instances.",
"type": "timeseries",
"gridPos": { "h": 10, "w": 12, "x": 0, "y": 20 },
"id": 5,
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisLabel": "searches",
"axisPlacement": "auto",
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"lineInterpolation": "smooth",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": { "type": "linear" },
"showPoints": "never",
"spanNulls": false,
"stacking": { "group": "A", "mode": "normal" },
"thresholdsStyle": { "mode": "off" }
},
"min": 0
},
"overrides": []
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"expr": "sum by (search_type) (onyx_opensearch_searches_in_progress)",
"legendFormat": "{{ search_type }}",
"refId": "A"
}
]
},
{
"title": "Client vs Server Latency Overhead (P50)",
"description": "Difference between client-side and server-side P50 latency. Reveals network, serialization, and untracked OpenSearch overhead.",
"type": "timeseries",
"gridPos": { "h": 10, "w": 12, "x": 12, "y": 20 },
"id": 6,
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisLabel": "seconds",
"axisPlacement": "auto",
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"lineInterpolation": "smooth",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": { "type": "linear" },
"showPoints": "never",
"spanNulls": false,
"stacking": { "group": "A", "mode": "none" },
"thresholdsStyle": { "mode": "off" }
},
"unit": "s",
"min": 0
},
"overrides": []
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"expr": "histogram_quantile(0.5, sum by (le) (rate(onyx_opensearch_search_client_duration_seconds_bucket[5m]))) - histogram_quantile(0.5, sum by (le) (rate(onyx_opensearch_search_server_duration_seconds_bucket[5m])))",
"legendFormat": "Client - Server overhead (P50)",
"refId": "A"
},
{
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"expr": "histogram_quantile(0.5, sum by (le) (rate(onyx_opensearch_search_client_duration_seconds_bucket[5m])))",
"legendFormat": "Client P50",
"refId": "B"
},
{
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"expr": "histogram_quantile(0.5, sum by (le) (rate(onyx_opensearch_search_server_duration_seconds_bucket[5m])))",
"legendFormat": "Server P50",
"refId": "C"
}
]
}
],
"refresh": "5s",
"schemaVersion": 37,
"style": "dark",
"tags": ["onyx", "opensearch", "search", "latency"],
"templating": {
"list": [
{
"current": {
"text": "Prometheus",
"value": "prometheus"
},
"includeAll": false,
"name": "DS_PROMETHEUS",
"options": [],
"query": "prometheus",
"refresh": 1,
"type": "datasource"
}
]
},
"time": { "from": "now-60m", "to": "now" },
"timepicker": {
"refresh_intervals": ["5s", "10s", "30s", "1m"]
},
"timezone": "",
"title": "Onyx OpenSearch Search Latency",
"uid": "onyx-opensearch-search-latency",
"version": 0,
"weekStart": ""
}

View File

@@ -1,606 +0,0 @@
{
"id": null,
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "grafana",
"uid": "-- Grafana --"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"links": [],
"panels": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 18,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 2,
"pointSize": 4,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "orange",
"value": 10
},
{
"color": "red",
"value": 50
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 10,
"w": 24,
"x": 0,
"y": 0
},
"id": 1,
"options": {
"legend": {
"calcs": ["lastNotNull", "max"],
"displayMode": "table",
"placement": "right",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "desc"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"expr": "onyx_celery_queue_depth{queue=~\"$queue\"}",
"legendFormat": "{{queue}}",
"range": true,
"refId": "A"
}
],
"title": "Queue Depth by Queue",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "orange",
"value": 20
},
{
"color": "red",
"value": 100
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 4,
"w": 6,
"x": 0,
"y": 10
},
"id": 2,
"options": {
"colorMode": "background",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": ["lastNotNull"],
"fields": "",
"values": false
},
"textMode": "auto"
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"expr": "sum(onyx_celery_queue_depth)",
"refId": "A"
}
],
"title": "Total Queued Tasks",
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "orange",
"value": 20
},
{
"color": "red",
"value": 100
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 4,
"w": 6,
"x": 6,
"y": 10
},
"id": 3,
"options": {
"colorMode": "background",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": ["lastNotNull"],
"fields": "",
"values": false
},
"textMode": "auto"
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"expr": "onyx_celery_unacked_tasks",
"refId": "A"
}
],
"title": "Unacked Tasks",
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "orange",
"value": 10
},
{
"color": "red",
"value": 50
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 4,
"w": 6,
"x": 12,
"y": 10
},
"id": 4,
"options": {
"colorMode": "background",
"graphMode": "none",
"justifyMode": "center",
"orientation": "auto",
"reduceOptions": {
"calcs": ["lastNotNull"],
"fields": "",
"values": false
},
"textMode": "auto"
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"expr": "onyx_celery_queue_depth{queue=\"docprocessing\"}",
"refId": "A"
}
],
"title": "Docprocessing Queue",
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "orange",
"value": 10
},
{
"color": "red",
"value": 50
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 4,
"w": 6,
"x": 18,
"y": 10
},
"id": 5,
"options": {
"colorMode": "background",
"graphMode": "none",
"justifyMode": "center",
"orientation": "auto",
"reduceOptions": {
"calcs": ["lastNotNull"],
"fields": "",
"values": false
},
"textMode": "auto"
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"expr": "onyx_celery_queue_depth{queue=\"connector_doc_fetching\"}",
"refId": "A"
}
],
"title": "Docfetching Queue",
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "bars",
"fillOpacity": 80,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineWidth": 1,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"stacking": {
"group": "A",
"mode": "none"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "orange",
"value": 10
},
{
"color": "red",
"value": 50
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 10,
"w": 12,
"x": 0,
"y": 14
},
"id": 6,
"options": {
"legend": {
"calcs": ["lastNotNull"],
"displayMode": "list",
"placement": "bottom",
"showLegend": false
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"expr": "topk(10, onyx_celery_queue_depth)",
"legendFormat": "{{queue}}",
"range": true,
"refId": "A"
}
],
"title": "Top 10 Queue Backlogs",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"fieldConfig": {
"defaults": {
"custom": {
"align": "auto",
"cellOptions": {
"type": "auto"
},
"inspect": false
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "orange",
"value": 10
},
{
"color": "red",
"value": 50
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 10,
"w": 12,
"x": 12,
"y": 14
},
"id": 7,
"options": {
"cellHeight": "sm",
"footer": {
"countRows": false,
"fields": "",
"reducer": ["sum"],
"show": false
},
"showHeader": true,
"sortBy": [
{
"desc": true,
"displayName": "Value"
}
]
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"expr": "sort_desc(onyx_celery_queue_depth)",
"format": "table",
"instant": true,
"refId": "A"
}
],
"title": "Current Queue Depth",
"transformations": [
{
"id": "labelsToFields",
"options": {
"mode": "columns"
}
}
],
"type": "table"
}
],
"refresh": "30s",
"schemaVersion": 39,
"style": "dark",
"tags": ["onyx", "redis", "celery"],
"templating": {
"list": [
{
"current": {
"selected": true,
"text": "Prometheus",
"value": "Prometheus"
},
"hide": 0,
"includeAll": false,
"label": "Datasource",
"name": "DS_PROMETHEUS",
"options": [],
"query": "prometheus",
"refresh": 1,
"regex": "",
"type": "datasource"
},
{
"allValue": ".*",
"current": {
"selected": true,
"text": "All",
"value": ".*"
},
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"definition": "label_values(onyx_celery_queue_depth, queue)",
"hide": 0,
"includeAll": true,
"label": "Queue",
"multi": true,
"name": "queue",
"options": [],
"query": {
"query": "label_values(onyx_celery_queue_depth, queue)",
"refId": "StandardVariableQuery"
},
"refresh": 2,
"regex": "",
"sort": 1,
"type": "query"
}
]
},
"time": {
"from": "now-6h",
"to": "now"
},
"timepicker": {},
"timezone": "",
"title": "Onyx Redis Queues",
"uid": "onyx-redis-queues",
"version": 1,
"weekStart": ""
}

View File

@@ -12,30 +12,4 @@ metadata:
data:
onyx-indexing-pipeline.json: |
{{- .Files.Get "dashboards/indexing-pipeline.json" | nindent 4 }}
---
apiVersion: v1
kind: ConfigMap
metadata:
name: {{ include "onyx.fullname" . }}-opensearch-search-latency-dashboard
labels:
{{- include "onyx.labels" . | nindent 4 }}
grafana_dashboard: "1"
annotations:
grafana_folder: "Onyx"
data:
onyx-opensearch-search-latency.json: |
{{- .Files.Get "dashboards/opensearch-search-latency.json" | nindent 4 }}
---
apiVersion: v1
kind: ConfigMap
metadata:
name: {{ include "onyx.fullname" . }}-redis-queues-dashboard
labels:
{{- include "onyx.labels" . | nindent 4 }}
grafana_dashboard: "1"
annotations:
grafana_folder: "Onyx"
data:
onyx-redis-queues.json: |
{{- .Files.Get "dashboards/redis-queues.json" | nindent 4 }}
{{- end }}

View File

@@ -42,22 +42,6 @@ data:
client_max_body_size 5G;
{{- if .Values.mcpServer.enabled }}
# OAuth callback page must be served by the web server (Next.js),
# not the MCP server. Exact match takes priority over the regex below.
location = /mcp/oauth/callback {
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_set_header X-Forwarded-Host $host;
proxy_set_header Host $host;
proxy_http_version 1.1;
proxy_redirect off;
proxy_connect_timeout {{ .Values.nginx.timeouts.connect }}s;
proxy_send_timeout {{ .Values.nginx.timeouts.send }}s;
proxy_read_timeout {{ .Values.nginx.timeouts.read }}s;
proxy_pass http://web_server;
}
# MCP Server - Model Context Protocol for LLM integrations
# Match /mcp, /mcp/, or /mcp/* but NOT /mcpserver, /mcpapi, etc.
location ~ ^/mcp(/.*)?$ {

View File

@@ -296,7 +296,7 @@ nginx:
# The ingress-nginx subchart doesn't auto-detect our custom ConfigMap changes.
# Workaround: Helm upgrade will restart if the following annotation value changes.
podAnnotations:
onyx.app/nginx-config-version: "4"
onyx.app/nginx-config-version: "3"
# Propagate DOMAIN into nginx so server_name continues to use the same env var
extraEnvs:

View File

@@ -0,0 +1,89 @@
# Initial Architecture Map
Status: provisional baseline. This is a routing map for agents, not a complete design spec for
every subsystem. Update it as the repo becomes more explicit.
## Top-Level Surfaces
The repository is easiest to reason about as seven main surfaces:
| Surface | Primary Paths | Purpose |
| --- | --- | --- |
| Backend product logic | `backend/onyx/`, `backend/ee/onyx/` | Core auth, chat, search, indexing, connectors, API, and enterprise extensions |
| Data and persistence | `backend/onyx/db/`, `backend/ee/onyx/db/`, `backend/alembic/` | DB models, data access logic, and schema migrations |
| Frontend product surfaces | `web/src/app/`, `web/src/sections/`, `web/src/layouts/` | Next.js routes, screens, and feature-level UI composition |
| Frontend design system and shared UI | `web/lib/opal/`, `web/src/refresh-components/` | Preferred primitives for new UI work |
| Devtools and local developer workflows | `tools/ods/`, `cli/` | Repo automation, CI helpers, visual regression tooling, and CLI integrations |
| Agent-facing platform work | `backend/onyx/server/features/build/`, `backend/onyx/mcp_server/`, `backend/onyx/deep_research/`, `backend/onyx/agents/` | Sandbox runtime, MCP tool surface, agent orchestration, and research workflows |
| Agent-lab harness state | shared git metadata under `$(git rev-parse --git-common-dir)/onyx-agent-lab/` | Local worktree manifests, ports, env overlays, and verification artifacts for agentized development |
## Backend Map
Use these paths as the first stop when routing backend changes:
| Area | Paths | Notes |
| --- | --- | --- |
| Authentication and access control | `backend/onyx/auth/`, `backend/onyx/access/`, `backend/ee/onyx/access/` | User identity, auth flows, permissions |
| Chat and answer generation | `backend/onyx/chat/`, `backend/onyx/server/query_and_chat/` | Chat loop, message processing, streaming |
| Retrieval and tools | `backend/onyx/tools/`, `backend/onyx/context/`, `backend/onyx/mcp_server/` | Search tools, web tools, context assembly, MCP exposure |
| Connectors and indexing | `backend/onyx/connectors/`, `backend/onyx/document_index/`, `backend/onyx/background/` | Source sync, indexing, pruning, permissions sync |
| LLM and prompt infrastructure | `backend/onyx/llm/`, `backend/onyx/prompts/`, `backend/ee/onyx/prompts/` | Provider integrations and prompting |
| Server APIs and feature entrypoints | `backend/onyx/server/`, `backend/ee/onyx/server/` | FastAPI routes and product feature APIs |
| Agent and build platform | `backend/onyx/server/features/build/`, `backend/onyx/agents/`, `backend/onyx/deep_research/` | Sandboxes, agent runtimes, orchestration, long-running research |
| Persistence | `backend/onyx/db/`, `backend/ee/onyx/db/` | Put DB operations here, not in route handlers or feature modules |
## Frontend Map
For frontend work, route changes by intent first, then by component maturity:
| Intent | Preferred Paths | Notes |
| --- | --- | --- |
| Next.js route/page work | `web/src/app/` | App Router pages and page-local wiring |
| Feature composition | `web/src/sections/`, `web/src/layouts/` | Preferred place for reusable feature-level assemblies |
| New shared UI primitives | `web/lib/opal/`, `web/src/refresh-components/` | Default targets for new reusable UI |
| Legacy shared UI | `web/src/components/` | Avoid for new work unless forced by the local surface |
| Frontend business logic | `web/src/lib/`, `web/src/hooks/`, `web/src/interfaces/` | Utilities, hooks, typed interfaces |
Important frontend rule already established in [web/AGENTS.md](../../web/AGENTS.md):
- Do not use `web/src/components/` for new component work.
## Existing Hard Constraints
These rules already exist and should be treated as architectural boundaries:
- Backend errors should raise `OnyxError`, not `HTTPException`.
- DB operations belong under `backend/onyx/db/` or `backend/ee/onyx/db/`.
- New FastAPI APIs should not use `response_model`.
- Celery tasks should use `@shared_task`.
- Enqueued Celery tasks must include `expires=`.
- Backend calls in local/manual flows should go through `http://localhost:3000/api/...`.
## Change Routing Heuristics
Use these heuristics before editing:
1. If the task changes persistence semantics, start in the DB layer and migrations.
2. If the task changes user-visible UI, find the route in `web/src/app/`, then move downward into
`sections`, `layouts`, and preferred shared UI.
3. If the task spans product behavior and background execution, inspect both the API entrypoint and
the relevant Celery path.
4. If the task concerns agentization, build, or local execution, check whether
`backend/onyx/server/features/build/` or `tools/ods/` is the better home before creating a new
subsystem.
5. If the task needs isolated local boot, browser validation, or per-change artifacts, check
[HARNESS.md](./HARNESS.md) before inventing another ad hoc runner.
6. If the change touches a historically messy area, consult [LEGACY_ZONES.md](./LEGACY_ZONES.md)
before adding more local patterns.
## Test Routing
Onyx already has a clear testing ladder:
- `backend/tests/unit/`: isolated logic only
- `backend/tests/external_dependency_unit/`: real infra, direct function calls, selective mocking
- `backend/tests/integration/`: real deployment, no mocking
- `web/tests/e2e/`: full frontend-backend coordination
Prefer the lowest layer that still validates the real behavior. For many product changes in this
repo, that means integration or Playwright rather than unit tests.

147
docs/agent/BRANCHING.md Normal file
View File

@@ -0,0 +1,147 @@
# Branching Model for `agent-lab`
This is the branching policy for `agent-lab`. It is intentionally separate from the default
workflow on `main`.
This document explains how to use a long-running `agent-lab` branch without making `main`
implicitly depend on lab-only agent-engineering changes.
## Goals
- Keep `main` stable and consensus-driven.
- Allow opt-in agent-engineering improvements to live on `agent-lab`.
- Let engineers and agents use `agent-lab` as a control checkout for worktree-based development.
- Ensure product PRs to `main` originate from `main`-based branches, not from `agent-lab`.
## Branch Roles
| Branch | Purpose |
| --- | --- |
| `main` | Shipping branch and team default |
| `codex/agent-lab` | Long-running control checkout containing the harness and agent-engineering improvements |
| `codex/lab/<name>` | Short-lived branch for `agent-lab`-only tooling, docs, or workflow work |
| `codex/fix/<name>`, `codex/feat/<name>`, etc. | Short-lived product branch cut from `origin/main` and managed by the `agent-lab` control checkout |
## Core Rule
`main` must never depend on `agent-lab`.
That means:
- `codex/agent-lab` may contain extra tooling, docs, checks, and workflow changes.
- Product branches may be managed by the `agent-lab` control checkout, but they must still be based
on `origin/main`.
- A PR to `main` should come from a `main`-based product branch, not from `codex/agent-lab`.
## Preferred Workflow
### Lab-Only Work
Use this for agent-engineering docs, harnesses, optional checks, or tooling that should remain on
`agent-lab` for now.
1. Branch from `codex/agent-lab` into `codex/lab/<name>`.
For local isolation, create the branch via `ods worktree create codex/lab/<name>`.
2. Make the lab-only changes.
3. Open the PR back into `codex/agent-lab`.
4. Do not open these changes directly to `main` unless the team later agrees to upstream them.
### Product Feature Work
Use this when you want to fix a product bug or build a shipping feature for `main`.
1. Stay in the `codex/agent-lab` control checkout.
2. Create a product worktree from `origin/main`, using a conventional branch lane such as:
- `ods worktree create codex/fix/<name>`
- `ods worktree create codex/feat/<name>`
3. Make the code changes inside that worktree checkout.
4. Run harness commands from the control checkout against the tracked worktree:
- `ods agent-check --worktree codex/fix/<name>`
- `ods verify --worktree codex/fix/<name>`
- `ods backend api --worktree codex/fix/<name>`
- `ods web dev --worktree codex/fix/<name>`
5. If the change needs browser proof, record a before/after journey:
- before editing: `ods journey run --worktree codex/fix/<name> --journey <name> --label before`
- after validating the fix: `ods journey run --worktree codex/fix/<name> --journey <name> --label after`
- use `ods journey compare` only when the initial `before` capture was missed and a recovery
baseline is needed later
- after the PR exists, publish the artifact directory you captured or the fallback compare run
with `ods journey publish --run-dir <dir> --pr <number>`
6. Commit, push, and open the PR from the product worktree checkout itself.
Prefer `ods pr-open` so the repo template and conventional-commit title check stay in the same
control plane.
7. Open the PR directly from that product branch to `main`.
8. After the PR is open, use:
- `ods pr-review triage --pr <number>`
- `ods pr-checks diagnose --pr <number>`
- `ods pr-review respond --comment-id ... --thread-id ... --body ...`
## Commit Hygiene Rules
This workflow only works if commits are separated cleanly.
Agents and humans should:
- keep lab-only workflow changes in separate commits from product logic
- avoid mixing refactors, harness changes, and feature behavior in one commit
- use conventional-commit messages and PR titles
- prefer multiple small commits over one large mixed commit
Good split:
- `docs(agent-lab): clarify control-checkout workflow`
- `fix: suppress logged-out modal on fresh unauthenticated load`
- `test: add regression coverage for auth-page logout modal`
Bad split:
- `misc: update agent docs, add lint, change connector UI, fix API`
## Guidance for Agents
When an agent is working on product code, it should assume:
1. The product branch should be created from `origin/main`, not from `codex/agent-lab`.
2. The `codex/agent-lab` checkout is the control plane for `ods` commands until the harness is
upstreamed more broadly.
3. The code change itself should still be made and committed inside the target product worktree.
4. A PR to `main` should use a conventional-commit title such as `fix: ...` or `feat: ...`.
If a product bug is discovered while editing on `codex/agent-lab`, treat that as exploration.
Restart the real fix in a fresh `main`-based product worktree and port only the minimal product
patch there.
## What Should Usually Stay on `agent-lab`
These are usually lab-only unless explicitly approved for upstreaming:
- branch-specific workflow docs
- harness-only `ods` commands
- non-consensus lint rules
- agent harness scripts
- opt-in automation for review or promotion
- branch-specific AGENTS guidance
## What Can Be Promoted to `main`
These can be promoted once they stand on their own:
- product feature code
- product tests
- bug fixes
- low-controversy lint rules with team agreement
- small devtools improvements that are useful outside `agent-lab`
## Review Standard
If opening a PR to `main` from the `agent-lab` control workflow:
- make sure the PR branch itself is based on `origin/main`
- use a conventional-commit title
- mention any control-plane validation that was run with `ods ... --worktree <branch>`
- attach journey artifacts when browser behavior changed
- treat review-thread replies and failing checks as part of the same agent loop, not as a separate
manual phase
This keeps the product branch reviewable without forcing reviewers to understand the entire
`agent-lab` branch.

View File

@@ -0,0 +1,73 @@
# Golden Rules
These are the current rules for the `agent-lab` workflow. The long-term goal is to move the useful
ones from prose into shared checks, scripts, or tests where appropriate.
Some of these are already documented elsewhere in the repo as project standards. In this file,
they should be treated as the active rules for work done on `agent-lab`.
## Current Rules
### Backend
1. Raise `OnyxError` instead of `HTTPException`.
2. Put DB operations under `backend/onyx/db/` or `backend/ee/onyx/db/`.
3. Use `@shared_task` for Celery tasks.
4. Never enqueue a Celery task without `expires=`.
5. Do not use FastAPI `response_model` on new APIs.
6. Keep Python strictly typed.
### Frontend
1. Prefer `web/lib/opal/` and `web/src/refresh-components/` for new shared UI.
2. Do not add new shared components under `web/src/components/`.
3. Route backend calls through the frontend `/api/...` surface in local and test flows.
4. Keep TypeScript strictly typed.
### Workflow
1. Start in a tracked worktree created by `ods worktree create`. Do not use raw `git worktree add`
for harness-managed work.
2. For harness work, use `codex/lab/...` branches based on `codex/agent-lab`. For product work,
use conventional branches such as `codex/fix/...` or `codex/feat/...` based on `origin/main`.
3. Make edits inside the target worktree. Copying a patch from another checkout is only acceptable
when debugging the harness itself.
4. Prefer integration or external-dependency-unit tests over unit tests when validating real Onyx
behavior.
5. When a repeated review comment appears, convert it into repo-local documentation or a mechanical
check.
6. For browser-visible changes, prefer a registered `ods journey` capture over an ad hoc manual
recording. The before/after artifacts should live with the PR loop.
7. Use `ods pr-review` to fetch and triage GitHub review threads instead of relying on memory or
the web UI alone. Reply and resolve from the same workflow when confidence is high.
8. Use `ods pr-checks diagnose` to detect failing GitHub checks and point the next remediation
command. For Playwright failures, pair it with `ods trace`.
9. PR titles and commit messages should use conventional-commit style such as `fix: ...` or
   `feat: ...`. Never use `[codex]` prefixes in this repo.
10. When touching legacy areas, leave the area more explicit than you found it: better naming,
   better boundaries, or a follow-up cleanup note.
## Mechanical Checks
These are strong candidates for `ods agent-check` or dedicated linters:
| Check | Why it matters |
| --- | --- |
| Ban `HTTPException` in backend product code | Keeps API error handling consistent |
| Ban direct DB mutations outside DB directories | Preserves layering |
| Detect task enqueue calls missing `expires=` | Prevents queue growth and stale work |
| Detect new imports from `web/src/components/` in non-legacy code | Prevents further UI drift |
| Detect direct calls to backend ports in tests/scripts where frontend proxy should be used | Preserves realistic request paths |
| Detect missing docs/agent references for new repo-level rules | Prevents knowledge from staying only in chat |
## Rule Promotion Policy
Promote a rule from prose into enforcement when at least one is true:
- it has been violated more than once
- a violation is expensive to detect late
- the remediation is mechanical
- the error message can teach the correct pattern succinctly
Agents work better with fast, local, actionable failures than with broad stylistic feedback after a
PR is opened.

267
docs/agent/HARNESS.md Normal file
View File

@@ -0,0 +1,267 @@
# Worktree Harness
This document defines the `agent-lab` harness model for doing end-to-end work on `onyx`.
The goal is to make one agent capable of taking one isolated change from edit to verification
without depending on human memory for ports, paths, or validation steps.
## Principles
These decisions follow the same principles described in OpenAI's
[Harness engineering](https://openai.com/index/harness-engineering/) and
[Unlocking the Codex harness](https://openai.com/index/unlocking-the-codex-harness/) articles:
- each task should run in its own git worktree
- the app should be bootable per worktree
- browser state should be directly legible to the agent
- logs, traces, and test artifacts should be attached to the same worktree lifecycle
- repository docs plus local metadata should be the system of record, not chat memory
## Current Harness Surface
The first `agent-lab` harness layer lives in `tools/ods/`.
Implemented command surfaces:
- `ods worktree create <branch>`: creates a git worktree plus local agent metadata
- `ods worktree deps up|status|reset|down`: provisions and manages namespaced external state
- `ods worktree status`: lists tracked worktrees and their URLs
- `ods worktree show [worktree]`: prints the manifest for one worktree
- `ods worktree remove <worktree>`: removes the worktree and local harness state
- `ods journey list|run|compare|publish`: records registered browser journeys, including local
before/after video artifacts and optional PR publication
- `ods pr-review fetch|triage|respond|resolve`: turns GitHub review threads into a local
machine-readable loop
- `ods pr-checks status|diagnose`: makes failing GitHub checks queryable from the same control
plane
- `ods verify`: runs the agent verification ladder and writes a machine-readable summary
- `ods agent-check`: runs diff-based architectural and doc checks
## Required Workflow
This is the required `agent-lab` workflow going forward:
1. Create the target worktree first with `ods worktree create`.
2. Make the code changes inside that worktree.
3. Run verification against that same worktree.
4. Open the PR from that same worktree.
Do not implement a change in one checkout and then rsync or patch it into another checkout just to
test it. That is only acceptable when explicitly debugging the harness itself.
Also do not use raw `git worktree add` for harness-managed work. `ods worktree create` is the
authoritative entrypoint because it disables repo hooks during checkout, writes the local manifest,
bootstraps env/runtime dependencies, provisions namespaced state, and records the worktree lane and
base ref.
## Control Checkout Model
Right now the harness code itself lives on `codex/agent-lab`, not on plain `main`.
That means the `codex/agent-lab` checkout acts as the control plane:
- lab worktrees such as `codex/lab/...` are based on `codex/agent-lab`
- product worktrees such as `codex/fix/...` or `codex/feat/...` are based on `origin/main`
- the `agent-lab` checkout can still manage those product worktrees via `--worktree`
flags on `ods backend`, `ods web`, `ods verify`, and `ods agent-check`
This lets us use the harness to manage a `main`-based product branch before the harness itself has
been upstreamed to `main`.
## Worktree Metadata
Each `agent-lab` worktree gets a local manifest stored under the shared git metadata directory:
```text
$(git rev-parse --git-common-dir)/onyx-agent-lab/worktrees/<id>/
```
The manifest tracks:
- branch name
- checkout path
- base ref used when the branch was created
- dependency mode and namespace-derived external dependency settings
- reserved ports for web, API, model server, and MCP
- browser-facing URLs
- generated env overlay file paths
- artifact directory
- last verification summary
This state is local runtime metadata. It is intentionally not checked into the repo.
## Boot Model
The current harness boot model isolates the mutable application processes and can also isolate the
mutable non-search data plane.
Per worktree:
- Next.js dev server gets its own `PORT`
- browser-facing base URL is unique
- backend API port is unique
- model server port is unique
- MCP port reservation exists for future worktree-local MCP runtime use
- artifacts are written to a worktree-specific directory
Today this is enough to make the app bootable per worktree without requiring a fully duplicated
dependency container stack for every task.
Important boundary:
- isolated today: app processes, ports, URLs, local artifacts, worktree-local dependency installs,
PostgreSQL database, Redis key prefix, and MinIO file-store bucket when the worktree runs in
`namespaced` dependency mode
- shared today: OpenSearch/Vespa and the rest of the local dependency stack started via docker
compose
This means a normal `agent-lab` worktree can run against:
- a dedicated Postgres database on the shared local Postgres server
- a dedicated Redis namespace on the shared local Redis instance
- a dedicated MinIO file-store bucket on the shared local object store
OpenSearch/Vespa remain shared-only by design on this branch. The harness should never imply
otherwise.
This is a deliberate brownfield adaptation of the OpenAI articles' worktree-per-task model:
keep the common path mechanically isolated where the repo already supports it, and explicitly mark
the high-complexity surfaces that remain shared.
## Dependency Modes
`agent-lab` currently supports two dependency modes:
- `namespaced`: default mode for agent feature work. Creates one Postgres database, one Redis
prefix, and one MinIO bucket per worktree.
- `shared`: reuse the existing local DB/Redis/MinIO state when full isolation is unnecessary.
The worktree manifest is the source of truth for the selected mode and the derived namespace values.
Search infrastructure policy:
- OpenSearch/Vespa are always shared
- there is no current plan to add namespaced or per-worktree search stacks on `agent-lab`
- tasks that mutate search/index infrastructure should be treated as higher-risk and validated with
extra care because the harness does not isolate that surface
## Backend and Web Integration
When `ods backend ...` or `ods web ...` runs inside a tracked `agent-lab` worktree, it should
derive runtime settings from the worktree manifest automatically.
Current behavior:
- `ods backend api` defaults to the reserved worktree API port
- `ods backend model_server` defaults to the reserved worktree model-server port
- `ods web dev` gets the reserved worktree web port plus `BASE_URL`, `WEB_DOMAIN`,
`INTERNAL_URL`, and `MCP_INTERNAL_URL`
- backend and web commands also inherit the manifest's dependency namespace env overrides
- generated `.vscode/.env.agent-lab` and `.vscode/.env.web.agent-lab` files mirror those values
- `ods worktree bootstrap` prepares the worktree to run by linking env files, linking or cloning
the Python runtime, and preparing `web/node_modules`
- `ods worktree deps up` provisions namespaced Postgres/Redis/MinIO state when needed
- `ods backend ... --worktree <id>` and `ods web ... --worktree <id>` let the `agent-lab`
control checkout run app processes against a tracked target worktree
This makes the standard dev commands work in an isolated way without inventing a second startup
surface just for agents.
## Browser Validation
Use two browser surfaces — plus the `ods journey` commands that wrap them — each with a different job:
- Chrome DevTools MCP for exploratory validation, DOM snapshots, navigation, and interactive bug
reproduction
- Playwright for codified end-to-end verification, screenshots, and retained traces
- `ods journey run` for the default article-style loop inside one worktree: capture `before` before
the fix, then capture `after` after the fix and publish the resulting artifacts to the PR when
needed
- `ods journey compare` as the fallback path when the agent missed the initial `before` capture or
needs a strict baseline-vs-branch comparison after the fact
Important detail:
- The default path should not launch two worktrees just to prove a normal UI bug fix. Use one
tracked product worktree, start the app in that worktree, and record `before` and `after` from
that same environment.
- If the fix is still uncommitted, always capture from the tracked target worktree, not from a
temporary `HEAD` checkout.
- `ods journey compare` is reserved for recovery or explicit revision comparison, not as the
standard path for every PR.
The worktree manifest's `web` URL is the source of truth for both.
If an agent needs to inspect live UI behavior while iterating, it should prefer Chrome DevTools MCP
against the worktree URL. If the behavior needs to become a repeatable regression check, encode it
as Playwright coverage under `web/tests/e2e/`.
## Verification Ladder
The expected verification sequence for a worktree is:
1. `ods agent-check`
2. targeted backend tests when backend behavior changed
3. targeted Playwright runs when UI or frontend-backend flows changed
4. `ods journey run --label before` before the code change, then `ods journey run --label after`
after the change when the PR needs durable browser proof
5. screenshot and trace review when UI validation fails
`ods verify` is the first unified entrypoint for this ladder. It writes a JSON summary into the
worktree artifact directory so later agent runs can inspect prior results directly.
For product worktrees based on `main`, the intended control-plane usage is:
1. from `codex/agent-lab`, run `ods worktree create codex/fix/<name>`
2. edit inside the created `main`-based checkout
3. from `codex/agent-lab`, run `ods verify --worktree codex/fix/<name>`
4. if live processes are needed, run `ods backend ... --worktree codex/fix/<name>` and
`ods web ... --worktree codex/fix/<name>`
5. commit, push, and open the PR from the product worktree checkout itself
## Artifacts
Per-worktree artifacts are written under the local harness state directory, not into chat.
Current artifact classes:
- verification summaries
- pytest logs
- Playwright logs
- journey screenshots, videos, traces, and compare summaries
- PR review thread snapshots and triage outputs
- dependency namespace metadata in the local manifest
Existing repo outputs are still relevant:
- Playwright traces and screenshots under `web/output/`
- screenshot diff reports from `ods screenshot-diff`
- CI trace retrieval from `ods trace`
## Known Gaps
This is the initial harness layer, not the finished system.
Still missing:
- one-command `up/down` orchestration for all local processes
- worktree-local observability stack for logs, metrics, and traces
- worktree-local MCP server runtime wiring
- automatic promotion tooling from `agent-lab` feature branches to `main`
- recurring doc-gardening and cleanup agents
- resumable long-running task server for local development tasks
Resolved in the current harness layer:
- fresh-worktree bootstrap for `.venv`, `.vscode/.env*`, and `web/node_modules`
- namespaced isolation for Postgres, Redis, and MinIO on a per-worktree basis
- registered before/after browser journeys with durable artifact directories
- GitHub review-thread fetch/triage/respond tooling
- GitHub failing-check diagnosis from the same `ods` control plane
Non-goals on this branch:
- OpenSearch/Vespa namespacing
- per-worktree vector/search stacks
Those are the next places to invest if we want to match the article more closely.

View File

@@ -0,0 +1,87 @@
# Legacy Zones
Status: initial classification. This file exists to stop agents from treating every existing
pattern in the repository as equally desirable precedent.
## Zone Types
| Zone | Meaning | Edit Policy |
| --- | --- | --- |
| `strict` | Preferred surface for new work | Freely extend, but keep boundaries explicit and add tests |
| `transition` | Actively evolving surface with mixed patterns | Prefer local consistency, avoid introducing new abstractions casually |
| `legacy-adapter` | Known historical surface or deprecated pattern area | Avoid new dependencies on it; prefer facades, wrappers, or migrations away |
| `frozen` | Only touch for bug fixes, security, or explicitly scoped work | Do not expand the pattern set |
## Initial Classification
### Strict
These are good default targets for new investment:
- `backend/onyx/db/`
- `backend/ee/onyx/db/`
- `backend/onyx/error_handling/`
- `backend/onyx/mcp_server/`
- `backend/onyx/server/features/build/`
- `tools/ods/`
- `web/lib/opal/`
- `web/src/refresh-components/`
- `web/src/layouts/`
- `web/src/sections/cards/`
### Transition
These areas are important and active, but they mix styles, eras, and responsibilities:
- `backend/onyx/server/`
- `backend/ee/onyx/server/`
- `backend/onyx/chat/`
- `backend/onyx/tools/`
- `backend/onyx/agents/`
- `backend/onyx/deep_research/`
- `web/src/app/`
- `web/src/sections/`
- `web/src/lib/`
Edit guidance:
- prefer incremental refactors over sweeping rewrites
- keep changes local when the area lacks clear boundaries
- add tests before extracting new shared abstractions
### Legacy-Adapter
These areas should not be treated as default precedent for new work:
- `web/src/components/`
- `backend/model_server/legacy/`
Edit guidance:
- do not add fresh reusable components or helper patterns here
- if a task requires touching these areas, prefer introducing an adapter in a stricter surface
- if you must extend a legacy file, keep the blast radius small and document follow-up cleanup
### Frozen
No repo-wide frozen zones are declared yet beyond files or subsystems that are clearly deprecated on
their face. Add explicit entries here rather than relying on tribal knowledge.
## Brownfield Rules
When a task lands in a non-strict zone:
1. Identify whether the task is fixing behavior, adding capability, or migrating structure.
2. Avoid copying local patterns into stricter parts of the codebase.
3. If an unsafe pattern is unavoidable, isolate it behind a typed boundary.
4. Record newly discovered smells in [GOLDEN_RULES.md](./GOLDEN_RULES.md) or a follow-on
execution plan.
## Promotion Criteria
A transition area can move toward `strict` when:
- its dependency boundaries are easy to explain
- new code has a preferred home
- tests are reliable enough for agents to use as feedback loops
- recurring review comments have been turned into written or mechanical rules

View File

@@ -0,0 +1,48 @@
# Quality Score Baseline
This file is an intentionally rough baseline for how legible the repository is to coding agents.
It is not a product quality report. It is a scorecard for agent development ergonomics.
## Scoring Rubric
Each area is scored from `0` to `5` on four dimensions:
- `Legibility`: how easy it is to discover the right files and concepts
- `Boundaries`: how clearly dependency and ownership seams are defined
- `Verification`: how available and reliable the feedback loops are
- `Agent ergonomics`: how likely an agent is to make a correct change without human rescue
Overall score is directional, not mathematically precise.
## Initial Baseline
| Area | Legibility | Boundaries | Verification | Agent ergonomics | Overall | Notes |
| --- | --- | --- | --- | --- | --- | --- |
| Backend core (`backend/onyx/`, `backend/ee/onyx/`) | 3 | 3 | 4 | 3 | 3.25 | Strong test surface, but top-level routing docs are thin |
| Persistence (`backend/onyx/db/`, migrations) | 4 | 4 | 3 | 4 | 3.75 | Clearer than most areas because path-level rules already exist |
| Frontend modern surfaces (`web/src/app/`, `sections`, `opal`, `refresh-components`) | 3 | 3 | 3 | 3 | 3.0 | Direction exists, but mixed generations still leak across boundaries |
| Frontend legacy shared UI (`web/src/components/`) | 1 | 1 | 2 | 1 | 1.25 | Explicitly deprecated, but still present and easy for agents to cargo-cult |
| Agent platform and build sandbox (`backend/onyx/server/features/build/`) | 3 | 4 | 3 | 4 | 3.5 | Good substrate for agentization, but not yet aimed at repo development workflows |
| MCP, CLI, and devtools (`backend/onyx/mcp_server/`, `cli/`, `tools/ods/`) | 4 | 4 | 4 | 4 | 4.0 | `agent-check`, worktree manifests, `ods verify`, `ods journey`, and PR review/check tooling give this surface a real control plane |
| Repo-level docs and plans | 4 | 3 | 4 | 4 | 3.75 | `docs/agent/` now describes the journey/review/check loop directly, though subsystem coverage is still uneven |
## Biggest Gaps
1. Repo-level architecture knowledge is still thinner than the runtime and workflow docs.
2. Brownfield and legacy zones are not explicitly flagged enough for agents.
3. Important engineering rules still outnumber the mechanical checks that enforce them.
4. The worktree harness does not yet include a local observability stack or one-command process orchestration.
## Near-Term Targets
The next improvements should aim to move these areas:
- Repo-level docs and plans: `3.75 -> 4.0`
- Frontend legacy safety: `1.25 -> 2.5`
- Backend core agent ergonomics: `3.0 -> 4.0`
- Worktree observability and runtime automation: `2.5 -> 4.0`
## Update Policy
When a new check, map, or workflow materially improves agent behavior, update this scorecard and
note what changed. If a score changes, the adjacent notes should explain why.

68
docs/agent/README.md Normal file
View File

@@ -0,0 +1,68 @@
# Agent Engineering Docs
This directory is the knowledge base for the `agent-lab` workflow around making development of
`onyx` itself more agentized.
The goal is not to replace the root [AGENTS.md](../../AGENTS.md).
The goal is to keep architecture maps, unsafe-zone notes, quality signals, and follow-on
execution plans in a form that coding agents can discover and update.
On `agent-lab`, this directory is the system of record for agent-engineering workflow.
## Principles
- Keep the entrypoint small. The root `AGENTS.md` should point here; it should not become a
growing encyclopedia.
- Create the target worktree first. The intended workflow is one task, one tracked worktree, one
verification loop, and one PR from that same checkout.
- Keep artifacts with the workflow. Browser videos, traces, review summaries, and check triage
should be produced by harness commands and stored as machine-readable outputs, not recreated
from chat memory.
- Prefer maps over manuals. Agents need navigable pointers to the right subsystem, not a giant
blob of undifferentiated instructions.
- Encode recurring judgment into the repo. If a rule matters often, document it here and then
promote it into a check, linter, test, or script.
- Distinguish legacy from greenfield. Agents will copy the patterns they see. If an area is
historically messy, we need to say so explicitly.
- Version decisions with the code. If a design choice matters for future changes, it should live
in-repo rather than in chat or memory.
## Documents
- [ARCHITECTURE.md](./ARCHITECTURE.md): top-level codebase map and change-routing guidance.
- [BRANCHING.md](./BRANCHING.md): branch model for long-running `agent-lab` development and
promotion of product-only changes to `main`.
- [HARNESS.md](./HARNESS.md): worktree runtime model, verification ladder, and browser/tooling
expectations.
- [LEGACY_ZONES.md](./LEGACY_ZONES.md): edit policy for strict, transitional, and legacy areas.
- [GOLDEN_RULES.md](./GOLDEN_RULES.md): active rules for `agent-lab` and promotion targets for
mechanical enforcement.
- [QUALITY_SCORE.md](./QUALITY_SCORE.md): baseline legibility and maintainability assessment for
agent work.
## Operating Model
Use this directory for information that should change how future agents work in the `agent-lab`
workflow:
- architecture maps
- dependency and layering rules
- "do not extend this pattern" warnings
- safe extension points
- recurring cleanup policies
- harness/runtime behavior for worktree-based development
- before/after browser journeys and PR artifact publication
- GitHub review and failing-check control loops
- quality scorecards
- active execution plans for agent-engineering improvements
Current workflow split:
- `codex/agent-lab` is the control checkout for the harness itself.
- `codex/lab/<name>` branches are for harness/docs/tooling work based on `codex/agent-lab`.
- `codex/fix/<name>`, `codex/feat/<name>`, and similar conventional product branches should be
created from `origin/main`, even when they are managed from the `agent-lab` control checkout.
- PR titles and commit messages should use conventional-commit style, never `[codex]` prefixes.
Do not turn this into a dumping ground. If something is local to one feature, keep it with that
feature. This directory is for `agent-lab`-level agent-development guidance.

View File

@@ -148,7 +148,7 @@ dev = [
"matplotlib==3.10.8",
"mypy-extensions==1.0.0",
"mypy==1.13.0",
"onyx-devtools==0.7.5",
"onyx-devtools==0.7.4",
"openapi-generator-cli==7.17.0",
"pandas-stubs~=2.3.3",
"pre-commit==3.2.2",

View File

@@ -28,11 +28,11 @@ Some commands require external tools to be installed and configured:
- **uv** - Required for `backend` commands
- Install from [docs.astral.sh/uv](https://docs.astral.sh/uv/)
- **GitHub CLI** (`gh`) - Required for `run-ci`, `cherry-pick`, and `trace` commands
- **GitHub CLI** (`gh`) - Required for `run-ci`, `cherry-pick`, `trace`, `pr-review`, and `pr-checks` commands
- Install from [cli.github.com](https://cli.github.com/)
- Authenticate with `gh auth login`
- **AWS CLI** - Required for `screenshot-diff` commands (S3 baseline sync)
- **AWS CLI** - Required for `screenshot-diff` commands and `journey publish` (S3 artifact sync)
- Install from [aws.amazon.com/cli](https://aws.amazon.com/cli/)
- Authenticate with `aws sso login` or `aws configure`
@@ -196,11 +196,19 @@ ods backend <subcommand>
| Flag | Default | Description |
|------|---------|-------------|
| `--no-ee` | `false` | Disable Enterprise Edition features (enabled by default) |
| `--worktree` | current checkout | Run the command against a tracked agent-lab worktree |
| `--port` | `8080` (api) / `9000` (model_server) | Port to listen on |
Shell environment takes precedence over `.env` file values, so inline overrides
work as expected (e.g. `S3_ENDPOINT_URL=foo ods backend api`).
When run inside a tracked `agent-lab` worktree, `ods backend api` and
`ods backend model_server` will automatically use that worktree's reserved
ports unless you override them explicitly with `--port`.
The same command can also be launched from the `codex/agent-lab` control
checkout against another tracked worktree via `--worktree <branch>`.
**Examples:**
```shell
@@ -218,6 +226,9 @@ ods backend model_server
# Start the model server on a custom port
ods backend model_server --port 9001
# Run the API server for a tracked product worktree from the control checkout
ods backend api --worktree codex/fix/auth-banner-modal
```
### `web` - Run Frontend Scripts
@@ -231,6 +242,14 @@ ods web <script> [args...]
Script names are available via shell completion (for supported shells via
`ods completion`), and are read from `web/package.json`.
When run inside a tracked `agent-lab` worktree, `ods web ...` automatically
injects the worktree's `PORT`, `BASE_URL`, `WEB_DOMAIN`, `INTERNAL_URL`, and
`MCP_INTERNAL_URL` so the Next.js dev server boots against the right isolated
stack.
From the `codex/agent-lab` control checkout, `--worktree <branch>` applies the
same wiring to a tracked target worktree.
**Examples:**
```shell
@@ -242,6 +261,162 @@ ods web lint
# Forward extra args to the script
ods web test --watch
# Run the Next.js dev server for a tracked product worktree
ods web dev --worktree codex/fix/auth-banner-modal
```
### `worktree` - Manage Agent-Lab Worktrees
Create and manage local git worktrees for agentized development. Each tracked
worktree gets:
- a reserved port bundle for web, API, model server, and MCP
- an explicit dependency mode for local external state
- generated `.vscode/.env.agent-lab` and `.vscode/.env.web.agent-lab` files
- a local artifact directory for verification logs and summaries
- a manifest stored under the shared git metadata directory
- bootstrap support for env files, Python runtime, and frontend dependencies
`ods worktree create` is the authoritative entrypoint for this workflow. Do not
use raw `git worktree add` when you want the `agent-lab` harness, because you
will skip the manifest, env overlays, dependency bootstrap, and lane-aware base
selection.
```shell
ods worktree <subcommand>
```
**Subcommands:**
- `create <branch>` - Create a worktree and manifest
- `bootstrap [worktree]` - Prepare env files and dependencies for a worktree
- `deps up|status|reset|down [worktree]` - Provision and manage namespaced external state
- `status` - List tracked worktrees and URLs
- `show [worktree]` - Show detailed metadata for one worktree
- `remove <worktree>` - Remove a worktree and its local state
`ods worktree create` bootstraps new worktrees by default. The current bootstrap
behavior is:
- link `.vscode/.env` and `.vscode/.env.web` from the source checkout when present
- link the source checkout's `.venv` when present
- clone `web/node_modules` into the worktree when present, falling back to
`npm ci --prefer-offline --no-audit`
Current isolation boundary:
- worktree-local: web/API/model-server ports, URLs, env overlays, artifact dirs
- namespaced when `--dependency-mode namespaced` is used: PostgreSQL database,
Redis prefix, and MinIO file-store bucket
- always shared: OpenSearch/Vespa and the rest of the docker-compose dependency stack
`namespaced` is the default dependency mode on `agent-lab`. `shared` is still
available for lighter-weight work that does not need isolated DB/Redis/MinIO
state.
Branch lanes:
- `codex/lab/<name>` worktrees are treated as harness work and default to
`codex/agent-lab` as the base ref
- `codex/fix/<name>`, `codex/feat/<name>`, and other conventional product lanes
default to `origin/main` as the base ref
- branches that do not encode a lane fall back to `HEAD`; use `--from` or a
clearer branch name when the base matters
Control-plane note:
- the harness lives on `codex/agent-lab`
- product worktrees can still be based on `origin/main`
- run `ods backend`, `ods web`, `ods verify`, and `ods agent-check` with
`--worktree <branch>` from the control checkout when the target worktree does
not carry the harness code itself
Search/vector note:
- OpenSearch/Vespa stay shared-only
- this branch intentionally does not implement namespaced or per-worktree search stacks
- tasks that touch search/index infrastructure should assume a shared surface
**Examples:**
```shell
# Create a product bugfix worktree from main
ods worktree create codex/fix/auth-banner-modal
# Create a lab-only worktree from agent-lab
ods worktree create codex/lab/browser-validation
# Reuse the shared DB/Redis/MinIO state for a lighter-weight task
ods worktree create codex/fix/ui-polish --dependency-mode shared
# Re-bootstrap an existing worktree
ods worktree bootstrap codex/fix/auth-banner-modal
# Inspect the current worktree's namespaced dependency state
ods worktree deps status
# Reset the current worktree's Postgres/Redis/MinIO namespace
ods worktree deps reset
# See tracked worktrees
ods worktree status
# Show the current worktree manifest
ods worktree show
# Remove a worktree when finished
ods worktree remove codex/fix/auth-banner-modal
# Remove a worktree and tear down its namespaced dependencies
ods worktree remove codex/fix/auth-banner-modal --drop-deps
```
### `verify` - Run the Agent-Lab Verification Ladder
Run a unified verification flow for the current checkout. `ods verify` is the
first worktree-aware entrypoint that combines:
- `agent-check`
- optional targeted pytest execution
- optional targeted Playwright execution
- machine-readable verification summaries written to the worktree artifact dir
```shell
ods verify
```
Useful flags:
| Flag | Description |
|------|-------------|
| `--base-ref <ref>` | Ref to compare against for `agent-check` |
| `--skip-agent-check` | Skip the diff-based rules step |
| `--worktree <id>` | Run verification against a tracked worktree from the control checkout |
| `--pytest <path>` | Run a specific pytest path or node id (repeatable) |
| `--playwright <path>` | Run a specific Playwright test path (repeatable) |
| `--playwright-grep <expr>` | Pass `--grep` through to Playwright |
| `--playwright-project <name>` | Limit Playwright to one project |
Examples:
```shell
# Run just the diff-based checks
ods verify
# Validate a backend change with one focused integration target
ods verify --pytest backend/tests/integration/tests/streaming_endpoints/test_chat_stream.py
# Validate a UI change with one Playwright suite
ods verify --playwright tests/e2e/chat/welcome_page.spec.ts --playwright-project admin
# Run both backend and UI checks
ods verify \
--pytest backend/tests/integration/tests/streaming_endpoints/test_chat_stream.py \
--playwright tests/e2e/admin/default-agent.spec.ts
# Verify a tracked product worktree from the control checkout
ods verify --worktree codex/fix/auth-banner-modal
```
### `dev` - Devcontainer Management
@@ -325,6 +500,56 @@ Check that specified modules are only lazily imported (used for keeping backend
ods check-lazy-imports
```
### `agent-check` - Check New Agent-Safety Violations
Run a small set of diff-based checks aimed at keeping new changes agent-friendly
without failing on historical debt already present in the repository.
This command is part of the expected workflow on `agent-lab`. It is not necessarily a repo-wide
mandatory gate on `main`.
```shell
ods agent-check
```
Current checks flag newly added:
- `HTTPException` usage in backend product code
- `response_model=` on backend APIs
- Celery `.delay()` calls
- imports from `web/src/components/` outside the legacy component tree
The command also validates the `docs/agent/` knowledge base by checking that
required files exist and that local markdown links in that surface resolve
correctly.
Useful flags:
| Flag | Description |
|------|-------------|
| `--staged` | Check the staged diff instead of the working tree |
| `--base-ref <ref>` | Diff against a git ref other than `HEAD` |
| `--worktree <id>` | Check a tracked worktree from the control checkout |
Examples:
```shell
# Check working tree changes
ods agent-check
# Check only staged changes
ods agent-check --staged
# Compare the branch against main
ods agent-check --base-ref origin/main
# Limit the diff to specific paths
ods agent-check web/src backend/onyx/server/features/build
# Run against a tracked product worktree from the control checkout
ods agent-check --worktree codex/fix/auth-banner-modal --base-ref origin/main
```
### `run-ci` - Run CI on Fork PRs
Pull requests from forks don't automatically trigger GitHub Actions for security reasons.
@@ -516,6 +741,148 @@ ods trace --project admin
ods trace --list
```
### `journey` - Capture Before/After Browser Journeys
Run a registered Playwright journey with video capture. The default workflow is
to record `before` and `after` inside the same tracked worktree as the change.
`journey compare` remains available as a recovery path when you need to compare
two explicit revisions/worktrees after the fact.
Registered journeys live in `web/tests/e2e/journeys/registry.json`.
An optional `.github/agent-journeys.json` file can list journeys for a PR:
```json
{
"journeys": ["auth-landing"]
}
```
```shell
ods journey <subcommand>
```
**Subcommands:**
- `list` - Show registered journeys
- `run` - Run one journey against the current or target worktree
- `compare` - Capture `before` and `after` artifacts across two revisions/worktrees when a missed baseline must be recovered
- `publish` - Upload a compare run to S3 and upsert the PR comment
**Examples:**
```shell
# List journey definitions
ods journey list
# Capture before in the tracked product worktree before editing
ods journey run --worktree codex/fix/auth-banner-modal --journey auth-landing --label before
# Capture after in that same worktree after validating the fix
ods journey run --worktree codex/fix/auth-banner-modal --journey auth-landing --label after
# Recover a missed baseline later by comparing origin/main to a tracked product worktree
ods journey compare \
--journey auth-landing \
--after-worktree codex/fix/auth-banner-modal
# Publish an existing compare run to PR #10007
ods journey publish \
--run-dir .git/onyx-agent-lab/journeys/20260408-123000 \
--pr 10007
```
`journey run` writes a `summary.json` into the capture directory. `journey compare`
writes a `summary.json` into its run directory and, when `--pr` is supplied,
uploads that directory to S3 and upserts a PR comment with before/after links.
### `pr-review` - Fetch and Respond to GitHub Review Threads
Treat PR review comments as a local machine-readable workflow instead of relying
on the GitHub UI alone.
```shell
ods pr-review <subcommand>
```
**Subcommands:**
- `fetch` - Download review threads into local harness state
- `triage` - Classify threads as actionable, duplicate, outdated, or resolved
- `respond` - Reply to an inline review comment and optionally resolve its thread
- `resolve` - Resolve a review thread without posting a reply
**Examples:**
```shell
# Fetch review threads for the current branch PR
ods pr-review fetch
# Triage review threads for a specific PR
ods pr-review triage --pr 10007
# Reply to a top-level review comment and resolve the thread
ods pr-review respond \
--pr 10007 \
--comment-id 2512997464 \
--thread-id PRRT_kwDO... \
--body "Fixed in the latest patch. Added a regression journey as well."
```
Fetched and triaged review data is written under the local harness state
directory:
```text
$(git rev-parse --git-common-dir)/onyx-agent-lab/reviews/pr-<number>/
```
### `pr-checks` - Diagnose Failing GitHub Checks
Inspect the latest checks on a PR and surface the failing ones with the next
recommended remediation command.
```shell
ods pr-checks <subcommand>
```
**Subcommands:**
- `status` - list all checks for the PR
- `diagnose` - list only failing checks and point to the next step
**Examples:**
```shell
# Show all checks on the current branch PR
ods pr-checks status
# Show only failing checks and the next remediation command
ods pr-checks diagnose --pr 10007
```
`pr-checks diagnose` is especially useful after pushing a fix or after replying
to review comments. For Playwright failures it points directly at `ods trace`.
### `pr-open` - Open a PR With the Repo Template
Create a pull request through `gh` while enforcing a conventional-commit title.
If `--title` is omitted, `ods` uses the latest commit subject. The PR body
defaults to `.github/pull_request_template.md`. PRs are ready-for-review by
default; use `--draft` only when you explicitly need that state.
```shell
ods pr-open
ods pr-open --title "fix: suppress logged-out modal on fresh auth load"
```
### `pr-merge` - Merge a PR Through `gh`
Merge or auto-merge a pull request with an explicit merge method.
```shell
ods pr-merge --pr 10007 --method squash
ods pr-merge --pr 10007 --method squash --auto --delete-branch
```
### Testing Changes Locally (Dry Run)
Both `run-ci` and `cherry-pick` support `--dry-run` to test without making remote changes:

View File

@@ -0,0 +1,161 @@
package cmd
import (
"fmt"
"os"
"os/exec"
"sort"
"strings"
log "github.com/sirupsen/logrus"
"github.com/spf13/cobra"
"github.com/onyx-dot-app/onyx/tools/ods/internal/agentcheck"
"github.com/onyx-dot-app/onyx/tools/ods/internal/agentdocs"
"github.com/onyx-dot-app/onyx/tools/ods/internal/paths"
)
// AgentCheckOptions holds the flag values and resolved context for the
// agent-check command.
type AgentCheckOptions struct {
	Staged   bool   // check the staged diff (--cached) instead of the working tree
	BaseRef  string // git ref to diff against instead of HEAD
	Worktree string // tracked agent-lab worktree identifier to check instead of the current checkout
	RepoRoot string // resolved checkout path the git diff is executed in (set by runAgentCheck)
}
// AgentCheckResult aggregates the findings of one agent-check run: diff-based
// code violations plus docs/agent knowledge-base validation failures.
type AgentCheckResult struct {
	Violations    []agentcheck.Violation // newly added lines that break an agent-safety rule
	DocViolations []agentdocs.Violation  // problems found while validating the docs/agent/ surface
}
// NewAgentCheckCommand creates the agent-check command.
func NewAgentCheckCommand() *cobra.Command {
	options := &AgentCheckOptions{}

	agentCheckCmd := &cobra.Command{
		Use:   "agent-check [paths...]",
		Short: "Run diff-based checks for agent-safe changes",
		Long: `Run diff-based checks for agent-safe changes.

This command inspects added lines in the current git diff and flags a small set
of newly introduced repo-level violations without failing on historical debt.

By default it compares the working tree against HEAD. Use --staged to inspect
the staged diff instead, or --base-ref to compare against a different ref.

Use --worktree to run the same check against a tracked target worktree from the
agent-lab control checkout.

Examples:
  ods agent-check
  ods agent-check --staged
  ods agent-check --base-ref origin/main
  ods agent-check --worktree codex/fix/auth-banner-modal --base-ref origin/main
  ods agent-check web/src backend/onyx/server/features/build`,
		Run: func(cmd *cobra.Command, args []string) {
			runAgentCheck(options, args)
		},
	}

	// Flags mirror the fields on AgentCheckOptions one-to-one.
	flags := agentCheckCmd.Flags()
	flags.BoolVar(&options.Staged, "staged", false, "check staged changes instead of the working tree")
	flags.StringVar(&options.BaseRef, "base-ref", "", "git ref to diff against instead of HEAD")
	flags.StringVar(&options.Worktree, "worktree", "", "tracked agent-lab worktree to check instead of the current checkout")

	return agentCheckCmd
}
// runAgentCheck resolves the target checkout, evaluates the diff-based and
// docs checks, prints every violation, and exits non-zero when any were found.
func runAgentCheck(opts *AgentCheckOptions, providedPaths []string) {
	repoRoot, _, _ := resolveAgentLabTarget(opts.Worktree)
	opts.RepoRoot = repoRoot

	result, err := evaluateAgentCheck(opts, providedPaths)
	if err != nil {
		log.Fatalf("Failed to run agent-check: %v", err)
	}

	if len(result.Violations) == 0 && len(result.DocViolations) == 0 {
		log.Info("✅ agent-check found no new violations.")
		return
	}

	// Sort both violation lists so the report order is deterministic.
	// (Previously only code violations were sorted, leaving doc violations
	// in whatever order agentdocs.Validate produced them.)
	sort.Slice(result.Violations, func(i, j int) bool {
		if result.Violations[i].Path != result.Violations[j].Path {
			return result.Violations[i].Path < result.Violations[j].Path
		}
		if result.Violations[i].LineNum != result.Violations[j].LineNum {
			return result.Violations[i].LineNum < result.Violations[j].LineNum
		}
		return result.Violations[i].RuleID < result.Violations[j].RuleID
	})
	sort.Slice(result.DocViolations, func(i, j int) bool {
		if result.DocViolations[i].Path != result.DocViolations[j].Path {
			return result.DocViolations[i].Path < result.DocViolations[j].Path
		}
		return result.DocViolations[i].Message < result.DocViolations[j].Message
	})

	for _, violation := range result.Violations {
		log.Errorf("\n❌ %s:%d [%s]", violation.Path, violation.LineNum, violation.RuleID)
		log.Errorf(" %s", violation.Message)
		log.Errorf(" Added line: %s", strings.TrimSpace(violation.Content))
	}
	for _, violation := range result.DocViolations {
		log.Errorf("\n❌ %s [agent-docs]", violation.Path)
		log.Errorf(" %s", violation.Message)
	}

	// Summary goes to stderr so scripts capturing stdout stay clean.
	fmt.Fprintf(
		os.Stderr,
		"\nFound %d agent-check violation(s) and %d agent-docs violation(s).\n",
		len(result.Violations),
		len(result.DocViolations),
	)
	os.Exit(1)
}
// evaluateAgentCheck collects the configured git diff, extracts its added
// lines, and runs both the diff-based rules and the docs/agent knowledge-base
// validation, returning the combined result.
func evaluateAgentCheck(opts *AgentCheckOptions, providedPaths []string) (*AgentCheckResult, error) {
	diffOutput, err := getAgentCheckDiff(opts, providedPaths)
	if err != nil {
		return nil, err
	}

	addedLines, err := agentcheck.ParseAddedLines(diffOutput)
	if err != nil {
		return nil, err
	}

	// Fall back to discovering the git root when no repo root was resolved
	// ahead of time.
	root := opts.RepoRoot
	if root == "" {
		gitRoot, rootErr := paths.GitRoot()
		if rootErr != nil {
			return nil, fmt.Errorf("determine git root: %w", rootErr)
		}
		root = gitRoot
	}

	return &AgentCheckResult{
		Violations:    agentcheck.CheckAddedLines(addedLines),
		DocViolations: agentdocs.Validate(root),
	}, nil
}
// getAgentCheckDiff runs "git diff" for the configured target and returns the
// raw unified (zero-context) diff text.
//
// stdout and stderr are deliberately kept separate: the returned text is later
// parsed by agentcheck.ParseAddedLines, so git warnings printed to stderr must
// not be interleaved with the diff (CombinedOutput would do exactly that).
func getAgentCheckDiff(opts *AgentCheckOptions, providedPaths []string) (string, error) {
	args := []string{"diff", "--no-color", "--unified=0"}
	if opts.Staged {
		args = append(args, "--cached")
	} else if opts.BaseRef != "" {
		args = append(args, opts.BaseRef)
	} else {
		args = append(args, "HEAD")
	}
	if len(providedPaths) > 0 {
		args = append(args, "--")
		args = append(args, providedPaths...)
	}

	cmd := exec.Command("git", args...)
	if opts.RepoRoot != "" {
		// Run the diff inside the target checkout (e.g. a tracked worktree).
		cmd.Dir = opts.RepoRoot
	}

	output, err := cmd.Output()
	if err != nil {
		var stderr string
		if exitErr, ok := err.(*exec.ExitError); ok {
			stderr = strings.TrimSpace(string(exitErr.Stderr))
		}
		return "", fmt.Errorf("git %s failed: %w\n%s", strings.Join(args, " "), err, stderr)
	}
	return string(output), nil
}

View File

@@ -0,0 +1,32 @@
package cmd
import (
log "github.com/sirupsen/logrus"
"github.com/onyx-dot-app/onyx/tools/ods/internal/agentlab"
"github.com/onyx-dot-app/onyx/tools/ods/internal/paths"
)
// resolveAgentLabTarget maps an optional worktree identifier to a checkout
// path plus its agent-lab manifest. The boolean reports whether a manifest
// was found. Resolution failures are fatal.
func resolveAgentLabTarget(identifier string) (string, agentlab.Manifest, bool) {
	// No explicit target: operate on the current checkout and attach its
	// manifest when this checkout happens to be a tracked worktree.
	if identifier == "" {
		root, rootErr := paths.GitRoot()
		if rootErr != nil {
			log.Fatalf("Failed to determine git root: %v", rootErr)
		}
		m, ok := currentAgentLabManifest(root)
		return root, m, ok
	}

	gitDir, dirErr := agentlab.GetCommonGitDir()
	if dirErr != nil {
		log.Fatalf("Failed to determine git common dir: %v", dirErr)
	}

	m, ok, lookupErr := agentlab.FindByIdentifier(gitDir, identifier)
	if lookupErr != nil {
		log.Fatalf("Failed to resolve worktree %q: %v", identifier, lookupErr)
	}
	if !ok {
		log.Fatalf("No agent-lab worktree found for %q", identifier)
	}
	return m.CheckoutPath, m, true
}

View File

@@ -1,7 +1,6 @@
package cmd
import (
"bufio"
"errors"
"fmt"
"net"
@@ -14,14 +13,16 @@ import (
log "github.com/sirupsen/logrus"
"github.com/spf13/cobra"
"github.com/onyx-dot-app/onyx/tools/ods/internal/paths"
"github.com/onyx-dot-app/onyx/tools/ods/internal/agentlab"
"github.com/onyx-dot-app/onyx/tools/ods/internal/envutil"
)
// NewBackendCommand creates the parent "backend" command with subcommands for
// running backend services.
// BackendOptions holds options shared across backend subcommands.
type BackendOptions struct {
NoEE bool
NoEE bool
Worktree string
}
func NewBackendCommand() *cobra.Command {
@@ -44,6 +45,7 @@ Available subcommands:
}
cmd.PersistentFlags().BoolVar(&opts.NoEE, "no-ee", false, "Disable Enterprise Edition features (enabled by default)")
cmd.PersistentFlags().StringVar(&opts.Worktree, "worktree", "", "tracked agent-lab worktree to run from instead of the current checkout")
cmd.AddCommand(newBackendAPICommand(opts))
cmd.AddCommand(newBackendModelServerCommand(opts))
@@ -62,9 +64,10 @@ func newBackendAPICommand(opts *BackendOptions) *cobra.Command {
Examples:
ods backend api
ods backend api --port 9090
ods backend api --no-ee`,
ods backend api --no-ee
ods backend api --worktree codex/fix/auth-banner-modal`,
Run: func(cmd *cobra.Command, args []string) {
runBackendService("api", "onyx.main:app", port, opts)
runBackendService("api", "onyx.main:app", port, cmd.Flags().Changed("port"), opts)
},
}
@@ -83,9 +86,10 @@ func newBackendModelServerCommand(opts *BackendOptions) *cobra.Command {
Examples:
ods backend model_server
ods backend model_server --port 9001`,
ods backend model_server --port 9001
ods backend model_server --worktree codex/fix/auth-banner-modal`,
Run: func(cmd *cobra.Command, args []string) {
runBackendService("model_server", "model_server.main:app", port, opts)
runBackendService("model_server", "model_server.main:app", port, cmd.Flags().Changed("port"), opts)
},
}
@@ -137,16 +141,25 @@ func resolvePort(port string) string {
return port
}
func runBackendService(name, module, port string, opts *BackendOptions) {
root, err := paths.GitRoot()
if err != nil {
log.Fatalf("Failed to find git root: %v", err)
func runBackendService(name, module, port string, portExplicit bool, opts *BackendOptions) {
root, worktreeManifest, hasWorktreeManifest := resolveAgentLabTarget(opts.Worktree)
if hasWorktreeManifest && !portExplicit {
switch name {
case "api":
port = strconv.Itoa(worktreeManifest.Ports.API)
case "model_server":
port = strconv.Itoa(worktreeManifest.Ports.ModelServer)
}
}
port = resolvePort(port)
envFile := ensureBackendEnvFile(root)
fileVars := loadBackendEnvFile(envFile)
fileVars, err := envutil.LoadFile(envFile)
if err != nil {
log.Fatalf("Failed to load env file %s: %v", envFile, err)
}
eeDefaults := eeEnvDefaults(opts.NoEE)
fileVars = append(fileVars, eeDefaults...)
@@ -162,9 +175,17 @@ func runBackendService(name, module, port string, opts *BackendOptions) {
if !opts.NoEE {
log.Info("Enterprise Edition enabled (use --no-ee to disable)")
}
if hasWorktreeManifest {
log.Infof("agent-lab worktree %s detected: web=%s api=%s", worktreeManifest.Branch, worktreeManifest.URLs.Web, worktreeManifest.URLs.API)
log.Infof("lane=%s base-ref=%s", worktreeManifest.ResolvedLane(), worktreeManifest.BaseRef)
log.Infof("dependency mode=%s search-infra=%s", worktreeManifest.ResolvedDependencies().Mode, worktreeManifest.ResolvedDependencies().SearchInfraMode)
}
log.Debugf("Running in %s: uv %v", backendDir, uvicornArgs)
mergedEnv := mergeEnv(os.Environ(), fileVars)
mergedEnv := envutil.Merge(os.Environ(), fileVars)
if hasWorktreeManifest {
mergedEnv = envutil.ApplyOverrides(mergedEnv, worktreeManifest.RuntimeEnv())
}
log.Debugf("Applied %d env vars from %s (shell takes precedence)", len(fileVars), envFile)
svcCmd := exec.Command("uv", uvicornArgs...)
@@ -185,6 +206,18 @@ func runBackendService(name, module, port string, opts *BackendOptions) {
}
}
// currentAgentLabManifest looks up the agent-lab manifest for the checkout at
// repoRoot. Lookup errors are deliberately swallowed and reported as "no
// manifest" — running outside a tracked worktree is a normal condition, not
// a failure.
func currentAgentLabManifest(repoRoot string) (agentlab.Manifest, bool) {
	commonGitDir, err := agentlab.GetCommonGitDir()
	if err != nil {
		return agentlab.Manifest{}, false
	}
	manifest, found, err := agentlab.FindByRepoRoot(commonGitDir, repoRoot)
	if err != nil {
		return agentlab.Manifest{}, false
	}
	return manifest, found
}
// eeEnvDefaults returns env entries for EE and license enforcement settings.
// These are appended to the file vars so they act as defaults — shell env
// and .env file values still take precedence via envutil.Merge.
@@ -231,59 +264,3 @@ func ensureBackendEnvFile(root string) string {
log.Infof("Created %s from template (review and fill in <REPLACE THIS> values)", envFile)
return envFile
}
// mergeEnv combines shell environment with file-based defaults. Shell values
// take precedence — file entries are only added for keys not already present.
func mergeEnv(shellEnv, fileVars []string) []string {
	// Index the keys already present in the shell environment.
	shellKeys := make(map[string]bool, len(shellEnv))
	for _, entry := range shellEnv {
		if sep := strings.Index(entry, "="); sep > 0 {
			shellKeys[entry[:sep]] = true
		}
	}

	merged := make([]string, len(shellEnv), len(shellEnv)+len(fileVars))
	copy(merged, shellEnv)

	for _, entry := range fileVars {
		sep := strings.Index(entry, "=")
		if sep <= 0 {
			// Entries without a KEY=VALUE shape are ignored, matching the
			// key scan above.
			continue
		}
		key := entry[:sep]
		if shellKeys[key] {
			log.Debugf("Env var %s already set in shell, skipping .env value", key)
			continue
		}
		merged = append(merged, entry)
	}
	return merged
}
// loadBackendEnvFile parses a .env file into KEY=VALUE entries suitable for
// appending to os.Environ(). Blank lines and comments are skipped.
func loadBackendEnvFile(path string) []string {
	f, err := os.Open(path)
	if err != nil {
		log.Fatalf("Failed to open env file %s: %v", path, err)
	}
	defer func() { _ = f.Close() }()

	var envVars []string
	scanner := bufio.NewScanner(f)
	for scanner.Scan() {
		line := strings.TrimSpace(scanner.Text())

		// Skip blank lines and comment lines.
		if line == "" || strings.HasPrefix(line, "#") {
			continue
		}

		sep := strings.Index(line, "=")
		if sep <= 0 {
			continue
		}

		key := strings.TrimSpace(line[:sep])
		// Strip surrounding single/double quotes from the value.
		value := strings.Trim(strings.TrimSpace(line[sep+1:]), `"'`)
		envVars = append(envVars, fmt.Sprintf("%s=%s", key, value))
	}
	if err := scanner.Err(); err != nil {
		log.Fatalf("Failed to read env file %s: %v", path, err)
	}
	return envVars
}

View File

@@ -37,8 +37,6 @@ func NewDesktopCommand() *cobra.Command {
runDesktopScript(args)
},
}
cmd.Flags().SetInterspersed(false)
return cmd
}

View File

@@ -29,8 +29,6 @@ Examples:
// runDevExec executes "devcontainer exec --workspace-folder <root> <command...>".
func runDevExec(command []string) {
checkDevcontainerCLI()
ensureDockerSock()
ensureRemoteUser()
root, err := paths.GitRoot()
if err != nil {

View File

@@ -148,53 +148,10 @@ func worktreeGitMount(root string) (string, bool) {
return mount, true
}
// sshAgentMount returns a --mount flag value that forwards the host's SSH agent
// socket into the container. Returns ("", false) when SSH_AUTH_SOCK is unset or
// the socket is not accessible.
func sshAgentMount() (string, bool) {
	sock := os.Getenv("SSH_AUTH_SOCK")
	if sock == "" {
		log.Debug("SSH_AUTH_SOCK not set — skipping SSH agent forwarding")
		return "", false
	}
	if _, statErr := os.Stat(sock); statErr != nil {
		log.Debugf("SSH_AUTH_SOCK=%s not accessible: %v", sock, statErr)
		return "", false
	}
	log.Debugf("Forwarding SSH agent: %s", sock)
	return fmt.Sprintf("type=bind,source=%s,target=/tmp/ssh-agent.sock", sock), true
}
// ensureRemoteUser sets DEVCONTAINER_REMOTE_USER when rootless Docker is
// detected. Container root maps to the host user in rootless mode, so running
// as root inside the container avoids the UID mismatch on new files.
// Must be called after ensureDockerSock.
func ensureRemoteUser() {
	// Respect an explicit user choice.
	if os.Getenv("DEVCONTAINER_REMOTE_USER") != "" {
		return
	}
	if runtime.GOOS != "linux" {
		return
	}

	// Heuristic: rootless Docker on Linux typically places its socket
	// under $XDG_RUNTIME_DIR. If DOCKER_SOCK was set to a custom path
	// outside XDG_RUNTIME_DIR, set DEVCONTAINER_REMOTE_USER=root manually.
	sock := os.Getenv("DOCKER_SOCK")
	xdg := os.Getenv("XDG_RUNTIME_DIR")
	if xdg == "" || !strings.HasPrefix(sock, xdg) {
		return
	}

	log.Debug("Rootless Docker detected — setting DEVCONTAINER_REMOTE_USER=root")
	if err := os.Setenv("DEVCONTAINER_REMOTE_USER", "root"); err != nil {
		log.Warnf("Failed to set DEVCONTAINER_REMOTE_USER: %v", err)
	}
}
// runDevcontainer executes "devcontainer <action> --workspace-folder <root> [extraArgs...]".
func runDevcontainer(action string, extraArgs []string) {
checkDevcontainerCLI()
ensureDockerSock()
ensureRemoteUser()
root, err := paths.GitRoot()
if err != nil {
@@ -205,9 +162,6 @@ func runDevcontainer(action string, extraArgs []string) {
if mount, ok := worktreeGitMount(root); ok {
args = append(args, "--mount", mount)
}
if mount, ok := sshAgentMount(); ok {
args = append(args, "--mount", mount)
}
args = append(args, extraArgs...)
log.Debugf("Running: devcontainer %v", args)

View File

@@ -0,0 +1,63 @@
package cmd
import (
"fmt"
"os/exec"
"strings"
"github.com/onyx-dot-app/onyx/tools/ods/internal/git"
)
// ghString runs the GitHub CLI with the given arguments and returns trimmed
// stdout. On failure the error wraps the exit error and, when available,
// gh's stderr output.
func ghString(args ...string) (string, error) {
	git.CheckGitHubCLI()
	out, runErr := exec.Command("gh", args...).Output()
	if runErr == nil {
		return strings.TrimSpace(string(out)), nil
	}
	joined := strings.Join(args, " ")
	if exitErr, ok := runErr.(*exec.ExitError); ok {
		return "", fmt.Errorf("gh %s failed: %w: %s", joined, runErr, strings.TrimSpace(string(exitErr.Stderr)))
	}
	return "", fmt.Errorf("gh %s failed: %w", joined, runErr)
}
// resolvePRNumber returns the explicit PR number when one was supplied,
// otherwise asks gh for the PR associated with the current branch.
func resolvePRNumber(explicit string) (string, error) {
	// Return the trimmed value: previously the raw input was passed through,
	// so surrounding whitespace (e.g. from shell expansion) leaked into gh
	// API paths built from this number.
	if trimmed := strings.TrimSpace(explicit); trimmed != "" {
		return trimmed, nil
	}
	return ghString("pr", "view", "--json", "number", "--jq", ".number")
}
// currentRepoSlug returns the "owner/name" slug of the repository gh resolves
// for the current directory.
func currentRepoSlug() (string, error) {
	jq := `.owner.login + "/" + .name`
	return ghString("repo", "view", "--json", "owner,name", "--jq", jq)
}
// upsertIssueComment creates or updates a single marker-identified comment on
// the given PR (issue). Comments whose body starts with marker are treated as
// previous versions: the first match is edited in place; otherwise a new
// comment is posted.
//
// NOTE(review): "gh api" without --paginate returns only the first page of
// comments, so a marker comment beyond that page would be duplicated —
// confirm against expected comment volume.
func upsertIssueComment(repoSlug, prNumber, marker, body string) error {
	commentID, err := ghString(
		"api",
		fmt.Sprintf("repos/%s/issues/%s/comments", repoSlug, prNumber),
		"--jq",
		fmt.Sprintf(".[] | select(.body | startswith(%q)) | .id", marker),
	)
	if err != nil {
		return err
	}
	// The jq filter can match more than one comment (one ID per line); update
	// only the first so we never build a PATCH URL containing a newline.
	if idx := strings.IndexByte(commentID, '\n'); idx >= 0 {
		commentID = strings.TrimSpace(commentID[:idx])
	}
	if commentID != "" {
		_, err := ghString(
			"api",
			"--method", "PATCH",
			fmt.Sprintf("repos/%s/issues/comments/%s", repoSlug, commentID),
			"-f", fmt.Sprintf("body=%s", body),
		)
		return err
	}
	_, err = ghString(
		"api",
		"--method", "POST",
		fmt.Sprintf("repos/%s/issues/%s/comments", repoSlug, prNumber),
		"-f", fmt.Sprintf("body=%s", body),
	)
	return err
}

865
tools/ods/cmd/journey.go Normal file
View File

@@ -0,0 +1,865 @@
package cmd
import (
"encoding/json"
"fmt"
"net/http"
"os"
"os/exec"
"path/filepath"
"sort"
"strings"
"syscall"
"time"
log "github.com/sirupsen/logrus"
"github.com/spf13/cobra"
"github.com/onyx-dot-app/onyx/tools/ods/internal/agentlab"
"github.com/onyx-dot-app/onyx/tools/ods/internal/git"
"github.com/onyx-dot-app/onyx/tools/ods/internal/journey"
"github.com/onyx-dot-app/onyx/tools/ods/internal/paths"
"github.com/onyx-dot-app/onyx/tools/ods/internal/s3"
)
// defaultJourneyHTTPRegion is the AWS region baked into public HTTPS links
// for uploaded journey artifacts.
// NOTE(review): assumes the artifact bucket lives in us-east-2 — confirm this
// matches the bucket's actual region.
const defaultJourneyHTTPRegion = "us-east-2"

// JourneyRunOptions carries the flags for "journey run".
type JourneyRunOptions struct {
	Journey   string // registered journey name to run
	Label     string // artifact label for the capture (e.g. "before", "after")
	Worktree  string // tracked agent-lab worktree; empty = current checkout
	OutputDir string // explicit artifact directory; empty = derived default
	Project   string // Playwright project override; empty = registry value
}

// JourneyCompareOptions carries the flags for "journey compare".
type JourneyCompareOptions struct {
	Journeys       []string // journey names from the repeatable --journey flag
	PlanFile       string   // JSON plan file; empty = repo default when present
	BeforeRef      string   // git ref for the before capture
	AfterRef       string   // git ref for the after capture (no --after-worktree)
	AfterWorktree  string   // existing tracked worktree for the after capture
	DependencyMode string   // dependency mode for temp worktrees
	PR             string   // PR number to upload/comment against; empty = skip
	KeepWorktrees  bool     // keep temporary worktrees after the run
	Bucket         string   // S3 bucket override for uploads
}

// JourneyPublishOptions carries the flags for "journey publish".
type JourneyPublishOptions struct {
	RunDir string // compare run directory containing summary.json
	PR     string // PR number; empty = resolve from current branch
	Bucket string // S3 bucket override for uploads
}

// JourneyCaptureSummary describes the artifacts of one journey capture and is
// serialized into per-capture and per-run summary.json files.
type JourneyCaptureSummary struct {
	Journey      string   `json:"journey"`
	Label        string   `json:"label"`
	Worktree     string   `json:"worktree,omitempty"`
	URL          string   `json:"url"`
	ArtifactDir  string   `json:"artifact_dir"`
	LogPath      string   `json:"log_path"`
	VideoFiles   []string `json:"video_files,omitempty"`
	TraceFiles   []string `json:"trace_files,omitempty"`
	Screenshots  []string `json:"screenshots,omitempty"`
	MetadataJSON []string `json:"metadata_json,omitempty"`
}

// JourneyCompareSummary is the machine-readable result of a compare run,
// written to <run dir>/summary.json and updated after publishing with the S3
// locations.
type JourneyCompareSummary struct {
	GeneratedAt string                  `json:"generated_at"`
	BeforeRef   string                  `json:"before_ref"`
	AfterRef    string                  `json:"after_ref"`
	RunDir      string                  `json:"run_dir"`
	S3Prefix    string                  `json:"s3_prefix,omitempty"`
	S3HTTPBase  string                  `json:"s3_http_base,omitempty"`
	Captures    []JourneyCaptureSummary `json:"captures"`
}

// managedProcess tracks one background service started for a capture run,
// along with the log file receiving its combined output.
type managedProcess struct {
	Name    string    // human-readable service name (api, model_server, web)
	Cmd     *exec.Cmd // started command; Process is signaled on shutdown
	LogPath string    // file receiving the service's stdout+stderr
}
// NewJourneyCommand creates the journey command surface.
func NewJourneyCommand() *cobra.Command {
	root := &cobra.Command{
		Use:   "journey",
		Short: "Capture before/after browser journeys as agent artifacts",
	}
	subcommands := []*cobra.Command{
		newJourneyListCommand(),
		newJourneyRunCommand(),
		newJourneyCompareCommand(),
		newJourneyPublishCommand(),
	}
	for _, sub := range subcommands {
		root.AddCommand(sub)
	}
	return root
}
// newJourneyListCommand wires the "journey list" subcommand.
func newJourneyListCommand() *cobra.Command {
	listCmd := cobra.Command{
		Use:   "list",
		Short: "List registered browser journeys",
		Run: func(_ *cobra.Command, _ []string) {
			runJourneyList()
		},
	}
	return &listCmd
}
// newJourneyRunCommand wires the "journey run" subcommand: one labeled
// capture of a single registered journey against the current checkout or a
// tracked agent-lab worktree.
func newJourneyRunCommand() *cobra.Command {
	opts := &JourneyRunOptions{}
	cmd := &cobra.Command{
		Use:   "run",
		Short: "Run a single registered journey against the current or target worktree",
		Long: `Run one registered journey against the current checkout or a tracked worktree.
This is the default before/after workflow for product changes:
1. capture --label before in the target worktree before editing
2. implement and validate the change in that same worktree
3. capture --label after in that same worktree
Use journey compare only when you need to recover a missed baseline or compare
two explicit revisions after the fact.`,
		Run: func(cmd *cobra.Command, args []string) {
			runJourneyRun(opts)
		},
	}
	cmd.Flags().StringVar(&opts.Journey, "journey", "", "registered journey name to run")
	cmd.Flags().StringVar(&opts.Label, "label", "after", "artifact label for this capture (for example before or after)")
	cmd.Flags().StringVar(&opts.Worktree, "worktree", "", "tracked agent-lab worktree to run from instead of the current checkout")
	cmd.Flags().StringVar(&opts.OutputDir, "output-dir", "", "explicit artifact directory for the capture")
	cmd.Flags().StringVar(&opts.Project, "project", "", "override the Playwright project from the journey registry")
	// --journey has no sensible default; fail early instead of mid-capture.
	_ = cmd.MarkFlagRequired("journey")
	return cmd
}
// newJourneyCompareCommand wires the "journey compare" subcommand: a full
// before/after capture across two revisions, with optional PR publishing.
func newJourneyCompareCommand() *cobra.Command {
	opts := &JourneyCompareOptions{}
	cmd := &cobra.Command{
		Use:   "compare",
		Short: "Capture before and after videos by replaying registered journeys against two revisions",
		Long: `Create or reuse worktrees for the before and after revisions, boot the app in each one,
record the configured journeys, and write a machine-readable summary. If --pr is supplied,
the compare run is also uploaded to S3 and linked from the pull request.
This is the fallback path, not the default workflow. Prefer journey run inside a
single tracked product worktree when you can capture before and after during the
normal edit loop.`,
		Run: func(cmd *cobra.Command, args []string) {
			runJourneyCompare(opts)
		},
	}
	cmd.Flags().StringArrayVar(&opts.Journeys, "journey", nil, "registered journey name to capture (repeatable)")
	cmd.Flags().StringVar(&opts.PlanFile, "plan-file", "", "JSON file containing {\"journeys\":[...]} (defaults to .github/agent-journeys.json when present)")
	cmd.Flags().StringVar(&opts.BeforeRef, "before-ref", "origin/main", "git ref for the before capture")
	cmd.Flags().StringVar(&opts.AfterRef, "after-ref", "HEAD", "git ref for the after capture when --after-worktree is not supplied")
	cmd.Flags().StringVar(&opts.AfterWorktree, "after-worktree", "", "existing tracked worktree to use for the after capture")
	cmd.Flags().StringVar(&opts.DependencyMode, "dependency-mode", string(agentlab.DependencyModeNamespaced), "dependency mode for temporary worktrees: namespaced or shared")
	cmd.Flags().StringVar(&opts.PR, "pr", "", "pull request number to upload/comment against after capture")
	cmd.Flags().StringVar(&opts.Bucket, "bucket", "", "override the S3 bucket used for uploaded journey artifacts")
	cmd.Flags().BoolVar(&opts.KeepWorktrees, "keep-worktrees", false, "keep temporary journey worktrees after the capture run")
	return cmd
}
// newJourneyPublishCommand wires the "journey publish" subcommand: re-upload
// an already-captured compare run and refresh the PR comment.
func newJourneyPublishCommand() *cobra.Command {
	opts := &JourneyPublishOptions{}
	cmd := &cobra.Command{
		Use:   "publish",
		Short: "Upload a previously captured compare run and update the pull request comment",
		Run: func(cmd *cobra.Command, args []string) {
			runJourneyPublish(opts)
		},
	}
	cmd.Flags().StringVar(&opts.RunDir, "run-dir", "", "compare run directory containing summary.json")
	cmd.Flags().StringVar(&opts.PR, "pr", "", "pull request number to publish against")
	cmd.Flags().StringVar(&opts.Bucket, "bucket", "", "override the S3 bucket used for uploaded journey artifacts")
	// Without a run dir there is nothing to publish; require it up front.
	_ = cmd.MarkFlagRequired("run-dir")
	return cmd
}
// runJourneyList prints one tab-separated line per registered journey:
// name, description, Playwright project, and model-server requirement.
func runJourneyList() {
	repoRoot, err := paths.GitRoot()
	if err != nil {
		log.Fatalf("Failed to determine git root: %v", err)
	}
	registry, loadErr := journey.LoadRegistry(repoRoot)
	if loadErr != nil {
		log.Fatalf("Failed to load journey registry: %v", loadErr)
	}
	for _, def := range registry.Journeys {
		fmt.Printf("%s\t%s\tproject=%s\tmodel_server=%t\n", def.Name, def.Description, def.Project, def.RequiresModelServer)
	}
}
// runJourneyRun performs a single labeled capture and writes summary.json
// next to the produced artifacts. All failures are fatal.
func runJourneyRun(opts *JourneyRunOptions) {
	repoRoot, manifest, hasManifest := resolveAgentLabTarget(opts.Worktree)
	harnessRoot, err := resolveJourneyHarnessRoot(repoRoot, manifest, hasManifest)
	if err != nil {
		log.Fatalf("Failed to resolve journey harness root: %v", err)
	}
	capture, err := captureJourney(harnessRoot, repoRoot, manifest, hasManifest, opts.Journey, opts.Label, opts.OutputDir, opts.Project)
	if err != nil {
		log.Fatalf("Journey capture failed: %v", err)
	}
	encoded, err := json.MarshalIndent(capture, "", " ")
	if err != nil {
		log.Fatalf("Failed to encode journey summary: %v", err)
	}
	summaryPath := filepath.Join(capture.ArtifactDir, "summary.json")
	if err := os.WriteFile(summaryPath, encoded, 0644); err != nil {
		log.Fatalf("Failed to write journey summary: %v", err)
	}
	log.Infof("Journey %s (%s) captured to %s", capture.Journey, capture.Label, capture.ArtifactDir)
}
// runJourneyCompare captures the configured journeys against a "before" and
// an "after" revision, writes a machine-readable summary under the run dir,
// and — when --pr is given — uploads the run and updates the PR comment.
// The before side always uses a temporary worktree; the after side uses
// --after-worktree, the current tracked worktree (when --after-ref is HEAD),
// or another temporary worktree, in that order of preference.
func runJourneyCompare(opts *JourneyCompareOptions) {
	repoRoot, err := paths.GitRoot()
	if err != nil {
		log.Fatalf("Failed to determine git root: %v", err)
	}
	definitions, err := resolveJourneyDefinitions(repoRoot, opts.Journeys, opts.PlanFile)
	if err != nil {
		log.Fatalf("Failed to resolve journeys: %v", err)
	}
	currentRoot, currentManifest, hasCurrentManifest := resolveAgentLabTarget("")
	// Refuse to capture HEAD from a dirty, untracked checkout: the "after"
	// capture would not reflect the real patch under review.
	if opts.AfterWorktree == "" && strings.EqualFold(strings.TrimSpace(opts.AfterRef), "HEAD") && !hasCurrentManifest && git.HasUncommittedChanges() {
		log.Fatalf("The current checkout has uncommitted changes, but it is not a tracked agent-lab worktree. Create the product worktree first and rerun with --after-worktree <branch> so the after capture reflects the real patch.")
	}
	_ = currentRoot // only the manifest is used below
	commonGitDir, err := agentlab.GetCommonGitDir()
	if err != nil {
		log.Fatalf("Failed to determine git common dir: %v", err)
	}
	// Run dir is keyed by a UTC timestamp; publishJourneyCompare relies on
	// this basename to group uploads.
	runDir := filepath.Join(agentlab.StateRoot(commonGitDir), "journeys", time.Now().UTC().Format("20060102-150405"))
	if err := os.MkdirAll(runDir, 0755); err != nil {
		log.Fatalf("Failed to create journey run dir: %v", err)
	}
	// Before side: always a fresh temporary worktree at --before-ref.
	beforeTarget, err := createTemporaryJourneyWorktree(opts.BeforeRef, "before", agentlab.DependencyMode(opts.DependencyMode))
	if err != nil {
		log.Fatalf("Failed to create before worktree: %v", err)
	}
	if !opts.KeepWorktrees {
		defer cleanupJourneyTarget(beforeTarget)
	}
	// After side: explicit worktree > current tracked worktree (HEAD) >
	// temporary worktree at --after-ref.
	var afterTarget journeyTarget
	if opts.AfterWorktree != "" {
		afterTarget, err = resolveJourneyTarget(opts.AfterWorktree)
		if err != nil {
			log.Fatalf("Failed to resolve after worktree: %v", err)
		}
		if err := runSelfCommand("worktree", "deps", "up", afterTarget.Identifier); err != nil {
			log.Fatalf("Failed to provision dependencies for %s: %v", afterTarget.Identifier, err)
		}
	} else if strings.EqualFold(strings.TrimSpace(opts.AfterRef), "HEAD") {
		if hasCurrentManifest {
			afterTarget = journeyTarget{
				Identifier: currentManifest.Branch,
				Manifest:   currentManifest,
			}
			if err := runSelfCommand("worktree", "deps", "up", afterTarget.Identifier); err != nil {
				log.Fatalf("Failed to provision dependencies for %s: %v", afterTarget.Identifier, err)
			}
			log.Infof("Using current tracked worktree %s for the after capture", afterTarget.Identifier)
		} else {
			afterTarget, err = createTemporaryJourneyWorktree(opts.AfterRef, "after", agentlab.DependencyMode(opts.DependencyMode))
			if err != nil {
				log.Fatalf("Failed to create after worktree: %v", err)
			}
			if !opts.KeepWorktrees {
				defer cleanupJourneyTarget(afterTarget)
			}
		}
	} else {
		afterTarget, err = createTemporaryJourneyWorktree(opts.AfterRef, "after", agentlab.DependencyMode(opts.DependencyMode))
		if err != nil {
			log.Fatalf("Failed to create after worktree: %v", err)
		}
		if !opts.KeepWorktrees {
			defer cleanupJourneyTarget(afterTarget)
		}
	}
	summary := JourneyCompareSummary{
		GeneratedAt: time.Now().UTC().Format(time.RFC3339),
		BeforeRef:   opts.BeforeRef,
		AfterRef:    opts.AfterRef,
		RunDir:      runDir,
		Captures:    []JourneyCaptureSummary{},
	}
	// Capture the before side first so a failed after capture still leaves a
	// usable baseline on disk.
	beforeCaptures, err := captureJourneySet(beforeTarget, definitions, "before", runDir)
	if err != nil {
		log.Fatalf("Before capture failed: %v", err)
	}
	summary.Captures = append(summary.Captures, beforeCaptures...)
	afterCaptures, err := captureJourneySet(afterTarget, definitions, "after", runDir)
	if err != nil {
		log.Fatalf("After capture failed: %v", err)
	}
	summary.Captures = append(summary.Captures, afterCaptures...)
	writeJourneyCompareSummary(runDir, summary)
	log.Infof("Journey compare summary written to %s", filepath.Join(runDir, "summary.json"))
	if opts.PR != "" {
		prNumber, err := resolvePRNumber(opts.PR)
		if err != nil {
			log.Fatalf("Failed to resolve PR number: %v", err)
		}
		updated, err := publishJourneyCompare(runDir, prNumber, opts.Bucket)
		if err != nil {
			log.Fatalf("Failed to publish journey compare run: %v", err)
		}
		// Re-write the summary so it records the S3 locations as well.
		writeJourneyCompareSummary(runDir, updated)
	}
}
// runJourneyPublish re-uploads an existing compare run, refreshes the PR
// comment, then persists the updated summary (with S3 locations) in place.
func runJourneyPublish(opts *JourneyPublishOptions) {
	prNumber, err := resolvePRNumber(opts.PR)
	if err != nil {
		log.Fatalf("Failed to resolve PR number: %v", err)
	}
	published, publishErr := publishJourneyCompare(opts.RunDir, prNumber, opts.Bucket)
	if publishErr != nil {
		log.Fatalf("Failed to publish journey compare run: %v", publishErr)
	}
	writeJourneyCompareSummary(opts.RunDir, published)
	log.Infof("Published journey compare run from %s", opts.RunDir)
}
// resolveJourneyDefinitions merges explicitly requested journey names with
// those from the plan file (the explicit --plan-file, falling back to the
// repo-default plan when it exists), dedupes while preserving first-seen
// order, and resolves the names against the registry.
func resolveJourneyDefinitions(repoRoot string, requested []string, planFile string) ([]journey.Definition, error) {
	names := append([]string{}, requested...)
	plan := strings.TrimSpace(planFile)
	if plan == "" {
		candidate := filepath.Join(repoRoot, journey.DefaultPlanPath)
		if _, statErr := os.Stat(candidate); statErr == nil {
			plan = candidate
		}
	}
	if plan != "" {
		loaded, err := journey.LoadPlan(plan)
		if err != nil {
			return nil, err
		}
		names = append(names, loaded.Journeys...)
	}
	if len(names) == 0 {
		return nil, fmt.Errorf("no journeys requested; pass --journey or provide %s", journey.DefaultPlanPath)
	}
	seen := make(map[string]bool, len(names))
	unique := make([]string, 0, len(names))
	for _, name := range names {
		if seen[name] {
			continue
		}
		seen[name] = true
		unique = append(unique, name)
	}
	return journey.ResolveDefinitions(repoRoot, unique)
}
// resolveJourneyHarnessRoot picks the checkout that actually contains the
// journey registry. The control checkout the worktree was created from (when
// known) is preferred over the target repo itself.
func resolveJourneyHarnessRoot(targetRepoRoot string, manifest agentlab.Manifest, hasManifest bool) (string, error) {
	var candidates []string
	if hasManifest && manifest.CreatedFromPath != "" {
		candidates = append(candidates, manifest.CreatedFromPath)
	}
	candidates = append(candidates, targetRepoRoot)
	for _, root := range candidates {
		if _, err := os.Stat(filepath.Join(root, journey.RegistryPath)); err == nil {
			return root, nil
		}
	}
	return "", fmt.Errorf("no journey registry found in target repo %s or control checkout %s", targetRepoRoot, manifest.CreatedFromPath)
}
// captureJourney runs one registered journey via Playwright and summarizes
// the artifacts it produced.
//
// harnessRoot is the checkout containing the journey registry and the web
// test harness; targetRepoRoot/manifest describe where the app under test
// lives. The capture directory defaults under the manifest's artifact dir
// (tracked worktrees) or web/output (plain checkouts) unless outputDir is
// given. Returns the artifact summary or the tail of the Playwright log on
// failure.
func captureJourney(harnessRoot, targetRepoRoot string, manifest agentlab.Manifest, hasManifest bool, journeyName, label, outputDir, projectOverride string) (JourneyCaptureSummary, error) {
	definitions, err := journey.ResolveDefinitions(harnessRoot, []string{journeyName})
	if err != nil {
		return JourneyCaptureSummary{}, err
	}
	// Exactly one name was passed, so the resolved slice has one entry.
	definition := definitions[0]
	targetDir := strings.TrimSpace(outputDir)
	if targetDir == "" {
		if hasManifest {
			targetDir = filepath.Join(manifest.ArtifactDir, "journeys", journey.Slug(definition.Name), journey.Slug(label))
		} else {
			targetDir = filepath.Join(targetRepoRoot, "web", "output", "journeys", journey.Slug(definition.Name), journey.Slug(label))
		}
	}
	if err := os.MkdirAll(targetDir, 0755); err != nil {
		return JourneyCaptureSummary{}, fmt.Errorf("create journey artifact dir: %w", err)
	}
	playwrightOutputDir := filepath.Join(targetDir, "playwright")
	logPath := filepath.Join(targetDir, "journey.log")
	projectName := definition.Project
	if strings.TrimSpace(projectOverride) != "" {
		projectName = projectOverride
	}
	// Journey mode and capture directories are communicated to the Playwright
	// config through the environment.
	envOverrides := map[string]string{
		"PLAYWRIGHT_JOURNEY_MODE":        "1",
		"PLAYWRIGHT_JOURNEY_CAPTURE_DIR": targetDir,
		"PLAYWRIGHT_OUTPUT_DIR":          playwrightOutputDir,
	}
	if definition.SkipGlobalSetup {
		envOverrides["PLAYWRIGHT_SKIP_GLOBAL_SETUP"] = "1"
	}
	if hasManifest {
		// Tracked worktrees carry their own runtime env (ports, URLs, …);
		// these override the defaults above on key collision.
		for key, value := range manifest.RuntimeEnv() {
			envOverrides[key] = value
		}
	}
	// Playwright must run from the harness's web directory so its config and
	// node_modules resolve.
	step, passed := runLoggedCommand(
		"journey-"+definition.Name,
		logPath,
		filepath.Join(harnessRoot, "web"),
		envOverrides,
		"npx",
		"playwright", "test", definition.TestPath, "--project", projectName,
	)
	if !passed {
		return JourneyCaptureSummary{}, fmt.Errorf("%s", strings.Join(step.Details, "\n"))
	}
	artifactSummary, err := summarizeJourneyArtifacts(targetDir)
	if err != nil {
		return JourneyCaptureSummary{}, err
	}
	artifactSummary.Journey = definition.Name
	artifactSummary.Label = label
	artifactSummary.ArtifactDir = targetDir
	artifactSummary.LogPath = logPath
	if hasManifest {
		artifactSummary.Worktree = manifest.Branch
		artifactSummary.URL = manifest.URLs.Web
	} else {
		// NOTE(review): envOverrides never sets BASE_URL above, so this is
		// empty for untracked checkouts — confirm whether BASE_URL should be
		// inherited from the process environment instead.
		artifactSummary.URL = envOverrides["BASE_URL"]
	}
	return artifactSummary, nil
}
// journeyTarget is a resolved capture target: a tracked agent-lab worktree
// plus whether this run created it (and should therefore clean it up).
type journeyTarget struct {
	Identifier string            // branch name used to address the worktree
	Manifest   agentlab.Manifest // checkout path, URLs, and runtime env
	Temporary  bool              // true when created just for this compare run
}
// resolveJourneyTarget looks up a tracked worktree by identifier and wraps
// its manifest as a non-temporary capture target.
func resolveJourneyTarget(identifier string) (journeyTarget, error) {
	commonGitDir, err := agentlab.GetCommonGitDir()
	if err != nil {
		return journeyTarget{}, err
	}
	manifest, ok, err := agentlab.FindByIdentifier(commonGitDir, identifier)
	if err != nil {
		return journeyTarget{}, err
	}
	if !ok {
		return journeyTarget{}, fmt.Errorf("no worktree found for %q", identifier)
	}
	return journeyTarget{Identifier: manifest.Branch, Manifest: manifest}, nil
}
// createTemporaryJourneyWorktree creates a throwaway worktree at ref under a
// timestamped codex/journey-* branch, brings its dependencies up, and returns
// it marked Temporary so the caller can clean it up.
func createTemporaryJourneyWorktree(ref, label string, mode agentlab.DependencyMode) (journeyTarget, error) {
	refSlug := journey.Slug(strings.ReplaceAll(ref, "/", "-"))
	branch := fmt.Sprintf("codex/journey-%s-%s-%d", journey.Slug(label), refSlug, time.Now().UTC().UnixNano())
	if err := runSelfCommand("worktree", "create", branch, "--from", ref, "--dependency-mode", string(mode)); err != nil {
		return journeyTarget{}, err
	}
	if err := runSelfCommand("worktree", "deps", "up", branch); err != nil {
		return journeyTarget{}, err
	}
	target, err := resolveJourneyTarget(branch)
	if err != nil {
		return journeyTarget{}, err
	}
	target.Temporary = true
	return target, nil
}
// cleanupJourneyTarget removes the worktree and branch this run created;
// pre-existing (non-temporary) targets are left untouched. Failures are
// logged, not fatal, since cleanup is best-effort.
func cleanupJourneyTarget(target journeyTarget) {
	if !target.Temporary {
		return
	}
	if err := runSelfCommand("worktree", "remove", target.Identifier, "--force", "--drop-deps"); err != nil {
		log.Warnf("Failed to remove temporary worktree %s: %v", target.Identifier, err)
	}
	branchDelete := exec.Command("git", "branch", "-D", target.Identifier)
	if err := branchDelete.Run(); err != nil {
		log.Warnf("Failed to delete temporary branch %s: %v", target.Identifier, err)
	}
}
// captureJourneySet boots the target's services once (model server only when
// some journey needs it), captures every definition under
// runDir/<journey>/<label>, and stops the services on return.
func captureJourneySet(target journeyTarget, definitions []journey.Definition, label, runDir string) ([]JourneyCaptureSummary, error) {
	harnessRoot, err := resolveJourneyHarnessRoot(target.Manifest.CheckoutPath, target.Manifest, true)
	if err != nil {
		return nil, err
	}
	needsModelServer := false
	for _, def := range definitions {
		if def.RequiresModelServer {
			needsModelServer = true
			break
		}
	}
	processes, err := startJourneyServices(target, runDir, needsModelServer)
	if err != nil {
		return nil, err
	}
	defer stopManagedProcesses(processes)
	captures := make([]JourneyCaptureSummary, 0, len(definitions))
	for _, def := range definitions {
		outDir := filepath.Join(runDir, journey.Slug(def.Name), journey.Slug(label))
		capture, captureErr := captureJourney(harnessRoot, target.Manifest.CheckoutPath, target.Manifest, true, def.Name, label, outDir, "")
		if captureErr != nil {
			return nil, captureErr
		}
		captures = append(captures, capture)
	}
	return captures, nil
}
// startJourneyServices launches the app services (api, optionally
// model_server, then web) for a capture target, each as a re-invocation of
// this binary with output redirected to per-service logs under the run dir.
// It blocks until both the API and web health endpoints respond, and stops
// everything it already started on any failure.
func startJourneyServices(target journeyTarget, runDir string, includeModelServer bool) ([]managedProcess, error) {
	logDir := filepath.Join(runDir, "services", journey.Slug(target.Manifest.Branch))
	if err := os.MkdirAll(logDir, 0755); err != nil {
		return nil, fmt.Errorf("create service log dir: %w", err)
	}
	processes := make([]managedProcess, 0, 3)
	apiProcess, err := startManagedProcess(
		"api",
		filepath.Join(logDir, "api.log"),
		"backend", "api", "--worktree", target.Identifier,
	)
	if err != nil {
		return nil, err
	}
	processes = append(processes, apiProcess)
	if includeModelServer {
		modelProcess, err := startManagedProcess(
			"model_server",
			filepath.Join(logDir, "model_server.log"),
			"backend", "model_server", "--worktree", target.Identifier,
		)
		if err != nil {
			// Tear down what already started before propagating the error.
			stopManagedProcesses(processes)
			return nil, err
		}
		processes = append(processes, modelProcess)
	}
	webProcess, err := startManagedProcess(
		"web",
		filepath.Join(logDir, "web.log"),
		"web", "dev", "--worktree", target.Identifier, "--", "--webpack",
	)
	if err != nil {
		stopManagedProcesses(processes)
		return nil, err
	}
	processes = append(processes, webProcess)
	// Wait for the API first, then the web app (which proxies to the API);
	// the web dev server needs the longer budget for its initial build.
	if err := waitForJourneyURL(target.Manifest.URLs.API+"/health", 2*time.Minute, processes...); err != nil {
		stopManagedProcesses(processes)
		return nil, err
	}
	if err := waitForJourneyURL(target.Manifest.URLs.Web+"/api/health", 3*time.Minute, processes...); err != nil {
		stopManagedProcesses(processes)
		return nil, err
	}
	return processes, nil
}
// startManagedProcess launches this same ods binary with args, redirecting
// both output streams to logPath and placing the child in its own process
// group so the whole tree can be signaled later.
func startManagedProcess(name, logPath string, args ...string) (managedProcess, error) {
	self, err := os.Executable()
	if err != nil {
		return managedProcess{}, fmt.Errorf("determine ods executable: %w", err)
	}
	if err := os.MkdirAll(filepath.Dir(logPath), 0755); err != nil {
		return managedProcess{}, fmt.Errorf("create log dir: %w", err)
	}
	sink, err := os.Create(logPath)
	if err != nil {
		return managedProcess{}, fmt.Errorf("create log file: %w", err)
	}
	// The child inherits a duplicate of the descriptor, so closing our handle
	// after Start does not interrupt the child's logging.
	defer sink.Close()
	cmd := exec.Command(self, args...)
	cmd.Stdin = nil
	cmd.Stdout = sink
	cmd.Stderr = sink
	cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}
	if err := cmd.Start(); err != nil {
		return managedProcess{}, fmt.Errorf("start %s: %w", name, err)
	}
	return managedProcess{Name: name, Cmd: cmd, LogPath: logPath}, nil
}
// stopManagedProcesses stops services in reverse start order. Each process
// was started with Setpgid, so the interrupt is delivered to the whole
// process group (the web/api wrappers spawn children that would otherwise
// survive a signal sent only to the leader); after a 10s grace period the
// group is killed and the leader is always reaped so no zombie lingers.
func stopManagedProcesses(processes []managedProcess) {
	for i := len(processes) - 1; i >= 0; i-- {
		process := processes[i]
		if process.Cmd == nil || process.Cmd.Process == nil {
			continue
		}
		pid := process.Cmd.Process.Pid
		// Signal the whole group (negative pid); fall back to the leader
		// alone if the group signal fails for any reason.
		if err := syscall.Kill(-pid, syscall.SIGINT); err != nil {
			_ = process.Cmd.Process.Signal(os.Interrupt)
		}
		done := make(chan struct{})
		go func(cmd *exec.Cmd) {
			_, _ = cmd.Process.Wait()
			close(done)
		}(process.Cmd)
		select {
		case <-done:
		case <-time.After(10 * time.Second):
			_ = syscall.Kill(-pid, syscall.SIGKILL)
			_ = process.Cmd.Process.Kill()
			// Reap the killed leader so it does not remain a zombie.
			<-done
		}
	}
}
// waitForJourneyURL polls url every 3s until it answers with any status below
// 500 (presumably to tolerate auth redirects and 4xx from a booted app —
// confirm), failing fast when a managed process dies and giving up after
// timeout.
func waitForJourneyURL(url string, timeout time.Duration, processes ...managedProcess) error {
	client := &http.Client{Timeout: 5 * time.Second}
	deadline := time.Now().Add(timeout)
	for time.Now().Before(deadline) {
		if err := ensureManagedProcessesRunning(processes); err != nil {
			return fmt.Errorf("while waiting for %s: %w", url, err)
		}
		if resp, getErr := client.Get(url); getErr == nil {
			code := resp.StatusCode
			_ = resp.Body.Close()
			if code >= 200 && code < 500 {
				return nil
			}
		}
		time.Sleep(3 * time.Second)
	}
	// Distinguish "service died" from a plain timeout in the final error.
	if err := ensureManagedProcessesRunning(processes); err != nil {
		return fmt.Errorf("while waiting for %s: %w", url, err)
	}
	return fmt.Errorf("timed out waiting for %s", url)
}
// ensureManagedProcessesRunning verifies every managed service is still alive
// using kill(pid, 0). ESRCH means the process is gone, in which case the
// error includes the tail of its log; EPERM still counts as alive (the
// process exists but we may not signal it).
// NOTE(review): a zombie (exited but unreaped) child still answers kill(0),
// so an early exit may go undetected until the process is reaped — confirm
// this is acceptable for this polling use.
func ensureManagedProcessesRunning(processes []managedProcess) error {
	for _, process := range processes {
		if process.Cmd == nil || process.Cmd.Process == nil {
			continue
		}
		if err := syscall.Kill(process.Cmd.Process.Pid, 0); err != nil {
			if err == syscall.ESRCH {
				return fmt.Errorf("%s exited early\n%s", process.Name, readJourneyLogTail(process.LogPath, 40))
			}
			if err != syscall.EPERM {
				return fmt.Errorf("check %s process health: %w", process.Name, err)
			}
		}
	}
	return nil
}
func readJourneyLogTail(path string, lineCount int) string {
data, err := os.ReadFile(path)
if err != nil {
return fmt.Sprintf("failed to read %s: %v", path, err)
}
trimmed := strings.TrimRight(string(data), "\n")
if trimmed == "" {
return fmt.Sprintf("%s is empty", path)
}
lines := strings.Split(trimmed, "\n")
if len(lines) > lineCount {
lines = lines[len(lines)-lineCount:]
}
return fmt.Sprintf("recent log tail from %s:\n%s", path, strings.Join(lines, "\n"))
}
// summarizeJourneyArtifacts walks the capture directory and buckets artifact
// files by kind (videos, Playwright traces, screenshots, metadata JSON),
// recording paths relative to root in sorted order. summary.json itself is
// excluded from the metadata bucket because it is this tool's own output.
func summarizeJourneyArtifacts(root string) (JourneyCaptureSummary, error) {
	summary := JourneyCaptureSummary{}
	// WalkDir (Go 1.16+) avoids an extra stat per entry compared to Walk.
	err := filepath.WalkDir(root, func(path string, entry os.DirEntry, walkErr error) error {
		if walkErr != nil {
			return walkErr
		}
		if entry.IsDir() {
			return nil
		}
		relative, relErr := filepath.Rel(root, path)
		if relErr != nil {
			return relErr
		}
		switch {
		case strings.HasSuffix(path, ".webm"):
			summary.VideoFiles = append(summary.VideoFiles, relative)
		case strings.HasSuffix(path, "trace.zip"):
			summary.TraceFiles = append(summary.TraceFiles, relative)
		case strings.HasSuffix(path, ".png"):
			summary.Screenshots = append(summary.Screenshots, relative)
		case strings.HasSuffix(path, ".json") && filepath.Base(path) != "summary.json":
			summary.MetadataJSON = append(summary.MetadataJSON, relative)
		}
		return nil
	})
	if err != nil {
		return summary, fmt.Errorf("walk journey artifacts: %w", err)
	}
	sort.Strings(summary.VideoFiles)
	sort.Strings(summary.TraceFiles)
	sort.Strings(summary.Screenshots)
	sort.Strings(summary.MetadataJSON)
	return summary, nil
}
// runSelfCommand re-invokes the current ods binary with args, inheriting this
// process's stdio, and returns the child's run error.
func runSelfCommand(args ...string) error {
	self, err := os.Executable()
	if err != nil {
		return err
	}
	child := exec.Command(self, args...)
	child.Stdin = os.Stdin
	child.Stdout = os.Stdout
	child.Stderr = os.Stderr
	return child.Run()
}
// writeJourneyCompareSummary persists summary as indented JSON at
// runDir/summary.json, exiting the process on any failure.
func writeJourneyCompareSummary(runDir string, summary JourneyCompareSummary) {
	encoded, err := json.MarshalIndent(summary, "", " ")
	if err != nil {
		log.Fatalf("Failed to encode journey compare summary: %v", err)
	}
	target := filepath.Join(runDir, "summary.json")
	if err := os.WriteFile(target, encoded, 0644); err != nil {
		log.Fatalf("Failed to write journey compare summary: %v", err)
	}
}
// publishJourneyCompare uploads a compare run directory to S3 and upserts the
// marker-tagged PR comment linking to the artifacts. It returns the summary
// read from runDir, augmented with the S3 prefix and public HTTP base (the
// caller is responsible for persisting the updated summary).
func publishJourneyCompare(runDir, prNumber, bucketOverride string) (JourneyCompareSummary, error) {
	var summary JourneyCompareSummary
	data, err := os.ReadFile(filepath.Join(runDir, "summary.json"))
	if err != nil {
		return summary, fmt.Errorf("read compare summary: %w", err)
	}
	if err := json.Unmarshal(data, &summary); err != nil {
		return summary, fmt.Errorf("parse compare summary: %w", err)
	}
	bucket := bucketOverride
	if bucket == "" {
		bucket = getS3Bucket()
	}
	// The run dir's basename is its UTC timestamp (see runJourneyCompare), so
	// uploads for the same PR are grouped by capture time.
	timestamp := filepath.Base(runDir)
	s3Prefix := fmt.Sprintf("s3://%s/journeys/pr-%s/%s/", bucket, prNumber, timestamp)
	if err := s3.SyncUp(runDir, s3Prefix, true); err != nil {
		return summary, err
	}
	// NOTE(review): the public URL hardcodes defaultJourneyHTTPRegion; links
	// break if the bucket lives in a different region — confirm.
	httpBase := fmt.Sprintf("https://%s.s3.%s.amazonaws.com/journeys/pr-%s/%s/", bucket, defaultJourneyHTTPRegion, prNumber, timestamp)
	summary.S3Prefix = s3Prefix
	summary.S3HTTPBase = httpBase
	repoSlug, err := currentRepoSlug()
	if err != nil {
		return summary, err
	}
	body := buildJourneyPRComment(summary)
	if err := upsertIssueComment(repoSlug, prNumber, "<!-- agent-journey-report -->", body); err != nil {
		return summary, err
	}
	return summary, nil
}
// buildJourneyPRComment renders the marker-tagged markdown comment body: a
// table pairing each journey's before/after artifacts, with journeys sorted
// by name for stable output.
func buildJourneyPRComment(summary JourneyCompareSummary) string {
	type capturePair struct {
		before *JourneyCaptureSummary
		after  *JourneyCaptureSummary
	}
	pairs := map[string]*capturePair{}
	for i := range summary.Captures {
		capture := &summary.Captures[i]
		entry, ok := pairs[capture.Journey]
		if !ok {
			entry = &capturePair{}
			pairs[capture.Journey] = entry
		}
		if capture.Label == "before" {
			entry.before = capture
		} else if capture.Label == "after" {
			entry.after = capture
		}
	}
	names := make([]string, 0, len(pairs))
	for name := range pairs {
		names = append(names, name)
	}
	sort.Strings(names)
	lines := []string{
		"<!-- agent-journey-report -->",
		"### Agent Journey Report",
		"",
		fmt.Sprintf("Before ref: `%s`", summary.BeforeRef),
		fmt.Sprintf("After ref: `%s`", summary.AfterRef),
		"",
		"| Journey | Before | After |",
		"|---------|--------|-------|",
	}
	for _, name := range names {
		entry := pairs[name]
		beforeCell := journeyLink(summary.RunDir, summary.S3HTTPBase, entry.before)
		afterCell := journeyLink(summary.RunDir, summary.S3HTTPBase, entry.after)
		lines = append(lines, fmt.Sprintf("| `%s` | %s | %s |", name, beforeCell, afterCell))
	}
	return strings.Join(lines, "\n")
}
// journeyLink renders the table cell for one capture: the first video when
// present, else the first screenshot, else a placeholder. Links are built
// from the uploaded HTTP base plus the artifact path relative to the run dir.
func journeyLink(runDir, httpBase string, capture *JourneyCaptureSummary) string {
	if capture == nil {
		return "_not captured_"
	}
	relDir, err := filepath.Rel(runDir, capture.ArtifactDir)
	if err != nil {
		relDir = capture.ArtifactDir
	}
	switch {
	case len(capture.VideoFiles) > 0:
		return fmt.Sprintf("[video](%s%s)", httpBase, pathJoin(relDir, capture.VideoFiles[0]))
	case len(capture.Screenshots) > 0:
		return fmt.Sprintf("[screenshot](%s%s)", httpBase, pathJoin(relDir, capture.Screenshots[0]))
	default:
		return "_no artifact_"
	}
}
// pathJoin joins URL path segments with single slashes, trimming leading and
// trailing slashes from each part and dropping parts that are empty (before
// or after trimming) so no double slashes appear in generated links.
func pathJoin(parts ...string) string {
	clean := make([]string, 0, len(parts))
	for _, part := range parts {
		trimmed := strings.Trim(part, "/")
		// Skip slash-only parts too: previously "/" trimmed to "" but was
		// still appended, producing a double slash in the joined path.
		if trimmed == "" {
			continue
		}
		clean = append(clean, trimmed)
	}
	return strings.Join(clean, "/")
}

289
tools/ods/cmd/pr_checks.go Normal file
View File

@@ -0,0 +1,289 @@
package cmd
import (
"encoding/json"
"fmt"
"os/exec"
"strings"
log "github.com/sirupsen/logrus"
"github.com/spf13/cobra"
"github.com/onyx-dot-app/onyx/tools/ods/internal/git"
)
// PRChecksOptions carries the shared --pr flag for pr-checks subcommands.
type PRChecksOptions struct {
	PR string // pull request number; empty = resolve from the current branch
}

// ghChecksResponse mirrors the GraphQL response shape used by fetchPRChecks:
// the status-check rollup contexts on the PR's latest commit. CheckRun and
// StatusContext fields share one node struct; which subset is populated
// depends on the node's __typename.
type ghChecksResponse struct {
	Data struct {
		Repository struct {
			PullRequest struct {
				Number  int    `json:"number"`
				Title   string `json:"title"`
				URL     string `json:"url"`
				HeadRef string `json:"headRefName"`
				Commits struct {
					Nodes []struct {
						Commit struct {
							StatusCheckRollup struct {
								Contexts struct {
									Nodes []struct {
										// Populated for CheckRun nodes.
										Type         string `json:"__typename"`
										Name         string `json:"name"`
										DisplayTitle string `json:"displayTitle"`
										WorkflowName string `json:"workflowName"`
										Status       string `json:"status"`
										Conclusion   string `json:"conclusion"`
										DetailsURL   string `json:"detailsUrl"`
										// Populated for StatusContext nodes.
										Context     string `json:"context"`
										State       string `json:"state"`
										TargetURL   string `json:"targetUrl"`
										Description string `json:"description"`
									} `json:"nodes"`
								} `json:"contexts"`
							} `json:"statusCheckRollup"`
						} `json:"commit"`
					} `json:"nodes"`
				} `json:"commits"`
			} `json:"pullRequest"`
		} `json:"repository"`
	} `json:"data"`
}
// NewPRChecksCommand creates the pr-checks command surface.
func NewPRChecksCommand() *cobra.Command {
	root := &cobra.Command{
		Use:   "pr-checks",
		Short: "Inspect GitHub PR checks and surface failing runs for remediation",
	}
	for _, sub := range []*cobra.Command{newPRChecksStatusCommand(), newPRChecksDiagnoseCommand()} {
		root.AddCommand(sub)
	}
	return root
}
// newPRChecksStatusCommand wires "pr-checks status".
func newPRChecksStatusCommand() *cobra.Command {
	opts := &PRChecksOptions{}
	statusCmd := cobra.Command{
		Use:   "status",
		Short: "List all status checks for a pull request",
		Run: func(_ *cobra.Command, _ []string) {
			runPRChecksStatus(opts)
		},
	}
	statusCmd.Flags().StringVar(&opts.PR, "pr", "", "pull request number (defaults to the current branch PR)")
	return &statusCmd
}
// newPRChecksDiagnoseCommand wires "pr-checks diagnose".
func newPRChecksDiagnoseCommand() *cobra.Command {
	opts := &PRChecksOptions{}
	diagnoseCmd := cobra.Command{
		Use:   "diagnose",
		Short: "List only failing checks and point to the next remediation command",
		Run: func(_ *cobra.Command, _ []string) {
			runPRChecksDiagnose(opts)
		},
	}
	diagnoseCmd.Flags().StringVar(&opts.PR, "pr", "", "pull request number (defaults to the current branch PR)")
	return &diagnoseCmd
}
// runPRChecksStatus prints a header for the PR followed by one line per
// rollup check: result, display name, kind, and details URL.
func runPRChecksStatus(opts *PRChecksOptions) {
	prNumber, err := resolvePRNumber(opts.PR)
	if err != nil {
		log.Fatalf("Failed to resolve PR number: %v", err)
	}
	response, fetchErr := fetchPRChecks(prNumber)
	if fetchErr != nil {
		log.Fatalf("Failed to fetch PR checks: %v", fetchErr)
	}
	pr := response.Data.Repository.PullRequest
	fmt.Printf("PR #%d %s\n", pr.Number, pr.Title)
	for _, check := range flattenChecks(response) {
		fmt.Printf("[%s] %s (%s) %s\n", check.result(), check.displayName(), check.kind(), check.url())
	}
}
// runPRChecksDiagnose lists only the failing checks on the PR and suggests
// the next remediation command for each: "ods trace" for Playwright failures,
// a gh run-log inspection otherwise.
func runPRChecksDiagnose(opts *PRChecksOptions) {
	prNumber, err := resolvePRNumber(opts.PR)
	if err != nil {
		log.Fatalf("Failed to resolve PR number: %v", err)
	}
	response, fetchErr := fetchPRChecks(prNumber)
	if fetchErr != nil {
		log.Fatalf("Failed to fetch PR checks: %v", fetchErr)
	}
	failing := failingChecks(response)
	if len(failing) == 0 {
		fmt.Printf("No failing checks found on PR #%s\n", prNumber)
		return
	}
	fmt.Printf("Failing checks for PR #%s:\n", prNumber)
	for _, check := range failing {
		fmt.Printf("- %s (%s)\n", check.displayName(), check.url())
		isPlaywright := strings.Contains(strings.ToLower(check.displayName()), "playwright")
		if isPlaywright {
			fmt.Printf(" next: ods trace --pr %s\n", prNumber)
		} else {
			fmt.Printf(" next: gh run view <run-id> --log-failed\n")
		}
	}
}
// fetchPRChecks queries the GitHub GraphQL API (through the gh CLI) for the
// status-check rollup on the head commit of the given pull request in the
// current repository.
func fetchPRChecks(prNumber string) (*ghChecksResponse, error) {
	repoSlug, err := currentRepoSlug()
	if err != nil {
		return nil, err
	}
	slugParts := strings.SplitN(repoSlug, "/", 2)
	if len(slugParts) != 2 {
		return nil, fmt.Errorf("unexpected repo slug %q", repoSlug)
	}
	owner, name := slugParts[0], slugParts[1]
	git.CheckGitHubCLI()
	query := `query($owner:String!, $name:String!, $number:Int!) {
repository(owner:$owner, name:$name) {
pullRequest(number:$number) {
number
title
url
headRefName
commits(last:1) {
nodes {
commit {
statusCheckRollup {
contexts(first:100) {
nodes {
__typename
... on CheckRun {
name
status
conclusion
detailsUrl
}
... on StatusContext {
context
state
targetUrl
description
}
}
}
}
}
}
}
}
}`
	checksCmd := exec.Command(
		"gh", "api", "graphql",
		"-f", "query="+query,
		"-F", "owner="+owner,
		"-F", "name="+name,
		"-F", "number="+prNumber,
	)
	output, err := checksCmd.Output()
	if err != nil {
		// Surface gh's stderr when available so the failure is actionable.
		exitErr, isExit := err.(*exec.ExitError)
		if isExit {
			return nil, fmt.Errorf("gh api graphql failed: %w: %s", err, strings.TrimSpace(string(exitErr.Stderr)))
		}
		return nil, fmt.Errorf("gh api graphql failed: %w", err)
	}
	var parsed ghChecksResponse
	if err := json.Unmarshal(output, &parsed); err != nil {
		return nil, fmt.Errorf("parse PR checks: %w", err)
	}
	return &parsed, nil
}
// flattenedCheck is a uniform view over the two node kinds GitHub returns in
// a statusCheckRollup: CheckRun and StatusContext. Only the fields belonging
// to the node's actual type are populated; the rest stay zero-valued.
type flattenedCheck struct {
	// Type is the GraphQL __typename: "CheckRun", "StatusContext", or empty.
	Type string
	// CheckRun fields.
	Name string
	// DisplayTitle and WorkflowName are not requested by the checks query in
	// fetchPRChecks — presumably populated by another caller; TODO confirm.
	DisplayTitle string
	WorkflowName string
	Status       string
	Conclusion   string
	DetailsURL   string
	// StatusContext fields.
	Context   string
	State     string
	TargetURL string
}
// flattenChecks converts the nested GraphQL response into a flat slice of
// flattenedCheck values. It returns an empty (non-nil) slice when the
// response is nil or the PR carries no commits.
func flattenChecks(response *ghChecksResponse) []flattenedCheck {
	flattened := []flattenedCheck{}
	if response == nil {
		return flattened
	}
	commits := response.Data.Repository.PullRequest.Commits.Nodes
	if len(commits) == 0 {
		return flattened
	}
	for _, node := range commits[0].Commit.StatusCheckRollup.Contexts.Nodes {
		flattened = append(flattened, flattenedCheck{
			Type:         node.Type,
			Name:         node.Name,
			DisplayTitle: node.DisplayTitle,
			WorkflowName: node.WorkflowName,
			Status:       node.Status,
			Conclusion:   node.Conclusion,
			DetailsURL:   node.DetailsURL,
			Context:      node.Context,
			State:        node.State,
			TargetURL:    node.TargetURL,
		})
	}
	return flattened
}
// displayName picks the most human-friendly label available for the check:
// display title, then "workflow / name", then the bare name for check runs;
// the context string for everything else.
func (c flattenedCheck) displayName() string {
	if c.Type != "CheckRun" {
		return c.Context
	}
	if c.DisplayTitle != "" {
		return c.DisplayTitle
	}
	if c.WorkflowName != "" && c.Name != "" {
		return c.WorkflowName + " / " + c.Name
	}
	return c.Name
}
// kind reports the node's GraphQL type name, defaulting to "status" when the
// __typename is absent.
func (c flattenedCheck) kind() string {
	if c.Type != "" {
		return c.Type
	}
	return "status"
}
// result normalizes the check outcome to lower case: a check run's conclusion
// (or its status while still running), otherwise the status context's state.
func (c flattenedCheck) result() string {
	switch {
	case c.Type != "CheckRun":
		return strings.ToLower(c.State)
	case c.Conclusion != "":
		return strings.ToLower(c.Conclusion)
	default:
		return strings.ToLower(c.Status)
	}
}
// url returns the check's details link, falling back to the status context's
// target URL when no details URL exists.
func (c flattenedCheck) url() string {
	if c.DetailsURL == "" {
		return c.TargetURL
	}
	return c.DetailsURL
}
// failingChecks filters the flattened checks down to those whose normalized
// result indicates failure.
//
// Covers CheckRun conclusions (failure, timed_out, cancelled, startup_failure)
// and commit-status states (failure, error). "failed" is retained for
// defensiveness even though it is not a canonical GitHub value.
func failingChecks(response *ghChecksResponse) []flattenedCheck {
	checks := flattenChecks(response)
	failing := make([]flattenedCheck, 0, len(checks))
	for _, check := range checks {
		switch check.result() {
		// "startup_failure" was previously missed, so check runs that never
		// started (e.g. broken workflow files) looked healthy in `diagnose`.
		case "failure", "failed", "timed_out", "cancelled", "error", "startup_failure":
			failing = append(failing, check)
		}
	}
	return failing
}

73
tools/ods/cmd/pr_merge.go Normal file
View File

@@ -0,0 +1,73 @@
package cmd
import (
"os"
"os/exec"
log "github.com/sirupsen/logrus"
"github.com/spf13/cobra"
"github.com/onyx-dot-app/onyx/tools/ods/internal/git"
)
// PRMergeOptions holds the flag values for the pr-merge command.
type PRMergeOptions struct {
	PR           string // pull request number; empty means "current branch PR"
	Auto         bool   // enable auto-merge instead of merging immediately
	DeleteBranch bool   // delete the source branch after merging
	Method       string // merge method: "squash", "merge", or "rebase"
}
// NewPRMergeCommand creates the pr-merge command.
func NewPRMergeCommand() *cobra.Command {
	opts := &PRMergeOptions{}
	mergeCmd := &cobra.Command{
		Use:   "pr-merge",
		Short: "Merge a GitHub pull request through gh with explicit method flags",
		Run: func(cmd *cobra.Command, args []string) {
			runPRMerge(opts)
		},
	}
	flags := mergeCmd.Flags()
	flags.StringVar(&opts.PR, "pr", "", "pull request number (defaults to the current branch PR)")
	flags.BoolVar(&opts.Auto, "auto", false, "enable auto-merge instead of merging immediately")
	flags.BoolVar(&opts.DeleteBranch, "delete-branch", false, "delete the branch after merge")
	flags.StringVar(&opts.Method, "method", "squash", "merge method: squash, merge, or rebase")
	return mergeCmd
}
// runPRMerge shells out to `gh pr merge` with an explicit method flag, wiring
// the child process to the caller's stdio so interactive gh prompts work.
func runPRMerge(opts *PRMergeOptions) {
	git.CheckGitHubCLI()
	prNumber, err := resolvePRNumber(opts.PR)
	if err != nil {
		log.Fatalf("Failed to resolve PR number: %v", err)
	}
	methodFlags := map[string]string{
		"squash": "--squash",
		"merge":  "--merge",
		"rebase": "--rebase",
	}
	methodFlag, known := methodFlags[opts.Method]
	if !known {
		log.Fatalf("Invalid merge method %q: expected squash, merge, or rebase", opts.Method)
	}
	ghArgs := []string{"pr", "merge", prNumber, methodFlag}
	if opts.Auto {
		ghArgs = append(ghArgs, "--auto")
	}
	if opts.DeleteBranch {
		ghArgs = append(ghArgs, "--delete-branch")
	}
	mergeCmd := exec.Command("gh", ghArgs...)
	mergeCmd.Stdout = os.Stdout
	mergeCmd.Stderr = os.Stderr
	mergeCmd.Stdin = os.Stdin
	if err := mergeCmd.Run(); err != nil {
		log.Fatalf("Failed to merge PR #%s: %v", prNumber, err)
	}
}

89
tools/ods/cmd/pr_open.go Normal file
View File

@@ -0,0 +1,89 @@
package cmd
import (
"fmt"
"os"
"os/exec"
"path/filepath"
"regexp"
"strings"
log "github.com/sirupsen/logrus"
"github.com/spf13/cobra"
"github.com/onyx-dot-app/onyx/tools/ods/internal/git"
"github.com/onyx-dot-app/onyx/tools/ods/internal/paths"
)
// conventionalPRTitlePattern matches conventional-commit style PR titles such
// as "feat(web): add banner": a known type, an optional "(scope)", then ": "
// followed by a non-empty subject.
var conventionalPRTitlePattern = regexp.MustCompile(`^(build|chore|ci|docs|feat|fix|perf|refactor|revert|style|test)(\([^)]+\))?: .+`)
// PROpenOptions holds the flag values for the pr-open command.
type PROpenOptions struct {
	Title    string // PR title; empty means "use the latest commit subject"
	Base     string // base branch for the PR (default "main")
	BodyFile string // explicit body file; empty means the repo PR template
	Draft    bool   // open the PR as a draft
}
// NewPROpenCommand creates the pr-open command.
func NewPROpenCommand() *cobra.Command {
	opts := &PROpenOptions{}
	openCmd := &cobra.Command{
		Use:   "pr-open",
		Short: "Open a GitHub pull request using the repo template and a conventional-commit title",
		Run: func(cmd *cobra.Command, args []string) {
			runPROpen(opts)
		},
	}
	flags := openCmd.Flags()
	flags.StringVar(&opts.Title, "title", "", "PR title (defaults to the latest commit subject)")
	flags.StringVar(&opts.Base, "base", "main", "base branch for the PR")
	flags.StringVar(&opts.BodyFile, "body-file", "", "explicit PR body file (defaults to .github/pull_request_template.md)")
	flags.BoolVar(&opts.Draft, "draft", false, "open the PR as a draft")
	return openCmd
}
// runPROpen opens a PR via `gh pr create`, enforcing a conventional-commit
// title and defaulting the body to the repository's PR template.
func runPROpen(opts *PROpenOptions) {
	git.CheckGitHubCLI()
	title := strings.TrimSpace(opts.Title)
	if title == "" {
		// Fall back to the subject of the latest commit.
		subject, err := git.GetCommitMessage("HEAD")
		if err != nil {
			log.Fatalf("Failed to determine PR title from HEAD: %v", err)
		}
		title = subject
	}
	if !conventionalPRTitlePattern.MatchString(title) {
		log.Fatalf("PR title must follow conventional-commit style. Got %q", title)
	}
	bodyFile := strings.TrimSpace(opts.BodyFile)
	if bodyFile == "" {
		repoRoot, err := paths.GitRoot()
		if err != nil {
			log.Fatalf("Failed to determine git root: %v", err)
		}
		bodyFile = filepath.Join(repoRoot, ".github", "pull_request_template.md")
	}
	bodyBytes, err := os.ReadFile(bodyFile)
	if err != nil {
		log.Fatalf("Failed to read PR body file %s: %v", bodyFile, err)
	}
	createArgs := []string{"pr", "create", "--base", opts.Base, "--title", title, "--body", string(bodyBytes)}
	if opts.Draft {
		createArgs = append(createArgs, "--draft")
	}
	createCmd := exec.Command("gh", createArgs...)
	createCmd.Stdout = os.Stdout
	createCmd.Stderr = os.Stderr
	createCmd.Stdin = os.Stdin
	if err := createCmd.Run(); err != nil {
		log.Fatalf("Failed to open PR: %v", err)
	}
	fmt.Printf("Opened PR with title %q\n", title)
}

393
tools/ods/cmd/pr_review.go Normal file
View File

@@ -0,0 +1,393 @@
package cmd
import (
"encoding/json"
"fmt"
"os"
"os/exec"
"path/filepath"
"strings"
log "github.com/sirupsen/logrus"
"github.com/spf13/cobra"
"github.com/onyx-dot-app/onyx/tools/ods/internal/agentlab"
"github.com/onyx-dot-app/onyx/tools/ods/internal/git"
"github.com/onyx-dot-app/onyx/tools/ods/internal/prreview"
)
// PRReviewFetchOptions holds flag values for `pr-review fetch`.
type PRReviewFetchOptions struct {
	PR     string // pull request number; empty means "current branch PR"
	Output string // explicit output path; empty means the default state dir
}

// PRReviewTriageOptions holds flag values for `pr-review triage`.
type PRReviewTriageOptions struct {
	PR     string // pull request number; empty means "current branch PR"
	Output string // explicit output path; empty means the default state dir
}

// PRReviewRespondOptions holds flag values for `pr-review respond` and is
// reused by `pr-review resolve`, which only reads ThreadID.
type PRReviewRespondOptions struct {
	PR        string // pull request number; empty means "current branch PR"
	CommentID int    // top-level inline review comment ID to reply to
	Body      string // reply body to post
	ThreadID  string // optional GraphQL thread ID to resolve after replying
}
// ghReviewResponse mirrors the shape of the GraphQL payload returned by
// `gh api graphql` for the review-threads query built in ghGraphQL.
type ghReviewResponse struct {
	Data struct {
		Repository struct {
			PullRequest struct {
				Number        int    `json:"number"`
				Title         string `json:"title"`
				URL           string `json:"url"`
				ReviewThreads struct {
					Nodes []struct {
						ID         string `json:"id"`
						IsResolved bool   `json:"isResolved"`
						IsOutdated bool   `json:"isOutdated"`
						Path       string `json:"path"`
						Line       int    `json:"line"`
						StartLine  int    `json:"startLine"`
						Comments   struct {
							Nodes []struct {
								DatabaseID int `json:"databaseId"`
								// Tag added for consistency with the sibling
								// fields; decoding previously relied on Go's
								// case-insensitive fallback for "body".
								Body      string `json:"body"`
								URL       string `json:"url"`
								CreatedAt string `json:"createdAt"`
								Author    struct {
									Login string `json:"login"`
								} `json:"author"`
							} `json:"nodes"`
						} `json:"comments"`
					} `json:"nodes"`
				} `json:"reviewThreads"`
			} `json:"pullRequest"`
		} `json:"repository"`
	} `json:"data"`
}
// NewPRReviewCommand creates the pr-review command surface.
func NewPRReviewCommand() *cobra.Command {
	reviewCmd := &cobra.Command{
		Use:   "pr-review",
		Short: "Fetch, triage, and respond to GitHub pull request review threads",
	}
	for _, sub := range []*cobra.Command{
		newPRReviewFetchCommand(),
		newPRReviewTriageCommand(),
		newPRReviewRespondCommand(),
		newPRReviewResolveCommand(),
	} {
		reviewCmd.AddCommand(sub)
	}
	return reviewCmd
}
// newPRReviewFetchCommand builds the `fetch` subcommand, which downloads
// review threads and persists them as local harness state.
func newPRReviewFetchCommand() *cobra.Command {
	opts := &PRReviewFetchOptions{}
	fetchCmd := &cobra.Command{
		Use:   "fetch",
		Short: "Fetch pull request review threads and write them to local harness state",
		Run: func(cmd *cobra.Command, args []string) {
			runPRReviewFetch(opts)
		},
	}
	flags := fetchCmd.Flags()
	flags.StringVar(&opts.PR, "pr", "", "pull request number (defaults to the current branch PR)")
	flags.StringVar(&opts.Output, "output", "", "explicit output path for the fetched review JSON")
	return fetchCmd
}
// newPRReviewTriageCommand builds the `triage` subcommand, which classifies
// unresolved review threads.
func newPRReviewTriageCommand() *cobra.Command {
	opts := &PRReviewTriageOptions{}
	triageCmd := &cobra.Command{
		Use:   "triage",
		Short: "Classify unresolved review threads into actionable, duplicate, outdated, or resolved",
		Run: func(cmd *cobra.Command, args []string) {
			runPRReviewTriage(opts)
		},
	}
	flags := triageCmd.Flags()
	flags.StringVar(&opts.PR, "pr", "", "pull request number (defaults to the current branch PR)")
	flags.StringVar(&opts.Output, "output", "", "explicit output path for the triage JSON")
	return triageCmd
}
// newPRReviewRespondCommand builds the `respond` subcommand, which replies to
// an inline review comment and can optionally resolve its thread.
func newPRReviewRespondCommand() *cobra.Command {
	opts := &PRReviewRespondOptions{}
	respondCmd := &cobra.Command{
		Use:   "respond",
		Short: "Reply to an inline pull request review comment and optionally resolve the thread",
		Run: func(cmd *cobra.Command, args []string) {
			runPRReviewRespond(opts)
		},
	}
	flags := respondCmd.Flags()
	flags.StringVar(&opts.PR, "pr", "", "pull request number (defaults to the current branch PR)")
	flags.IntVar(&opts.CommentID, "comment-id", 0, "top-level pull request review comment ID to reply to")
	flags.StringVar(&opts.ThreadID, "thread-id", "", "GraphQL review thread ID to resolve after replying")
	flags.StringVar(&opts.Body, "body", "", "reply body to post")
	_ = respondCmd.MarkFlagRequired("comment-id")
	_ = respondCmd.MarkFlagRequired("body")
	return respondCmd
}
// newPRReviewResolveCommand builds the `resolve` subcommand, which resolves a
// thread without posting a reply.
func newPRReviewResolveCommand() *cobra.Command {
	opts := &PRReviewRespondOptions{}
	resolveCmd := &cobra.Command{
		Use:   "resolve",
		Short: "Resolve a review thread without posting a reply",
		Run: func(cmd *cobra.Command, args []string) {
			runPRReviewResolve(opts)
		},
	}
	resolveCmd.Flags().StringVar(&opts.ThreadID, "thread-id", "", "GraphQL review thread ID to resolve")
	_ = resolveCmd.MarkFlagRequired("thread-id")
	return resolveCmd
}
// runPRReviewFetch pulls review threads for the target PR and persists them
// as JSON under the agent-lab state directory (or an explicit --output path).
func runPRReviewFetch(opts *PRReviewFetchOptions) {
	prNumber, err := resolvePRNumber(opts.PR)
	if err != nil {
		log.Fatalf("Failed to resolve PR number: %v", err)
	}
	review, err := fetchPRReview(prNumber)
	if err != nil {
		log.Fatalf("Failed to fetch PR review threads: %v", err)
	}
	outputPath, pathErr := reviewOutputPath(prNumber, opts.Output, "threads.json")
	if pathErr != nil {
		log.Fatalf("Failed to determine output path: %v", pathErr)
	}
	writeJSON(outputPath, review)
	log.Infof("Fetched %d review threads into %s", len(review.Threads), outputPath)
}
// runPRReviewTriage fetches review threads, classifies them through the
// prreview package, writes the triage JSON, and prints a per-thread summary.
func runPRReviewTriage(opts *PRReviewTriageOptions) {
	prNumber, err := resolvePRNumber(opts.PR)
	if err != nil {
		log.Fatalf("Failed to resolve PR number: %v", err)
	}
	review, err := fetchPRReview(prNumber)
	if err != nil {
		log.Fatalf("Failed to fetch PR review threads: %v", err)
	}
	triage := prreview.Triage(review)
	outputPath, pathErr := reviewOutputPath(prNumber, opts.Output, "triage.json")
	if pathErr != nil {
		log.Fatalf("Failed to determine output path: %v", pathErr)
	}
	writeJSON(outputPath, triage)
	for _, summary := range triage.Summaries {
		location := triageLineRef(summary.Thread.Path, summary.Thread.Line)
		fmt.Printf("[%s] %s %s %s\n", summary.Category, summary.Source, summary.Thread.ID, location)
		for _, reason := range summary.Reasons {
			fmt.Printf(" - %s\n", reason)
		}
	}
	log.Infof("Wrote PR review triage to %s", outputPath)
}

// triageLineRef formats an optional "path:line" reference for a thread;
// it returns "" when the thread has no path and just the path when the line
// is unknown.
func triageLineRef(path string, line int) string {
	if path == "" {
		return ""
	}
	if line > 0 {
		return fmt.Sprintf("%s:%d", path, line)
	}
	return path
}
// runPRReviewRespond replies to an inline review comment and, when a thread
// ID is supplied, resolves that thread afterwards.
func runPRReviewRespond(opts *PRReviewRespondOptions) {
	prNumber, err := resolvePRNumber(opts.PR)
	if err != nil {
		log.Fatalf("Failed to resolve PR number: %v", err)
	}
	repoSlug, err := currentRepoSlug()
	if err != nil {
		log.Fatalf("Failed to resolve repo slug: %v", err)
	}
	if err := replyToReviewComment(repoSlug, prNumber, opts.CommentID, opts.Body); err != nil {
		log.Fatalf("Failed to reply to review comment: %v", err)
	}
	if strings.TrimSpace(opts.ThreadID) != "" {
		if err := resolveReviewThread(opts.ThreadID); err != nil {
			log.Fatalf("Failed to resolve review thread: %v", err)
		}
	}
	log.Infof("Posted reply to review comment %d on PR #%s", opts.CommentID, prNumber)
}
// runPRReviewResolve resolves a single review thread by its GraphQL ID
// without posting any reply; a failed resolution is fatal.
func runPRReviewResolve(opts *PRReviewRespondOptions) {
	if err := resolveReviewThread(opts.ThreadID); err != nil {
		log.Fatalf("Failed to resolve review thread: %v", err)
	}
	log.Infof("Resolved review thread %s", opts.ThreadID)
}
// fetchPRReview retrieves all review threads for the PR in the current
// repository and converts the raw GraphQL payload into the prreview domain
// model.
func fetchPRReview(prNumber string) (prreview.PullRequest, error) {
	repoSlug, err := currentRepoSlug()
	if err != nil {
		return prreview.PullRequest{}, err
	}
	slugParts := strings.SplitN(repoSlug, "/", 2)
	if len(slugParts) != 2 {
		return prreview.PullRequest{}, fmt.Errorf("unexpected repo slug %q", repoSlug)
	}
	response, err := ghGraphQL(slugParts[0], slugParts[1], prNumber)
	if err != nil {
		return prreview.PullRequest{}, err
	}
	raw := response.Data.Repository.PullRequest
	pr := prreview.PullRequest{
		Number:  raw.Number,
		Title:   raw.Title,
		URL:     raw.URL,
		Threads: []prreview.Thread{},
	}
	for _, rawThread := range raw.ReviewThreads.Nodes {
		thread := prreview.Thread{
			ID:         rawThread.ID,
			IsResolved: rawThread.IsResolved,
			IsOutdated: rawThread.IsOutdated,
			Path:       rawThread.Path,
			Line:       rawThread.Line,
			StartLine:  rawThread.StartLine,
			Comments:   []prreview.Comment{},
		}
		for _, rawComment := range rawThread.Comments.Nodes {
			thread.Comments = append(thread.Comments, prreview.Comment{
				ID:          rawComment.DatabaseID,
				Body:        rawComment.Body,
				AuthorLogin: rawComment.Author.Login,
				URL:         rawComment.URL,
				CreatedAt:   rawComment.CreatedAt,
			})
		}
		pr.Threads = append(pr.Threads, thread)
	}
	return pr, nil
}
// ghGraphQL fetches up to 100 review threads (each with up to 100 comments)
// for the given PR through the gh CLI's GraphQL endpoint.
func ghGraphQL(owner, name, prNumber string) (*ghReviewResponse, error) {
	git.CheckGitHubCLI()
	query := `query($owner:String!, $name:String!, $number:Int!) {
repository(owner:$owner, name:$name) {
pullRequest(number:$number) {
number
title
url
reviewThreads(first:100) {
nodes {
id
isResolved
isOutdated
path
line
startLine
comments(first:100) {
nodes {
databaseId
body
url
createdAt
author {
login
}
}
}
}
}
}
}
}`
	graphqlCmd := exec.Command(
		"gh", "api", "graphql",
		"-f", "query="+query,
		"-F", "owner="+owner,
		"-F", "name="+name,
		"-F", "number="+prNumber,
	)
	output, err := graphqlCmd.Output()
	if err != nil {
		// Include gh's stderr in the error when the process exited non-zero.
		exitErr, isExit := err.(*exec.ExitError)
		if isExit {
			return nil, fmt.Errorf("gh api graphql failed: %w: %s", err, strings.TrimSpace(string(exitErr.Stderr)))
		}
		return nil, fmt.Errorf("gh api graphql failed: %w", err)
	}
	var parsed ghReviewResponse
	if err := json.Unmarshal(output, &parsed); err != nil {
		return nil, fmt.Errorf("parse graphql response: %w", err)
	}
	return &parsed, nil
}
// replyToReviewComment posts a reply to a top-level inline review comment via
// the GitHub REST API (through gh); the response body is discarded.
func replyToReviewComment(repoSlug, prNumber string, commentID int, body string) error {
	endpoint := fmt.Sprintf("repos/%s/pulls/%s/comments/%d/replies", repoSlug, prNumber, commentID)
	_, err := ghString(
		"api",
		"--method", "POST",
		endpoint,
		"-f", "body="+body,
	)
	return err
}
// resolveReviewThread marks a review thread as resolved via the GraphQL
// resolveReviewThread mutation.
func resolveReviewThread(threadID string) error {
	git.CheckGitHubCLI()
	mutation := `mutation($threadId:ID!) {
resolveReviewThread(input:{threadId:$threadId}) {
thread {
id
isResolved
}
}
}`
	resolveCmd := exec.Command(
		"gh", "api", "graphql",
		"-f", "query="+mutation,
		"-F", "threadId="+threadID,
	)
	output, err := resolveCmd.CombinedOutput()
	if err != nil {
		// Combined output captures GraphQL error payloads printed to stderr.
		return fmt.Errorf("resolve review thread: %w: %s", err, strings.TrimSpace(string(output)))
	}
	return nil
}
// reviewOutputPath returns where a review artifact should be written: the
// explicit path when one was given, otherwise fileName inside a per-PR
// directory under the agent-lab state root (created on demand).
func reviewOutputPath(prNumber, explicit, fileName string) (string, error) {
	if strings.TrimSpace(explicit) != "" {
		return explicit, nil
	}
	commonGitDir, err := agentlab.GetCommonGitDir()
	if err != nil {
		return "", err
	}
	stateDir := filepath.Join(agentlab.StateRoot(commonGitDir), "reviews", "pr-"+prNumber)
	if mkErr := os.MkdirAll(stateDir, 0755); mkErr != nil {
		return "", fmt.Errorf("create review state dir: %w", mkErr)
	}
	return filepath.Join(stateDir, fileName), nil
}
// writeJSON pretty-prints value as indented JSON and writes it to path.
// Failures are fatal because callers rely on the artifact existing afterwards.
func writeJSON(path string, value any) {
	data, err := json.MarshalIndent(value, "", " ")
	if err != nil {
		log.Fatalf("Failed to encode JSON for %s: %v", path, err)
	}
	if err := os.WriteFile(path, data, 0644); err != nil {
		log.Fatalf("Failed to write %s: %v", path, err)
	}
}

View File

@@ -41,6 +41,7 @@ func NewRootCommand() *cobra.Command {
cmd.PersistentFlags().BoolVar(&opts.Debug, "debug", false, "run in debug mode")
// Add subcommands
cmd.AddCommand(NewAgentCheckCommand())
cmd.AddCommand(NewBackendCommand())
cmd.AddCommand(NewCheckLazyImportsCommand())
cmd.AddCommand(NewCherryPickCommand())
@@ -48,8 +49,13 @@ func NewRootCommand() *cobra.Command {
cmd.AddCommand(NewDeployCommand())
cmd.AddCommand(NewOpenAPICommand())
cmd.AddCommand(NewComposeCommand())
cmd.AddCommand(NewJourneyCommand())
cmd.AddCommand(NewLogsCommand())
cmd.AddCommand(NewPullCommand())
cmd.AddCommand(NewPRChecksCommand())
cmd.AddCommand(NewPRMergeCommand())
cmd.AddCommand(NewPROpenCommand())
cmd.AddCommand(NewPRReviewCommand())
cmd.AddCommand(NewRunCICommand())
cmd.AddCommand(NewScreenshotDiffCommand())
cmd.AddCommand(NewDesktopCommand())
@@ -58,6 +64,8 @@ func NewRootCommand() *cobra.Command {
cmd.AddCommand(NewLatestStableTagCommand())
cmd.AddCommand(NewWhoisCommand())
cmd.AddCommand(NewTraceCommand())
cmd.AddCommand(NewVerifyCommand())
cmd.AddCommand(NewWorktreeCommand())
return cmd
}

318
tools/ods/cmd/verify.go Normal file
View File

@@ -0,0 +1,318 @@
package cmd
import (
"encoding/json"
"fmt"
"io"
"os"
"os/exec"
"path/filepath"
"time"
log "github.com/sirupsen/logrus"
"github.com/spf13/cobra"
"github.com/onyx-dot-app/onyx/tools/ods/internal/agentlab"
"github.com/onyx-dot-app/onyx/tools/ods/internal/envutil"
)
// VerifyOptions holds the flag values for the verify command.
type VerifyOptions struct {
	BaseRef           string   // ref agent-check diffs against; empty = worktree base ref, then HEAD
	SkipAgentCheck    bool     // skip the diff-based agent-check step
	Worktree          string   // tracked agent-lab worktree to verify instead of the current checkout
	PytestPaths       []string // pytest paths/node ids; empty skips the pytest step
	PlaywrightPaths   []string // Playwright test paths to run
	PlaywrightGrep    string   // grep pattern passed through to Playwright
	PlaywrightProject string   // Playwright project to run
}
// VerifySummary is the machine-readable result of one verify run, written as
// summary.json into the artifact directory.
type VerifySummary struct {
	GeneratedAt string              `json:"generated_at"`       // RFC3339 UTC timestamp of the run
	RepoRoot    string              `json:"repo_root"`          // checkout the steps ran against
	Worktree    *agentlab.Manifest  `json:"worktree,omitempty"` // present only for tracked worktrees
	Steps       []VerifyStepSummary `json:"steps"`              // step results in execution order
}
// VerifyStepSummary records the outcome of a single verification step
// (agent-check, pytest, or playwright).
type VerifyStepSummary struct {
	Name        string   `json:"name"`                   // step identifier
	Status      string   `json:"status"`                 // "passed" or "failed"
	Command     []string `json:"command,omitempty"`      // command that was (or would be) run
	DurationMS  int64    `json:"duration_ms"`            // wall-clock duration
	LogPath     string   `json:"log_path,omitempty"`     // teed stdout/stderr log, when any
	ArtifactDir string   `json:"artifact_dir,omitempty"` // extra artifacts (e.g. Playwright output)
	Details     []string `json:"details,omitempty"`      // errors, violations, or context notes
}
// NewVerifyCommand creates the verify command.
func NewVerifyCommand() *cobra.Command {
	opts := &VerifyOptions{}
	verifyCmd := &cobra.Command{
		Use:   "verify",
		Short: "Run the agent-lab verification ladder and write a machine-readable summary",
		Long: `Run the agent-lab verification ladder for the current checkout.
This command composes the diff-based agent-check with optional pytest and
Playwright execution, then writes a JSON summary into the worktree artifact
directory so agents can inspect the result without re-parsing console output.
Use --worktree to run the same flow against a tracked target worktree from the
agent-lab control checkout.`,
		Run: func(cmd *cobra.Command, args []string) {
			runVerify(opts)
		},
	}
	flags := verifyCmd.Flags()
	flags.StringVar(&opts.BaseRef, "base-ref", "", "git ref to compare against for agent-check (defaults to the worktree base ref or HEAD)")
	flags.BoolVar(&opts.SkipAgentCheck, "skip-agent-check", false, "skip the diff-based agent-check step")
	flags.StringVar(&opts.Worktree, "worktree", "", "tracked agent-lab worktree to verify from instead of the current checkout")
	flags.StringArrayVar(&opts.PytestPaths, "pytest", nil, "pytest path or node id to run (repeatable)")
	flags.StringArrayVar(&opts.PlaywrightPaths, "playwright", nil, "Playwright test path to run (repeatable)")
	flags.StringVar(&opts.PlaywrightGrep, "playwright-grep", "", "grep passed through to Playwright")
	flags.StringVar(&opts.PlaywrightProject, "playwright-project", "", "Playwright project to run")
	return verifyCmd
}
// runVerify executes the verification ladder (agent-check, then optional
// pytest and Playwright) against the current checkout or a tracked worktree.
// The summary JSON is always written — including on the first failing step,
// after which the process exits non-zero.
func runVerify(opts *VerifyOptions) {
	repoRoot, manifest, hasManifest := resolveAgentLabTarget(opts.Worktree)
	commonGitDir, err := agentlab.GetCommonGitDir()
	if err != nil {
		log.Fatalf("Failed to determine git common dir: %v", err)
	}
	runAt := time.Now().UTC()
	artifactRoot := filepath.Join(repoRoot, "web", "output")
	if hasManifest {
		// Tracked worktrees get a timestamped directory under their own
		// artifact root so successive runs never clobber each other.
		artifactRoot = filepath.Join(manifest.ArtifactDir, "verify", runAt.Format("20060102-150405"))
	}
	if err := os.MkdirAll(artifactRoot, 0755); err != nil {
		log.Fatalf("Failed to create verify artifact dir: %v", err)
	}
	summary := VerifySummary{
		GeneratedAt: runAt.Format(time.RFC3339),
		RepoRoot:    repoRoot,
		Steps:       make([]VerifyStepSummary, 0, 3),
	}
	if hasManifest {
		manifestCopy := manifest
		summary.Worktree = &manifestCopy
	}
	// recordStep appends the step result; on failure it flushes the summary
	// and terminates with a non-zero exit so later steps never run.
	recordStep := func(step VerifyStepSummary, passed bool) {
		summary.Steps = append(summary.Steps, step)
		if !passed {
			writeVerifySummary(summary, artifactRoot, commonGitDir, manifest, hasManifest, runAt)
			os.Exit(1)
		}
	}
	if !opts.SkipAgentCheck {
		baseRef := opts.BaseRef
		if baseRef == "" && hasManifest {
			baseRef = manifest.BaseRef
		}
		if baseRef == "" {
			baseRef = "HEAD"
		}
		recordStep(runAgentCheckVerifyStep(repoRoot, opts.Worktree, baseRef))
	}
	if len(opts.PytestPaths) > 0 {
		recordStep(runPytestVerifyStep(repoRoot, artifactRoot, manifest, hasManifest, opts.PytestPaths))
	}
	if len(opts.PlaywrightPaths) > 0 || opts.PlaywrightGrep != "" {
		recordStep(runPlaywrightVerifyStep(repoRoot, artifactRoot, manifest, hasManifest, opts))
	}
	writeVerifySummary(summary, artifactRoot, commonGitDir, manifest, hasManifest, runAt)
	log.Infof("Verification summary written to %s", filepath.Join(artifactRoot, "summary.json"))
}
// runAgentCheckVerifyStep evaluates the diff-based agent-check against
// baseRef and converts the outcome into a step summary plus a pass flag.
func runAgentCheckVerifyStep(repoRoot, worktree, baseRef string) (VerifyStepSummary, bool) {
	startedAt := time.Now()
	checkOpts := &AgentCheckOptions{BaseRef: baseRef, Worktree: worktree, RepoRoot: repoRoot}
	result, err := evaluateAgentCheck(checkOpts, nil)
	command := []string{"ods", "agent-check", "--base-ref", baseRef}
	if worktree != "" {
		command = append(command, "--worktree", worktree)
	}
	step := VerifyStepSummary{
		Name:       "agent-check",
		Command:    command,
		DurationMS: time.Since(startedAt).Milliseconds(),
	}
	if err != nil {
		step.Status = "failed"
		step.Details = []string{err.Error()}
		return step, false
	}
	if len(result.Violations)+len(result.DocViolations) == 0 {
		step.Status = "passed"
		return step, true
	}
	step.Status = "failed"
	for _, violation := range result.Violations {
		step.Details = append(step.Details, fmt.Sprintf("%s:%d [%s] %s", violation.Path, violation.LineNum, violation.RuleID, violation.Message))
	}
	for _, violation := range result.DocViolations {
		step.Details = append(step.Details, fmt.Sprintf("%s [agent-docs] %s", violation.Path, violation.Message))
	}
	return step, false
}
// runPytestVerifyStep runs pytest (via `python -m dotenv` so .vscode/.env is
// loaded) from the backend directory, teeing output to pytest.log.
func runPytestVerifyStep(repoRoot, artifactRoot string, manifest agentlab.Manifest, hasManifest bool, pytestPaths []string) (VerifyStepSummary, bool) {
	pythonExecutable := pythonForRepo(repoRoot)
	pytestArgs := append([]string{"-m", "dotenv", "-f", ".vscode/.env", "run", "--", "pytest"}, pytestPaths...)
	extraEnv := map[string]string{}
	if hasManifest {
		// Inject the worktree's namespaced runtime environment.
		for key, value := range manifest.RuntimeEnv() {
			extraEnv[key] = value
		}
	}
	step, passed := runLoggedCommand(
		"pytest",
		filepath.Join(artifactRoot, "pytest.log"),
		filepath.Join(repoRoot, "backend"),
		extraEnv,
		pythonExecutable,
		pytestArgs...,
	)
	if hasManifest {
		deps := manifest.ResolvedDependencies()
		step.Details = append(step.Details,
			fmt.Sprintf("dependency mode: %s", deps.Mode),
			fmt.Sprintf("search infra: %s", deps.SearchInfraMode),
		)
	}
	return step, passed
}
// runPlaywrightVerifyStep runs `npx playwright test` from the web directory,
// teeing output to playwright.log and annotating the step with worktree
// runtime context when available.
func runPlaywrightVerifyStep(repoRoot, artifactRoot string, manifest agentlab.Manifest, hasManifest bool, opts *VerifyOptions) (VerifyStepSummary, bool) {
	playwrightArgs := append([]string{"playwright", "test"}, opts.PlaywrightPaths...)
	if opts.PlaywrightGrep != "" {
		playwrightArgs = append(playwrightArgs, "--grep", opts.PlaywrightGrep)
	}
	if opts.PlaywrightProject != "" {
		playwrightArgs = append(playwrightArgs, "--project", opts.PlaywrightProject)
	}
	extraEnv := map[string]string{}
	if hasManifest {
		// Inject the worktree's namespaced runtime environment.
		for key, value := range manifest.RuntimeEnv() {
			extraEnv[key] = value
		}
	}
	step, passed := runLoggedCommand(
		"playwright",
		filepath.Join(artifactRoot, "playwright.log"),
		filepath.Join(repoRoot, "web"),
		extraEnv,
		"npx",
		playwrightArgs...,
	)
	step.ArtifactDir = filepath.Join(repoRoot, "web", "output")
	if hasManifest {
		deps := manifest.ResolvedDependencies()
		step.Details = append(step.Details,
			fmt.Sprintf("base url: %s", manifest.URLs.Web),
			fmt.Sprintf("dependency mode: %s", deps.Mode),
			fmt.Sprintf("search infra: %s", deps.SearchInfraMode),
			fmt.Sprintf("reuse Chrome DevTools MCP against %s for interactive browser validation", manifest.URLs.Web),
		)
		step.Details = append(step.Details, manifest.DependencyWarnings()...)
	}
	return step, passed
}
// runLoggedCommand runs executable in workdir with optional environment
// overrides, teeing stdout/stderr to both the console and logPath, and
// returns a populated step summary plus a pass/fail flag.
func runLoggedCommand(name, logPath, workdir string, extraEnv map[string]string, executable string, args ...string) (VerifyStepSummary, bool) {
	startedAt := time.Now()
	// fail builds a setup-failure summary (no Command/LogPath, matching the
	// pre-execution failure shape).
	fail := func(detail string) (VerifyStepSummary, bool) {
		return VerifyStepSummary{
			Name:       name,
			Status:     "failed",
			DurationMS: time.Since(startedAt).Milliseconds(),
			Details:    []string{detail},
		}, false
	}
	if err := os.MkdirAll(filepath.Dir(logPath), 0755); err != nil {
		return fail(fmt.Sprintf("create log dir: %v", err))
	}
	logFile, err := os.Create(logPath)
	if err != nil {
		return fail(fmt.Sprintf("create log file: %v", err))
	}
	defer func() { _ = logFile.Close() }()
	execCmd := exec.Command(executable, args...)
	execCmd.Dir = workdir
	execCmd.Stdout = io.MultiWriter(os.Stdout, logFile)
	execCmd.Stderr = io.MultiWriter(os.Stderr, logFile)
	if len(extraEnv) > 0 {
		execCmd.Env = envutil.ApplyOverrides(os.Environ(), extraEnv)
	}
	step := VerifyStepSummary{
		Name:    name,
		Command: append([]string{executable}, args...),
		LogPath: logPath,
	}
	runErr := execCmd.Run()
	step.DurationMS = time.Since(startedAt).Milliseconds()
	if runErr != nil {
		step.Status = "failed"
		step.Details = []string{runErr.Error()}
		return step, false
	}
	step.Status = "passed"
	return step, true
}
// writeVerifySummary writes summary.json into artifactRoot and, for tracked
// worktrees, records the verification run in the worktree metadata.
// Encode/write failures are fatal; a metadata-update failure only warns so
// the summary artifact itself is never lost.
func writeVerifySummary(summary VerifySummary, artifactRoot, commonGitDir string, manifest agentlab.Manifest, hasManifest bool, runAt time.Time) {
	summaryPath := filepath.Join(artifactRoot, "summary.json")
	data, err := json.MarshalIndent(summary, "", " ")
	if err != nil {
		log.Fatalf("Failed to encode verify summary: %v", err)
	}
	if err := os.WriteFile(summaryPath, data, 0644); err != nil {
		log.Fatalf("Failed to write verify summary: %v", err)
	}
	if hasManifest {
		if err := agentlab.UpdateVerification(commonGitDir, manifest, summaryPath, runAt); err != nil {
			log.Warnf("Failed to update worktree verification metadata: %v", err)
		}
	}
}
// pythonForRepo picks the Python interpreter for a checkout: its own .venv if
// present, otherwise the control checkout's .venv for tracked agent-lab
// worktrees, otherwise whatever "python" resolves to on PATH.
func pythonForRepo(repoRoot string) string {
	localVenv := filepath.Join(repoRoot, ".venv", "bin", "python")
	if _, err := os.Stat(localVenv); err == nil {
		return localVenv
	}
	if manifest, found := currentAgentLabManifest(repoRoot); found {
		sharedVenv := filepath.Join(manifest.CreatedFromPath, ".venv", "bin", "python")
		if _, err := os.Stat(sharedVenv); err == nil {
			return sharedVenv
		}
	}
	return "python"
}

View File

@@ -13,6 +13,7 @@ import (
log "github.com/sirupsen/logrus"
"github.com/spf13/cobra"
"github.com/onyx-dot-app/onyx/tools/ods/internal/envutil"
"github.com/onyx-dot-app/onyx/tools/ods/internal/paths"
)
@@ -22,28 +23,32 @@ type webPackageJSON struct {
// NewWebCommand creates a command that runs npm scripts from the web directory.
func NewWebCommand() *cobra.Command {
var worktree string
cmd := &cobra.Command{
Use: "web <script> [args...]",
Short: "Run web/package.json npm scripts",
Long: webHelpDescription(),
Args: cobra.MinimumNArgs(1),
Args: cobra.MinimumNArgs(1),
ValidArgsFunction: func(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) {
if len(args) > 0 {
return nil, cobra.ShellCompDirectiveNoFileComp
}
return webScriptNames(), cobra.ShellCompDirectiveNoFileComp
},
Run: func(cmd *cobra.Command, args []string) {
runWebScript(args)
},
}
cmd.Flags().SetInterspersed(false)
cmd.Flags().StringVar(&worktree, "worktree", "", "tracked agent-lab worktree to run from instead of the current checkout")
cmd.Run = func(cmd *cobra.Command, args []string) {
runWebScript(args, worktree)
}
return cmd
}
func runWebScript(args []string) {
webDir, err := webDir()
func runWebScript(args []string, worktree string) {
repoRoot, manifest, hasManifest := resolveAgentLabTarget(worktree)
webDir, err := webDirForRoot(repoRoot)
if err != nil {
log.Fatalf("Failed to find web directory: %v", err)
}
@@ -68,6 +73,13 @@ func runWebScript(args []string) {
webCmd.Stderr = os.Stderr
webCmd.Stdin = os.Stdin
if hasManifest {
webCmd.Env = envutil.ApplyOverrides(os.Environ(), manifest.RuntimeEnv())
log.Infof("agent-lab worktree %s detected: web=%s api=%s", manifest.Branch, manifest.URLs.Web, manifest.URLs.API)
log.Infof("lane=%s base-ref=%s", manifest.ResolvedLane(), manifest.BaseRef)
log.Infof("dependency mode=%s search-infra=%s", manifest.ResolvedDependencies().Mode, manifest.ResolvedDependencies().SearchInfraMode)
}
if err := webCmd.Run(); err != nil {
// For wrapped commands, preserve the child process's exit code and
// avoid duplicating already-printed stderr output.
@@ -101,7 +113,8 @@ func webHelpDescription() string {
Examples:
ods web dev
ods web lint
ods web test --watch`
ods web test --watch
ods web dev --worktree codex/fix/auth-banner-modal`
scripts := webScriptNames()
if len(scripts) == 0 {
@@ -112,7 +125,7 @@ Examples:
}
func loadWebScripts() (map[string]string, error) {
webDir, err := webDir()
webDir, err := webDirForRoot("")
if err != nil {
return nil, err
}
@@ -135,10 +148,13 @@ func loadWebScripts() (map[string]string, error) {
return pkg.Scripts, nil
}
func webDir() (string, error) {
root, err := paths.GitRoot()
if err != nil {
return "", err
func webDirForRoot(root string) (string, error) {
if root == "" {
var err error
root, err = paths.GitRoot()
if err != nil {
return "", err
}
}
return filepath.Join(root, "web"), nil
}

626
tools/ods/cmd/worktree.go Normal file
View File

@@ -0,0 +1,626 @@
package cmd
import (
"fmt"
"os"
"os/exec"
"path/filepath"
"strings"
"text/tabwriter"
log "github.com/sirupsen/logrus"
"github.com/spf13/cobra"
"github.com/onyx-dot-app/onyx/tools/ods/internal/agentlab"
"github.com/onyx-dot-app/onyx/tools/ods/internal/paths"
)
// WorktreeCreateOptions holds flag values for `worktree create`.
type WorktreeCreateOptions struct {
	From           string // git ref to branch from; empty = inferred from the branch lane
	Path           string // custom checkout path; empty = the default location
	Bootstrap      bool   // bootstrap env, Python, and web deps after creation
	DependencyMode string // "namespaced" or "shared"
}

// WorktreeRemoveOptions holds flag values for `worktree remove`.
// NOTE(review): field semantics inferred from names — the remove
// implementation is not in view; confirm before relying on them.
type WorktreeRemoveOptions struct {
	Force    bool
	DropDeps bool
}

// WorktreeBootstrapOptions holds flag values for `worktree bootstrap`.
// NOTE(review): mode value sets are defined by the bootstrap implementation,
// which is not in view — confirm there.
type WorktreeBootstrapOptions struct {
	EnvMode    string
	PythonMode string
	WebMode    string
}
// NewWorktreeCommand creates the parent worktree command.
func NewWorktreeCommand() *cobra.Command {
	worktreeCmd := &cobra.Command{
		Use:   "worktree",
		Short: "Manage agent-lab git worktrees and harness metadata",
		Long: `Manage agent-lab git worktrees and the local harness state that makes
them bootable with isolated ports, URLs, and artifact directories.`,
	}
	for _, sub := range []*cobra.Command{
		newWorktreeCreateCommand(),
		newWorktreeBootstrapCommand(),
		newWorktreeDepsCommand(),
		newWorktreeStatusCommand(),
		newWorktreeShowCommand(),
		newWorktreeRemoveCommand(),
	} {
		worktreeCmd.AddCommand(sub)
	}
	return worktreeCmd
}
// newWorktreeCreateCommand builds `worktree create <branch>` with flags for
// the base ref, checkout path, bootstrap toggle, and dependency mode.
func newWorktreeCreateCommand() *cobra.Command {
	opts := &WorktreeCreateOptions{}
	cmd := &cobra.Command{
		Use:   "create <branch>",
		Short: "Create a new agent-lab worktree with isolated runtime metadata",
		Long: `Create a tracked agent-lab worktree and bootstrap its local runtime state.
Branch lanes control the default base ref when --from is not supplied:
codex/lab/<name> -> codex/agent-lab
codex/fix/<name> -> origin/main
codex/feat/<name> -> origin/main
Use conventional branch lanes for product work so the base stays explicit.`,
		Args: cobra.ExactArgs(1),
		Run: func(cmd *cobra.Command, args []string) {
			runWorktreeCreate(args[0], opts)
		},
	}
	cmd.Flags().StringVar(&opts.From, "from", "", "git ref to branch from (defaults are inferred from the branch lane)")
	cmd.Flags().StringVar(&opts.Path, "path", "", "custom checkout path for the new worktree")
	cmd.Flags().BoolVar(&opts.Bootstrap, "bootstrap", true, "bootstrap env, Python, and frontend dependencies for the worktree")
	cmd.Flags().StringVar(&opts.DependencyMode, "dependency-mode", string(agentlab.DependencyModeNamespaced), "dependency mode: namespaced or shared")
	return cmd
}
// newWorktreeBootstrapCommand builds `worktree bootstrap [worktree]`; each
// flag selects a per-area bootstrap mode and defaults to auto.
func newWorktreeBootstrapCommand() *cobra.Command {
	opts := &WorktreeBootstrapOptions{}
	cmd := &cobra.Command{
		Use:   "bootstrap [worktree]",
		Short: "Bootstrap env files and dependencies for an agent-lab worktree",
		Args:  cobra.MaximumNArgs(1),
		Run: func(cmd *cobra.Command, args []string) {
			// An empty identifier selects the current checkout's worktree.
			identifier := ""
			if len(args) == 1 {
				identifier = args[0]
			}
			runWorktreeBootstrap(identifier, opts)
		},
	}
	cmd.Flags().StringVar(&opts.EnvMode, "env-mode", string(agentlab.BootstrapModeAuto), "env bootstrap mode: auto, skip, link, copy")
	cmd.Flags().StringVar(&opts.PythonMode, "python-mode", string(agentlab.BootstrapModeAuto), "Python bootstrap mode: auto, skip, link, copy")
	cmd.Flags().StringVar(&opts.WebMode, "web-mode", string(agentlab.BootstrapModeAuto), "frontend bootstrap mode: auto, skip, clone, copy, npm")
	return cmd
}
// newWorktreeDepsCommand builds the `worktree deps` command group with the
// up/status/reset/down subcommands. All four take one optional positional
// worktree identifier, so the extraction boilerplate is shared in
// optionalWorktreeArg instead of being repeated per subcommand.
func newWorktreeDepsCommand() *cobra.Command {
	cmd := &cobra.Command{
		Use:   "deps",
		Short: "Manage namespaced external dependencies for an agent-lab worktree",
	}
	cmd.AddCommand(&cobra.Command{
		Use:   "up [worktree]",
		Short: "Provision external dependency state for a worktree",
		Args:  cobra.MaximumNArgs(1),
		Run: func(cmd *cobra.Command, args []string) {
			runWorktreeDepsUp(optionalWorktreeArg(args))
		},
	})
	cmd.AddCommand(&cobra.Command{
		Use:   "status [worktree]",
		Short: "Inspect external dependency state for a worktree",
		Args:  cobra.MaximumNArgs(1),
		Run: func(cmd *cobra.Command, args []string) {
			runWorktreeDepsStatus(optionalWorktreeArg(args))
		},
	})
	cmd.AddCommand(&cobra.Command{
		Use:   "reset [worktree]",
		Short: "Reset namespaced external dependency state for a worktree",
		Args:  cobra.MaximumNArgs(1),
		Run: func(cmd *cobra.Command, args []string) {
			runWorktreeDepsReset(optionalWorktreeArg(args))
		},
	})
	cmd.AddCommand(&cobra.Command{
		Use:   "down [worktree]",
		Short: "Tear down namespaced external dependency state for a worktree",
		Args:  cobra.MaximumNArgs(1),
		Run: func(cmd *cobra.Command, args []string) {
			runWorktreeDepsDown(optionalWorktreeArg(args))
		},
	})
	return cmd
}

// optionalWorktreeArg returns the single optional positional argument, or ""
// when the caller did not name a worktree (meaning "use the current one").
func optionalWorktreeArg(args []string) string {
	if len(args) == 0 {
		return ""
	}
	return args[0]
}
// newWorktreeStatusCommand builds the `worktree status` listing command.
// It takes no arguments and no flags.
func newWorktreeStatusCommand() *cobra.Command {
	statusCmd := &cobra.Command{
		Use:   "status",
		Short: "List tracked agent-lab worktrees",
		Run: func(_ *cobra.Command, _ []string) {
			runWorktreeStatus()
		},
	}
	return statusCmd
}
// newWorktreeShowCommand builds `worktree show [worktree]`; the optional
// argument selects a worktree, defaulting to the current checkout's one.
func newWorktreeShowCommand() *cobra.Command {
	showCmd := &cobra.Command{
		Use:   "show [worktree]",
		Short: "Show detailed metadata for an agent-lab worktree",
		Args:  cobra.MaximumNArgs(1),
		Run: func(_ *cobra.Command, args []string) {
			target := ""
			if len(args) > 0 {
				target = args[0]
			}
			runWorktreeShow(target)
		},
	}
	return showCmd
}
// newWorktreeRemoveCommand builds `worktree remove <worktree>` with flags to
// force git removal and to drop namespaced dependencies first.
func newWorktreeRemoveCommand() *cobra.Command {
	opts := &WorktreeRemoveOptions{}
	cmd := &cobra.Command{
		Use:   "remove <worktree>",
		Short: "Remove an agent-lab worktree and its local state",
		Args:  cobra.ExactArgs(1),
		Run: func(cmd *cobra.Command, args []string) {
			runWorktreeRemove(args[0], opts)
		},
	}
	cmd.Flags().BoolVar(&opts.Force, "force", false, "force removal even if git reports uncommitted changes")
	cmd.Flags().BoolVar(&opts.DropDeps, "drop-deps", false, "tear down namespaced dependencies before removing the worktree")
	return cmd
}
// runWorktreeCreate creates a git worktree for branch, allocates isolated
// ports, writes env files and a manifest, optionally bootstraps dependencies,
// provisions external dependency state, and prints a summary plus next-step
// hints. Any failure is fatal.
func runWorktreeCreate(branch string, opts *WorktreeCreateOptions) {
	repoRoot, err := paths.GitRoot()
	if err != nil {
		log.Fatalf("Failed to determine git root: %v", err)
	}
	commonGitDir, err := agentlab.GetCommonGitDir()
	if err != nil {
		log.Fatalf("Failed to determine git common dir: %v", err)
	}
	// Refuse to double-track a branch that already has a worktree.
	if manifest, found, err := agentlab.FindByIdentifier(commonGitDir, branch); err != nil {
		log.Fatalf("Failed to inspect existing worktrees: %v", err)
	} else if found {
		log.Fatalf("Worktree already exists for %s at %s", manifest.Branch, manifest.CheckoutPath)
	}
	// Existing manifests feed the port allocator so ports don't collide.
	manifests, err := agentlab.LoadAll(commonGitDir)
	if err != nil {
		log.Fatalf("Failed to load worktree metadata: %v", err)
	}
	ports, err := agentlab.AllocatePorts(manifests)
	if err != nil {
		log.Fatalf("Failed to allocate worktree ports: %v", err)
	}
	dependencyMode := agentlab.DependencyMode(opts.DependencyMode)
	switch dependencyMode {
	case agentlab.DependencyModeShared, agentlab.DependencyModeNamespaced:
	default:
		log.Fatalf("Invalid dependency mode %q: must be shared or namespaced", opts.DependencyMode)
	}
	checkoutPath := opts.Path
	if checkoutPath == "" {
		checkoutPath = agentlab.DefaultCheckoutPath(repoRoot, branch)
	}
	checkoutPath, err = filepath.Abs(checkoutPath)
	if err != nil {
		log.Fatalf("Failed to resolve checkout path: %v", err)
	}
	if _, err := os.Stat(checkoutPath); err == nil {
		log.Fatalf("Checkout path already exists: %s", checkoutPath)
	}
	// The branch lane picks the base ref when --from was not supplied.
	baseSelection := agentlab.ResolveCreateBaseRef(branch, opts.From, agentlab.GitRefExists)
	manifest := agentlab.BuildManifest(
		repoRoot,
		commonGitDir,
		branch,
		baseSelection.Lane,
		baseSelection.Ref,
		checkoutPath,
		ports,
		dependencyMode,
	)
	// core.hooksPath=/dev/null keeps repo hooks from firing during the add.
	args := []string{"-c", "core.hooksPath=/dev/null", "worktree", "add", "-b", branch, checkoutPath, baseSelection.Ref}
	log.Infof("Creating worktree %s at %s", branch, checkoutPath)
	gitCmd := exec.Command("git", args...)
	gitCmd.Stdout = os.Stdout
	gitCmd.Stderr = os.Stderr
	gitCmd.Stdin = os.Stdin
	if err := gitCmd.Run(); err != nil {
		log.Fatalf("git worktree add failed: %v", err)
	}
	// Record the symlink-resolved path so later lookups match what git sees.
	if resolvedPath, err := filepath.EvalSymlinks(checkoutPath); err == nil {
		manifest.CheckoutPath = resolvedPath
	}
	if err := agentlab.WriteEnvFiles(manifest); err != nil {
		log.Fatalf("Failed to write worktree env files: %v", err)
	}
	if err := agentlab.WriteManifest(commonGitDir, manifest); err != nil {
		log.Fatalf("Failed to write worktree manifest: %v", err)
	}
	if opts.Bootstrap {
		// create always bootstraps in auto mode; `worktree bootstrap` exposes
		// the finer-grained per-area modes.
		bootstrapResult, err := agentlab.Bootstrap(manifest, agentlab.BootstrapOptions{
			EnvMode:    agentlab.BootstrapModeAuto,
			PythonMode: agentlab.BootstrapModeAuto,
			WebMode:    agentlab.BootstrapModeAuto,
		})
		if err != nil {
			log.Fatalf("Failed to bootstrap worktree: %v", err)
		}
		for _, action := range bootstrapResult.Actions {
			fmt.Printf(" bootstrap: %s\n", action)
		}
	}
	manifest, dependencyResult, err := agentlab.ProvisionDependencies(commonGitDir, manifest)
	if err != nil {
		log.Fatalf("Failed to provision worktree dependencies: %v", err)
	}
	for _, action := range dependencyResult.Actions {
		fmt.Printf(" deps: %s\n", action)
	}
	// Human-readable summary of everything that was allocated.
	fmt.Printf("Created agent-lab worktree %s\n", manifest.Branch)
	fmt.Printf(" checkout: %s\n", manifest.CheckoutPath)
	fmt.Printf(" lane: %s\n", manifest.ResolvedLane())
	fmt.Printf(" base ref: %s\n", manifest.BaseRef)
	fmt.Printf(" base selection: %s\n", baseSelection.Reason)
	fmt.Printf(" dependency mode: %s\n", manifest.ResolvedDependencies().Mode)
	if manifest.ResolvedDependencies().Namespace != "" {
		fmt.Printf(" dependency namespace: %s\n", manifest.ResolvedDependencies().Namespace)
	}
	if manifest.ResolvedDependencies().Mode == agentlab.DependencyModeNamespaced {
		fmt.Printf(" postgres database: %s\n", manifest.ResolvedDependencies().PostgresDatabase)
		fmt.Printf(" redis prefix: %s\n", manifest.ResolvedDependencies().RedisPrefix)
		fmt.Printf(" file-store bucket: %s\n", manifest.ResolvedDependencies().FileStoreBucket)
	}
	fmt.Printf(" web url: %s\n", manifest.URLs.Web)
	fmt.Printf(" api url: %s\n", manifest.URLs.API)
	fmt.Printf(" mcp url: %s\n", manifest.URLs.MCP)
	fmt.Printf(" artifacts: %s\n", manifest.ArtifactDir)
	for _, warning := range manifest.DependencyWarnings() {
		fmt.Printf(" note: %s\n", warning)
	}
	fmt.Printf("\nNext steps:\n")
	fmt.Printf(" cd %s\n", manifest.CheckoutPath)
	fmt.Printf(" # Make edits in the worktree itself.\n")
	if manifest.ResolvedLane() == agentlab.WorktreeLaneProduct {
		// Product-lane worktrees are driven from the control checkout.
		fmt.Printf(" # Run harness commands from the control checkout with --worktree %s.\n", manifest.Branch)
		fmt.Printf(" ods verify --worktree %s\n", manifest.Branch)
		fmt.Printf(" ods backend api --worktree %s\n", manifest.Branch)
		fmt.Printf(" ods web dev --worktree %s\n", manifest.Branch)
	} else {
		fmt.Printf(" ods backend api\n")
		fmt.Printf(" ods backend model_server\n")
		fmt.Printf(" ods web dev\n")
		fmt.Printf(" ods verify\n")
	}
}
// runWorktreeBootstrap re-runs env/Python/web bootstrap for a worktree using
// the caller-selected modes and echoes each action the bootstrapper took.
func runWorktreeBootstrap(identifier string, opts *WorktreeBootstrapOptions) {
	manifest := mustResolveWorktree(identifier)
	modes := agentlab.BootstrapOptions{
		EnvMode:    agentlab.BootstrapMode(opts.EnvMode),
		PythonMode: agentlab.BootstrapMode(opts.PythonMode),
		WebMode:    agentlab.BootstrapMode(opts.WebMode),
	}
	result, err := agentlab.Bootstrap(manifest, modes)
	if err != nil {
		log.Fatalf("Failed to bootstrap worktree %s: %v", manifest.Branch, err)
	}
	fmt.Printf("Bootstrapped %s\n", manifest.Branch)
	for _, action := range result.Actions {
		fmt.Printf(" %s\n", action)
	}
}
// runWorktreeDepsUp provisions external dependency state for a worktree and
// echoes each provisioning action plus any warnings from the updated manifest.
func runWorktreeDepsUp(identifier string) {
	commonGitDir, err := agentlab.GetCommonGitDir()
	if err != nil {
		log.Fatalf("Failed to determine git common dir: %v", err)
	}
	manifest := mustResolveWorktree(identifier)
	// ProvisionDependencies returns an updated manifest; keep it so the
	// warnings printed below reflect the post-provision state.
	manifest, result, err := agentlab.ProvisionDependencies(commonGitDir, manifest)
	if err != nil {
		log.Fatalf("Failed to provision dependencies for %s: %v", manifest.Branch, err)
	}
	fmt.Printf("Provisioned dependencies for %s\n", manifest.Branch)
	for _, action := range result.Actions {
		fmt.Printf(" %s\n", action)
	}
	for _, warning := range manifest.DependencyWarnings() {
		fmt.Printf(" note: %s\n", warning)
	}
}
// runWorktreeDepsStatus prints the dependency mode, namespace, and per-store
// readiness/occupancy counters for a worktree, plus any manifest warnings.
func runWorktreeDepsStatus(identifier string) {
	manifest := mustResolveWorktree(identifier)
	status, err := agentlab.InspectDependencies(manifest)
	if err != nil {
		log.Fatalf("Failed to inspect dependencies for %s: %v", manifest.Branch, err)
	}
	fmt.Printf("branch: %s\n", manifest.Branch)
	fmt.Printf("mode: %s\n", status.Mode)
	if status.Namespace != "" {
		fmt.Printf("namespace: %s\n", status.Namespace)
	}
	// Per-store lines are skipped when the identifier is empty — presumably
	// shared mode leaves these unset; confirm against agentlab.
	if status.PostgresDatabase != "" {
		fmt.Printf("postgres database: %s (ready=%t tables=%d)\n", status.PostgresDatabase, status.PostgresReady, status.PostgresTableCount)
	}
	if status.RedisPrefix != "" {
		fmt.Printf("redis prefix: %s (ready=%t keys=%d)\n", status.RedisPrefix, status.RedisReady, status.RedisKeyCount)
	}
	if status.FileStoreBucket != "" {
		fmt.Printf("file-store bucket: %s (ready=%t objects=%d)\n", status.FileStoreBucket, status.FileStoreReady, status.FileStoreObjectCount)
	}
	fmt.Printf("search infra: %s\n", status.SearchInfraMode)
	for _, warning := range manifest.DependencyWarnings() {
		fmt.Printf("note: %s\n", warning)
	}
}
// runWorktreeDepsReset resets the namespaced external dependency state for a
// worktree and echoes each action performed.
func runWorktreeDepsReset(identifier string) {
	gitCommonDir, dirErr := agentlab.GetCommonGitDir()
	if dirErr != nil {
		log.Fatalf("Failed to determine git common dir: %v", dirErr)
	}
	manifest := mustResolveWorktree(identifier)
	manifest, depsResult, resetErr := agentlab.ResetDependencies(gitCommonDir, manifest)
	if resetErr != nil {
		log.Fatalf("Failed to reset dependencies for %s: %v", manifest.Branch, resetErr)
	}
	fmt.Printf("Reset dependencies for %s\n", manifest.Branch)
	for i := range depsResult.Actions {
		fmt.Printf(" %s\n", depsResult.Actions[i])
	}
}
// runWorktreeDepsDown tears down the namespaced external dependency state for
// a worktree and echoes each teardown action.
func runWorktreeDepsDown(identifier string) {
	gitCommonDir, dirErr := agentlab.GetCommonGitDir()
	if dirErr != nil {
		log.Fatalf("Failed to determine git common dir: %v", dirErr)
	}
	manifest := mustResolveWorktree(identifier)
	manifest, teardown, downErr := agentlab.TeardownDependencies(gitCommonDir, manifest)
	if downErr != nil {
		log.Fatalf("Failed to tear down dependencies for %s: %v", manifest.Branch, downErr)
	}
	fmt.Printf("Tore down dependencies for %s\n", manifest.Branch)
	for i := range teardown.Actions {
		fmt.Printf(" %s\n", teardown.Actions[i])
	}
}
// runWorktreeStatus prints a table of all tracked worktrees, marking the one
// that contains the current checkout with "*".
func runWorktreeStatus() {
	commonGitDir, err := agentlab.GetCommonGitDir()
	if err != nil {
		log.Fatalf("Failed to determine git common dir: %v", err)
	}
	// Best effort: a failure here only costs the "current" marker, not the
	// listing itself.
	repoRoot, _ := paths.GitRoot()
	current, _, _ := agentlab.FindByRepoRoot(commonGitDir, repoRoot)
	manifests, err := agentlab.LoadAll(commonGitDir)
	if err != nil {
		log.Fatalf("Failed to load worktree manifests: %v", err)
	}
	if len(manifests) == 0 {
		log.Info("No agent-lab worktrees tracked yet.")
		return
	}
	tw := tabwriter.NewWriter(os.Stdout, 0, 2, 2, ' ', 0)
	if _, err := fmt.Fprintln(tw, "CURRENT\tBRANCH\tLANE\tMODE\tWEB\tAPI\tPATH"); err != nil {
		log.Fatalf("Failed to write worktree header: %v", err)
	}
	for _, manifest := range manifests {
		marker := ""
		// Guard on a non-empty ID so a zero-valued "current" (lookup failed
		// above) can never mark a row.
		if manifest.ID == current.ID && manifest.ID != "" {
			marker = "*"
		}
		if _, err := fmt.Fprintf(
			tw,
			"%s\t%s\t%s\t%s\t%s\t%s\t%s\n",
			marker,
			manifest.Branch,
			manifest.ResolvedLane(),
			manifest.ResolvedDependencies().Mode,
			manifest.URLs.Web,
			manifest.URLs.API,
			manifest.CheckoutPath,
		); err != nil {
			log.Fatalf("Failed to write worktree row for %s: %v", manifest.Branch, err)
		}
	}
	_ = tw.Flush()
}
// runWorktreeShow dumps every manifest field for one worktree in "key: value"
// form — paths, env files, dependency identifiers, URLs, ports, and the last
// verify result when recorded.
func runWorktreeShow(identifier string) {
	manifest := mustResolveWorktree(identifier)
	fmt.Printf("branch: %s\n", manifest.Branch)
	fmt.Printf("id: %s\n", manifest.ID)
	fmt.Printf("lane: %s\n", manifest.ResolvedLane())
	fmt.Printf("checkout: %s\n", manifest.CheckoutPath)
	fmt.Printf("base-ref: %s\n", manifest.BaseRef)
	fmt.Printf("state-dir: %s\n", manifest.StateDir)
	fmt.Printf("artifacts: %s\n", manifest.ArtifactDir)
	fmt.Printf("backend env: %s\n", manifest.EnvFile)
	fmt.Printf("web env: %s\n", manifest.WebEnvFile)
	fmt.Printf("compose project: %s\n", manifest.ComposeProject)
	fmt.Printf("dependency mode: %s\n", manifest.ResolvedDependencies().Mode)
	if manifest.ResolvedDependencies().Namespace != "" {
		fmt.Printf("dependency namespace: %s\n", manifest.ResolvedDependencies().Namespace)
	}
	// Store identifiers are printed together, keyed off the Postgres name.
	if manifest.ResolvedDependencies().PostgresDatabase != "" {
		fmt.Printf("postgres database: %s\n", manifest.ResolvedDependencies().PostgresDatabase)
		fmt.Printf("redis prefix: %s\n", manifest.ResolvedDependencies().RedisPrefix)
		fmt.Printf("file-store bucket: %s\n", manifest.ResolvedDependencies().FileStoreBucket)
	}
	fmt.Printf("search infra: %s\n", manifest.ResolvedDependencies().SearchInfraMode)
	fmt.Printf("web url: %s\n", manifest.URLs.Web)
	fmt.Printf("api url: %s\n", manifest.URLs.API)
	fmt.Printf("mcp url: %s\n", manifest.URLs.MCP)
	fmt.Printf("ports: web=%d api=%d model_server=%d mcp=%d\n", manifest.Ports.Web, manifest.Ports.API, manifest.Ports.ModelServer, manifest.Ports.MCP)
	// Verification fields are optional; only print them when populated.
	if manifest.LastVerifiedAt != "" {
		fmt.Printf("last verified: %s\n", manifest.LastVerifiedAt)
	}
	if manifest.LastVerifySummary != "" {
		fmt.Printf("last summary: %s\n", manifest.LastVerifySummary)
	}
	for _, warning := range manifest.DependencyWarnings() {
		fmt.Printf("note: %s\n", warning)
	}
}
// mustResolveWorktree loads the manifest for the named worktree, or — when
// identifier is empty — for the worktree containing the current checkout.
// Any failure is fatal.
func mustResolveWorktree(identifier string) agentlab.Manifest {
	commonGitDir, err := agentlab.GetCommonGitDir()
	if err != nil {
		log.Fatalf("Failed to determine git common dir: %v", err)
	}
	if identifier == "" {
		repoRoot, err := paths.GitRoot()
		if err != nil {
			log.Fatalf("Failed to determine git root: %v", err)
		}
		manifest, found, err := agentlab.FindByRepoRoot(commonGitDir, repoRoot)
		if err != nil {
			log.Fatalf("Failed to resolve current worktree manifest: %v", err)
		}
		if !found {
			// Fix: this path previously printed `No agent-lab worktree found
			// for ""`, hiding that the lookup was keyed on the current
			// checkout rather than on a name.
			log.Fatalf("No agent-lab worktree found for current checkout %s", repoRoot)
		}
		return manifest
	}
	manifest, found, err := agentlab.FindByIdentifier(commonGitDir, identifier)
	if err != nil {
		log.Fatalf("Failed to resolve worktree manifest: %v", err)
	}
	if !found {
		log.Fatalf("No agent-lab worktree found for %q", identifier)
	}
	return manifest
}
// runWorktreeRemove removes the named worktree: optional dependency teardown,
// `git worktree remove` (with --force when requested and an orphaned-checkout
// fallback), then deletion of the tracked harness state.
func runWorktreeRemove(identifier string, opts *WorktreeRemoveOptions) {
	commonGitDir, err := agentlab.GetCommonGitDir()
	if err != nil {
		log.Fatalf("Failed to determine git common dir: %v", err)
	}
	manifest, found, err := agentlab.FindByIdentifier(commonGitDir, identifier)
	if err != nil {
		log.Fatalf("Failed to resolve worktree: %v", err)
	}
	if !found {
		log.Fatalf("No agent-lab worktree found for %q", identifier)
	}
	if opts.DropDeps {
		// Tear down namespaced dependency state before the checkout goes away.
		var teardownResult *agentlab.DependencyResult
		manifest, teardownResult, err = agentlab.TeardownDependencies(commonGitDir, manifest)
		if err != nil {
			log.Fatalf("Failed to tear down worktree dependencies: %v", err)
		}
		for _, action := range teardownResult.Actions {
			fmt.Printf(" deps: %s\n", action)
		}
	}
	args := []string{"worktree", "remove"}
	if opts.Force {
		args = append(args, "--force")
	}
	args = append(args, manifest.CheckoutPath)
	log.Infof("Removing worktree %s", manifest.Branch)
	gitCmd := exec.Command("git", args...)
	gitCmd.Stdout = os.Stdout
	gitCmd.Stderr = os.Stderr
	gitCmd.Stdin = os.Stdin
	if err := gitCmd.Run(); err != nil {
		// A forced removal can leave a checkout git no longer tracks; fall
		// back to deleting the directory directly in that case.
		if opts.Force && isOrphanedWorktree(manifest.CheckoutPath) {
			log.Warnf("git detached %s but left an orphaned checkout behind; removing %s", manifest.Branch, manifest.CheckoutPath)
			if removeErr := os.RemoveAll(manifest.CheckoutPath); removeErr != nil {
				log.Fatalf("git worktree remove failed: %v (fallback cleanup failed: %v)", err, removeErr)
			}
		} else {
			log.Fatalf("git worktree remove failed: %v", err)
		}
	}
	if err := agentlab.RemoveState(commonGitDir, manifest.ID); err != nil {
		log.Fatalf("Failed to remove worktree state: %v", err)
	}
	fmt.Printf("Removed agent-lab worktree %s\n", manifest.Branch)
	// Without --drop-deps, namespaced state intentionally survives removal.
	if manifest.ResolvedDependencies().Mode == agentlab.DependencyModeNamespaced && !opts.DropDeps {
		fmt.Printf(" note: namespaced Postgres/Redis/MinIO state was left in place. Use `ods worktree deps down %s` before removal if you want cleanup.\n", manifest.Branch)
	}
}
// isOrphanedWorktree reports whether checkoutPath is no longer a usable git
// worktree: git does not list it, the directory is gone, or git commands fail
// inside it. Used to decide whether a failed `git worktree remove` left an
// orphaned checkout that is safe to delete manually.
func isOrphanedWorktree(checkoutPath string) bool {
	// Compare against the porcelain output line-by-line rather than with a
	// raw substring search, so one worktree's path can never false-match in
	// the middle of another line (and the final entry matches even without a
	// trailing newline).
	if output, err := exec.Command("git", "worktree", "list", "--porcelain").Output(); err == nil {
		for _, line := range strings.Split(string(output), "\n") {
			if line == "worktree "+checkoutPath {
				return false
			}
		}
	}
	if _, statErr := os.Stat(checkoutPath); os.IsNotExist(statErr) {
		return true
	}
	// The directory exists but git can no longer operate inside it.
	if statusErr := exec.Command("git", "-C", checkoutPath, "status", "--short").Run(); statusErr != nil {
		return true
	}
	return false
}

View File

@@ -0,0 +1,95 @@
package agentcheck
import (
"bufio"
"fmt"
"path/filepath"
"regexp"
"strings"
)
// hunkHeaderPattern captures the new-file start line from a unified diff hunk
// header such as "@@ -10,2 +11,3 @@".
var hunkHeaderPattern = regexp.MustCompile(`^@@ -\d+(?:,\d+)? \+(\d+)(?:,\d+)? @@`)

// AddedLine is one "+" line from a diff: its repo-relative path, its line
// number in the NEW version of the file, and its content without the "+".
type AddedLine struct {
	Path    string
	LineNum int
	Content string
}

// Violation reports one rule hit on an added line.
type Violation struct {
	RuleID  string
	Path    string
	LineNum int
	Message string
	Content string
}
// ParseAddedLines walks a unified diff and returns every added ("+") line
// together with its file path and its line number in the NEW version of the
// file. Hunk headers seed the line counter; removed lines do not advance it.
func ParseAddedLines(diff string) ([]AddedLine, error) {
	scanner := bufio.NewScanner(strings.NewReader(diff))
	// Diff lines can be very long (e.g. minified assets); allow up to 1 MiB.
	scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024)
	var addedLines []AddedLine
	currentPath := ""
	currentNewLine := 0
	inHunk := false
	for scanner.Scan() {
		line := scanner.Text()
		switch {
		case strings.HasPrefix(line, "+++ "):
			currentPath = normalizeDiffPath(strings.TrimPrefix(line, "+++ "))
			inHunk = false
		case strings.HasPrefix(line, "@@ "):
			match := hunkHeaderPattern.FindStringSubmatch(line)
			if len(match) != 2 {
				return nil, fmt.Errorf("failed to parse hunk header: %s", line)
			}
			var err error
			currentNewLine, err = parseLineNumber(match[1])
			if err != nil {
				return nil, err
			}
			inHunk = true
		case !inHunk || currentPath == "":
			continue
		case strings.HasPrefix(line, `\`):
			// Fix: "\ No newline at end of file" is diff metadata, not file
			// content; it previously fell into the default case and wrongly
			// advanced the new-file line counter.
			continue
		case strings.HasPrefix(line, "+") && !strings.HasPrefix(line, "+++"):
			addedLines = append(addedLines, AddedLine{
				Path:    currentPath,
				LineNum: currentNewLine,
				Content: strings.TrimPrefix(line, "+"),
			})
			currentNewLine++
		case strings.HasPrefix(line, "-") && !strings.HasPrefix(line, "---"):
			// Removed lines exist only in the old file.
			continue
		default:
			// Context line: present in both files, advances the new counter.
			currentNewLine++
		}
	}
	if err := scanner.Err(); err != nil {
		return nil, fmt.Errorf("failed to scan diff: %w", err)
	}
	return addedLines, nil
}
// normalizeDiffPath converts a "+++" header target into a clean repo-relative
// slash path; "/dev/null" (deleted file) maps to "".
func normalizeDiffPath(path string) string {
	trimmed := strings.TrimPrefix(strings.TrimSpace(path), "b/")
	if trimmed == "/dev/null" {
		return ""
	}
	return filepath.ToSlash(trimmed)
}
// parseLineNumber converts the digit string captured from a hunk header into
// an int. Fix: an empty string previously fell through the loop and returned
// (0, nil); it is now rejected like any other non-numeric input.
func parseLineNumber(value string) (int, error) {
	if value == "" {
		return 0, fmt.Errorf("invalid line number: %s", value)
	}
	lineNum := 0
	for _, ch := range value {
		if ch < '0' || ch > '9' {
			return 0, fmt.Errorf("invalid line number: %s", value)
		}
		lineNum = lineNum*10 + int(ch-'0')
	}
	return lineNum, nil
}

View File

@@ -0,0 +1,143 @@
package agentcheck
import (
"reflect"
"testing"
)
// TestParseAddedLines covers a multi-file diff: added-line numbering across
// hunks, removed lines not advancing the counter, and b/ path normalization.
func TestParseAddedLines(t *testing.T) {
	diff := `diff --git a/backend/onyx/server/foo.py b/backend/onyx/server/foo.py
index 1111111..2222222 100644
--- a/backend/onyx/server/foo.py
+++ b/backend/onyx/server/foo.py
@@ -10,1 +11,3 @@
context = old_value
+from fastapi import HTTPException
-raise OldError()
+raise HTTPException(status_code=400, detail="bad")
@@ -20,0 +23,1 @@
+task.delay (payload)
diff --git a/web/src/sections/Foo.tsx b/web/src/sections/Foo.tsx
index 1111111..2222222 100644
--- a/web/src/sections/Foo.tsx
+++ b/web/src/sections/Foo.tsx
@@ -3,0 +4 @@
+import { Thing } from "@/components/Thing";`
	addedLines, err := ParseAddedLines(diff)
	if err != nil {
		t.Fatalf("ParseAddedLines returned error: %v", err)
	}
	if len(addedLines) != 4 {
		t.Fatalf("expected 4 added lines, got %d", len(addedLines))
	}
	// Hunk starts at new line 11; the context line occupies it, so the first
	// added line lands on 12.
	if addedLines[0].Path != "backend/onyx/server/foo.py" || addedLines[0].LineNum != 12 {
		t.Fatalf("unexpected first added line: %+v", addedLines[0])
	}
	if addedLines[2].Path != "backend/onyx/server/foo.py" || addedLines[2].LineNum != 23 {
		t.Fatalf("unexpected third added line: %+v", addedLines[2])
	}
	if addedLines[3].Path != "web/src/sections/Foo.tsx" || addedLines[3].LineNum != 4 {
		t.Fatalf("unexpected final added line: %+v", addedLines[3])
	}
}
// TestParseAddedLinesRejectsMalformedHunkHeader ensures a hunk header that
// does not match "@@ -a,b +c,d @@" fails loudly instead of miscounting lines.
func TestParseAddedLinesRejectsMalformedHunkHeader(t *testing.T) {
	diff := `diff --git a/backend/onyx/server/foo.py b/backend/onyx/server/foo.py
--- a/backend/onyx/server/foo.py
+++ b/backend/onyx/server/foo.py
@@ invalid @@
+raise HTTPException(status_code=400, detail="bad")`
	if _, err := ParseAddedLines(diff); err == nil {
		t.Fatal("expected malformed hunk header to return an error")
	}
}
// TestCheckAddedLinesFindsExpectedViolations pins one hit per default rule,
// in input order, including two hits from the same rule on adjacent lines.
func TestCheckAddedLinesFindsExpectedViolations(t *testing.T) {
	lines := []AddedLine{
		{Path: "backend/onyx/server/foo.py", LineNum: 10, Content: "from fastapi import HTTPException"},
		{Path: "backend/onyx/server/foo.py", LineNum: 11, Content: `raise HTTPException(status_code=400, detail="bad")`},
		{Path: "backend/onyx/server/foo.py", LineNum: 12, Content: "response_model = FooResponse"},
		{Path: "backend/onyx/server/foo.py", LineNum: 13, Content: "my_task.delay (payload)"},
		{Path: "web/src/sections/Foo.tsx", LineNum: 20, Content: `export { Thing } from "@/components/Thing";`},
	}
	violations := CheckAddedLines(lines)
	if len(violations) != 5 {
		t.Fatalf("expected 5 violations, got %d: %+v", len(violations), violations)
	}
	expectedRules := []string{
		"no-new-http-exception",
		"no-new-http-exception",
		"no-new-response-model",
		"no-new-delay",
		"no-new-legacy-component-import",
	}
	for i, expectedRule := range expectedRules {
		if violations[i].RuleID != expectedRule {
			t.Fatalf("expected rule %q at index %d, got %q", expectedRule, i, violations[i].RuleID)
		}
	}
}
// TestCheckAddedLinesIgnoresCommentsStringsAndAllowedScopes covers every
// suppression path: string literals, comments, word-boundary misses, the
// main.py exemption, tests, the model server, and the legacy components tree.
func TestCheckAddedLinesIgnoresCommentsStringsAndAllowedScopes(t *testing.T) {
	lines := []AddedLine{
		{Path: "backend/onyx/server/foo.py", LineNum: 1, Content: `message = "HTTPException"`},
		{Path: "backend/onyx/server/foo.py", LineNum: 2, Content: `detail = "response_model="`},
		{Path: "backend/onyx/server/foo.py", LineNum: 3, Content: `note = ".delay("`},
		{Path: "backend/onyx/server/foo.py", LineNum: 4, Content: `# HTTPException`},
		{Path: "backend/onyx/server/foo.py", LineNum: 5, Content: `handler = HTTPExceptionAlias`},
		{Path: "backend/onyx/main.py", LineNum: 6, Content: `raise HTTPException(status_code=400, detail="bad")`},
		{Path: "backend/tests/unit/test_foo.py", LineNum: 7, Content: `from fastapi import HTTPException`},
		{Path: "backend/model_server/foo.py", LineNum: 8, Content: `task.delay(payload)`},
		{Path: "web/src/sections/Foo.tsx", LineNum: 9, Content: `const path = "@/components/Thing";`},
		{Path: "web/src/sections/Foo.tsx", LineNum: 10, Content: `// import { Thing } from "@/components/Thing";`},
		{Path: "web/src/components/Foo.tsx", LineNum: 11, Content: `import { Bar } from "@/components/Bar";`},
	}
	violations := CheckAddedLines(lines)
	if len(violations) != 0 {
		t.Fatalf("expected no violations, got %+v", violations)
	}
}
// TestCheckAddedLinesWithRulesSupportsCustomRuleSets verifies that a caller
// supplied rule set fully replaces the defaults (the legacy component import
// here goes unflagged) and that violations carry the custom rule metadata.
func TestCheckAddedLinesWithRulesSupportsCustomRuleSets(t *testing.T) {
	lines := []AddedLine{
		{Path: "backend/onyx/server/foo.py", LineNum: 12, Content: "response_model = FooResponse"},
		{Path: "web/src/sections/Foo.tsx", LineNum: 20, Content: `import type { Thing } from "@/components/Thing";`},
	}
	rules := []Rule{
		{
			ID:      "python-response-model-only",
			Message: "response_model is not allowed",
			Scope:   backendProductPythonScope(),
			Match: func(line lineView) bool {
				return responseModelPattern.MatchString(line.CodeSansStrings)
			},
		},
	}
	violations := CheckAddedLinesWithRules(lines, rules)
	expected := []Violation{
		{
			RuleID:  "python-response-model-only",
			Path:    "backend/onyx/server/foo.py",
			LineNum: 12,
			Message: "response_model is not allowed",
			Content: "response_model = FooResponse",
		},
	}
	if !reflect.DeepEqual(expected, violations) {
		t.Fatalf("unexpected violations: %+v", violations)
	}
}

View File

@@ -0,0 +1,101 @@
package agentcheck
import "strings"
// stripLineComment removes any trailing line comment from content, choosing
// the comment marker from the file extension. Unknown extensions pass through
// untouched.
func stripLineComment(path string, content string) string {
	if strings.HasSuffix(path, ".py") {
		return stripCommentMarker(content, "#")
	}
	if isJSLikePath(path) {
		return stripCommentMarker(content, "//")
	}
	return content
}
// isJSLikePath reports whether path names a JavaScript or TypeScript source
// file (.js, .jsx, .ts, .tsx).
func isJSLikePath(path string) bool {
	for _, ext := range []string{".js", ".jsx", ".ts", ".tsx"} {
		if strings.HasSuffix(path, ext) {
			return true
		}
	}
	return false
}
// stripCommentMarker returns line with everything from the first occurrence
// of marker onward removed, ignoring marker text that appears inside a quoted
// string. Quote state tracks ", ', and ` quotes; backslash escapes are
// honored inside non-backtick quotes (the code deliberately skips escape
// handling for backticks).
func stripCommentMarker(line string, marker string) string {
	if marker == "" {
		return line
	}
	var builder strings.Builder
	quote := byte(0) // active quote character; 0 when outside any string
	escaped := false // previous char inside the quote was a backslash
	for i := 0; i < len(line); i++ {
		ch := line[i]
		if quote != 0 {
			// Inside a string: copy everything through, tracking escapes and
			// the closing quote.
			builder.WriteByte(ch)
			if escaped {
				escaped = false
				continue
			}
			if ch == '\\' && quote != '`' {
				escaped = true
				continue
			}
			if ch == quote {
				quote = 0
			}
			continue
		}
		// Only a marker found outside of quotes starts a comment.
		if strings.HasPrefix(line[i:], marker) {
			break
		}
		builder.WriteByte(ch)
		if isQuote(ch) {
			quote = ch
		}
	}
	return builder.String()
}
// stripQuotedStrings blanks out quoted strings in line: the opening quote
// becomes a single space (so tokens on either side stay separated) and the
// string body plus closing quote are dropped. Rule regexes run on the result
// so literal text cannot trigger code-pattern matches.
func stripQuotedStrings(line string) string {
	var builder strings.Builder
	quote := byte(0) // active quote character; 0 when outside any string
	escaped := false // previous char inside the quote was a backslash
	for i := 0; i < len(line); i++ {
		ch := line[i]
		if quote != 0 {
			// Inside a string: emit nothing, just track escapes and the
			// closing quote. Backslash escapes are honored except inside
			// backtick strings.
			if escaped {
				escaped = false
				continue
			}
			if ch == '\\' && quote != '`' {
				escaped = true
				continue
			}
			if ch == quote {
				quote = 0
			}
			continue
		}
		if isQuote(ch) {
			quote = ch
			builder.WriteByte(' ')
			continue
		}
		builder.WriteByte(ch)
	}
	return builder.String()
}
// isQuote reports whether ch opens/closes a string literal in the supported
// languages: double, single, or backtick quote.
func isQuote(ch byte) bool {
	switch ch {
	case '"', '\'', '`':
		return true
	default:
		return false
	}
}

View File

@@ -0,0 +1,170 @@
package agentcheck
import (
"regexp"
"strings"
)
var (
	// httpExceptionPattern flags any bare HTTPException identifier.
	httpExceptionPattern = regexp.MustCompile(`\bHTTPException\b`)
	// responseModelPattern flags FastAPI response_model= keyword usage.
	responseModelPattern = regexp.MustCompile(`\bresponse_model\s*=`)
	// delayCallPattern flags Celery-style .delay( calls, whitespace tolerant.
	delayCallPattern = regexp.MustCompile(`\.\s*delay\s*\(`)
	// componentPathPattern flags module specifiers that resolve into a
	// components/ directory, whether aliased (@/) or relative.
	componentPathPattern = regexp.MustCompile(`["'](?:@/components/|\.\.?/components/|\.\.?/.*/components/)`)
	// importExportPattern matches lines that begin with import or export.
	importExportPattern = regexp.MustCompile(`^\s*(?:import|export)\b`)
)

// Scope decides whether a rule applies to a given repo-relative path.
type Scope func(path string) bool

// Matcher decides whether a single added line violates a rule.
type Matcher func(line lineView) bool

// Rule pairs a scope and matcher with the violation metadata to report.
type Rule struct {
	ID      string
	Message string
	Scope   Scope
	Match   Matcher
}

// lineView is an AddedLine plus derived forms of its content for matchers:
// the normalized path, comment-stripped code, that code with string-literal
// contents blanked, and a whitespace-trimmed variant.
type lineView struct {
	AddedLine
	Path            string
	Code            string
	CodeSansStrings string
	TrimmedCode     string
}
// CheckAddedLines runs the default rule set over the added lines.
func CheckAddedLines(lines []AddedLine) []Violation {
	return CheckAddedLinesWithRules(lines, DefaultRules())
}
// CheckAddedLinesWithRules evaluates every rule against every added line and
// returns one Violation per (line, rule) hit, preserving input order. A nil
// Scope means "all paths"; a nil Match never fires.
func CheckAddedLinesWithRules(lines []AddedLine, rules []Rule) []Violation {
	var found []Violation
	for _, candidate := range lines {
		view := buildLineView(candidate)
		if view.Path == "" {
			// e.g. a "+++ /dev/null" target normalizes to an empty path.
			continue
		}
		for _, rule := range rules {
			inScope := rule.Scope == nil || rule.Scope(view.Path)
			if !inScope || rule.Match == nil {
				continue
			}
			if rule.Match(view) {
				found = append(found, Violation{
					RuleID:  rule.ID,
					Path:    view.Path,
					LineNum: view.LineNum,
					Message: rule.Message,
					Content: view.Content,
				})
			}
		}
	}
	return found
}
// DefaultRules returns a fresh copy of the built-in rule set so callers can
// append to or reorder it without mutating the package-level default.
func DefaultRules() []Rule {
	rules := make([]Rule, len(defaultRules))
	copy(rules, defaultRules)
	return rules
}
// defaultRules is the built-in agent-check rule set. Matchers receive a
// lineView whose CodeSansStrings has comments stripped and string-literal
// contents blanked, so literal text cannot trigger a rule.
var defaultRules = []Rule{
	{
		ID:      "no-new-http-exception",
		Message: "Do not introduce new HTTPException usage in backend product code. Raise OnyxError instead.",
		// backend/onyx/main.py is explicitly exempt from this rule.
		Scope: backendProductPythonScope(exactPath("backend/onyx/main.py")),
		Match: func(line lineView) bool {
			return hasPythonCode(line) && httpExceptionPattern.MatchString(line.CodeSansStrings)
		},
	},
	{
		ID:      "no-new-response-model",
		Message: "Do not introduce response_model on new FastAPI APIs. Type the function directly instead.",
		Scope:   backendProductPythonScope(),
		Match: func(line lineView) bool {
			return hasPythonCode(line) && responseModelPattern.MatchString(line.CodeSansStrings)
		},
	},
	{
		ID:      "no-new-delay",
		Message: "Do not introduce Celery .delay() calls. Use an enqueue path that sets expires= explicitly.",
		Scope:   backendProductPythonScope(),
		Match: func(line lineView) bool {
			return hasPythonCode(line) && delayCallPattern.MatchString(line.CodeSansStrings)
		},
	},
	{
		ID:      "no-new-legacy-component-import",
		Message: "Do not introduce new imports from web/src/components. Prefer Opal or refresh-components.",
		// Applies everywhere under web/src except the legacy tree itself.
		Scope: nonLegacyWebSourceScope(),
		Match: func(line lineView) bool {
			return isLegacyComponentImport(line)
		},
	},
}
// buildLineView derives the matcher-facing views of one added line: the path
// normalized to slashes, the content with any trailing line comment removed,
// that code with string-literal contents blanked, and a trimmed variant.
func buildLineView(line AddedLine) lineView {
	path := normalizeDiffPath(line.Path)
	code := stripLineComment(path, line.Content)
	return lineView{
		AddedLine:       line,
		Path:            path,
		Code:            code,
		CodeSansStrings: stripQuotedStrings(code),
		TrimmedCode:     strings.TrimSpace(code),
	}
}
// backendProductPythonScope scopes a rule to backend product Python files,
// excluding tests, the model server, __pycache__ artifacts, and any extra
// caller-supplied exclusion scopes.
func backendProductPythonScope(excluded ...Scope) Scope {
	return func(path string) bool {
		isBackendPython := strings.HasPrefix(path, "backend/") && strings.HasSuffix(path, ".py")
		if !isBackendPython {
			return false
		}
		for _, prefix := range []string{"backend/tests/", "backend/model_server/"} {
			if strings.HasPrefix(path, prefix) {
				return false
			}
		}
		if strings.Contains(path, "/__pycache__/") {
			return false
		}
		for _, exclude := range excluded {
			if exclude != nil && exclude(path) {
				return false
			}
		}
		return true
	}
}
// nonLegacyWebSourceScope scopes a rule to web/src files outside the legacy
// web/src/components tree.
func nonLegacyWebSourceScope() Scope {
	return func(path string) bool {
		inWebSrc := strings.HasPrefix(path, "web/src/")
		inLegacyComponents := strings.HasPrefix(path, "web/src/components/")
		return inWebSrc && !inLegacyComponents
	}
}
// exactPath builds a Scope matching exactly one repo-relative path.
func exactPath(target string) Scope {
	return func(path string) bool { return path == target }
}
// hasPythonCode reports whether anything remains on the line once comments
// and string-literal contents are stripped — i.e. the line carries real code
// for the Python rules to inspect.
func hasPythonCode(line lineView) bool {
	return strings.TrimSpace(line.CodeSansStrings) != ""
}
// isLegacyComponentImport reports whether the line is an import/export
// statement whose module specifier points into a components/ directory.
func isLegacyComponentImport(line lineView) bool {
	if line.TrimmedCode == "" || !importExportPattern.MatchString(line.TrimmedCode) {
		return false
	}
	// Match against Code (not CodeSansStrings): the module specifier lives
	// inside a string literal and would be blanked out otherwise.
	return componentPathPattern.MatchString(line.Code)
}

View File

@@ -0,0 +1,107 @@
package agentdocs
import (
"fmt"
"os"
"path/filepath"
"regexp"
"strings"
)
// markdownLinkPattern captures the target of inline markdown links: [text](target).
var markdownLinkPattern = regexp.MustCompile(`\[[^\]]+\]\(([^)]+)\)`)

// requiredFiles lists every knowledge-base document that must exist once the
// agent-lab docs tree is present in a checkout.
var requiredFiles = []string{
	"AGENTS.md",
	"docs/agent/README.md",
	"docs/agent/ARCHITECTURE.md",
	"docs/agent/BRANCHING.md",
	"docs/agent/HARNESS.md",
	"docs/agent/GOLDEN_RULES.md",
	"docs/agent/LEGACY_ZONES.md",
	"docs/agent/QUALITY_SCORE.md",
}

// Violation describes one docs problem found under a repo root.
type Violation struct {
	Path    string
	Message string
}
// Validate checks the agent-lab docs tree under root. Repos without
// docs/agent/README.md opt out entirely and return nil. Otherwise it reports
// every missing required file first, then every broken relative markdown link
// inside the required files that are readable.
func Validate(root string) []Violation {
	// Opt-out sentinel: no agent README means this checkout does not carry
	// the knowledge base at all.
	if _, err := os.Stat(filepath.Join(root, filepath.FromSlash("docs/agent/README.md"))); err != nil {
		return nil
	}
	var violations []Violation
	// Pass 1: existence of every required file.
	for _, relPath := range requiredFiles {
		if _, err := os.Stat(filepath.Join(root, filepath.FromSlash(relPath))); err != nil {
			violations = append(violations, Violation{
				Path:    relPath,
				Message: "required agent-lab knowledge-base file is missing",
			})
		}
	}
	// Pass 2: link validation for the files that can be read.
	for _, relPath := range requiredFiles {
		absPath := filepath.Join(root, filepath.FromSlash(relPath))
		content, err := os.ReadFile(absPath)
		if err != nil {
			// Missing/unreadable files were already reported in pass 1.
			continue
		}
		violations = append(violations, validateMarkdownLinks(root, relPath, string(content))...)
	}
	return violations
}
// validateMarkdownLinks returns a Violation for every markdown link in
// content whose target does not exist on disk. External (http/https),
// same-page (#...), and mailto: links are skipped. Relative targets are
// resolved against the directory containing relPath. Optional markdown
// link titles ([text](path "Title")), <...> destination wrappers, and
// #anchors are stripped before the existence check — previously a titled
// link to an existing file was falsely reported as broken.
func validateMarkdownLinks(root string, relPath string, content string) []Violation {
	var violations []Violation
	matches := markdownLinkPattern.FindAllStringSubmatch(content, -1)
	docDir := filepath.Dir(filepath.Join(root, filepath.FromSlash(relPath)))
	for _, match := range matches {
		if len(match) != 2 {
			continue
		}
		target := strings.TrimSpace(match[1])
		if target == "" {
			continue
		}
		if strings.HasPrefix(target, "http://") || strings.HasPrefix(target, "https://") {
			continue
		}
		if strings.HasPrefix(target, "#") || strings.HasPrefix(target, "mailto:") {
			continue
		}
		target = stripLinkDecorations(target)
		target = stripAnchor(target)
		if target == "" {
			// e.g. "<>" or a bare title — nothing checkable left.
			continue
		}
		var absTarget string
		if filepath.IsAbs(target) {
			absTarget = target
		} else {
			absTarget = filepath.Join(docDir, target)
		}
		if _, err := os.Stat(absTarget); err != nil {
			violations = append(violations, Violation{
				Path: relPath,
				Message: fmt.Sprintf(
					"broken markdown link target: %s",
					target,
				),
			})
		}
	}
	return violations
}

// stripLinkDecorations removes an optional markdown link title (everything
// after the first whitespace, as in `path "Title"`) and an optional <...>
// wrapper around the destination.
func stripLinkDecorations(target string) string {
	if idx := strings.IndexAny(target, " \t"); idx >= 0 {
		target = target[:idx]
	}
	if strings.HasPrefix(target, "<") && strings.HasSuffix(target, ">") {
		target = strings.TrimSuffix(strings.TrimPrefix(target, "<"), ">")
	}
	return strings.TrimSpace(target)
}
// stripAnchor returns target with any trailing #fragment removed; a
// target with no "#" is returned unchanged.
func stripAnchor(target string) string {
	idx := strings.Index(target, "#")
	if idx < 0 {
		return target
	}
	return target[:idx]
}

View File

@@ -0,0 +1,61 @@
package agentdocs
import (
"os"
"path/filepath"
"testing"
)
// TestValidateSuccess: a repo with every required agent-lab file and only
// resolvable markdown links yields zero violations.
func TestValidateSuccess(t *testing.T) {
	root := t.TempDir()
	writeFile(t, filepath.Join(root, "AGENTS.md"), `[Agent Docs](./docs/agent/README.md)`)
	// Exercises both a sibling link and a parent-relative link.
	writeFile(t, filepath.Join(root, "docs/agent/README.md"), `[Architecture](./ARCHITECTURE.md)
[Root](../../AGENTS.md)`)
	writeFile(t, filepath.Join(root, "docs/agent/ARCHITECTURE.md"), `ok`)
	writeFile(t, filepath.Join(root, "docs/agent/BRANCHING.md"), `ok`)
	writeFile(t, filepath.Join(root, "docs/agent/HARNESS.md"), `ok`)
	writeFile(t, filepath.Join(root, "docs/agent/GOLDEN_RULES.md"), `ok`)
	writeFile(t, filepath.Join(root, "docs/agent/LEGACY_ZONES.md"), `ok`)
	writeFile(t, filepath.Join(root, "docs/agent/QUALITY_SCORE.md"), `ok`)
	violations := Validate(root)
	if len(violations) != 0 {
		t.Fatalf("expected no violations, got %+v", violations)
	}
}
// TestValidateMissingAndBrokenLinks: missing required files and a broken
// link together produce several violations (exact count not pinned).
func TestValidateMissingAndBrokenLinks(t *testing.T) {
	root := t.TempDir()
	writeFile(t, filepath.Join(root, "AGENTS.md"), `[Agent Docs](./docs/agent/README.md)`)
	writeFile(t, filepath.Join(root, "docs/agent/README.md"), `[Missing](./MISSING.md)`)
	writeFile(t, filepath.Join(root, "docs/agent/ARCHITECTURE.md"), `ok`)
	violations := Validate(root)
	if len(violations) < 2 {
		t.Fatalf("expected multiple violations, got %+v", violations)
	}
}
// TestValidateSkipsReposWithoutAgentLabDocs: without docs/agent/README.md
// the validator opts out entirely and reports nothing.
func TestValidateSkipsReposWithoutAgentLabDocs(t *testing.T) {
	root := t.TempDir()
	writeFile(t, filepath.Join(root, "README.md"), `plain repo`)
	violations := Validate(root)
	if len(violations) != 0 {
		t.Fatalf("expected no violations for repo without agent-lab docs, got %+v", violations)
	}
}
// writeFile is a test helper that writes content to path, creating parent
// directories as needed; any failure aborts the test.
func writeFile(t *testing.T, path string, content string) {
	t.Helper()
	if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
		t.Fatalf("failed to create dir for %s: %v", path, err)
	}
	if err := os.WriteFile(path, []byte(content), 0o644); err != nil {
		t.Fatalf("failed to write %s: %v", path, err)
	}
}

View File

@@ -0,0 +1,585 @@
package agentlab
import (
"crypto/sha256"
"encoding/hex"
"encoding/json"
"errors"
"fmt"
"net"
"os"
"os/exec"
"path/filepath"
"regexp"
"sort"
"strings"
"time"
)
// Filesystem layout and networking defaults for agent-lab worktrees.
const (
	stateDirName     = "onyx-agent-lab" // state root under the common git dir
	worktreesDirName = "worktrees"      // per-worktree state subdirectory
	envFileName      = ".env.agent-lab"     // backend env overrides (under .vscode)
	webEnvFileName   = ".env.web.agent-lab" // web env overrides (under .vscode)
	// Base ports; AllocatePorts scans upward from these at a common offset.
	defaultWebPort   = 3300
	defaultAPIPort   = 8380
	defaultModelPort = 9300
	defaultMCPPort   = 8390
	portSearchWindow    = 400      // max offsets tried when allocating ports
	dockerProjectPrefix = "onyx"   // compose project name prefix
	searchInfraMode     = "shared" // search infra is never namespaced (see DependencyWarnings)
)

// nonAlphaNumPattern matches runs of characters that Slug collapses to "-".
var nonAlphaNumPattern = regexp.MustCompile(`[^a-z0-9]+`)
// DependencyMode controls whether a worktree shares backing services
// (Postgres/Redis/file store) with other worktrees or gets namespaced ones.
type DependencyMode string

const (
	DependencyModeShared     DependencyMode = "shared"
	DependencyModeNamespaced DependencyMode = "namespaced"
)

// WorktreeLane classifies a worktree by branch-name convention; it drives
// base-ref selection in ResolveCreateBaseRef.
type WorktreeLane string

const (
	WorktreeLaneLab     WorktreeLane = "lab"     // harness work (lab/... branches)
	WorktreeLaneProduct WorktreeLane = "product" // conventional-commit product branches
	WorktreeLaneCustom  WorktreeLane = "custom"  // anything else
)

// productBranchPrefixes are the conventional-commit prefixes that mark a
// branch as product work for lane inference.
var productBranchPrefixes = []string{
	"build/",
	"chore/",
	"ci/",
	"docs/",
	"feat/",
	"fix/",
	"perf/",
	"refactor/",
	"revert/",
	"style/",
	"test/",
}
// DependencyConfig describes how a worktree's backing services are
// provisioned; the namespaced fields are populated only in namespaced mode.
type DependencyConfig struct {
	Mode              DependencyMode `json:"mode"`
	Namespace         string         `json:"namespace,omitempty"`         // worktree id used as the namespace
	PostgresDatabase  string         `json:"postgres_database,omitempty"` // per-worktree database name
	RedisPrefix       string         `json:"redis_prefix,omitempty"`      // per-worktree Redis key prefix
	FileStoreBucket   string         `json:"file_store_bucket,omitempty"` // per-worktree file-store bucket
	SearchInfraMode   string         `json:"search_infra_mode"`           // currently always "shared"
	LastProvisionedAt string         `json:"last_provisioned_at,omitempty"`
}

// PortSet holds the four host ports allocated to one worktree.
type PortSet struct {
	Web         int `json:"web"`
	API         int `json:"api"`
	ModelServer int `json:"model_server"`
	MCP         int `json:"mcp"`
}

// URLSet holds the loopback URLs derived from a worktree's ports.
type URLSet struct {
	Web string `json:"web"`
	API string `json:"api"`
	MCP string `json:"mcp"`
}

// Manifest is the persisted record of one agent-lab worktree: identity,
// filesystem layout, compose project, dependency config, ports/URLs, and
// verification bookkeeping. It is serialized as manifest.json under the
// worktree's state dir.
type Manifest struct {
	ID                string           `json:"id"` // slug + short hash, see worktreeID
	Branch            string           `json:"branch"`
	Lane              WorktreeLane     `json:"lane,omitempty"` // empty in older manifests; see ResolvedLane
	BaseRef           string           `json:"base_ref"`
	CreatedFromPath   string           `json:"created_from_path"` // source checkout the worktree was created from
	CheckoutPath      string           `json:"checkout_path"`
	StateDir          string           `json:"state_dir"`
	ArtifactDir       string           `json:"artifact_dir"`
	EnvFile           string           `json:"env_file"`
	WebEnvFile        string           `json:"web_env_file"`
	ComposeProject    string           `json:"compose_project"`
	Dependencies      DependencyConfig `json:"dependencies"`
	Ports             PortSet          `json:"ports"`
	URLs              URLSet           `json:"urls"`
	CreatedAt         time.Time        `json:"created_at"`
	LastVerifiedAt    string           `json:"last_verified_at,omitempty"`
	LastVerifySummary string           `json:"last_verify_summary,omitempty"`
}
// Slug normalizes value into a lowercase, hyphen-separated identifier safe
// for directory and project names. Any run of non-alphanumeric characters
// (including "/" and "_") collapses to a single "-"; leading/trailing
// hyphens are trimmed; an empty result falls back to "worktree".
func Slug(value string) string {
	normalized := strings.ToLower(strings.TrimSpace(value))
	// nonAlphaNumPattern already maps "/" and "_" (and any other
	// separator) to "-", so no per-character ReplaceAll pass is needed.
	normalized = nonAlphaNumPattern.ReplaceAllString(normalized, "-")
	normalized = strings.Trim(normalized, "-")
	if normalized == "" {
		return "worktree"
	}
	return normalized
}
// worktreeID derives a stable id for a branch: its slug plus a short
// sha256 digest of the raw value, so branches that slugify identically
// (e.g. foo_bar vs foo-bar) still get distinct ids.
func worktreeID(value string) string {
	digest := sha256.Sum256([]byte(value))
	return Slug(value) + "-" + hex.EncodeToString(digest[:4])
}
// ComposeProjectName builds the docker compose project name for a
// worktree id, capping the slug portion at 32 characters.
func ComposeProjectName(id string) string {
	slug := Slug(id)
	const maxSlugLen = 32
	if len(slug) > maxSlugLen {
		slug = slug[:maxSlugLen]
	}
	return dockerProjectPrefix + "-" + slug
}
// GetCommonGitDir returns the absolute path of the repository's common git
// directory (shared across all worktrees), via
// `git rev-parse --path-format=absolute --git-common-dir`.
// The git command runs in the process working directory.
func GetCommonGitDir() (string, error) {
	cmd := exec.Command("git", "rev-parse", "--path-format=absolute", "--git-common-dir")
	output, err := cmd.Output()
	if err != nil {
		return "", fmt.Errorf("git rev-parse --git-common-dir failed: %w", err)
	}
	return strings.TrimSpace(string(output)), nil
}
// StateRoot returns the agent-lab state root inside the common git dir.
func StateRoot(commonGitDir string) string {
	return filepath.Join(commonGitDir, stateDirName)
}
// WorktreesRoot returns the directory holding per-worktree state dirs.
func WorktreesRoot(commonGitDir string) string {
	return filepath.Join(StateRoot(commonGitDir), worktreesDirName)
}
// WorktreeStateDir returns the state directory for one worktree id
// (the id is re-slugged defensively).
func WorktreeStateDir(commonGitDir, id string) string {
	return filepath.Join(WorktreesRoot(commonGitDir), Slug(id))
}
// ManifestPath returns the manifest.json path for one worktree id.
func ManifestPath(commonGitDir, id string) string {
	return filepath.Join(WorktreeStateDir(commonGitDir, id), "manifest.json")
}
// DefaultCheckoutPath places a worktree checkout inside a sibling
// "<repo>-worktrees" directory next to the main repo root.
func DefaultCheckoutPath(repoRoot, id string) string {
	siblingRoot := filepath.Join(filepath.Dir(repoRoot), filepath.Base(repoRoot)+"-worktrees")
	return filepath.Join(siblingRoot, worktreeID(id))
}
// NormalizeBranchForLane strips well-known ref/remote/codex prefixes so
// lane inference sees the bare branch name. Prefixes are removed in
// order — refs/heads/, origin/, codex/ — each at most once.
func NormalizeBranchForLane(branch string) string {
	normalized := strings.TrimSpace(branch)
	for _, prefix := range []string{"refs/heads/", "origin/", "codex/"} {
		normalized = strings.TrimPrefix(normalized, prefix)
	}
	return normalized
}
// InferLane classifies a branch name: lab/ branches are harness work,
// conventional-commit prefixes are product work, anything else is custom.
func InferLane(branch string) WorktreeLane {
	name := NormalizeBranchForLane(branch)
	if strings.HasPrefix(name, "lab/") {
		return WorktreeLaneLab
	}
	for _, productPrefix := range productBranchPrefixes {
		if strings.HasPrefix(name, productPrefix) {
			return WorktreeLaneProduct
		}
	}
	return WorktreeLaneCustom
}
// BaseRefSelection is the outcome of choosing a base ref for a new
// worktree branch: the ref to branch from, the inferred lane, and a
// human-readable explanation of the choice.
type BaseRefSelection struct {
	Ref    string
	Lane   WorktreeLane
	Reason string
}
// ResolveCreateBaseRef picks the base ref for a new worktree branch. An
// explicit requested ref always wins. Otherwise the branch's inferred lane
// selects a preference list (agent-lab refs for lab work, main for product
// work); the first existing ref is used, falling back to HEAD with an
// explanatory reason. refExists is injected so callers and tests control
// ref lookup.
func ResolveCreateBaseRef(branch, requested string, refExists func(string) bool) BaseRefSelection {
	lane := InferLane(branch)
	if requested != "" {
		return BaseRefSelection{
			Ref:    requested,
			Lane:   lane,
			Reason: "using explicit --from value",
		}
	}
	var candidates []string
	var laneLabel string
	var fallbackReason string
	switch lane {
	case WorktreeLaneLab:
		candidates = []string{"codex/agent-lab", "agent-lab", "origin/codex/agent-lab", "origin/agent-lab"}
		laneLabel = "lab"
		fallbackReason = "inferred lab lane from branch name, but no agent-lab ref exists locally; falling back to HEAD"
	case WorktreeLaneProduct:
		candidates = []string{"origin/main", "main"}
		laneLabel = "product"
		fallbackReason = "inferred product lane from branch name, but no main ref exists locally; falling back to HEAD"
	default:
		return BaseRefSelection{
			Ref:    "HEAD",
			Lane:   lane,
			Reason: "no lane inferred from branch name; defaulting to HEAD. Prefer codex/lab/... for harness work and codex/fix... or codex/feat... for product work, or pass --from explicitly",
		}
	}
	for _, candidate := range candidates {
		if refExists(candidate) {
			return BaseRefSelection{
				Ref:    candidate,
				Lane:   lane,
				Reason: fmt.Sprintf("inferred %s lane from branch name; using %s as the base ref", laneLabel, candidate),
			}
		}
	}
	return BaseRefSelection{Ref: "HEAD", Lane: lane, Reason: fallbackReason}
}
// GitRefExists reports whether ref resolves in the current repository,
// using `git rev-parse --verify --quiet` in the process working directory.
func GitRefExists(ref string) bool {
	cmd := exec.Command("git", "rev-parse", "--verify", "--quiet", ref)
	return cmd.Run() == nil
}
// BuildManifest assembles the full manifest for a new worktree: identity
// (id derived from the branch), filesystem layout (state/artifact/env
// paths), compose project name, dependency config, and loopback URLs
// derived from the allocated ports. It is pure — it does not touch the
// filesystem; WriteManifest/WriteEnvFiles persist the result.
func BuildManifest(repoRoot, commonGitDir, branch string, lane WorktreeLane, baseRef, checkoutPath string, ports PortSet, dependencyMode DependencyMode) Manifest {
	id := worktreeID(branch)
	stateDir := WorktreeStateDir(commonGitDir, id)
	artifactDir := filepath.Join(stateDir, "artifacts")
	// Env override files live inside the checkout's .vscode directory.
	envDir := filepath.Join(checkoutPath, ".vscode")
	return Manifest{
		ID:              id,
		Branch:          branch,
		Lane:            lane,
		BaseRef:         baseRef,
		CreatedFromPath: repoRoot,
		CheckoutPath:    checkoutPath,
		StateDir:        stateDir,
		ArtifactDir:     artifactDir,
		EnvFile:         filepath.Join(envDir, envFileName),
		WebEnvFile:      filepath.Join(envDir, webEnvFileName),
		ComposeProject:  ComposeProjectName(id),
		Dependencies:    BuildDependencyConfig(branch, dependencyMode),
		Ports:           ports,
		URLs: URLSet{
			Web: fmt.Sprintf("http://127.0.0.1:%d", ports.Web),
			API: fmt.Sprintf("http://127.0.0.1:%d", ports.API),
			MCP: fmt.Sprintf("http://127.0.0.1:%d", ports.MCP),
		},
		CreatedAt: time.Now().UTC(),
	}
}
// ResolvedLane returns the stored lane, inferring it from the branch name
// when the Lane field is empty.
func (m Manifest) ResolvedLane() WorktreeLane {
	if m.Lane != "" {
		return m.Lane
	}
	return InferLane(m.Branch)
}
// BuildDependencyConfig derives dependency settings for a branch. An empty
// mode defaults to shared. Namespaced mode additionally derives a
// per-worktree Postgres database name, Redis key prefix, and file-store
// bucket from the worktree id, each capped at 63 characters.
func BuildDependencyConfig(branch string, mode DependencyMode) DependencyConfig {
	if mode == "" {
		mode = DependencyModeShared
	}
	config := DependencyConfig{
		Mode:            mode,
		SearchInfraMode: searchInfraMode,
	}
	if mode != DependencyModeNamespaced {
		return config
	}
	ns := worktreeID(branch)
	config.Namespace = ns
	config.RedisPrefix = "agentlab:" + ns

	db := "agentlab_" + strings.ReplaceAll(ns, "-", "_")
	if len(db) > 63 { // keep within 63 chars
		db = db[:63]
	}
	config.PostgresDatabase = db

	bucket := "onyx-agentlab-" + ns
	if len(bucket) > 63 { // keep within 63 chars, and never end on a hyphen
		bucket = strings.Trim(bucket[:63], "-")
	}
	config.FileStoreBucket = bucket
	return config
}
// ResolvedDependencies returns the manifest's dependency config, filling
// in defaults for manifests that predate the Mode or SearchInfraMode
// fields.
func (m Manifest) ResolvedDependencies() DependencyConfig {
	if m.Dependencies.Mode == "" {
		return BuildDependencyConfig(m.Branch, DependencyModeShared)
	}
	deps := m.Dependencies
	if deps.SearchInfraMode == "" {
		deps.SearchInfraMode = searchInfraMode
	}
	return deps
}
// RuntimeEnv returns the environment overrides for processes running in
// this worktree: agent-lab metadata, URL/port wiring, and — in namespaced
// dependency mode — per-worktree Postgres/Redis/bucket settings.
func (m Manifest) RuntimeEnv() map[string]string {
	// Resolve once up front; the previous version recomputed the
	// dependency config three times per call.
	deps := m.ResolvedDependencies()
	env := map[string]string{
		"AGENT_LAB_ARTIFACT_DIR":      m.ArtifactDir,
		"AGENT_LAB_DEPENDENCY_MODE":   string(deps.Mode),
		"AGENT_LAB_SEARCH_INFRA_MODE": deps.SearchInfraMode,
		"AGENT_LAB_WORKTREE_ID":       m.ID,
		"AGENT_LAB_WORKTREE_URL":      m.URLs.Web,
		"BASE_URL":                    m.URLs.Web,
		"INTERNAL_URL":                m.URLs.API,
		"MCP_INTERNAL_URL":            m.URLs.MCP,
		"PORT":                        fmt.Sprintf("%d", m.Ports.Web),
		"WEB_DOMAIN":                  m.URLs.Web,
	}
	if deps.Namespace != "" {
		env["AGENT_LAB_NAMESPACE"] = deps.Namespace
	}
	if deps.Mode == DependencyModeNamespaced {
		env["POSTGRES_DB"] = deps.PostgresDatabase
		env["DEFAULT_REDIS_PREFIX"] = deps.RedisPrefix
		env["S3_FILE_STORE_BUCKET_NAME"] = deps.FileStoreBucket
	}
	return env
}
// ShellEnv returns the environment for interactive shells in this
// worktree; currently identical to RuntimeEnv.
func (m Manifest) ShellEnv() map[string]string {
	return m.RuntimeEnv()
}
// DependencyWarnings returns operator-facing caveats about this worktree's
// dependency setup. Today that is only the shared-search-infra warning,
// which applies whenever SearchInfraMode is "shared".
func (m Manifest) DependencyWarnings() []string {
	if m.ResolvedDependencies().SearchInfraMode != searchInfraMode {
		return nil
	}
	return []string{
		"Search infrastructure remains shared across worktrees. OpenSearch/Vespa state is not namespaced or torn down by agent-lab.",
	}
}
// EnvFileContents renders the generated env-override file for the given
// kind: "web" for the web app, anything else for backend services. Both
// variants share the agent-lab metadata header; the web variant adds
// PORT/BASE_URL wiring, while the backend variant adds namespaced
// datastore settings when in namespaced dependency mode. The result ends
// with a trailing newline.
func (m Manifest) EnvFileContents(kind string) string {
	values := m.RuntimeEnv()
	deps := m.ResolvedDependencies()
	var lines []string
	lines = append(lines, "# Generated by `ods worktree create` for agent-lab.")
	lines = append(lines, "# This file only contains worktree-local overrides.")
	lines = append(lines, fmt.Sprintf("AGENT_LAB_WORKTREE_ID=%s", m.ID))
	lines = append(lines, fmt.Sprintf("AGENT_LAB_ARTIFACT_DIR=%s", m.ArtifactDir))
	lines = append(lines, fmt.Sprintf("AGENT_LAB_DEPENDENCY_MODE=%s", deps.Mode))
	lines = append(lines, fmt.Sprintf("AGENT_LAB_SEARCH_INFRA_MODE=%s", deps.SearchInfraMode))
	if deps.Namespace != "" {
		lines = append(lines, fmt.Sprintf("AGENT_LAB_NAMESPACE=%s", deps.Namespace))
	}
	switch kind {
	case "web":
		lines = append(lines, fmt.Sprintf("PORT=%d", m.Ports.Web))
		lines = append(lines, fmt.Sprintf("BASE_URL=%s", values["BASE_URL"]))
		lines = append(lines, fmt.Sprintf("WEB_DOMAIN=%s", values["WEB_DOMAIN"]))
		lines = append(lines, fmt.Sprintf("INTERNAL_URL=%s", values["INTERNAL_URL"]))
		lines = append(lines, fmt.Sprintf("MCP_INTERNAL_URL=%s", values["MCP_INTERNAL_URL"]))
	default:
		lines = append(lines, fmt.Sprintf("WEB_DOMAIN=%s", values["WEB_DOMAIN"]))
		lines = append(lines, fmt.Sprintf("INTERNAL_URL=%s", values["INTERNAL_URL"]))
		lines = append(lines, fmt.Sprintf("MCP_INTERNAL_URL=%s", values["MCP_INTERNAL_URL"]))
		// Backend processes also need the namespaced datastore settings.
		if deps.Mode == DependencyModeNamespaced {
			lines = append(lines, fmt.Sprintf("POSTGRES_DB=%s", deps.PostgresDatabase))
			lines = append(lines, fmt.Sprintf("DEFAULT_REDIS_PREFIX=%s", deps.RedisPrefix))
			lines = append(lines, fmt.Sprintf("S3_FILE_STORE_BUCKET_NAME=%s", deps.FileStoreBucket))
		}
	}
	return strings.Join(lines, "\n") + "\n"
}
// WriteManifest persists the manifest as indented JSON under the worktree
// state dir, creating the state and artifact directories first.
func WriteManifest(commonGitDir string, manifest Manifest) error {
	dirs := []struct{ path, label string }{
		{WorktreeStateDir(commonGitDir, manifest.ID), "create worktree state dir"},
		{manifest.ArtifactDir, "create artifact dir"},
	}
	for _, d := range dirs {
		if err := os.MkdirAll(d.path, 0755); err != nil {
			return fmt.Errorf("%s: %w", d.label, err)
		}
	}
	data, err := json.MarshalIndent(manifest, "", " ")
	if err != nil {
		return fmt.Errorf("marshal manifest: %w", err)
	}
	if err := os.WriteFile(ManifestPath(commonGitDir, manifest.ID), data, 0644); err != nil {
		return fmt.Errorf("write manifest: %w", err)
	}
	return nil
}
// WriteEnvFiles writes the backend and web env-override files into the
// worktree's .vscode directory (both manifest paths share that parent).
func WriteEnvFiles(manifest Manifest) error {
	if err := os.MkdirAll(filepath.Dir(manifest.EnvFile), 0755); err != nil {
		return fmt.Errorf("create env dir: %w", err)
	}
	targets := []struct{ path, kind, label string }{
		{manifest.EnvFile, "backend", "backend"},
		{manifest.WebEnvFile, "web", "web"},
	}
	for _, target := range targets {
		contents := []byte(manifest.EnvFileContents(target.kind))
		if err := os.WriteFile(target.path, contents, 0644); err != nil {
			return fmt.Errorf("write %s env file: %w", target.label, err)
		}
	}
	return nil
}
// LoadAll reads every worktree manifest under the common git dir, sorted
// by branch name. A missing worktrees directory yields (nil, nil); any
// unreadable or unparsable manifest fails the whole load.
func LoadAll(commonGitDir string) ([]Manifest, error) {
	root := WorktreesRoot(commonGitDir)
	entries, err := os.ReadDir(root)
	if errors.Is(err, os.ErrNotExist) {
		return nil, nil
	}
	if err != nil {
		return nil, fmt.Errorf("read worktrees dir: %w", err)
	}
	manifests := make([]Manifest, 0, len(entries))
	for _, entry := range entries {
		if !entry.IsDir() {
			continue
		}
		loaded, err := LoadManifest(filepath.Join(root, entry.Name(), "manifest.json"))
		if err != nil {
			return nil, err
		}
		manifests = append(manifests, loaded)
	}
	sort.Slice(manifests, func(i, j int) bool { return manifests[i].Branch < manifests[j].Branch })
	return manifests, nil
}
// LoadManifest reads and decodes a single manifest.json from path.
func LoadManifest(path string) (Manifest, error) {
	var manifest Manifest
	raw, err := os.ReadFile(path)
	if err != nil {
		return manifest, fmt.Errorf("read manifest %s: %w", path, err)
	}
	if unmarshalErr := json.Unmarshal(raw, &manifest); unmarshalErr != nil {
		return manifest, fmt.Errorf("parse manifest %s: %w", path, unmarshalErr)
	}
	return manifest, nil
}
// FindByRepoRoot returns the manifest whose checkout path equals repoRoot
// after symlink resolution and cleaning, if any.
func FindByRepoRoot(commonGitDir, repoRoot string) (Manifest, bool, error) {
	manifests, err := LoadAll(commonGitDir)
	if err != nil {
		return Manifest{}, false, err
	}
	want := normalizePath(repoRoot)
	for _, candidate := range manifests {
		if normalizePath(candidate.CheckoutPath) == want {
			return candidate, true, nil
		}
	}
	return Manifest{}, false, nil
}
// FindByIdentifier resolves a worktree by exact id, branch name, or
// checkout path (in that priority per manifest). As a convenience it also
// matches by branch slug, but only when exactly one manifest shares that
// slug; an ambiguous slug returns an error asking for a more specific
// identifier.
func FindByIdentifier(commonGitDir, identifier string) (Manifest, bool, error) {
	manifests, err := LoadAll(commonGitDir)
	if err != nil {
		return Manifest{}, false, err
	}
	wantSlug := Slug(identifier)
	wantPath := normalizePath(identifier)
	var bySlug []Manifest
	for _, candidate := range manifests {
		if candidate.ID == wantSlug ||
			candidate.Branch == identifier ||
			normalizePath(candidate.CheckoutPath) == wantPath {
			return candidate, true, nil
		}
		if wantSlug != "" && Slug(candidate.Branch) == wantSlug {
			bySlug = append(bySlug, candidate)
		}
	}
	switch len(bySlug) {
	case 0:
		return Manifest{}, false, nil
	case 1:
		return bySlug[0], true, nil
	default:
		return Manifest{}, false, fmt.Errorf("identifier %q matches multiple worktrees; use the branch, full id, or checkout path", identifier)
	}
}
// RemoveState deletes the worktree's state directory (manifest plus
// artifacts). Removing an already-absent directory is not an error.
func RemoveState(commonGitDir, id string) error {
	err := os.RemoveAll(WorktreeStateDir(commonGitDir, id))
	if err != nil {
		return fmt.Errorf("remove worktree state: %w", err)
	}
	return nil
}
// UpdateVerification records the latest verify run (summary artifact path
// plus UTC RFC3339 timestamp) on the manifest and rewrites it to disk.
func UpdateVerification(commonGitDir string, manifest Manifest, summaryPath string, verifiedAt time.Time) error {
	manifest.LastVerifySummary = summaryPath
	manifest.LastVerifiedAt = verifiedAt.UTC().Format(time.RFC3339)
	return WriteManifest(commonGitDir, manifest)
}
// AllocatePorts picks the first port set (web/api/model/mcp at a common
// offset from the defaults) that is neither reserved by an existing
// manifest nor currently bound on the host.
func AllocatePorts(existing []Manifest) (PortSet, error) {
	reserved := make(map[int]bool)
	for _, m := range existing {
		for _, p := range []int{m.Ports.Web, m.Ports.API, m.Ports.ModelServer, m.Ports.MCP} {
			reserved[p] = true
		}
	}
	for offset := 0; offset < portSearchWindow; offset++ {
		candidate := PortSet{
			Web:         defaultWebPort + offset,
			API:         defaultAPIPort + offset,
			ModelServer: defaultModelPort + offset,
			MCP:         defaultMCPPort + offset,
		}
		anyReserved := reserved[candidate.Web] || reserved[candidate.API] ||
			reserved[candidate.ModelServer] || reserved[candidate.MCP]
		if anyReserved || !portsAvailable(candidate) {
			continue
		}
		return candidate, nil
	}
	return PortSet{}, fmt.Errorf("failed to allocate an available worktree port set after %d attempts", portSearchWindow)
}
// portsAvailable reports whether all four ports in the set can currently
// be bound (best-effort TCP listen probe; inherently racy between the
// probe and actual use).
func portsAvailable(ports PortSet) bool {
	for _, port := range []int{ports.Web, ports.API, ports.ModelServer, ports.MCP} {
		listener, err := net.Listen("tcp", fmt.Sprintf(":%d", port))
		if err != nil {
			return false
		}
		_ = listener.Close()
	}
	return true
}
func normalizePath(path string) string {
clean := filepath.Clean(path)
resolved, err := filepath.EvalSymlinks(clean)
if err == nil {
return filepath.Clean(resolved)
}
return clean
}

View File

@@ -0,0 +1,312 @@
package agentlab
import (
"os"
"path/filepath"
"strings"
"testing"
)
// TestSlug covers separator collapsing, case folding, and the "worktree"
// fallback for inputs that slug to nothing.
func TestSlug(t *testing.T) {
	t.Parallel()
	tests := map[string]string{
		"feat/My Feature": "feat-my-feature",
		"lab/agent_docs":  "lab-agent-docs",
		" ":               "worktree",
	}
	for input, want := range tests {
		input := input
		want := want
		t.Run(input, func(t *testing.T) {
			t.Parallel()
			if got := Slug(input); got != want {
				t.Fatalf("Slug(%q) = %q, want %q", input, got, want)
			}
		})
	}
}
// TestWorktreeIDIsCollisionResistant: branches that slugify identically
// (foo_bar vs foo-bar) must still get distinct ids via the hash suffix.
func TestWorktreeIDIsCollisionResistant(t *testing.T) {
	t.Parallel()
	idOne := worktreeID("feat/foo_bar")
	idTwo := worktreeID("feat/foo-bar")
	if idOne == idTwo {
		t.Fatalf("expected distinct worktree ids, got %q", idOne)
	}
	if !strings.HasPrefix(idOne, "feat-foo-bar-") {
		t.Fatalf("unexpected worktree id format: %s", idOne)
	}
}
// TestInferLane covers all three lanes, including codex/-prefixed branch
// names that must be normalized before classification.
func TestInferLane(t *testing.T) {
	t.Parallel()
	tests := map[string]WorktreeLane{
		"lab/docs":                WorktreeLaneLab,
		"codex/lab/docs":          WorktreeLaneLab,
		"fix/auth-banner-modal":   WorktreeLaneProduct,
		"codex/feat/agent-check":  WorktreeLaneProduct,
		"chore/update-readme":     WorktreeLaneProduct,
		"codex/auth-banner-modal": WorktreeLaneCustom,
		"agent-lab":               WorktreeLaneCustom,
	}
	for branch, want := range tests {
		branch := branch
		want := want
		t.Run(branch, func(t *testing.T) {
			t.Parallel()
			if got := InferLane(branch); got != want {
				t.Fatalf("InferLane(%q) = %q, want %q", branch, got, want)
			}
		})
	}
}
// TestResolveCreateBaseRef drives base-ref selection through a stubbed
// refExists where only codex/agent-lab and origin/main exist, covering
// product, lab, explicit --from, and custom/HEAD fallback paths.
func TestResolveCreateBaseRef(t *testing.T) {
	t.Parallel()
	refExists := func(ref string) bool {
		switch ref {
		case "codex/agent-lab", "origin/main":
			return true
		default:
			return false
		}
	}
	product := ResolveCreateBaseRef("codex/fix/auth-banner-modal", "", refExists)
	if product.Ref != "origin/main" || product.Lane != WorktreeLaneProduct {
		t.Fatalf("unexpected product base selection: %+v", product)
	}
	lab := ResolveCreateBaseRef("codex/lab/bootstrap-docs", "", refExists)
	if lab.Ref != "codex/agent-lab" || lab.Lane != WorktreeLaneLab {
		t.Fatalf("unexpected lab base selection: %+v", lab)
	}
	// An explicit --from always wins regardless of lane.
	explicit := ResolveCreateBaseRef("codex/auth-banner-modal", "origin/release", refExists)
	if explicit.Ref != "origin/release" || explicit.Lane != WorktreeLaneCustom {
		t.Fatalf("unexpected explicit base selection: %+v", explicit)
	}
	custom := ResolveCreateBaseRef("codex/auth-banner-modal", "", refExists)
	if custom.Ref != "HEAD" || custom.Lane != WorktreeLaneCustom {
		t.Fatalf("unexpected custom base selection: %+v", custom)
	}
}
// TestBuildManifest checks the derived manifest fields — id, web URL,
// compose project, shell env wiring, and namespaced dependency values —
// for a namespaced product worktree.
func TestBuildManifest(t *testing.T) {
	t.Parallel()
	ports := PortSet{Web: 3301, API: 8381, ModelServer: 9301, MCP: 8391}
	manifest := BuildManifest(
		"/repo/main",
		"/repo/.git",
		"feat/agent-harness",
		WorktreeLaneProduct,
		"origin/main",
		"/worktrees/feat-agent-harness",
		ports,
		DependencyModeNamespaced,
	)
	if manifest.ID != worktreeID("feat/agent-harness") {
		t.Fatalf("unexpected manifest id: %s", manifest.ID)
	}
	if manifest.URLs.Web != "http://127.0.0.1:3301" {
		t.Fatalf("unexpected web url: %s", manifest.URLs.Web)
	}
	if manifest.ComposeProject != "onyx-"+worktreeID("feat/agent-harness") {
		t.Fatalf("unexpected compose project: %s", manifest.ComposeProject)
	}
	if got := manifest.ShellEnv()["INTERNAL_URL"]; got != "http://127.0.0.1:8381" {
		t.Fatalf("unexpected INTERNAL_URL: %s", got)
	}
	if got := manifest.ResolvedDependencies().PostgresDatabase; got != "agentlab_"+strings.ReplaceAll(worktreeID("feat/agent-harness"), "-", "_") {
		t.Fatalf("unexpected postgres database: %s", got)
	}
	if got := manifest.RuntimeEnv()["DEFAULT_REDIS_PREFIX"]; got != "agentlab:"+worktreeID("feat/agent-harness") {
		t.Fatalf("unexpected redis prefix: %s", got)
	}
}
// TestWriteManifestAndLoadAll round-trips one manifest through
// WriteManifest and LoadAll using a temp dir as the common git dir.
func TestWriteManifestAndLoadAll(t *testing.T) {
	t.Parallel()
	commonGitDir := t.TempDir()
	manifest := BuildManifest(
		"/repo/main",
		commonGitDir,
		"lab/docs",
		WorktreeLaneLab,
		"HEAD",
		"/repo-worktrees/lab-docs",
		PortSet{Web: 3302, API: 8382, ModelServer: 9302, MCP: 8392},
		DependencyModeShared,
	)
	if err := WriteManifest(commonGitDir, manifest); err != nil {
		t.Fatalf("WriteManifest() error = %v", err)
	}
	manifests, err := LoadAll(commonGitDir)
	if err != nil {
		t.Fatalf("LoadAll() error = %v", err)
	}
	if len(manifests) != 1 {
		t.Fatalf("LoadAll() length = %d, want 1", len(manifests))
	}
	if manifests[0].Branch != manifest.Branch {
		t.Fatalf("unexpected branch: %s", manifests[0].Branch)
	}
}
// TestWriteEnvFiles: both env files are written, and the backend file
// carries the namespaced Postgres/Redis/bucket entries.
func TestWriteEnvFiles(t *testing.T) {
	t.Parallel()
	root := t.TempDir()
	manifest := BuildManifest(
		"/repo/main",
		filepath.Join(root, ".git"),
		"feat/env",
		WorktreeLaneProduct,
		"HEAD",
		root,
		PortSet{Web: 3303, API: 8383, ModelServer: 9303, MCP: 8393},
		DependencyModeNamespaced,
	)
	if err := WriteEnvFiles(manifest); err != nil {
		t.Fatalf("WriteEnvFiles() error = %v", err)
	}
	for _, path := range []string{manifest.EnvFile, manifest.WebEnvFile} {
		if _, err := os.Stat(path); err != nil {
			t.Fatalf("expected env file %s to exist: %v", path, err)
		}
	}
	backendEnv, err := os.ReadFile(manifest.EnvFile)
	if err != nil {
		t.Fatalf("read backend env file: %v", err)
	}
	if !containsAll(
		string(backendEnv),
		"POSTGRES_DB=agentlab_"+strings.ReplaceAll(worktreeID("feat/env"), "-", "_"),
		"DEFAULT_REDIS_PREFIX=agentlab:"+worktreeID("feat/env"),
		"S3_FILE_STORE_BUCKET_NAME=onyx-agentlab-"+worktreeID("feat/env"),
	) {
		t.Fatalf("backend env file missing dependency namespace entries: %s", string(backendEnv))
	}
}
// TestFindByIdentifierRejectsAmbiguousSlug: two branches that slugify to
// the same value must make a slug-only lookup fail instead of guessing.
func TestFindByIdentifierRejectsAmbiguousSlug(t *testing.T) {
	t.Parallel()
	commonGitDir := t.TempDir()
	manifests := []Manifest{
		BuildManifest(
			"/repo/main",
			commonGitDir,
			"feat/foo_bar",
			WorktreeLaneProduct,
			"HEAD",
			"/repo-worktrees/"+worktreeID("feat/foo_bar"),
			PortSet{Web: 3302, API: 8382, ModelServer: 9302, MCP: 8392},
			DependencyModeNamespaced,
		),
		BuildManifest(
			"/repo/main",
			commonGitDir,
			"feat/foo-bar",
			WorktreeLaneProduct,
			"HEAD",
			"/repo-worktrees/"+worktreeID("feat/foo-bar"),
			PortSet{Web: 3303, API: 8383, ModelServer: 9303, MCP: 8393},
			DependencyModeNamespaced,
		),
	}
	for _, manifest := range manifests {
		if err := WriteManifest(commonGitDir, manifest); err != nil {
			t.Fatalf("WriteManifest() error = %v", err)
		}
	}
	if _, found, err := FindByIdentifier(commonGitDir, "feat-foo-bar"); err == nil || found {
		t.Fatalf("expected ambiguous slug lookup to fail, found=%t err=%v", found, err)
	}
}
// TestBootstrapLinksAndClonesFromSource exercises a full Bootstrap run
// against a synthetic source checkout: env files and .venv are symlinked,
// while web/node_modules is cloned into the new checkout.
func TestBootstrapLinksAndClonesFromSource(t *testing.T) {
	t.Parallel()
	sourceRoot := t.TempDir()
	checkoutRoot := t.TempDir()
	commonGitDir := filepath.Join(sourceRoot, ".git")
	// Seed the source checkout with the assets Bootstrap provisions from.
	writeTestFile(t, filepath.Join(sourceRoot, ".vscode", ".env"), "OPENAI_API_KEY=test\n")
	writeTestFile(t, filepath.Join(sourceRoot, ".vscode", ".env.web"), "AUTH_TYPE=basic\n")
	writeTestFile(t, filepath.Join(sourceRoot, ".venv", "bin", "python"), "#!/bin/sh\n")
	writeTestFile(t, filepath.Join(sourceRoot, "web", "node_modules", ".bin", "next"), "#!/bin/sh\n")
	manifest := BuildManifest(
		sourceRoot,
		commonGitDir,
		"feat/bootstrap",
		WorktreeLaneProduct,
		"HEAD",
		checkoutRoot,
		PortSet{Web: 3305, API: 8385, ModelServer: 9305, MCP: 8395},
		DependencyModeNamespaced,
	)
	result, err := Bootstrap(manifest, BootstrapOptions{
		EnvMode:    BootstrapModeLink,
		PythonMode: BootstrapModeLink,
		WebMode:    BootstrapModeClone,
	})
	if err != nil {
		t.Fatalf("Bootstrap() error = %v", err)
	}
	if len(result.Actions) == 0 {
		t.Fatal("expected bootstrap actions to be recorded")
	}
	if target, err := os.Readlink(filepath.Join(checkoutRoot, ".vscode", ".env")); err != nil || target == "" {
		t.Fatalf("expected .vscode/.env symlink, err=%v target=%q", err, target)
	}
	if target, err := os.Readlink(filepath.Join(checkoutRoot, ".venv")); err != nil || target == "" {
		t.Fatalf("expected .venv symlink, err=%v target=%q", err, target)
	}
	if _, err := os.Stat(filepath.Join(checkoutRoot, "web", "node_modules", ".bin", "next")); err != nil {
		t.Fatalf("expected cloned node_modules marker: %v", err)
	}
	if _, err := os.Lstat(filepath.Join(checkoutRoot, "web", "node_modules")); err != nil {
		t.Fatalf("expected node_modules to exist: %v", err)
	}
}
// writeTestFile is a test helper that writes content to path, creating
// parent directories as needed; any failure aborts the test.
func writeTestFile(t *testing.T, path string, content string) {
	t.Helper()
	if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
		t.Fatalf("mkdir %s: %v", filepath.Dir(path), err)
	}
	if err := os.WriteFile(path, []byte(content), 0o644); err != nil {
		t.Fatalf("write %s: %v", path, err)
	}
}
// containsAll reports whether value contains every one of parts; an empty
// parts list vacuously yields true.
func containsAll(value string, parts ...string) bool {
	for _, fragment := range parts {
		if !strings.Contains(value, fragment) {
			return false
		}
	}
	return true
}

View File

@@ -0,0 +1,233 @@
package agentlab
import (
"fmt"
"os"
"os/exec"
"path/filepath"
"runtime"
)
// BootstrapMode selects how a bootstrap step provisions an asset into a
// new worktree checkout.
type BootstrapMode string

const (
	BootstrapModeAuto  BootstrapMode = "auto"  // pick a mode per asset (link, or clone/npm for web)
	BootstrapModeSkip  BootstrapMode = "skip"  // leave the asset alone
	BootstrapModeLink  BootstrapMode = "link"  // symlink from the source checkout
	BootstrapModeCopy  BootstrapMode = "copy"  // copy from the source checkout
	BootstrapModeClone BootstrapMode = "clone" // clone directory trees from the source checkout
	BootstrapModeNPM   BootstrapMode = "npm"   // install web deps via npm ci
)

// BootstrapOptions selects a mode for each bootstrap step.
type BootstrapOptions struct {
	EnvMode    BootstrapMode // .vscode/.env and .vscode/.env.web
	PythonMode BootstrapMode // .venv
	WebMode    BootstrapMode // web/node_modules
}

// BootstrapResult records human-readable descriptions of what Bootstrap did.
type BootstrapResult struct {
	Actions []string
}
// Bootstrap provisions a fresh worktree checkout from its source checkout
// in three steps — env files, the Python venv, and web node_modules —
// honoring the per-step modes in opts. It stops at the first failing step.
func Bootstrap(manifest Manifest, opts BootstrapOptions) (*BootstrapResult, error) {
	result := &BootstrapResult{}
	steps := []struct {
		run  func(Manifest, BootstrapMode, *BootstrapResult) error
		mode BootstrapMode
	}{
		{bootstrapEnvFiles, opts.EnvMode},
		{bootstrapPython, opts.PythonMode},
		{bootstrapWeb, opts.WebMode},
	}
	for _, step := range steps {
		if err := step.run(manifest, step.mode, result); err != nil {
			return nil, err
		}
	}
	return result, nil
}
// bootstrapEnvFiles provisions .vscode/.env and .vscode/.env.web into the
// new checkout from the source checkout. Missing source files are skipped
// silently; existing targets are kept. Auto mode resolves to link; copy
// and clone both copy the file.
func bootstrapEnvFiles(manifest Manifest, mode BootstrapMode, result *BootstrapResult) error {
	if mode == BootstrapModeSkip {
		return nil
	}
	vscodeDir := filepath.Join(manifest.CheckoutPath, ".vscode")
	if err := os.MkdirAll(vscodeDir, 0755); err != nil {
		return fmt.Errorf("create .vscode dir: %w", err)
	}
	sources := []struct {
		source string
		target string
		label  string
	}{
		{
			source: filepath.Join(manifest.CreatedFromPath, ".vscode", ".env"),
			target: filepath.Join(manifest.CheckoutPath, ".vscode", ".env"),
			label:  ".vscode/.env",
		},
		{
			source: filepath.Join(manifest.CreatedFromPath, ".vscode", ".env.web"),
			target: filepath.Join(manifest.CheckoutPath, ".vscode", ".env.web"),
			label:  ".vscode/.env.web",
		},
	}
	for _, item := range sources {
		// Skip env files the source checkout doesn't have.
		if _, err := os.Stat(item.source); err != nil {
			continue
		}
		// Lstat so an existing symlink target also counts as present.
		if _, err := os.Lstat(item.target); err == nil {
			result.Actions = append(result.Actions, fmt.Sprintf("kept existing %s", item.label))
			continue
		}
		currentMode := mode
		if currentMode == BootstrapModeAuto {
			currentMode = BootstrapModeLink
		}
		switch currentMode {
		case BootstrapModeLink:
			if err := os.Symlink(item.source, item.target); err != nil {
				return fmt.Errorf("symlink %s: %w", item.label, err)
			}
			result.Actions = append(result.Actions, fmt.Sprintf("linked %s from source checkout", item.label))
		case BootstrapModeCopy, BootstrapModeClone:
			if err := copyFile(item.source, item.target); err != nil {
				return fmt.Errorf("copy %s: %w", item.label, err)
			}
			result.Actions = append(result.Actions, fmt.Sprintf("copied %s from source checkout", item.label))
		default:
			return fmt.Errorf("unsupported env bootstrap mode: %s", currentMode)
		}
	}
	return nil
}
// bootstrapPython provisions .venv in the new checkout: an existing venv
// is kept, otherwise the source checkout's venv is linked (default/auto)
// or cloned. A missing source venv is recorded but is not an error.
func bootstrapPython(manifest Manifest, mode BootstrapMode, result *BootstrapResult) error {
	if mode == BootstrapModeSkip {
		return nil
	}
	source := filepath.Join(manifest.CreatedFromPath, ".venv")
	target := filepath.Join(manifest.CheckoutPath, ".venv")
	if _, err := os.Stat(target); err == nil {
		result.Actions = append(result.Actions, "kept existing .venv")
		return nil
	}
	if _, err := os.Stat(source); err != nil {
		result.Actions = append(result.Actions, "source .venv missing; backend bootstrap deferred")
		return nil
	}
	effective := mode
	if effective == BootstrapModeAuto {
		effective = BootstrapModeLink
	}
	switch effective {
	case BootstrapModeLink:
		if err := os.Symlink(source, target); err != nil {
			return fmt.Errorf("symlink .venv: %w", err)
		}
		result.Actions = append(result.Actions, "linked shared .venv from source checkout")
		return nil
	case BootstrapModeCopy, BootstrapModeClone:
		if err := cloneDirectory(source, target); err != nil {
			return fmt.Errorf("clone .venv: %w", err)
		}
		result.Actions = append(result.Actions, "cloned .venv from source checkout")
		return nil
	default:
		return fmt.Errorf("unsupported python bootstrap mode: %s", effective)
	}
}
func bootstrapWeb(manifest Manifest, mode BootstrapMode, result *BootstrapResult) error {
if mode == BootstrapModeSkip {
return nil
}
sourceModules := filepath.Join(manifest.CreatedFromPath, "web", "node_modules")
targetModules := filepath.Join(manifest.CheckoutPath, "web", "node_modules")
if _, err := os.Lstat(targetModules); err == nil {
result.Actions = append(result.Actions, "kept existing web/node_modules")
return nil
}
currentMode := mode
if currentMode == BootstrapModeAuto {
if _, err := os.Stat(sourceModules); err == nil {
currentMode = BootstrapModeClone
} else {
currentMode = BootstrapModeNPM
}
}
switch currentMode {
case BootstrapModeClone, BootstrapModeCopy:
if _, err := os.Stat(sourceModules); err != nil {
webDir := filepath.Join(manifest.CheckoutPath, "web")
cmd := exec.Command("npm", "ci", "--prefer-offline", "--no-audit")
cmd.Dir = webDir
cmd.Stdout = os.Stdout
cmd.Stderr = os.Stderr
cmd.Stdin = os.Stdin
if err := cmd.Run(); err != nil {
return fmt.Errorf("npm ci: %w", err)
}
result.Actions = append(result.Actions, "installed web/node_modules with npm ci")
return nil
}
if err := cloneDirectory(sourceModules, targetModules); err != nil {
return fmt.Errorf("clone web/node_modules: %w", err)
}
result.Actions = append(result.Actions, "cloned local web/node_modules into worktree")
return nil
case BootstrapModeNPM:
webDir := filepath.Join(manifest.CheckoutPath, "web")
cmd := exec.Command("npm", "ci", "--prefer-offline", "--no-audit")
cmd.Dir = webDir
cmd.Stdout = os.Stdout
cmd.Stderr = os.Stderr
cmd.Stdin = os.Stdin
if err := cmd.Run(); err != nil {
return fmt.Errorf("npm ci: %w", err)
}
result.Actions = append(result.Actions, "installed web/node_modules with npm ci")
default:
return fmt.Errorf("unsupported web bootstrap mode: %s", currentMode)
}
return nil
}
func cloneDirectory(source, target string) error {
if err := os.MkdirAll(filepath.Dir(target), 0755); err != nil {
return fmt.Errorf("create parent dir for %s: %w", target, err)
}
if runtime.GOOS == "darwin" {
cmd := exec.Command("cp", "-R", "-c", source, target)
if err := cmd.Run(); err == nil {
return nil
}
}
if runtime.GOOS != "windows" {
cmd := exec.Command("cp", "-R", source, target)
if err := cmd.Run(); err == nil {
return nil
}
}
return fmt.Errorf("no supported directory clone strategy succeeded for %s", source)
}
func copyFile(source, target string) error {
data, err := os.ReadFile(source)
if err != nil {
return err
}
return os.WriteFile(target, data, 0644)
}

View File

@@ -0,0 +1,252 @@
package agentlab
import (
"bytes"
"encoding/json"
"fmt"
"os"
"os/exec"
"path/filepath"
"runtime"
"strings"
"time"
"github.com/onyx-dot-app/onyx/tools/ods/internal/alembic"
"github.com/onyx-dot-app/onyx/tools/ods/internal/envutil"
)
// DependencyResult records the human-readable actions a dependency operation
// performed, for display to the user.
type DependencyResult struct {
	Actions []string
}

// DependencyStatus is a JSON-serializable health snapshot of a worktree's
// backing services. For namespaced mode the counts and readiness flags are
// populated by the embedded dependency_status.py probe (whose output keys
// mirror these json tags); shared mode reports everything ready without
// probing.
type DependencyStatus struct {
	Mode                 DependencyMode `json:"mode"`
	Namespace            string         `json:"namespace,omitempty"`
	PostgresDatabase     string         `json:"postgres_database,omitempty"`
	PostgresReady        bool           `json:"postgres_ready"`
	PostgresTableCount   int            `json:"postgres_table_count,omitempty"`
	RedisPrefix          string         `json:"redis_prefix,omitempty"`
	RedisReady           bool           `json:"redis_ready"`
	RedisKeyCount        int            `json:"redis_key_count,omitempty"`
	FileStoreBucket      string         `json:"file_store_bucket,omitempty"`
	FileStoreReady       bool           `json:"file_store_ready"`
	FileStoreObjectCount int            `json:"file_store_object_count,omitempty"`
	SearchInfraMode      string         `json:"search_infra_mode"`
}
// ProvisionDependencies prepares the backing services (Postgres, Redis,
// S3/MinIO) for a worktree according to its resolved dependency mode, then
// persists the updated manifest. Shared mode reuses the developer's existing
// services untouched; namespaced mode creates a dedicated database and bucket
// and migrates the database to head. Search infrastructure is always shared.
func ProvisionDependencies(commonGitDir string, manifest Manifest) (Manifest, *DependencyResult, error) {
	deps := manifest.ResolvedDependencies()
	result := &DependencyResult{}
	switch deps.Mode {
	case DependencyModeShared:
		result.Actions = append(result.Actions, "using shared Postgres, Redis, and MinIO state")
	case DependencyModeNamespaced:
		// Create the namespaced database if it does not exist yet (idempotent).
		if _, err := runPythonScript(manifest, "ensure_database.py"); err != nil {
			return manifest, nil, fmt.Errorf("ensure PostgreSQL database %s: %w", deps.PostgresDatabase, err)
		}
		result.Actions = append(result.Actions, fmt.Sprintf("ensured PostgreSQL database %s", deps.PostgresDatabase))
		envMap, err := runtimeEnvMap(manifest)
		if err != nil {
			return manifest, nil, err
		}
		// Bring the (possibly fresh) database up to the latest schema.
		if err := alembic.UpgradeWithEnv("head", alembic.SchemaDefault, envMap); err != nil {
			return manifest, nil, fmt.Errorf("migrate namespaced database %s: %w", deps.PostgresDatabase, err)
		}
		result.Actions = append(result.Actions, fmt.Sprintf("migrated PostgreSQL database %s", deps.PostgresDatabase))
		if _, err := runPythonScript(manifest, "ensure_bucket.py"); err != nil {
			return manifest, nil, fmt.Errorf("ensure file-store bucket %s: %w", deps.FileStoreBucket, err)
		}
		result.Actions = append(result.Actions, fmt.Sprintf("ensured file-store bucket %s", deps.FileStoreBucket))
		// Redis needs no upfront provisioning; the key prefix alone
		// namespaces the worktree's data.
		result.Actions = append(result.Actions, fmt.Sprintf("reserved Redis prefix %s", deps.RedisPrefix))
	default:
		return manifest, nil, fmt.Errorf("unsupported dependency mode: %s", deps.Mode)
	}
	result.Actions = append(result.Actions, "search infrastructure remains shared-only")
	// Record what was provisioned (and when) in the worktree manifest.
	manifest.Dependencies = deps
	manifest.Dependencies.LastProvisionedAt = time.Now().UTC().Format(time.RFC3339)
	if err := WriteManifest(commonGitDir, manifest); err != nil {
		return manifest, nil, err
	}
	return manifest, result, nil
}
// InspectDependencies returns a health snapshot of the worktree's backing
// services. Shared mode is reported ready without probing (those services are
// managed outside agent-lab); namespaced mode shells out to the embedded
// dependency_status.py probe and parses its JSON output.
func InspectDependencies(manifest Manifest) (*DependencyStatus, error) {
	deps := manifest.ResolvedDependencies()
	status := &DependencyStatus{
		Mode:             deps.Mode,
		Namespace:        deps.Namespace,
		PostgresDatabase: deps.PostgresDatabase,
		RedisPrefix:      deps.RedisPrefix,
		FileStoreBucket:  deps.FileStoreBucket,
		SearchInfraMode:  deps.SearchInfraMode,
	}
	if deps.Mode == DependencyModeShared {
		status.PostgresReady = true
		status.RedisReady = true
		status.FileStoreReady = true
		return status, nil
	}
	output, err := runPythonScript(manifest, "dependency_status.py")
	if err != nil {
		return nil, fmt.Errorf("inspect namespaced dependencies: %w", err)
	}
	// The probe prints one JSON object whose keys match DependencyStatus's
	// json tags; unmarshal over the pre-filled struct.
	if err := json.Unmarshal([]byte(output), status); err != nil {
		return nil, fmt.Errorf("parse dependency status: %w", err)
	}
	return status, nil
}
// ResetDependencies wipes a worktree's namespaced state back to a blank
// slate: the database is dropped and recreated (then re-migrated to head),
// the Redis prefix is cleared, and the file-store bucket is emptied but kept.
// Shared mode is a no-op. The manifest's provisioning timestamp is refreshed.
func ResetDependencies(commonGitDir string, manifest Manifest) (Manifest, *DependencyResult, error) {
	deps := manifest.ResolvedDependencies()
	result := &DependencyResult{}
	if deps.Mode == DependencyModeShared {
		result.Actions = append(result.Actions, "shared dependency mode selected; reset is a no-op")
		return manifest, result, nil
	}
	// The script performs the drop/recreate, Redis clear, and bucket empty.
	if _, err := runPythonScript(manifest, "reset_dependencies.py"); err != nil {
		return manifest, nil, fmt.Errorf("reset namespaced dependencies: %w", err)
	}
	result.Actions = append(result.Actions, fmt.Sprintf("dropped and recreated PostgreSQL database %s", deps.PostgresDatabase))
	result.Actions = append(result.Actions, fmt.Sprintf("cleared Redis prefix %s", deps.RedisPrefix))
	result.Actions = append(result.Actions, fmt.Sprintf("emptied file-store bucket %s", deps.FileStoreBucket))
	envMap, err := runtimeEnvMap(manifest)
	if err != nil {
		return manifest, nil, err
	}
	// The fresh database is empty; bring it back to the latest schema.
	if err := alembic.UpgradeWithEnv("head", alembic.SchemaDefault, envMap); err != nil {
		return manifest, nil, fmt.Errorf("re-migrate namespaced database %s: %w", deps.PostgresDatabase, err)
	}
	result.Actions = append(result.Actions, fmt.Sprintf("re-migrated PostgreSQL database %s", deps.PostgresDatabase))
	result.Actions = append(result.Actions, "search infrastructure remains shared-only and was not reset")
	manifest.Dependencies = deps
	manifest.Dependencies.LastProvisionedAt = time.Now().UTC().Format(time.RFC3339)
	if err := WriteManifest(commonGitDir, manifest); err != nil {
		return manifest, nil, err
	}
	return manifest, result, nil
}
// TeardownDependencies permanently removes a worktree's namespaced state:
// the database is dropped, the Redis prefix cleared, and the file-store
// bucket deleted. Shared mode is a no-op. The manifest's provisioning
// timestamp is blanked to mark the worktree as unprovisioned.
func TeardownDependencies(commonGitDir string, manifest Manifest) (Manifest, *DependencyResult, error) {
	deps := manifest.ResolvedDependencies()
	result := &DependencyResult{}
	if deps.Mode == DependencyModeShared {
		result.Actions = append(result.Actions, "shared dependency mode selected; teardown is a no-op")
		return manifest, result, nil
	}
	if _, err := runPythonScript(manifest, "teardown_dependencies.py"); err != nil {
		return manifest, nil, fmt.Errorf("tear down namespaced dependencies: %w", err)
	}
	result.Actions = append(result.Actions, fmt.Sprintf("dropped PostgreSQL database %s", deps.PostgresDatabase))
	result.Actions = append(result.Actions, fmt.Sprintf("cleared Redis prefix %s", deps.RedisPrefix))
	result.Actions = append(result.Actions, fmt.Sprintf("deleted file-store bucket %s", deps.FileStoreBucket))
	result.Actions = append(result.Actions, "search infrastructure remains shared-only and was not torn down")
	manifest.Dependencies = deps
	// Empty timestamp signals "never provisioned" to later inspections.
	manifest.Dependencies.LastProvisionedAt = ""
	if err := WriteManifest(commonGitDir, manifest); err != nil {
		return manifest, nil, err
	}
	return manifest, result, nil
}
// runtimeEnvMap assembles the environment for runtime commands: defaults from
// the repo's .vscode/.env file (when present), overlaid by the manifest's own
// runtime environment, which always wins on conflicting keys.
func runtimeEnvMap(manifest Manifest) (map[string]string, error) {
	merged := make(map[string]string)
	envFile := filepath.Join(runtimeRepoRoot(manifest), ".vscode", ".env")
	if _, statErr := os.Stat(envFile); statErr == nil {
		entries, err := envutil.LoadFile(envFile)
		if err != nil {
			return nil, err
		}
		for _, entry := range entries {
			key, value, ok := strings.Cut(entry, "=")
			if ok && key != "" {
				merged[key] = value
			}
		}
	}
	// Manifest-provided values override file defaults.
	for key, value := range manifest.RuntimeEnv() {
		merged[key] = value
	}
	return merged, nil
}
// runPythonScript executes one of the embedded helper scripts with the
// worktree's (or source checkout's) Python interpreter, from the backend/
// directory, with the runtime env overrides applied. On success it returns
// trimmed stdout. On failure the error message is, in order of preference:
// trimmed stderr, trimmed stdout, or the raw exec error.
func runPythonScript(manifest Manifest, scriptName string) (string, error) {
	pythonBinary, err := findPythonBinary(manifest)
	if err != nil {
		return "", err
	}
	code, err := loadPythonScript(scriptName)
	if err != nil {
		return "", err
	}
	envMap, err := runtimeEnvMap(manifest)
	if err != nil {
		return "", err
	}
	// Run the script source via -c so nothing has to exist on disk.
	cmd := exec.Command(pythonBinary, "-c", code)
	cmd.Dir = filepath.Join(runtimeRepoRoot(manifest), "backend")
	cmd.Env = envutil.ApplyOverrides(os.Environ(), envMap)
	var stdout bytes.Buffer
	var stderr bytes.Buffer
	cmd.Stdout = &stdout
	cmd.Stderr = &stderr
	if err := cmd.Run(); err != nil {
		// Prefer the script's own diagnostics over Go's exit-status error.
		message := strings.TrimSpace(stderr.String())
		if message == "" {
			message = strings.TrimSpace(stdout.String())
		}
		if message == "" {
			message = err.Error()
		}
		return "", fmt.Errorf("%s", message)
	}
	return strings.TrimSpace(stdout.String()), nil
}
// findPythonBinary locates a usable Python interpreter, preferring the
// worktree's own .venv and falling back to the source checkout's .venv.
// The interpreter's relative path inside a venv differs on Windows.
func findPythonBinary(manifest Manifest) (string, error) {
	interpreter := filepath.Join(".venv", "bin", "python")
	if runtime.GOOS == "windows" {
		interpreter = filepath.Join(".venv", "Scripts", "python.exe")
	}
	for _, root := range []string{manifest.CheckoutPath, manifest.CreatedFromPath} {
		candidate := filepath.Join(root, interpreter)
		if _, err := os.Stat(candidate); err == nil {
			return candidate, nil
		}
	}
	return "", fmt.Errorf("could not find a Python interpreter in %s/.venv or %s/.venv", manifest.CheckoutPath, manifest.CreatedFromPath)
}
// runtimeRepoRoot picks the repo root to run backend tooling from: the
// worktree checkout when it contains a backend/ directory, otherwise the
// source checkout the worktree was created from.
func runtimeRepoRoot(manifest Manifest) string {
	if manifest.CheckoutPath == "" {
		return manifest.CreatedFromPath
	}
	if _, err := os.Stat(filepath.Join(manifest.CheckoutPath, "backend")); err != nil {
		return manifest.CreatedFromPath
	}
	return manifest.CheckoutPath
}

View File

@@ -0,0 +1,17 @@
package agentlab
import (
"embed"
"fmt"
)
// pythonScripts bundles the helper scripts under scripts/ into the binary so
// they can be executed without relying on files existing on disk.
//
//go:embed scripts/*.py
var pythonScripts embed.FS

// loadPythonScript returns the embedded Python source for the given file name
// (e.g. "ensure_database.py"), or an error if no such script was embedded.
func loadPythonScript(name string) (string, error) {
	data, err := pythonScripts.ReadFile("scripts/" + name)
	if err != nil {
		return "", fmt.Errorf("load python script %s: %w", name, err)
	}
	return string(data), nil
}

View File

@@ -0,0 +1,90 @@
# Probe the health of a worktree's namespaced dependencies (Postgres, Redis,
# S3/MinIO) and print a single JSON report to stdout. All connection settings
# arrive via environment variables injected by the Go caller (runPythonScript);
# any connection failure raises and is surfaced through stderr.
import json
import os

import boto3
import psycopg2
import urllib3
from botocore.config import Config
from botocore.exceptions import ClientError
from redis import Redis

# --- PostgreSQL: connect to the namespaced database and count public tables.
db_name = os.environ["POSTGRES_DB"]
host = os.environ.get("POSTGRES_HOST", "localhost")
port = os.environ.get("POSTGRES_PORT", "5432")
user = os.environ.get("POSTGRES_USER", "postgres")
password = os.environ.get("POSTGRES_PASSWORD", "password")
conn = psycopg2.connect(
    host=host, port=port, user=user, password=password, dbname=db_name
)
with conn.cursor() as cur:
    cur.execute(
        "SELECT COUNT(*) FROM information_schema.tables WHERE table_schema = 'public'"
    )
    table_count = int(cur.fetchone()[0])
conn.close()

# --- Redis: count keys under the worktree's namespace prefix using SCAN
# (incremental, non-blocking) rather than KEYS.
redis_prefix = os.environ["DEFAULT_REDIS_PREFIX"]
bucket = os.environ["S3_FILE_STORE_BUCKET_NAME"]
redis_client = Redis(
    host=os.environ.get("REDIS_HOST", "localhost"),
    port=int(os.environ.get("REDIS_PORT", "6379")),
    db=int(os.environ.get("REDIS_DB_NUMBER", "0")),
    password=os.environ.get("REDIS_PASSWORD") or None,
    ssl=os.environ.get("REDIS_SSL", "").lower() == "true",
    ssl_cert_reqs="none" if os.environ.get("REDIS_SSL", "").lower() == "true" else None,
)
redis_key_count = 0
for _ in redis_client.scan_iter(match=f"{redis_prefix}:*", count=1000):
    redis_key_count += 1

# --- S3/MinIO: build a client. A custom endpoint forces SigV4 and path-style
# addressing (MinIO-compatible); SSL verification can be disabled for local
# self-signed setups.
endpoint = os.environ.get("S3_ENDPOINT_URL") or None
access_key = os.environ.get("S3_AWS_ACCESS_KEY_ID") or None
secret_key = os.environ.get("S3_AWS_SECRET_ACCESS_KEY") or None
region = os.environ.get("AWS_REGION_NAME") or "us-east-1"
verify_ssl = os.environ.get("S3_VERIFY_SSL", "false").lower() == "true"
kwargs = {"service_name": "s3", "region_name": region}
if endpoint:
    kwargs["endpoint_url"] = endpoint
    kwargs["config"] = Config(signature_version="s3v4", s3={"addressing_style": "path"})
    if not verify_ssl:
        urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
        kwargs["verify"] = False
if access_key and secret_key:
    kwargs["aws_access_key_id"] = access_key
    kwargs["aws_secret_access_key"] = secret_key
s3_client = boto3.client(**kwargs)

# The bucket is reported unhealthy only when HEAD/LIST fail with a client
# error (e.g. missing bucket or bad credentials).
bucket_ready = True
bucket_object_count = 0
try:
    s3_client.head_bucket(Bucket=bucket)
    paginator = s3_client.get_paginator("list_objects_v2")
    for page in paginator.paginate(Bucket=bucket):
        bucket_object_count += len(page.get("Contents", []))
except ClientError:
    bucket_ready = False

# Emit a JSON object whose keys mirror the Go DependencyStatus struct tags.
# postgres_ready/redis_ready are True here because a failure above would have
# raised before reaching this point.
print(
    json.dumps(
        {
            "mode": os.environ["AGENT_LAB_DEPENDENCY_MODE"],
            "namespace": os.environ.get("AGENT_LAB_NAMESPACE", ""),
            "postgres_database": db_name,
            "postgres_ready": True,
            "postgres_table_count": table_count,
            "redis_prefix": redis_prefix,
            "redis_ready": True,
            "redis_key_count": redis_key_count,
            "file_store_bucket": bucket,
            "file_store_ready": bucket_ready,
            "file_store_object_count": bucket_object_count,
            "search_infra_mode": os.environ.get(
                "AGENT_LAB_SEARCH_INFRA_MODE", "shared"
            ),
        }
    )
)

View File

@@ -0,0 +1,40 @@
# Idempotently create the worktree's file-store bucket. Connection settings
# come from environment variables set by the Go caller; prints the bucket
# name on success.
import os

import boto3
import urllib3
from botocore.config import Config
from botocore.exceptions import ClientError

bucket = os.environ["S3_FILE_STORE_BUCKET_NAME"]
endpoint = os.environ.get("S3_ENDPOINT_URL") or None
access_key = os.environ.get("S3_AWS_ACCESS_KEY_ID") or None
secret_key = os.environ.get("S3_AWS_SECRET_ACCESS_KEY") or None
region = os.environ.get("AWS_REGION_NAME") or "us-east-1"
verify_ssl = os.environ.get("S3_VERIFY_SSL", "false").lower() == "true"
kwargs = {"service_name": "s3", "region_name": region}
if endpoint:
    # A custom endpoint forces SigV4 and path-style addressing (MinIO-compatible).
    kwargs["endpoint_url"] = endpoint
    kwargs["config"] = Config(signature_version="s3v4", s3={"addressing_style": "path"})
    if not verify_ssl:
        urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
        kwargs["verify"] = False
if access_key and secret_key:
    kwargs["aws_access_key_id"] = access_key
    kwargs["aws_secret_access_key"] = secret_key
client = boto3.client(**kwargs)
try:
    client.head_bucket(Bucket=bucket)
except ClientError as exc:
    # 404 = bucket missing, 403 = no access; anything else is unexpected.
    # NOTE(review): a 403 can also mean the bucket exists under another
    # account, in which case create_bucket below will fail — confirm this is
    # acceptable for shared S3 deployments.
    status = exc.response.get("ResponseMetadata", {}).get("HTTPStatusCode")
    if status not in (403, 404):
        raise
    if endpoint or region == "us-east-1":
        # us-east-1 (and MinIO-style endpoints) reject a LocationConstraint.
        client.create_bucket(Bucket=bucket)
    else:
        client.create_bucket(
            Bucket=bucket, CreateBucketConfiguration={"LocationConstraint": region}
        )
print(bucket)

View File

@@ -0,0 +1,23 @@
# Idempotently create the worktree's namespaced PostgreSQL database, connecting
# through the admin database. Prints the target database name on success.
import os

import psycopg2
from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT

host = os.environ.get("POSTGRES_HOST", "localhost")
port = os.environ.get("POSTGRES_PORT", "5432")
user = os.environ.get("POSTGRES_USER", "postgres")
password = os.environ.get("POSTGRES_PASSWORD", "password")
target_db = os.environ["POSTGRES_DB"]
admin_db = os.environ.get("AGENT_LAB_POSTGRES_ADMIN_DB", "postgres")
# CREATE DATABASE cannot run inside a transaction block, hence autocommit.
conn = psycopg2.connect(
    host=host, port=port, user=user, password=password, dbname=admin_db
)
conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
with conn.cursor() as cur:
    cur.execute("SELECT 1 FROM pg_database WHERE datname = %s", (target_db,))
    if cur.fetchone() is None:
        # Placeholders only work for values, not identifiers, so the name is
        # interpolated; it comes from tool-controlled env and the double
        # quotes keep it a single identifier.
        cur.execute(f'CREATE DATABASE "{target_db}"')
conn.close()
print(target_db)

View File

@@ -0,0 +1,67 @@
# Reset a worktree's namespaced dependencies to a blank slate: drop and
# recreate the PostgreSQL database, delete every Redis key under the namespace
# prefix, and empty (but keep) the file-store bucket. The Go caller re-runs
# migrations afterwards.
import os

import boto3
import psycopg2
import urllib3
from botocore.config import Config
from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT
from redis import Redis

# --- PostgreSQL: drop and recreate the namespaced database.
host = os.environ.get("POSTGRES_HOST", "localhost")
port = os.environ.get("POSTGRES_PORT", "5432")
user = os.environ.get("POSTGRES_USER", "postgres")
password = os.environ.get("POSTGRES_PASSWORD", "password")
target_db = os.environ["POSTGRES_DB"]
admin_db = os.environ.get("AGENT_LAB_POSTGRES_ADMIN_DB", "postgres")
# DROP/CREATE DATABASE cannot run inside a transaction block, hence autocommit.
conn = psycopg2.connect(
    host=host, port=port, user=user, password=password, dbname=admin_db
)
conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
with conn.cursor() as cur:
    # Terminate any open connections first; DROP DATABASE fails otherwise.
    cur.execute(
        "SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE datname = %s AND pid <> pg_backend_pid()",
        (target_db,),
    )
    cur.execute(f'DROP DATABASE IF EXISTS "{target_db}"')
    cur.execute(f'CREATE DATABASE "{target_db}"')
conn.close()

# --- Redis: delete every key under the namespace prefix (SCAN, not KEYS).
redis_prefix = os.environ["DEFAULT_REDIS_PREFIX"]
redis_client = Redis(
    host=os.environ.get("REDIS_HOST", "localhost"),
    port=int(os.environ.get("REDIS_PORT", "6379")),
    db=int(os.environ.get("REDIS_DB_NUMBER", "0")),
    password=os.environ.get("REDIS_PASSWORD") or None,
    ssl=os.environ.get("REDIS_SSL", "").lower() == "true",
    ssl_cert_reqs="none" if os.environ.get("REDIS_SSL", "").lower() == "true" else None,
)
keys = list(redis_client.scan_iter(match=f"{redis_prefix}:*", count=1000))
if keys:
    redis_client.delete(*keys)

# --- S3/MinIO: delete every object but keep the bucket itself.
bucket = os.environ["S3_FILE_STORE_BUCKET_NAME"]
endpoint = os.environ.get("S3_ENDPOINT_URL") or None
access_key = os.environ.get("S3_AWS_ACCESS_KEY_ID") or None
secret_key = os.environ.get("S3_AWS_SECRET_ACCESS_KEY") or None
region = os.environ.get("AWS_REGION_NAME") or "us-east-1"
verify_ssl = os.environ.get("S3_VERIFY_SSL", "false").lower() == "true"
kwargs = {"service_name": "s3", "region_name": region}
if endpoint:
    # A custom endpoint forces SigV4 and path-style addressing (MinIO-compatible).
    kwargs["endpoint_url"] = endpoint
    kwargs["config"] = Config(signature_version="s3v4", s3={"addressing_style": "path"})
    if not verify_ssl:
        urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
        kwargs["verify"] = False
if access_key and secret_key:
    kwargs["aws_access_key_id"] = access_key
    kwargs["aws_secret_access_key"] = secret_key
s3_client = boto3.client(**kwargs)
# NOTE(review): unlike teardown_dependencies.py, a missing bucket here raises
# a ClientError instead of being tolerated — confirm reset is only ever run
# after the bucket was provisioned.
paginator = s3_client.get_paginator("list_objects_v2")
for page in paginator.paginate(Bucket=bucket):
    objects = [{"Key": item["Key"]} for item in page.get("Contents", [])]
    if objects:
        s3_client.delete_objects(Bucket=bucket, Delete={"Objects": objects})

View File

@@ -0,0 +1,73 @@
# Permanently tear down a worktree's namespaced dependencies: drop the
# PostgreSQL database, delete every Redis key under the namespace prefix, and
# delete the file-store bucket (objects first, then the bucket itself).
import os

import boto3
import psycopg2
import urllib3
from botocore.config import Config
from botocore.exceptions import ClientError
from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT
from redis import Redis

# --- PostgreSQL: drop the namespaced database.
host = os.environ.get("POSTGRES_HOST", "localhost")
port = os.environ.get("POSTGRES_PORT", "5432")
user = os.environ.get("POSTGRES_USER", "postgres")
password = os.environ.get("POSTGRES_PASSWORD", "password")
target_db = os.environ["POSTGRES_DB"]
admin_db = os.environ.get("AGENT_LAB_POSTGRES_ADMIN_DB", "postgres")
# DROP DATABASE cannot run inside a transaction block, hence autocommit.
conn = psycopg2.connect(
    host=host, port=port, user=user, password=password, dbname=admin_db
)
conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
with conn.cursor() as cur:
    # Terminate any open connections first; DROP DATABASE fails otherwise.
    cur.execute(
        "SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE datname = %s AND pid <> pg_backend_pid()",
        (target_db,),
    )
    cur.execute(f'DROP DATABASE IF EXISTS "{target_db}"')
conn.close()

# --- Redis: delete every key under the namespace prefix (SCAN, not KEYS).
redis_prefix = os.environ["DEFAULT_REDIS_PREFIX"]
redis_client = Redis(
    host=os.environ.get("REDIS_HOST", "localhost"),
    port=int(os.environ.get("REDIS_PORT", "6379")),
    db=int(os.environ.get("REDIS_DB_NUMBER", "0")),
    password=os.environ.get("REDIS_PASSWORD") or None,
    ssl=os.environ.get("REDIS_SSL", "").lower() == "true",
    ssl_cert_reqs="none" if os.environ.get("REDIS_SSL", "").lower() == "true" else None,
)
keys = list(redis_client.scan_iter(match=f"{redis_prefix}:*", count=1000))
if keys:
    redis_client.delete(*keys)

# --- S3/MinIO: empty then delete the bucket. A bucket that is already gone
# (or inaccessible) is tolerated so teardown stays idempotent.
bucket = os.environ["S3_FILE_STORE_BUCKET_NAME"]
endpoint = os.environ.get("S3_ENDPOINT_URL") or None
access_key = os.environ.get("S3_AWS_ACCESS_KEY_ID") or None
secret_key = os.environ.get("S3_AWS_SECRET_ACCESS_KEY") or None
region = os.environ.get("AWS_REGION_NAME") or "us-east-1"
verify_ssl = os.environ.get("S3_VERIFY_SSL", "false").lower() == "true"
kwargs = {"service_name": "s3", "region_name": region}
if endpoint:
    # A custom endpoint forces SigV4 and path-style addressing (MinIO-compatible).
    kwargs["endpoint_url"] = endpoint
    kwargs["config"] = Config(signature_version="s3v4", s3={"addressing_style": "path"})
    if not verify_ssl:
        urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
        kwargs["verify"] = False
if access_key and secret_key:
    kwargs["aws_access_key_id"] = access_key
    kwargs["aws_secret_access_key"] = secret_key
s3_client = boto3.client(**kwargs)
try:
    # The bucket must be empty before delete_bucket will succeed.
    paginator = s3_client.get_paginator("list_objects_v2")
    for page in paginator.paginate(Bucket=bucket):
        objects = [{"Key": item["Key"]} for item in page.get("Contents", [])]
        if objects:
            s3_client.delete_objects(Bucket=bucket, Delete={"Objects": objects})
    s3_client.delete_bucket(Bucket=bucket)
except ClientError as exc:
    status = exc.response.get("ResponseMetadata", {}).get("HTTPStatusCode")
    if status not in (403, 404):
        raise

View File

@@ -53,12 +53,17 @@ func FindAlembicBinary() (string, error) {
// otherwise it will attempt to run via docker exec on a container
// that has alembic installed (e.g., api_server).
func Run(args []string, schema Schema) error {
return RunWithEnv(args, schema, nil)
}
// RunWithEnv executes an alembic command with explicit environment overrides.
func RunWithEnv(args []string, schema Schema, extraEnv map[string]string) error {
// Check if we need to run via docker exec
if shouldUseDockerExec() {
return runViaDockerExec(args, schema)
return runViaDockerExec(args, schema, extraEnv)
}
return runLocally(args, schema)
return runLocally(args, schema, extraEnv)
}
// shouldUseDockerExec determines if we should run alembic via docker exec.
@@ -79,7 +84,7 @@ func shouldUseDockerExec() bool {
}
// runLocally runs alembic on the local machine.
func runLocally(args []string, schema Schema) error {
func runLocally(args []string, schema Schema, extraEnv map[string]string) error {
backendDir, err := paths.BackendDir()
if err != nil {
return fmt.Errorf("failed to find backend directory: %w", err)
@@ -104,13 +109,13 @@ func runLocally(args []string, schema Schema) error {
cmd.Stdin = os.Stdin
// Pass through POSTGRES_* environment variables
cmd.Env = buildAlembicEnv()
cmd.Env = buildAlembicEnv(extraEnv)
return cmd.Run()
}
// runViaDockerExec runs alembic inside a Docker container that has network access.
func runViaDockerExec(args []string, schema Schema) error {
func runViaDockerExec(args []string, schema Schema, extraEnv map[string]string) error {
// Find a container with alembic installed (api_server)
container, err := findAlembicContainer()
if err != nil {
@@ -136,7 +141,11 @@ func runViaDockerExec(args []string, schema Schema) error {
// Run alembic inside the container
// The container should have the correct env vars and network access
dockerArgs := []string{"exec", "-i", container, "alembic"}
dockerArgs := []string{"exec", "-i"}
for key, value := range extraEnv {
dockerArgs = append(dockerArgs, "-e", fmt.Sprintf("%s=%s", key, value))
}
dockerArgs = append(dockerArgs, container, "alembic")
dockerArgs = append(dockerArgs, alembicArgs...)
cmd := exec.Command("docker", dockerArgs...)
@@ -158,7 +167,7 @@ var alembicContainerNames = []string{
// It inherits the current environment and ensures POSTGRES_* variables are set.
// If POSTGRES_HOST is not explicitly set, it attempts to detect the PostgreSQL
// container IP address automatically.
func buildAlembicEnv() []string {
func buildAlembicEnv(extraEnv map[string]string) []string {
env := os.Environ()
// Get postgres config (which reads from env with defaults)
@@ -188,6 +197,10 @@ func buildAlembicEnv() []string {
}
}
for key, value := range extraEnv {
env = append(env, fmt.Sprintf("%s=%s", key, value))
}
return env
}
@@ -238,6 +251,14 @@ func Upgrade(revision string, schema Schema) error {
return Run([]string{"upgrade", revision}, schema)
}
// UpgradeWithEnv runs alembic upgrade with explicit environment overrides.
func UpgradeWithEnv(revision string, schema Schema, extraEnv map[string]string) error {
if revision == "" {
revision = "head"
}
return RunWithEnv([]string{"upgrade", revision}, schema, extraEnv)
}
// Downgrade runs alembic downgrade to the specified revision.
func Downgrade(revision string, schema Schema) error {
return Run([]string{"downgrade", revision}, schema)

View File

@@ -0,0 +1,105 @@
package envutil
import (
"bufio"
"fmt"
"os"
"sort"
"strings"
)
// LoadFile parses a .env-style file into KEY=VALUE entries suitable for
// appending to os.Environ(). Blank lines and comments are skipped.
func LoadFile(path string) ([]string, error) {
f, err := os.Open(path)
if err != nil {
return nil, fmt.Errorf("open env file %s: %w", path, err)
}
defer func() { _ = f.Close() }()
var envVars []string
scanner := bufio.NewScanner(f)
for scanner.Scan() {
line := strings.TrimSpace(scanner.Text())
if line == "" || strings.HasPrefix(line, "#") {
continue
}
if idx := strings.Index(line, "="); idx > 0 {
key := strings.TrimSpace(line[:idx])
value := strings.TrimSpace(line[idx+1:])
value = strings.Trim(value, `"'`)
envVars = append(envVars, fmt.Sprintf("%s=%s", key, value))
}
}
if err := scanner.Err(); err != nil {
return nil, fmt.Errorf("read env file %s: %w", path, err)
}
return envVars, nil
}
// Merge combines shell environment with file-based defaults. Shell values
// take precedence: a file entry is appended only when its key does not
// already appear in the shell environment.
func Merge(shellEnv, fileVars []string) []string {
	keyOf := func(entry string) (string, bool) {
		idx := strings.Index(entry, "=")
		if idx <= 0 {
			return "", false
		}
		return entry[:idx], true
	}

	seen := make(map[string]bool, len(shellEnv))
	for _, entry := range shellEnv {
		if key, ok := keyOf(entry); ok {
			seen[key] = true
		}
	}

	merged := append(make([]string, 0, len(shellEnv)+len(fileVars)), shellEnv...)
	for _, entry := range fileVars {
		if key, ok := keyOf(entry); ok && !seen[key] {
			merged = append(merged, entry)
		}
	}
	return merged
}
// ApplyOverrides replaces or appends KEY=VALUE entries in env with the
// provided overrides: existing entries whose key is overridden are dropped,
// then the overrides are appended in stable (sorted) order, so each
// overridden key appears exactly once in the result.
func ApplyOverrides(env []string, overrides map[string]string) []string {
	if len(overrides) == 0 {
		return env
	}
	kept := make([]string, 0, len(env)+len(overrides))
	for _, entry := range env {
		if idx := strings.Index(entry, "="); idx > 0 {
			if _, overridden := overrides[entry[:idx]]; overridden {
				continue // replaced by the appended override below
			}
		}
		kept = append(kept, entry)
	}
	return append(kept, MapToEnvEntries(overrides)...)
}
// MapToEnvEntries converts a string map into KEY=VALUE entries ordered by
// key, so the output is deterministic across runs (map iteration is not).
func MapToEnvEntries(values map[string]string) []string {
	keys := make([]string, 0, len(values))
	for key := range values {
		keys = append(keys, key)
	}
	// Sort the keys (not the joined entries) so ordering is purely by key.
	sort.Strings(keys)
	entries := make([]string, 0, len(values))
	for _, key := range keys {
		entries = append(entries, key+"="+values[key])
	}
	return entries
}

View File

@@ -0,0 +1,122 @@
package journey
import (
"encoding/json"
"fmt"
"os"
"path/filepath"
"strings"
)
// Paths, relative to the repo root, of the journey registry (the full catalog
// of e2e journeys) and the default per-PR journey plan consumed by CI.
const (
	RegistryPath    = "web/tests/e2e/journeys/registry.json"
	DefaultPlanPath = ".github/agent-journeys.json"
)

// Definition describes one end-to-end journey in the registry: which spec
// file runs it, under which Playwright project, and what setup it requires.
type Definition struct {
	Name                string `json:"name"`
	Description         string `json:"description"`
	TestPath            string `json:"test_path"`
	Project             string `json:"project"`
	RequiresModelServer bool   `json:"requires_model_server"`
	SkipGlobalSetup     bool   `json:"skip_global_setup"`
}

// Registry is the parsed registry.json: the catalog of all known journeys.
type Registry struct {
	Journeys []Definition `json:"journeys"`
}

// Plan is a selection of journey names to run, e.g. from agent-journeys.json.
type Plan struct {
	Journeys []string `json:"journeys"`
}
// LoadRegistry reads and validates the journey registry at RegistryPath under
// the given repo root. An empty registry is an error, and every journey must
// carry a name, a test path, and a Playwright project.
func LoadRegistry(repoRoot string) (Registry, error) {
	var registry Registry
	raw, err := os.ReadFile(filepath.Join(repoRoot, RegistryPath))
	if err != nil {
		return registry, fmt.Errorf("read journey registry: %w", err)
	}
	if err := json.Unmarshal(raw, &registry); err != nil {
		return registry, fmt.Errorf("parse journey registry: %w", err)
	}
	if len(registry.Journeys) == 0 {
		return registry, fmt.Errorf("journey registry is empty")
	}
	for _, entry := range registry.Journeys {
		switch {
		case strings.TrimSpace(entry.Name) == "":
			return registry, fmt.Errorf("journey registry contains an entry with an empty name")
		case strings.TrimSpace(entry.TestPath) == "":
			return registry, fmt.Errorf("journey %q is missing test_path", entry.Name)
		case strings.TrimSpace(entry.Project) == "":
			return registry, fmt.Errorf("journey %q is missing project", entry.Name)
		}
	}
	return registry, nil
}
// LoadPlan reads a journey plan file and validates that it names at least
// one journey to run.
func LoadPlan(planPath string) (Plan, error) {
	var plan Plan
	raw, err := os.ReadFile(planPath)
	if err != nil {
		return plan, fmt.Errorf("read journey plan: %w", err)
	}
	if err := json.Unmarshal(raw, &plan); err != nil {
		return plan, fmt.Errorf("parse journey plan: %w", err)
	}
	if len(plan.Journeys) == 0 {
		return plan, fmt.Errorf("journey plan contains no journeys")
	}
	return plan, nil
}
// ResolveDefinitions maps journey names to their registry definitions,
// preserving the requested order. An unknown name is an error.
func ResolveDefinitions(repoRoot string, names []string) ([]Definition, error) {
	registry, err := LoadRegistry(repoRoot)
	if err != nil {
		return nil, err
	}
	index := make(map[string]Definition, len(registry.Journeys))
	for _, def := range registry.Journeys {
		index[def.Name] = def
	}
	resolved := make([]Definition, 0, len(names))
	for _, name := range names {
		def, found := index[name]
		if !found {
			return nil, fmt.Errorf("unknown journey %q", name)
		}
		resolved = append(resolved, def)
	}
	return resolved, nil
}
// Slug normalizes an arbitrary string into a lowercase, dash-separated
// identifier containing only [a-z0-9-]: runs of non-alphanumeric characters
// collapse to a single dash, and leading/trailing dashes are trimmed. It
// never returns an empty string, falling back to "journey".
func Slug(value string) string {
	lowered := strings.ToLower(strings.TrimSpace(value))
	lowered = strings.ReplaceAll(lowered, "/", "-")

	var out []rune
	pendingRun := false // true while inside a run of separator characters
	for _, r := range lowered {
		isAlnum := (r >= 'a' && r <= 'z') || (r >= '0' && r <= '9')
		switch {
		case isAlnum:
			out = append(out, r)
			pendingRun = false
		case !pendingRun:
			out = append(out, '-')
			pendingRun = true
		}
	}

	slug := strings.Trim(string(out), "-")
	if slug == "" {
		return "journey"
	}
	return slug
}

View File

@@ -0,0 +1,59 @@
package journey
import (
"os"
"path/filepath"
"testing"
)
// TestResolveDefinitions verifies that a journey listed in registry.json can
// be resolved by name with its fields intact. (The stray t.Helper() call was
// removed: marking a Test function itself as a helper misattributes failure
// line numbers to its caller.)
func TestResolveDefinitions(t *testing.T) {
	root := t.TempDir()
	registryDir := filepath.Join(root, "web", "tests", "e2e", "journeys")
	if err := os.MkdirAll(registryDir, 0755); err != nil {
		t.Fatalf("mkdir: %v", err)
	}
	// Minimal but valid registry containing a single journey.
	if err := os.WriteFile(filepath.Join(registryDir, "registry.json"), []byte(`{
"journeys": [
{
"name": "auth-landing",
"description": "test",
"test_path": "tests/e2e/journeys/auth_landing.spec.ts",
"project": "journey",
"requires_model_server": false,
"skip_global_setup": true
}
]
}`), 0644); err != nil {
		t.Fatalf("write registry: %v", err)
	}
	definitions, err := ResolveDefinitions(root, []string{"auth-landing"})
	if err != nil {
		t.Fatalf("resolve definitions: %v", err)
	}
	if len(definitions) != 1 {
		t.Fatalf("expected 1 definition, got %d", len(definitions))
	}
	if definitions[0].Project != "journey" {
		t.Fatalf("expected project journey, got %q", definitions[0].Project)
	}
}
// TestLoadPlanRequiresJourneys verifies that a plan file naming at least one
// journey loads successfully and round-trips its contents. (The stray
// t.Helper() call was removed: marking a Test function itself as a helper
// misattributes failure line numbers to its caller.)
func TestLoadPlanRequiresJourneys(t *testing.T) {
	path := filepath.Join(t.TempDir(), "journeys.json")
	if err := os.WriteFile(path, []byte(`{"journeys":["auth-landing"]}`), 0644); err != nil {
		t.Fatalf("write plan: %v", err)
	}
	plan, err := LoadPlan(path)
	if err != nil {
		t.Fatalf("load plan: %v", err)
	}
	if len(plan.Journeys) != 1 || plan.Journeys[0] != "auth-landing" {
		t.Fatalf("unexpected plan contents: %+v", plan)
	}
}

View File

@@ -0,0 +1,147 @@
package prreview
import (
"fmt"
"regexp"
"sort"
"strings"
)
// Source identifies who authored a review thread: a human, one of the known
// AI reviewers, or a generic bot.
type Source string

const (
	SourceHuman    Source = "human"
	SourceCodex    Source = "codex"
	SourceGreptile Source = "greptile"
	SourceCubic    Source = "cubic"
	SourceBot      Source = "bot"
)

// Comment is a single review comment within a thread.
type Comment struct {
	ID          int    `json:"id"`
	Body        string `json:"body"`
	AuthorLogin string `json:"author_login"`
	URL         string `json:"url,omitempty"`
	CreatedAt   string `json:"created_at,omitempty"`
}

// Thread is one review conversation, optionally anchored to a file location.
type Thread struct {
	ID         string    `json:"id"`
	IsResolved bool      `json:"is_resolved"`
	IsOutdated bool      `json:"is_outdated"`
	Path       string    `json:"path,omitempty"`
	Line       int       `json:"line,omitempty"`
	StartLine  int       `json:"start_line,omitempty"`
	Comments   []Comment `json:"comments"`
}

// PullRequest is the triage input: a PR plus its review threads.
type PullRequest struct {
	Number  int      `json:"number"`
	Title   string   `json:"title"`
	URL     string   `json:"url,omitempty"`
	Threads []Thread `json:"threads"`
}

// ThreadSummary is the triage verdict for one thread: who raised it, which
// category it falls into, and why.
type ThreadSummary struct {
	Thread      Thread   `json:"thread"`
	Source      Source   `json:"source"`
	Category    string   `json:"category"`
	DuplicateOf string   `json:"duplicate_of,omitempty"`
	Reasons     []string `json:"reasons,omitempty"`
}

// TriageResult pairs the input PR with per-thread summaries.
type TriageResult struct {
	PullRequest PullRequest `json:"pull_request"`
	Summaries   []ThreadSummary `json:"summaries"`
}

// nonAlphaNum matches runs of characters outside [a-z0-9]; compiled once at
// package init. Not referenced in this excerpt — presumably used by
// normalization/dedup helpers later in the file (verify before removing).
var nonAlphaNum = regexp.MustCompile(`[^a-z0-9]+`)
// ClassifySource maps a GitHub login to the review source it represents.
// Matching is case-insensitive; tool-specific markers are checked before the
// generic "bot" fallback, and anything unmatched is treated as a human.
func ClassifySource(login string) Source {
	lower := strings.ToLower(strings.TrimSpace(login))
	if strings.Contains(lower, "codex") {
		return SourceCodex
	}
	if strings.Contains(lower, "greptile") {
		return SourceGreptile
	}
	if strings.Contains(lower, "cubic") {
		return SourceCubic
	}
	if strings.HasSuffix(lower, "[bot]") || strings.Contains(lower, "bot") {
		return SourceBot
	}
	return SourceHuman
}
// Triage classifies every review thread on a pull request into one of four
// categories: "resolved", "outdated", "duplicate", or "actionable". The first
// actionable thread seen for a given path/line/body key claims it; later
// actionable threads with the same key are marked duplicates of it. The
// summaries are sorted by category, then source, then thread ID for stable
// output.
func Triage(pr PullRequest) TriageResult {
	firstSeen := make(map[string]string)
	out := make([]ThreadSummary, 0, len(pr.Threads))

	for _, th := range pr.Threads {
		// Source is taken from the thread's first comment author.
		src := SourceHuman
		if len(th.Comments) != 0 {
			src = ClassifySource(th.Comments[0].AuthorLogin)
		}

		s := ThreadSummary{Thread: th, Source: src, Category: "actionable"}
		switch {
		case th.IsResolved:
			s.Category = "resolved"
			s.Reasons = append(s.Reasons, "thread already resolved")
		case th.IsOutdated:
			s.Category = "outdated"
			s.Reasons = append(s.Reasons, "thread marked outdated by GitHub")
		}

		// Duplicate detection only applies to threads still actionable.
		if s.Category == "actionable" {
			key := duplicateKey(th)
			if prior, dup := firstSeen[key]; dup {
				s.Category = "duplicate"
				s.DuplicateOf = prior
				s.Reasons = append(s.Reasons, fmt.Sprintf("duplicates %s", prior))
			} else {
				firstSeen[key] = th.ID
			}
		}

		switch {
		case s.Category != "actionable":
			// Non-actionable threads already carry their reason.
		case src == SourceHuman:
			s.Reasons = append(s.Reasons, "human review requires explicit response or fix")
		default:
			s.Reasons = append(s.Reasons, fmt.Sprintf("%s-generated review comment", src))
		}

		out = append(out, s)
	}

	sort.Slice(out, func(a, b int) bool {
		left, right := out[a], out[b]
		if left.Category != right.Category {
			return left.Category < right.Category
		}
		if left.Source != right.Source {
			return left.Source < right.Source
		}
		return left.Thread.ID < right.Thread.ID
	})

	return TriageResult{PullRequest: pr, Summaries: out}
}
// duplicateKey builds the identity used for duplicate detection: the thread's
// path and line, plus the normalized body of its first comment when present,
// joined with "::".
func duplicateKey(thread Thread) string {
	key := fmt.Sprintf("%s::%d", thread.Path, thread.Line)
	if len(thread.Comments) != 0 {
		key = key + "::" + normalizeBody(thread.Comments[0].Body)
	}
	return key
}
// normalizeBody canonicalizes a comment body for duplicate comparison:
// lower-cased, every character outside [a-z0-9] treated as a separator, and
// runs of separators collapsed to single spaces.
func normalizeBody(body string) string {
	lowered := strings.ToLower(strings.TrimSpace(body))
	cleaned := strings.Map(func(r rune) rune {
		if (r >= 'a' && r <= 'z') || (r >= '0' && r <= '9') {
			return r
		}
		return ' '
	}, lowered)
	return strings.Join(strings.Fields(cleaned), " ")
}

View File

@@ -0,0 +1,61 @@
package prreview
import "testing"
// TestClassifySource covers each recognized reviewer-login pattern plus the
// human fallback.
//
// Note: the stray t.Helper() call was removed — it marks the calling function
// as a helper and belongs only in shared helpers, not in Test functions.
func TestClassifySource(t *testing.T) {
	cases := map[string]Source{
		"openai-codex-reviewer[bot]": SourceCodex,
		"greptile-ai[bot]":           SourceGreptile,
		"cubic-review[bot]":          SourceCubic,
		"renovate[bot]":              SourceBot,
		"human-user":                 SourceHuman,
	}
	for login, expected := range cases {
		if actual := ClassifySource(login); actual != expected {
			t.Fatalf("classify %q: expected %s, got %s", login, expected, actual)
		}
	}
}
// TestTriageMarksDuplicates verifies that two unresolved threads anchored to
// the same file and line with near-identical bodies (differing only in
// punctuation) are collapsed: the later thread is categorized "duplicate"
// rather than "actionable".
//
// Note: the stray t.Helper() call was removed — it marks the calling function
// as a helper and belongs only in shared helpers, not in Test functions.
func TestTriageMarksDuplicates(t *testing.T) {
	result := Triage(PullRequest{
		Number: 42,
		Threads: []Thread{
			{
				ID:   "thread-1",
				Path: "web/src/foo.tsx",
				Line: 10,
				Comments: []Comment{
					{ID: 1, AuthorLogin: "greptile-ai[bot]", Body: "Handle null values here."},
				},
			},
			{
				ID:   "thread-2",
				Path: "web/src/foo.tsx",
				Line: 10,
				Comments: []Comment{
					{ID: 2, AuthorLogin: "openai-codex-reviewer[bot]", Body: "Handle null values here"},
				},
			},
		},
	})
	if len(result.Summaries) != 2 {
		t.Fatalf("expected 2 summaries, got %d", len(result.Summaries))
	}
	var duplicateFound bool
	for _, summary := range result.Summaries {
		if summary.Thread.ID == "thread-2" && summary.Category == "duplicate" {
			duplicateFound = true
		}
	}
	if !duplicateFound {
		t.Fatalf("expected duplicate thread to be detected: %+v", result.Summaries)
	}
}

16
uv.lock generated
View File

@@ -4511,7 +4511,7 @@ dev = [
{ name = "matplotlib", specifier = "==3.10.8" },
{ name = "mypy", specifier = "==1.13.0" },
{ name = "mypy-extensions", specifier = "==1.0.0" },
{ name = "onyx-devtools", specifier = "==0.7.5" },
{ name = "onyx-devtools", specifier = "==0.7.4" },
{ name = "openapi-generator-cli", specifier = "==7.17.0" },
{ name = "pandas-stubs", specifier = "~=2.3.3" },
{ name = "pre-commit", specifier = "==3.2.2" },
@@ -4554,19 +4554,19 @@ model-server = [
[[package]]
name = "onyx-devtools"
version = "0.7.5"
version = "0.7.4"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "fastapi" },
{ name = "openapi-generator-cli" },
]
wheels = [
{ url = "https://files.pythonhosted.org/packages/cb/f8/844e34f5126ae40fff0d012bba0b28f031f8871062759bb3789eae4f5e0a/onyx_devtools-0.7.5-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:b3cd434c722ae48a1f651748a9f094711b29d1a9f37fbbadef3144f2cdb0f16d", size = 4238900, upload-time = "2026-04-10T07:02:16.382Z" },
{ url = "https://files.pythonhosted.org/packages/2d/97/d1db725f900b199fa3f7a7a7c9b51ae75d4b18755c924f00f06a7703e552/onyx_devtools-0.7.5-py3-none-macosx_11_0_arm64.whl", hash = "sha256:c50e3d76d4f8cc4faa6250e758d42f0249067f0e17bc82b99c6c00dd48114393", size = 3913672, upload-time = "2026-04-10T07:02:17.46Z" },
{ url = "https://files.pythonhosted.org/packages/31/83/e11bedb0a1321b63c844a418be1990c172ed363c6ee612978c3a38df71f1/onyx_devtools-0.7.5-py3-none-manylinux_2_17_aarch64.whl", hash = "sha256:ec01aeaaa14854b0933bb85bbfc51184599d3dbf1c0097ff59c1c72db8222a5a", size = 3779585, upload-time = "2026-04-10T07:02:16.31Z" },
{ url = "https://files.pythonhosted.org/packages/b3/85/128d25cd35c1adc436dcff9ab4f2c20cf29528d09415280c1230ff0ca993/onyx_devtools-0.7.5-py3-none-manylinux_2_17_x86_64.whl", hash = "sha256:586d50ecb6dcea95611135e4cd4529ebedd8ab84a41b1adf3be1280a48dc52af", size = 4201962, upload-time = "2026-04-10T07:02:14.466Z" },
{ url = "https://files.pythonhosted.org/packages/99/5d/83c80f918b399fea998cd41bfe90bda733eda77e133ca4dc1e9ce18a9b4a/onyx_devtools-0.7.5-py3-none-win_amd64.whl", hash = "sha256:c45d80f0093ba738120b77c4c0bde13843e33d786ae8608eb10490f06183d89b", size = 4320088, upload-time = "2026-04-10T07:02:17.09Z" },
{ url = "https://files.pythonhosted.org/packages/26/bf/b9c85cc61981bd71c0f1cbb50192763b11788a7c8636b1e01f750251c92c/onyx_devtools-0.7.5-py3-none-win_arm64.whl", hash = "sha256:9852a7cc29939371e016b794f2cffdb88680280d857d24c191c5188884416a3d", size = 3858839, upload-time = "2026-04-10T07:02:20.098Z" },
{ url = "https://files.pythonhosted.org/packages/cc/3f/584bb003333b6e6d632b06bbf99d410c7a71adde1711076fd44fe88d966d/onyx_devtools-0.7.4-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:6c51d9199ff8ff8fe64a3cfcf77f8170508722b33a1de54c5474be0447b7afa8", size = 4237700, upload-time = "2026-04-09T21:28:20.694Z" },
{ url = "https://files.pythonhosted.org/packages/0a/04/8c28522d51a66b1bdc997a1c72821122eab23f048459646c6ee62a39f6eb/onyx_devtools-0.7.4-py3-none-macosx_11_0_arm64.whl", hash = "sha256:f64a4cec6d3616b9ca7354e326994882c9ff2cb3f9fc9a44e55f0eb6a6ff1c1c", size = 3912751, upload-time = "2026-04-09T21:28:23.079Z" },
{ url = "https://files.pythonhosted.org/packages/8c/e6/ae60307cc50064dacb58e003c9a367d5c85118fd89a597abf3de5fd66f0a/onyx_devtools-0.7.4-py3-none-manylinux_2_17_aarch64.whl", hash = "sha256:31c7cecaaa329e3f6d53864290bc53fd0b823453c6cfdb8be7931a8925f5c075", size = 3778188, upload-time = "2026-04-09T21:28:23.14Z" },
{ url = "https://files.pythonhosted.org/packages/f1/d1/5a2789efac7d8f19d30d4d8da1862dd10a16b65d8c9b200542a959094a17/onyx_devtools-0.7.4-py3-none-manylinux_2_17_x86_64.whl", hash = "sha256:4c44e3c21253ea92127af483155190c14426c729d93e244aedc33875f74d3514", size = 4200526, upload-time = "2026-04-09T21:28:23.711Z" },
{ url = "https://files.pythonhosted.org/packages/0a/40/56a467eaa7b78411971898191cf0dc3ee49b7f448d1cfe76cd432f6458d3/onyx_devtools-0.7.4-py3-none-win_amd64.whl", hash = "sha256:6fa2b63b702bc5ecbeed5f9eadec57d61ac5c4a646cf5fbd66ee340f53b7d81c", size = 4319090, upload-time = "2026-04-09T21:28:23.26Z" },
{ url = "https://files.pythonhosted.org/packages/fa/ef/c866fa8ce1f75e1ac67bc239e767b8944cb1a12a44950986ce57e06db17f/onyx_devtools-0.7.4-py3-none-win_arm64.whl", hash = "sha256:c84cbe6a85474dc9f005f079796cf031e80c4249897432ad9f370cd27f72970a", size = 3857229, upload-time = "2026-04-09T21:28:23.484Z" },
]
[[package]]

View File

@@ -1,46 +0,0 @@
import React from "react";
import type { Meta, StoryObj } from "@storybook/react";
import { Divider } from "@opal/components/divider/components";
// Storybook metadata: registers the Divider stories under opal/components
// with autodocs enabled.
const meta: Meta<typeof Divider> = {
  title: "opal/components/Divider",
  component: Divider,
  tags: ["autodocs"],
};

export default meta;

type Story = StoryObj<typeof Divider>;

// Bare horizontal rule with no label or description.
export const Plain: Story = {
  render: () => <Divider />,
};

// Label rendered to the left of the rule.
export const WithTitle: Story = {
  render: () => <Divider title="Section" />,
};

// Helper text rendered below the rule.
export const WithDescription: Story = {
  render: () => (
    <Divider description="Additional configuration options for power users." />
  ),
};

// Collapsible section, closed on first render (defaultOpen={false}).
export const Foldable: Story = {
  render: () => (
    <Divider title="Advanced Options" foldable defaultOpen={false}>
      <div style={{ padding: "0.5rem 0" }}>
        <p>This content is revealed when the divider is expanded.</p>
      </div>
    </Divider>
  ),
};

// Collapsible section, open on first render.
export const FoldableDefaultOpen: Story = {
  render: () => (
    <Divider title="Details" foldable defaultOpen>
      <div style={{ padding: "0.5rem 0" }}>
        <p>This starts open by default.</p>
      </div>
    </Divider>
  ),
};

View File

@@ -1,62 +0,0 @@
# Divider
**Import:** `import { Divider } from "@opal/components";`
A horizontal rule that optionally displays a title, description, or foldable content section.
## Props
The component uses a discriminated union with four variants. `title` and `description` are mutually exclusive; `foldable` requires `title`.
### Bare divider
No props — renders a plain horizontal line.
### Titled divider
| Prop | Type | Default | Description |
|---|---|---|---|
| `title` | `string \| RichStr` | **(required)** | Label to the left of the line |
### Described divider
| Prop | Type | Default | Description |
|---|---|---|---|
| `description` | `string \| RichStr` | **(required)** | Text below the line |
### Foldable divider
| Prop | Type | Default | Description |
|---|---|---|---|
| `title` | `string \| RichStr` | **(required)** | Label to the left of the line |
| `foldable` | `true` | **(required)** | Enables fold/expand behavior |
| `open` | `boolean` | — | Controlled open state |
| `defaultOpen` | `boolean` | `false` | Uncontrolled initial open state |
| `onOpenChange` | `(open: boolean) => void` | — | Callback when toggled |
| `children` | `ReactNode` | — | Content revealed when open |
## Usage Examples
```tsx
import { Divider } from "@opal/components";
// Plain line
<Divider />
// With title
<Divider title="Advanced" />
// With description
<Divider description="Additional configuration options." />
// Foldable
<Divider title="Advanced Options" foldable>
<p>Hidden content here</p>
</Divider>
// Controlled foldable
const [open, setOpen] = useState(false);
<Divider title="Details" foldable open={open} onOpenChange={setOpen}>
<p>Controlled content</p>
</Divider>
```

View File

@@ -1,163 +0,0 @@
"use client";
import "@opal/components/divider/styles.css";
import { useState, useCallback } from "react";
import type { RichStr } from "@opal/types";
import { Button, Text } from "@opal/components";
import { SvgChevronRight } from "@opal/icons";
import { Interactive } from "@opal/core";
// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------
/**
 * Fields reserved for the foldable variant. Declared `never` here so the
 * non-foldable members of the union reject fold-related props at compile
 * time.
 */
interface DividerNeverFields {
  open?: never;
  defaultOpen?: never;
  onOpenChange?: never;
  children?: never;
}

/** Plain line — no title, no description. */
interface DividerBareProps extends DividerNeverFields {
  title?: never;
  description?: never;
  foldable?: false;
  ref?: React.Ref<HTMLDivElement>;
}

/** Line with a title to the left. */
interface DividerTitledProps extends DividerNeverFields {
  title: string | RichStr;
  description?: never;
  foldable?: false;
  ref?: React.Ref<HTMLDivElement>;
}

/** Line with a description below. */
interface DividerDescribedProps extends DividerNeverFields {
  title?: never;
  /** Description rendered below the divider line. */
  description: string | RichStr;
  foldable?: false;
  ref?: React.Ref<HTMLDivElement>;
}

/** Foldable — requires title, reveals children. */
interface DividerFoldableProps {
  /** Title is required when foldable. */
  title: string | RichStr;
  foldable: true;
  description?: never;
  /** Controlled open state. */
  open?: boolean;
  /** Uncontrolled default open state. */
  defaultOpen?: boolean;
  /** Callback when open state changes. */
  onOpenChange?: (open: boolean) => void;
  /** Content revealed when open. */
  children?: React.ReactNode;
  ref?: React.Ref<HTMLDivElement>;
}

/**
 * Discriminated union over the four divider variants. `title` and
 * `description` are mutually exclusive, and fold-related props are only
 * accepted when `foldable` is `true`.
 */
type DividerProps =
  | DividerBareProps
  | DividerTitledProps
  | DividerDescribedProps
  | DividerFoldableProps;
// ---------------------------------------------------------------------------
// Divider
// ---------------------------------------------------------------------------
function Divider(props: DividerProps) {
if (props.foldable) {
return <FoldableDivider {...props} />;
}
const { ref } = props;
const title = "title" in props ? props.title : undefined;
const description = "description" in props ? props.description : undefined;
return (
<div ref={ref} className="opal-divider">
<div className="opal-divider-row">
{title && (
<div className="opal-divider-title">
<Text font="secondary-body" color="text-03" nowrap>
{title}
</Text>
</div>
)}
<div className="opal-divider-line" />
</div>
{description && (
<div className="opal-divider-description">
<Text font="secondary-body" color="text-03">
{description}
</Text>
</div>
)}
</div>
);
}
// ---------------------------------------------------------------------------
// FoldableDivider (internal)
// ---------------------------------------------------------------------------
/**
 * Internal implementation of the foldable divider variant.
 *
 * Supports controlled (`open` + `onOpenChange`) and uncontrolled
 * (`defaultOpen`) usage; children are only mounted while open.
 *
 * Fix: `ref` is declared on DividerFoldableProps and spread into this
 * component by Divider, but was previously never destructured — so it was
 * silently dropped for the foldable variant. It is now forwarded to the
 * `.opal-divider` element, matching the non-foldable branch.
 */
function FoldableDivider({
  title,
  open: controlledOpen,
  defaultOpen = false,
  onOpenChange,
  children,
  ref,
}: DividerFoldableProps) {
  const [internalOpen, setInternalOpen] = useState(defaultOpen);
  const isControlled = controlledOpen !== undefined;
  const isOpen = isControlled ? controlledOpen : internalOpen;

  const toggle = useCallback(() => {
    const next = !isOpen;
    // Only track state locally in uncontrolled mode; controlled callers own it.
    if (!isControlled) setInternalOpen(next);
    onOpenChange?.(next);
  }, [isOpen, isControlled, onOpenChange]);

  return (
    <>
      <Interactive.Stateless
        variant="default"
        prominence="tertiary"
        interaction={isOpen ? "hover" : "rest"}
        onClick={toggle}
      >
        <Interactive.Container
          roundingVariant="sm"
          heightVariant="fit"
          widthVariant="full"
        >
          <div ref={ref} className="opal-divider">
            <div className="opal-divider-row">
              <div className="opal-divider-title">
                <Text font="secondary-body" color="inherit" nowrap>
                  {title}
                </Text>
              </div>
              <div className="opal-divider-line" />
              <div className="opal-divider-chevron" data-open={isOpen}>
                {/* NOTE(review): a Button nested inside the clickable
                    Interactive wrapper is a nested-interactive a11y smell —
                    confirm this Button is purely decorative. */}
                <Button
                  icon={SvgChevronRight}
                  size="sm"
                  prominence="tertiary"
                />
              </div>
            </div>
          </div>
        </Interactive.Container>
      </Interactive.Stateless>
      {isOpen && children}
    </>
  );
}
export { Divider, type DividerProps };

View File

@@ -1,38 +0,0 @@
/* ---------------------------------------------------------------------------
Divider
A horizontal rule with optional title, foldable chevron, or description.
--------------------------------------------------------------------------- */
/* Outer wrapper: stacks the rule row and the optional description. */
.opal-divider {
  @apply flex flex-col w-full;
  padding: 0.25rem 0.5rem;
  gap: 0.75rem;
}

/* Row holding the optional title, the 1px line, and the chevron. */
.opal-divider-row {
  @apply flex flex-row items-center w-full;
  gap: 2px;
  padding: 0px;
}

/* Title label to the left of the line. */
.opal-divider-title {
  @apply flex flex-col justify-center;
  padding: 0px 2px;
}

/* The rule itself: fills the remaining row width at 1px height. */
.opal-divider-line {
  @apply flex-1 h-px bg-border-01;
}

/* Helper text rendered below the line. */
.opal-divider-description {
  padding: 0px 2px;
}

/* Chevron container; the rotation below animates via this transition. */
.opal-divider-chevron {
  @apply transition-transform duration-200 ease-in-out;
}

/* Rotated 90° while the foldable divider is open (data-open set in JSX). */
.opal-divider-chevron[data-open="true"] {
  transform: rotate(90deg);
}

View File

@@ -54,12 +54,6 @@ export {
type TagColor,
} from "@opal/components/tag/components";
/* Divider */
export {
Divider,
type DividerProps,
} from "@opal/components/divider/components";
/* Card */
export {
Card,

View File

@@ -10,7 +10,7 @@ const SvgAnthropic = ({ size, ...props }: IconProps) => (
>
<path
d="M36.1779 9.78003H29.1432L41.9653 42.2095H49L36.1779 9.78003ZM15.8221 9.78003L3 42.2095H10.1844L12.8286 35.4243H26.2495L28.8438 42.2095H36.0282L23.2061 9.78003H15.8221ZM15.1236 29.3874L19.5141 18.0121L23.9046 29.3874H15.1236Z"
fill="var(--text-05)"
fill="currentColor"
/>
</svg>
);

View File

@@ -12,7 +12,7 @@ const SvgAws = ({ size, ...props }: IconProps) => (
<title>AWS</title>
<path
d="M14.6195 23.2934C14.6195 23.9333 14.7233 24.4522 14.8443 24.8326C14.9827 25.2131 15.1556 25.6282 15.3978 26.0778C15.4842 26.2162 15.5188 26.3546 15.5188 26.4756C15.5188 26.6486 15.4151 26.8215 15.1902 26.9945L14.1007 27.7208C13.945 27.8246 13.7894 27.8765 13.651 27.8765C13.4781 27.8765 13.3051 27.79 13.1322 27.6344C12.89 27.3749 12.6825 27.0982 12.5096 26.8215C12.3366 26.5275 12.1637 26.1989 11.9734 25.8011C10.6245 27.3922 8.92958 28.1878 6.88881 28.1878C5.43606 28.1878 4.27731 27.7727 3.42988 26.9426C2.58244 26.1124 2.15007 25.0056 2.15007 23.622C2.15007 22.152 2.66891 20.9586 3.72389 20.0593C4.77886 19.16 6.17973 18.7103 7.96108 18.7103C8.54909 18.7103 9.15441 18.7622 9.79431 18.8487C10.4342 18.9352 11.0914 19.0735 11.7832 19.2292V17.9667C11.7832 16.6523 11.5065 15.7356 10.9703 15.1995C10.4169 14.6634 9.483 14.404 8.15132 14.404C7.546 14.404 6.9234 14.4731 6.28349 14.6288C5.64359 14.7844 5.02098 14.9747 4.41567 15.2168C4.13896 15.3379 3.93142 15.407 3.81036 15.4416C3.6893 15.4762 3.60282 15.4935 3.53364 15.4935C3.29152 15.4935 3.17046 15.3206 3.17046 14.9574V14.1099C3.17046 13.8332 3.20505 13.6257 3.29152 13.5046C3.37799 13.3836 3.53364 13.2625 3.77577 13.1414C4.38108 12.8301 5.10746 12.5707 5.9549 12.3632C6.80233 12.1384 7.70165 12.0346 8.65286 12.0346C10.7109 12.0346 12.2156 12.5015 13.1841 13.4355C14.1353 14.3694 14.6195 15.7875 14.6195 17.6899V23.2934ZM7.63248 25.9222C8.2032 25.9222 8.79122 25.8184 9.41383 25.6109C10.0364 25.4034 10.5899 25.0229 11.0568 24.504C11.3335 24.1754 11.5411 23.8122 11.6448 23.3972C11.7486 22.9821 11.8178 22.4806 11.8178 21.8925V21.1662C11.3162 21.0451 10.7801 20.9413 10.2267 20.8722C9.67325 20.803 9.13711 20.7684 8.60098 20.7684C7.44224 20.7684 6.5948 20.9932 6.02407 21.4602C5.45335 21.9271 5.17664 22.5843 5.17664 23.4491C5.17664 24.2619 5.38417 24.8672 5.81654 25.2823C6.23161 25.7147 6.83692 25.9222 7.63248 25.9222ZM21.5201 27.79C21.2088 27.79 21.0012 27.7381 20.8629 27.6171C20.7245 27.5133 20.6035 27.2712 20.4997 
26.9426L16.4355 13.5738C16.3317 13.2279 16.2798 13.0031 16.2798 12.882C16.2798 12.6053 16.4182 12.4497 16.6949 12.4497H18.3897C18.7183 12.4497 18.9432 12.5015 19.0642 12.6226C19.2026 12.7264 19.3064 12.9685 19.4101 13.2971L22.3156 24.7462L25.0136 13.2971C25.1001 12.9512 25.2038 12.7264 25.3422 12.6226C25.4806 12.5188 25.7227 12.4497 26.034 12.4497H27.4176C27.7462 12.4497 27.971 12.5015 28.1093 12.6226C28.2477 12.7264 28.3688 12.9685 28.4379 13.2971L31.1705 24.8845L34.1625 13.2971C34.2662 12.9512 34.3873 12.7264 34.5084 12.6226C34.6467 12.5188 34.8716 12.4497 35.1829 12.4497H36.7913C37.068 12.4497 37.2236 12.588 37.2236 12.882C37.2236 12.9685 37.2063 13.055 37.189 13.1587C37.1717 13.2625 37.1372 13.4009 37.068 13.5911L32.9 26.9599C32.7962 27.3058 32.6751 27.5306 32.5368 27.6344C32.3984 27.7381 32.1736 27.8073 31.8796 27.8073H30.3922C30.0636 27.8073 29.8388 27.7554 29.7004 27.6344C29.5621 27.5133 29.441 27.2885 29.3719 26.9426L26.6912 15.7875L24.0278 26.9253C23.9413 27.2712 23.8376 27.496 23.6992 27.6171C23.5609 27.7381 23.3187 27.79 23.0074 27.79H21.5201ZM43.7437 28.257C42.8444 28.257 41.9451 28.1532 41.0803 27.9457C40.2156 27.7381 39.5411 27.5133 39.0914 27.2539C38.8147 27.0982 38.6245 26.9253 38.5553 26.7696C38.4861 26.614 38.4515 26.441 38.4515 26.2854V25.4034C38.4515 25.0402 38.5899 24.8672 38.8493 24.8672C38.9531 24.8672 39.0569 24.8845 39.1606 24.9191C39.2644 24.9537 39.42 25.0229 39.593 25.0921C40.181 25.3515 40.8209 25.559 41.4954 25.6974C42.1872 25.8357 42.8617 25.9049 43.5535 25.9049C44.643 25.9049 45.4905 25.7147 46.0785 25.3342C46.6665 24.9537 46.9778 24.4003 46.9778 23.6912C46.9778 23.2069 46.8222 22.8092 46.5109 22.4806C46.1996 22.152 45.6115 21.858 44.7641 21.5812L42.2564 20.803C40.9939 20.4052 40.0599 19.8172 39.4892 19.0389C38.9185 18.278 38.6245 17.4305 38.6245 16.5312C38.6245 15.8048 38.7801 15.1649 39.0914 14.6115C39.4027 14.0581 39.8178 13.5738 40.3367 13.1933C40.8555 12.7956 41.4435 12.5015 42.1353 12.294C42.8271 12.0865 43.5535 12 44.3144 
12C44.6949 12 45.0927 12.0173 45.4732 12.0692C45.871 12.1211 46.2341 12.1902 46.5973 12.2594C46.9432 12.3459 47.2718 12.4324 47.5831 12.5361C47.8944 12.6399 48.1366 12.7437 48.3095 12.8474C48.5516 12.9858 48.7246 13.1242 48.8283 13.2798C48.9321 13.4182 48.984 13.6084 48.984 13.8505V14.6634C48.984 15.0266 48.8456 15.2168 48.5862 15.2168C48.4479 15.2168 48.223 15.1476 47.929 15.0093C46.9432 14.5596 45.8364 14.3348 44.6084 14.3348C43.6227 14.3348 42.8444 14.4904 42.3083 14.819C41.7721 15.1476 41.4954 15.6492 41.4954 16.3583C41.4954 16.8425 41.6684 17.2576 42.0142 17.5862C42.3601 17.9148 43 18.2434 43.9167 18.5374L46.3725 19.3156C47.6177 19.7134 48.517 20.2668 49.0532 20.9759C49.5893 21.685 49.8487 22.4979 49.8487 23.3972C49.8487 24.1408 49.6931 24.8153 49.3991 25.4034C49.0878 25.9914 48.6727 26.5102 48.1366 26.9253C47.6004 27.3577 46.9605 27.669 46.2168 27.8938C45.4386 28.1359 44.6257 28.257 43.7437 28.257Z"
className="fill-[#252F3E] dark:fill-text-05"
fill="#252F3E"
/>
<path
fillRule="evenodd"

View File

@@ -1,25 +0,0 @@
import type { IconProps } from "@opal/types";
/** Cohere brand logo, rendered as a square of `size` with fixed brand-color fills. */
const SvgCohere = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 52 52"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    {...props}
  >
    <path
      d="M18.256 30.224C19.4293 30.224 21.776 30.1653 25.0613 28.816C28.8747 27.232 36.384 24.416 41.84 21.4827C45.6533 19.4293 47.296 16.7307 47.296 13.0933C47.296 8.10667 43.248 4 38.2027 4H17.0827C9.86667 4 4 9.86667 4 17.0827C4 24.2987 9.51467 30.224 18.256 30.224Z"
      fill="#39594D"
    />
    <path
      d="M21.8347 39.2C21.8347 35.68 23.9467 32.4533 27.232 31.104L33.8613 28.3467C40.608 25.5893 48 30.5173 48 37.792C48 43.424 43.424 48 37.792 48H30.576C25.7653 48 21.8347 44.0693 21.8347 39.2Z"
      fill="#D18EE2"
    />
    <path
      d="M11.568 31.9253C7.40267 31.9253 4 35.328 4 39.4933V40.4907C4 44.5973 7.40267 48 11.568 48C15.7333 48 19.136 44.5973 19.136 40.432V39.4347C19.0773 35.328 15.7333 31.9253 11.568 31.9253Z"
      fill="#FF7759"
    />
  </svg>
);
export default SvgCohere;

View File

@@ -3,7 +3,6 @@ export { default as SvgAws } from "@opal/logos/aws";
export { default as SvgAzure } from "@opal/logos/azure";
export { default as SvgBifrost } from "@opal/logos/bifrost";
export { default as SvgClaude } from "@opal/logos/claude";
export { default as SvgCohere } from "@opal/logos/cohere";
export { default as SvgDeepseek } from "@opal/logos/deepseek";
export { default as SvgDiscord } from "@opal/logos/discord";
export { default as SvgGemini } from "@opal/logos/gemini";
@@ -12,7 +11,6 @@ export { default as SvgLitellm } from "@opal/logos/litellm";
export { default as SvgLmStudio } from "@opal/logos/lm-studio";
export { default as SvgMicrosoft } from "@opal/logos/microsoft";
export { default as SvgMistral } from "@opal/logos/mistral";
export { default as SvgNomic } from "@opal/logos/nomic";
export { default as SvgOllama } from "@opal/logos/ollama";
export { default as SvgOnyxLogo } from "@opal/logos/onyx-logo";
export { default as SvgOnyxLogoTyped } from "@opal/logos/onyx-logo-typed";
@@ -21,4 +19,3 @@ export { default as SvgOpenai } from "@opal/logos/openai";
export { default as SvgOpenrouter } from "@opal/logos/openrouter";
export { default as SvgQwen } from "@opal/logos/qwen";
export { default as SvgSlack } from "@opal/logos/slack";
export { default as SvgVoyage } from "@opal/logos/voyage";

View File

@@ -1,21 +0,0 @@
import type { IconProps } from "@opal/types";
/**
 * Nomic brand logo, rendered as a square of `size`.
 *
 * NOTE(review): fills use `var(--text-05)` while at least one sibling logo in
 * this set uses `currentColor` — confirm which theming convention this icon
 * family should follow.
 */
const SvgNomic = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 52 52"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    {...props}
  >
    <path
      d="M35.858 6.31995H46V45.6709H35.6146C32.0852 36.8676 25.1481 27.7804 15.7363 24.8189V6.31995H25.4726C26.5274 12.7296 30.1618 18.3744 35.858 21.6546V6.31995Z"
      fill="var(--text-05)"
    />
    <path
      d="M15.7363 24.8189V45.6709H6L6 30.0927C9.05968 27.6167 11.9635 25.8737 15.7363 24.8189Z"
      fill="var(--text-05)"
    />
  </svg>
);
export default SvgNomic;

File diff suppressed because one or more lines are too long

Some files were not shown because too many files have changed in this diff Show More