chore(devtools): ods screenshot-diff for visual regression testing (#8386)

Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>
This commit is contained in:
Jamison Lahman
2026-02-12 16:04:22 -08:00
committed by GitHub
parent 6749f63f09
commit 27e676c48f
17 changed files with 2253 additions and 21 deletions

View File

@@ -52,6 +52,9 @@ env:
MCP_SERVER_PUBLIC_HOST: host.docker.internal
MCP_SERVER_PUBLIC_URL: http://host.docker.internal:8004/mcp
# Visual regression S3 bucket (shared across all jobs)
PLAYWRIGHT_S3_BUCKET: onyx-playwright-artifacts
jobs:
build-web-image:
runs-on:
@@ -239,6 +242,9 @@ jobs:
playwright-tests:
needs: [build-web-image, build-backend-image, build-model-server-image]
name: Playwright Tests (${{ matrix.project }})
permissions:
id-token: write # Required for OIDC-based AWS credential exchange (S3 access)
contents: read
runs-on:
- runs-on
- runner=8cpu-linux-arm64
@@ -428,8 +434,6 @@ jobs:
env:
PROJECT: ${{ matrix.project }}
run: |
# Create test-results directory to ensure it exists for artifact upload
mkdir -p test-results
npx playwright test --project ${PROJECT}
- uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
@@ -437,9 +441,124 @@ jobs:
with:
# Includes test results and trace.zip files
name: playwright-test-results-${{ matrix.project }}-${{ github.run_id }}
path: ./web/test-results/
path: ./web/output/playwright/
retention-days: 30
- name: Upload screenshots
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
if: always()
with:
name: playwright-screenshots-${{ matrix.project }}-${{ github.run_id }}
path: ./web/output/screenshots/
retention-days: 30
# --- Visual Regression Diff ---
- name: Configure AWS credentials
if: always()
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
- name: Install the latest version of uv
if: always()
uses: astral-sh/setup-uv@61cb8a9741eeb8a550a1b8544337180c0fc8476b # ratchet:astral-sh/setup-uv@v7
with:
enable-cache: false
version: "0.9.9"
- name: Determine baseline revision
  if: always()
  id: baseline-rev
  env:
    EVENT_NAME: ${{ github.event_name }}
    BASE_REF: ${{ github.event.pull_request.base.ref }}
    GH_REF: ${{ github.ref }}
    REF_NAME: ${{ github.ref_name }}
  run: |
    # Pull requests are diffed against their base branch (e.g. main, release/2.5).
    # Every other event (tag builds, pushes to main or release/*) is diffed
    # against the baseline stored under its own ref name.
    case "${EVENT_NAME}" in
      pull_request) REV="${BASE_REF}" ;;
      *)            REV="${REF_NAME}" ;;
    esac
    echo "rev=${REV}" >> "$GITHUB_OUTPUT"
- name: Generate screenshot diff report
if: always()
env:
PROJECT: ${{ matrix.project }}
PLAYWRIGHT_S3_BUCKET: ${{ env.PLAYWRIGHT_S3_BUCKET }}
BASELINE_REV: ${{ steps.baseline-rev.outputs.rev }}
run: |
uv run --no-sync --with onyx-devtools ods screenshot-diff compare \
--project "${PROJECT}" \
--rev "${BASELINE_REV}"
- name: Upload visual diff report to S3
  if: always()
  env:
    PROJECT: ${{ matrix.project }}
    PR_NUMBER: ${{ github.event.pull_request.number }}
    RUN_ID: ${{ github.run_id }}
  run: |
    # Only upload when the compare step produced a summary that reports differences.
    SUMMARY_FILE="web/output/screenshot-diff/${PROJECT}/summary.json"
    if [ ! -f "${SUMMARY_FILE}" ]; then
      echo "No summary file found — skipping S3 upload."
      exit 0
    fi
    HAS_DIFF=$(jq -r '.has_differences' "${SUMMARY_FILE}")
    if [ "${HAS_DIFF}" != "true" ]; then
      echo "No visual differences for ${PROJECT} — skipping S3 upload."
      exit 0
    fi
    # PR_NUMBER is empty for push and tag builds; without this guard the report
    # would land under the malformed prefix "reports/pr-/". The PR layout is
    # unchanged, so links in the PR comment keep working.
    if [ -n "${PR_NUMBER}" ]; then
      REPORT_PREFIX="reports/pr-${PR_NUMBER}"
    else
      REPORT_PREFIX="reports/push"
    fi
    aws s3 sync "web/output/screenshot-diff/${PROJECT}/" \
      "s3://${PLAYWRIGHT_S3_BUCKET}/${REPORT_PREFIX}/${RUN_ID}/${PROJECT}/"
- name: Upload visual diff summary
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
if: always()
with:
name: screenshot-diff-summary-${{ matrix.project }}
path: ./web/output/screenshot-diff/${{ matrix.project }}/summary.json
if-no-files-found: ignore
retention-days: 5
- name: Upload visual diff report artifact
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
if: always()
with:
name: screenshot-diff-report-${{ matrix.project }}-${{ github.run_id }}
path: ./web/output/screenshot-diff/${{ matrix.project }}/
if-no-files-found: ignore
retention-days: 30
- name: Update S3 baselines
  if: >-
    success() && (
      github.ref == 'refs/heads/main' ||
      startsWith(github.ref, 'refs/heads/release/') ||
      startsWith(github.ref, 'refs/tags/v')
    )
  env:
    PROJECT: ${{ matrix.project }}
    PLAYWRIGHT_S3_BUCKET: ${{ env.PLAYWRIGHT_S3_BUCKET }}
    BASELINE_REV: ${{ steps.baseline-rev.outputs.rev }}
  run: |
    # Nothing to publish when the screenshot directory is missing or empty.
    SCREENSHOT_DIR="web/output/screenshots/"
    if [ ! -d "${SCREENSHOT_DIR}" ] || [ -z "$(ls -A "${SCREENSHOT_DIR}")" ]; then
      echo "No screenshots to upload for ${PROJECT} — skipping baseline update."
      exit 0
    fi
    # --delete removes stored baselines that no longer exist in the current set.
    uv run --no-sync --with onyx-devtools ods screenshot-diff upload-baselines \
      --project "${PROJECT}" \
      --rev "${BASELINE_REV}" \
      --delete
# save before stopping the containers so the logs can be captured
- name: Save Docker logs
if: success() || failure()
@@ -457,6 +576,95 @@ jobs:
name: docker-logs-${{ matrix.project }}-${{ github.run_id }}
path: ${{ github.workspace }}/docker-compose.log
# Post a single combined visual regression comment after all matrix jobs finish
visual-regression-comment:
needs: [playwright-tests]
if: always() && github.event_name == 'pull_request'
runs-on: ubuntu-slim
timeout-minutes: 5
permissions:
pull-requests: write
steps:
- name: Download visual diff summaries
uses: actions/download-artifact@95815c38cf2ff2164869cbab79da8d1f422bc89e # ratchet:actions/download-artifact@v4
with:
pattern: screenshot-diff-summary-*
path: summaries/
- name: Post combined PR comment
  env:
    GH_TOKEN: ${{ github.token }}
    PR_NUMBER: ${{ github.event.pull_request.number }}
    RUN_ID: ${{ github.run_id }}
    REPO: ${{ github.repository }}
    S3_BUCKET: ${{ env.PLAYWRIGHT_S3_BUCKET }}
  run: |
    # Hidden marker used to find and update the single report comment on the PR.
    MARKER="<!-- visual-regression-report -->"
    # Build the markdown table from all summary files
    TABLE_HEADER="| Project | Changed | Added | Removed | Unchanged | Report |"
    TABLE_DIVIDER="|---------|---------|-------|---------|-----------|--------|"
    TABLE_ROWS=""
    HAS_ANY_SUMMARY=false
    # Each matrix job uploads its summary as its own artifact, which is
    # downloaded into a directory named after the artifact.
    for SUMMARY_DIR in summaries/screenshot-diff-summary-*/; do
      SUMMARY_FILE="${SUMMARY_DIR}summary.json"
      if [ ! -f "${SUMMARY_FILE}" ]; then
        continue
      fi
      HAS_ANY_SUMMARY=true
      PROJECT=$(jq -r '.project' "${SUMMARY_FILE}")
      CHANGED=$(jq -r '.changed' "${SUMMARY_FILE}")
      ADDED=$(jq -r '.added' "${SUMMARY_FILE}")
      REMOVED=$(jq -r '.removed' "${SUMMARY_FILE}")
      UNCHANGED=$(jq -r '.unchanged' "${SUMMARY_FILE}")
      TOTAL=$(jq -r '.total' "${SUMMARY_FILE}")
      HAS_DIFF=$(jq -r '.has_differences' "${SUMMARY_FILE}")
      if [ "${TOTAL}" = "0" ]; then
        REPORT_LINK="_No screenshots_"
      elif [ "${HAS_DIFF}" = "true" ]; then
        REPORT_URL="https://${S3_BUCKET}.s3.us-east-2.amazonaws.com/reports/pr-${PR_NUMBER}/${RUN_ID}/${PROJECT}/index.html"
        REPORT_LINK="[View Report](${REPORT_URL})"
      else
        REPORT_LINK="✅ No changes"
      fi
      TABLE_ROWS="${TABLE_ROWS}| \`${PROJECT}\` | ${CHANGED} | ${ADDED} | ${REMOVED} | ${UNCHANGED} | ${REPORT_LINK} |\n"
    done
    if [ "${HAS_ANY_SUMMARY}" = "false" ]; then
      echo "No visual diff summaries found — skipping PR comment."
      exit 0
    fi
    BODY=$(printf '%s\n' \
      "${MARKER}" \
      "### 🖼️ Visual Regression Report" \
      "" \
      "${TABLE_HEADER}" \
      "${TABLE_DIVIDER}" \
      "$(printf '%b' "${TABLE_ROWS}")")
    # Upsert: find existing comment with the marker, or create a new one.
    # --paginate walks every page of comments; without it only the first page
    # is searched, so on busy PRs the existing report comment is missed and a
    # duplicate is posted on every run.
    EXISTING_COMMENT_ID=$(gh api \
      --paginate \
      "repos/${REPO}/issues/${PR_NUMBER}/comments" \
      --jq ".[] | select(.body | startswith(\"${MARKER}\")) | .id" \
      2>/dev/null | head -1)
    if [ -n "${EXISTING_COMMENT_ID}" ]; then
      gh api \
        --method PATCH \
        "repos/${REPO}/issues/comments/${EXISTING_COMMENT_ID}" \
        -f body="${BODY}"
    else
      gh api \
        --method POST \
        "repos/${REPO}/issues/${PR_NUMBER}/comments" \
        -f body="${BODY}"
    fi
playwright-required:
# NOTE: Github-hosted runners have about 20s faster queue times and are preferred here.
runs-on: ubuntu-slim

View File

@@ -317,7 +317,7 @@ oauthlib==3.2.2
# via
# kubernetes
# requests-oauthlib
onyx-devtools==0.5.3
onyx-devtools==0.5.7
# via onyx
openai==2.14.0
# via

View File

@@ -144,7 +144,7 @@ dev = [
"matplotlib==3.10.8",
"mypy-extensions==1.0.0",
"mypy==1.13.0",
"onyx-devtools==0.5.3",
"onyx-devtools==0.5.7",
"openapi-generator-cli==7.17.0",
"pandas-stubs~=2.3.3",
"pre-commit==3.2.2",

View File

@@ -29,6 +29,10 @@ Some commands require external tools to be installed and configured:
- Install from [cli.github.com](https://cli.github.com/)
- Authenticate with `gh auth login`
- **AWS CLI** - Required for `screenshot-diff` commands (S3 baseline sync)
- Install from [aws.amazon.com/cli](https://aws.amazon.com/cli/)
- Authenticate with `aws sso login` or `aws configure`
### Autocomplete
`ods` provides autocomplete for `bash`, `fish`, `powershell` and `zsh` shells.
@@ -239,6 +243,100 @@ ods cherry-pick abc123 --release 2.5 --release 2.6
ods cherry-pick abc123 def456 ghi789 --release 2.5
```
### `screenshot-diff` - Visual Regression Testing
Compare Playwright screenshots against baselines and generate visual diff reports.
Baselines are stored per-project and per-revision in S3:
```
s3://<bucket>/baselines/<project>/<rev>/
```
This allows storing baselines for `main`, release branches (`release/2.5`), and
version tags (`v2.0.0`) side-by-side. Revisions containing `/` are sanitised to
`-` in the S3 path (e.g. `release/2.5` → `release-2.5`).
```shell
ods screenshot-diff <subcommand>
```
**Subcommands:**
- `compare` - Compare screenshots against baselines and generate a diff report
- `upload-baselines` - Upload screenshots to S3 as new baselines
The `--project` flag provides sensible defaults so you don't need to specify every path.
When set, the following defaults are applied:
| Flag | Default |
|------|---------|
| `--baseline` | `s3://onyx-playwright-artifacts/baselines/<project>/<rev>/` |
| `--current` | `web/output/screenshots/` |
| `--output` | `web/output/screenshot-diff/<project>/index.html` |
| `--rev` | `main` |
The S3 bucket defaults to `onyx-playwright-artifacts` and can be overridden with the
`PLAYWRIGHT_S3_BUCKET` environment variable.
**`compare` Flags:**
| Flag | Default | Description |
|------|---------|-------------|
| `--project` | | Project name (e.g. `admin`); sets sensible defaults |
| `--rev` | `main` | Revision baseline to compare against |
| `--from-rev` | | Source (older) revision for cross-revision comparison |
| `--to-rev` | | Target (newer) revision for cross-revision comparison |
| `--baseline` | | Baseline directory or S3 URL (`s3://...`) |
| `--current` | | Current screenshots directory or S3 URL (`s3://...`) |
| `--output` | `screenshot-diff/index.html` | Output path for the HTML report |
| `--threshold` | `0.2` | Per-channel pixel difference threshold (0.0–1.0) |
| `--max-diff-ratio` | `0.01` | Max diff pixel ratio before marking as changed (informational) |
**`upload-baselines` Flags:**
| Flag | Default | Description |
|------|---------|-------------|
| `--project` | | Project name (e.g. `admin`); sets sensible defaults |
| `--rev` | `main` | Revision to store the baseline under |
| `--dir` | | Local directory containing screenshots to upload |
| `--dest` | | S3 destination URL (`s3://...`) |
| `--delete` | `false` | Delete S3 files not present locally |
**Examples:**
```shell
# Compare local screenshots against the main baseline (default)
ods screenshot-diff compare --project admin
# Compare against a release branch baseline
ods screenshot-diff compare --project admin --rev release/2.5
# Compare two revisions directly (both sides fetched from S3)
ods screenshot-diff compare --project admin --from-rev v1.0.0 --to-rev v2.0.0
# Compare with explicit paths
ods screenshot-diff compare \
--baseline ./baselines \
--current ./web/output/screenshots/ \
--output ./report/index.html
# Upload baselines for main (default)
ods screenshot-diff upload-baselines --project admin
# Upload baselines for a release branch
ods screenshot-diff upload-baselines --project admin --rev release/2.5
# Upload baselines for a version tag
ods screenshot-diff upload-baselines --project admin --rev v2.0.0
# Upload with delete (remove old baselines not in current set)
ods screenshot-diff upload-baselines --project admin --delete
```
The `compare` subcommand writes a `summary.json` alongside the report with aggregate
counts (changed, added, removed, unchanged). The HTML report is only generated when
visual differences are detected.
### Testing Changes Locally (Dry Run)
Both `run-ci` and `cherry-pick` support `--dry-run` to test without making remote changes:

View File

@@ -49,6 +49,7 @@ func NewRootCommand() *cobra.Command {
cmd.AddCommand(NewLogsCommand())
cmd.AddCommand(NewPullCommand())
cmd.AddCommand(NewRunCICommand())
cmd.AddCommand(NewScreenshotDiffCommand())
return cmd
}

View File

@@ -0,0 +1,500 @@
package cmd
import (
"fmt"
"os"
"path/filepath"
"strings"
log "github.com/sirupsen/logrus"
"github.com/spf13/cobra"
"github.com/onyx-dot-app/onyx/tools/ods/internal/imgdiff"
"github.com/onyx-dot-app/onyx/tools/ods/internal/s3"
)
// DefaultS3Bucket names the S3 bucket used for Playwright visual regression
// artifacts when no override is configured.
const DefaultS3Bucket = "onyx-playwright-artifacts"

// DefaultScreenshotDir is where captured screenshots are expected locally,
// expressed relative to the repository root.
const DefaultScreenshotDir = "web/output/screenshots"

// DefaultOutputDir is the base directory that receives screenshot diff output,
// expressed relative to the repository root.
const DefaultOutputDir = "web/output/screenshot-diff"

// DefaultRev is the revision assumed when --rev is left unset.
const DefaultRev = "main"
// getS3Bucket resolves the S3 bucket name. A non-empty PLAYWRIGHT_S3_BUCKET
// environment variable wins; otherwise DefaultS3Bucket is returned.
func getS3Bucket() string {
	bucket := os.Getenv("PLAYWRIGHT_S3_BUCKET")
	if bucket == "" {
		bucket = DefaultS3Bucket
	}
	return bucket
}
// sanitizeRev turns a git ref into a single S3 path segment by replacing every
// "/" with "-" (for example "release/2.5" becomes "release-2.5"). All other
// bytes are passed through untouched.
func sanitizeRev(rev string) string {
	segments := strings.Split(rev, "/")
	return strings.Join(segments, "-")
}
// ScreenshotDiffCompareOptions holds options for the compare subcommand.
// The fields are bound to command-line flags in newCompareCommand; unset string
// fields may be filled in later by resolveCompareDefaults.
type ScreenshotDiffCompareOptions struct {
// Project is the project name; when set it enables the per-project defaults
// for Baseline, Current, and Output.
Project string
Rev string // revision whose baseline to compare against (default: "main")
FromRev string // cross-revision mode: source (older) revision
ToRev string // cross-revision mode: target (newer) revision
// Baseline is a local directory or an s3:// URL containing the baseline screenshots.
Baseline string
// Current is a local directory or an s3:// URL containing the screenshots under test.
Current string
// Output is the path of the HTML report; summary.json is written in the same directory.
Output string
// Threshold is the per-channel pixel difference threshold handed to the comparison.
Threshold float64
// MaxDiffRatio is bound to --max-diff-ratio, which the flag help marks as
// informational; runCompare does not read it.
MaxDiffRatio float64
}
// ScreenshotDiffUploadOptions holds options for the upload-baselines subcommand.
// The fields are bound to command-line flags in newUploadBaselinesCommand; unset
// string fields may be filled in later by resolveUploadDefaults.
type ScreenshotDiffUploadOptions struct {
// Project is the project name; when set it enables the defaults for Dir and Dest.
Project string
Rev string // revision to store the baseline under (default: "main")
// Dir is the local directory containing the screenshots to upload.
Dir string
// Dest is the destination and must be an s3:// URL (enforced in runUploadBaselines).
Dest string
// Delete is forwarded to the S3 sync; per the flag help it deletes S3 files
// that are not present locally.
Delete bool
}
// NewScreenshotDiffCommand creates the screenshot-diff command with subcommands.
// The returned command registers the "compare" and "upload-baselines"
// subcommands; invoked without a subcommand it only prints its help text.
func NewScreenshotDiffCommand() *cobra.Command {
cmd := &cobra.Command{
Use: "screenshot-diff",
Short: "Visual regression testing for Playwright screenshots",
Long: `Compare Playwright screenshots against baselines and generate visual diff reports.
Supports comparing local directories and downloading baselines from S3.
The generated HTML report is self-contained (images base64-inlined) and can
be opened locally or hosted on S3.
Baselines are stored per-project and per-revision in S3:
s3://<bucket>/baselines/<project>/<rev>/
The --project flag provides sensible defaults so you don't need to specify
every path. For example:
# Compare local screenshots against the "main" baseline (default)
ods screenshot-diff compare --project admin
# Compare against a release branch baseline
ods screenshot-diff compare --project admin --rev release/2.5
# Compare two revisions directly (no local screenshots needed)
ods screenshot-diff compare --project admin --from-rev v1.0.0 --to-rev v2.0.0
# Upload new baselines for the "admin" project on main
ods screenshot-diff upload-baselines --project admin
# Upload baselines for a release branch
ods screenshot-diff upload-baselines --project admin --rev release/2.5
You can override any default with explicit flags:
ods screenshot-diff compare --baseline ./my-baselines --current ./my-screenshots`,
// The parent command has no behavior of its own; the error from Help is
// deliberately discarded.
Run: func(cmd *cobra.Command, args []string) {
_ = cmd.Help()
},
}
// Attach the two subcommands that do the actual work.
cmd.AddCommand(newCompareCommand())
cmd.AddCommand(newUploadBaselinesCommand())
return cmd
}
// newCompareCommand builds the "compare" subcommand. Flag values are parsed
// into a ScreenshotDiffCompareOptions that is handed to runCompare.
func newCompareCommand() *cobra.Command {
opts := &ScreenshotDiffCompareOptions{}
cmd := &cobra.Command{
Use: "compare",
Short: "Compare screenshots against baselines and generate a diff report",
Long: `Compare current screenshots against baseline screenshots and produce
a self-contained HTML visual diff report with a JSON summary.
Baselines are stored per-revision in S3:
s3://<bucket>/baselines/<project>/<rev>/
When --project is specified, the following defaults are applied:
--baseline → s3://<bucket>/baselines/<project>/<rev>/
--current → web/output/screenshots/
--output → web/output/screenshot-diff/<project>/index.html
--rev → main
The bucket defaults to "onyx-playwright-artifacts" and can be overridden
with the PLAYWRIGHT_S3_BUCKET environment variable.
A summary.json file is always written next to the HTML report. If there
are no visual differences, the HTML report is skipped.
CROSS-REVISION MODE:
Use --from-rev and --to-rev to compare two stored revisions directly.
Both sides are downloaded from S3 — no local screenshots are needed.
ods screenshot-diff compare --project admin --from-rev v1.0.0 --to-rev v2.0.0
Examples:
# Compare local screenshots against main (default)
ods screenshot-diff compare --project admin
# Compare against a specific revision
ods screenshot-diff compare --project admin --rev release/2.5
# Compare two revisions
ods screenshot-diff compare --project admin --from-rev v1.0.0 --to-rev v2.0.0
# Override specific flags
ods screenshot-diff compare --project admin --current ./custom-dir/
# Fully manual (no project flag)
ods screenshot-diff compare \
--baseline s3://my-bucket/baselines/admin/main/ \
--current ./web/output/screenshots/ \
--output ./web/output/screenshot-diff/admin/index.html`,
Run: func(cmd *cobra.Command, args []string) {
runCompare(opts)
},
}
// String flags default to "" here; the defaults described in the help text are
// applied afterwards by resolveCompareDefaults (called from runCompare).
cmd.Flags().StringVar(&opts.Project, "project", "", "Project name (e.g. admin); sets sensible defaults for baseline, current, and output")
cmd.Flags().StringVar(&opts.Rev, "rev", "", "Revision to compare against (default: main). Ignored when --from-rev/--to-rev are set")
cmd.Flags().StringVar(&opts.FromRev, "from-rev", "", "Source (older) revision for cross-revision comparison")
cmd.Flags().StringVar(&opts.ToRev, "to-rev", "", "Target (newer) revision for cross-revision comparison")
cmd.Flags().StringVar(&opts.Baseline, "baseline", "", "Baseline directory or S3 URL (s3://...)")
cmd.Flags().StringVar(&opts.Current, "current", "", "Current screenshots directory or S3 URL (s3://...)")
cmd.Flags().StringVar(&opts.Output, "output", "", "Output path for the HTML report")
// Numeric flags carry their real defaults: 0.2 for the threshold and 0.01 for
// the (informational) maximum diff ratio.
cmd.Flags().Float64Var(&opts.Threshold, "threshold", 0.2, "Per-channel pixel difference threshold (0.0-1.0)")
cmd.Flags().Float64Var(&opts.MaxDiffRatio, "max-diff-ratio", 0.01, "Max diff pixel ratio before marking as changed (informational)")
return cmd
}
// newUploadBaselinesCommand builds the "upload-baselines" subcommand. Flag
// values are parsed into a ScreenshotDiffUploadOptions that is handed to
// runUploadBaselines.
func newUploadBaselinesCommand() *cobra.Command {
opts := &ScreenshotDiffUploadOptions{}
cmd := &cobra.Command{
Use: "upload-baselines",
Short: "Upload screenshots to S3 as new baselines",
Long: `Upload a local directory of screenshots to S3 to serve as the new
baseline for future comparisons. Typically run after tests pass on the
main branch or a release branch.
Baselines are stored per-revision in S3:
s3://<bucket>/baselines/<project>/<rev>/
When --project is specified, the following defaults are applied:
--dir → web/output/screenshots/
--dest → s3://<bucket>/baselines/<project>/<rev>/
--rev → main
Examples:
# Upload baselines for main (default)
ods screenshot-diff upload-baselines --project admin
# Upload baselines for a release branch
ods screenshot-diff upload-baselines --project admin --rev release/2.5
# Upload baselines for a version tag
ods screenshot-diff upload-baselines --project admin --rev v2.0.0
# With delete (remove old baselines not in current set)
ods screenshot-diff upload-baselines --project admin --delete
# Fully manual
ods screenshot-diff upload-baselines \
--dir ./web/output/screenshots/ \
--dest s3://onyx-playwright-artifacts/baselines/admin/main/`,
Run: func(cmd *cobra.Command, args []string) {
runUploadBaselines(opts)
},
}
// String flags default to "" here; the defaults described in the help text are
// applied afterwards by resolveUploadDefaults (called from runUploadBaselines).
cmd.Flags().StringVar(&opts.Project, "project", "", "Project name (e.g. admin); sets sensible defaults for dir and dest")
cmd.Flags().StringVar(&opts.Rev, "rev", "", "Revision to store the baseline under (default: main)")
cmd.Flags().StringVar(&opts.Dir, "dir", "", "Local directory containing screenshots to upload")
cmd.Flags().StringVar(&opts.Dest, "dest", "", "S3 destination URL (s3://...)")
cmd.Flags().BoolVar(&opts.Delete, "delete", false, "Delete S3 files not present locally")
return cmd
}
// resolveCompareDefaults populates compare options the user left empty.
//
// Without --project only Output receives a fallback. With --project, Baseline,
// Current, and Output are derived from the project name: in cross-revision mode
// (both FromRev and ToRev set) both sides point at stored S3 baselines,
// otherwise the baseline is the S3 copy for Rev (DefaultRev when unset) and the
// current side is the local screenshot directory. Explicit values are never
// overwritten.
func resolveCompareDefaults(opts *ScreenshotDiffCompareOptions) {
	bucket := getS3Bucket()

	// No project: nothing can be derived except the generic report location.
	if opts.Project == "" {
		if opts.Output == "" {
			opts.Output = "screenshot-diff/index.html"
		}
		return
	}

	// baselineURL returns the S3 location of this project's baseline for rev.
	baselineURL := func(rev string) string {
		return fmt.Sprintf("s3://%s/baselines/%s/%s/",
			bucket, opts.Project, sanitizeRev(rev))
	}
	// fillEmpty assigns fallback only when the option was not provided.
	fillEmpty := func(field *string, fallback string) {
		if *field == "" {
			*field = fallback
		}
	}

	var baselineDefault, currentDefault string
	if opts.FromRev != "" && opts.ToRev != "" {
		// Cross-revision mode: both sides come from S3.
		baselineDefault = baselineURL(opts.FromRev)
		currentDefault = baselineURL(opts.ToRev)
	} else {
		// Standard mode: local screenshots versus one stored revision.
		targetRev := DefaultRev
		if opts.Rev != "" {
			targetRev = opts.Rev
		}
		baselineDefault = baselineURL(targetRev)
		currentDefault = DefaultScreenshotDir
	}

	fillEmpty(&opts.Baseline, baselineDefault)
	fillEmpty(&opts.Current, currentDefault)
	fillEmpty(&opts.Output, filepath.Join(DefaultOutputDir, opts.Project, "index.html"))
}
// resolveUploadDefaults populates upload options the user left empty. Nothing
// is changed unless --project is set; with a project, Dir falls back to the
// local screenshot directory and Dest to the project's S3 baseline location for
// Rev (DefaultRev when unset). Explicit values are never overwritten.
func resolveUploadDefaults(opts *ScreenshotDiffUploadOptions) {
	bucket := getS3Bucket()
	if opts.Project == "" {
		return
	}

	targetRev := DefaultRev
	if opts.Rev != "" {
		targetRev = opts.Rev
	}

	if opts.Dir == "" {
		opts.Dir = DefaultScreenshotDir
	}
	if opts.Dest == "" {
		opts.Dest = fmt.Sprintf("s3://%s/baselines/%s/%s/",
			bucket, opts.Project, sanitizeRev(targetRev))
	}
}
// downloadS3Dir syncs the contents of s3URL into a freshly created temporary
// directory (named using prefix) and returns that directory's path. On success
// the caller owns the directory and must remove it; on failure the directory is
// removed here and an error wrapping the cause is returned.
func downloadS3Dir(s3URL string, prefix string) (string, error) {
	workDir, mkErr := os.MkdirTemp("", prefix)
	if mkErr != nil {
		return "", fmt.Errorf("failed to create temp directory: %w", mkErr)
	}

	syncErr := s3.SyncDown(s3URL, workDir)
	if syncErr == nil {
		return workDir, nil
	}

	// Best-effort removal: the download error is the one worth reporting.
	_ = os.RemoveAll(workDir)
	return "", fmt.Errorf("failed to download from S3 (%s): %w", s3URL, syncErr)
}
// runCompare implements the compare subcommand.
//
// It validates the flag combination, resolves defaults, downloads any s3://
// inputs into temporary directories, compares the two screenshot sets, prints a
// terminal summary, always writes summary.json next to the report path, and
// generates the HTML report only when differences were found. Unrecoverable
// problems terminate the process through the logger's Fatal functions.
func runCompare(opts *ScreenshotDiffCompareOptions) {
	// Validate cross-revision flags are used together
	if (opts.FromRev != "") != (opts.ToRev != "") {
		log.Fatal("--from-rev and --to-rev must be used together")
	}

	resolveCompareDefaults(opts)

	// Validate required fields
	if opts.Baseline == "" {
		log.Fatal("--baseline is required (or use --project to set defaults)")
	}
	if opts.Current == "" {
		log.Fatal("--current is required (or use --project to set defaults)")
	}

	// Project name recorded in the summary; "default" when --project is not set.
	project := opts.Project
	if project == "" {
		project = "default"
	}

	// Track temp dirs for cleanup
	var tempDirs []string
	cleanup := func() {
		for _, d := range tempDirs {
			_ = os.RemoveAll(d)
		}
		tempDirs = nil
	}
	defer cleanup()

	// Fatal logging terminates the process via os.Exit, which skips deferred
	// calls. Every fatal path after this point therefore goes through fatalf so
	// downloaded temp directories are removed before exiting.
	fatalf := func(format string, args ...interface{}) {
		cleanup()
		log.Fatalf(format, args...)
	}

	// Resolve baseline directory
	baselineDir := opts.Baseline
	if strings.HasPrefix(opts.Baseline, "s3://") {
		dir, err := downloadS3Dir(opts.Baseline, "screenshot-baseline-*")
		if err != nil {
			fatalf("Failed to download baselines: %v", err)
		}
		tempDirs = append(tempDirs, dir)
		baselineDir = dir
	}

	// Resolve current directory (may also be S3 in cross-revision mode)
	currentDir := opts.Current
	if strings.HasPrefix(opts.Current, "s3://") {
		dir, err := downloadS3Dir(opts.Current, "screenshot-current-*")
		if err != nil {
			fatalf("Failed to download current screenshots: %v", err)
		}
		tempDirs = append(tempDirs, dir)
		currentDir = dir
	}

	// Verify baseline directory exists
	if _, err := os.Stat(baselineDir); os.IsNotExist(err) {
		log.Warnf("Baseline directory does not exist: %s", baselineDir)
		log.Warn("This may be the first run -- no baselines to compare against.")
		// Create an empty dir so CompareDirectories works (all files will be "added")
		if err := os.MkdirAll(baselineDir, 0755); err != nil {
			fatalf("Failed to create baseline directory: %v", err)
		}
	}

	// Resolve the output path
	outputPath := opts.Output
	if !filepath.IsAbs(outputPath) {
		cwd, err := os.Getwd()
		if err != nil {
			fatalf("Failed to get working directory: %v", err)
		}
		outputPath = filepath.Join(cwd, outputPath)
	}
	summaryPath := filepath.Join(filepath.Dir(outputPath), "summary.json")

	// If the current screenshots directory doesn't exist, write an empty summary and exit
	if _, err := os.Stat(currentDir); os.IsNotExist(err) {
		log.Warnf("Current screenshots directory does not exist: %s", currentDir)
		log.Warn("No screenshots captured for this project — writing empty summary.")
		summary := imgdiff.Summary{Project: project}
		if err := imgdiff.WriteSummary(summary, summaryPath); err != nil {
			fatalf("Failed to write summary: %v", err)
		}
		log.Infof("Summary written to: %s", summaryPath)
		return
	}

	// Log the user-supplied locations (not the temp dirs) so S3 URLs stay visible.
	log.Infof("Comparing screenshots...")
	log.Infof(" Baseline: %s", opts.Baseline)
	log.Infof(" Current: %s", opts.Current)
	log.Infof(" Threshold: %.2f", opts.Threshold)

	results, err := imgdiff.CompareDirectories(baselineDir, currentDir, opts.Threshold)
	if err != nil {
		fatalf("Comparison failed: %v", err)
	}

	// Print terminal summary
	printSummary(results)

	// Build and write JSON summary (always)
	summary := imgdiff.BuildSummary(project, results)
	if err := imgdiff.WriteSummary(summary, summaryPath); err != nil {
		fatalf("Failed to write summary: %v", err)
	}
	log.Infof("Summary written to: %s", summaryPath)

	// Generate HTML report only if there are differences
	if summary.HasDifferences {
		log.Infof("Generating report: %s", outputPath)
		if err := imgdiff.GenerateReport(results, outputPath); err != nil {
			fatalf("Failed to generate report: %v", err)
		}
		log.Infof("Report generated successfully: %s", outputPath)
	} else {
		log.Infof("No visual differences detected — skipping report generation.")
	}
}
// runUploadBaselines implements the upload-baselines subcommand. After
// resolving defaults it requires a source directory that exists and an s3://
// destination, then syncs the directory up to S3 (honouring --delete). Any
// validation or upload failure terminates the process through the logger's
// Fatal functions.
func runUploadBaselines(opts *ScreenshotDiffUploadOptions) {
	resolveUploadDefaults(opts)

	// Validate required fields
	required := []struct {
		value   string
		message string
	}{
		{opts.Dir, "--dir is required (or use --project to set defaults)"},
		{opts.Dest, "--dest is required (or use --project to set defaults)"},
	}
	for _, req := range required {
		if req.value == "" {
			log.Fatal(req.message)
		}
	}

	_, statErr := os.Stat(opts.Dir)
	if os.IsNotExist(statErr) {
		log.Fatalf("Screenshots directory does not exist: %s", opts.Dir)
	}

	destIsS3 := strings.HasPrefix(opts.Dest, "s3://")
	if !destIsS3 {
		log.Fatalf("Destination must be an S3 URL (s3://...): %s", opts.Dest)
	}

	log.Infof("Uploading baselines...")
	log.Infof(" Source: %s", opts.Dir)
	log.Infof(" Dest: %s", opts.Dest)

	uploadErr := s3.SyncUp(opts.Dir, opts.Dest, opts.Delete)
	if uploadErr != nil {
		log.Fatalf("Failed to upload baselines: %v", uploadErr)
	}
	log.Info("Baselines uploaded successfully.")
}
// printSummary writes a boxed tally of the comparison results to stdout,
// followed by one line per changed, added, or removed screenshot. The detail
// list is omitted when every screenshot is unchanged.
func printSummary(results []imgdiff.Result) {
	// Tally results by status in a single pass.
	tally := make(map[imgdiff.Status]int, 4)
	for i := range results {
		tally[results[i].Status]++
	}
	changed := tally[imgdiff.StatusChanged]
	added := tally[imgdiff.StatusAdded]
	removed := tally[imgdiff.StatusRemoved]
	unchanged := tally[imgdiff.StatusUnchanged]

	fmt.Println()
	fmt.Println("╔══════════════════════════════════════════════╗")
	fmt.Println("║ Visual Regression Summary ║")
	fmt.Println("╠══════════════════════════════════════════════╣")
	fmt.Printf("║ Changed: %-32d ║\n", changed)
	fmt.Printf("║ Added: %-32d ║\n", added)
	fmt.Printf("║ Removed: %-32d ║\n", removed)
	fmt.Printf("║ Unchanged: %-32d ║\n", unchanged)
	fmt.Printf("║ Total: %-32d ║\n", len(results))
	fmt.Println("╚══════════════════════════════════════════════╝")
	fmt.Println()

	// Nothing further to list when no screenshot differs from its baseline.
	if changed == 0 && added == 0 && removed == 0 {
		return
	}

	for i := range results {
		entry := &results[i]
		if entry.Status == imgdiff.StatusChanged {
			fmt.Printf(" ⚠ CHANGED %s (%.2f%% diff)\n", entry.Name, entry.DiffPercent)
		} else if entry.Status == imgdiff.StatusAdded {
			fmt.Printf(" ✚ ADDED %s\n", entry.Name)
		} else if entry.Status == imgdiff.StatusRemoved {
			fmt.Printf(" ✖ REMOVED %s\n", entry.Name)
		}
	}
	fmt.Println()
}

View File

@@ -0,0 +1,321 @@
package imgdiff
import (
"fmt"
"image"
"image/color"
"image/png"
"math"
"os"
"path/filepath"
"sort"
"strings"
)
// Status classifies the outcome of comparing one screenshot with its baseline.
type Status int

const (
	// StatusUnchanged: baseline and current are identical within the threshold.
	StatusUnchanged Status = iota
	// StatusChanged: the two images differ beyond the threshold.
	StatusChanged
	// StatusAdded: the image is present only in the current directory.
	StatusAdded
	// StatusRemoved: the image is present only in the baseline directory.
	StatusRemoved
)

// String returns the lowercase, human-readable name of the status, or
// "unknown" for any value outside the declared constants.
func (s Status) String() string {
	names := [...]string{
		StatusUnchanged: "unchanged",
		StatusChanged:   "changed",
		StatusAdded:     "added",
		StatusRemoved:   "removed",
	}
	if s < 0 || int(s) >= len(names) {
		return "unknown"
	}
	return names[s]
}
// Result holds the comparison result for a single screenshot.
// Compare fills in every field for a baseline/current pair; the path fields
// are documented as empty for screenshots that exist on only one side.
type Result struct {
// Name is the filename of the screenshot (e.g. "admin-documents-explorer.png").
// Compare derives it from the base name of the current image path.
Name string
// Status is the comparison status.
Status Status
// DiffPercent is the percentage of pixels that differ (0.0 to 100.0),
// i.e. DiffPixels / TotalPixels * 100.
DiffPercent float64
// DiffPixels is the number of pixels that differ.
DiffPixels int
// TotalPixels is the total number of pixels compared. Compare uses the larger
// width and the larger height of the two images, so it covers the union area.
TotalPixels int
// BaselinePath is the path to the baseline image (empty if added).
BaselinePath string
// CurrentPath is the path to the current image (empty if removed).
CurrentPath string
// DiffImage is the generated diff overlay image (nil if unchanged, added, or removed).
// NOTE(review): Compare as written sets DiffImage for every pair with a
// non-zero pixel area, including unchanged ones — confirm whether a later
// step clears it or whether this comment should be relaxed.
DiffImage image.Image
}
// Compare decodes two PNG files and diffs them pixel by pixel.
//
// threshold (0.0 to 1.0) sets the per-channel sensitivity: a pixel counts as
// different when any of its 8-bit R, G, B, or A values differs by more than
// threshold * 255. When the images have different sizes the comparison covers
// the larger width and height, treating positions outside an image as fully
// transparent. The returned Result carries the diff statistics and an overlay
// image in which differing pixels are magenta and matching pixels are a dimmed
// copy of the current image. An error is returned if either file cannot be
// decoded.
func Compare(baselinePath, currentPath string, threshold float64) (*Result, error) {
	baseline, err := decodePNG(baselinePath)
	if err != nil {
		return nil, fmt.Errorf("failed to decode baseline %s: %w", baselinePath, err)
	}
	current, err := decodePNG(currentPath)
	if err != nil {
		return nil, fmt.Errorf("failed to decode current %s: %w", currentPath, err)
	}

	bBounds, cBounds := baseline.Bounds(), current.Bounds()

	// Use the larger dimensions to ensure we compare the full area
	width := max(bBounds.Dx(), cBounds.Dx())
	height := max(bBounds.Dy(), cBounds.Dy())

	result := &Result{
		Name:         filepath.Base(currentPath),
		Status:       StatusUnchanged,
		BaselinePath: baselinePath,
		CurrentPath:  currentPath,
	}

	totalPixels := width * height
	if totalPixels == 0 {
		// Nothing to compare; report the pair as unchanged with no overlay.
		return result, nil
	}

	// channels8 samples img at (x, y) relative to its top-left corner and
	// returns R, G, B, A reduced from 16 to 8 bits. Positions outside the image
	// yield all zeros (transparent).
	channels8 := func(img image.Image, b image.Rectangle, x, y int) [4]float64 {
		var r, g, bl, a uint32
		if x < b.Dx() && y < b.Dy() {
			r, g, bl, a = img.At(b.Min.X+x, b.Min.Y+y).RGBA()
		}
		return [4]float64{
			float64(r >> 8),
			float64(g >> 8),
			float64(bl >> 8),
			float64(a >> 8),
		}
	}

	overlay := image.NewRGBA(image.Rect(0, 0, width, height))
	limit := threshold * 255.0
	magenta := color.RGBA{R: 255, G: 0, B: 255, A: 255}
	differing := 0

	for y := 0; y < height; y++ {
		for x := 0; x < width; x++ {
			base := channels8(baseline, bBounds, x, y)
			cur := channels8(current, cBounds, x, y)

			// A pixel differs as soon as one channel exceeds the limit.
			differs := false
			for ch := range base {
				if math.Abs(base[ch]-cur[ch]) > limit {
					differs = true
					break
				}
			}

			if differs {
				differing++
				// Highlight in magenta for diff overlay
				overlay.Set(x, y, magenta)
				continue
			}

			// Matching pixel: keep a dimmed (30%) copy of the current image,
			// with alpha floored at 50.
			overlay.Set(x, y, color.RGBA{
				R: uint8(cur[0] * 0.3),
				G: uint8(cur[1] * 0.3),
				B: uint8(cur[2] * 0.3),
				A: uint8(math.Max(cur[3]*0.3, 50)),
			})
		}
	}

	if differing > 0 {
		result.Status = StatusChanged
	}
	result.DiffPercent = float64(differing) / float64(totalPixels) * 100.0
	result.DiffPixels = differing
	result.TotalPixels = totalPixels
	result.DiffImage = overlay
	return result, nil
}
// CompareDirectories compares all PNG files in two directories.
//
// Files are matched by base name. A file present only in baselineDir yields a
// StatusRemoved result, a file present only in currentDir yields a StatusAdded
// result, and a file present in both is passed to Compare with the given
// threshold. The first Compare failure aborts the whole run and is returned.
//
// Results are sorted by status (changed, added, removed, unchanged). Changed
// results are ordered by diff percentage, largest first; every remaining tie
// is broken by name so the output order is deterministic.
func CompareDirectories(baselineDir, currentDir string, threshold float64) ([]Result, error) {
	baselineFiles, err := listPNGs(baselineDir)
	if err != nil {
		return nil, fmt.Errorf("failed to list baseline directory: %w", err)
	}
	currentFiles, err := listPNGs(currentDir)
	if err != nil {
		return nil, fmt.Errorf("failed to list current directory: %w", err)
	}

	// Index both sides by base name for lookup.
	baselineMap := make(map[string]string, len(baselineFiles))
	for _, f := range baselineFiles {
		baselineMap[filepath.Base(f)] = f
	}
	currentMap := make(map[string]string, len(currentFiles))
	for _, f := range currentFiles {
		currentMap[filepath.Base(f)] = f
	}

	// Union of the names seen on either side.
	allNames := make(map[string]struct{}, len(baselineMap)+len(currentMap))
	for name := range baselineMap {
		allNames[name] = struct{}{}
	}
	for name := range currentMap {
		allNames[name] = struct{}{}
	}

	var results []Result
	for name := range allNames {
		baselinePath, inBaseline := baselineMap[name]
		currentPath, inCurrent := currentMap[name]
		switch {
		case inBaseline && inCurrent:
			result, err := Compare(baselinePath, currentPath, threshold)
			if err != nil {
				return nil, fmt.Errorf("failed to compare %s: %w", name, err)
			}
			results = append(results, *result)
		case inBaseline && !inCurrent:
			results = append(results, Result{
				Name:         name,
				Status:       StatusRemoved,
				BaselinePath: baselinePath,
			})
		case !inBaseline && inCurrent:
			results = append(results, Result{
				Name:        name,
				Status:      StatusAdded,
				CurrentPath: currentPath,
			})
		}
	}

	// The results were collected in map iteration order and sort.Slice is not
	// stable, so the comparator must define a total order: status first, then
	// diff percentage (descending) for changed entries, then name.
	sort.Slice(results, func(i, j int) bool {
		a, b := results[i], results[j]
		if a.Status != b.Status {
			return statusOrder(a.Status) < statusOrder(b.Status)
		}
		if a.Status == StatusChanged && a.DiffPercent != b.DiffPercent {
			return a.DiffPercent > b.DiffPercent
		}
		return a.Name < b.Name
	})
	return results, nil
}
// SaveDiffImage writes a diff overlay image to the specified path as PNG.
func SaveDiffImage(img image.Image, path string) error {
if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil {
return fmt.Errorf("failed to create directory: %w", err)
}
f, err := os.Create(path)
if err != nil {
return fmt.Errorf("failed to create file: %w", err)
}
defer func() { _ = f.Close() }()
if err := png.Encode(f, img); err != nil {
return fmt.Errorf("failed to encode PNG: %w", err)
}
return nil
}
// decodePNG reads and decodes a PNG file.
func decodePNG(path string) (image.Image, error) {
f, err := os.Open(path)
if err != nil {
return nil, err
}
defer func() { _ = f.Close() }()
img, err := png.Decode(f)
if err != nil {
return nil, err
}
return img, nil
}
// listPNGs returns all .png files in a directory (non-recursive).
func listPNGs(dir string) ([]string, error) {
entries, err := os.ReadDir(dir)
if err != nil {
if os.IsNotExist(err) {
return nil, nil
}
return nil, err
}
var pngs []string
for _, entry := range entries {
if entry.IsDir() {
continue
}
if strings.HasSuffix(strings.ToLower(entry.Name()), ".png") {
pngs = append(pngs, filepath.Join(dir, entry.Name()))
}
}
return pngs, nil
}
// statusOrder maps a status to its sort rank: changed (0), added (1),
// removed (2), unchanged (3). Any other status ranks last (4).
func statusOrder(s Status) int {
	ranks := map[Status]int{
		StatusChanged:   0,
		StatusAdded:     1,
		StatusRemoved:   2,
		StatusUnchanged: 3,
	}
	if rank, known := ranks[s]; known {
		return rank
	}
	return 4
}

View File

@@ -0,0 +1,309 @@
package imgdiff
import (
"image"
"image/color"
"image/png"
"os"
"path/filepath"
"testing"
)
// createTestPNG writes a width x height PNG filled with the single color c to
// path, creating parent directories first. Any failure aborts the test.
func createTestPNG(t *testing.T, path string, width, height int, c color.Color) {
	t.Helper()

	if mkErr := os.MkdirAll(filepath.Dir(path), 0755); mkErr != nil {
		t.Fatalf("failed to create dir: %v", mkErr)
	}

	canvas := image.NewRGBA(image.Rect(0, 0, width, height))
	for col := 0; col < width; col++ {
		for row := 0; row < height; row++ {
			canvas.Set(col, row, c)
		}
	}

	out, createErr := os.Create(path)
	if createErr != nil {
		t.Fatalf("failed to create file: %v", createErr)
	}
	defer func() { _ = out.Close() }()

	if encErr := png.Encode(out, canvas); encErr != nil {
		t.Fatalf("failed to encode PNG: %v", encErr)
	}
}
// createTestPNGWithBlock writes a width x height PNG to path whose pixels are
// bg everywhere except inside the bw x bh rectangle whose top-left corner is
// (bx, by), which is painted with block. Any failure aborts the test.
func createTestPNGWithBlock(t *testing.T, path string, width, height int, bg, block color.Color, bx, by, bw, bh int) {
	t.Helper()

	if mkErr := os.MkdirAll(filepath.Dir(path), 0755); mkErr != nil {
		t.Fatalf("failed to create dir: %v", mkErr)
	}

	canvas := image.NewRGBA(image.Rect(0, 0, width, height))
	for row := 0; row < height; row++ {
		insideRows := row >= by && row < by+bh
		for col := 0; col < width; col++ {
			fill := bg
			if insideRows && col >= bx && col < bx+bw {
				fill = block
			}
			canvas.Set(col, row, fill)
		}
	}

	out, createErr := os.Create(path)
	if createErr != nil {
		t.Fatalf("failed to create file: %v", createErr)
	}
	defer func() { _ = out.Close() }()

	if encErr := png.Encode(out, canvas); encErr != nil {
		t.Fatalf("failed to encode PNG: %v", encErr)
	}
}
// TestCompare_IdenticalImages checks that two identical 100x100 white images
// compare as unchanged with zero differing pixels out of 10000.
func TestCompare_IdenticalImages(t *testing.T) {
	tmp := t.TempDir()
	basePNG := filepath.Join(tmp, "baseline.png")
	curPNG := filepath.Join(tmp, "current.png")

	// Both files get exactly the same content.
	for _, p := range []string{basePNG, curPNG} {
		createTestPNG(t, p, 100, 100, color.RGBA{R: 255, G: 255, B: 255, A: 255})
	}

	got, err := Compare(basePNG, curPNG, 0.2)
	if err != nil {
		t.Fatalf("Compare failed: %v", err)
	}

	if got.Status != StatusUnchanged {
		t.Errorf("expected StatusUnchanged, got %s", got.Status)
	}
	if got.DiffPercent != 0.0 {
		t.Errorf("expected 0%% diff, got %.2f%%", got.DiffPercent)
	}
	if got.DiffPixels != 0 {
		t.Errorf("expected 0 diff pixels, got %d", got.DiffPixels)
	}
	if got.TotalPixels != 10000 {
		t.Errorf("expected 10000 total pixels, got %d", got.TotalPixels)
	}
}
// TestCompare_DifferentImages checks that a 10x10 red block on an otherwise
// identical 100x100 white image is reported as 100 changed pixels (1%) and
// produces a diff overlay.
func TestCompare_DifferentImages(t *testing.T) {
	tmp := t.TempDir()
	basePNG := filepath.Join(tmp, "baseline.png")
	curPNG := filepath.Join(tmp, "current.png")

	var (
		white = color.RGBA{R: 255, G: 255, B: 255, A: 255}
		red   = color.RGBA{R: 255, G: 0, B: 0, A: 255}
	)

	// Baseline is plain white; current adds a red 10x10 block in the top-left corner.
	createTestPNG(t, basePNG, 100, 100, white)
	createTestPNGWithBlock(t, curPNG, 100, 100, white, red, 0, 0, 10, 10)

	got, err := Compare(basePNG, curPNG, 0.2)
	if err != nil {
		t.Fatalf("Compare failed: %v", err)
	}

	if got.Status != StatusChanged {
		t.Errorf("expected StatusChanged, got %s", got.Status)
	}
	if got.DiffPixels != 100 {
		t.Errorf("expected 100 diff pixels, got %d", got.DiffPixels)
	}
	if got.DiffPercent != 1.0 {
		t.Errorf("expected 1.0%% diff, got %.2f%%", got.DiffPercent)
	}
	if got.DiffImage == nil {
		t.Error("expected non-nil DiffImage")
	}
}
// TestCompare_SubtleDifferenceBelowThreshold checks that a channel difference
// smaller than the threshold is not counted as a change.
func TestCompare_SubtleDifferenceBelowThreshold(t *testing.T) {
	tmp := t.TempDir()
	basePNG := filepath.Join(tmp, "baseline.png")
	curPNG := filepath.Join(tmp, "current.png")

	// The two fills differ by 10 on the red channel only.
	createTestPNG(t, basePNG, 10, 10, color.RGBA{R: 200, G: 200, B: 200, A: 255})
	createTestPNG(t, curPNG, 10, 10, color.RGBA{R: 210, G: 200, B: 200, A: 255})

	// A threshold of 0.2 tolerates a difference of 51 per channel, and 10 < 51.
	got, err := Compare(basePNG, curPNG, 0.2)
	if err != nil {
		t.Fatalf("Compare failed: %v", err)
	}

	if got.Status != StatusUnchanged {
		t.Errorf("expected StatusUnchanged (diff below threshold), got %s", got.Status)
	}
	if got.DiffPixels != 0 {
		t.Errorf("expected 0 diff pixels (below threshold), got %d", got.DiffPixels)
	}
}
// TestCompare_DifferentSizes checks that images of different heights are
// compared over the larger area and that the rows present in only one image
// count as differing pixels.
func TestCompare_DifferentSizes(t *testing.T) {
	dir := t.TempDir()
	baselinePath := filepath.Join(dir, "baseline.png")
	currentPath := filepath.Join(dir, "current.png")
	white := color.RGBA{R: 255, G: 255, B: 255, A: 255}
	createTestPNG(t, baselinePath, 100, 100, white)
	createTestPNG(t, currentPath, 100, 120, white) // Taller

	result, err := Compare(baselinePath, currentPath, 0.2)
	if err != nil {
		t.Fatalf("Compare failed: %v", err)
	}

	// The extra 20 rows (2000 pixels) should be "different" (white vs transparent/zero)
	if result.Status != StatusChanged {
		t.Errorf("expected StatusChanged for different sizes, got %s", result.Status)
	}
	if result.TotalPixels != 12000 { // 100*120
		t.Errorf("expected 12000 total pixels, got %d", result.TotalPixels)
	}
	// Only the 20 extra rows differ; the shared 100x100 area is identical.
	if result.DiffPixels != 2000 { // 100*20
		t.Errorf("expected 2000 diff pixels, got %d", result.DiffPixels)
	}
}
// TestCompareDirectories builds a baseline and a current directory that
// together exercise all four statuses, and checks both the classification of
// each file and the complete sort order of the results.
func TestCompareDirectories(t *testing.T) {
	baselineDir := filepath.Join(t.TempDir(), "baseline")
	currentDir := filepath.Join(t.TempDir(), "current")
	white := color.RGBA{R: 255, G: 255, B: 255, A: 255}
	red := color.RGBA{R: 255, G: 0, B: 0, A: 255}
	blue := color.RGBA{R: 0, G: 0, B: 255, A: 255}

	// shared-unchanged.png: identical in both
	createTestPNG(t, filepath.Join(baselineDir, "shared-unchanged.png"), 10, 10, white)
	createTestPNG(t, filepath.Join(currentDir, "shared-unchanged.png"), 10, 10, white)
	// shared-changed.png: different in both
	createTestPNG(t, filepath.Join(baselineDir, "shared-changed.png"), 10, 10, white)
	createTestPNG(t, filepath.Join(currentDir, "shared-changed.png"), 10, 10, red)
	// removed.png: only in baseline
	createTestPNG(t, filepath.Join(baselineDir, "removed.png"), 10, 10, white)
	// added.png: only in current
	createTestPNG(t, filepath.Join(currentDir, "added.png"), 10, 10, blue)

	results, err := CompareDirectories(baselineDir, currentDir, 0.2)
	if err != nil {
		t.Fatalf("CompareDirectories failed: %v", err)
	}

	// Results should be sorted: changed first, then added, removed, unchanged.
	// With one file per status the whole sequence is fully determined.
	wantOrder := []struct {
		name   string
		status Status
	}{
		{"shared-changed.png", StatusChanged},
		{"added.png", StatusAdded},
		{"removed.png", StatusRemoved},
		{"shared-unchanged.png", StatusUnchanged},
	}
	if len(results) != len(wantOrder) {
		t.Fatalf("expected %d results, got %d", len(wantOrder), len(results))
	}
	for i, want := range wantOrder {
		got := results[i]
		if got.Name != want.name || got.Status != want.status {
			t.Errorf("result %d: expected %s (%s), got %s (%s)",
				i, want.name, want.status, got.Name, got.Status)
		}
	}
}
// TestCompareDirectories_EmptyBaseline checks that a screenshot which exists
// only in the current directory is reported as added when the baseline
// directory exists but is empty.
func TestCompareDirectories_EmptyBaseline(t *testing.T) {
	emptyBaseline := filepath.Join(t.TempDir(), "baseline")
	current := filepath.Join(t.TempDir(), "current")

	if mkErr := os.MkdirAll(emptyBaseline, 0755); mkErr != nil {
		t.Fatal(mkErr)
	}
	createTestPNG(t, filepath.Join(current, "new.png"), 10, 10, color.RGBA{R: 255, G: 255, B: 255, A: 255})

	got, err := CompareDirectories(emptyBaseline, current, 0.2)
	if err != nil {
		t.Fatalf("CompareDirectories failed: %v", err)
	}
	if n := len(got); n != 1 {
		t.Fatalf("expected 1 result, got %d", n)
	}
	if first := got[0]; first.Status != StatusAdded {
		t.Errorf("expected StatusAdded, got %s", first.Status)
	}
}
// TestGenerateReport renders a report for a single changed screenshot and
// checks that the output file exists, is non-empty and contains the expected
// title, inline image data, file name and status text.
func TestGenerateReport(t *testing.T) {
	root := t.TempDir()
	baselineDir := filepath.Join(root, "baseline")
	currentDir := filepath.Join(root, "current")

	// One screenshot, white in the baseline and red in the current run.
	createTestPNG(t, filepath.Join(baselineDir, "page.png"), 50, 50, color.RGBA{R: 255, G: 255, B: 255, A: 255})
	createTestPNG(t, filepath.Join(currentDir, "page.png"), 50, 50, color.RGBA{R: 255, G: 0, B: 0, A: 255})

	results, err := CompareDirectories(baselineDir, currentDir, 0.2)
	if err != nil {
		t.Fatalf("CompareDirectories failed: %v", err)
	}

	reportPath := filepath.Join(root, "report", "index.html")
	if genErr := GenerateReport(results, reportPath); genErr != nil {
		t.Fatalf("GenerateReport failed: %v", genErr)
	}

	// The report must exist on disk and must not be empty.
	stat, err := os.Stat(reportPath)
	if err != nil {
		t.Fatalf("report file not found: %v", err)
	}
	if stat.Size() == 0 {
		t.Error("report file is empty")
	}

	// It must also contain the key pieces of rendered HTML.
	raw, err := os.ReadFile(reportPath)
	if err != nil {
		t.Fatalf("failed to read report: %v", err)
	}
	html := string(raw)
	wanted := []string{
		"Visual Regression Report",
		"data:image/png;base64,",
		"page.png",
		"changed",
	}
	for _, fragment := range wanted {
		if contains(html, fragment) {
			continue
		}
		t.Errorf("report missing expected content: %q", fragment)
	}
}
// contains reports whether substr occurs within s. A substr longer than s is
// rejected up front; otherwise the scan is delegated to searchString.
func contains(s, substr string) bool {
	if len(substr) > len(s) {
		return false
	}
	return searchString(s, substr)
}
// searchString reports whether substr occurs in s by comparing substr against
// every window of s that has the same length.
func searchString(s, substr string) bool {
	window := len(substr)
	for start, last := 0, len(s)-window; start <= last; start++ {
		if s[start:start+window] == substr {
			return true
		}
	}
	return false
}

View File

@@ -0,0 +1,345 @@
package imgdiff
import (
"bytes"
"encoding/base64"
"fmt"
"html/template"
"image"
"image/png"
"os"
"path/filepath"
)
// reportEntry holds data for a single screenshot in the HTML template.
type reportEntry struct {
Name string
Status string
DiffPercent string
BaselineDataURI template.URL
CurrentDataURI template.URL
DiffDataURI template.URL
HasBaseline bool
HasCurrent bool
HasDiff bool
}
// reportData is the root view model handed to the HTML template.
type reportData struct {
	// Entries holds one item per comparison result, in the order given.
	Entries []reportEntry

	// Number of entries per status, plus the overall number of results.
	ChangedCount, AddedCount, RemovedCount, UnchangedCount, TotalCount int

	// HasDifferences is true when anything was changed, added or removed.
	HasDifferences bool
}
// GenerateReport produces a self-contained HTML file from comparison results.
//
// Images of changed, added and removed results are base64-encoded inline as
// data URIs. Unchanged results appear in the report by name only, so their
// images are not read or encoded. Parent directories of outputPath are created
// as needed.
//
// An error is returned when the output directory or file cannot be created,
// when an image that the report displays cannot be read or encoded, when the
// template fails to parse or execute, or when closing the output file fails.
func GenerateReport(results []Result, outputPath string) error {
	if err := os.MkdirAll(filepath.Dir(outputPath), 0755); err != nil {
		return fmt.Errorf("failed to create output directory: %w", err)
	}

	data := reportData{}
	for _, r := range results {
		entry := reportEntry{
			Name:   r.Name,
			Status: r.Status.String(),
		}
		switch r.Status {
		case StatusChanged:
			data.ChangedCount++
			entry.DiffPercent = fmt.Sprintf("%.2f%%", r.DiffPercent)
		case StatusAdded:
			data.AddedCount++
		case StatusRemoved:
			data.RemovedCount++
		case StatusUnchanged:
			data.UnchangedCount++
			entry.DiffPercent = "0.00%"
		}

		// The template renders unchanged entries as a plain list of names, so
		// encoding their baseline, current and diff images would be wasted work.
		if r.Status == StatusUnchanged {
			data.Entries = append(data.Entries, entry)
			continue
		}

		if r.BaselinePath != "" {
			uri, err := pngFileToDataURI(r.BaselinePath)
			if err != nil {
				return fmt.Errorf("failed to encode baseline %s: %w", r.Name, err)
			}
			entry.BaselineDataURI = template.URL(uri)
			entry.HasBaseline = true
		}
		if r.CurrentPath != "" {
			uri, err := pngFileToDataURI(r.CurrentPath)
			if err != nil {
				return fmt.Errorf("failed to encode current %s: %w", r.Name, err)
			}
			entry.CurrentDataURI = template.URL(uri)
			entry.HasCurrent = true
		}
		if r.DiffImage != nil {
			uri, err := imageToDataURI(r.DiffImage)
			if err != nil {
				return fmt.Errorf("failed to encode diff %s: %w", r.Name, err)
			}
			entry.DiffDataURI = template.URL(uri)
			entry.HasDiff = true
		}
		data.Entries = append(data.Entries, entry)
	}
	data.TotalCount = len(results)
	data.HasDifferences = data.ChangedCount > 0 || data.AddedCount > 0 || data.RemovedCount > 0

	tmpl, err := template.New("report").Parse(htmlTemplate)
	if err != nil {
		return fmt.Errorf("failed to parse template: %w", err)
	}

	f, err := os.Create(outputPath)
	if err != nil {
		return fmt.Errorf("failed to create output file: %w", err)
	}
	if err := tmpl.Execute(f, data); err != nil {
		// The execute error is the one worth reporting; close is best effort here.
		_ = f.Close()
		return fmt.Errorf("failed to execute template: %w", err)
	}
	// A failed Close on a written file can mean the report never reached disk.
	if err := f.Close(); err != nil {
		return fmt.Errorf("failed to close output file: %w", err)
	}
	return nil
}
// pngFileToDataURI reads a PNG file and returns a base64 data URI.
func pngFileToDataURI(path string) (string, error) {
data, err := os.ReadFile(path)
if err != nil {
return "", err
}
encoded := base64.StdEncoding.EncodeToString(data)
return "data:image/png;base64," + encoded, nil
}
// imageToDataURI encodes an image.Image to a PNG base64 data URI.
func imageToDataURI(img image.Image) (string, error) {
var buf bytes.Buffer
if err := png.Encode(&buf, img); err != nil {
return "", err
}
encoded := base64.StdEncoding.EncodeToString(buf.Bytes())
return "data:image/png;base64," + encoded, nil
}
// htmlTemplate is the html/template source of the visual regression report.
// GenerateReport executes it with a reportData value. It renders:
//   - a header with the total count and summary cards with the per-status counts,
//   - one card per "changed" entry with Slider, Side by Side and Diff Overlay tabs,
//   - one single-image card per "added" or "removed" entry,
//   - a collapsible list that names the "unchanged" entries.
//
// Styles and script are inline and images are referenced through the data URI
// fields of each entry. The script at the bottom implements tab switching, the
// baseline/current comparison slider and the unchanged-list toggle.
const htmlTemplate = `<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Visual Regression Report</title>
<style>
* { box-sizing: border-box; margin: 0; padding: 0; }
body { font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif; background: #f5f5f5; color: #333; }
.header { background: #1a1a2e; color: #fff; padding: 24px 32px; }
.header h1 { font-size: 24px; font-weight: 600; }
.header p { margin-top: 8px; opacity: 0.8; font-size: 14px; }
.summary { display: flex; gap: 16px; padding: 20px 32px; background: #fff; border-bottom: 1px solid #e0e0e0; flex-wrap: wrap; }
.summary-card { padding: 12px 20px; border-radius: 8px; font-size: 14px; font-weight: 500; }
.summary-changed { background: #fff3e0; color: #e65100; }
.summary-added { background: #e8f5e9; color: #2e7d32; }
.summary-removed { background: #fce4ec; color: #c62828; }
.summary-unchanged { background: #e3f2fd; color: #1565c0; }
.content { padding: 24px 32px; max-width: 1400px; margin: 0 auto; }
.section-title { font-size: 18px; font-weight: 600; margin: 24px 0 16px; padding-bottom: 8px; border-bottom: 2px solid #e0e0e0; }
.no-changes { text-align: center; padding: 60px 20px; color: #666; }
.no-changes h2 { font-size: 24px; margin-bottom: 8px; color: #2e7d32; }
.card { background: #fff; border-radius: 12px; margin-bottom: 24px; box-shadow: 0 1px 3px rgba(0,0,0,0.1); overflow: hidden; }
.card-header { display: flex; justify-content: space-between; align-items: center; padding: 16px 20px; border-bottom: 1px solid #eee; }
.card-name { font-weight: 600; font-size: 15px; }
.card-badge { font-size: 12px; padding: 4px 10px; border-radius: 12px; font-weight: 500; }
.badge-changed { background: #fff3e0; color: #e65100; }
.badge-added { background: #e8f5e9; color: #2e7d32; }
.badge-removed { background: #fce4ec; color: #c62828; }
.tabs { display: flex; gap: 0; border-bottom: 1px solid #eee; }
.tab { padding: 10px 20px; cursor: pointer; font-size: 13px; font-weight: 500; color: #666; border-bottom: 2px solid transparent; transition: all 0.2s; }
.tab:hover { color: #333; background: #f9f9f9; }
.tab.active { color: #1a1a2e; border-bottom-color: #1a1a2e; }
.tab-content { display: none; padding: 20px; }
.tab-content.active { display: block; }
.slider-container { position: relative; overflow: hidden; cursor: ew-resize; user-select: none; border: 1px solid #eee; border-radius: 4px; }
.slider-container > img { display: block; width: 100%; height: auto; }
.slider-baseline { position: absolute; top: 0; left: 0; width: 100%; height: 100%; clip-path: inset(0 50% 0 0); }
.slider-baseline img { display: block; width: 100%; height: auto; }
.slider-divider { position: absolute; top: 0; width: 3px; height: 100%; background: #e65100; z-index: 10; cursor: ew-resize; }
.slider-divider::before { content: ""; position: absolute; top: 50%; left: 50%; transform: translate(-50%, -50%); width: 32px; height: 32px; background: #e65100; border-radius: 50%; border: 2px solid #fff; box-shadow: 0 2px 8px rgba(0,0,0,0.3); }
.slider-divider::after { content: "\2194"; position: absolute; top: 50%; left: 50%; transform: translate(-50%, -50%); color: #fff; font-size: 16px; z-index: 1; }
.slider-label { position: absolute; top: 10px; padding: 4px 10px; background: rgba(0,0,0,0.6); color: #fff; font-size: 11px; border-radius: 4px; z-index: 5; pointer-events: none; }
.slider-label-left { left: 10px; }
.slider-label-right { right: 10px; }
.side-by-side { display: grid; grid-template-columns: 1fr 1fr; gap: 16px; }
.side-by-side .img-container { border: 1px solid #eee; border-radius: 4px; overflow: hidden; }
.side-by-side .img-label { font-size: 12px; font-weight: 500; padding: 8px 12px; background: #f5f5f5; color: #666; }
.side-by-side img { display: block; width: 100%; height: auto; }
.diff-overlay img { display: block; max-width: 100%; height: auto; border: 1px solid #eee; border-radius: 4px; }
.single-image img { display: block; max-width: 100%; height: auto; border: 1px solid #eee; border-radius: 4px; }
.unchanged-section { margin-top: 32px; }
.unchanged-toggle { cursor: pointer; font-size: 14px; color: #666; padding: 12px 0; }
.unchanged-toggle:hover { color: #333; }
.unchanged-list { display: none; }
.unchanged-list.open { display: block; }
.unchanged-item { padding: 8px 0; font-size: 13px; color: #888; border-bottom: 1px solid #f0f0f0; }
</style>
</head>
<body>
<div class="header">
<h1>Visual Regression Report</h1>
<p>{{.TotalCount}} screenshot{{if ne .TotalCount 1}}s{{end}} compared</p>
</div>
<div class="summary">
{{if gt .ChangedCount 0}}<div class="summary-card summary-changed">{{.ChangedCount}} Changed</div>{{end}}
{{if gt .AddedCount 0}}<div class="summary-card summary-added">{{.AddedCount}} Added</div>{{end}}
{{if gt .RemovedCount 0}}<div class="summary-card summary-removed">{{.RemovedCount}} Removed</div>{{end}}
<div class="summary-card summary-unchanged">{{.UnchangedCount}} Unchanged</div>
</div>
<div class="content">
{{if not .HasDifferences}}
<div class="no-changes">
<h2>No visual changes detected</h2>
<p>All {{.TotalCount}} screenshots match their baselines.</p>
</div>
{{end}}
{{range .Entries}}
{{if eq .Status "changed"}}
<div class="card">
<div class="card-header">
<span class="card-name">{{.Name}}</span>
<span class="card-badge badge-changed">{{.DiffPercent}} changed</span>
</div>
<div class="tabs">
<div class="tab active" onclick="switchTab(this, 'slider')">Slider</div>
<div class="tab" onclick="switchTab(this, 'sidebyside')">Side by Side</div>
<div class="tab" onclick="switchTab(this, 'diff')">Diff Overlay</div>
</div>
<div class="tab-content active" data-tab="slider">
<div class="slider-container" onmousedown="startSlider(event, this)" onmousemove="moveSlider(event, this)" ontouchstart="startSlider(event, this)" ontouchmove="moveSlider(event, this)">
<img src="{{.CurrentDataURI}}" alt="Current" draggable="false">
<div class="slider-baseline">
<img src="{{.BaselineDataURI}}" alt="Baseline" draggable="false">
</div>
<div class="slider-divider" style="left: calc(50% - 1.5px);"></div>
<span class="slider-label slider-label-left">Baseline</span>
<span class="slider-label slider-label-right">Current</span>
</div>
</div>
<div class="tab-content" data-tab="sidebyside">
<div class="side-by-side">
<div class="img-container">
<div class="img-label">Baseline</div>
<img src="{{.BaselineDataURI}}" alt="Baseline">
</div>
<div class="img-container">
<div class="img-label">Current</div>
<img src="{{.CurrentDataURI}}" alt="Current">
</div>
</div>
</div>
<div class="tab-content" data-tab="diff">
<div class="diff-overlay">
{{if .HasDiff}}<img src="{{.DiffDataURI}}" alt="Diff overlay">{{end}}
</div>
</div>
</div>
{{end}}
{{if eq .Status "added"}}
<div class="card">
<div class="card-header">
<span class="card-name">{{.Name}}</span>
<span class="card-badge badge-added">added</span>
</div>
<div class="tab-content active" data-tab="single">
<div class="single-image">
{{if .HasCurrent}}<img src="{{.CurrentDataURI}}" alt="New screenshot">{{end}}
</div>
</div>
</div>
{{end}}
{{if eq .Status "removed"}}
<div class="card">
<div class="card-header">
<span class="card-name">{{.Name}}</span>
<span class="card-badge badge-removed">removed</span>
</div>
<div class="tab-content active" data-tab="single">
<div class="single-image">
{{if .HasBaseline}}<img src="{{.BaselineDataURI}}" alt="Removed screenshot">{{end}}
</div>
</div>
</div>
{{end}}
{{end}}
{{if gt .UnchangedCount 0}}
<div class="unchanged-section">
<div class="unchanged-toggle" onclick="toggleUnchanged(this)">
&#9654; {{.UnchangedCount}} unchanged screenshot{{if ne .UnchangedCount 1}}s{{end}} (click to expand)
</div>
<div class="unchanged-list">
{{range .Entries}}{{if eq .Status "unchanged"}}<div class="unchanged-item">{{.Name}}</div>{{end}}{{end}}
</div>
</div>
{{end}}
</div>
<script>
// Tab switching
function switchTab(tabEl, tabName) {
const card = tabEl.closest('.card');
card.querySelectorAll('.tab').forEach(t => t.classList.remove('active'));
card.querySelectorAll('.tab-content').forEach(c => c.classList.remove('active'));
tabEl.classList.add('active');
card.querySelector('[data-tab="' + tabName + '"]').classList.add('active');
}
// Slider interaction
let sliderActive = false;
function startSlider(e, container) {
sliderActive = true;
moveSlider(e, container);
const stopSlider = function() { sliderActive = false; };
document.addEventListener('mouseup', stopSlider, { once: true });
document.addEventListener('touchend', stopSlider, { once: true });
}
function moveSlider(e, container) {
if (!sliderActive) return;
e.preventDefault();
const rect = container.getBoundingClientRect();
const clientX = e.touches ? e.touches[0].clientX : e.clientX;
let x = clientX - rect.left;
x = Math.max(0, Math.min(x, rect.width));
const percent = (x / rect.width) * 100;
const clipRight = 100 - percent;
container.querySelector('.slider-baseline').style.clipPath = 'inset(0 ' + clipRight + '% 0 0)';
container.querySelector('.slider-divider').style.left = 'calc(' + percent + '% - 1.5px)';
}
// Unchanged section toggle
function toggleUnchanged(el) {
const list = el.nextElementSibling;
const isOpen = list.classList.toggle('open');
el.innerHTML = (isOpen ? '&#9660;' : '&#9654;') + ' {{.UnchangedCount}} unchanged screenshot{{if ne .UnchangedCount 1}}s{{end}} (click to ' + (isOpen ? 'collapse' : 'expand') + ')';
}
</script>
</body>
</html>`

View File

@@ -0,0 +1,60 @@
package imgdiff
import (
"encoding/json"
"fmt"
"os"
"path/filepath"
)
// Summary holds aggregate comparison results in a JSON-friendly format.
// It is written alongside the HTML report so that CI pipelines can read it
// without parsing HTML.
type Summary struct {
// Project is the project name the summary was built for.
Project string `json:"project"`
// Changed is the number of results with StatusChanged.
Changed int `json:"changed"`
// Added is the number of results with StatusAdded.
Added int `json:"added"`
// Removed is the number of results with StatusRemoved.
Removed int `json:"removed"`
// Unchanged is the number of results with StatusUnchanged.
Unchanged int `json:"unchanged"`
// Total is the number of results that were summarized.
Total int `json:"total"`
// HasDifferences is true when at least one result is changed, added or removed.
HasDifferences bool `json:"has_differences"`
}
// BuildSummary tallies results by status and returns the aggregate for the
// given project. Results with a status other than changed, added, removed or
// unchanged are counted in Total only.
func BuildSummary(project string, results []Result) Summary {
	tally := make(map[Status]int, 4)
	for i := range results {
		tally[results[i].Status]++
	}

	changed, added, removed := tally[StatusChanged], tally[StatusAdded], tally[StatusRemoved]
	return Summary{
		Project:        project,
		Changed:        changed,
		Added:          added,
		Removed:        removed,
		Unchanged:      tally[StatusUnchanged],
		Total:          len(results),
		HasDifferences: changed > 0 || added > 0 || removed > 0,
	}
}
// WriteSummary serializes summary as indented JSON and stores it at path with
// mode 0644. Missing parent directories are created with mode 0755 first.
func WriteSummary(summary Summary, path string) error {
	parent := filepath.Dir(path)
	if mkErr := os.MkdirAll(parent, 0755); mkErr != nil {
		return fmt.Errorf("failed to create directory for summary: %w", mkErr)
	}

	encoded, encErr := json.MarshalIndent(summary, "", " ")
	if encErr != nil {
		return fmt.Errorf("failed to marshal summary: %w", encErr)
	}

	writeErr := os.WriteFile(path, encoded, 0644)
	if writeErr == nil {
		return nil
	}
	return fmt.Errorf("failed to write summary: %w", writeErr)
}

View File

@@ -0,0 +1,49 @@
package s3
import (
"fmt"
"os"
"os/exec"
log "github.com/sirupsen/logrus"
)
// SyncDown mirrors an S3 prefix into a local directory by running
// "aws s3 sync <s3url> <destDir>". The destination directory is created first
// if needed, and the CLI's output is forwarded to this process's stdout and stderr.
func SyncDown(s3url string, destDir string) error {
	if mkErr := os.MkdirAll(destDir, 0755); mkErr != nil {
		return fmt.Errorf("failed to create destination directory: %w", mkErr)
	}

	log.Infof("Downloading from %s to %s ...", s3url, destDir)

	awsCmd := exec.Command("aws", "s3", "sync", s3url, destDir)
	awsCmd.Stdout, awsCmd.Stderr = os.Stdout, os.Stderr

	runErr := awsCmd.Run()
	if runErr == nil {
		return nil
	}
	return fmt.Errorf("aws s3 sync failed: %w\n\nTo authenticate, run:\n aws sso login\n\nOr configure AWS credentials with:\n aws configure sso", runErr)
}
// SyncUp mirrors a local directory to an S3 prefix by running
// "aws s3 sync <srcDir> <s3url>". When delete is true, "--delete" is appended
// so that objects missing locally are removed from S3. The CLI's output is
// forwarded to this process's stdout and stderr.
func SyncUp(srcDir string, s3url string, delete bool) error {
	cliArgs := make([]string, 0, 5)
	cliArgs = append(cliArgs, "s3", "sync", srcDir, s3url)
	if delete {
		cliArgs = append(cliArgs, "--delete")
	}

	log.Infof("Uploading from %s to %s ...", srcDir, s3url)

	awsCmd := exec.Command("aws", cliArgs...)
	awsCmd.Stdout, awsCmd.Stderr = os.Stdout, os.Stderr

	runErr := awsCmd.Run()
	if runErr == nil {
		return nil
	}
	return fmt.Errorf("aws s3 sync failed: %w\n\nTo authenticate, run:\n aws sso login\n\nOr configure AWS credentials with:\n aws configure sso", runErr)
}

18
uv.lock generated
View File

@@ -4711,7 +4711,7 @@ requires-dist = [
{ name = "numpy", marker = "extra == 'model-server'", specifier = "==2.4.1" },
{ name = "oauthlib", marker = "extra == 'backend'", specifier = "==3.2.2" },
{ name = "office365-rest-python-client", marker = "extra == 'backend'", specifier = "==2.5.9" },
{ name = "onyx-devtools", marker = "extra == 'dev'", specifier = "==0.5.3" },
{ name = "onyx-devtools", marker = "extra == 'dev'", specifier = "==0.5.7" },
{ name = "openai", specifier = "==2.14.0" },
{ name = "openapi-generator-cli", marker = "extra == 'dev'", specifier = "==7.17.0" },
{ name = "openinference-instrumentation", marker = "extra == 'backend'", specifier = "==0.1.42" },
@@ -4816,20 +4816,20 @@ requires-dist = [{ name = "onyx", extras = ["backend", "dev", "ee"], editable =
[[package]]
name = "onyx-devtools"
version = "0.5.3"
version = "0.5.7"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "fastapi" },
{ name = "openapi-generator-cli" },
]
wheels = [
{ url = "https://files.pythonhosted.org/packages/85/39/87e770afccf123cd72ca8c58178bc08a9b04cb6198f265213012a6a71f21/onyx_devtools-0.5.3-py3-none-any.whl", hash = "sha256:6b61dff779a5839032fb282f8db62aa3d640c09fa0d7d2ed7f8a23fd38fa84df", size = 2894984, upload-time = "2026-02-11T23:05:50.739Z" },
{ url = "https://files.pythonhosted.org/packages/ef/c5/9a7516398af4183f3247a668b710da344c002586e9be668cb690b8566d8a/onyx_devtools-0.5.3-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:268c57ffb08322bd9671d1b8444199607bc1eaf7e2c25300de98ba272c716c3e", size = 2913582, upload-time = "2026-02-11T23:05:33.582Z" },
{ url = "https://files.pythonhosted.org/packages/70/58/86895464d02e2ae0a22a0bcc48cfd5e7cb647ee117a1a0620850f03e21e5/onyx_devtools-0.5.3-py3-none-macosx_11_0_arm64.whl", hash = "sha256:e440d14ecad26ea3c85ae00a95cc1731214de6c6c71b90b08ab3608d99ecdd58", size = 2717143, upload-time = "2026-02-11T23:05:32.673Z" },
{ url = "https://files.pythonhosted.org/packages/10/95/c8ea6a27afde2c29b108a0988aa4f44963d7124bfe04322217c7003129b9/onyx_devtools-0.5.3-py3-none-manylinux_2_17_aarch64.whl", hash = "sha256:49136baf0427aa6a5dde57457e4c963d86be4cd59bb6d02837609dd470de6a6b", size = 2625948, upload-time = "2026-02-11T23:05:48.147Z" },
{ url = "https://files.pythonhosted.org/packages/85/cc/aabfb4599ce42aac88bdb1082696e3dde0a34a7739df61035e77e01cbca3/onyx_devtools-0.5.3-py3-none-manylinux_2_17_x86_64.whl", hash = "sha256:2c327d258943f80b9860268fa69fde3a6f707d2aaed9362385cd6acd255d11cc", size = 2895001, upload-time = "2026-02-11T23:05:50.509Z" },
{ url = "https://files.pythonhosted.org/packages/17/3c/d3af3a49464d15ebb0a8cf371169158bb99a14be859ac7468c73ecf055cd/onyx_devtools-0.5.3-py3-none-win_amd64.whl", hash = "sha256:fa5e7b779ede887f7c2e2da2442048cc9b626a9d8007b34c3b617e40dfd8d5bd", size = 2977738, upload-time = "2026-02-11T23:05:30.592Z" },
{ url = "https://files.pythonhosted.org/packages/3f/27/8844e7c4ee06453b57be55644e572206b7b79e3685351f80afd8b7056327/onyx_devtools-0.5.3-py3-none-win_arm64.whl", hash = "sha256:2542fc3b1ee27d0695aef8e17819879a0eeaed10e2855e31145cbfa6267fcf6c", size = 2688564, upload-time = "2026-02-11T23:05:34.968Z" },
{ url = "https://files.pythonhosted.org/packages/23/7d/a9135044e220b6ef6a0752be826c6c758a1fc8b59d545306938aa43e8976/onyx_devtools-0.5.7-py3-none-any.whl", hash = "sha256:47c5cdefb525523a9860ed134366f30a0d2ad30e055b2350c1da577d1059654b", size = 3769892, upload-time = "2026-02-12T20:06:02.937Z" },
{ url = "https://files.pythonhosted.org/packages/e7/63/26dbfc35f62d0617e4c46b508e106f155990c37c851d8eb44bc331b2e933/onyx_devtools-0.5.7-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:c7ce707d9e27733e7300b2be3686e3fd76d62b9b1c20c9bd02dac707f4eac1d5", size = 3815888, upload-time = "2026-02-12T20:06:07.024Z" },
{ url = "https://files.pythonhosted.org/packages/82/55/4498e74af5f115355127c966e326f9ae430460170d1f1d50c2f150f53a00/onyx_devtools-0.5.7-py3-none-macosx_11_0_arm64.whl", hash = "sha256:0d02a0c1c48a33bd85b251a2288d94a00effc2139b6e2b7018362cba8cf717e1", size = 3562190, upload-time = "2026-02-12T20:06:00.998Z" },
{ url = "https://files.pythonhosted.org/packages/18/70/fc1490420bd690bc6b3ebc3a6da68347636cb1a31afa07801fba9f77def4/onyx_devtools-0.5.7-py3-none-manylinux_2_17_aarch64.whl", hash = "sha256:fe3ae04f06e1b421f1297e70d2c14013d85941afa85210bfd96db30abb391989", size = 3425118, upload-time = "2026-02-12T20:05:59.192Z" },
{ url = "https://files.pythonhosted.org/packages/b3/46/76b44234d7cd4cf5c73b897f6dd1864c867c63cc871fd73f8901592c9248/onyx_devtools-0.5.7-py3-none-manylinux_2_17_x86_64.whl", hash = "sha256:ecf5f525c773d8db0b58bef3a02b00df31e7a9ade16213b4220eb2baffffd8e2", size = 3769913, upload-time = "2026-02-12T20:06:03.405Z" },
{ url = "https://files.pythonhosted.org/packages/7b/e5/9ef8d3265dfc82dbd9d27653d981ccb67c779882807ef1bd7fcecbe1c68a/onyx_devtools-0.5.7-py3-none-win_amd64.whl", hash = "sha256:f84368da19311acc246d511c5b2874b14ca1c9e53675198ba6ccabefbe57d648", size = 3863558, upload-time = "2026-02-12T20:06:01.995Z" },
{ url = "https://files.pythonhosted.org/packages/30/ad/f23ace3e049017e9cfcc06302005fd476b44357b6f4ade521febd8393599/onyx_devtools-0.5.7-py3-none-win_arm64.whl", hash = "sha256:edb1dcd3901f7532114d40fbc903ba60c528bdad397425c174dc5841b5b8de43", size = 3486869, upload-time = "2026-02-12T20:06:03.719Z" },
]
[[package]]

1
web/.gitignore vendored
View File

@@ -41,6 +41,7 @@ next-env.d.ts
/user_auth.json
/build-archive.log
/test-results
/output/
# generated clients ... in particular, the API to the Onyx backend itself!
/src/lib/generated

View File

@@ -61,27 +61,26 @@ Bring up the entire application.
0. Install playwright dependencies
```cd web
```bash
npx playwright install
```
1. Run playwright
```
cd web
```bash
npx playwright test
```
To run a single test:
```
```bash
npx playwright test landing-page.spec.ts
```
If running locally, interactive options can help you see exactly what is happening in
the test.
```
```bash
npx playwright test --ui
npx playwright test --headed
```
@@ -90,6 +89,17 @@ npx playwright test --headed
By default, playwright.config.ts is configured to output the results to:
```bash
web/output/playwright/
```
web/test-results
3. Visual regression screenshots
Screenshots are captured automatically during test runs and saved to `web/output/screenshots/`.
To compare screenshots across CI runs, use:
```bash
ods screenshot-diff compare --project admin
```
For more information, see [tools/ods/README.md](https://github.com/onyx-dot-app/onyx/blob/main/tools/ods/README.md#screenshot-diff---visual-regression-testing).

View File

@@ -8,6 +8,12 @@ export default defineConfig({
timeout: 100000, // 100 seconds timeout
expect: {
timeout: 15000, // 15 seconds timeout for all assertions to reduce flakiness
toHaveScreenshot: {
// Allow up to 1% of pixels to differ (accounts for anti-aliasing, subpixel rendering)
maxDiffPixelRatio: 0.01,
// Per-pixel color-difference threshold (0-1, perceived difference in YIQ color space): how different a pixel can be before it counts as changed
threshold: 0.2,
},
},
retries: process.env.CI ? 2 : 0, // Retry failed tests 2 times in CI, 0 locally
@@ -20,7 +26,7 @@ export default defineConfig({
reporter: [["list"]],
// Only run Playwright tests from tests/e2e directory (ignore Jest tests in src/)
testMatch: /.*\/tests\/e2e\/.*\.spec\.ts/,
outputDir: "test-results",
outputDir: "output/playwright",
use: {
// Base URL for the application, can be overridden via BASE_URL environment variable
baseURL: process.env.BASE_URL || "http://localhost:3000",

View File

@@ -0,0 +1,199 @@
import { test, expect } from "@playwright/test";
import type { Page } from "@playwright/test";
import { expectScreenshot } from "./utils/visualRegression";
// Load browser storage state from admin_auth.json for every test in this file
// (presumably the saved admin login session — confirm against the auth setup).
test.use({ storageState: "admin_auth.json" });
// Let the tests declared in this file run in parallel with one another.
test.describe.configure({ mode: "parallel" });
/**
 * Describes one admin page that gets navigated to, verified and screenshotted.
 */
interface AdminPageSnapshot {
  /** Route segment appended to `/admin/`; also used to build the screenshot name. */
  path: string;
  /** Human-readable label used in the generated test title. */
  name: string;
  /** Text the `admin-page-title` element is expected to show. */
  pageTitle: string;
  /** Extra, optional assertions performed after the title check. */
  options?: {
    /** Expected accessible name of a button that must appear exactly once. */
    buttonName?: string;
    /** Expected text of the first `p.text-sm` element on the page. */
    paragraphText?: string | RegExp;
    /** Declared for callers, but not read by any code in this file. */
    subHeaderText?: string;
  };
}
// Admin pages covered by this spec. Each entry is opened at /admin/<path>, its title
// element is checked against `pageTitle`, any `options` assertions are run, and a
// screenshot named after the theme and path is taken.
// NOTE: these strings are compared against rendered page text, so they have to match
// the UI copy exactly — update them together with the UI.
const ADMIN_PAGES: AdminPageSnapshot[] = [
{
name: "Document Management - Explorer",
path: "documents/explorer",
pageTitle: "Document Explorer",
},
{
name: "Connectors - Add Connector",
path: "add-connector",
pageTitle: "Add Connector",
},
{
name: "Custom Assistants - Assistants",
path: "assistants",
pageTitle: "Assistants",
options: {
paragraphText:
"Assistants are a way to build custom search/question-answering experiences for different use cases.",
},
},
{
name: "Configuration - Document Processing",
path: "configuration/document-processing",
pageTitle: "Document Processing",
},
{
name: "Document Management - Document Sets",
path: "documents/sets",
pageTitle: "Document Sets",
options: {
paragraphText:
"Document Sets allow you to group logically connected documents into a single bundle. These can then be used as a filter when performing searches to control the scope of information Onyx searches over.",
},
},
{
name: "Custom Assistants - Slack Bots",
path: "bots",
pageTitle: "Slack Bots",
options: {
paragraphText:
"Setup Slack bots that connect to Onyx. Once setup, you will be able to ask questions to Onyx directly from Slack. Additionally, you can:",
},
},
{
name: "Custom Assistants - Standard Answers",
path: "standard-answer",
pageTitle: "Standard Answers",
},
{
name: "Performance - Usage Statistics",
path: "performance/usage",
pageTitle: "Usage Statistics",
},
{
name: "Document Management - Feedback",
path: "documents/feedback",
pageTitle: "Document Feedback",
},
{
name: "Configuration - LLM",
path: "configuration/llm",
pageTitle: "LLM Setup",
},
{
name: "Connectors - Existing Connectors",
path: "indexing/status",
pageTitle: "Existing Connectors",
},
{
name: "User Management - Groups",
path: "groups",
pageTitle: "Manage User Groups",
},
{
name: "Appearance & Theming",
path: "theme",
pageTitle: "Appearance & Theming",
},
{
name: "Configuration - Search Settings",
path: "configuration/search",
pageTitle: "Search Settings",
},
{
name: "Custom Assistants - MCP Actions",
path: "actions/mcp",
pageTitle: "MCP Actions",
},
{
name: "Custom Assistants - OpenAPI Actions",
path: "actions/open-api",
pageTitle: "OpenAPI Actions",
},
{
name: "User Management - Token Rate Limits",
path: "token-rate-limits",
pageTitle: "Token Rate Limits",
options: {
// NOTE(review): the wording "enable you control" presumably mirrors the UI copy
// verbatim — confirm against the page before "correcting" it here.
paragraphText:
"Token rate limits enable you control how many tokens can be spent in a given time period. With token rate limits, you can:",
buttonName: "Create a Token Rate Limit",
},
},
];
/**
 * Navigates to `/admin/<path>` and verifies that the expected page rendered.
 *
 * The title element (`[aria-label="admin-page-title"]`) must show `pageTitle`
 * within 10 seconds; on failure the full page HTML is logged before the
 * assertion error is rethrown. Afterwards the optional checks run:
 * - `paragraphText`: the first `p.text-sm` element must have this text.
 * - `buttonName`: exactly one button with this accessible name must exist.
 *
 * `subHeaderText` is accepted but is not checked by this function.
 *
 * @param page - Playwright page to drive.
 * @param path - Route segment appended to `/admin/`.
 * @param pageTitle - Expected title text.
 * @param options - Optional additional expectations.
 */
async function verifyAdminPageNavigation(
  page: Page,
  path: string,
  pageTitle: string,
  options?: {
    paragraphText?: string | RegExp;
    buttonName?: string;
    subHeaderText?: string;
  }
) {
  const { paragraphText, buttonName } = options ?? {};

  await page.goto(`/admin/${path}`);

  try {
    const titleElement = page.locator('[aria-label="admin-page-title"]');
    await expect(titleElement).toHaveText(pageTitle, { timeout: 10000 });
  } catch (titleError) {
    console.error(
      `Failed to find admin-page title with text "${pageTitle}" for path "${path}"`
    );
    // NOTE: This is a temporary measure for debugging the issue
    const html = await page.content();
    console.error(html);
    throw titleError;
  }

  if (paragraphText) {
    const leadParagraph = page.locator("p.text-sm").nth(0);
    await expect(leadParagraph).toHaveText(paragraphText);
  }

  if (buttonName) {
    const namedButtons = page.getByRole("button", { name: buttonName });
    await expect(namedButtons).toHaveCount(1);
  }
}
// Colour themes every admin page is verified and screenshotted under.
const THEMES = ["light", "dark"] as const;

// Register one describe block per theme, containing one test per admin page.
THEMES.forEach((theme) => {
  test.describe(`Admin pages (${theme} mode)`, () => {
    // Write the theme into localStorage via an init script, which runs before
    // page scripts on every navigation, so next-themes sees it on first render.
    test.beforeEach(async ({ page }) => {
      await page.addInitScript((storedTheme: string) => {
        localStorage.setItem("theme", storedTheme);
      }, theme);
    });

    ADMIN_PAGES.forEach(({ name, path, pageTitle, options }) => {
      test(`Admin - ${name}`, async ({ page }) => {
        await verifyAdminPageNavigation(page, path, pageTitle, options);

        // Let in-flight network requests finish so the capture is stable.
        await page.waitForLoadState("networkidle");

        // Name the screenshot after theme and route (slashes become dashes)
        // so light and dark captures never share a file.
        const routeSlug = path.split("/").join("-");
        const screenshotName = `admin-${theme}-${routeSlug}`;
        await expectScreenshot(page, { name: screenshotName });
      });
    });
  });
});

View File

@@ -0,0 +1,125 @@
import type { Page, PageScreenshotOptions } from "@playwright/test";
import { expect } from "@playwright/test";
/**
 * Switch between assert mode and capture-only mode for `expectScreenshot()`.
 *
 * True only when the `VISUAL_REGRESSION` environment variable equals `"true"`
 * (compared case-insensitively). In that case `expectScreenshot()` calls
 * `toHaveScreenshot()`, which fails the test when the capture differs from the
 * stored baseline.
 *
 * For an unset variable or any other value, `expectScreenshot()` only takes the
 * screenshot and performs no comparison, so tests are never gated on it.
 */
const VISUAL_REGRESSION_ENABLED =
  (process.env.VISUAL_REGRESSION ?? "").toLowerCase() === "true";
/**
 * CSS selectors masked in every screenshot, so that dynamic content
 * (timestamps, avatars, etc.) does not produce spurious diffs.
 *
 * The list is currently empty. Example entries:
 *   '[data-testid="timestamp"]'
 *   '[data-testid="user-avatar"]'
 */
const DEFAULT_MASK_SELECTORS: string[] = [];
/** Per-call settings accepted by `expectScreenshot()`; every field is optional. */
interface ScreenshotOptions {
  /**
   * Base file name of the screenshot, without the `.png` extension.
   * When omitted, no explicit name is passed on to Playwright.
   */
  name?: string;

  /** Capture the whole scrollable page rather than only the viewport. Defaults to false. */
  fullPage?: boolean;

  /**
   * CSS selectors to mask in addition to the default ones. Masked regions are
   * painted over with a solid box so their content cannot cause diffs.
   */
  mask?: string[];

  /** Per-screenshot override of the allowed ratio of differing pixels. */
  maxDiffPixelRatio?: number;

  /** Per-screenshot override of the pixel comparison threshold. */
  threshold?: number;

  /**
   * Extra options handed to Playwright's `page.screenshot()`. Keys set here
   * take precedence over the `fullPage` and `mask` values above.
   */
  screenshotOptions?: PageScreenshotOptions;
}
/**
 * Take a screenshot and optionally assert it matches the stored baseline.
 *
 * Behavior depends on the `VISUAL_REGRESSION` environment variable:
 * - `VISUAL_REGRESSION=true` → assert via `toHaveScreenshot()` (fails on diff)
 * - Otherwise → capture via `page.screenshot()` without any comparison
 *
 * Both modes capture with the same settings: `screenshotOptions.fullPage` and
 * `screenshotOptions.mask` take precedence over the top-level `fullPage` and
 * `mask`, and the capture options both Playwright APIs understand
 * (`animations`, `caret`, `clip`, `maskColor`, `omitBackground`, `scale`,
 * `timeout`) are honoured in assert mode as well. Options that only make sense
 * for `page.screenshot()` (such as `path`, `type`, `quality`) are used in
 * capture-only mode only.
 *
 * In capture-only mode a named screenshot is written to
 * `output/screenshots/<name>.png`, a relative path. Without a `name` no
 * `path` is passed, so the screenshot is taken but nothing is written to
 * disk unless `screenshotOptions.path` is set.
 *
 * Usage:
 * ```ts
 * import { expectScreenshot } from "@tests/e2e/utils/visualRegression";
 *
 * test("admin page looks right", async ({ page }) => {
 *   await page.goto("/admin/settings");
 *   await expectScreenshot(page, { name: "admin-settings" });
 * });
 * ```
 *
 * @param page - Playwright page to capture.
 * @param options - Naming, masking and comparison settings; all optional.
 */
export async function expectScreenshot(
  page: Page,
  options: ScreenshotOptions = {}
): Promise<void> {
  const {
    name,
    mask = [],
    fullPage = false,
    maxDiffPixelRatio,
    threshold,
    screenshotOptions = {},
  } = options;

  // Combine default masks with per-call masks
  const maskLocators = [...DEFAULT_MASK_SELECTORS, ...mask].map((selector) =>
    page.locator(selector)
  );
  const selectorMask = maskLocators.length > 0 ? maskLocators : undefined;

  if (VISUAL_REGRESSION_ENABLED) {
    // Assert mode — fail the test if the screenshot differs from baseline.
    const {
      animations,
      caret,
      clip,
      maskColor,
      omitBackground,
      scale,
      timeout,
    } = screenshotOptions;

    // Keys are only added when defined: an explicit `undefined` could otherwise
    // shadow the `expect.toHaveScreenshot` defaults from playwright.config.ts.
    const assertOptions = {
      fullPage: screenshotOptions.fullPage ?? fullPage,
      mask: screenshotOptions.mask ?? selectorMask,
      ...(animations !== undefined && { animations }),
      ...(caret !== undefined && { caret }),
      ...(clip !== undefined && { clip }),
      ...(maskColor !== undefined && { maskColor }),
      ...(omitBackground !== undefined && { omitBackground }),
      ...(scale !== undefined && { scale }),
      ...(timeout !== undefined && { timeout }),
      ...(maxDiffPixelRatio !== undefined && { maxDiffPixelRatio }),
      ...(threshold !== undefined && { threshold }),
    };

    if (name) {
      // Playwright expects the snapshot name as a string[] of path segments.
      await expect(page).toHaveScreenshot([name + ".png"], assertOptions);
    } else {
      await expect(page).toHaveScreenshot(assertOptions);
    }
    return;
  }

  // Capture-only mode — take the screenshot without asserting.
  await page.screenshot({
    path: name ? `output/screenshots/${name}.png` : undefined,
    fullPage,
    mask: selectorMask,
    ...screenshotOptions,
  });
}