mirror of
https://github.com/onyx-dot-app/onyx.git
synced 2026-02-16 23:35:46 +00:00
Compare commits
2 Commits
v2.9.6
...
dump-scrip
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ca3db17b08 | ||
|
|
ffd13b1104 |
230
backend/scripts/dump/README.md
Normal file
230
backend/scripts/dump/README.md
Normal file
@@ -0,0 +1,230 @@
|
||||
# Onyx Data Backup & Restore Scripts
|
||||
|
||||
Scripts for backing up and restoring PostgreSQL, Vespa, and MinIO data from an Onyx deployment.
|
||||
|
||||
## Overview
|
||||
|
||||
Two backup modes are supported:
|
||||
|
||||
| Mode | Description | Pros | Cons |
|
||||
|------|-------------|------|------|
|
||||
| **volume** | Exports Docker volumes directly | Fast, complete, preserves everything | Services must be stopped for consistency |
|
||||
| **api** | Uses pg_dump and Vespa REST API | Services can stay running, more portable | Slower for large datasets |
|
||||
|
||||
## Quick Start
|
||||
|
||||
### Backup (from a running instance)
|
||||
|
||||
```bash
|
||||
# Full backup using volume mode (recommended for complete backups)
|
||||
# Note: For consistency, stop services first
|
||||
docker compose -f deployment/docker_compose/docker-compose.yml stop
|
||||
./scripts/dump_data.sh --mode volume --output ./backups
|
||||
docker compose -f deployment/docker_compose/docker-compose.yml start
|
||||
|
||||
# Or use API mode (services can stay running)
|
||||
./scripts/dump_data.sh --mode api --output ./backups
|
||||
```
|
||||
|
||||
### Restore (to a local instance)
|
||||
|
||||
```bash
|
||||
# Restore from latest backup
|
||||
./scripts/restore_data.sh --input ./backups/latest
|
||||
|
||||
# Restore from specific backup
|
||||
./scripts/restore_data.sh --input ./backups/20240115_120000
|
||||
|
||||
# Force restore without confirmation
|
||||
./scripts/restore_data.sh --input ./backups/latest --force
|
||||
```
|
||||
|
||||
## Detailed Usage
|
||||
|
||||
### dump_data.sh
|
||||
|
||||
```
|
||||
Usage: ./scripts/dump_data.sh [OPTIONS]
|
||||
|
||||
Options:
|
||||
--mode <volume|api> Backup mode (default: volume)
|
||||
--output <dir> Output directory (default: ./onyx_backup)
|
||||
--project <name> Docker Compose project name (default: onyx)
|
||||
--postgres-only Only backup PostgreSQL
|
||||
--vespa-only Only backup Vespa
|
||||
--minio-only Only backup MinIO
|
||||
--no-minio Skip MinIO backup
|
||||
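--volume-prefix <name> Volume name prefix (default: same as project name)
--compose-dir <dir> Docker Compose directory (for service management)
--no-restart Don't restart services after backup (volume mode)
--help Show help message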
|
||||
```
|
||||
|
||||
**Examples:**
|
||||
|
||||
```bash
|
||||
# Default volume backup
|
||||
./scripts/dump_data.sh
|
||||
|
||||
# API-based backup
|
||||
./scripts/dump_data.sh --mode api
|
||||
|
||||
# Only backup PostgreSQL
|
||||
./scripts/dump_data.sh --postgres-only --mode api
|
||||
|
||||
# Custom output directory
|
||||
./scripts/dump_data.sh --output /mnt/backups/onyx
|
||||
|
||||
# Different project name (if using custom docker compose project)
|
||||
./scripts/dump_data.sh --project my-onyx-instance
|
||||
```
|
||||
|
||||
### restore_data.sh
|
||||
|
||||
```
|
||||
Usage: ./scripts/restore_data.sh [OPTIONS]
|
||||
|
||||
Options:
|
||||
--input <dir> Backup directory (required)
|
||||
--project <name> Docker Compose project name (default: onyx)
|
||||
--postgres-only Only restore PostgreSQL
|
||||
--vespa-only Only restore Vespa
|
||||
--minio-only Only restore MinIO
|
||||
--no-minio Skip MinIO restore
|
||||
--force Skip confirmation prompts
|
||||
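--volume-prefix <name> Volume name prefix (default: same as project name)
--compose-dir <dir> Docker Compose directory (for service management)
--no-restart Don't restart services after restore (volume mode)
--help Show help message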
|
||||
```
|
||||
|
||||
**Examples:**
|
||||
|
||||
```bash
|
||||
# Restore all components
|
||||
./scripts/restore_data.sh --input ./onyx_backup/latest
|
||||
|
||||
# Restore only PostgreSQL
|
||||
./scripts/restore_data.sh --input ./onyx_backup/latest --postgres-only
|
||||
|
||||
# Non-interactive restore
|
||||
./scripts/restore_data.sh --input ./onyx_backup/latest --force
|
||||
```
|
||||
|
||||
## Backup Directory Structure
|
||||
|
||||
After running a backup, the output directory contains:
|
||||
|
||||
```
|
||||
onyx_backup/
|
||||
├── 20240115_120000/ # Timestamp-named backup
|
||||
│ ├── metadata.json # Backup metadata
|
||||
│ ├── postgres_volume.tar.gz # PostgreSQL data (volume mode)
|
||||
│ ├── postgres_dump.backup # PostgreSQL dump (api mode)
|
||||
│ ├── vespa_volume.tar.gz # Vespa data (volume mode)
|
||||
│ ├── vespa_documents.jsonl # Vespa documents (api mode)
|
||||
│ ├── minio_volume.tar.gz # MinIO data (volume mode)
|
||||
│ └── minio_data.tar.gz # MinIO data (api mode)
|
||||
└── latest -> 20240115_120000 # Symlink to latest backup
|
||||
```
|
||||
|
||||
## Environment Variables
|
||||
|
||||
You can customize behavior with environment variables:
|
||||
|
||||
```bash
|
||||
# PostgreSQL settings
|
||||
export POSTGRES_USER=postgres
|
||||
export POSTGRES_PASSWORD=password
|
||||
export POSTGRES_DB=postgres
|
||||
export POSTGRES_PORT=5432
|
||||
|
||||
# Vespa settings
|
||||
export VESPA_HOST=localhost
|
||||
export VESPA_PORT=8081
|
||||
export VESPA_INDEX=danswer_index
|
||||
```
|
||||
|
||||
## Typical Workflows
|
||||
|
||||
### Migrate to a new server
|
||||
|
||||
```bash
|
||||
# On source server
|
||||
./scripts/dump_data.sh --mode volume --output ./migration_backup
|
||||
tar czhf onyx_backup.tar.gz ./migration_backup/latest  # -h: archive the backup that 'latest' points to, not the symlink itself
|
||||
|
||||
# Transfer to new server
|
||||
scp onyx_backup.tar.gz newserver:/opt/onyx/
|
||||
|
||||
# On new server
|
||||
cd /opt/onyx
|
||||
tar xzf onyx_backup.tar.gz
|
||||
./scripts/restore_data.sh --input ./migration_backup/latest --force
|
||||
docker compose up -d
|
||||
```
|
||||
|
||||
### Create a development copy from production
|
||||
|
||||
```bash
|
||||
# On production (use API mode to avoid downtime)
|
||||
./scripts/dump_data.sh --mode api --output ./prod_backup
|
||||
|
||||
# Copy to dev machine
|
||||
rsync -avz ./prod_backup/latest/ dev-machine:/home/dev/onyx_backup/  # trailing slash: copy the backup's contents, not the 'latest' symlink
|
||||
|
||||
# On dev machine
|
||||
./scripts/restore_data.sh --input /home/dev/onyx_backup --force
|
||||
docker compose -f docker-compose.yml -f docker-compose.dev.yml up -d
|
||||
```
|
||||
|
||||
### Scheduled backups (cron)
|
||||
|
||||
```bash
|
||||
# Add to crontab: crontab -e
|
||||
# Daily backup at 2 AM
|
||||
0 2 * * * cd /opt/onyx && ./scripts/dump_data.sh --mode api --output /backups/onyx >> /var/log/onyx-backup.log 2>&1
|
||||
|
||||
# Weekly cleanup (keep last 7 days)
|
||||
0 3 * * 0 find /backups/onyx -mindepth 1 -maxdepth 1 -type d -mtime +7 -exec rm -rf {} \;
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### "Volume not found" error
|
||||
|
||||
Ensure the Docker Compose project name matches:
|
||||
```bash
|
||||
docker volume ls | grep db_volume
|
||||
# If it shows "myproject_db_volume", use --project myproject
|
||||
```
|
||||
|
||||
### "Container not running" error (API mode)
|
||||
|
||||
Start the required services:
|
||||
```bash
|
||||
cd deployment/docker_compose
|
||||
docker compose up -d relational_db index minio
|
||||
```
|
||||
|
||||
### Vespa restore fails with "not ready"
|
||||
|
||||
Vespa takes time to initialize. Wait and retry:
|
||||
```bash
|
||||
# Check Vespa health
|
||||
curl http://localhost:8081/state/v1/health
|
||||
```
|
||||
|
||||
### PostgreSQL restore shows warnings
|
||||
|
||||
`pg_restore` often shows warnings about objects that don't exist (when using `--clean`). These are usually safe to ignore if the restore completes.
|
||||
|
||||
## Alternative: Python Script
|
||||
|
||||
For more control, you can also use the existing Python script:
|
||||
|
||||
```bash
|
||||
cd backend
|
||||
|
||||
# Save state
|
||||
python -m scripts.save_load_state --save --checkpoint_dir ../onyx_checkpoint
|
||||
|
||||
# Load state
|
||||
python -m scripts.save_load_state --load --checkpoint_dir ../onyx_checkpoint
|
||||
```
|
||||
|
||||
See `backend/scripts/save_load_state.py` for the Python implementation.
|
||||
478
backend/scripts/dump/dump_data.sh
Executable file
478
backend/scripts/dump/dump_data.sh
Executable file
@@ -0,0 +1,478 @@
|
||||
#!/bin/bash
|
||||
# =============================================================================
|
||||
# Onyx Data Dump Script
|
||||
# =============================================================================
|
||||
# This script creates a backup of PostgreSQL, Vespa, and MinIO data.
|
||||
#
|
||||
# Two modes available:
|
||||
# - volume: Exports Docker volumes directly (faster, complete backup)
|
||||
# - api: Uses pg_dump and Vespa API (more portable)
|
||||
#
|
||||
# Usage:
|
||||
# ./dump_data.sh [OPTIONS]
|
||||
#
|
||||
# Options:
|
||||
# --mode <volume|api> Backup mode (default: volume)
|
||||
# --output <dir> Output directory (default: ./onyx_backup)
|
||||
# --project <name> Docker Compose project name (default: onyx)
|
||||
# --volume-prefix <name> Volume name prefix (default: same as project name)
|
||||
# --compose-dir <dir> Docker Compose directory (for service management)
|
||||
# --postgres-only Only backup PostgreSQL
|
||||
# --vespa-only Only backup Vespa
|
||||
# --minio-only Only backup MinIO
|
||||
# --no-minio Skip MinIO backup
|
||||
# --no-restart Don't restart services after backup (volume mode)
|
||||
# --help Show this help message
|
||||
#
|
||||
# Examples:
|
||||
# ./dump_data.sh # Full volume backup
|
||||
# ./dump_data.sh --mode api # API-based backup
|
||||
# ./dump_data.sh --output /tmp/backup # Custom output directory
|
||||
# ./dump_data.sh --postgres-only --mode api # Only PostgreSQL via pg_dump
|
||||
# ./dump_data.sh --volume-prefix myprefix # Use custom volume prefix
|
||||
# =============================================================================
|
||||
|
||||
set -e
|
||||
|
||||
# Default configuration
|
||||
MODE="volume"
|
||||
OUTPUT_DIR="./onyx_backup"
|
||||
PROJECT_NAME="onyx"
|
||||
VOLUME_PREFIX="" # Will default to PROJECT_NAME if not set
|
||||
COMPOSE_DIR="" # Docker Compose directory for service management
|
||||
BACKUP_POSTGRES=true
|
||||
BACKUP_VESPA=true
|
||||
BACKUP_MINIO=true
|
||||
NO_RESTART=false
|
||||
|
||||
# PostgreSQL defaults
|
||||
POSTGRES_USER="${POSTGRES_USER:-postgres}"
|
||||
POSTGRES_PASSWORD="${POSTGRES_PASSWORD:-password}"
|
||||
POSTGRES_DB="${POSTGRES_DB:-postgres}"
|
||||
POSTGRES_PORT="${POSTGRES_PORT:-5432}"
|
||||
|
||||
# Vespa defaults
|
||||
VESPA_HOST="${VESPA_HOST:-localhost}"
|
||||
VESPA_PORT="${VESPA_PORT:-8081}"
|
||||
VESPA_INDEX="${VESPA_INDEX:-danswer_index}"
|
||||
|
||||
# Colors for output
|
||||
RED='\033[0;31m'
|
||||
GREEN='\033[0;32m'
|
||||
YELLOW='\033[1;33m'
|
||||
BLUE='\033[0;34m'
|
||||
NC='\033[0m' # No Color
|
||||
|
||||
# -----------------------------------------------------------------------------
# Logging helpers
#
# Each helper prints "<colored tag> <message>" followed by a newline.
#   $1 - message text (printed verbatim; backslashes are NOT interpreted)
# log_info / log_success write to stdout; log_warning / log_error write to
# stderr so diagnostics are not mixed into captured or piped output.
# The color variables hold literal "\033[..." sequences, hence the %b format.
# -----------------------------------------------------------------------------
log_info() {
  printf '%b[INFO]%b %s\n' "$BLUE" "$NC" "$1"
}

log_success() {
  printf '%b[SUCCESS]%b %s\n' "$GREEN" "$NC" "$1"
}

log_warning() {
  printf '%b[WARNING]%b %s\n' "$YELLOW" "$NC" "$1" >&2
}

log_error() {
  printf '%b[ERROR]%b %s\n' "$RED" "$NC" "$1" >&2
}
|
||||
|
||||
# Print the usage text and exit 0.
#
# The usage text is the comment header at the top of the script: every line
# after the shebang up to (not including) the first line that is not a
# comment. Scanning for the end of the header replaces the previous fixed
# `head -35 | tail -32` window, which was two lines too long and leaked the
# blank line and `set -e` into the help output.
#
# Arguments: $1 - (optional) script file to read the header from; defaults
#                 to "$0"
show_help() {
  local script="${1:-$0}"

  awk 'NR == 1 && /^#!/ { next } /^#/ { print; next } { exit }' "$script"
  exit 0
}
|
||||
|
||||
# Parse arguments

# Abort with a clear message when a value-taking option has no value (or an
# empty one). Call as `require_value "$@"` while "$1" is the option being
# parsed. Without this check a trailing "--output" made `shift 2` fail and
# the script stopped under `set -e` without saying why.
require_value() {
  if [[ $# -lt 2 || -z "$2" ]]; then
    log_error "Option $1 requires a value"
    exit 1
  fi
}

while [[ $# -gt 0 ]]; do
  case "$1" in
    --mode)
      require_value "$@"
      MODE="$2"
      shift 2
      ;;
    --output)
      require_value "$@"
      OUTPUT_DIR="$2"
      shift 2
      ;;
    --project)
      require_value "$@"
      PROJECT_NAME="$2"
      shift 2
      ;;
    --volume-prefix)
      require_value "$@"
      VOLUME_PREFIX="$2"
      shift 2
      ;;
    --compose-dir)
      require_value "$@"
      COMPOSE_DIR="$2"
      shift 2
      ;;
    --no-restart)
      NO_RESTART=true
      shift
      ;;
    --postgres-only)
      BACKUP_POSTGRES=true
      BACKUP_VESPA=false
      BACKUP_MINIO=false
      shift
      ;;
    --vespa-only)
      BACKUP_POSTGRES=false
      BACKUP_VESPA=true
      BACKUP_MINIO=false
      shift
      ;;
    --minio-only)
      BACKUP_POSTGRES=false
      BACKUP_VESPA=false
      BACKUP_MINIO=true
      shift
      ;;
    --no-minio)
      BACKUP_MINIO=false
      shift
      ;;
    --help)
      show_help
      ;;
    *)
      log_error "Unknown option: $1"
      exit 1
      ;;
  esac
done
|
||||
|
||||
# Validate mode
|
||||
if [[ "$MODE" != "volume" && "$MODE" != "api" ]]; then
|
||||
log_error "Invalid mode: $MODE. Use 'volume' or 'api'"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Set VOLUME_PREFIX to PROJECT_NAME if not specified
|
||||
if [[ -z "$VOLUME_PREFIX" ]]; then
|
||||
VOLUME_PREFIX="$PROJECT_NAME"
|
||||
fi
|
||||
|
||||
# Create output directory with timestamp
|
||||
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
|
||||
BACKUP_DIR="${OUTPUT_DIR}/${TIMESTAMP}"
|
||||
mkdir -p "$BACKUP_DIR"
|
||||
|
||||
log_info "Starting Onyx data backup..."
|
||||
log_info "Mode: $MODE"
|
||||
log_info "Output directory: $BACKUP_DIR"
|
||||
log_info "Project name: $PROJECT_NAME"
|
||||
log_info "Volume prefix: $VOLUME_PREFIX"
|
||||
|
||||
# Get container names
|
||||
POSTGRES_CONTAINER="${PROJECT_NAME}-relational_db-1"
|
||||
VESPA_CONTAINER="${PROJECT_NAME}-index-1"
|
||||
MINIO_CONTAINER="${PROJECT_NAME}-minio-1"
|
||||
|
||||
# Track which services were stopped
|
||||
STOPPED_SERVICES=()
|
||||
|
||||
# =============================================================================
|
||||
# Service management functions
|
||||
# =============================================================================
|
||||
|
||||
# Stop one Compose service if its container is currently running, and record
# it in STOPPED_SERVICES so it can be restarted later.
#
# Globals:   PROJECT_NAME, COMPOSE_DIR (read); STOPPED_SERVICES (appended)
# Arguments: $1 - Compose service name (e.g. relational_db)
# Returns:   0 if the service was not running or was stopped;
#            1 if the container could not be stopped
stop_service() {
  local service=$1
  local container="${PROJECT_NAME}-${service}-1"

  # -F -x: compare the name literally and against the whole line, so regex
  # metacharacters in the project name (e.g. ".") cannot match other containers.
  if ! docker ps --format '{{.Names}}' | grep -Fxq -- "$container"; then
    return 0
  fi

  log_info "Stopping ${service}..."
  if [[ -n "$COMPOSE_DIR" ]]; then
    # Prefer Compose; fall back to stopping the container directly.
    docker compose -p "$PROJECT_NAME" -f "${COMPOSE_DIR}/docker-compose.yml" stop "$service" 2>/dev/null \
      || docker stop "$container" \
      || return 1
  else
    docker stop "$container" || return 1
  fi

  STOPPED_SERVICES+=("$service")
}
|
||||
|
||||
# Restart every service recorded in STOPPED_SERVICES.
#
# Tries `docker compose start` first when COMPOSE_DIR is set, and otherwise
# (or if that fails) starts each container directly. Restarting stays
# best-effort: the function always returns 0, but a container that cannot be
# started is now reported with a warning instead of being silently ignored.
#
# Globals: PROJECT_NAME, COMPOSE_DIR, STOPPED_SERVICES (read)
start_services() {
  if [[ ${#STOPPED_SERVICES[@]} -eq 0 ]]; then
    return 0
  fi

  log_info "Restarting services: ${STOPPED_SERVICES[*]}"

  if [[ -n "$COMPOSE_DIR" ]] \
    && docker compose -p "$PROJECT_NAME" -f "${COMPOSE_DIR}/docker-compose.yml" \
      start "${STOPPED_SERVICES[@]}" 2>/dev/null; then
    return 0
  fi

  # Fallback to starting containers directly
  local service container
  for service in "${STOPPED_SERVICES[@]}"; do
    container="${PROJECT_NAME}-${service}-1"
    if ! docker start "$container"; then
      log_warning "Could not restart ${service} (container ${container}); start it manually"
    fi
  done
  return 0
}
|
||||
|
||||
# =============================================================================
|
||||
# Volume-based backup functions
|
||||
# =============================================================================
|
||||
|
||||
# Archive the PostgreSQL Docker volume into BACKUP_DIR/postgres_volume.tar.gz.
#
# Globals: VOLUME_PREFIX, BACKUP_DIR (read)
# Returns: 0 on success; 1 if the volume is missing, BACKUP_DIR is not
#          accessible, or the archive could not be written
#
# Failures are checked explicitly: the caller runs this function as
# `fn || log_warning ...`, a context in which `set -e` is ignored inside the
# function, so an unchecked `docker run` failure would still report success.
backup_postgres_volume() {
  log_info "Backing up PostgreSQL volume..."

  local volume_name="${VOLUME_PREFIX}_db_volume"
  local archive="postgres_volume.tar.gz"
  local backup_dir

  # Check if volume exists
  if ! docker volume inspect "$volume_name" &>/dev/null; then
    log_error "PostgreSQL volume '$volume_name' not found"
    return 1
  fi

  # Bind mounts need an absolute host path; BACKUP_DIR is relative by default.
  if ! backup_dir=$(cd "$BACKUP_DIR" && pwd); then
    log_error "Backup directory '$BACKUP_DIR' is not accessible"
    return 1
  fi

  # Export volume to tar
  if ! docker run --rm \
    -v "${volume_name}:/source:ro" \
    -v "${backup_dir}:/backup" \
    alpine tar czf "/backup/${archive}" -C /source .; then
    log_error "Failed to archive PostgreSQL volume '$volume_name'"
    # Do not leave a truncated archive that a restore could pick up.
    rm -f -- "${backup_dir}/${archive}"
    return 1
  fi

  log_success "PostgreSQL volume backed up to ${archive}"
}
|
||||
|
||||
# Archive the Vespa Docker volume into BACKUP_DIR/vespa_volume.tar.gz.
#
# Globals: VOLUME_PREFIX, BACKUP_DIR (read)
# Returns: 0 on success; 1 if the volume is missing, BACKUP_DIR is not
#          accessible, or the archive could not be written
#
# Failures are checked explicitly: the caller runs this function as
# `fn || log_warning ...`, a context in which `set -e` is ignored inside the
# function, so an unchecked `docker run` failure would still report success.
backup_vespa_volume() {
  log_info "Backing up Vespa volume..."

  local volume_name="${VOLUME_PREFIX}_vespa_volume"
  local archive="vespa_volume.tar.gz"
  local backup_dir

  # Check if volume exists
  if ! docker volume inspect "$volume_name" &>/dev/null; then
    log_error "Vespa volume '$volume_name' not found"
    return 1
  fi

  # Bind mounts need an absolute host path; BACKUP_DIR is relative by default.
  if ! backup_dir=$(cd "$BACKUP_DIR" && pwd); then
    log_error "Backup directory '$BACKUP_DIR' is not accessible"
    return 1
  fi

  # Export volume to tar
  if ! docker run --rm \
    -v "${volume_name}:/source:ro" \
    -v "${backup_dir}:/backup" \
    alpine tar czf "/backup/${archive}" -C /source .; then
    log_error "Failed to archive Vespa volume '$volume_name'"
    # Do not leave a truncated archive that a restore could pick up.
    rm -f -- "${backup_dir}/${archive}"
    return 1
  fi

  log_success "Vespa volume backed up to ${archive}"
}
|
||||
|
||||
# Archive the MinIO Docker volume into BACKUP_DIR/minio_volume.tar.gz.
#
# Globals: VOLUME_PREFIX, BACKUP_DIR (read)
# Returns: 0 on success; 1 if the volume is missing, BACKUP_DIR is not
#          accessible, or the archive could not be written
#
# Failures are checked explicitly: the caller runs this function as
# `fn || log_warning ...`, a context in which `set -e` is ignored inside the
# function, so an unchecked `docker run` failure would still report success.
backup_minio_volume() {
  log_info "Backing up MinIO volume..."

  local volume_name="${VOLUME_PREFIX}_minio_data"
  local archive="minio_volume.tar.gz"
  local backup_dir

  # Check if volume exists
  if ! docker volume inspect "$volume_name" &>/dev/null; then
    log_error "MinIO volume '$volume_name' not found"
    return 1
  fi

  # Bind mounts need an absolute host path; BACKUP_DIR is relative by default.
  if ! backup_dir=$(cd "$BACKUP_DIR" && pwd); then
    log_error "Backup directory '$BACKUP_DIR' is not accessible"
    return 1
  fi

  # Export volume to tar
  if ! docker run --rm \
    -v "${volume_name}:/source:ro" \
    -v "${backup_dir}:/backup" \
    alpine tar czf "/backup/${archive}" -C /source .; then
    log_error "Failed to archive MinIO volume '$volume_name'"
    # Do not leave a truncated archive that a restore could pick up.
    rm -f -- "${backup_dir}/${archive}"
    return 1
  fi

  log_success "MinIO volume backed up to ${archive}"
}
|
||||
|
||||
# =============================================================================
|
||||
# API-based backup functions
|
||||
# =============================================================================
|
||||
|
||||
# Dump the PostgreSQL database with pg_dump (custom format) run inside the
# database container, writing BACKUP_DIR/postgres_dump.backup.
#
# Globals: POSTGRES_CONTAINER, POSTGRES_USER, POSTGRES_DB, BACKUP_DIR (read)
# Returns: 0 on success; 1 if the container is not running or pg_dump fails
#
# The pg_dump result is checked explicitly because the caller invokes this
# function as `fn || log_warning ...`, where `set -e` has no effect inside it.
backup_postgres_api() {
  log_info "Backing up PostgreSQL via pg_dump..."

  local dump_file="${BACKUP_DIR}/postgres_dump.backup"

  # Check if container is running (literal, whole-line name match)
  if ! docker ps --format '{{.Names}}' | grep -Fxq -- "$POSTGRES_CONTAINER"; then
    log_error "PostgreSQL container '$POSTGRES_CONTAINER' is not running"
    return 1
  fi

  # Create dump using pg_dump inside container
  if ! docker exec "$POSTGRES_CONTAINER" \
    pg_dump -U "$POSTGRES_USER" -F c -b -v "$POSTGRES_DB" \
    > "$dump_file"; then
    log_error "pg_dump failed for database '$POSTGRES_DB'"
    # A truncated dump must not be mistaken for a valid backup.
    rm -f -- "$dump_file"
    return 1
  fi

  log_success "PostgreSQL backed up to postgres_dump.backup"
}
|
||||
|
||||
# Export all Vespa documents through the /document/v1 visit endpoint into
# BACKUP_DIR/vespa_documents.jsonl (one JSON object per line), following the
# "continuation" token returned by each page until none is returned.
#
# Globals: VESPA_HOST, VESPA_PORT, VESPA_INDEX, VESPA_CONTAINER, BACKUP_DIR
# Returns: 0 on success; 1 if jq is missing, Vespa cannot be reached, or any
#          page fails to download or parse
#
# Every request is checked: previously a failed curl or jq mid-pagination
# produced an empty continuation token, which ended the loop and reported a
# truncated dump as a successful backup.
backup_vespa_api() {
  log_info "Backing up Vespa via API..."

  local endpoint="http://${VESPA_HOST}:${VESPA_PORT}/document/v1/default/${VESPA_INDEX}/docid"
  local output_file="${BACKUP_DIR}/vespa_documents.jsonl"
  local continuation=""
  local total_docs=0
  local url response docs count

  if ! command -v jq &>/dev/null; then
    log_error "jq is required for the Vespa API backup but is not installed"
    return 1
  fi

  # Check if Vespa is accessible
  if ! curl -s -o /dev/null -w "%{http_code}" "$endpoint" | grep -Eq '^(200|404)$'; then
    # Retry against the default local address if the container is running
    if docker ps --format '{{.Names}}' | grep -Fxq -- "$VESPA_CONTAINER"; then
      log_warning "Vespa not accessible on $VESPA_HOST:$VESPA_PORT, trying via container..."
      endpoint="http://localhost:8081/document/v1/default/${VESPA_INDEX}/docid"
    else
      log_error "Cannot connect to Vespa at $endpoint"
      return 1
    fi
  fi

  # Clear output file
  : > "$output_file"

  # Fetch documents with pagination
  while true; do
    url="$endpoint"
    if [[ -n "$continuation" ]]; then
      url="${endpoint}?continuation=${continuation}"
    fi

    # -f: treat HTTP error statuses as failures instead of parsing error bodies
    if ! response=$(curl -sf "$url"); then
      log_error "Request to Vespa failed: $url"
      rm -f -- "$output_file"
      return 1
    fi

    # Extract continuation token
    if ! continuation=$(jq -r '.continuation // empty' <<<"$response"); then
      log_error "Vespa returned a response that could not be parsed as JSON"
      rm -f -- "$output_file"
      return 1
    fi

    # Extract and save documents
    if ! docs=$(jq -c '.documents[]? | {update: .id, create: true, fields: .fields}' <<<"$response"); then
      log_error "Could not extract documents from the Vespa response"
      rm -f -- "$output_file"
      return 1
    fi

    if [[ -n "$docs" ]]; then
      printf '%s\n' "$docs" >> "$output_file"
      count=$(grep -c '' <<<"$docs")
      total_docs=$((total_docs + count))
      log_info " Fetched $total_docs documents so far..."
    fi

    # Check if we're done
    if [[ -z "$continuation" ]]; then
      break
    fi
  done

  log_success "Vespa backed up to vespa_documents.jsonl ($total_docs documents)"
}
|
||||
|
||||
# Back up MinIO object data into BACKUP_DIR/minio_data.tar.gz.
#
# Strategy:
#   1. If the `mc` client is installed, mirror all buckets through it.
#   2. Otherwise, if the MinIO container is running, copy /data out of it.
#   3. If neither works, fall back to backup_minio_volume.
#
# Globals: BACKUP_DIR, MINIO_CONTAINER (read). Optional overrides, defaulting
#          to the previously hard-coded values: MINIO_ENDPOINT
#          (http://localhost:9004), MINIO_ROOT_USER / MINIO_ROOT_PASSWORD
#          (minioadmin / minioadmin).
# Returns: 0 on success (or the status of backup_minio_volume on fallback);
#          1 if copying or compressing the data fails
#
# Copy/compress failures are checked explicitly, and the temporary
# minio_data/ staging directory is removed on every exit path so no empty
# directory is left inside the backup.
backup_minio_api() {
  log_info "Backing up MinIO data..."

  local minio_dir="${BACKUP_DIR}/minio_data"
  local archive="${BACKUP_DIR}/minio_data.tar.gz"
  local minio_url="${MINIO_ENDPOINT:-http://localhost:9004}"
  local minio_user="${MINIO_ROOT_USER:-minioadmin}"
  local minio_pass="${MINIO_ROOT_PASSWORD:-minioadmin}"

  if ! mkdir -p "$minio_dir"; then
    log_error "Could not create staging directory '$minio_dir'"
    return 1
  fi

  # Check if mc (MinIO client) is available
  if command -v mc &>/dev/null; then
    # Configure mc alias for local minio (best effort; mirror reports errors)
    mc alias set onyx-backup "$minio_url" "$minio_user" "$minio_pass" 2>/dev/null || true

    # Mirror all buckets
    if ! mc mirror onyx-backup/ "$minio_dir/" 2>/dev/null; then
      log_warning "mc mirror failed, falling back to volume backup"
      rm -rf -- "${minio_dir:?}"
      backup_minio_volume
      return
    fi
  elif docker ps --format '{{.Names}}' | grep -Fxq -- "$MINIO_CONTAINER"; then
    # Fallback: copy from container
    if ! docker cp "${MINIO_CONTAINER}:/data/." "$minio_dir/"; then
      log_error "Could not copy data out of container '$MINIO_CONTAINER'"
      rm -rf -- "${minio_dir:?}"
      return 1
    fi
  else
    log_warning "MinIO container not running and mc not available, using volume backup"
    rm -rf -- "${minio_dir:?}"
    backup_minio_volume
    return
  fi

  # Compress the data
  if ! tar czf "$archive" -C "$minio_dir" .; then
    log_error "Could not compress MinIO data"
    rm -rf -- "${minio_dir:?}"
    rm -f -- "$archive"
    return 1
  fi
  rm -rf -- "${minio_dir:?}"

  log_success "MinIO backed up to minio_data.tar.gz"
}
|
||||
|
||||
# =============================================================================
|
||||
# Main backup logic
|
||||
# =============================================================================
|
||||
|
||||
# Save metadata
|
||||
cat > "${BACKUP_DIR}/metadata.json" << EOF
|
||||
{
|
||||
"timestamp": "$TIMESTAMP",
|
||||
"mode": "$MODE",
|
||||
"project_name": "$PROJECT_NAME",
|
||||
"volume_prefix": "$VOLUME_PREFIX",
|
||||
"postgres_db": "$POSTGRES_DB",
|
||||
"vespa_index": "$VESPA_INDEX",
|
||||
"components": {
|
||||
"postgres": $BACKUP_POSTGRES,
|
||||
"vespa": $BACKUP_VESPA,
|
||||
"minio": $BACKUP_MINIO
|
||||
}
|
||||
}
|
||||
EOF
|
||||
|
||||
# Run backups based on mode

# Components whose backup function reported a failure.
FAILED_COMPONENTS=()

# run_backup <label> <function>
# Runs one backup function. A non-zero result is logged as a warning and the
# component is recorded in FAILED_COMPONENTS, so the remaining components are
# still attempted and the overall result can be reported at the end.
run_backup() {
  local label=$1
  local backup_fn=$2

  if ! "$backup_fn"; then
    log_warning "${label} backup failed"
    FAILED_COMPONENTS+=("$label")
  fi
}

if [[ "$MODE" == "volume" ]]; then
  log_info "Using volume-based backup"

  # If the script aborts after services were stopped, still bring them back
  # (unless the user asked to leave them down).
  if [[ "$NO_RESTART" != true ]]; then
    trap start_services EXIT
  fi

  # Stop services for consistent backup
  log_info "Stopping services for consistent backup..."
  if $BACKUP_POSTGRES; then
    stop_service "relational_db"
  fi
  if $BACKUP_VESPA; then
    stop_service "index"
  fi
  if $BACKUP_MINIO; then
    stop_service "minio"
  fi

  # Perform backups
  if $BACKUP_POSTGRES; then
    run_backup "PostgreSQL" backup_postgres_volume
  fi
  if $BACKUP_VESPA; then
    run_backup "Vespa" backup_vespa_volume
  fi
  if $BACKUP_MINIO; then
    run_backup "MinIO" backup_minio_volume
  fi

  # Restart services unless --no-restart was specified
  if [[ "$NO_RESTART" != true ]]; then
    trap - EXIT
    start_services
  else
    log_info "Skipping service restart (--no-restart specified)"
    log_info "Stopped services: ${STOPPED_SERVICES[*]}"
  fi
else
  log_info "Using API-based backup (services must be running)"

  if $BACKUP_POSTGRES; then
    run_backup "PostgreSQL" backup_postgres_api
  fi
  if $BACKUP_VESPA; then
    run_backup "Vespa" backup_vespa_api
  fi
  if $BACKUP_MINIO; then
    run_backup "MinIO" backup_minio_api
  fi
fi

# Calculate total size
TOTAL_SIZE=$(du -sh "$BACKUP_DIR" | cut -f1)

# A run with failed components must not look like a good backup: report it,
# leave the 'latest' symlink pointing at the previous backup, and exit
# non-zero so cron jobs and wrappers can detect the failure.
if [[ ${#FAILED_COMPONENTS[@]} -gt 0 ]]; then
  log_error "==================================="
  log_error "Backup incomplete! Failed components: ${FAILED_COMPONENTS[*]}"
  log_error "Location: $BACKUP_DIR"
  log_error "Total size: $TOTAL_SIZE"
  log_error "==================================="
  exit 1
fi

log_success "==================================="
log_success "Backup completed!"
log_success "Location: $BACKUP_DIR"
log_success "Total size: $TOTAL_SIZE"
log_success "==================================="

# Create a symlink to latest backup
ln -sfn "$TIMESTAMP" "${OUTPUT_DIR}/latest"
log_info "Symlink created: ${OUTPUT_DIR}/latest -> $TIMESTAMP"
|
||||
580
backend/scripts/dump/restore_data.sh
Executable file
580
backend/scripts/dump/restore_data.sh
Executable file
@@ -0,0 +1,580 @@
|
||||
#!/bin/bash
|
||||
# =============================================================================
|
||||
# Onyx Data Restore Script
|
||||
# =============================================================================
|
||||
# This script restores PostgreSQL, Vespa, and MinIO data from a backup.
|
||||
#
|
||||
# The script auto-detects the backup mode based on files present:
|
||||
# - *_volume.tar.gz files -> volume restore
|
||||
# - postgres_dump.backup / vespa_documents.jsonl -> api restore
|
||||
#
|
||||
# Usage:
|
||||
# ./restore_data.sh [OPTIONS]
|
||||
#
|
||||
# Options:
|
||||
# --input <dir> Backup directory (required, or use 'latest')
|
||||
# --project <name> Docker Compose project name (default: onyx)
|
||||
# --volume-prefix <name> Volume name prefix (default: same as project name)
|
||||
# --compose-dir <dir> Docker Compose directory (for service management)
|
||||
# --postgres-only Only restore PostgreSQL
|
||||
# --vespa-only Only restore Vespa
|
||||
# --minio-only Only restore MinIO
|
||||
# --no-minio Skip MinIO restore
|
||||
# --no-restart Don't restart services after restore (volume mode)
|
||||
# --force Skip confirmation prompts
|
||||
# --help Show this help message
|
||||
#
|
||||
# Examples:
|
||||
# ./restore_data.sh --input ./onyx_backup/latest
|
||||
# ./restore_data.sh --input ./onyx_backup/20240115_120000 --force
|
||||
# ./restore_data.sh --input ./onyx_backup/latest --postgres-only
|
||||
# ./restore_data.sh --input ./backup --volume-prefix myprefix
|
||||
#
|
||||
# WARNING: This will overwrite existing data in the target instance!
|
||||
# =============================================================================
|
||||
|
||||
set -e
|
||||
|
||||
# Default configuration
|
||||
INPUT_DIR=""
|
||||
PROJECT_NAME="onyx"
|
||||
VOLUME_PREFIX="" # Will default to PROJECT_NAME if not set
|
||||
COMPOSE_DIR="" # Docker Compose directory for service management
|
||||
RESTORE_POSTGRES=true
|
||||
RESTORE_VESPA=true
|
||||
RESTORE_MINIO=true
|
||||
FORCE=false
|
||||
NO_RESTART=false
|
||||
|
||||
# PostgreSQL defaults
|
||||
POSTGRES_USER="${POSTGRES_USER:-postgres}"
|
||||
POSTGRES_PASSWORD="${POSTGRES_PASSWORD:-password}"
|
||||
POSTGRES_DB="${POSTGRES_DB:-postgres}"
|
||||
POSTGRES_PORT="${POSTGRES_PORT:-5432}"
|
||||
|
||||
# Vespa defaults
|
||||
VESPA_HOST="${VESPA_HOST:-localhost}"
|
||||
VESPA_PORT="${VESPA_PORT:-8081}"
|
||||
VESPA_INDEX="${VESPA_INDEX:-danswer_index}"
|
||||
|
||||
# Colors for output
|
||||
RED='\033[0;31m'
|
||||
GREEN='\033[0;32m'
|
||||
YELLOW='\033[1;33m'
|
||||
BLUE='\033[0;34m'
|
||||
NC='\033[0m' # No Color
|
||||
|
||||
# -----------------------------------------------------------------------------
# Logging helpers
#
# Each helper prints "<colored tag> <message>" followed by a newline.
#   $1 - message text (printed verbatim; backslashes are NOT interpreted)
# log_info / log_success write to stdout; log_warning / log_error write to
# stderr so diagnostics are not mixed into captured or piped output.
# The color variables hold literal "\033[..." sequences, hence the %b format.
# -----------------------------------------------------------------------------
log_info() {
  printf '%b[INFO]%b %s\n' "$BLUE" "$NC" "$1"
}

log_success() {
  printf '%b[SUCCESS]%b %s\n' "$GREEN" "$NC" "$1"
}

log_warning() {
  printf '%b[WARNING]%b %s\n' "$YELLOW" "$NC" "$1" >&2
}

log_error() {
  printf '%b[ERROR]%b %s\n' "$RED" "$NC" "$1" >&2
}
|
||||
|
||||
# Print the usage text and exit 0.
#
# The usage text is the comment header at the top of the script: every line
# after the shebang up to (not including) the first line that is not a
# comment. Scanning for the end of the header replaces the previous fixed
# `head -36 | tail -33` window, which was two lines too long and leaked the
# blank line and `set -e` into the help output.
#
# Arguments: $1 - (optional) script file to read the header from; defaults
#                 to "$0"
show_help() {
  local script="${1:-$0}"

  awk 'NR == 1 && /^#!/ { next } /^#/ { print; next } { exit }' "$script"
  exit 0
}
|
||||
|
||||
# Parse arguments

# Abort with a clear message when a value-taking option has no value (or an
# empty one). Call as `require_value "$@"` while "$1" is the option being
# parsed. Without this check a trailing "--input" made `shift 2` fail and
# the script stopped under `set -e` without saying why.
require_value() {
  if [[ $# -lt 2 || -z "$2" ]]; then
    log_error "Option $1 requires a value"
    exit 1
  fi
}

while [[ $# -gt 0 ]]; do
  case "$1" in
    --input)
      require_value "$@"
      INPUT_DIR="$2"
      shift 2
      ;;
    --project)
      require_value "$@"
      PROJECT_NAME="$2"
      shift 2
      ;;
    --volume-prefix)
      require_value "$@"
      VOLUME_PREFIX="$2"
      shift 2
      ;;
    --compose-dir)
      require_value "$@"
      COMPOSE_DIR="$2"
      shift 2
      ;;
    --no-restart)
      NO_RESTART=true
      shift
      ;;
    --postgres-only)
      RESTORE_POSTGRES=true
      RESTORE_VESPA=false
      RESTORE_MINIO=false
      shift
      ;;
    --vespa-only)
      RESTORE_POSTGRES=false
      RESTORE_VESPA=true
      RESTORE_MINIO=false
      shift
      ;;
    --minio-only)
      RESTORE_POSTGRES=false
      RESTORE_VESPA=false
      RESTORE_MINIO=true
      shift
      ;;
    --no-minio)
      RESTORE_MINIO=false
      shift
      ;;
    --force)
      FORCE=true
      shift
      ;;
    --help)
      show_help
      ;;
    *)
      log_error "Unknown option: $1"
      exit 1
      ;;
  esac
done
|
||||
|
||||
# Validate input directory
if [[ -z "$INPUT_DIR" ]]; then
  log_error "Input directory is required. Use --input <dir>"
  exit 1
fi

# Check existence BEFORE resolving: with `set -e`, a failing `cd` inside the
# command substitution would otherwise abort the script before this friendly
# error could ever be printed.
if [[ ! -d "$INPUT_DIR" ]]; then
  log_error "Input directory not found: $INPUT_DIR"
  exit 1
fi

# Resolve to an absolute path with symlinks (e.g., 'latest') expanded.
# `pwd -P` prints the physical directory; plain `pwd` keeps the symlink name.
if ! RESOLVED_INPUT_DIR=$(cd "$INPUT_DIR" && pwd -P); then
  log_error "Cannot access input directory: $INPUT_DIR"
  exit 1
fi
INPUT_DIR="$RESOLVED_INPUT_DIR"
|
||||
|
||||
# Load metadata if available
|
||||
METADATA_FILE="${INPUT_DIR}/metadata.json"
|
||||
if [[ -f "$METADATA_FILE" ]]; then
|
||||
log_info "Loading backup metadata..."
|
||||
BACKUP_MODE=$(jq -r '.mode // "unknown"' "$METADATA_FILE")
|
||||
BACKUP_TIMESTAMP=$(jq -r '.timestamp // "unknown"' "$METADATA_FILE")
|
||||
log_info " Backup timestamp: $BACKUP_TIMESTAMP"
|
||||
log_info " Backup mode: $BACKUP_MODE"
|
||||
fi
|
||||
|
||||
# Set VOLUME_PREFIX to PROJECT_NAME if not specified
|
||||
if [[ -z "$VOLUME_PREFIX" ]]; then
|
||||
VOLUME_PREFIX="$PROJECT_NAME"
|
||||
fi
|
||||
|
||||
log_info "Volume prefix: $VOLUME_PREFIX"
|
||||
|
||||
# Track which services were stopped
|
||||
STOPPED_SERVICES=()
|
||||
|
||||
# =============================================================================
|
||||
# Service management functions
|
||||
# =============================================================================
|
||||
|
||||
# Stop one Compose service if its container is currently running, and record
# it in STOPPED_SERVICES so it can be restarted later.
#
# Globals:   PROJECT_NAME, COMPOSE_DIR (read); STOPPED_SERVICES (appended)
# Arguments: $1 - Compose service name (e.g. relational_db)
# Returns:   0 if the service was not running or was stopped;
#            1 if the container could not be stopped
stop_service() {
  local service=$1
  local container="${PROJECT_NAME}-${service}-1"

  # -F -x: compare the name literally and against the whole line, so regex
  # metacharacters in the project name (e.g. ".") cannot match other containers.
  if ! docker ps --format '{{.Names}}' | grep -Fxq -- "$container"; then
    return 0
  fi

  log_info "Stopping ${service}..."
  if [[ -n "$COMPOSE_DIR" ]]; then
    # Prefer Compose; fall back to stopping the container directly.
    docker compose -p "$PROJECT_NAME" -f "${COMPOSE_DIR}/docker-compose.yml" stop "$service" 2>/dev/null \
      || docker stop "$container" \
      || return 1
  else
    docker stop "$container" || return 1
  fi

  STOPPED_SERVICES+=("$service")
}
|
||||
|
||||
# Restart every service recorded in STOPPED_SERVICES.
#
# Tries `docker compose start` first when COMPOSE_DIR is set, and otherwise
# (or if that fails) starts each container directly. Restarting stays
# best-effort: the function always returns 0, but a container that cannot be
# started is now reported with a warning instead of being silently ignored.
#
# Globals: PROJECT_NAME, COMPOSE_DIR, STOPPED_SERVICES (read)
start_services() {
  if [[ ${#STOPPED_SERVICES[@]} -eq 0 ]]; then
    return 0
  fi

  log_info "Restarting services: ${STOPPED_SERVICES[*]}"

  if [[ -n "$COMPOSE_DIR" ]] \
    && docker compose -p "$PROJECT_NAME" -f "${COMPOSE_DIR}/docker-compose.yml" \
      start "${STOPPED_SERVICES[@]}" 2>/dev/null; then
    return 0
  fi

  # Fallback to starting containers directly
  local service container
  for service in "${STOPPED_SERVICES[@]}"; do
    container="${PROJECT_NAME}-${service}-1"
    if ! docker start "$container"; then
      log_warning "Could not restart ${service} (container ${container}); start it manually"
    fi
  done
  return 0
}
|
||||
|
||||
# Auto-detect backup mode based on files present
|
||||
# Print the backup mode inferred from the files in INPUT_DIR:
#   "volume"  - a *_volume.tar.gz archive is present
#   "api"     - postgres_dump.backup / vespa_documents.jsonl /
#               minio_data.tar.gz is present
#   "unknown" - none of the known backup files exist
#
# PostgreSQL/Vespa files are checked first, exactly as before. The MinIO
# archives are only consulted afterwards, so a MinIO-only backup is no longer
# reported as "unknown" while every previously detected case keeps its result.
#
# Globals: INPUT_DIR (read)
# Outputs: the mode name on stdout
detect_backup_mode() {
  if [[ -f "${INPUT_DIR}/postgres_volume.tar.gz" || -f "${INPUT_DIR}/vespa_volume.tar.gz" ]]; then
    echo "volume"
  elif [[ -f "${INPUT_DIR}/postgres_dump.backup" || -f "${INPUT_DIR}/vespa_documents.jsonl" ]]; then
    echo "api"
  elif [[ -f "${INPUT_DIR}/minio_volume.tar.gz" ]]; then
    echo "volume"
  elif [[ -f "${INPUT_DIR}/minio_data.tar.gz" ]]; then
    echo "api"
  else
    echo "unknown"
  fi
}
|
||||
|
||||
DETECTED_MODE=$(detect_backup_mode)
|
||||
log_info "Detected backup mode: $DETECTED_MODE"
|
||||
|
||||
# Get container names
|
||||
POSTGRES_CONTAINER="${PROJECT_NAME}-relational_db-1"
|
||||
VESPA_CONTAINER="${PROJECT_NAME}-index-1"
|
||||
MINIO_CONTAINER="${PROJECT_NAME}-minio-1"
|
||||
|
||||
# Confirmation prompt
# Skipped with --force. Anything other than a literal "yes" cancels the
# restore. If no answer can be read at all (stdin closed, e.g. under cron),
# the script stops with an explanation instead of dying silently on `read`
# under `set -e`.
if [[ "$FORCE" != true ]]; then
  echo ""
  log_warning "==================================="
  log_warning "WARNING: This will overwrite existing data!"
  log_warning "==================================="
  echo ""
  echo "Restore configuration:"
  echo " Input directory: $INPUT_DIR"
  echo " Project name: $PROJECT_NAME"
  echo " Restore PostgreSQL: $RESTORE_POSTGRES"
  echo " Restore Vespa: $RESTORE_VESPA"
  echo " Restore MinIO: $RESTORE_MINIO"
  echo ""

  confirm=""
  # -r: keep backslashes literal. `read` returns non-zero at end of input; an
  # answer without a trailing newline is still accepted.
  if ! read -r -p "Are you sure you want to continue? (yes/no): " confirm && [[ -z "$confirm" ]]; then
    echo ""
    log_error "No confirmation could be read. Use --force for non-interactive restores."
    exit 1
  fi

  if [[ "$confirm" != "yes" ]]; then
    log_info "Restore cancelled."
    exit 0
  fi
fi
|
||||
|
||||
# =============================================================================
|
||||
# Volume-based restore functions
|
||||
# =============================================================================
|
||||
|
||||
# Restore the PostgreSQL Docker volume from INPUT_DIR/postgres_volume.tar.gz.
#
# Globals: VOLUME_PREFIX, INPUT_DIR (read)
# Returns: 0 on success; 1 if the archive is missing, the volume cannot be
#          created, or extraction fails
#
# `docker volume rm` is best-effort: it fails when a container still
# references the volume, and `docker volume create` then reuses the existing
# one. The volume is therefore emptied inside the helper container before the
# archive is extracted, so old and restored files are never mixed.
restore_postgres_volume() {
  log_info "Restoring PostgreSQL from volume backup..."

  local volume_name="${VOLUME_PREFIX}_db_volume"
  local backup_file="${INPUT_DIR}/postgres_volume.tar.gz"

  if [[ ! -f "$backup_file" ]]; then
    log_error "PostgreSQL volume backup not found: $backup_file"
    return 1
  fi

  # Remove existing volume and create new one
  log_info "Recreating PostgreSQL volume..."
  docker volume rm "$volume_name" 2>/dev/null || true
  if ! docker volume create "$volume_name"; then
    log_error "Could not create volume '$volume_name'"
    return 1
  fi

  # Restore volume from tar (after clearing anything left in the volume)
  if ! docker run --rm \
    -v "${volume_name}:/target" \
    -v "${INPUT_DIR}:/backup:ro" \
    alpine sh -c "find /target -mindepth 1 -delete && cd /target && tar xzf /backup/postgres_volume.tar.gz"; then
    log_error "Failed to extract PostgreSQL backup into volume '$volume_name'"
    return 1
  fi

  log_success "PostgreSQL volume restored"
}
|
||||
|
||||
#######################################
# Restore the Vespa Docker volume from vespa_volume.tar.gz.
#
# Callers invoke this as `restore_vespa_volume || ...`, which disables errexit
# inside the function, so every docker step is checked explicitly.
#
# Globals:   INPUT_DIR (read), VOLUME_PREFIX (read)
# Arguments: none
# Returns:   0 on success; 1 if the archive is missing or a docker step fails
#######################################
restore_vespa_volume() {
  log_info "Restoring Vespa from volume backup..."

  local volume_name="${VOLUME_PREFIX}_vespa_volume"
  local backup_file="${INPUT_DIR}/vespa_volume.tar.gz"

  if [[ ! -f "$backup_file" ]]; then
    log_error "Vespa volume backup not found: $backup_file"
    return 1
  fi

  # Remove existing volume and create new one. Removal is best effort: Docker
  # refuses to remove a volume that a container (even a stopped one) still
  # references, in which case `create` below simply reuses the old volume.
  log_info "Recreating Vespa volume..."
  docker volume rm "$volume_name" 2>/dev/null || true
  if ! docker volume create "$volume_name"; then
    log_error "Failed to create Vespa volume: $volume_name"
    return 1
  fi

  # Restore volume from tar. The target is emptied first so that, when the old
  # volume could not be removed, stale files are not mixed with restored data.
  if ! docker run --rm \
    -v "${volume_name}:/target" \
    -v "${INPUT_DIR}:/backup:ro" \
    alpine sh -c "find /target -mindepth 1 -delete && cd /target && tar xzf /backup/vespa_volume.tar.gz"; then
    log_error "Failed to extract Vespa volume backup into volume: $volume_name"
    return 1
  fi

  log_success "Vespa volume restored"
}
|
||||
|
||||
#######################################
# Restore the MinIO Docker volume from minio_volume.tar.gz.
#
# Callers invoke this as `restore_minio_volume || ...`, which disables errexit
# inside the function, so every docker step is checked explicitly.
#
# Globals:   INPUT_DIR (read), VOLUME_PREFIX (read)
# Arguments: none
# Returns:   0 on success; 1 if the archive is missing or a docker step fails
#######################################
restore_minio_volume() {
  log_info "Restoring MinIO from volume backup..."

  local volume_name="${VOLUME_PREFIX}_minio_data"
  local backup_file="${INPUT_DIR}/minio_volume.tar.gz"

  if [[ ! -f "$backup_file" ]]; then
    log_error "MinIO volume backup not found: $backup_file"
    return 1
  fi

  # Remove existing volume and create new one. Removal is best effort: Docker
  # refuses to remove a volume that a container (even a stopped one) still
  # references, in which case `create` below simply reuses the old volume.
  log_info "Recreating MinIO volume..."
  docker volume rm "$volume_name" 2>/dev/null || true
  if ! docker volume create "$volume_name"; then
    log_error "Failed to create MinIO volume: $volume_name"
    return 1
  fi

  # Restore volume from tar. The target is emptied first so that, when the old
  # volume could not be removed, stale files are not mixed with restored data.
  if ! docker run --rm \
    -v "${volume_name}:/target" \
    -v "${INPUT_DIR}:/backup:ro" \
    alpine sh -c "find /target -mindepth 1 -delete && cd /target && tar xzf /backup/minio_volume.tar.gz"; then
    log_error "Failed to extract MinIO volume backup into volume: $volume_name"
    return 1
  fi

  log_success "MinIO volume restored"
}
|
||||
|
||||
# =============================================================================
|
||||
# API-based restore functions
|
||||
# =============================================================================
|
||||
|
||||
#######################################
# Restore PostgreSQL from a pg_dump archive using pg_restore inside the
# running database container.
#
# Globals:   INPUT_DIR, POSTGRES_CONTAINER, POSTGRES_USER, POSTGRES_DB (read)
# Arguments: none
# Returns:   0 when pg_restore was run (its own non-zero status is downgraded
#            to a warning); 1 if the dump is missing, the container is not
#            running, or the dump could not be copied into the container
#######################################
restore_postgres_api() {
  log_info "Restoring PostgreSQL from pg_dump backup..."

  local backup_file="${INPUT_DIR}/postgres_dump.backup"
  local container_dump="/tmp/postgres_dump.backup"

  if [[ ! -f "$backup_file" ]]; then
    log_error "PostgreSQL dump not found: $backup_file"
    return 1
  fi

  # Check if container is running. The name list is captured first and matched
  # as a fixed, whole-line string: this avoids regex metacharacters in the
  # container name and a `docker ps | grep -q` pipeline that can fail
  # spuriously under `pipefail` when grep exits early.
  local running_containers
  running_containers=$(docker ps --format '{{.Names}}') || running_containers=""
  if ! grep -Fxq -- "$POSTGRES_CONTAINER" <<<"$running_containers"; then
    log_error "PostgreSQL container '$POSTGRES_CONTAINER' is not running"
    log_info "Please start the containers first: docker compose up -d relational_db"
    return 1
  fi

  # Copy backup file to container. Checked explicitly: otherwise pg_restore
  # would fail on a missing file and be reported below as mere "warnings".
  log_info "Copying backup file to container..."
  if ! docker cp "$backup_file" "${POSTGRES_CONTAINER}:${container_dump}"; then
    log_error "Failed to copy backup file into container '$POSTGRES_CONTAINER'"
    return 1
  fi

  log_info "Restoring database..."

  # Use pg_restore with --clean to drop objects before recreating
  docker exec "$POSTGRES_CONTAINER" \
    pg_restore -U "$POSTGRES_USER" -d "$POSTGRES_DB" \
    --clean --if-exists --no-owner --no-privileges \
    "$container_dump" 2>&1 || {
    # pg_restore may return non-zero even on success due to warnings
    log_warning "pg_restore completed with warnings (this is often normal)"
  }

  # Cleanup (best effort; a leftover temp file does not invalidate the restore)
  docker exec "$POSTGRES_CONTAINER" rm -f "$container_dump" \
    || log_warning "Could not remove ${container_dump} from container '$POSTGRES_CONTAINER'"

  log_success "PostgreSQL restored"
}
|
||||
|
||||
#######################################
# Restore Vespa documents from a JSONL backup by POSTing each line to the
# Vespa /document/v1 REST endpoint.
#
# Each non-empty line must be a JSON object whose `.update` field holds the
# full Vespa document ID; the part after the last "::" is used (URL-encoded)
# as the path component of the request.
#
# Globals:   INPUT_DIR, VESPA_HOST, VESPA_PORT, VESPA_INDEX (read)
# Arguments: none
# Returns:   0 once the file has been processed (per-document failures are
#            reported as warnings); 1 if the backup or jq is missing, or Vespa
#            is unreachable / not ready within 60 seconds
#######################################
restore_vespa_api() {
  log_info "Restoring Vespa from JSONL backup..."

  local backup_file="${INPUT_DIR}/vespa_documents.jsonl"

  if [[ ! -f "$backup_file" ]]; then
    log_error "Vespa backup not found: $backup_file"
    return 1
  fi

  # Without jq every document ID would come back empty and each POST would
  # fail, so bail out early with a clear message instead.
  if ! command -v jq >/dev/null 2>&1; then
    log_error "jq is required to restore Vespa documents but was not found"
    return 1
  fi

  local health_url="http://${VESPA_HOST}:${VESPA_PORT}/state/v1/health"
  local endpoint="http://${VESPA_HOST}:${VESPA_PORT}/document/v1/default/${VESPA_INDEX}/docid"
  local total_docs=0
  local failed_docs=0

  # Check if Vespa is accessible
  if ! curl -s -o /dev/null -w "%{http_code}" "$health_url" | grep -q "200"; then
    log_error "Cannot connect to Vespa at ${VESPA_HOST}:${VESPA_PORT}"
    log_info "Please ensure Vespa is running and accessible"
    return 1
  fi

  # Wait for Vespa to be fully ready
  log_info "Waiting for Vespa to be fully ready..."
  local max_wait=60
  local waited=0
  while ! curl -s "$health_url" | grep -q '"status":{"code":"up"}'; do
    if [[ $waited -ge $max_wait ]]; then
      log_error "Vespa did not become ready within ${max_wait} seconds"
      return 1
    fi
    sleep 2
    waited=$((waited + 2))
  done

  # Restore documents
  log_info "Restoring documents..."
  local line doc_id response http_code
  # `|| [[ -n "$line" ]]` keeps the final record when the file does not end
  # with a newline.
  while IFS= read -r line || [[ -n "$line" ]]; do
    if [[ -z "$line" ]]; then
      continue
    fi

    total_docs=$((total_docs + 1))
    http_code=""

    # Extract the document ID (text after the last "::") and URL-encode it so
    # that IDs containing reserved characters still form a valid request path.
    # Lines that are not valid JSON or lack a string `.update` yield no ID.
    doc_id=$(jq -r '.update | sub("^.*::"; "") | @uri' <<<"$line" 2>/dev/null) || doc_id=""

    if [[ -n "$doc_id" ]]; then
      # Post document; the HTTP status is appended on its own last line.
      response=$(curl -s -w "\n%{http_code}" -X POST \
        -H "Content-Type: application/json" \
        -d "$line" \
        "${endpoint}/${doc_id}") || true
      http_code=${response##*$'\n'}
    fi

    if [[ "$http_code" != "200" ]]; then
      failed_docs=$((failed_docs + 1))
      # Only the first few failures are logged individually to avoid flooding.
      if [[ $failed_docs -le 5 ]]; then
        log_warning "Failed to restore document ${doc_id:-<no document id>} (HTTP ${http_code:-n/a})"
      fi
    fi

    # Progress update
    if [[ $((total_docs % 100)) -eq 0 ]]; then
      log_info "  Restored $total_docs documents..."
    fi
  done < "$backup_file"

  if [[ $failed_docs -gt 0 ]]; then
    log_warning "Vespa restored with $failed_docs failures out of $total_docs documents"
  else
    log_success "Vespa restored ($total_docs documents)"
  fi
}
|
||||
|
||||
#######################################
# Restore MinIO data from minio_data.tar.gz.
#
# The archive is unpacked to a temporary directory and then either mirrored
# with the MinIO client (`mc`) or, when `mc` is not installed, copied into the
# running MinIO container's /data directory. When minio_data.tar.gz is absent
# but minio_volume.tar.gz exists, restore_minio_volume is used instead.
#
# Globals:   INPUT_DIR, MINIO_CONTAINER (read)
#            MINIO_RESTORE_ENDPOINT (read, default http://localhost:9004)
#            MINIO_ROOT_USER / MINIO_ROOT_PASSWORD (read, default minioadmin)
# Arguments: none
# Returns:   0 on success; non-zero if no backup exists or any step fails
#######################################
restore_minio_api() {
  log_info "Restoring MinIO data..."

  local backup_file="${INPUT_DIR}/minio_data.tar.gz"

  if [[ ! -f "$backup_file" ]]; then
    log_warning "MinIO backup not found: $backup_file"
    # Try volume backup as fallback
    if [[ -f "${INPUT_DIR}/minio_volume.tar.gz" ]]; then
      log_info "Found volume backup, using that instead"
      restore_minio_volume
      return
    fi
    return 1
  fi

  # Extract to temp directory
  local temp_dir
  if ! temp_dir=$(mktemp -d); then
    log_error "Failed to create temporary directory for MinIO restore"
    return 1
  fi

  # Failures are collected in `status` so the temp directory is always removed
  # on the single exit path below.
  local status=0
  local minio_endpoint="${MINIO_RESTORE_ENDPOINT:-http://localhost:9004}"
  local minio_user="${MINIO_ROOT_USER:-minioadmin}"
  local minio_password="${MINIO_ROOT_PASSWORD:-minioadmin}"
  local running_containers

  if ! tar xzf "$backup_file" -C "$temp_dir"; then
    log_error "Failed to extract MinIO backup: $backup_file"
    status=1
  elif command -v mc &>/dev/null; then
    # NOTE(review): `mc` may also resolve to an unrelated program of the same
    # name (e.g. Midnight Commander); confirm it is the MinIO client.
    # Configure mc alias for local minio (best effort; mirror reports errors)
    mc alias set onyx-restore "$minio_endpoint" "$minio_user" "$minio_password" 2>/dev/null || true

    # Mirror data to minio
    if ! mc mirror "$temp_dir/" onyx-restore/ 2>/dev/null; then
      log_warning "mc mirror failed"
      status=1
    fi
  else
    # Fallback: copy to container. Names are captured first and matched as a
    # fixed whole-line string (no regex surprises, no early-exit pipeline).
    running_containers=$(docker ps --format '{{.Names}}') || running_containers=""
    if grep -Fxq -- "$MINIO_CONTAINER" <<<"$running_containers"; then
      if ! docker cp "$temp_dir/." "${MINIO_CONTAINER}:/data/"; then
        log_error "Failed to copy MinIO data into container '$MINIO_CONTAINER'"
        status=1
      fi
    else
      log_error "MinIO container not running and mc not available"
      status=1
    fi
  fi

  rm -rf -- "${temp_dir:?}"

  if [[ $status -ne 0 ]]; then
    return 1
  fi

  log_success "MinIO restored"
}
|
||||
|
||||
# =============================================================================
|
||||
# Main restore logic
|
||||
# =============================================================================
|
||||
|
||||
log_info "Starting Onyx data restore..."
log_info "Input directory: $INPUT_DIR"
log_info "Project name: $PROJECT_NAME"

# Space-separated names of components whose restore step failed. Used for the
# final summary and exit status, so that a failed restore is never reported as
# "Restore completed!" with exit code 0.
failed_restores=""

# Run restores based on detected mode
if [[ "$DETECTED_MODE" == "volume" ]]; then
  log_info "Using volume-based restore"

  # Stop services before restore
  log_info "Stopping services for restore..."
  if [[ "$RESTORE_POSTGRES" == true ]]; then
    stop_service "relational_db"
  fi
  if [[ "$RESTORE_VESPA" == true ]]; then
    stop_service "index"
  fi
  if [[ "$RESTORE_MINIO" == true ]]; then
    stop_service "minio"
  fi

  # Perform restores. A failure is recorded but does not stop the remaining
  # components from being restored or the services from being restarted.
  if [[ "$RESTORE_POSTGRES" == true ]]; then
    if ! restore_postgres_volume; then
      log_warning "PostgreSQL restore failed"
      failed_restores+=" PostgreSQL"
    fi
  fi

  if [[ "$RESTORE_VESPA" == true ]]; then
    if ! restore_vespa_volume; then
      log_warning "Vespa restore failed"
      failed_restores+=" Vespa"
    fi
  fi

  if [[ "$RESTORE_MINIO" == true ]]; then
    if ! restore_minio_volume; then
      log_warning "MinIO restore failed"
      failed_restores+=" MinIO"
    fi
  fi

  # Restart services unless --no-restart was specified
  if [[ "$NO_RESTART" != true ]]; then
    start_services
  else
    log_info "Skipping service restart (--no-restart specified)"
    # The trailing "-" keeps this safe under `set -u` when nothing was stopped.
    log_info "Stopped services: ${STOPPED_SERVICES[*]-}"
  fi

elif [[ "$DETECTED_MODE" == "api" ]]; then
  log_info "Using API-based restore"
  log_info "Services must be running for API restore"

  if [[ "$RESTORE_POSTGRES" == true ]]; then
    if ! restore_postgres_api; then
      log_warning "PostgreSQL restore failed"
      failed_restores+=" PostgreSQL"
    fi
  fi

  if [[ "$RESTORE_VESPA" == true ]]; then
    if ! restore_vespa_api; then
      log_warning "Vespa restore failed"
      failed_restores+=" Vespa"
    fi
  fi

  if [[ "$RESTORE_MINIO" == true ]]; then
    if ! restore_minio_api; then
      log_warning "MinIO restore failed"
      failed_restores+=" MinIO"
    fi
  fi

else
  log_error "Could not detect backup mode. Ensure backup files exist in $INPUT_DIR"
  exit 1
fi

# Report failures and exit non-zero so callers and automation can detect them.
if [[ -n "$failed_restores" ]]; then
  log_error "==================================="
  log_error "Restore finished with failures:${failed_restores}"
  log_error "==================================="
  exit 1
fi

log_success "==================================="
log_success "Restore completed!"
log_success "==================================="

# Post-restore recommendations
echo ""
log_info "Post-restore steps:"
log_info "  1. Run database migrations if needed: docker compose -p $PROJECT_NAME exec api_server alembic upgrade head"
log_info "  2. Verify data integrity in the application"
|
||||
Reference in New Issue
Block a user