shell/plex/backup-plex.sh

#!/bin/bash
################################################################################
# Plex Media Server Enhanced Backup Script
################################################################################
#
# Author: Peter Wood <peter@peterwood.dev>
# Description: Comprehensive backup solution for Plex Media Server with advanced
# database integrity checking, automated repair capabilities,
# performance monitoring, and multi-channel notifications.
#
# Features:
# - Database integrity verification with automatic repair
# - WAL (Write-Ahead Logging) file handling
# - Performance monitoring with JSON logging
# - Parallel verification for improved speed
# - Multi-channel notifications (webhook, email, console)
# - Comprehensive error handling and recovery
# - Automated cleanup of old backups
#
# Related Scripts:
# - restore-plex.sh: Restore from backups created by this script
# - validate-plex-backups.sh: Validate backup integrity and health
# - monitor-plex-backup.sh: Real-time monitoring dashboard
# - test-plex-backup.sh: Comprehensive testing suite
# - plex.sh: General Plex service management
#
# Usage:
#   ./backup-plex.sh                         # Standard backup with auto-repair
#   ./backup-plex.sh --disable-auto-repair   # Backup without auto-repair
#   ./backup-plex.sh --check-integrity       # Integrity check only
#   ./backup-plex.sh --non-interactive       # Automated mode for cron jobs
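#
# Example cron entry (a sketch; the schedule and install path are assumptions, adjust to your setup):
#   15 3 * * * /opt/scripts/backup-plex.sh --non-interactive >> /var/log/plex-backup-cron.log 2>&1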
#
# Dependencies:
# - Plex Media Server
# - sqlite3 or Plex SQLite binary
# - curl (for webhook notifications)
# - jq (for JSON processing)
# - sendmail (optional, for email notifications)
#
# Exit Codes:
# 0 - Success
# 1 - General error
# 2 - Database integrity issues
# 3 - Service management failure
# 4 - Backup creation failure
#
################################################################################
# NOTE: Removed 'set -e' to allow graceful error handling in repair operations
# Critical operations use explicit error checking instead of automatic exit
# Color codes for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
CYAN='\033[0;36m'
NC='\033[0m' # No Color
# Configuration
MAX_BACKUP_AGE_DAYS=30
MAX_BACKUPS_TO_KEEP=10
BACKUP_ROOT="/mnt/share/media/backups/plex"
SHARED_LOG_ROOT="/mnt/share/media/backups/logs"
# Get script directory with proper error handling
if ! SCRIPT_PATH="$(readlink -f "$0")"; then
echo "Error: Failed to resolve script path" >&2
exit 1
fi
SCRIPT_DIR="$(dirname "$SCRIPT_PATH")"
LOCAL_LOG_ROOT="${SCRIPT_DIR}/logs"
PERFORMANCE_LOG_FILE="${LOCAL_LOG_ROOT}/plex-backup-performance.json"
# Backup strategy configuration - Always perform full backups
# Plex SQLite path (custom Plex SQLite binary)
PLEX_SQLITE="/usr/lib/plexmediaserver/Plex SQLite"
# Script options
AUTO_REPAIR=true # Default to enabled for automatic corruption detection and repair
INTEGRITY_CHECK_ONLY=false
INTERACTIVE_MODE=false
PARALLEL_VERIFICATION=true
PERFORMANCE_MONITORING=true
WEBHOOK_URL="https://notify.peterwood.rocks/lab"
EMAIL_RECIPIENT=""
# Parse command line arguments
while [[ $# -gt 0 ]]; do
case $1 in
--auto-repair)
AUTO_REPAIR=true
INTERACTIVE_MODE=false
shift
;;
--disable-auto-repair)
AUTO_REPAIR=false
shift
;;
--check-integrity)
INTEGRITY_CHECK_ONLY=true
shift
;;
--non-interactive)
INTERACTIVE_MODE=false
shift
;;
--interactive)
INTERACTIVE_MODE=true
shift
;;
--no-parallel)
PARALLEL_VERIFICATION=false
shift
;;
--no-performance)
PERFORMANCE_MONITORING=false
shift
;;
--webhook=*)
WEBHOOK_URL="${1#*=}"
shift
;;
--email=*)
EMAIL_RECIPIENT="${1#*=}"
shift
;;
-h|--help)
echo "Usage: $0 [OPTIONS]"
echo "Options:"
echo " --auto-repair Force enable automatic database repair (default: enabled)"
echo " --disable-auto-repair Disable automatic database repair"
echo " --check-integrity Only check database integrity, don't backup"
echo " --non-interactive Run in non-interactive mode (for automation)"
echo " --interactive Run in interactive mode (prompts for repair decisions)"
echo " --no-parallel Disable parallel verification (slower but safer)"
echo " --no-performance Disable performance monitoring"
echo " --webhook=URL Send notifications to webhook URL"
echo " --email=ADDRESS Send notifications to email address"
echo " -h, --help Show this help message"
echo ""
echo "Database Integrity & Repair:"
echo " By default, the script automatically detects and attempts to repair"
echo " corrupted databases before backup. Use --disable-auto-repair to"
echo " skip repair and backup corrupted databases as-is."
echo ""
exit 0
;;
*)
echo "Unknown option: $1"
exit 1
;;
esac
done
# Create logs directory
mkdir -p "${SCRIPT_DIR}/logs"
# Define Plex files and their nicknames
declare -A PLEX_FILES=(
["database"]="/var/lib/plexmediaserver/Library/Application Support/Plex Media Server/Plug-in Support/Databases/com.plexapp.plugins.library.db"
["blobs"]="/var/lib/plexmediaserver/Library/Application Support/Plex Media Server/Plug-in Support/Databases/com.plexapp.plugins.library.blobs.db"
["preferences"]="/var/lib/plexmediaserver/Library/Application Support/Plex Media Server/Preferences.xml"
)
# Logging functions
log_message() {
local message="$1"
local timestamp
timestamp=$(date '+%Y-%m-%d %H:%M:%S')
echo -e "${CYAN}[${timestamp}]${NC} ${message}"
mkdir -p "${LOCAL_LOG_ROOT}"
echo "[${timestamp}] ${message}" >> "${LOCAL_LOG_ROOT}/plex-backup-$(date '+%Y-%m-%d').log" 2>/dev/null || true
}
log_error() {
local message="$1"
local timestamp
timestamp=$(date '+%Y-%m-%d %H:%M:%S')
echo -e "${RED}[${timestamp}] ERROR:${NC} ${message}"
mkdir -p "${LOCAL_LOG_ROOT}"
echo "[${timestamp}] ERROR: ${message}" >> "${LOCAL_LOG_ROOT}/plex-backup-$(date '+%Y-%m-%d').log" 2>/dev/null || true
}
log_success() {
local message="$1"
local timestamp
timestamp=$(date '+%Y-%m-%d %H:%M:%S')
echo -e "${GREEN}[${timestamp}] SUCCESS:${NC} ${message}"
mkdir -p "$LOCAL_LOG_ROOT"
echo "[${timestamp}] SUCCESS: $message" >> "${LOCAL_LOG_ROOT}/plex-backup-$(date '+%Y-%m-%d').log" 2>/dev/null || true
}
log_warning() {
local message="$1"
local timestamp
timestamp=$(date '+%Y-%m-%d %H:%M:%S')
echo -e "${YELLOW}[${timestamp}] WARNING:${NC} ${message}"
mkdir -p "$LOCAL_LOG_ROOT"
echo "[${timestamp}] WARNING: $message" >> "${LOCAL_LOG_ROOT}/plex-backup-$(date '+%Y-%m-%d').log" 2>/dev/null || true
}
log_info() {
local message="$1"
local timestamp
timestamp=$(date '+%Y-%m-%d %H:%M:%S')
echo -e "${BLUE}[${timestamp}] INFO:${NC} ${message}"
mkdir -p "$LOCAL_LOG_ROOT"
echo "[${timestamp}] INFO: $message" >> "${LOCAL_LOG_ROOT}/plex-backup-$(date '+%Y-%m-%d').log" 2>/dev/null || true
}
# Performance tracking functions
track_performance() {
if [[ "$PERFORMANCE_MONITORING" != true ]]; then
return 0
fi
local operation="$1"
local start_time="$2"
local end_time="${3:-$(date +%s)}"
local duration=$((end_time - start_time))
# Initialize performance log if it doesn't exist
if [ ! -f "$PERFORMANCE_LOG_FILE" ]; then
mkdir -p "$(dirname "$PERFORMANCE_LOG_FILE")"
echo "[]" > "$PERFORMANCE_LOG_FILE"
fi
# Add performance entry
local entry
local timestamp
if ! timestamp="$(date -Iseconds)"; then
timestamp="$(date)" # Fallback to basic date format
fi
entry=$(jq -n \
--arg operation "$operation" \
--arg duration "$duration" \
--arg timestamp "$timestamp" \
'{
operation: $operation,
duration_seconds: ($duration | tonumber),
timestamp: $timestamp
}')
jq --argjson entry "$entry" '. += [$entry]' "$PERFORMANCE_LOG_FILE" > "${PERFORMANCE_LOG_FILE}.tmp" && \
mv "${PERFORMANCE_LOG_FILE}.tmp" "$PERFORMANCE_LOG_FILE"
log_info "Performance: $operation completed in ${duration}s"
}
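# Each appended entry has this shape (illustrative values):
#   {"operation": "backup", "duration_seconds": 42, "timestamp": "2025-01-01T03:15:07+00:00"}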
# Initialize log directory
initialize_logs() {
mkdir -p "$(dirname "$PERFORMANCE_LOG_FILE")"
if [ ! -f "$PERFORMANCE_LOG_FILE" ]; then
echo "[]" > "$PERFORMANCE_LOG_FILE"
log_message "Initialized performance log file"
fi
}
# Log synchronization functions
sync_logs_to_shared() {
local sync_start_time
sync_start_time=$(date +%s)
log_info "Starting log synchronization to shared location"
# Ensure shared log directory exists
if ! mkdir -p "$SHARED_LOG_ROOT" 2>/dev/null; then
log_warning "Could not create shared log directory: $SHARED_LOG_ROOT"
return 1
fi
# Check if shared location is accessible
if [ ! -w "$SHARED_LOG_ROOT" ]; then
log_warning "Shared log directory is not writable: $SHARED_LOG_ROOT"
return 1
fi
# Sync log files (one-way: local -> shared)
local sync_count=0
local error_count=0
for log_file in "$LOCAL_LOG_ROOT"/*.log "$LOCAL_LOG_ROOT"/*.json; do
if [ -f "$log_file" ]; then
local filename
filename=$(basename "$log_file")
local shared_file="$SHARED_LOG_ROOT/$filename"
# Only copy if file doesn't exist in shared location or local is newer
if [ ! -f "$shared_file" ] || [ "$log_file" -nt "$shared_file" ]; then
if cp "$log_file" "$shared_file" 2>/dev/null; then
((sync_count++))
log_info "Synced: $filename"
else
((error_count++))
log_warning "Failed to sync: $filename"
fi
fi
fi
done
local sync_end_time
sync_end_time=$(date +%s)
local sync_duration=$((sync_end_time - sync_start_time))
if [ $error_count -eq 0 ]; then
log_success "Log sync completed: $sync_count files synced in ${sync_duration}s"
else
log_warning "Log sync completed with errors: $sync_count synced, $error_count failed in ${sync_duration}s"
fi
return $error_count
}
# Cleanup old local logs (30 day retention)
cleanup_old_local_logs() {
local cleanup_start_time
cleanup_start_time=$(date +%s)
log_info "Starting cleanup of old local logs (30+ days)"
if [ ! -d "$LOCAL_LOG_ROOT" ]; then
log_info "Local log directory does not exist, nothing to clean up"
return 0
fi
local cleanup_count=0
local error_count=0
# Find and remove log files older than 30 days
while IFS= read -r -d '' old_file; do
local filename
filename=$(basename "$old_file")
if rm "$old_file" 2>/dev/null; then
((cleanup_count++))
log_info "Removed old log: $filename"
else
((error_count++))
log_warning "Failed to remove old log: $filename"
fi
done < <(find "$LOCAL_LOG_ROOT" -name "*.log" -mtime +30 -print0 2>/dev/null)
# Also clean up old performance log entries (keep structure, remove old entries)
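# (String comparison with >= is chronologically sound here because date -Iseconds emits
# ISO-8601 timestamps with a consistent UTC offset on a given host.)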
if [ -f "$PERFORMANCE_LOG_FILE" ]; then
local thirty_days_ago
thirty_days_ago=$(date -d '30 days ago' -Iseconds)
local temp_perf_file="${PERFORMANCE_LOG_FILE}.cleanup.tmp"
if jq --arg cutoff "$thirty_days_ago" '[.[] | select(.timestamp >= $cutoff)]' "$PERFORMANCE_LOG_FILE" > "$temp_perf_file" 2>/dev/null; then
local old_count
old_count=$(jq length "$PERFORMANCE_LOG_FILE" 2>/dev/null || echo "0")
local new_count
new_count=$(jq length "$temp_perf_file" 2>/dev/null || echo "0")
local removed_count=$((old_count - new_count))
if [ "$removed_count" -gt 0 ]; then
mv "$temp_perf_file" "$PERFORMANCE_LOG_FILE"
log_info "Cleaned up $removed_count old performance entries"
((cleanup_count += removed_count))
else
rm -f "$temp_perf_file"
fi
else
rm -f "$temp_perf_file"
log_warning "Failed to clean up old performance log entries"
((error_count++))
fi
fi
local cleanup_end_time
cleanup_end_time=$(date +%s)
local cleanup_duration=$((cleanup_end_time - cleanup_start_time))
if [ $cleanup_count -gt 0 ]; then
log_success "Cleanup completed: $cleanup_count items removed in ${cleanup_duration}s"
else
log_info "Cleanup completed: no old items found to remove in ${cleanup_duration}s"
fi
return $error_count
}
# Enhanced notification system
send_notification() {
local title="$1"
local message="$2"
local status="${3:-info}" # success, error, warning, info
local hostname
hostname=$(hostname)
# Console notification
case "$status" in
success) log_success "$title: $message" ;;
error) log_error "$title: $message" ;;
warning) log_warning "$title: $message" ;;
*) log_info "$title: $message" ;;
esac
# Webhook notification
if [ -n "$WEBHOOK_URL" ]; then
local tags="backup,plex,${hostname}"
[ "$status" == "error" ] && tags="${tags},errors"
[ "$status" == "warning" ] && tags="${tags},warnings"
# Webhook payload is sent as-is; callers pass single-line summaries
local webhook_message="$message"
curl -s \
-H "tags:${tags}" \
-d "$webhook_message" \
"$WEBHOOK_URL" 2>/dev/null || log_warning "Failed to send webhook notification"
fi
# Email notification (if sendmail is available)
if [ -n "$EMAIL_RECIPIENT" ] && command -v sendmail > /dev/null 2>&1; then
{
echo "To: $EMAIL_RECIPIENT"
echo "Subject: Plex Backup - $title"
echo "Content-Type: text/plain"
echo ""
echo "Host: $hostname"
echo "Time: $(date)"
echo "Status: $status"
echo ""
echo "$message"
} | sendmail "$EMAIL_RECIPIENT" 2>/dev/null || true
fi
}
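# Manual test of the webhook path (a sketch; assumes an ntfy-style endpoint that honors the
# "tags:" header the curl call above sends):
#   curl -H "tags:backup,plex,test" -d "test message" "https://notify.peterwood.rocks/lab"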
# Format backed up files list for notifications
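# Example outputs (illustrative):
#   (library.db blobs.db)                   -> "library.db and blobs.db"
#   (library.db blobs.db Preferences.xml)   -> "library.db, blobs.db, and Preferences.xml"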
format_backed_up_files() {
local files=("$@")
local count=${#files[@]}
if [ "$count" -eq 0 ]; then
echo "no files"
elif [ "$count" -eq 1 ]; then
echo "${files[0]}"
elif [ "$count" -eq 2 ]; then
echo "${files[0]} and ${files[1]}"
else
local last_file="${files[-1]}"
local other_files=("${files[@]:0:$((count-1))}")
local other_files_str
other_files_str=$(IFS=', '; echo "${other_files[*]}")
echo "${other_files_str}, and ${last_file}"
fi
}
# Enhanced checksum calculation with caching
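# Cache layout (illustrative; "..." elides the middle of the path): slashes in the source
# path become underscores, so
#   /var/lib/plexmediaserver/.../com.plexapp.plugins.library.db
# caches to
#   /tmp/plex-backup-cache/_var_lib_plexmediaserver_..._com.plexapp.plugins.library.db.md5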
calculate_checksum() {
local file="$1"
# Use /tmp for cache files to avoid permission issues
local cache_dir="/tmp/plex-backup-cache"
local cache_file="$cache_dir/${file//\//_}.md5"
local file_mtime
file_mtime=$(stat -c %Y "$file" 2>/dev/null || echo "0")
# Create cache directory if it doesn't exist
mkdir -p "$cache_dir" 2>/dev/null || true
# Check if cached checksum exists and is newer than file
if [ -f "$cache_file" ]; then
local cache_mtime
cache_mtime=$(stat -c %Y "$cache_file" 2>/dev/null || echo "0")
if [ "$cache_mtime" -gt "$file_mtime" ]; then
local cached_checksum
cached_checksum=$(cat "$cache_file" 2>/dev/null)
if [[ -n "$cached_checksum" && "$cached_checksum" =~ ^[a-f0-9]{32}$ ]]; then
echo "$cached_checksum"
return 0
fi
fi
fi
# Calculate new checksum
local checksum
if ! checksum=$(md5sum "$file" 2>/dev/null | cut -d' ' -f1); then
checksum=""
fi
# Check if we got a valid checksum (not empty and looks like md5)
if [[ -n "$checksum" && "$checksum" =~ ^[a-f0-9]{32}$ ]]; then
# Cache the checksum
echo "$checksum" > "$cache_file" 2>/dev/null || true
echo "$checksum"
return 0
fi
# If normal access failed or returned empty, try with sudo
if ! checksum=$(sudo md5sum "$file" 2>/dev/null | cut -d' ' -f1); then
checksum=""
fi
# Check if sudo checksum is valid
if [[ -n "$checksum" && "$checksum" =~ ^[a-f0-9]{32}$ ]]; then
# Cache the checksum with appropriate permissions
echo "$checksum" | sudo tee "$cache_file" >/dev/null 2>&1 || true
echo "$checksum"
return 0
fi
# If both fail, return error indicator
echo "PERMISSION_DENIED"
return 1
}
# Check database integrity using Plex SQLite
check_database_integrity() {
local db_file="$1"
local db_name
db_name=$(basename "$db_file")
log_message "Checking database integrity: $db_name"
# Check if Plex SQLite exists
if [ ! -f "$PLEX_SQLITE" ]; then
log_error "Plex SQLite binary not found at: $PLEX_SQLITE"
return 1
fi
# Make Plex SQLite executable if it isn't already
sudo chmod +x "$PLEX_SQLITE" 2>/dev/null || true
# Run integrity check
local integrity_result
integrity_result=$(sudo "$PLEX_SQLITE" "$db_file" "PRAGMA integrity_check;" 2>&1)
local check_exit_code=$?
if [ $check_exit_code -ne 0 ]; then
log_error "Failed to run integrity check on $db_name: $integrity_result"
return 1
fi
if echo "$integrity_result" | grep -q "^ok$"; then
log_success "Database integrity check passed: $db_name"
return 0
else
log_warning "Database integrity issues detected in $db_name:"
echo "$integrity_result" | while read -r line; do
log_warning " $line"
done
return 1
fi
}
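# Manual spot check (same command the function runs; the binary path contains a space, so quote it):
#   sudo "/usr/lib/plexmediaserver/Plex SQLite" \
#     "/var/lib/plexmediaserver/Library/Application Support/Plex Media Server/Plug-in Support/Databases/com.plexapp.plugins.library.db" \
#     "PRAGMA integrity_check;"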
# Preventive corruption detection before severe corruption occurs
detect_early_corruption() {
local db_file="$1"
local db_name
db_name=$(basename "$db_file")
log_message "Performing early corruption detection for: $db_name"
# Check for early warning signs of corruption
local warning_count=0
# 1. Check for WAL file size anomalies
local wal_file="${db_file}-wal"
if [ -f "$wal_file" ]; then
local wal_size
wal_size=$(stat -f%z "$wal_file" 2>/dev/null || stat -c%s "$wal_file" 2>/dev/null || echo "0")
local db_size
db_size=$(stat -f%z "$db_file" 2>/dev/null || stat -c%s "$db_file" 2>/dev/null || echo "0")
# If WAL file is more than 10% of database size, it might indicate issues
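# (e.g. a 50 MB WAL against a 400 MB database gives wal_ratio=12, which trips the warning)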
if [ "$wal_size" -gt 0 ] && [ "$db_size" -gt 0 ]; then
local wal_ratio=$((wal_size * 100 / db_size))
if [ "$wal_ratio" -gt 10 ]; then
log_warning "WAL file unusually large: ${wal_ratio}% of database size"
((warning_count++))
fi
else
log_info "Unable to determine file sizes for WAL analysis"
fi
fi
# 2. Quick integrity check focused on critical issues
local quick_check
if ! quick_check=$(sudo "$PLEX_SQLITE" "$db_file" "PRAGMA quick_check(5);" 2>&1); then
log_warning "Failed to execute quick integrity check for $db_name"
((warning_count++))
elif ! echo "$quick_check" | grep -q "^ok$"; then
log_warning "Quick integrity check failed for $db_name"
log_warning "Issues found: $quick_check"
((warning_count++))
fi
# 3. Check for foreign key violations (common early corruption sign)
local fk_check
if fk_check=$(sudo "$PLEX_SQLITE" "$db_file" "PRAGMA foreign_key_check;" 2>/dev/null); then
if [ -n "$fk_check" ]; then
log_warning "Foreign key violations detected in $db_name"
((warning_count++))
fi
else
log_info "Foreign key check unavailable for $db_name"
fi
# 4. Confirm the database responds to a basic PRAGMA query
if ! sudo "$PLEX_SQLITE" "$db_file" "PRAGMA compile_options;" >/dev/null 2>&1; then
log_warning "Database failed to answer a basic PRAGMA query: $db_name"
((warning_count++))
fi
if [ "$warning_count" -gt 0 ]; then
log_warning "Early corruption indicators detected ($warning_count warnings) in $db_name"
log_warning "Consider performing preventive maintenance or monitoring more closely"
return 1
else
log_success "Early corruption detection passed for $db_name"
return 0
fi
}
# Enhanced database repair with multiple recovery strategies
repair_database() {
local db_file="$1"
local db_name
db_name=$(basename "$db_file")
local timestamp
timestamp=$(date "+%Y-%m-%d_%H.%M.%S")
log_message "Attempting to repair corrupted database: $db_name"
log_message "Starting advanced database repair for: $db_name"
# Enhanced WAL file handling for repair
handle_wal_files_for_repair "$db_file" "prepare"
# Create multiple backup copies before attempting repair
local pre_repair_backup="${db_file}.pre-repair-backup"
local working_copy="${db_file}.working-${timestamp}"
if ! sudo cp "$db_file" "$pre_repair_backup"; then
log_error "Failed to create pre-repair backup"
handle_wal_files_for_repair "$db_file" "restore"
return 1
fi
# Force filesystem sync to prevent corruption
sync
if ! sudo cp "$db_file" "$working_copy"; then
log_error "Failed to create working copy"
handle_wal_files_for_repair "$db_file" "restore"
return 1
fi
# Force filesystem sync to prevent corruption
sync
log_success "Created pre-repair backup: $(basename "$pre_repair_backup")"
# Strategy 1: Try dump and restore approach
log_message "Step 1: Database cleanup and optimization..."
if attempt_dump_restore "$working_copy" "$db_file" "$timestamp"; then
log_success "Database repaired using dump/restore method"
handle_wal_files_for_repair "$db_file" "cleanup"
cleanup_repair_files "$pre_repair_backup" "$working_copy"
return 0
fi
# Strategy 2: Try schema recreation
if attempt_schema_recreation "$working_copy" "$db_file" "$timestamp"; then
log_success "Database repaired using schema recreation"
handle_wal_files_for_repair "$db_file" "cleanup"
cleanup_repair_files "$pre_repair_backup" "$working_copy"
return 0
fi
# Strategy 3: Try recovery from previous backup
if attempt_backup_recovery "$db_file" "$BACKUP_ROOT" "$pre_repair_backup"; then
log_success "Database recovered from previous backup"
handle_wal_files_for_repair "$db_file" "cleanup"
cleanup_repair_files "$pre_repair_backup" "$working_copy"
return 0
fi
# All strategies failed - restore original and flag for manual intervention
log_error "Database repair failed. Restoring original..."
if sudo cp "$pre_repair_backup" "$db_file"; then
# Force filesystem sync to prevent corruption
sync
log_success "Original database restored"
handle_wal_files_for_repair "$db_file" "restore"
else
log_error "Failed to restore original database!"
handle_wal_files_for_repair "$db_file" "restore"
return 2
fi
log_error "Database repair failed for $db_name"
log_warning "Will backup corrupted database - manual intervention may be needed"
cleanup_repair_files "$pre_repair_backup" "$working_copy"
return 1
}
# Strategy 1: Dump and restore approach with enhanced validation
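# Manually, this strategy is roughly (a sketch; file names illustrative):
#   sudo "/usr/lib/plexmediaserver/Plex SQLite" corrupted.db ".dump" > dump.sql
#   sudo "/usr/lib/plexmediaserver/Plex SQLite" repaired.db < dump.sql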
attempt_dump_restore() {
local working_copy="$1"
local original_db="$2"
local timestamp="$3"
local dump_file="${original_db}.dump-${timestamp}.sql"
local new_db="${original_db}.repaired-${timestamp}"
log_message "Attempting repair via SQL dump/restore..."
# Try to dump the database with error checking
log_info "Creating database dump..."
if sudo "$PLEX_SQLITE" "$working_copy" ".dump" 2>/dev/null | sudo tee "$dump_file" >/dev/null; then
# Validate the dump file exists and has substantial content
if [[ ! -f "$dump_file" ]]; then
log_warning "Dump file was not created"
return 1
fi
local dump_size
dump_size=$(stat -c%s "$dump_file" 2>/dev/null || echo "0")
if [[ "$dump_size" -lt 1024 ]]; then
log_warning "Dump file is too small ($dump_size bytes) - likely incomplete"
sudo rm -f "$dump_file"
return 1
fi
# Check for essential database structures in dump
if ! grep -q "CREATE TABLE" "$dump_file" 2>/dev/null; then
log_warning "Dump file contains no CREATE TABLE statements - dump is incomplete"
sudo rm -f "$dump_file"
return 1
fi
# Check for critical Plex tables
local critical_tables=("schema_migrations" "accounts" "library_sections")
local missing_tables=()
for table in "${critical_tables[@]}"; do
if ! grep -q "CREATE TABLE.*$table" "$dump_file" 2>/dev/null; then
missing_tables+=("$table")
fi
done
if [[ ${#missing_tables[@]} -gt 0 ]]; then
log_warning "Dump is missing critical tables: ${missing_tables[*]}"
log_warning "This would result in an incomplete database - aborting dump/restore"
sudo rm -f "$dump_file"
return 1
fi
log_success "Database dumped successfully (${dump_size} bytes)"
log_info "Dump contains all critical tables: ${critical_tables[*]}"
# Create new database from dump
log_info "Creating new database from validated dump..."
if sudo cat "$dump_file" | sudo "$PLEX_SQLITE" "$new_db" 2>/dev/null; then
# Verify the new database was created and has content
if [[ ! -f "$new_db" ]]; then
log_warning "New database file was not created"
sudo rm -f "$dump_file"
return 1
fi
local new_db_size
new_db_size=$(stat -c%s "$new_db" 2>/dev/null || echo "0")
if [[ "$new_db_size" -lt 1048576 ]]; then # Less than 1MB
log_warning "New database is too small ($new_db_size bytes) - likely empty or incomplete"
sudo rm -f "$new_db" "$dump_file"
return 1
fi
# Verify critical tables exist in new database
local table_count
table_count=$(sudo "$PLEX_SQLITE" "$new_db" "SELECT COUNT(*) FROM sqlite_master WHERE type='table';" 2>/dev/null || echo "0")
if [[ "$table_count" -lt 50 ]]; then # Plex should have way more than 50 tables
log_warning "New database has too few tables ($table_count) - likely incomplete"
sudo rm -f "$new_db" "$dump_file"
return 1
fi
# Verify schema_migrations table specifically (this was the root cause)
if ! sudo "$PLEX_SQLITE" "$new_db" "SELECT COUNT(*) FROM schema_migrations;" >/dev/null 2>&1; then
log_warning "New database missing schema_migrations table - Plex will not start"
sudo rm -f "$new_db" "$dump_file"
return 1
fi
log_success "New database created from dump ($new_db_size bytes, $table_count tables)"
# Verify the new database passes integrity check
log_info "Performing integrity check on repaired database..."
if sudo "$PLEX_SQLITE" "$new_db" "PRAGMA integrity_check;" 2>/dev/null | grep -q "ok"; then
log_success "New database passes integrity check"
# Replace original with repaired version
log_info "Replacing original database with repaired version..."
if sudo mv "$new_db" "$original_db"; then
# Force filesystem sync to prevent corruption
sync
sudo chown plex:plex "$original_db"
sudo chmod 644 "$original_db"
sudo rm -f "$dump_file"
log_success "Database successfully repaired and replaced"
return 0
else
log_error "Failed to replace original database with repaired version"
sudo rm -f "$dump_file"
return 1
fi
else
log_warning "Repaired database failed integrity check"
sudo rm -f "$new_db" "$dump_file"
return 1
fi
else
log_warning "Failed to create database from dump - SQL import failed"
sudo rm -f "$dump_file"
return 1
fi
else
log_warning "Failed to dump corrupted database - dump command failed"
# Clean up any potentially created but empty dump file
sudo rm -f "$dump_file"
return 1
fi
}
# Strategy 2: Schema recreation with data recovery
attempt_schema_recreation() {
local working_copy="$1"
local original_db="$2"
local timestamp="$3"
local schema_file="${original_db}.schema-${timestamp}.sql"
local new_db="${original_db}.rebuilt-${timestamp}"
log_message "Attempting repair via schema recreation..."
# Extract schema
if sudo "$PLEX_SQLITE" "$working_copy" ".schema" 2>/dev/null | sudo tee "$schema_file" >/dev/null; then
log_success "Schema extracted"
# Create new database with schema
if sudo cat "$schema_file" | sudo "$PLEX_SQLITE" "$new_db" 2>/dev/null; then
log_success "New database created with schema"
# Try to recover data table by table
if recover_table_data "$working_copy" "$new_db"; then
log_success "Data recovery completed"
# Verify the rebuilt database
if sudo "$PLEX_SQLITE" "$new_db" "PRAGMA integrity_check;" 2>/dev/null | grep -q "ok"; then
log_success "Rebuilt database passes integrity check"
if sudo mv "$new_db" "$original_db"; then
# Force filesystem sync to prevent corruption
sync
sudo chown plex:plex "$original_db"
sudo chmod 644 "$original_db"
sudo rm -f "$schema_file"
return 0
fi
else
log_warning "Rebuilt database failed integrity check"
fi
fi
fi
sudo rm -f "$new_db" "$schema_file"
fi
return 1
}
# Strategy 3: Recovery from previous backup
attempt_backup_recovery() {
local original_db="$1"
local backup_dir="$2"
local current_backup="$3"
log_message "Attempting recovery from previous backup..."
# Find the most recent backup that's not the current corrupted one
local latest_backup
if [[ -n "$current_backup" ]]; then
# Exclude the current backup from consideration
latest_backup=$(find "$backup_dir" -name "plex-backup-*.tar.gz" -type f ! -samefile "$current_backup" -printf '%T@ %p\n' 2>/dev/null | sort -nr | head -1 | cut -d' ' -f2-)
else
latest_backup=$(find "$backup_dir" -name "plex-backup-*.tar.gz" -type f -printf '%T@ %p\n' 2>/dev/null | sort -nr | head -1 | cut -d' ' -f2-)
fi
if [[ -n "$latest_backup" && -f "$latest_backup" ]]; then
log_message "Found recent backup: $(basename "$latest_backup")"
local temp_restore_dir="/tmp/plex-restore-$$"
mkdir -p "$temp_restore_dir"
# Extract the backup
if tar -xzf "$latest_backup" -C "$temp_restore_dir" 2>/dev/null; then
local restored_db
restored_db="${temp_restore_dir}/$(basename "$original_db")"
if [[ -f "$restored_db" ]]; then
# Verify the restored database
if sudo "$PLEX_SQLITE" "$restored_db" "PRAGMA integrity_check;" 2>/dev/null | grep -q "ok"; then
log_success "Backup database passes integrity check"
if sudo cp "$restored_db" "$original_db"; then
# Force filesystem sync to prevent corruption
sync
sudo chown plex:plex "$original_db"
sudo chmod 644 "$original_db"
log_success "Database restored from backup"
rm -rf "$temp_restore_dir"
return 0
fi
else
log_warning "Backup database also corrupted"
fi
fi
fi
rm -rf "$temp_restore_dir"
fi
return 1
}
# Recovery helper for table data
recover_table_data() {
local source_db="$1"
local target_db="$2"
# Get list of tables
local tables
tables=$(sudo "$PLEX_SQLITE" "$source_db" ".tables" 2>/dev/null)
if [[ -z "$tables" ]]; then
log_warning "No tables found in source database"
return 1
fi
local recovered_count=0
local total_tables=0
for table in $tables; do
((total_tables++))
# Try to copy data from each table
if sudo "$PLEX_SQLITE" "$source_db" ".mode insert $table" ".output | sudo tee /tmp/table_data_$$.sql > /dev/null" "SELECT * FROM $table;" ".output stdout" 2>/dev/null && \
sudo cat "/tmp/table_data_$$.sql" | sudo "$PLEX_SQLITE" "$target_db" 2>/dev/null; then
((recovered_count++))
sudo rm -f "/tmp/table_data_$$.sql" 2>/dev/null || true
else
log_warning "Failed to recover data from table: $table"
sudo rm -f "/tmp/table_data_$$.sql" 2>/dev/null || true
fi
done
log_message "Recovered $recovered_count/$total_tables tables"
# Consider successful if we recovered at least 80% of tables
# Prevent division by zero
if [ "$total_tables" -eq 0 ]; then
log_warning "No tables found for recovery"
return 1
fi
if (( recovered_count * 100 / total_tables >= 80 )); then
return 0
fi
return 1
}
# Cleanup helper function
cleanup_repair_files() {
local pre_repair_backup="$1"
local working_copy="$2"
if [[ -n "$pre_repair_backup" && -f "$pre_repair_backup" ]]; then
sudo rm -f "$pre_repair_backup" 2>/dev/null || true
fi
if [[ -n "$working_copy" && -f "$working_copy" ]]; then
sudo rm -f "$working_copy" 2>/dev/null || true
fi
}
# WAL (Write-Ahead Logging) file handling
handle_wal_files() {
local action="$1" # "backup" or "restore"
local backup_path="$2"
log_info "Handling WAL files: $action"
# Define WAL files that might exist
local wal_files=(
"/var/lib/plexmediaserver/Library/Application Support/Plex Media Server/Plug-in Support/Databases/com.plexapp.plugins.library.db-wal"
"/var/lib/plexmediaserver/Library/Application Support/Plex Media Server/Plug-in Support/Databases/com.plexapp.plugins.library.db-shm"
"/var/lib/plexmediaserver/Library/Application Support/Plex Media Server/Plug-in Support/Databases/com.plexapp.plugins.library.blobs.db-wal"
"/var/lib/plexmediaserver/Library/Application Support/Plex Media Server/Plug-in Support/Databases/com.plexapp.plugins.library.blobs.db-shm"
)
for wal_file in "${wal_files[@]}"; do
local wal_basename
wal_basename=$(basename "$wal_file")
case "$action" in
"backup")
if [ -f "$wal_file" ]; then
log_info "Found WAL/SHM file: $wal_basename"
local backup_file="${backup_path}/${wal_basename}"
if sudo cp "$wal_file" "$backup_file"; then
# Force filesystem sync to prevent corruption
sync
log_success "Backed up WAL/SHM file: $wal_basename"
# Verify backup
if verify_backup "$wal_file" "$backup_file"; then
log_success "Verified WAL/SHM backup: $wal_basename"
else
log_warning "WAL/SHM backup verification failed: $wal_basename"
fi
else
log_warning "Failed to backup WAL/SHM file: $wal_basename"
fi
else
log_info "WAL/SHM file not found (normal): $wal_basename"
fi
;;
"checkpoint")
# Force WAL checkpoint to integrate changes into main database
local db_file="${wal_file%.db-*}.db"
if [ -f "$db_file" ] && [ -f "$wal_file" ]; then
log_info "Performing WAL checkpoint for: $(basename "$db_file")"
if sudo "$PLEX_SQLITE" "$db_file" "PRAGMA wal_checkpoint(FULL);" 2>/dev/null; then
log_success "WAL checkpoint completed for: $(basename "$db_file")"
else
log_warning "WAL checkpoint failed for: $(basename "$db_file")"
fi
fi
;;
esac
done
}
# Enhanced WAL file management for repair operations
handle_wal_files_for_repair() {
local db_file="$1"
local operation="${2:-prepare}" # prepare, cleanup, or restore
local db_dir
db_dir=$(dirname "$db_file")
local db_base
db_base=$(basename "$db_file" .db)
local wal_file="${db_dir}/${db_base}.db-wal"
local shm_file="${db_dir}/${db_base}.db-shm"
case "$operation" in
"prepare")
log_message "Preparing WAL files for repair of $(basename "$db_file")"
# Force WAL checkpoint to consolidate all changes
if [ -f "$wal_file" ]; then
log_info "Found WAL file, performing checkpoint..."
if sudo "$PLEX_SQLITE" "$db_file" "PRAGMA wal_checkpoint(TRUNCATE);" 2>/dev/null; then
log_success "WAL checkpoint completed"
else
log_warning "WAL checkpoint failed, continuing with repair"
fi
fi
# Create backup copies of WAL/SHM files if they exist
for file in "$wal_file" "$shm_file"; do
if [ -f "$file" ]; then
local backup_file="${file}.repair-backup"
if sudo cp "$file" "$backup_file" 2>/dev/null; then
# Force filesystem sync to prevent corruption
sync
log_info "Backed up $(basename "$file") for repair"
fi
fi
done
;;
"cleanup")
log_message "Cleaning up WAL files after repair"
# Remove any remaining WAL/SHM files to force clean state
for file in "$wal_file" "$shm_file"; do
if [ -f "$file" ]; then
if sudo rm -f "$file" 2>/dev/null; then
log_info "Removed $(basename "$file") for clean state"
fi
fi
done
# Force WAL mode back on for consistency
if sudo "$PLEX_SQLITE" "$db_file" "PRAGMA journal_mode=WAL;" 2>/dev/null | grep -q "wal"; then
log_success "WAL mode restored for $(basename "$db_file")"
else
log_warning "Failed to restore WAL mode for $(basename "$db_file")"
fi
;;
"restore")
log_message "Restoring WAL files after failed repair"
# Restore WAL/SHM backup files if they exist
for file in "$wal_file" "$shm_file"; do
local backup_file="${file}.repair-backup"
if [ -f "$backup_file" ]; then
if sudo mv "$backup_file" "$file" 2>/dev/null; then
log_info "Restored $(basename "$file") from backup"
else
log_warning "Failed to restore $(basename "$file") from backup"
# Try to remove broken backup file
sudo rm -f "$backup_file" 2>/dev/null || true
fi
else
log_info "No backup found for $(basename "$file")"
fi
done
;;
esac
}
# Enhanced database integrity check with WAL handling
check_database_integrity_with_wal() {
local db_file="$1"
local db_name
db_name=$(basename "$db_file")
log_message "Checking database integrity with WAL handling: $db_name"
# Check if Plex SQLite exists
if [ ! -f "$PLEX_SQLITE" ]; then
log_error "Plex SQLite binary not found at: $PLEX_SQLITE"
return 1
fi
# Make Plex SQLite executable if it isn't already
sudo chmod +x "$PLEX_SQLITE" 2>/dev/null || true
# Check if WAL file exists and handle it
local wal_file="${db_file}-wal"
if [ -f "$wal_file" ]; then
log_info "WAL file found for $db_name, performing checkpoint..."
if sudo "$PLEX_SQLITE" "$db_file" "PRAGMA wal_checkpoint(FULL);" 2>/dev/null; then
log_success "WAL checkpoint completed for $db_name"
else
log_warning "WAL checkpoint failed for $db_name, proceeding with integrity check"
fi
fi
# Run integrity check
local integrity_result
integrity_result=$(sudo "$PLEX_SQLITE" "$db_file" "PRAGMA integrity_check;" 2>&1)
local check_exit_code=$?
if [ $check_exit_code -ne 0 ]; then
log_error "Failed to run integrity check on $db_name: $integrity_result"
return 1
fi
if echo "$integrity_result" | grep -q "^ok$"; then
log_success "Database integrity check passed: $db_name"
return 0
else
log_warning "Database integrity issues detected in $db_name:"
echo "$integrity_result" | while read -r line; do
log_warning " $line"
done
return 1
fi
}
# Parallel verification function
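# Throttling sketch: at most max_jobs verifier subshells run concurrently; once the cap is
# reached we wait on the oldest PID before launching the next (a simple sliding window).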
verify_files_parallel() {
local backup_dir="$1"
local -a pids=()
local temp_dir
temp_dir=$(mktemp -d)
local verification_errors=0
local max_jobs=4 # Limit concurrent jobs to prevent system overload
local job_count=0
if [[ "$PARALLEL_VERIFICATION" != true ]]; then
# Fall back to sequential verification
for nickname in "${!PLEX_FILES[@]}"; do
local src_file="${PLEX_FILES[$nickname]}"
local dest_file
dest_file="$backup_dir/$(basename "$src_file")"
if [ -f "$dest_file" ]; then
if ! verify_backup "$src_file" "$dest_file"; then
verification_errors=$((verification_errors + 1))
fi
fi
done
rm -rf "$temp_dir" 2>/dev/null || true
return $verification_errors
fi
log_info "Starting parallel verification in $backup_dir (max $max_jobs concurrent jobs)"
# Start verification jobs in parallel with job control
for nickname in "${!PLEX_FILES[@]}"; do
local src_file="${PLEX_FILES[$nickname]}"
local dest_file
dest_file="$backup_dir/$(basename "$src_file")"
if [ -f "$dest_file" ]; then
# Wait if we've reached the job limit
if [ $job_count -ge $max_jobs ]; then
wait "${pids[0]}" 2>/dev/null || true
pids=("${pids[@]:1}") # Remove first element
job_count=$((job_count - 1))
fi
(
local result_file="$temp_dir/$nickname.result"
if verify_backup "$src_file" "$dest_file"; then
echo "0" > "$result_file"
else
echo "1" > "$result_file"
fi
) &
pids+=($!)
job_count=$((job_count + 1))
fi
done
# Wait for all remaining verification jobs to complete
for pid in "${pids[@]}"; do
wait "$pid" 2>/dev/null || true
done
# Collect results
for nickname in "${!PLEX_FILES[@]}"; do
local result_file="$temp_dir/$nickname.result"
if [ -f "$result_file" ]; then
local result
result=$(cat "$result_file" 2>/dev/null || echo "1")
if [ "$result" != "0" ]; then
verification_errors=$((verification_errors + 1))
fi
fi
done
# Cleanup
rm -rf "$temp_dir" 2>/dev/null || true
return $verification_errors
}
# Enhanced backup verification with multiple retry strategies and corruption detection
verify_backup() {
local src="$1"
local dest="$2"
local max_retries=3
local retry_count=0
log_message "Verifying backup integrity: $(basename "$src")"
# Calculate destination checksum first (this doesn't change)
local dest_checksum
local dest_result=0
if ! dest_checksum=$(sudo md5sum "$dest" 2>/dev/null | cut -d' ' -f1); then
dest_result=1
dest_checksum=""
fi
if [[ $dest_result -ne 0 ]] || [[ ! "$dest_checksum" =~ ^[a-f0-9]{32}$ ]]; then
log_error "Failed to calculate destination checksum for $(basename "$dest")"
return 1
fi
# Retry loop for source checksum calculation
while [ $retry_count -lt $max_retries ]; do
# Calculate source checksum (without caching to get current state)
local src_checksum
local src_result=0
if ! src_checksum=$(sudo md5sum "$src" 2>/dev/null | cut -d' ' -f1); then
src_result=1
src_checksum=""
fi
if [[ $src_result -ne 0 ]] || [[ ! "$src_checksum" =~ ^[a-f0-9]{32}$ ]]; then
log_error "Failed to calculate source checksum for $(basename "$src") (attempt $((retry_count + 1)))"
((retry_count++))
if [[ $retry_count -lt $max_retries ]]; then
log_warning "Retrying checksum calculation in 2 seconds..."
sleep 2
continue
else
return 1
fi
fi
if [ "$src_checksum" == "$dest_checksum" ]; then
log_success "Backup verification passed: $(basename "$src")"
log_info "Source checksum: $src_checksum"
log_info "Backup checksum: $dest_checksum"
return 0
else
# If checksums don't match, wait and try again
((retry_count++))
if [ $retry_count -lt $max_retries ]; then
log_warning "Checksum mismatch for $(basename "$src") (attempt $retry_count/$max_retries), retrying in 3 seconds..."
sleep 3
else
log_error "Backup verification failed after $max_retries attempts: $(basename "$src")"
log_error "Source checksum: $src_checksum"
log_error "Backup checksum: $dest_checksum"
# For database files, perform additional integrity check on backup
if [[ "$dest" == *.db ]]; then
log_warning "Database file checksum mismatch - checking backup integrity..."
if sudo "$PLEX_SQLITE" "$dest" "PRAGMA integrity_check;" 2>/dev/null | grep -q "ok"; then
log_warning "Backup database integrity is valid despite checksum mismatch"
log_warning "Accepting backup (source file may have been modified after copy)"
return 0
else
log_error "Backup database is also corrupted - backup failed"
return 1
fi
fi
return 1
fi
fi
done
return 1
}
# Enhanced service management with SAFE shutdown procedures and extended timeouts
# CRITICAL SAFETY NOTE: This function was modified to remove dangerous force-kill operations
# that were causing database corruption. Now uses only graceful shutdown methods.
manage_plex_service() {
local action="$1"
local force_stop="${2:-false}"
local operation_start
operation_start=$(date +%s)
log_message "Managing Plex service: $action"
case "$action" in
stop)
# Check if already stopped
if ! sudo systemctl is-active --quiet plexmediaserver.service; then
log_info "Plex service is already stopped"
track_performance "service_stop" "$operation_start"
return 0
fi
# First try normal stop with extended timeout
if sudo systemctl stop plexmediaserver.service; then
log_success "Plex service stop command issued"
# Wait for clean shutdown with progress indicator (extended timeout)
local wait_time=0
local max_wait=30 # Increased from 15 to 30 seconds
while [ $wait_time -lt $max_wait ]; do
if ! sudo systemctl is-active --quiet plexmediaserver.service; then
log_success "Plex service confirmed stopped (${wait_time}s)"
track_performance "service_stop" "$operation_start"
return 0
fi
sleep 1
wait_time=$((wait_time + 1))
echo -n "."
done
echo
# If normal stop failed and force_stop is enabled, try extended graceful shutdown
if [ "$force_stop" = "true" ]; then
log_warning "Normal stop failed, attempting extended graceful shutdown..."
local plex_pids
plex_pids=$(pgrep -f "Plex Media Server" 2>/dev/null || true)
if [ -n "$plex_pids" ]; then
log_message "Found Plex processes: $plex_pids"
log_message "Sending graceful termination signal and waiting longer..."
# Send TERM signal for graceful shutdown
if sudo pkill -TERM -f "Plex Media Server" 2>/dev/null; then
# Extended wait for graceful shutdown (up to 60 seconds)
local extended_wait=0
local max_extended_wait=60
while [ $extended_wait -lt $max_extended_wait ]; do
plex_pids=$(pgrep -f "Plex Media Server" 2>/dev/null || true)
if [ -z "$plex_pids" ]; then
log_success "Plex service gracefully stopped after extended wait (${extended_wait}s)"
track_performance "service_extended_stop" "$operation_start"
return 0
fi
sleep 2
extended_wait=$((extended_wait + 2))
echo -n "."
done
echo
# If still running after extended wait, log error but don't force kill
plex_pids=$(pgrep -f "Plex Media Server" 2>/dev/null || true)
if [ -n "$plex_pids" ]; then
log_error "Plex processes still running after ${max_extended_wait}s graceful shutdown attempt"
log_error "Refusing to force-kill processes to prevent database corruption"
log_error "Manual intervention may be required: PIDs $plex_pids"
return 1
fi
else
log_error "Failed to send TERM signal to Plex processes"
return 1
fi
else
log_success "No Plex processes found running"
track_performance "service_stop" "$operation_start"
return 0
fi
else
log_warning "Plex service may not have stopped cleanly after ${max_wait}s"
# Check one more time if service actually stopped with extended timeout
sleep 2
if ! sudo systemctl is-active --quiet plexmediaserver.service; then
log_success "Plex service stopped (delayed confirmation)"
track_performance "service_stop" "$operation_start"
return 0
else
log_warning "Plex service still appears to be running after ${max_wait}s"
return 1
fi
fi
else
log_error "Failed to issue stop command for Plex service"
return 1
fi
;;
start)
# Check if service is already running
if sudo systemctl is-active --quiet plexmediaserver.service; then
log_info "Plex service is already running"
track_performance "service_start" "$operation_start"
return 0
fi
if sudo systemctl start plexmediaserver.service; then
log_success "Plex service start command issued"
# Wait for service to be fully running with progress indicator (extended timeout)
local wait_time=0
local max_wait=45 # Increased from 30 to 45 seconds for database initialization
while [ $wait_time -lt $max_wait ]; do
if sudo systemctl is-active --quiet plexmediaserver.service; then
# Additional verification: wait for full service readiness
sleep 3
if sudo systemctl is-active --quiet plexmediaserver.service; then
# Final check: ensure service is stable and not in restart loop
sleep 2
if sudo systemctl is-active --quiet plexmediaserver.service; then
log_success "Plex service confirmed running and stable (${wait_time}s)"
track_performance "service_start" "$operation_start"
return 0
fi
fi
fi
sleep 1
wait_time=$((wait_time + 1))
echo -n "."
done
echo
log_error "Plex service failed to start within ${max_wait}s"
# Get service status for debugging
local service_status
service_status=$(sudo systemctl status plexmediaserver.service --no-pager -l 2>&1 | head -10 || echo "Failed to get status")
log_error "Service status: $service_status"
return 1
else
log_error "Failed to start Plex service"
return 1
fi
;;
*)
log_error "Invalid service action: $action"
return 1
;;
esac
}
# Check available disk space
check_disk_space() {
local backup_dir="$1"
local required_space_mb="$2"
local available_space_kb
available_space_kb=$(df "$backup_dir" | awk 'NR==2 {print $4}')
local available_space_mb=$((available_space_kb / 1024))
if [ "$available_space_mb" -lt "$required_space_mb" ]; then
log_error "Insufficient disk space. Required: ${required_space_mb}MB, Available: ${available_space_mb}MB"
return 1
fi
log_message "Disk space check passed. Available: ${available_space_mb}MB"
return 0
}
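# df reports 1K blocks, so e.g. an available column of 10485760 KB is 10240 MB
# (10485760 / 1024), which would pass a 2048 MB requirement.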
# Estimate backup size
estimate_backup_size() {
local total_size=0
for nickname in "${!PLEX_FILES[@]}"; do
local file="${PLEX_FILES[$nickname]}"
if [ -f "$file" ]; then
local size_kb
size_kb=$(du -k "$file" 2>/dev/null | cut -f1)
total_size=$((total_size + size_kb))
fi
done
echo $((total_size / 1024)) # Return size in MB
}
# Generate performance report
generate_performance_report() {
if [ "$PERFORMANCE_MONITORING" != true ] || [ ! -f "$PERFORMANCE_LOG_FILE" ]; then
return 0
fi
log_info "Performance Summary:"
# Recent performance data (last 10 entries)
jq -r '.[-10:] | .[] | " \(.operation): \(.duration_seconds)s (\(.timestamp))"' "$PERFORMANCE_LOG_FILE" 2>/dev/null || true
# Calculate averages for common operations
local avg_backup
avg_backup=$(jq '[.[] | select(.operation == "backup") | .duration_seconds] | if length > 0 then add/length else 0 end' "$PERFORMANCE_LOG_FILE" 2>/dev/null || echo "0")
local avg_verification
avg_verification=$(jq '[.[] | select(.operation == "verification") | .duration_seconds] | if length > 0 then add/length else 0 end' "$PERFORMANCE_LOG_FILE" 2>/dev/null || echo "0")
local avg_service_stop
avg_service_stop=$(jq '[.[] | select(.operation == "service_stop") | .duration_seconds] | if length > 0 then add/length else 0 end' "$PERFORMANCE_LOG_FILE" 2>/dev/null || echo "0")
local avg_service_start
avg_service_start=$(jq '[.[] | select(.operation == "service_start") | .duration_seconds] | if length > 0 then add/length else 0 end' "$PERFORMANCE_LOG_FILE" 2>/dev/null || echo "0")
if [ "$avg_backup" != "0" ]; then
log_info "Average backup time: ${avg_backup}s"
fi
if [ "$avg_verification" != "0" ]; then
log_info "Average verification time: ${avg_verification}s"
fi
if [ "$avg_service_stop" != "0" ]; then
log_info "Average service stop time: ${avg_service_stop}s"
fi
if [ "$avg_service_start" != "0" ]; then
log_info "Average service start time: ${avg_service_start}s"
fi
}
# Clean old backups
cleanup_old_backups() {
log_message "Cleaning up old backups..."
# Remove backups older than MAX_BACKUP_AGE_DAYS
find "${BACKUP_ROOT}" -maxdepth 1 -type f -name "plex-backup-*.tar.gz" -mtime +${MAX_BACKUP_AGE_DAYS} -delete 2>/dev/null || true
# Keep only MAX_BACKUPS_TO_KEEP most recent backups
local backup_count
backup_count=$(find "${BACKUP_ROOT}" -maxdepth 1 -type f -name "plex-backup-*.tar.gz" | wc -l)
if [ "$backup_count" -gt "$MAX_BACKUPS_TO_KEEP" ]; then
local excess_count=$((backup_count - MAX_BACKUPS_TO_KEEP))
log_message "Removing $excess_count old backup(s)..."
find "${BACKUP_ROOT}" -maxdepth 1 -type f -name "plex-backup-*.tar.gz" -printf '%T@ %p\n' | \
sort -n | head -n "$excess_count" | cut -d' ' -f2- | \
xargs -r rm -f
fi
# Clean up any remaining dated directories from old backup structure
find "${BACKUP_ROOT}" -maxdepth 1 -type d -name "????????" -exec rm -rf {} \; 2>/dev/null || true
log_message "Backup cleanup completed"
}
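# Retention example (illustrative): with MAX_BACKUP_AGE_DAYS=30 and MAX_BACKUPS_TO_KEEP=10,
# a 45-day-old archive is removed by the -mtime pass, and if 13 archives remain afterwards,
# the sort|head pipeline prunes the 3 oldest.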
# Database integrity check only
check_integrity_only() {
log_message "Starting database integrity check at $(date)"
# Stop Plex service - NEVER use force stop for integrity checks to prevent corruption
if ! manage_plex_service stop; then
log_error "Failed to stop Plex service gracefully"
log_error "Cannot perform integrity check while service may be running"
log_error "Manual intervention required - please stop Plex service manually"
return 1
fi
# Handle WAL files first
handle_wal_files "checkpoint"
local db_integrity_issues=0
local databases_checked=0
for nickname in "${!PLEX_FILES[@]}"; do
local file="${PLEX_FILES[$nickname]}"
# Only check database files
if [[ "$file" == *".db" ]] && [ -f "$file" ]; then
databases_checked=$((databases_checked + 1))
log_message "Checking integrity of $(basename "$file")..."
if ! check_database_integrity_with_wal "$file"; then
db_integrity_issues=$((db_integrity_issues + 1))
log_warning "Database integrity issues found in $(basename "$file")"
# Determine if we should attempt repair
local should_repair=false
if [ "$AUTO_REPAIR" = true ]; then
should_repair=true
log_message "Auto-repair enabled, attempting repair..."
elif [ "$INTERACTIVE_MODE" = true ]; then
read -p "Attempt to repair $(basename "$file")? [y/N]: " -n 1 -r -t 30
local read_result=$?
echo
if [ $read_result -eq 0 ] && [[ $REPLY =~ ^[Yy]$ ]]; then
should_repair=true
elif [ $read_result -ne 0 ]; then
log_warning "Read timeout or error, defaulting to no repair"
fi
else
log_warning "Non-interactive mode: skipping repair for $(basename "$file")"
fi
if [ "$should_repair" = true ]; then
if repair_database "$file"; then
log_success "Database repair successful for $(basename "$file")"
# Re-check integrity after repair
if check_database_integrity "$file"; then
log_success "Post-repair integrity check passed for $(basename "$file")"
else
log_warning "Post-repair integrity check still shows issues for $(basename "$file")"
fi
else
log_error "Database repair failed for $(basename "$file")"
fi
fi
else
log_success "Database integrity check passed for $(basename "$file")"
fi
fi
done
# Start Plex service
manage_plex_service start
# Summary
log_message "Integrity check completed at $(date)"
log_message "Databases checked: $databases_checked"
log_message "Databases with issues: $db_integrity_issues"
if [ "$db_integrity_issues" -gt 0 ]; then
log_warning "Integrity check completed with issues found"
exit 2 # documented exit code for database integrity issues
else
log_success "All database integrity checks passed"
exit 0
fi
}
# Main backup function
main() {
local overall_start
overall_start=$(date +%s)
log_message "Starting enhanced Plex backup process at $(date)"
send_notification "Backup Started" "Plex backup process initiated" "info"
# Create necessary directories
mkdir -p "${BACKUP_ROOT}"
mkdir -p "${LOCAL_LOG_ROOT}"
# Initialize logs
initialize_logs
# Check if only doing integrity check
if [ "$INTEGRITY_CHECK_ONLY" = true ]; then
check_integrity_only
# shellcheck disable=SC2317
return $?
fi
# Estimate backup size
local estimated_size_mb
estimated_size_mb=$(estimate_backup_size)
log_message "Estimated backup size: ${estimated_size_mb}MB"
# Check disk space (require 2x estimated size for safety)
local required_space_mb=$((estimated_size_mb * 2))
if ! check_disk_space "${BACKUP_ROOT}" "$required_space_mb"; then
log_error "Aborting backup due to insufficient disk space"
exit 1
fi
# Stop Plex service; abort if it cannot be stopped cleanly (exit code 3: service management failure)
if ! manage_plex_service stop; then
send_notification "Backup Error" "Failed to stop Plex service before backup" "error"
exit 3
fi
local backup_errors=0
local files_backed_up=0
local backed_up_files=() # Array to track successfully backed up files
local BACKUP_PATH="${BACKUP_ROOT}"
# Ensure backup root directory exists
mkdir -p "$BACKUP_PATH"
# Handle WAL files and check database integrity before backup
log_message "Performing WAL checkpoint and checking database integrity before backup..."
handle_wal_files "checkpoint"
local db_integrity_issues=0
for nickname in "${!PLEX_FILES[@]}"; do
local file="${PLEX_FILES[$nickname]}"
# Only check database files
if [[ "$file" == *".db" ]] && [ -f "$file" ]; then
if ! check_database_integrity_with_wal "$file"; then
db_integrity_issues=$((db_integrity_issues + 1))
log_warning "Database integrity issues found in $(basename "$file")"
# Always attempt repair when corruption is detected (default behavior)
local should_repair=true
local repair_attempted=false
# Override repair behavior only if explicitly disabled
if [ "$AUTO_REPAIR" = false ]; then
should_repair=false
log_warning "Auto-repair explicitly disabled, skipping repair"
elif [ "$INTERACTIVE_MODE" = true ]; then
read -p "Database $(basename "$file") has integrity issues. Attempt repair before backup? [Y/n]: " -n 1 -r -t 30
local read_result=$?
echo
if [ $read_result -eq 0 ] && [[ $REPLY =~ ^[Nn]$ ]]; then
should_repair=false
log_message "User declined repair for $(basename "$file")"
elif [ $read_result -ne 0 ]; then
log_message "Read timeout, proceeding with default repair"
fi
else
log_message "Auto-repair enabled by default, attempting repair..."
fi
if [ "$should_repair" = true ]; then
repair_attempted=true
log_message "Attempting to repair corrupted database: $(basename "$file")"
if repair_database "$file"; then
log_success "Database repair successful for $(basename "$file")"
# Re-verify integrity after repair
if check_database_integrity_with_wal "$file"; then
log_success "Post-repair integrity verification passed for $(basename "$file")"
# Decrement issue count since repair was successful
db_integrity_issues=$((db_integrity_issues - 1))
else
log_warning "Post-repair integrity check still shows issues for $(basename "$file")"
log_warning "Will backup corrupted database - manual intervention may be needed"
fi
else
log_error "Database repair failed for $(basename "$file")"
log_warning "Will backup corrupted database - manual intervention may be needed"
backup_errors=$((backup_errors + 1))
fi
else
log_warning "Skipping repair - will backup database with known integrity issues"
fi
# Log repair attempt for monitoring purposes
if [ "$repair_attempted" = true ]; then
send_notification "Database Repair" "Attempted repair of $(basename "$file")" "warning"
fi
fi
fi
done
# Handle WAL files backup
handle_wal_files "backup" "$BACKUP_PATH"
# Backup files - always perform full backup
local backup_start
backup_start=$(date +%s)
for nickname in "${!PLEX_FILES[@]}"; do
local file="${PLEX_FILES[$nickname]}"
if [ -f "$file" ]; then
log_message "Backing up: $(basename "$file")"
# Create backup filename without timestamp (use original filename)
local backup_file
backup_file="${BACKUP_PATH}/$(basename "$file")"
# Copy file
if sudo cp "$file" "$backup_file"; then
# Force filesystem sync to prevent corruption
sync
# Ensure proper ownership of backup file
sudo chown plex:plex "$backup_file"
log_success "Copied: $(basename "$file")"
# Verify backup
if verify_backup "$file" "$backup_file"; then
log_success "Verified: $(basename "$file")"
files_backed_up=$((files_backed_up + 1))
# Add friendly filename to backed up files list
case "$(basename "$file")" in
"com.plexapp.plugins.library.db") backed_up_files+=("library.db") ;;
"com.plexapp.plugins.library.blobs.db") backed_up_files+=("blobs.db") ;;
"Preferences.xml") backed_up_files+=("Preferences.xml") ;;
*) backed_up_files+=("$(basename "$file")") ;;
esac
else
log_error "Verification failed: $(basename "$file")"
backup_errors=$((backup_errors + 1))
# Remove failed backup
rm -f "$backup_file"
fi
else
log_error "Failed to copy: $(basename "$file")"
backup_errors=$((backup_errors + 1))
fi
else
log_warning "File not found: $file"
fi
done
# Start Plex service
manage_plex_service start
# Create archive if files were backed up
if [ "$files_backed_up" -gt 0 ]; then
log_message "Creating compressed archive..."
# Check backup root directory is writable
if [ ! -w "$BACKUP_ROOT" ]; then
log_error "Backup root directory is not writable: $BACKUP_ROOT"
backup_errors=$((backup_errors + 1))
else
local temp_archive
temp_archive="/tmp/plex-backup-$(date '+%Y%m%d_%H%M%S').tar.gz"
local final_archive
final_archive="${BACKUP_ROOT}/plex-backup-$(date '+%Y%m%d_%H%M%S').tar.gz"
log_info "Temporary archive: $temp_archive"
log_info "Final archive: $final_archive"
# Create archive in /tmp first, containing only the backed up files
local temp_dir
temp_dir="/tmp/plex-backup-staging-$(date '+%Y%m%d_%H%M%S')"
if ! mkdir -p "$temp_dir"; then
log_error "Failed to create staging directory: $temp_dir"
backup_errors=$((backup_errors + 1))
else
log_info "Created staging directory: $temp_dir"
# Copy backed up files to staging directory
local files_staged=0
for nickname in "${!PLEX_FILES[@]}"; do
local file="${PLEX_FILES[$nickname]}"
local backup_file
backup_file="${BACKUP_PATH}/$(basename "$file")"
if [ -f "$backup_file" ]; then
if cp "$backup_file" "$temp_dir/"; then
files_staged=$((files_staged + 1))
log_info "Staged for archive: $(basename "$backup_file")"
else
log_warning "Failed to stage file: $(basename "$backup_file")"
fi
else
log_warning "Backup file not found for staging: $(basename "$backup_file")"
fi
done
# Check if any files were staged
if [ "$files_staged" -eq 0 ]; then
log_error "No files were staged for archive creation"
rm -rf "$temp_dir"
backup_errors=$((backup_errors + 1))
else
log_info "Staged $files_staged files for archive creation"
# Check disk space in /tmp
local temp_available_kb
temp_available_kb=$(df /tmp | awk 'NR==2 {print $4}')
local temp_available_mb=$((temp_available_kb / 1024))
local staging_size_mb
staging_size_mb=$(du -sm "$temp_dir" | cut -f1)
log_info "/tmp available space: ${temp_available_mb}MB, staging directory size: ${staging_size_mb}MB"
# Check if we have enough space (require 3x staging size for compression)
local required_space_mb=$((staging_size_mb * 3))
if [ "$temp_available_mb" -lt "$required_space_mb" ]; then
log_error "Insufficient space in /tmp for archive creation. Required: ${required_space_mb}MB, Available: ${temp_available_mb}MB"
rm -rf "$temp_dir"
backup_errors=$((backup_errors + 1))
else
# Create archive with detailed error logging
log_info "Creating archive: $(basename "$temp_archive")"
local tar_output
tar_output=$(tar -czf "$temp_archive" -C "$temp_dir" . 2>&1)
local tar_exit_code=$?
# Force filesystem sync after archive creation
sync
if [ $tar_exit_code -eq 0 ]; then
# Verify archive was actually created and has reasonable size
if [ -f "$temp_archive" ]; then
local archive_size_mb
archive_size_mb=$(du -sm "$temp_archive" | cut -f1)
log_success "Archive created successfully: $(basename "$temp_archive") (${archive_size_mb}MB)"
# Test archive integrity before moving
if tar -tzf "$temp_archive" >/dev/null 2>&1; then
log_success "Archive integrity verified"
# Move the completed archive to the backup root
if mv "$temp_archive" "$final_archive"; then
# Force filesystem sync after final move
sync
log_success "Archive moved to final location: $(basename "$final_archive")"
# Remove individual backup files and staging directory
rm -rf "$temp_dir"
for nickname in "${!PLEX_FILES[@]}"; do
local file="${PLEX_FILES[$nickname]}"
local backup_file
backup_file="${BACKUP_PATH}/$(basename "$file")"
rm -f "$backup_file" "$backup_file.md5"
done
else
log_error "Failed to move archive to final location: $final_archive"
log_error "Temporary archive remains at: $temp_archive"
rm -rf "$temp_dir"
backup_errors=$((backup_errors + 1))
fi
else
log_error "Archive integrity check failed - archive may be corrupted"
log_error "Archive size: ${archive_size_mb}MB"
rm -f "$temp_archive"
rm -rf "$temp_dir"
backup_errors=$((backup_errors + 1))
fi
else
log_error "Archive file was not created despite tar success"
rm -rf "$temp_dir"
backup_errors=$((backup_errors + 1))
fi
else
log_error "Failed to create archive (tar exit code: $tar_exit_code)"
if [ -n "$tar_output" ]; then
log_error "Tar command output: $tar_output"
fi
# Additional diagnostic information
log_error "Staging directory contents:"
find "$temp_dir" -ls 2>&1 | while IFS= read -r line; do
log_error " $line"
done
local temp_usage
temp_usage=$(df -h /tmp | awk 'NR==2 {print "Used: " $3 "/" $2 " (" $5 ")"}')
log_error "Temp filesystem status: $temp_usage"
rm -rf "$temp_dir"
backup_errors=$((backup_errors + 1))
fi
fi
fi
fi
fi
# Send notification
local files_list
files_list=$(format_backed_up_files "${backed_up_files[@]}")
send_notification "Backup Completed" "Successfully backed up $files_list" "success"
else
log_message "No files needed backup"
fi
# Cleanup old backups
cleanup_old_backups
# Track overall backup performance
if [ "$files_backed_up" -gt 0 ]; then
track_performance "full_backup" "$backup_start"
fi
track_performance "total_script" "$overall_start"
# Generate performance report
generate_performance_report
# Final summary
local total_time=$(($(date +%s) - overall_start))
log_message "Backup process completed at $(date)"
log_message "Total execution time: ${total_time}s"
log_message "Files backed up: $files_backed_up"
log_message "Errors encountered: $backup_errors"
# Sync logs to shared location and cleanup old local logs
log_info "Post-backup: synchronizing logs and cleaning up old files"
sync_logs_to_shared
cleanup_old_local_logs
if [ "$backup_errors" -gt 0 ]; then
log_error "Backup completed with errors"
send_notification "Backup Error" "Backup completed with $backup_errors errors" "error"
exit 4 # documented exit code for backup creation failure
else
log_success "Enhanced backup completed successfully"
local files_list
files_list=$(format_backed_up_files "${backed_up_files[@]}")
send_notification "Backup Success" "$files_list backed up successfully in ${total_time}s" "success"
fi
}
# Trap to ensure Plex is restarted on script exit
trap 'manage_plex_service start' EXIT
# Run main function
main "$@"