shell/plex/backup-plex.sh

#!/bin/bash
################################################################################
# Plex Media Server Enhanced Backup Script
################################################################################
#
# Author: Peter Wood <peter@peterwood.dev>
# Description: Comprehensive backup solution for Plex Media Server with advanced
# database integrity checking, automated repair capabilities,
# performance monitoring, and multi-channel notifications.
#
# Features:
# - Database integrity verification with automatic repair
# - WAL (Write-Ahead Logging) file handling
# - Performance monitoring with JSON logging
# - Parallel verification for improved speed
# - Multi-channel notifications (webhook, email, console)
# - Comprehensive error handling and recovery
# - Automated cleanup of old backups
#
# Related Scripts:
# - restore-plex.sh: Restore from backups created by this script
# - validate-plex-backups.sh: Validate backup integrity and health
# - monitor-plex-backup.sh: Real-time monitoring dashboard
# - test-plex-backup.sh: Comprehensive testing suite
# - plex.sh: General Plex service management
#
# Usage:
#   ./backup-plex.sh                         # Standard backup with auto-repair
#   ./backup-plex.sh --disable-auto-repair   # Backup without auto-repair
#   ./backup-plex.sh --check-integrity       # Integrity check only
#   ./backup-plex.sh --non-interactive       # Automated mode for cron jobs
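#
# Example cron entry (a sketch; the schedule and install path are assumptions, adjust to your setup):
#   15 3 * * * /opt/scripts/backup-plex.sh --non-interactive >> /var/log/plex-backup-cron.log 2>&1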
#
# Dependencies:
# - Plex Media Server
# - sqlite3 or Plex SQLite binary
# - curl (for webhook notifications)
# - jq (for JSON processing)
# - sendmail (optional, for email notifications)
#
# Exit Codes:
# 0 - Success
# 1 - General error
# 2 - Database integrity issues
# 3 - Service management failure
# 4 - Backup creation failure
#
################################################################################
# NOTE: Removed 'set -e' to allow graceful error handling in repair operations
# Critical operations use explicit error checking instead of automatic exit
# Color codes for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
CYAN='\033[0;36m'
NC='\033[0m' # No Color
# Configuration
MAX_BACKUP_AGE_DAYS=30
MAX_BACKUPS_TO_KEEP=10
BACKUP_ROOT="/mnt/share/media/backups/plex"
SHARED_LOG_ROOT="/mnt/share/media/backups/logs"
# Get script directory with proper error handling
if ! SCRIPT_PATH="$(readlink -f "$0")"; then
echo "Error: Failed to resolve script path" >&2
exit 1
fi
SCRIPT_DIR="$(dirname "$SCRIPT_PATH")"
LOCAL_LOG_ROOT="${SCRIPT_DIR}/logs"
PERFORMANCE_LOG_FILE="${LOCAL_LOG_ROOT}/plex-backup-performance.json"
# Backup strategy configuration - Always perform full backups
# Plex SQLite path (custom Plex SQLite binary)
PLEX_SQLITE="/usr/lib/plexmediaserver/Plex SQLite"
# Script options
AUTO_REPAIR=true # Default to enabled for automatic corruption detection and repair
INTEGRITY_CHECK_ONLY=false
INTERACTIVE_MODE=false
PARALLEL_VERIFICATION=true
PERFORMANCE_MONITORING=true
WEBHOOK_URL="https://notify.peterwood.rocks/lab"
EMAIL_RECIPIENT=""
# Parse command line arguments
while [[ $# -gt 0 ]]; do
case $1 in
--auto-repair)
AUTO_REPAIR=true
INTERACTIVE_MODE=false
shift
;;
--disable-auto-repair)
AUTO_REPAIR=false
shift
;;
--check-integrity)
INTEGRITY_CHECK_ONLY=true
shift
;;
--non-interactive)
INTERACTIVE_MODE=false
shift
;;
--interactive)
INTERACTIVE_MODE=true
shift
;;
--no-parallel)
PARALLEL_VERIFICATION=false
shift
;;
--no-performance)
PERFORMANCE_MONITORING=false
shift
;;
--webhook=*)
WEBHOOK_URL="${1#*=}"
shift
;;
--email=*)
EMAIL_RECIPIENT="${1#*=}"
shift
;;
-h|--help)
echo "Usage: $0 [OPTIONS]"
echo "Options:"
echo " --auto-repair Force enable automatic database repair (default: enabled)"
echo " --disable-auto-repair Disable automatic database repair"
echo " --check-integrity Only check database integrity, don't backup"
echo " --non-interactive Run in non-interactive mode (for automation)"
echo " --interactive Run in interactive mode (prompts for repair decisions)"
echo " --no-parallel Disable parallel verification (slower but safer)"
echo " --no-performance Disable performance monitoring"
echo " --webhook=URL Send notifications to webhook URL"
echo " --email=ADDRESS Send notifications to email address"
echo " -h, --help Show this help message"
echo ""
echo "Database Integrity & Repair:"
echo " By default, the script automatically detects and attempts to repair"
echo " corrupted databases before backup. Use --disable-auto-repair to"
echo " skip repair and backup corrupted databases as-is."
echo ""
exit 0
;;
*)
echo "Unknown option: $1"
exit 1
;;
esac
done
# Create logs directory
mkdir -p "${SCRIPT_DIR}/logs"
# Define Plex files and their nicknames
declare -A PLEX_FILES=(
["database"]="/var/lib/plexmediaserver/Library/Application Support/Plex Media Server/Plug-in Support/Databases/com.plexapp.plugins.library.db"
["blobs"]="/var/lib/plexmediaserver/Library/Application Support/Plex Media Server/Plug-in Support/Databases/com.plexapp.plugins.library.blobs.db"
["preferences"]="/var/lib/plexmediaserver/Library/Application Support/Plex Media Server/Preferences.xml"
)
# Logging functions
log_message() {
local message="$1"
local timestamp
timestamp=$(date '+%Y-%m-%d %H:%M:%S')
echo -e "${CYAN}[${timestamp}]${NC} ${message}"
mkdir -p "${LOCAL_LOG_ROOT}"
echo "[${timestamp}] ${message}" >> "${LOCAL_LOG_ROOT}/plex-backup-$(date '+%Y-%m-%d').log" 2>/dev/null || true
}
log_error() {
local message="$1"
local timestamp
timestamp=$(date '+%Y-%m-%d %H:%M:%S')
echo -e "${RED}[${timestamp}] ERROR:${NC} ${message}"
mkdir -p "${LOCAL_LOG_ROOT}"
echo "[${timestamp}] ERROR: ${message}" >> "${LOCAL_LOG_ROOT}/plex-backup-$(date '+%Y-%m-%d').log" 2>/dev/null || true
}
log_success() {
local message="$1"
local timestamp
timestamp=$(date '+%Y-%m-%d %H:%M:%S')
echo -e "${GREEN}[${timestamp}] SUCCESS:${NC} ${message}"
mkdir -p "$LOCAL_LOG_ROOT"
echo "[${timestamp}] SUCCESS: $message" >> "${LOCAL_LOG_ROOT}/plex-backup-$(date '+%Y-%m-%d').log" 2>/dev/null || true
}
log_warning() {
local message="$1"
local timestamp
timestamp=$(date '+%Y-%m-%d %H:%M:%S')
echo -e "${YELLOW}[${timestamp}] WARNING:${NC} ${message}"
mkdir -p "$LOCAL_LOG_ROOT"
echo "[${timestamp}] WARNING: $message" >> "${LOCAL_LOG_ROOT}/plex-backup-$(date '+%Y-%m-%d').log" 2>/dev/null || true
}
log_info() {
local message="$1"
local timestamp
timestamp=$(date '+%Y-%m-%d %H:%M:%S')
echo -e "${BLUE}[${timestamp}] INFO:${NC} ${message}"
mkdir -p "$LOCAL_LOG_ROOT"
echo "[${timestamp}] INFO: $message" >> "${LOCAL_LOG_ROOT}/plex-backup-$(date '+%Y-%m-%d').log" 2>/dev/null || true
}
# Performance tracking functions
track_performance() {
if [[ "$PERFORMANCE_MONITORING" != true ]]; then
return 0
fi
local operation="$1"
local start_time="$2"
local end_time="${3:-$(date +%s)}"
local duration=$((end_time - start_time))
# Initialize performance log if it doesn't exist
if [ ! -f "$PERFORMANCE_LOG_FILE" ]; then
mkdir -p "$(dirname "$PERFORMANCE_LOG_FILE")"
echo "[]" > "$PERFORMANCE_LOG_FILE"
fi
# Add performance entry
local entry
local timestamp
if ! timestamp="$(date -Iseconds)"; then
timestamp="$(date)" # Fallback to basic date format
fi
entry=$(jq -n \
--arg operation "$operation" \
--arg duration "$duration" \
--arg timestamp "$timestamp" \
'{
operation: $operation,
duration_seconds: ($duration | tonumber),
timestamp: $timestamp
}')
jq --argjson entry "$entry" '. += [$entry]' "$PERFORMANCE_LOG_FILE" > "${PERFORMANCE_LOG_FILE}.tmp" && \
mv "${PERFORMANCE_LOG_FILE}.tmp" "$PERFORMANCE_LOG_FILE"
log_info "Performance: $operation completed in ${duration}s"
}
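# Each appended entry has this shape (illustrative values):
#   {"operation": "backup", "duration_seconds": 42, "timestamp": "2025-01-01T03:15:07+00:00"}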
# Initialize log directory
initialize_logs() {
mkdir -p "$(dirname "$PERFORMANCE_LOG_FILE")"
if [ ! -f "$PERFORMANCE_LOG_FILE" ]; then
echo "[]" > "$PERFORMANCE_LOG_FILE"
log_message "Initialized performance log file"
fi
}
# Log synchronization functions
sync_logs_to_shared() {
local sync_start_time
sync_start_time=$(date +%s)
log_info "Starting log synchronization to shared location"
# Ensure shared log directory exists
if ! mkdir -p "$SHARED_LOG_ROOT" 2>/dev/null; then
log_warning "Could not create shared log directory: $SHARED_LOG_ROOT"
return 1
fi
# Check if shared location is accessible
if [ ! -w "$SHARED_LOG_ROOT" ]; then
log_warning "Shared log directory is not writable: $SHARED_LOG_ROOT"
return 1
fi
# Sync log files (one-way: local -> shared)
local sync_count=0
local error_count=0
for log_file in "$LOCAL_LOG_ROOT"/*.log "$LOCAL_LOG_ROOT"/*.json; do
if [ -f "$log_file" ]; then
local filename
filename=$(basename "$log_file")
local shared_file="$SHARED_LOG_ROOT/$filename"
# Only copy if file doesn't exist in shared location or local is newer
if [ ! -f "$shared_file" ] || [ "$log_file" -nt "$shared_file" ]; then
if cp "$log_file" "$shared_file" 2>/dev/null; then
((sync_count++))
log_info "Synced: $filename"
else
((error_count++))
log_warning "Failed to sync: $filename"
fi
fi
fi
done
local sync_end_time
sync_end_time=$(date +%s)
local sync_duration=$((sync_end_time - sync_start_time))
if [ $error_count -eq 0 ]; then
log_success "Log sync completed: $sync_count files synced in ${sync_duration}s"
else
log_warning "Log sync completed with errors: $sync_count synced, $error_count failed in ${sync_duration}s"
fi
return $error_count
}
# Cleanup old local logs (30 day retention)
cleanup_old_local_logs() {
local cleanup_start_time
cleanup_start_time=$(date +%s)
log_info "Starting cleanup of old local logs (30+ days)"
if [ ! -d "$LOCAL_LOG_ROOT" ]; then
log_info "Local log directory does not exist, nothing to clean up"
return 0
fi
local cleanup_count=0
local error_count=0
# Find and remove log files older than 30 days
while IFS= read -r -d '' old_file; do
local filename
filename=$(basename "$old_file")
if rm "$old_file" 2>/dev/null; then
((cleanup_count++))
log_info "Removed old log: $filename"
else
((error_count++))
log_warning "Failed to remove old log: $filename"
fi
done < <(find "$LOCAL_LOG_ROOT" -name "*.log" -mtime +30 -print0 2>/dev/null)
# Also clean up old performance log entries (keep structure, remove old entries)
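# (String comparison with >= is chronologically sound here because date -Iseconds emits
# ISO-8601 timestamps with a consistent UTC offset on a given host.)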
if [ -f "$PERFORMANCE_LOG_FILE" ]; then
local thirty_days_ago
thirty_days_ago=$(date -d '30 days ago' -Iseconds)
local temp_perf_file="${PERFORMANCE_LOG_FILE}.cleanup.tmp"
if jq --arg cutoff "$thirty_days_ago" '[.[] | select(.timestamp >= $cutoff)]' "$PERFORMANCE_LOG_FILE" > "$temp_perf_file" 2>/dev/null; then
local old_count
old_count=$(jq length "$PERFORMANCE_LOG_FILE" 2>/dev/null || echo "0")
local new_count
new_count=$(jq length "$temp_perf_file" 2>/dev/null || echo "0")
local removed_count=$((old_count - new_count))
if [ "$removed_count" -gt 0 ]; then
mv "$temp_perf_file" "$PERFORMANCE_LOG_FILE"
log_info "Cleaned up $removed_count old performance entries"
((cleanup_count += removed_count))
else
rm -f "$temp_perf_file"
fi
else
rm -f "$temp_perf_file"
log_warning "Failed to clean up old performance log entries"
((error_count++))
fi
fi
local cleanup_end_time
cleanup_end_time=$(date +%s)
local cleanup_duration=$((cleanup_end_time - cleanup_start_time))
if [ $cleanup_count -gt 0 ]; then
log_success "Cleanup completed: $cleanup_count items removed in ${cleanup_duration}s"
else
log_info "Cleanup completed: no old items found to remove in ${cleanup_duration}s"
fi
return $error_count
}
# Enhanced notification system
send_notification() {
local title="$1"
local message="$2"
local status="${3:-info}" # success, error, warning, info
local hostname
hostname=$(hostname)
# Console notification
case "$status" in
success) log_success "$title: $message" ;;
error) log_error "$title: $message" ;;
warning) log_warning "$title: $message" ;;
*) log_info "$title: $message" ;;
esac
# Webhook notification
if [ -n "$WEBHOOK_URL" ]; then
local tags="backup,plex,${hostname}"
[ "$status" == "error" ] && tags="${tags},errors"
[ "$status" == "warning" ] && tags="${tags},warnings"
# Webhook payload is sent as-is; callers pass single-line summaries
local webhook_message="$message"
curl -s \
-H "tags:${tags}" \
-d "$webhook_message" \
"$WEBHOOK_URL" 2>/dev/null || log_warning "Failed to send webhook notification"
fi
# Email notification (if sendmail is available)
if [ -n "$EMAIL_RECIPIENT" ] && command -v sendmail > /dev/null 2>&1; then
{
echo "To: $EMAIL_RECIPIENT"
echo "Subject: Plex Backup - $title"
echo "Content-Type: text/plain"
echo ""
echo "Host: $hostname"
echo "Time: $(date)"
echo "Status: $status"
echo ""
echo "$message"
} | sendmail "$EMAIL_RECIPIENT" 2>/dev/null || true
fi
}
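# Manual test of the webhook path (a sketch; assumes an ntfy-style endpoint that honors the
# "tags:" header the curl call above sends):
#   curl -H "tags:backup,plex,test" -d "test message" "https://notify.peterwood.rocks/lab"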
# Format backed up files list for notifications
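# Example outputs (illustrative):
#   (library.db blobs.db)                   -> "library.db and blobs.db"
#   (library.db blobs.db Preferences.xml)   -> "library.db, blobs.db, and Preferences.xml"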
format_backed_up_files() {
local files=("$@")
local count=${#files[@]}
if [ "$count" -eq 0 ]; then
echo "no files"
elif [ "$count" -eq 1 ]; then
echo "${files[0]}"
elif [ "$count" -eq 2 ]; then
echo "${files[0]} and ${files[1]}"
else
local last_file="${files[-1]}"
local other_files=("${files[@]:0:$((count-1))}")
local other_files_str
other_files_str=$(IFS=', '; echo "${other_files[*]}")
echo "${other_files_str}, and ${last_file}"
fi
}
# Enhanced checksum calculation with caching
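# Cache layout (illustrative; "..." elides the middle of the path): slashes in the source
# path become underscores, so
#   /var/lib/plexmediaserver/.../com.plexapp.plugins.library.db
# caches to
#   /tmp/plex-backup-cache/_var_lib_plexmediaserver_..._com.plexapp.plugins.library.db.md5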
calculate_checksum() {
local file="$1"
# Use /tmp for cache files to avoid permission issues
local cache_dir="/tmp/plex-backup-cache"
local cache_file="$cache_dir/${file//\//_}.md5"
local file_mtime
file_mtime=$(stat -c %Y "$file" 2>/dev/null || echo "0")
# Create cache directory if it doesn't exist
mkdir -p "$cache_dir" 2>/dev/null || true
# Check if cached checksum exists and is newer than file
if [ -f "$cache_file" ]; then
local cache_mtime
cache_mtime=$(stat -c %Y "$cache_file" 2>/dev/null || echo "0")
if [ "$cache_mtime" -gt "$file_mtime" ]; then
local cached_checksum
cached_checksum=$(cat "$cache_file" 2>/dev/null)
if [[ -n "$cached_checksum" && "$cached_checksum" =~ ^[a-f0-9]{32}$ ]]; then
echo "$cached_checksum"
return 0
fi
fi
fi
# Calculate new checksum
local checksum
if ! checksum=$(md5sum "$file" 2>/dev/null | cut -d' ' -f1); then
checksum=""
fi
# Check if we got a valid checksum (not empty and looks like md5)
if [[ -n "$checksum" && "$checksum" =~ ^[a-f0-9]{32}$ ]]; then
# Cache the checksum
echo "$checksum" > "$cache_file" 2>/dev/null || true
echo "$checksum"
return 0
fi
# If normal access failed or returned empty, try with sudo
if ! checksum=$(sudo md5sum "$file" 2>/dev/null | cut -d' ' -f1); then
checksum=""
fi
# Check if sudo checksum is valid
if [[ -n "$checksum" && "$checksum" =~ ^[a-f0-9]{32}$ ]]; then
# Cache the checksum with appropriate permissions
echo "$checksum" | sudo tee "$cache_file" >/dev/null 2>&1 || true
echo "$checksum"
return 0
fi
# If both fail, return error indicator
echo "PERMISSION_DENIED"
return 1
}
# Check database integrity using Plex SQLite
check_database_integrity() {
local db_file="$1"
local db_name
db_name=$(basename "$db_file")
log_message "Checking database integrity: $db_name"
# Check if Plex SQLite exists
if [ ! -f "$PLEX_SQLITE" ]; then
log_error "Plex SQLite binary not found at: $PLEX_SQLITE"
return 1
fi
# Make Plex SQLite executable if it isn't already
sudo chmod +x "$PLEX_SQLITE" 2>/dev/null || true
# Run integrity check
local integrity_result
integrity_result=$(sudo "$PLEX_SQLITE" "$db_file" "PRAGMA integrity_check;" 2>&1)
local check_exit_code=$?
if [ $check_exit_code -ne 0 ]; then
log_error "Failed to run integrity check on $db_name: $integrity_result"
return 1
fi
if echo "$integrity_result" | grep -q "^ok$"; then
log_success "Database integrity check passed: $db_name"
return 0
else
log_warning "Database integrity issues detected in $db_name:"
echo "$integrity_result" | while read -r line; do
log_warning " $line"
done
return 1
fi
}
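# Manual spot check (same command the function runs; the binary path contains a space, so quote it):
#   sudo "/usr/lib/plexmediaserver/Plex SQLite" \
#     "/var/lib/plexmediaserver/Library/Application Support/Plex Media Server/Plug-in Support/Databases/com.plexapp.plugins.library.db" \
#     "PRAGMA integrity_check;"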
# Preventive corruption detection before severe corruption occurs
detect_early_corruption() {
local db_file="$1"
local db_name
db_name=$(basename "$db_file")
log_message "Performing early corruption detection for: $db_name"
# Check for early warning signs of corruption
local warning_count=0
# 1. Check for WAL file size anomalies
local wal_file="${db_file}-wal"
if [ -f "$wal_file" ]; then
local wal_size
wal_size=$(stat -f%z "$wal_file" 2>/dev/null || stat -c%s "$wal_file" 2>/dev/null || echo "0")
local db_size
db_size=$(stat -f%z "$db_file" 2>/dev/null || stat -c%s "$db_file" 2>/dev/null || echo "0")
# If WAL file is more than 10% of database size, it might indicate issues
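# (e.g. a 50 MB WAL against a 400 MB database gives wal_ratio=12, which trips the warning)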
if [ "$wal_size" -gt 0 ] && [ "$db_size" -gt 0 ]; then
local wal_ratio=$((wal_size * 100 / db_size))
if [ "$wal_ratio" -gt 10 ]; then
log_warning "WAL file unusually large: ${wal_ratio}% of database size"
((warning_count++))
fi
else
log_info "Unable to determine file sizes for WAL analysis"
fi
fi
# 2. Quick integrity check focused on critical issues
local quick_check
if ! quick_check=$(sudo "$PLEX_SQLITE" "$db_file" "PRAGMA quick_check(5);" 2>&1); then
log_warning "Failed to execute quick integrity check for $db_name"
((warning_count++))
elif ! echo "$quick_check" | grep -q "^ok$"; then
log_warning "Quick integrity check failed for $db_name"
log_warning "Issues found: $quick_check"
((warning_count++))
fi
# 3. Check for foreign key violations (common early corruption sign)
local fk_check
if fk_check=$(sudo "$PLEX_SQLITE" "$db_file" "PRAGMA foreign_key_check;" 2>/dev/null); then
if [ -n "$fk_check" ]; then
log_warning "Foreign key violations detected in $db_name"
((warning_count++))
fi
else
log_info "Foreign key check unavailable for $db_name"
fi
# 4. Confirm the database responds to a basic PRAGMA query
if ! sudo "$PLEX_SQLITE" "$db_file" "PRAGMA compile_options;" >/dev/null 2>&1; then
log_warning "Database failed to answer a basic PRAGMA query: $db_name"
((warning_count++))
fi
if [ "$warning_count" -gt 0 ]; then
log_warning "Early corruption indicators detected ($warning_count warnings) in $db_name"
log_warning "Consider performing preventive maintenance or monitoring more closely"
return 1
else
log_success "Early corruption detection passed for $db_name"
return 0
fi
}
# Enhanced database repair with multiple recovery strategies
repair_database() {
local db_file="$1"
local db_name
db_name=$(basename "$db_file")
local timestamp
timestamp=$(date "+%Y-%m-%d_%H.%M.%S")
log_message "Attempting to repair corrupted database: $db_name"
log_message "Starting advanced database repair for: $db_name"
# Enhanced WAL file handling for repair
handle_wal_files_for_repair "$db_file" "prepare"
# Create multiple backup copies before attempting repair
local pre_repair_backup="${db_file}.pre-repair-backup"
local working_copy="${db_file}.working-${timestamp}"
if ! sudo cp "$db_file" "$pre_repair_backup"; then
log_error "Failed to create pre-repair backup"
handle_wal_files_for_repair "$db_file" "restore"
return 1
fi
# Force filesystem sync to prevent corruption
sync
if ! sudo cp "$db_file" "$working_copy"; then
log_error "Failed to create working copy"
handle_wal_files_for_repair "$db_file" "restore"
return 1
fi
# Force filesystem sync to prevent corruption
sync
log_success "Created pre-repair backup: $(basename "$pre_repair_backup")"
# Strategy 1: Try dump and restore approach
log_message "Step 1: Database cleanup and optimization..."
if attempt_dump_restore "$working_copy" "$db_file" "$timestamp"; then
log_success "Database repaired using dump/restore method"
handle_wal_files_for_repair "$db_file" "cleanup"
cleanup_repair_files "$pre_repair_backup" "$working_copy"
return 0
fi
# Strategy 2: Try schema recreation
if attempt_schema_recreation "$working_copy" "$db_file" "$timestamp"; then
log_success "Database repaired using schema recreation"
handle_wal_files_for_repair "$db_file" "cleanup"
cleanup_repair_files "$pre_repair_backup" "$working_copy"
return 0
fi
# Strategy 3: Try recovery from previous backup
if attempt_backup_recovery "$db_file" "$BACKUP_ROOT" "$pre_repair_backup"; then
log_success "Database recovered from previous backup"
handle_wal_files_for_repair "$db_file" "cleanup"
cleanup_repair_files "$pre_repair_backup" "$working_copy"
return 0
fi
# All strategies failed - restore original and flag for manual intervention
log_error "Database repair failed. Restoring original..."
if sudo cp "$pre_repair_backup" "$db_file"; then
# Force filesystem sync to prevent corruption
sync
log_success "Original database restored"
handle_wal_files_for_repair "$db_file" "restore"
else
log_error "Failed to restore original database!"
handle_wal_files_for_repair "$db_file" "restore"
return 2
fi
log_error "Database repair failed for $db_name"
log_warning "Will backup corrupted database - manual intervention may be needed"
cleanup_repair_files "$pre_repair_backup" "$working_copy"
return 1
}
# Strategy 1: Dump and restore approach with enhanced validation
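# Manually, this strategy is roughly (a sketch; file names illustrative):
#   sudo "/usr/lib/plexmediaserver/Plex SQLite" corrupted.db ".dump" > dump.sql
#   sudo "/usr/lib/plexmediaserver/Plex SQLite" repaired.db < dump.sql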
attempt_dump_restore() {
local working_copy="$1"
local original_db="$2"
local timestamp="$3"
local dump_file="${original_db}.dump-${timestamp}.sql"
local new_db="${original_db}.repaired-${timestamp}"
log_message "Attempting repair via SQL dump/restore..."
# Try to dump the database with error checking
log_info "Creating database dump..."
if sudo "$PLEX_SQLITE" "$working_copy" ".dump" 2>/dev/null | sudo tee "$dump_file" >/dev/null; then
# Validate the dump file exists and has substantial content
if [[ ! -f "$dump_file" ]]; then
log_warning "Dump file was not created"
return 1
fi
local dump_size
dump_size=$(stat -c%s "$dump_file" 2>/dev/null || echo "0")
if [[ "$dump_size" -lt 1024 ]]; then
log_warning "Dump file is too small ($dump_size bytes) - likely incomplete"
sudo rm -f "$dump_file"
return 1
fi
# Check for essential database structures in dump
if ! grep -q "CREATE TABLE" "$dump_file" 2>/dev/null; then
log_warning "Dump file contains no CREATE TABLE statements - dump is incomplete"
sudo rm -f "$dump_file"
return 1
fi
# Check for critical Plex tables
local critical_tables=("schema_migrations" "accounts" "library_sections")
local missing_tables=()
for table in "${critical_tables[@]}"; do
if ! grep -q "CREATE TABLE.*$table" "$dump_file" 2>/dev/null; then
missing_tables+=("$table")
fi
done
if [[ ${#missing_tables[@]} -gt 0 ]]; then
log_warning "Dump is missing critical tables: ${missing_tables[*]}"
log_warning "This would result in an incomplete database - aborting dump/restore"
sudo rm -f "$dump_file"
return 1
fi
log_success "Database dumped successfully (${dump_size} bytes)"
log_info "Dump contains all critical tables: ${critical_tables[*]}"
# Create new database from dump
log_info "Creating new database from validated dump..."
if sudo cat "$dump_file" | sudo "$PLEX_SQLITE" "$new_db" 2>/dev/null; then
# Verify the new database was created and has content
if [[ ! -f "$new_db" ]]; then
log_warning "New database file was not created"
sudo rm -f "$dump_file"
return 1
fi
local new_db_size
new_db_size=$(stat -c%s "$new_db" 2>/dev/null || echo "0")
if [[ "$new_db_size" -lt 1048576 ]]; then # Less than 1MB
log_warning "New database is too small ($new_db_size bytes) - likely empty or incomplete"
sudo rm -f "$new_db" "$dump_file"
return 1
fi
# Verify critical tables exist in new database
local table_count
table_count=$(sudo "$PLEX_SQLITE" "$new_db" "SELECT COUNT(*) FROM sqlite_master WHERE type='table';" 2>/dev/null || echo "0")
if [[ "$table_count" -lt 50 ]]; then # Plex should have way more than 50 tables
log_warning "New database has too few tables ($table_count) - likely incomplete"
sudo rm -f "$new_db" "$dump_file"
return 1
fi
# Verify schema_migrations table specifically (this was the root cause)
if ! sudo "$PLEX_SQLITE" "$new_db" "SELECT COUNT(*) FROM schema_migrations;" >/dev/null 2>&1; then
log_warning "New database missing schema_migrations table - Plex will not start"
sudo rm -f "$new_db" "$dump_file"
return 1
fi
log_success "New database created from dump ($new_db_size bytes, $table_count tables)"
# Verify the new database passes integrity check
log_info "Performing integrity check on repaired database..."
if sudo "$PLEX_SQLITE" "$new_db" "PRAGMA integrity_check;" 2>/dev/null | grep -q "ok"; then
log_success "New database passes integrity check"
# Replace original with repaired version
log_info "Replacing original database with repaired version..."
if sudo mv "$new_db" "$original_db"; then
# Force filesystem sync to prevent corruption
sync
sudo chown plex:plex "$original_db"
sudo chmod 644 "$original_db"
sudo rm -f "$dump_file"
log_success "Database successfully repaired and replaced"
return 0
else
log_error "Failed to replace original database with repaired version"
sudo rm -f "$dump_file"
return 1
fi
else
log_warning "Repaired database failed integrity check"
sudo rm -f "$new_db" "$dump_file"
return 1
fi
else
log_warning "Failed to create database from dump - SQL import failed"
sudo rm -f "$dump_file"
return 1
fi
else
log_warning "Failed to dump corrupted database - dump command failed"
# Clean up any potentially created but empty dump file
sudo rm -f "$dump_file"
return 1
fi
}
# Strategy 2: Schema recreation with data recovery
attempt_schema_recreation() {
local working_copy="$1"
local original_db="$2"
local timestamp="$3"
local schema_file="${original_db}.schema-${timestamp}.sql"
local new_db="${original_db}.rebuilt-${timestamp}"
log_message "Attempting repair via schema recreation..."
# Extract schema
if sudo "$PLEX_SQLITE" "$working_copy" ".schema" 2>/dev/null | sudo tee "$schema_file" >/dev/null; then
log_success "Schema extracted"
# Create new database with schema
if sudo cat "$schema_file" | sudo "$PLEX_SQLITE" "$new_db" 2>/dev/null; then
log_success "New database created with schema"
# Try to recover data table by table
if recover_table_data "$working_copy" "$new_db"; then
log_success "Data recovery completed"
# Verify the rebuilt database
if sudo "$PLEX_SQLITE" "$new_db" "PRAGMA integrity_check;" 2>/dev/null | grep -q "ok"; then
log_success "Rebuilt database passes integrity check"
if sudo mv "$new_db" "$original_db"; then
# Force filesystem sync to prevent corruption
sync
sudo chown plex:plex "$original_db"
sudo chmod 644 "$original_db"
sudo rm -f "$schema_file"
return 0
fi
else
log_warning "Rebuilt database failed integrity check"
fi
fi
fi
sudo rm -f "$new_db" "$schema_file"
fi
return 1
}
# Strategy 3: Recovery from previous backup
attempt_backup_recovery() {
local original_db="$1"
local backup_dir="$2"
local current_backup="$3"
log_message "Attempting recovery from previous backup..."
# Find the most recent backup that's not the current corrupted one
local latest_backup
if [[ -n "$current_backup" ]]; then
# Exclude the current backup from consideration
latest_backup=$(find "$backup_dir" -name "plex-backup-*.tar.gz" -type f ! -samefile "$current_backup" -printf '%T@ %p\n' 2>/dev/null | sort -nr | head -1 | cut -d' ' -f2-)
else
latest_backup=$(find "$backup_dir" -name "plex-backup-*.tar.gz" -type f -printf '%T@ %p\n' 2>/dev/null | sort -nr | head -1 | cut -d' ' -f2-)
fi
if [[ -n "$latest_backup" && -f "$latest_backup" ]]; then
log_message "Found recent backup: $(basename "$latest_backup")"
local temp_restore_dir="/tmp/plex-restore-$$"
mkdir -p "$temp_restore_dir"
# Extract the backup
if tar -xzf "$latest_backup" -C "$temp_restore_dir" 2>/dev/null; then
local restored_db
restored_db="${temp_restore_dir}/$(basename "$original_db")"
if [[ -f "$restored_db" ]]; then
# Verify the restored database
if sudo "$PLEX_SQLITE" "$restored_db" "PRAGMA integrity_check;" 2>/dev/null | grep -q "ok"; then
log_success "Backup database passes integrity check"
if sudo cp "$restored_db" "$original_db"; then
# Force filesystem sync to prevent corruption
sync
sudo chown plex:plex "$original_db"
sudo chmod 644 "$original_db"
log_success "Database restored from backup"
rm -rf "$temp_restore_dir"
return 0
fi
else
log_warning "Backup database also corrupted"
fi
fi
fi
rm -rf "$temp_restore_dir"
fi
return 1
}
# Recovery helper for table data
recover_table_data() {
local source_db="$1"
local target_db="$2"
# Get list of tables
local tables
tables=$(sudo "$PLEX_SQLITE" "$source_db" ".tables" 2>/dev/null)
if [[ -z "$tables" ]]; then
log_warning "No tables found in source database"
return 1
fi
local recovered_count=0
local total_tables=0
for table in $tables; do
((total_tables++))
# Try to copy data from each table
if sudo "$PLEX_SQLITE" "$source_db" ".mode insert $table" ".output | sudo tee /tmp/table_data_$$.sql > /dev/null" "SELECT * FROM $table;" ".output stdout" 2>/dev/null && \
sudo cat "/tmp/table_data_$$.sql" | sudo "$PLEX_SQLITE" "$target_db" 2>/dev/null; then
((recovered_count++))
sudo rm -f "/tmp/table_data_$$.sql" 2>/dev/null || true
else
log_warning "Failed to recover data from table: $table"
sudo rm -f "/tmp/table_data_$$.sql" 2>/dev/null || true
fi
done
log_message "Recovered $recovered_count/$total_tables tables"
# Consider successful if we recovered at least 80% of tables
# Prevent division by zero
if [ "$total_tables" -eq 0 ]; then
log_warning "No tables found for recovery"
return 1
fi
if (( recovered_count * 100 / total_tables >= 80 )); then
return 0
fi
return 1
}
# Cleanup helper function
cleanup_repair_files() {
local pre_repair_backup="$1"
local working_copy="$2"
if [[ -n "$pre_repair_backup" && -f "$pre_repair_backup" ]]; then
sudo rm -f "$pre_repair_backup" 2>/dev/null || true
fi
if [[ -n "$working_copy" && -f "$working_copy" ]]; then
sudo rm -f "$working_copy" 2>/dev/null || true
fi
}
# WAL (Write-Ahead Logging) file handling
handle_wal_files() {
local action="$1" # "backup" or "restore"
local backup_path="$2"
log_info "Handling WAL files: $action"
# Define WAL files that might exist
local wal_files=(
"/var/lib/plexmediaserver/Library/Application Support/Plex Media Server/Plug-in Support/Databases/com.plexapp.plugins.library.db-wal"
"/var/lib/plexmediaserver/Library/Application Support/Plex Media Server/Plug-in Support/Databases/com.plexapp.plugins.library.db-shm"
"/var/lib/plexmediaserver/Library/Application Support/Plex Media Server/Plug-in Support/Databases/com.plexapp.plugins.library.blobs.db-wal"
"/var/lib/plexmediaserver/Library/Application Support/Plex Media Server/Plug-in Support/Databases/com.plexapp.plugins.library.blobs.db-shm"
)
for wal_file in "${wal_files[@]}"; do
local wal_basename
wal_basename=$(basename "$wal_file")
case "$action" in
"backup")
if [ -f "$wal_file" ]; then
log_info "Found WAL/SHM file: $wal_basename"
local backup_file="${backup_path}/${wal_basename}"
if sudo cp "$wal_file" "$backup_file"; then
# Force filesystem sync to prevent corruption
sync
log_success "Backed up WAL/SHM file: $wal_basename"
# Verify backup
if verify_backup "$wal_file" "$backup_file"; then
log_success "Verified WAL/SHM backup: $wal_basename"
else
log_warning "WAL/SHM backup verification failed: $wal_basename"
fi
else
log_warning "Failed to backup WAL/SHM file: $wal_basename"
fi
else
log_info "WAL/SHM file not found (normal): $wal_basename"
fi
;;
"checkpoint")
# Force WAL checkpoint to integrate changes into main database
local db_file="${wal_file%.db-*}.db"
if [ -f "$db_file" ] && [ -f "$wal_file" ]; then
log_info "Performing WAL checkpoint for: $(basename "$db_file")"
if sudo "$PLEX_SQLITE" "$db_file" "PRAGMA wal_checkpoint(FULL);" 2>/dev/null; then
log_success "WAL checkpoint completed for: $(basename "$db_file")"
else
log_warning "WAL checkpoint failed for: $(basename "$db_file")"
fi
fi
;;
esac
done
}
# Enhanced WAL file management for repair operations
handle_wal_files_for_repair() {
local db_file="$1"
local operation="${2:-prepare}" # prepare, cleanup, or restore
local db_dir
db_dir=$(dirname "$db_file")
local db_base
db_base=$(basename "$db_file" .db)
local wal_file="${db_dir}/${db_base}.db-wal"
local shm_file="${db_dir}/${db_base}.db-shm"
case "$operation" in
"prepare")
log_message "Preparing WAL files for repair of $(basename "$db_file")"
# Force WAL checkpoint to consolidate all changes
if [ -f "$wal_file" ]; then
log_info "Found WAL file, performing checkpoint..."
if sudo "$PLEX_SQLITE" "$db_file" "PRAGMA wal_checkpoint(TRUNCATE);" 2>/dev/null; then
log_success "WAL checkpoint completed"
else
log_warning "WAL checkpoint failed, continuing with repair"
fi
fi
# Create backup copies of WAL/SHM files if they exist
for file in "$wal_file" "$shm_file"; do
if [ -f "$file" ]; then
local backup_file="${file}.repair-backup"
if sudo cp "$file" "$backup_file" 2>/dev/null; then
# Force filesystem sync to prevent corruption
sync
log_info "Backed up $(basename "$file") for repair"
fi
fi
done
;;
"cleanup")
log_message "Cleaning up WAL files after repair"
# Remove any remaining WAL/SHM files to force clean state
for file in "$wal_file" "$shm_file"; do
if [ -f "$file" ]; then
if sudo rm -f "$file" 2>/dev/null; then
log_info "Removed $(basename "$file") for clean state"
fi
fi
done
# Force WAL mode back on for consistency
if sudo "$PLEX_SQLITE" "$db_file" "PRAGMA journal_mode=WAL;" 2>/dev/null | grep -q "wal"; then
log_success "WAL mode restored for $(basename "$db_file")"
else
log_warning "Failed to restore WAL mode for $(basename "$db_file")"
fi
;;
"restore")
log_message "Restoring WAL files after failed repair"
# Restore WAL/SHM backup files if they exist
for file in "$wal_file" "$shm_file"; do
local backup_file="${file}.repair-backup"
if [ -f "$backup_file" ]; then
if sudo mv "$backup_file" "$file" 2>/dev/null; then
log_info "Restored $(basename "$file") from backup"
else
log_warning "Failed to restore $(basename "$file") from backup"
# Try to remove broken backup file
sudo rm -f "$backup_file" 2>/dev/null || true
fi
else
log_info "No backup found for $(basename "$file")"
fi
done
;;
esac
}
# Enhanced database integrity check with WAL handling
check_database_integrity_with_wal() {
local db_file="$1"
local db_name
db_name=$(basename "$db_file")
log_message "Checking database integrity with WAL handling: $db_name"
# Check if Plex SQLite exists
if [ ! -f "$PLEX_SQLITE" ]; then
log_error "Plex SQLite binary not found at: $PLEX_SQLITE"
return 1
fi
# Make Plex SQLite executable if it isn't already
sudo chmod +x "$PLEX_SQLITE" 2>/dev/null || true
# Check if WAL file exists and handle it
local wal_file="${db_file}-wal"
if [ -f "$wal_file" ]; then
log_info "WAL file found for $db_name, performing checkpoint..."
if sudo "$PLEX_SQLITE" "$db_file" "PRAGMA wal_checkpoint(FULL);" 2>/dev/null; then
log_success "WAL checkpoint completed for $db_name"
else
log_warning "WAL checkpoint failed for $db_name, proceeding with integrity check"
fi
fi
# Run integrity check
local integrity_result
integrity_result=$(sudo "$PLEX_SQLITE" "$db_file" "PRAGMA integrity_check;" 2>&1)
local check_exit_code=$?
if [ $check_exit_code -ne 0 ]; then
log_error "Failed to run integrity check on $db_name: $integrity_result"
return 1
fi
if echo "$integrity_result" | grep -q "^ok$"; then
log_success "Database integrity check passed: $db_name"
return 0
else
log_warning "Database integrity issues detected in $db_name:"
echo "$integrity_result" | while read -r line; do
log_warning " $line"
done
return 1
fi
}
# Parallel verification function
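# Throttling sketch: at most max_jobs verifier subshells run concurrently; once the cap is
# reached we wait on the oldest PID before launching the next (a simple sliding window).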
verify_files_parallel() {
local backup_dir="$1"
local -a pids=()
local temp_dir
temp_dir=$(mktemp -d)
local verification_errors=0
local max_jobs=4 # Limit concurrent jobs to prevent system overload
local job_count=0
if [[ "$PARALLEL_VERIFICATION" != true ]]; then
# Fall back to sequential verification
for nickname in "${!PLEX_FILES[@]}"; do
local src_file="${PLEX_FILES[$nickname]}"
local dest_file
dest_file="$backup_dir/$(basename "$src_file")"
if [ -f "$dest_file" ]; then
if ! verify_backup "$src_file" "$dest_file"; then
verification_errors=$((verification_errors + 1))
fi
fi
done
rm -rf "$temp_dir" 2>/dev/null || true
return $verification_errors
fi
log_info "Starting parallel verification in $backup_dir (max $max_jobs concurrent jobs)"
# Start verification jobs in parallel with job control
for nickname in "${!PLEX_FILES[@]}"; do
local src_file="${PLEX_FILES[$nickname]}"
local dest_file
dest_file="$backup_dir/$(basename "$src_file")"
if [ -f "$dest_file" ]; then
# Wait if we've reached the job limit
if [ $job_count -ge $max_jobs ]; then
wait "${pids[0]}" 2>/dev/null || true
pids=("${pids[@]:1}") # Remove first element
job_count=$((job_count - 1))
fi
(
local result_file="$temp_dir/$nickname.result"
if verify_backup "$src_file" "$dest_file"; then
echo "0" > "$result_file"
else
echo "1" > "$result_file"
fi
) &
pids+=($!)
job_count=$((job_count + 1))
fi
done
# Wait for all remaining verification jobs to complete
for pid in "${pids[@]}"; do
wait "$pid" 2>/dev/null || true
done
# Collect results
for nickname in "${!PLEX_FILES[@]}"; do
local result_file="$temp_dir/$nickname.result"
if [ -f "$result_file" ]; then
local result
result=$(cat "$result_file" 2>/dev/null || echo "1")
if [ "$result" != "0" ]; then
verification_errors=$((verification_errors + 1))
fi
fi
done
# Cleanup
rm -rf "$temp_dir" 2>/dev/null || true
return $verification_errors
}
# Enhanced backup verification with multiple retry strategies and corruption detection
verify_backup() {
local src="$1"
local dest="$2"
local max_retries=3
local retry_count=0
log_message "Verifying backup integrity: $(basename "$src")"
# Calculate destination checksum first (this doesn't change)
local dest_checksum
local dest_result=0
if ! dest_checksum=$(sudo md5sum "$dest" 2>/dev/null | cut -d' ' -f1); then
dest_result=1
dest_checksum=""
fi
if [[ $dest_result -ne 0 ]] || [[ ! "$dest_checksum" =~ ^[a-f0-9]{32}$ ]]; then
log_error "Failed to calculate destination checksum for $(basename "$dest")"
return 1
fi
# Retry loop for source checksum calculation
while [ $retry_count -lt $max_retries ]; do
# Calculate source checksum (without caching to get current state)
local src_checksum
local src_result=0
if ! src_checksum=$(sudo md5sum "$src" 2>/dev/null | cut -d' ' -f1); then
src_result=1
src_checksum=""
fi
if [[ $src_result -ne 0 ]] || [[ ! "$src_checksum" =~ ^[a-f0-9]{32}$ ]]; then
log_error "Failed to calculate source checksum for $(basename "$src") (attempt $((retry_count + 1)))"
((retry_count++))
if [[ $retry_count -lt $max_retries ]]; then
log_warning "Retrying checksum calculation in 2 seconds..."
sleep 2
continue
else
return 1
fi
fi
if [ "$src_checksum" == "$dest_checksum" ]; then
log_success "Backup verification passed: $(basename "$src")"
log_info "Source checksum: $src_checksum"
log_info "Backup checksum: $dest_checksum"
return 0
else
# If checksums don't match, wait and try again
((retry_count++))
if [ $retry_count -lt $max_retries ]; then
log_warning "Checksum mismatch for $(basename "$src") (attempt $retry_count/$max_retries), retrying in 3 seconds..."
sleep 3
else
log_error "Backup verification failed after $max_retries attempts: $(basename "$src")"
log_error "Source checksum: $src_checksum"
log_error "Backup checksum: $dest_checksum"
# For database files, perform additional integrity check on backup
if [[ "$dest" == *.db ]]; then
log_warning "Database file checksum mismatch - checking backup integrity..."
if sudo "$PLEX_SQLITE" "$dest" "PRAGMA integrity_check;" 2>/dev/null | grep -q "ok"; then
log_warning "Backup database integrity is valid despite checksum mismatch"
log_warning "Accepting backup (source file may have been modified after copy)"
return 0
else
log_error "Backup database is also corrupted - backup failed"
return 1
fi
fi
return 1
fi
fi
done
return 1
}
# Enhanced service management with SAFE shutdown procedures and extended timeouts
# CRITICAL SAFETY NOTE: This function was modified to remove dangerous force-kill operations
# that were causing database corruption. Now uses only graceful shutdown methods.
manage_plex_service() {
local action="$1"
local force_stop="${2:-false}"
local operation_start
operation_start=$(date +%s)
log_message "Managing Plex service: $action"
case "$action" in
stop)
# Check if already stopped
if ! sudo systemctl is-active --quiet plexmediaserver.service; then
log_info "Plex service is already stopped"
track_performance "service_stop" "$operation_start"
return 0
fi
# First try normal stop with extended timeout
if sudo systemctl stop plexmediaserver.service; then
log_success "Plex service stop command issued"
# Wait for clean shutdown with progress indicator (extended timeout)
local wait_time=0
local max_wait=30 # Increased from 15 to 30 seconds
while [ $wait_time -lt $max_wait ]; do
if ! sudo systemctl is-active --quiet plexmediaserver.service; then
log_success "Plex service confirmed stopped (${wait_time}s)"
track_performance "service_stop" "$operation_start"
return 0
fi
sleep 1
wait_time=$((wait_time + 1))
echo -n "."
done
echo
# If normal stop failed and force_stop is enabled, try extended graceful shutdown
if [ "$force_stop" = "true" ]; then
log_warning "Normal stop failed, attempting extended graceful shutdown..."
local plex_pids
plex_pids=$(pgrep -f "Plex Media Server" 2>/dev/null || true)
if [ -n "$plex_pids" ]; then
log_message "Found Plex processes: $plex_pids"
log_message "Sending graceful termination signal and waiting longer..."
# Send TERM signal for graceful shutdown
if sudo pkill -TERM -f "Plex Media Server" 2>/dev/null; then
# Extended wait for graceful shutdown (up to 60 seconds)
local extended_wait=0
local max_extended_wait=60
while [ $extended_wait -lt $max_extended_wait ]; do
plex_pids=$(pgrep -f "Plex Media Server" 2>/dev/null || true)
if [ -z "$plex_pids" ]; then
log_success "Plex service gracefully stopped after extended wait (${extended_wait}s)"
track_performance "service_extended_stop" "$operation_start"
return 0
fi
sleep 2
extended_wait=$((extended_wait + 2))
echo -n "."
done
echo
# If still running after extended wait, log error but don't force kill
plex_pids=$(pgrep -f "Plex Media Server" 2>/dev/null || true)
if [ -n "$plex_pids" ]; then
log_error "Plex processes still running after ${max_extended_wait}s graceful shutdown attempt"
log_error "Refusing to force-kill processes to prevent database corruption"
log_error "Manual intervention may be required: PIDs $plex_pids"
return 1
fi
else
log_error "Failed to send TERM signal to Plex processes"
return 1
fi
else
log_success "No Plex processes found running"
track_performance "service_stop" "$operation_start"
return 0
fi
else
log_warning "Plex service may not have stopped cleanly after ${max_wait}s"
# Check one more time if service actually stopped with extended timeout
sleep 2
if ! sudo systemctl is-active --quiet plexmediaserver.service; then
log_success "Plex service stopped (delayed confirmation)"
track_performance "service_stop" "$operation_start"
return 0
else
log_warning "Plex service still appears to be running after ${max_wait}s"
return 1
fi
fi
else
log_error "Failed to issue stop command for Plex service"
return 1
fi
;;
start)
# Check if service is already running
if sudo systemctl is-active --quiet plexmediaserver.service; then
log_info "Plex service is already running"
track_performance "service_start" "$operation_start"
return 0
fi
if sudo systemctl start plexmediaserver.service; then
log_success "Plex service start command issued"
# Wait for service to be fully running with progress indicator (extended timeout)
local wait_time=0
local max_wait=45 # Increased from 30 to 45 seconds for database initialization
while [ $wait_time -lt $max_wait ]; do
if sudo systemctl is-active --quiet plexmediaserver.service; then
# Additional verification: wait for full service readiness
sleep 3
if sudo systemctl is-active --quiet plexmediaserver.service; then
# Final check: ensure service is stable and not in restart loop
sleep 2
if sudo systemctl is-active --quiet plexmediaserver.service; then
log_success "Plex service confirmed running and stable (${wait_time}s)"
track_performance "service_start" "$operation_start"
return 0
fi
fi
fi
sleep 1
wait_time=$((wait_time + 1))
echo -n "."
done
echo
log_error "Plex service failed to start within ${max_wait}s"
# Get service status for debugging
local service_status
service_status=$(sudo systemctl status plexmediaserver.service --no-pager -l 2>&1 | head -10 || echo "Failed to get status")
log_error "Service status: $service_status"
return 1
else
log_error "Failed to start Plex service"
return 1
fi
;;
*)
log_error "Invalid service action: $action"
return 1
;;
esac
}
# Check available disk space
check_disk_space() {
local backup_dir="$1"
local required_space_mb="$2"
local available_space_kb
available_space_kb=$(df "$backup_dir" | awk 'NR==2 {print $4}')
local available_space_mb=$((available_space_kb / 1024))
if [ "$available_space_mb" -lt "$required_space_mb" ]; then
log_error "Insufficient disk space. Required: ${required_space_mb}MB, Available: ${available_space_mb}MB"
return 1
fi
log_message "Disk space check passed. Available: ${available_space_mb}MB"
return 0
}
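# df reports 1K blocks, so e.g. an available column of 10485760 KB is 10240 MB
# (10485760 / 1024), which would pass a 2048 MB requirement.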
# Estimate backup size
estimate_backup_size() {
local total_size=0
for nickname in "${!PLEX_FILES[@]}"; do
local file="${PLEX_FILES[$nickname]}"
if [ -f "$file" ]; then
local size_kb
size_kb=$(du -k "$file" 2>/dev/null | cut -f1)
total_size=$((total_size + size_kb))
fi
done
echo $((total_size / 1024)) # Return size in MB
}
# Generate performance report
generate_performance_report() {
if [ "$PERFORMANCE_MONITORING" != true ] || [ ! -f "$PERFORMANCE_LOG_FILE" ]; then
return 0
fi
log_info "Performance Summary:"
# Recent performance data (last 10 entries)
jq -r '.[-10:] | .[] | " \(.operation): \(.duration_seconds)s (\(.timestamp))"' "$PERFORMANCE_LOG_FILE" 2>/dev/null || true
# Calculate averages for common operations
local avg_backup
avg_backup=$(jq '[.[] | select(.operation == "backup") | .duration_seconds] | if length > 0 then add/length else 0 end' "$PERFORMANCE_LOG_FILE" 2>/dev/null || echo "0")
local avg_verification
avg_verification=$(jq '[.[] | select(.operation == "verification") | .duration_seconds] | if length > 0 then add/length else 0 end' "$PERFORMANCE_LOG_FILE" 2>/dev/null || echo "0")
local avg_service_stop
avg_service_stop=$(jq '[.[] | select(.operation == "service_stop") | .duration_seconds] | if length > 0 then add/length else 0 end' "$PERFORMANCE_LOG_FILE" 2>/dev/null || echo "0")
local avg_service_start
avg_service_start=$(jq '[.[] | select(.operation == "service_start") | .duration_seconds] | if length > 0 then add/length else 0 end' "$PERFORMANCE_LOG_FILE" 2>/dev/null || echo "0")
if [ "$avg_backup" != "0" ]; then
log_info "Average backup time: ${avg_backup}s"
fi
if [ "$avg_verification" != "0" ]; then
log_info "Average verification time: ${avg_verification}s"
fi
if [ "$avg_service_stop" != "0" ]; then
log_info "Average service stop time: ${avg_service_stop}s"
fi
if [ "$avg_service_start" != "0" ]; then
log_info "Average service start time: ${avg_service_start}s"
fi
}
# Clean old backups
cleanup_old_backups() {
log_message "Cleaning up old backups..."
# Remove backups older than MAX_BACKUP_AGE_DAYS
find "${BACKUP_ROOT}" -maxdepth 1 -type f -name "plex-backup-*.tar.gz" -mtime +${MAX_BACKUP_AGE_DAYS} -delete 2>/dev/null || true
# Keep only MAX_BACKUPS_TO_KEEP most recent backups
local backup_count
backup_count=$(find "${BACKUP_ROOT}" -maxdepth 1 -type f -name "plex-backup-*.tar.gz" | wc -l)
if [ "$backup_count" -gt "$MAX_BACKUPS_TO_KEEP" ]; then
local excess_count=$((backup_count - MAX_BACKUPS_TO_KEEP))
log_message "Removing $excess_count old backup(s)..."
find "${BACKUP_ROOT}" -maxdepth 1 -type f -name "plex-backup-*.tar.gz" -printf '%T@ %p\n' | \
sort -n | head -n "$excess_count" | cut -d' ' -f2- | \
xargs -r rm -f
fi
# Clean up any remaining dated directories from old backup structure
find "${BACKUP_ROOT}" -maxdepth 1 -type d -name "????????" -exec rm -rf {} \; 2>/dev/null || true
log_message "Backup cleanup completed"
}
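# Retention example (illustrative): with MAX_BACKUP_AGE_DAYS=30 and MAX_BACKUPS_TO_KEEP=10,
# a 45-day-old archive is removed by the -mtime pass, and if 13 archives remain afterwards,
# the sort|head pipeline prunes the 3 oldest.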
# Database integrity check only
check_integrity_only() {
log_message "Starting database integrity check at $(date)"
# Stop Plex service - NEVER use force stop for integrity checks to prevent corruption
if ! manage_plex_service stop; then
log_error "Failed to stop Plex service gracefully"
log_error "Cannot perform integrity check while service may be running"
log_error "Manual intervention required - please stop Plex service manually"
return 1
fi
# Handle WAL files first
handle_wal_files "checkpoint"
local db_integrity_issues=0
local databases_checked=0
for nickname in "${!PLEX_FILES[@]}"; do
local file="${PLEX_FILES[$nickname]}"
# Only check database files
if [[ "$file" == *".db" ]] && [ -f "$file" ]; then
databases_checked=$((databases_checked + 1))
log_message "Checking integrity of $(basename "$file")..."
if ! check_database_integrity_with_wal "$file"; then
db_integrity_issues=$((db_integrity_issues + 1))
log_warning "Database integrity issues found in $(basename "$file")"
# Determine if we should attempt repair
local should_repair=false
if [ "$AUTO_REPAIR" = true ]; then
should_repair=true
log_message "Auto-repair enabled, attempting repair..."
elif [ "$INTERACTIVE_MODE" = true ]; then
read -p "Attempt to repair $(basename "$file")? [y/N]: " -n 1 -r -t 30
local read_result=$?
echo
if [ $read_result -eq 0 ] && [[ $REPLY =~ ^[Yy]$ ]]; then
should_repair=true
elif [ $read_result -ne 0 ]; then
log_warning "Read timeout or error, defaulting to no repair"
fi
else
log_warning "Non-interactive mode: skipping repair for $(basename "$file")"
fi
if [ "$should_repair" = true ]; then
if repair_database "$file"; then
log_success "Database repair successful for $(basename "$file")"
# Re-check integrity after repair
if check_database_integrity "$file"; then
log_success "Post-repair integrity check passed for $(basename "$file")"
else
log_warning "Post-repair integrity check still shows issues for $(basename "$file")"
fi
else
log_error "Database repair failed for $(basename "$file")"
fi
fi
else
log_success "Database integrity check passed for $(basename "$file")"
fi
fi
done
# Start Plex service
manage_plex_service start
# Summary
log_message "Integrity check completed at $(date)"
log_message "Databases checked: $databases_checked"
log_message "Databases with issues: $db_integrity_issues"
if [ "$db_integrity_issues" -gt 0 ]; then
log_warning "Integrity check completed with issues found"
exit 2 # documented exit code for database integrity issues
else
log_success "All database integrity checks passed"
exit 0
fi
}
# Main backup function
main() {
local overall_start
overall_start=$(date +%s)
log_message "Starting enhanced Plex backup process at $(date)"
send_notification "Backup Started" "Plex backup process initiated" "info"
# Create necessary directories
mkdir -p "${BACKUP_ROOT}"
mkdir -p "${LOCAL_LOG_ROOT}"
# Initialize logs
initialize_logs
# Check if only doing integrity check
if [ "$INTEGRITY_CHECK_ONLY" = true ]; then
check_integrity_only
# shellcheck disable=SC2317
return $?
fi
# Estimate backup size
local estimated_size_mb
estimated_size_mb=$(estimate_backup_size)
log_message "Estimated backup size: ${estimated_size_mb}MB"
# Check disk space (require 2x estimated size for safety)
local required_space_mb=$((estimated_size_mb * 2))
if ! check_disk_space "${BACKUP_ROOT}" "$required_space_mb"; then
log_error "Aborting backup due to insufficient disk space"
exit 1
fi
# Stop Plex service; abort if it cannot be stopped cleanly (exit code 3: service management failure)
if ! manage_plex_service stop; then
send_notification "Backup Error" "Failed to stop Plex service before backup" "error"
exit 3
fi
local backup_errors=0
local files_backed_up=0
local backed_up_files=() # Array to track successfully backed up files
local BACKUP_PATH="${BACKUP_ROOT}"
# Ensure backup root directory exists
mkdir -p "$BACKUP_PATH"
# Handle WAL files and check database integrity before backup
log_message "Performing WAL checkpoint and checking database integrity before backup..."
handle_wal_files "checkpoint"
local db_integrity_issues=0
for nickname in "${!PLEX_FILES[@]}"; do
local file="${PLEX_FILES[$nickname]}"
# Only check database files
if [[ "$file" == *".db" ]] && [ -f "$file" ]; then
if ! check_database_integrity_with_wal "$file"; then
db_integrity_issues=$((db_integrity_issues + 1))
log_warning "Database integrity issues found in $(basename "$file")"
# Always attempt repair when corruption is detected (default behavior)
local should_repair=true
local repair_attempted=false
# Override repair behavior only if explicitly disabled
if [ "$AUTO_REPAIR" = false ]; then
should_repair=false
log_warning "Auto-repair explicitly disabled, skipping repair"
elif [ "$INTERACTIVE_MODE" = true ]; then
read -p "Database $(basename "$file") has integrity issues. Attempt repair before backup? [Y/n]: " -n 1 -r -t 30
local read_result=$?
echo
if [ $read_result -eq 0 ] && [[ $REPLY =~ ^[Nn]$ ]]; then
should_repair=false
log_message "User declined repair for $(basename "$file")"
elif [ $read_result -ne 0 ]; then
log_message "Read timeout, proceeding with default repair"
fi
else
log_message "Auto-repair enabled by default, attempting repair..."
fi
if [ "$should_repair" = true ]; then
repair_attempted=true
log_message "Attempting to repair corrupted database: $(basename "$file")"
if repair_database "$file"; then
log_success "Database repair successful for $(basename "$file")"
# Re-verify integrity after repair
if check_database_integrity_with_wal "$file"; then
log_success "Post-repair integrity verification passed for $(basename "$file")"
# Decrement issue count since repair was successful
db_integrity_issues=$((db_integrity_issues - 1))
else
log_warning "Post-repair integrity check still shows issues for $(basename "$file")"
log_warning "Will backup corrupted database - manual intervention may be needed"
fi
else
log_error "Database repair failed for $(basename "$file")"
log_warning "Will backup corrupted database - manual intervention may be needed"
backup_errors=$((backup_errors + 1))
fi
else
log_warning "Skipping repair - will backup database with known integrity issues"
fi
# Log repair attempt for monitoring purposes
if [ "$repair_attempted" = true ]; then
send_notification "Database Repair" "Attempted repair of $(basename "$file")" "warning"
fi
fi
fi
done
# Handle WAL files backup
handle_wal_files "backup" "$BACKUP_PATH"
# Backup files - always perform full backup
local backup_start
backup_start=$(date +%s)
for nickname in "${!PLEX_FILES[@]}"; do
local file="${PLEX_FILES[$nickname]}"
if [ -f "$file" ]; then
log_message "Backing up: $(basename "$file")"
# Create backup filename without timestamp (use original filename)
local backup_file
backup_file="${BACKUP_PATH}/$(basename "$file")"
# Copy file
if sudo cp "$file" "$backup_file"; then
# Force filesystem sync to prevent corruption
sync
# Ensure proper ownership of backup file
sudo chown plex:plex "$backup_file"
log_success "Copied: $(basename "$file")"
# Verify backup
if verify_backup "$file" "$backup_file"; then
log_success "Verified: $(basename "$file")"
files_backed_up=$((files_backed_up + 1))
# Add friendly filename to backed up files list
case "$(basename "$file")" in
"com.plexapp.plugins.library.db") backed_up_files+=("library.db") ;;
"com.plexapp.plugins.library.blobs.db") backed_up_files+=("blobs.db") ;;
"Preferences.xml") backed_up_files+=("Preferences.xml") ;;
*) backed_up_files+=("$(basename "$file")") ;;
esac
else
log_error "Verification failed: $(basename "$file")"
backup_errors=$((backup_errors + 1))
# Remove failed backup
rm -f "$backup_file"
fi
else
log_error "Failed to copy: $(basename "$file")"
backup_errors=$((backup_errors + 1))
fi
else
log_warning "File not found: $file"
fi
done
# Start Plex service
manage_plex_service start
# Create archive if files were backed up
if [ "$files_backed_up" -gt 0 ]; then
log_message "Creating compressed archive..."
# Check backup root directory is writable
if [ ! -w "$BACKUP_ROOT" ]; then
log_error "Backup root directory is not writable: $BACKUP_ROOT"
backup_errors=$((backup_errors + 1))
else
local temp_archive
temp_archive="/tmp/plex-backup-$(date '+%Y%m%d_%H%M%S').tar.gz"
local final_archive
final_archive="${BACKUP_ROOT}/plex-backup-$(date '+%Y%m%d_%H%M%S').tar.gz"
log_info "Temporary archive: $temp_archive"
log_info "Final archive: $final_archive"
# Create archive in /tmp first, containing only the backed up files
local temp_dir
temp_dir="/tmp/plex-backup-staging-$(date '+%Y%m%d_%H%M%S')"
if ! mkdir -p "$temp_dir"; then
log_error "Failed to create staging directory: $temp_dir"
backup_errors=$((backup_errors + 1))
else
log_info "Created staging directory: $temp_dir"
# Copy backed up files to staging directory
local files_staged=0
for nickname in "${!PLEX_FILES[@]}"; do
local file="${PLEX_FILES[$nickname]}"
local backup_file
backup_file="${BACKUP_PATH}/$(basename "$file")"
if [ -f "$backup_file" ]; then
if cp "$backup_file" "$temp_dir/"; then
files_staged=$((files_staged + 1))
log_info "Staged for archive: $(basename "$backup_file")"
else
log_warning "Failed to stage file: $(basename "$backup_file")"
fi
else
log_warning "Backup file not found for staging: $(basename "$backup_file")"
fi
done
# Check if any files were staged
if [ "$files_staged" -eq 0 ]; then
log_error "No files were staged for archive creation"
rm -rf "$temp_dir"
backup_errors=$((backup_errors + 1))
else
log_info "Staged $files_staged files for archive creation"
# Check disk space in /tmp
local temp_available_kb
temp_available_kb=$(df /tmp | awk 'NR==2 {print $4}')
local temp_available_mb=$((temp_available_kb / 1024))
local staging_size_mb
staging_size_mb=$(du -sm "$temp_dir" | cut -f1)
log_info "/tmp available space: ${temp_available_mb}MB, staging directory size: ${staging_size_mb}MB"
# Check if we have enough space (require 3x staging size for compression)
local required_space_mb=$((staging_size_mb * 3))
if [ "$temp_available_mb" -lt "$required_space_mb" ]; then
log_error "Insufficient space in /tmp for archive creation. Required: ${required_space_mb}MB, Available: ${temp_available_mb}MB"
rm -rf "$temp_dir"
backup_errors=$((backup_errors + 1))
else
# Create archive with detailed error logging
log_info "Creating archive: $(basename "$temp_archive")"
local tar_output
tar_output=$(tar -czf "$temp_archive" -C "$temp_dir" . 2>&1)
local tar_exit_code=$?
# Force filesystem sync after archive creation
sync
if [ $tar_exit_code -eq 0 ]; then
# Verify archive was actually created and has reasonable size
if [ -f "$temp_archive" ]; then
local archive_size_mb
archive_size_mb=$(du -sm "$temp_archive" | cut -f1)
log_success "Archive created successfully: $(basename "$temp_archive") (${archive_size_mb}MB)"
# Test archive integrity before moving
if tar -tzf "$temp_archive" >/dev/null 2>&1; then
log_success "Archive integrity verified"
# Move the completed archive to the backup root
if mv "$temp_archive" "$final_archive"; then
# Force filesystem sync after final move
sync
log_success "Archive moved to final location: $(basename "$final_archive")"
# Remove individual backup files and staging directory
rm -rf "$temp_dir"
for nickname in "${!PLEX_FILES[@]}"; do
local file="${PLEX_FILES[$nickname]}"
local backup_file
backup_file="${BACKUP_PATH}/$(basename "$file")"
rm -f "$backup_file" "$backup_file.md5"
done
else
log_error "Failed to move archive to final location: $final_archive"
log_error "Temporary archive remains at: $temp_archive"
rm -rf "$temp_dir"
backup_errors=$((backup_errors + 1))
fi
else
log_error "Archive integrity check failed - archive may be corrupted"
log_error "Archive size: ${archive_size_mb}MB"
rm -f "$temp_archive"
rm -rf "$temp_dir"
backup_errors=$((backup_errors + 1))
fi
else
log_error "Archive file was not created despite tar success"
rm -rf "$temp_dir"
backup_errors=$((backup_errors + 1))
fi
else
log_error "Failed to create archive (tar exit code: $tar_exit_code)"
if [ -n "$tar_output" ]; then
log_error "Tar command output: $tar_output"
fi
# Additional diagnostic information
log_error "Staging directory contents:"
find "$temp_dir" -ls 2>&1 | while IFS= read -r line; do
log_error " $line"
done
local temp_usage
temp_usage=$(df -h /tmp | awk 'NR==2 {print "Used: " $3 "/" $2 " (" $5 ")"}')
log_error "Temp filesystem status: $temp_usage"
rm -rf "$temp_dir"
backup_errors=$((backup_errors + 1))
fi
fi
fi
fi
fi
# Send notification
local files_list
files_list=$(format_backed_up_files "${backed_up_files[@]}")
send_notification "Backup Completed" "Successfully backed up $files_list" "success"
else
log_message "No files needed backup"
fi
# Cleanup old backups
cleanup_old_backups
# Track overall backup performance
if [ "$files_backed_up" -gt 0 ]; then
track_performance "full_backup" "$backup_start"
fi
track_performance "total_script" "$overall_start"
# Generate performance report
generate_performance_report
# Final summary
local total_time=$(($(date +%s) - overall_start))
log_message "Backup process completed at $(date)"
log_message "Total execution time: ${total_time}s"
log_message "Files backed up: $files_backed_up"
log_message "Errors encountered: $backup_errors"
# Sync logs to shared location and cleanup old local logs
log_info "Post-backup: synchronizing logs and cleaning up old files"
sync_logs_to_shared
cleanup_old_local_logs
if [ "$backup_errors" -gt 0 ]; then
log_error "Backup completed with errors"
send_notification "Backup Error" "Backup completed with $backup_errors errors" "error"
exit 4 # documented exit code for backup creation failure
else
log_success "Enhanced backup completed successfully"
local files_list
files_list=$(format_backed_up_files "${backed_up_files[@]}")
send_notification "Backup Success" "$files_list backed up successfully in ${total_time}s" "success"
fi
}
# Trap to ensure Plex is restarted on script exit
trap 'manage_plex_service start' EXIT
# Run main function
main "$@"