Files
shell/plex/monitor-plex-backup.sh
Peter Wood 58b5dea8b4 Refactor variable assignments and improve script readability in validate-plex-backups.sh and validate-plex-recovery.sh
- Changed inline variable assignments to separate declaration and assignment for clarity.
- Updated condition checks and log messages for better readability and consistency.
- Added a backup of validate-plex-recovery.sh for safety.
- Introduced a new script run-docker-tests.sh for testing setup in Docker containers.
- Enhanced ssh-login.sh to improve condition checks and logging functionality.
2025-06-05 17:14:02 -04:00

507 lines
20 KiB
Bash
Executable File
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/bin/bash
################################################################################
# Plex Backup System Monitoring Dashboard
################################################################################
#
# Author: Peter Wood <peter@peterwood.dev>
# Description: Real-time monitoring dashboard for the Plex backup system
# providing health status, performance metrics, and system
# diagnostics with both static and live refresh modes.
#
# Features:
# - Real-time backup system health monitoring
# - Performance metrics and trending
# - Backup schedule and execution tracking
# - Disk space monitoring and alerts
# - Service status verification
# - Historical backup analysis
# - Watch mode with auto-refresh
#
# Related Scripts:
# - backup-plex.sh: Main backup script being monitored
# - validate-plex-backups.sh: Backup validation system
# - restore-plex.sh: Backup restoration utilities
# - test-plex-backup.sh: Testing framework
# - plex.sh: General Plex service management
#
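# Usage:
# ./monitor-plex-backup.sh # Single status check
# ./monitor-plex-backup.sh --watch # Continuous monitoring (refreshes every 5 seconds by default)
# ./monitor-plex-backup.sh --watch --interval=10 # Continuous monitoring with a custom refresh interval (seconds)
# ./monitor-plex-backup.sh --help # Show help information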
#
# Dependencies:
# - jq (for JSON processing)
# - systemctl (for service status)
# - Access to backup directories and log files
#
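# Exit Codes:
# 0 - Success (also used for --help and when watch mode is interrupted)
# 1 - General error (unknown option or invalid refresh interval)
# 2 - Critical backup system issues (reserved: no code path in this script explicitly exits with it)
# 3 - Missing dependencies (reserved: no code path in this script explicitly exits with it)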
#
################################################################################
# Plex Backup System Monitoring Dashboard
# Provides real-time status and health monitoring for the enhanced backup system
# Stop at the first unhandled command failure.
set -o errexit

# ---------------------------------------------------------------------------
# Locations
# ---------------------------------------------------------------------------
# Directory containing this script (symlinks resolved); the local log
# directory is derived from it.
SCRIPT_DIR=$(dirname "$(readlink -f "$0")")
LOCAL_LOG_ROOT="${SCRIPT_DIR}/logs"
PERFORMANCE_LOG_FILE="${LOCAL_LOG_ROOT}/plex-backup-performance.json"

# Shared storage holding the backup archives and the fallback log directory.
BACKUP_ROOT="/mnt/share/media/backups/plex"
SHARED_LOG_ROOT="/mnt/share/media/backups/logs"

# ---------------------------------------------------------------------------
# Display defaults (may be overridden by command line options)
# ---------------------------------------------------------------------------
WATCH_MODE=false
REFRESH_INTERVAL=5

# ---------------------------------------------------------------------------
# ANSI color escape sequences, stored as literal backslash strings; they are
# turned into real escapes when printed with `echo -e`.
# ---------------------------------------------------------------------------
NC='\033[0m' # reset / no color
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
MAGENTA='\033[0;35m'
CYAN='\033[0;36m'
# Parse command line arguments.
#
# Recognized options:
#   --watch         set WATCH_MODE=true (continuous refresh)
#   --interval=N    set REFRESH_INTERVAL to N (checked later, in watch mode)
#   -h, --help      print usage on stdout and exit 0
#
# Anything else is rejected with exit status 1. The rejection messages are
# diagnostics, so they go to stderr and do not pollute captured stdout.
# Every accepted option is shifted away, so "$@" is empty after the loop.
while [[ $# -gt 0 ]]; do
    case "$1" in
        --watch)
            WATCH_MODE=true
            shift
            ;;
        --interval=*)
            # Keep everything after the first '='.
            REFRESH_INTERVAL="${1#*=}"
            shift
            ;;
        -h|--help)
            echo "Usage: $0 [OPTIONS]"
            echo "Options:"
            echo " --watch Continuous monitoring mode (refresh every 5 seconds)"
            echo " --interval=N Set refresh interval for watch mode (seconds)"
            echo " -h, --help Show this help message"
            exit 0
            ;;
        *)
            echo "Unknown option: $1" >&2
            echo "Use --help for usage information" >&2
            exit 1
            ;;
    esac
done
# Utility functions

# Print one dashboard status line: a colored marker glyph, then the message.
#
# Arguments:
#   $1 - status level: OK, WARN, ERROR or INFO
#   $2 - message text, printed verbatim (backslashes are NOT interpreted)
# Globals read: GREEN, YELLOW, RED, BLUE, NC
# Outputs: exactly one line on stdout
#
# Each level gets its own glyph so the levels stay distinguishable even when
# colors are unavailable. An unrecognized level is still printed (with a
# neutral bullet) instead of being silently dropped.
log_status() {
    local status="$1"
    local message="$2"
    local color marker

    case "$status" in
        OK)    color="$GREEN";  marker="✓" ;;
        WARN)  color="$YELLOW"; marker="⚠" ;;
        ERROR) color="$RED";    marker="✗" ;;
        INFO)  color="$BLUE";   marker="ℹ" ;;
        *)     color="";        marker="•" ;;
    esac

    # %b expands the \033 escapes stored in the color variables; %s keeps the
    # marker and the caller's message literal.
    printf '%b%s%b %s\n' "$color" "$marker" "$NC" "$message"
}
# Wipe the terminal before a redraw, but only while watch mode is on.
# Globals read: WATCH_MODE
clear_screen() {
    # Single-shot runs keep the existing terminal contents.
    [ "$WATCH_MODE" = true ] || return 0
    clear
}
# Locate the newest log file matching a name pattern.
#
# The local log directory is searched first; the shared directory is only
# consulted when the local one is missing or holds no match. "Newest" means
# the last path in sorted order.
#
# Arguments:
#   $1 - file name pattern passed to `find -name`
# Globals read: LOCAL_LOG_ROOT, SHARED_LOG_ROOT
# Outputs: the selected path on stdout, or an empty line when nothing matches
find_most_recent_log() {
    local pattern="$1"
    local newest=""
    local root

    for root in "$LOCAL_LOG_ROOT" "$SHARED_LOG_ROOT"; do
        [ -d "$root" ] || continue
        newest=$(find "$root" -name "$pattern" -type f 2>/dev/null | sort | tail -1)
        if [ -n "$newest" ]; then
            break
        fi
    done

    echo "$newest"
}
# Header display
#
# Draws the dashboard banner: a double-line box containing the title and the
# current timestamp, followed by a blank line.
#
# The horizontal rule and the padding are both derived from inner_width, so
# the side borders always line up with the corners.
#
# Globals read: CYAN, MAGENTA, NC
# Outputs: five lines on stdout (four box rows and one empty line)
show_header() {
    local -r inner_width=78
    local -r title="PLEX BACKUP SYSTEM DASHBOARD"
    local stamp
    stamp=$(date '+%Y-%m-%d %H:%M:%S')

    # Build the horizontal rule: inner_width spaces, each replaced by '═'.
    local rule
    printf -v rule '%*s' "$inner_width" ''
    rule=${rule// /═}

    local pad_left pad_right

    printf '%b\n' "${CYAN}╔${rule}╗${NC}"

    # Title row, centered and highlighted.
    pad_left=$(( (inner_width - ${#title}) / 2 ))
    pad_right=$(( inner_width - ${#title} - pad_left ))
    printf '%b%*s%b%*s%b\n' \
        "${CYAN}║${NC}" "$pad_left" '' \
        "${MAGENTA}${title}${NC}" "$pad_right" '' \
        "${CYAN}║${NC}"

    # Timestamp row, centered, default color.
    pad_left=$(( (inner_width - ${#stamp}) / 2 ))
    pad_right=$(( inner_width - ${#stamp} - pad_left ))
    printf '%b%*s%s%*s%b\n' \
        "${CYAN}║${NC}" "$pad_left" '' \
        "$stamp" "$pad_right" '' \
        "${CYAN}║${NC}"

    printf '%b\n' "${CYAN}╚${rule}╝${NC}"
    echo
}
# System status check
#
# Prints the "SYSTEM STATUS" section: whether the Plex service is active,
# whether the backup script and the backup/log directories exist, and whether
# the helper tools jq, sqlite3 and curl are on PATH.
#
# Globals read: SCRIPT_DIR, BACKUP_ROOT, LOCAL_LOG_ROOT, SHARED_LOG_ROOT,
#               BLUE, NC
# Outputs: section text on stdout via echo and log_status
check_system_status() {
    # Loop variable is local so the function does not overwrite a caller's
    # or global `cmd`.
    local cmd

    echo -e "${BLUE}📊 SYSTEM STATUS${NC}"
    echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"

    # Check Plex service
    if systemctl is-active --quiet plexmediaserver; then
        log_status "OK" "Plex Media Server is running"
    else
        log_status "ERROR" "Plex Media Server is not running"
    fi

    # Check backup script (expected next to this script)
    if [ -f "$SCRIPT_DIR/backup-plex.sh" ]; then
        log_status "OK" "Backup script is present"
    else
        log_status "ERROR" "Backup script not found"
    fi

    # Check directories
    if [ -d "$BACKUP_ROOT" ]; then
        log_status "OK" "Backup directory exists"
    else
        log_status "ERROR" "Backup directory missing: $BACKUP_ROOT"
    fi

    # Check log directories (prioritize local, show shared as secondary)
    if [ -d "$LOCAL_LOG_ROOT" ]; then
        log_status "OK" "Local log directory exists"
    else
        log_status "WARN" "Local log directory missing: $LOCAL_LOG_ROOT"
    fi
    if [ -d "$SHARED_LOG_ROOT" ]; then
        log_status "INFO" "Shared log directory accessible"
    else
        log_status "WARN" "Shared log directory missing: $SHARED_LOG_ROOT"
    fi

    # Check dependencies; a missing tool is a warning, not a failure.
    for cmd in jq sqlite3 curl; do
        if command -v "$cmd" >/dev/null 2>&1; then
            log_status "OK" "$cmd is available"
        else
            log_status "WARN" "$cmd is not installed"
        fi
    done

    echo
}
# Backup status
#
# Prints the "BACKUP STATUS" section: number of archives in BACKUP_ROOT, the
# age/size/entry count of the newest one, and disk usage of the backup
# filesystem.
#
# Archive names are expected to look like plex-backup-YYYYMMDD_HHMMSS.tar.gz;
# the age is computed from the YYYYMMDD part. When that part is missing or not
# a real date, the archive is reported with a warning instead of a made-up
# age derived from the Unix epoch.
#
# Globals read: BACKUP_ROOT, BLUE, NC
# Outputs: section text on stdout via echo and log_status
check_backup_status() {
    echo -e "${BLUE}💾 BACKUP STATUS${NC}"
    echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"

    # Count total backups
    local backup_count=0
    if [ -d "$BACKUP_ROOT" ]; then
        backup_count=$(find "$BACKUP_ROOT" -maxdepth 1 -type f -name "plex-backup-*.tar.gz" 2>/dev/null | wc -l)
    fi

    if [ "$backup_count" -gt 0 ]; then
        log_status "OK" "Total backups: $backup_count"

        # Newest archive = last path in sorted order (timestamps sort lexically).
        local latest_backup
        latest_backup=$(find "$BACKUP_ROOT" -maxdepth 1 -type f -name "plex-backup-*.tar.gz" 2>/dev/null | sort | tail -1)

        if [ -n "$latest_backup" ]; then
            local backup_filename=${latest_backup##*/}

            # Isolate the YYYYMMDD part: drop the prefix, then everything
            # from the first underscore on.
            local backup_date=${backup_filename#plex-backup-}
            backup_date=${backup_date%%_*}

            # Only trust the date when it is eight digits AND `date` accepts it.
            local backup_epoch=""
            local iso_date=""
            if [[ "$backup_date" =~ ^[0-9]{8}$ ]]; then
                iso_date="${backup_date:0:4}-${backup_date:4:2}-${backup_date:6:2}"
                backup_epoch=$(date -d "$iso_date" +%s 2>/dev/null) || backup_epoch=""
            fi

            if [ -n "$backup_epoch" ]; then
                local readable_date
                readable_date=$(date -d "$iso_date" '+%B %d, %Y' 2>/dev/null) || readable_date="$iso_date"
                local backup_age_days=$(( ($(date +%s) - backup_epoch) / 86400 ))

                if [ "$backup_age_days" -le 1 ]; then
                    log_status "OK" "Latest backup: $readable_date ($backup_age_days days ago)"
                elif [ "$backup_age_days" -le 7 ]; then
                    log_status "WARN" "Latest backup: $readable_date ($backup_age_days days ago)"
                else
                    log_status "ERROR" "Latest backup: $readable_date ($backup_age_days days ago)"
                fi
            else
                log_status "WARN" "Latest backup: $backup_filename (date not recognized in file name)"
            fi

            # Check backup size
            local backup_size
            backup_size=$(du -sh "$latest_backup" 2>/dev/null | cut -f1)
            log_status "INFO" "Latest backup size: $backup_size"

            # Check backup contents (via tar listing); an unreadable archive
            # simply yields 0 entries.
            local file_count
            file_count=$(tar -tzf "$latest_backup" 2>/dev/null | wc -l)
            log_status "INFO" "Files in latest backup: $file_count"
        fi
    else
        log_status "WARN" "No backups found"
    fi

    # Disk usage
    if [ -d "$BACKUP_ROOT" ]; then
        local total_backup_size
        total_backup_size=$(du -sh "$BACKUP_ROOT" 2>/dev/null | cut -f1)

        # -P forces one line per filesystem, so NR==2 is always the data row
        # even when the device name is long.
        local available_space
        available_space=$(df -hP "$BACKUP_ROOT" 2>/dev/null | awk 'NR==2 {print $4}')
        local used_percentage
        used_percentage=$(df -P "$BACKUP_ROOT" 2>/dev/null | awk 'NR==2 {sub(/%/, "", $5); print $5}')

        log_status "INFO" "Total backup storage: $total_backup_size"
        log_status "INFO" "Available space: $available_space"

        # Skip the threshold check unless df produced a plain number.
        if [[ "$used_percentage" =~ ^[0-9]+$ ]]; then
            if [ "$used_percentage" -lt 80 ]; then
                log_status "OK" "Disk usage: $used_percentage%"
            elif [ "$used_percentage" -lt 90 ]; then
                log_status "WARN" "Disk usage: $used_percentage%"
            else
                log_status "ERROR" "Disk usage: $used_percentage% (Critical)"
            fi
        fi
    fi

    echo
}
# Performance metrics
#
# Prints the "PERFORMANCE METRICS" section from PERFORMANCE_LOG_FILE, read as
# a JSON array of records with .operation, .timestamp and .duration_seconds:
# the record count, the mean duration per known operation type, and the last
# three records.
#
# Globals read: PERFORMANCE_LOG_FILE, BLUE, YELLOW, NC
# Outputs: section text on stdout via echo and log_status
show_performance_metrics() {
    echo -e "${BLUE}⚡ PERFORMANCE METRICS${NC}"
    echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"

    if [ ! -f "$PERFORMANCE_LOG_FILE" ]; then
        log_status "WARN" "Performance log not found (no backups run yet)"
        echo
        return 0
    fi
    log_status "OK" "Performance log found"

    # Total number of records; any jq failure (missing jq, invalid JSON)
    # is treated as "no records".
    local recent_count
    recent_count=$(jq length "$PERFORMANCE_LOG_FILE" 2>/dev/null) || recent_count=0
    [[ "$recent_count" =~ ^[0-9]+$ ]] || recent_count=0
    log_status "INFO" "Total logged operations: $recent_count"

    if [ "$recent_count" -gt 0 ]; then
        # Average times for different operations.
        # Each spec is "<operation value in the log>:<label used in the message>".
        local spec operation label average
        for spec in \
            "full_backup:backup" \
            "verification:verification" \
            "service_stop:service stop" \
            "service_start:service start"; do
            operation=${spec%%:*}
            label=${spec#*:}
            average=$(jq --arg op "$operation" \
                '[.[] | select(.operation == $op) | .duration_seconds] | if length > 0 then add/length else 0 end' \
                "$PERFORMANCE_LOG_FILE" 2>/dev/null) || average=0
            # 0 means "no samples"; null means the samples carried no duration.
            if [ -n "$average" ] && [ "$average" != "0" ] && [ "$average" != "null" ]; then
                log_status "INFO" "Average $label time: ${average}s"
            fi
        done

        # Last 3 operations
        echo -e "${YELLOW}Recent Operations:${NC}"
        # The timestamp is tidied inside jq ("T" -> space, "+HH:MM" offset
        # dropped) so the clean-up can only ever touch the timestamp field,
        # never the operation name or duration that follow it on the line.
        local recent_ops
        recent_ops=$(jq -r '.[-3:] | .[]
            | (.timestamp | tostring | sub("T"; " ") | sub("\\+.*$"; "")) as $ts
            | " \($ts): \(.operation) (\(.duration_seconds)s)"' \
            "$PERFORMANCE_LOG_FILE" 2>/dev/null) || recent_ops=""

        if [ -n "$recent_ops" ]; then
            printf '%s\n' "$recent_ops"
        else
            echo " No recent operations"
        fi
    fi

    echo
}
# Recent activity
#
# Prints the "RECENT ACTIVITY" section: which backup log is the most recent
# one (and whether it came from the local or shared log directory) plus how
# many "ERROR:" and "WARNING:" lines it contains.
#
# Globals read: LOCAL_LOG_ROOT, BLUE, NC
# Calls: find_most_recent_log, log_status
# Outputs: section text on stdout
show_recent_activity() {
    echo -e "${BLUE}📋 RECENT ACTIVITY${NC}"
    echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"

    # Check recent log files
    local recent_log
    recent_log=$(find_most_recent_log "plex-backup-*.log")

    if [ -z "$recent_log" ]; then
        log_status "WARN" "No recent logs found"
        echo
        return 0
    fi

    # Label = file name without the "plex-backup-" prefix and ".log" suffix.
    local log_date=${recent_log##*/}
    log_date=${log_date#plex-backup-}
    log_date=${log_date%.log}

    local log_location=" (shared)"
    if [[ "$recent_log" == "$LOCAL_LOG_ROOT"* ]]; then
        log_location=" (local)"
    fi
    log_status "INFO" "Most recent log: $log_date$log_location"

    # Check for errors in recent log.
    # grep -c prints "0" AND exits non-zero when nothing matches, so the exit
    # status is ignored and only an empty result (unreadable file) is
    # defaulted. Appending a fallback "0" here would produce "0<newline>0".
    local error_count warning_count
    error_count=$(grep -c "ERROR:" "$recent_log" 2>/dev/null) || true
    warning_count=$(grep -c "WARNING:" "$recent_log" 2>/dev/null) || true
    error_count=${error_count:-0}
    warning_count=${warning_count:-0}

    if [ "$error_count" -eq 0 ] && [ "$warning_count" -eq 0 ]; then
        log_status "OK" "No errors or warnings in recent log"
    elif [ "$error_count" -eq 0 ]; then
        log_status "WARN" "$warning_count warnings in recent log"
    else
        log_status "ERROR" "$error_count errors, $warning_count warnings in recent log"
    fi

    echo
}
# Scheduling status
#
# Prints the "SCHEDULING STATUS" section: crontab entries of the current user
# that mention "backup-plex", and the state of a systemd timer whose listing
# mentions "plex-backup".
#
# Globals read: BLUE, YELLOW, NC
# Outputs: section text on stdout via echo, printf and log_status
show_scheduling_status() {
    echo -e "${BLUE}⏰ SCHEDULING STATUS${NC}"
    echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"

    # Check cron jobs: gather the matching crontab lines once.
    local -a cron_entries=()
    local entry
    while IFS= read -r entry; do
        cron_entries+=("$entry")
    done < <(crontab -l 2>/dev/null | grep "backup-plex")

    local cron_jobs=${#cron_entries[@]}
    if [ "$cron_jobs" -gt 0 ]; then
        log_status "OK" "Cron jobs configured: $cron_jobs"
        echo -e "${YELLOW}Cron Schedule:${NC}"
        printf ' %s\n' "${cron_entries[@]}"
    else
        log_status "WARN" "No cron jobs found for backup-plex"
    fi

    # Check systemd timers
    if ! systemctl list-timers --all 2>/dev/null | grep -q "plex-backup"; then
        log_status "INFO" "No systemd timer configured"
        echo
        return 0
    fi
    log_status "OK" "Systemd timer configured"

    local timer_status
    timer_status=$(systemctl is-active plex-backup.timer 2>/dev/null || echo "inactive")
    case "$timer_status" in
        active)
            log_status "OK" "Timer is active"
            # First two whitespace-separated columns of the timer's row.
            local next_run
            next_run=$(systemctl list-timers plex-backup.timer 2>/dev/null | grep "plex-backup" | awk '{print $1, $2}')
            if [ -n "$next_run" ]; then
                log_status "INFO" "Next run: $next_run"
            fi
            ;;
        *)
            log_status "WARN" "Timer is inactive"
            ;;
    esac

    echo
}
# Health recommendations
#
# Prints the "RECOMMENDATIONS" section. Collects advice about stale or
# missing backups, missing scheduling, a nearly full backup disk and a
# missing jq binary, then prints each item (or a single all-clear line).
#
# Archive names are expected to look like plex-backup-YYYYMMDD_HHMMSS.tar.gz.
# An archive whose name does not contain a valid date gets its own
# recommendation instead of an age computed from the Unix epoch.
#
# Globals read: BACKUP_ROOT, BLUE, NC
# Outputs: section text on stdout via echo and log_status
show_recommendations() {
    echo -e "${BLUE}💡 RECOMMENDATIONS${NC}"
    echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"

    local recommendations=()
    local rec # loop variable, kept out of the global scope

    # Check backup age
    if [ -d "$BACKUP_ROOT" ]; then
        local latest_backup
        latest_backup=$(find "$BACKUP_ROOT" -maxdepth 1 -type f -name "plex-backup-*.tar.gz" 2>/dev/null | sort | tail -1)

        if [ -n "$latest_backup" ]; then
            local backup_filename=${latest_backup##*/}

            # Isolate the YYYYMMDD part of the file name.
            local backup_date=${backup_filename#plex-backup-}
            backup_date=${backup_date%%_*}

            local backup_epoch=""
            if [[ "$backup_date" =~ ^[0-9]{8}$ ]]; then
                backup_epoch=$(date -d "${backup_date:0:4}-${backup_date:4:2}-${backup_date:6:2}" +%s 2>/dev/null) || backup_epoch=""
            fi

            if [ -n "$backup_epoch" ]; then
                local backup_age_days=$(( ($(date +%s) - backup_epoch) / 86400 ))
                if [ "$backup_age_days" -gt 7 ]; then
                    recommendations+=("Consider running a manual backup - latest backup is $backup_age_days days old")
                fi
            else
                recommendations+=("Latest backup has no recognizable date in its name ($backup_filename) - verify backup naming")
            fi
        else
            recommendations+=("No backups found - run initial backup with: sudo ./backup-plex.sh")
        fi
    fi

    # Check scheduling: recommend only when neither cron nor systemd has an entry.
    if ! crontab -l 2>/dev/null | grep -q "backup-plex" \
        && ! systemctl list-timers --all 2>/dev/null | grep -q "plex-backup"; then
        recommendations+=("Set up automated backup scheduling with cron or systemd timer")
    fi

    # Check disk space
    if [ -d "$BACKUP_ROOT" ]; then
        # -P keeps each filesystem on one line so NR==2 is the data row.
        local used_percentage
        used_percentage=$(df -P "$BACKUP_ROOT" 2>/dev/null | awk 'NR==2 {sub(/%/, "", $5); print $5}')
        if [[ "$used_percentage" =~ ^[0-9]+$ ]] && [ "$used_percentage" -gt 85 ]; then
            recommendations+=("Backup disk usage is high ($used_percentage%) - consider cleaning old backups")
        fi
    fi

    # Check dependencies
    if ! command -v jq >/dev/null 2>&1; then
        recommendations+=("Install jq for enhanced performance monitoring: sudo apt install jq")
    fi

    # Show recommendations
    if [ ${#recommendations[@]} -eq 0 ]; then
        log_status "OK" "No immediate recommendations - system looks healthy!"
    else
        for rec in "${recommendations[@]}"; do
            log_status "INFO" "$rec"
        done
    fi

    echo
}
# Footer with refresh info
#
# Prints a separator rule followed by one hint line; the hint depends on
# whether watch mode is active.
# Globals read: WATCH_MODE, REFRESH_INTERVAL, CYAN, NC
show_footer() {
    local hint
    if [ "$WATCH_MODE" = true ]; then
        hint="📡 WATCH MODE: Refreshing every ${REFRESH_INTERVAL} seconds | Press Ctrl+C to exit"
    else
        hint="💡 Use --watch for continuous monitoring | Use --help for options"
    fi

    # The rule is identical in both modes; only the hint differs.
    echo -e "${CYAN}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
    echo -e "${CYAN}${hint}${NC}"
}
# Main dashboard function
#
# Renders one complete dashboard frame: optionally clears the screen, then
# runs every section renderer in display order.
show_dashboard() {
    local section

    clear_screen
    for section in \
        show_header \
        check_system_status \
        check_backup_status \
        show_performance_metrics \
        show_recent_activity \
        show_scheduling_status \
        show_recommendations \
        show_footer; do
        "$section"
    done
}
# Main execution
#
# Single-shot mode renders the dashboard once. Watch mode first checks that
# REFRESH_INTERVAL is a positive integer (exit 1 otherwise), then redraws
# forever, sleeping REFRESH_INTERVAL seconds between frames.
# Globals read: WATCH_MODE, REFRESH_INTERVAL
main() {
    # Single run
    if [ "$WATCH_MODE" != true ]; then
        show_dashboard
        return
    fi

    # Validate refresh interval
    if ! [[ "$REFRESH_INTERVAL" =~ ^[0-9]+$ ]] || [ "$REFRESH_INTERVAL" -lt 1 ]; then
        echo "Error: Invalid refresh interval. Must be a positive integer."
        exit 1
    fi

    # Continuous monitoring; the loop only ends when the script is terminated.
    while :; do
        show_dashboard
        sleep "$REFRESH_INTERVAL"
    done
}
# Handle interrupts gracefully in watch mode
# On SIGINT (Ctrl+C) or SIGTERM: print a "stopped" notice in yellow and exit
# with status 0. The handler string is single-quoted, so ${YELLOW} and ${NC}
# are expanded when the signal arrives, not when the trap is installed.
# The trap must be registered before main starts, because the watch loop in
# main never returns.
trap 'echo -e "\n\n${YELLOW}Monitoring stopped by user${NC}"; exit 0' INT TERM
# Run main function
# main takes its settings from the WATCH_MODE / REFRESH_INTERVAL globals; the
# option-parsing loop above has already shifted every argument away, so "$@"
# is empty at this point.
main "$@"