#!/bin/bash
#
# SSH service health check.
#
# Checks whether an sshd process is running, whether the SSH port is
# listening and whether a TCP connection to localhost on that port succeeds,
# then prints a text or JSON report.
#
# The settings below can be overridden through the environment; command-line
# options parsed later in this script take precedence over both.
#
# Exit codes: 0 healthy, 1 unhealthy, 2 degraded.

# Port the SSH daemon is expected to listen on.
SSH_PORT=${SSH_PORT:-22}
# Seconds allowed for the TCP connection probe.
TIMEOUT=${TIMEOUT:-5}
# Individual checks; switched off with --no-process / --no-port / --no-response.
CHECK_PROCESS=true
CHECK_PORT=true
CHECK_RESPONSE=true
# Report format: "text" or "json".
OUTPUT_FORMAT=${OUTPUT_FORMAT:-"text"}
# When "true", text output is followed by extended diagnostics.
DETAILED=${DETAILED:-false}
# When non-empty, a result summary is POSTed to this URL.
MONITOR_API_URL=${MONITOR_API_URL:-""}

# Print the usage text (options and exit codes) to stdout.
# $0 is expanded so the usage line shows how the script was invoked.
show_help() {
  cat <<EOF
Usage: $0 [OPTIONS]
SSH Service Health Check Script

Options:
 -p, --port PORT SSH port (default: 22)
 -t, --timeout SEC Connection timeout (default: 5)
 --no-process Skip process check
 --no-port Skip port check
 --no-response Skip response check
 -f, --format FMT Output format: text, json (default: text)
 -d, --detailed Show detailed diagnostic information
 -a, --api-url URL Report results to monitoring API
 -h, --help Show this help

Exit codes:
 0 Healthy
 1 Unhealthy
 2 Degraded
EOF
}

# Print a command-line error plus the usage text on stderr and exit 1.
# Arguments: $1 - error message
usage_error() {
  printf '%s\n' "$1" >&2
  show_help >&2
  exit 1
}

# Abort with a usage error unless option $1 is followed by a value.
# Without this guard `shift 2` fails when the value is missing, nothing is
# shifted, and the parsing loop would spin forever.
# Arguments: the remaining command-line arguments, option first
require_value() {
  if (( $# < 2 )); then
    usage_error "Option $1 requires a value"
  fi
}

# Parse command-line options into the global settings.
# Globals written: SSH_PORT, TIMEOUT, CHECK_PROCESS, CHECK_PORT,
#   CHECK_RESPONSE, OUTPUT_FORMAT, DETAILED, MONITOR_API_URL
# Arguments: the script's command-line arguments
# Exits: 0 after printing help; 1 on an unknown option, a missing option
#   value, or an invalid port / timeout / format.
parse_args() {
  # Port and timeout are later embedded unquoted in JSON output, so only
  # plain JSON-compatible numbers are accepted.
  local port_re='^[1-9][0-9]{0,4}$'
  local timeout_re='^(0|[1-9][0-9]*)(\.[0-9]+)?$'

  while (( $# > 0 )); do
    case "$1" in
      -p|--port)
        require_value "$@"
        # The range test only runs once the regex has proven $2 is numeric.
        if ! [[ "$2" =~ $port_re ]] || (( $2 > 65535 )); then
          usage_error "Invalid port: $2 (expected 1-65535)"
        fi
        SSH_PORT="$2"
        shift 2
        ;;
      -t|--timeout)
        require_value "$@"
        if ! [[ "$2" =~ $timeout_re ]]; then
          usage_error "Invalid timeout: $2 (expected a number of seconds)"
        fi
        TIMEOUT="$2"
        shift 2
        ;;
      --no-process)
        CHECK_PROCESS=false
        shift
        ;;
      --no-port)
        CHECK_PORT=false
        shift
        ;;
      --no-response)
        CHECK_RESPONSE=false
        shift
        ;;
      -f|--format)
        require_value "$@"
        case "$2" in
          text|json) OUTPUT_FORMAT="$2" ;;
          *) usage_error "Invalid format: $2 (expected text or json)" ;;
        esac
        shift 2
        ;;
      -d|--detailed)
        DETAILED=true
        shift
        ;;
      -a|--api-url)
        require_value "$@"
        MONITOR_API_URL="$2"
        shift 2
        ;;
      -h|--help)
        show_help
        exit 0
        ;;
      *)
        usage_error "Unknown option: $1"
        ;;
    esac
  done
}

parse_args "$@"

# Print extended diagnostics: system information, sshd processes, sockets on
# the SSH port, an sshd configuration test, resource usage and recent logs.
# Globals: SSH_PORT (read)
# Outputs: human-readable report on stdout
collect_detailed_diagnostics() {
  echo ""
  echo "=== Detailed Diagnostics ==="
  echo ""

  echo "--- System Information ---"
  uname -a
  echo ""

  echo "--- SSH Processes ---"
  # The [s] bracket expression keeps grep from matching its own command line.
  ps aux | grep '[s]shd' || echo "No SSH processes found"
  echo ""

  echo "--- Network Connections (SSH port $SSH_PORT) ---"
  if command -v netstat &>/dev/null; then
    netstat -tulnp 2>/dev/null | grep ":$SSH_PORT " \
      || echo "Port $SSH_PORT not listening"
  elif command -v ss &>/dev/null; then
    ss -tulnp 2>/dev/null | grep ":$SSH_PORT " \
      || echo "Port $SSH_PORT not listening"
  else
    echo "Neither netstat nor ss is available"
  fi
  echo ""

  echo "--- SSH Configuration Test ---"
  if [[ ! -f /etc/ssh/sshd_config ]]; then
    echo "SSH configuration file not found"
  elif ! command -v sshd &>/dev/null; then
    # Keep a missing binary apart from a genuine configuration error.
    echo "sshd binary not found in PATH; cannot test configuration"
  else
    # NOTE(review): sshd -t may also fail for reasons other than the config
    # file itself (e.g. insufficient privileges) - confirm when it reports errors.
    sshd -t 2>&1 || echo "SSH configuration has errors"
  fi
  echo ""

  echo "--- System Resources ---"
  echo "Memory Usage:"
  if command -v free &>/dev/null; then
    free -h
  else
    echo "free command not available"
  fi
  echo ""
  echo "Disk Usage:"
  df -h
  echo ""
  echo "System Load:"
  uptime
  echo ""

  echo "--- Recent SSH Logs ---"
  if command -v journalctl &>/dev/null; then
    # The service unit is named "ssh" on some distributions and "sshd" on
    # others; ask for both so either one is picked up.
    journalctl -u ssh -u sshd --no-pager -n 10 2>/dev/null \
      || echo "Cannot retrieve journalctl logs"
  elif [[ -f /var/log/auth.log ]]; then
    tail -10 /var/log/auth.log 2>/dev/null || echo "Cannot read /var/log/auth.log"
  elif [[ -f /var/log/secure ]]; then
    tail -10 /var/log/secure 2>/dev/null || echo "Cannot read /var/log/secure"
  else
    echo "No SSH log source found"
  fi
  echo ""
}

# Run the enabled SSH checks and print a report.
#
# Checks: an sshd process exists (error if not), a TCP listener is bound to
# SSH_PORT (error if not), and a TCP connection to localhost:SSH_PORT opens
# within TIMEOUT seconds (warning if not).
#
# Globals read: SSH_PORT, TIMEOUT, CHECK_PROCESS, CHECK_PORT, CHECK_RESPONSE,
#   OUTPUT_FORMAT, DETAILED, MONITOR_API_URL
# Outputs: JSON document or text report on stdout; invalid settings are
#   reported on stderr.
# Returns: 0 healthy, 1 unhealthy (any error, or invalid port/timeout),
#   2 degraded (warnings only).
check_ssh_health() {
  local errors=0
  local warnings=0
  local sshd_pids=""
  local port_listening=false
  local ssh_responding=false
  local health_status="unknown"
  local exit_code=0
  local -a checks=()
  local check_results=""
  local entry pid_list
  local start_time end_time duration probe_start probe_elapsed
  local port_re='^[1-9][0-9]{0,4}$'
  local timeout_re='^(0|[1-9][0-9]*)(\.[0-9]+)?$'

  # Both values are embedded unquoted in the JSON report, so they must be
  # plain numbers. The range test only runs after the regex has matched.
  if ! [[ "$SSH_PORT" =~ $port_re ]] || (( SSH_PORT > 65535 )); then
    printf 'Invalid SSH port: %s\n' "$SSH_PORT" >&2
    return 1
  fi
  if ! [[ "$TIMEOUT" =~ $timeout_re ]]; then
    printf 'Invalid timeout: %s\n' "$TIMEOUT" >&2
    return 1
  fi

  start_time=$(date +%s)

  # 1. Process check: a single pgrep call supplies both the verdict and the PIDs.
  if [[ "$CHECK_PROCESS" == true ]]; then
    if pid_list=$(pgrep -x "sshd"); then
      sshd_pids=${pid_list//$'\n'/ }
      checks+=("{\"check\":\"process\",\"status\":\"ok\",\"pids\":\"$sshd_pids\"}")
    else
      checks+=("{\"check\":\"process\",\"status\":\"failed\",\"message\":\"SSH process not found\"}")
      errors=$((errors + 1))
    fi
  fi

  # 2. Port check: TCP listeners only; netstat first, ss as fallback.
  if [[ "$CHECK_PORT" == true ]]; then
    if netstat -tln 2>/dev/null | grep -q ":${SSH_PORT}[[:space:]]" \
      || ss -tln 2>/dev/null | grep -q ":${SSH_PORT}[[:space:]]"; then
      port_listening=true
      checks+=("{\"check\":\"port\",\"status\":\"ok\",\"port\":$SSH_PORT}")
    else
      checks+=("{\"check\":\"port\",\"status\":\"failed\",\"port\":$SSH_PORT,\"message\":\"Port not listening\"}")
      errors=$((errors + 1))
    fi
  fi

  # 3. Response check: open a TCP connection through bash's /dev/tcp.
  # The port is handed over as a positional argument instead of being pasted
  # into the command string, so its value is never parsed as shell code.
  if [[ "$CHECK_RESPONSE" == true ]]; then
    probe_start=$(date +%s)
    if timeout "$TIMEOUT" bash -c '< "/dev/tcp/localhost/$1"' _ "$SSH_PORT" 2>/dev/null; then
      # Elapsed time of the probe, in whole seconds (date +%s granularity).
      probe_elapsed=$(( $(date +%s) - probe_start ))
      ssh_responding=true
      checks+=("{\"check\":\"response\",\"status\":\"ok\",\"response_time\":\"${probe_elapsed}s\"}")
    else
      checks+=("{\"check\":\"response\",\"status\":\"warning\",\"message\":\"SSH not responding within ${TIMEOUT}s\"}")
      warnings=$((warnings + 1))
    fi
  fi

  end_time=$(date +%s)
  duration=$((end_time - start_time))

  # Errors outrank warnings when deriving the overall status.
  if (( errors > 0 )); then
    health_status="unhealthy"
    exit_code=1
  elif (( warnings > 0 )); then
    health_status="degraded"
    exit_code=2
  else
    health_status="healthy"
    exit_code=0
  fi

  if [[ "$OUTPUT_FORMAT" == "json" ]]; then
    # Comma-join the per-check objects so "checks" is a valid JSON array.
    for entry in "${checks[@]}"; do
      check_results+="${check_results:+,}${entry}"
    done

    cat <<EOF
{
  "timestamp": "$(date -u +%Y-%m-%dT%H:%M:%SZ)",
  "ssh_port": $SSH_PORT,
  "timeout": $TIMEOUT,
  "health_status": "$health_status",
  "errors": $errors,
  "warnings": $warnings,
  "check_duration_seconds": $duration,
  "sshd_pids": "$sshd_pids",
  "port_listening": $port_listening,
  "ssh_responding": $ssh_responding,
  "checks": [$check_results]
}
EOF
  else
    echo "=== SSH Service Health Check ==="
    echo "Time: $(date '+%Y-%m-%d %H:%M:%S')"
    echo "SSH Port: $SSH_PORT"
    echo "Timeout: ${TIMEOUT}s"
    echo "Duration: ${duration}s"
    echo ""

    if [[ "$CHECK_PROCESS" == true ]]; then
      printf '%s' "[1/3] Checking SSH process... "
      if [[ -n "$sshd_pids" ]]; then
        echo "OK (PIDs: $sshd_pids)"
      else
        echo "FAILED - SSH process not found"
      fi
    fi

    if [[ "$CHECK_PORT" == true ]]; then
      printf '%s' "[2/3] Checking SSH port $SSH_PORT... "
      if [[ "$port_listening" == true ]]; then
        echo "OK - Port is listening"
      else
        echo "FAILED - Port $SSH_PORT not listening"
      fi
    fi

    if [[ "$CHECK_RESPONSE" == true ]]; then
      printf '%s' "[3/3] Checking SSH response... "
      if [[ "$ssh_responding" == true ]]; then
        echo "OK - SSH is responding"
      else
        echo "WARNING - SSH port open but not responding (may be starting up)"
      fi
    fi

    echo ""
    echo "=== Health Check Result ==="
    echo "Status: $(printf '%s' "$health_status" | tr '[:lower:]' '[:upper:]')"
    echo "Errors: $errors"
    echo "Warnings: $warnings"

    # Extended diagnostics are only offered in text mode.
    if [[ "$DETAILED" == true ]]; then
      collect_detailed_diagnostics
    fi
  fi

  if [[ -n "$MONITOR_API_URL" ]]; then
    report_to_api "$health_status" "$errors" "$warnings"
  fi

  return "$exit_code"
}

# POST a JSON summary of the health check to the monitoring API.
# Reporting is best effort: failures are ignored so that they never change
# the health-check result.
# Globals read: MONITOR_API_URL; MONITOR_API_TIMEOUT (optional, total seconds
#   allowed for the request, default 10)
# Arguments: $1 - status string, $2 - error count, $3 - warning count
# Returns: always 0
report_to_api() {
  local status=$1
  local errors=$2
  local warnings=$3
  local api_timeout=${MONITOR_API_TIMEOUT:-10}
  local payload

  # Nothing to do without a destination.
  if [[ -z "${MONITOR_API_URL:-}" ]]; then
    return 0
  fi

  payload=$(cat <<EOF
{
  "service": "ssh",
  "status": "$status",
  "errors": $errors,
  "warnings": $warnings,
  "timestamp": "$(date -u +%Y-%m-%dT%H:%M:%SZ)",
  "source": "check_ssh_health.sh"
}
EOF
  )

  # Bound the request so an unresponsive API cannot hang the health check.
  # Output and exit status are deliberately discarded (best effort).
  curl -s --connect-timeout 5 --max-time "$api_timeout" \
    -X POST "$MONITOR_API_URL" \
    -H 'Content-Type: application/json' \
    -d "$payload" >/dev/null 2>&1 || true
}

# Run the health check and exit with its status
# (0 healthy, 1 unhealthy, 2 degraded).
check_ssh_health
exit $?