llm-energy-tests / scripts /run_benchmark.sh
magnuscruz's picture
Initial rejuvenation to ensure a clean state before testing
6f5cdd3
#!/bin/bash
### ==============================================================================
### DEI Research Benchmark - LLM Software Aging & Thermal Leakage Tool
### Includes: Thermal Throttling, Proactive Rejuvenation, & Warm-Up Isolation
### ==============================================================================
# ==============================================================================
# ROOT PRIVILEGE CHECK
# ==============================================================================
# Refuse to continue unless the effective user is root. The message echoes the
# original invocation back so it can be re-run under sudo; exit status is 1.
if (( EUID != 0 )); then
  echo -e "\n[-] ERROR: This benchmark requires root privileges."
  echo "[-] Proactive Rejuvenation and Thermal Throttling need deep system access."
  echo -e "[*] Please run the script using sudo:\n sudo $0 $*"
  exit 1
fi
# --- Argument Parsing ---
# Usage: sudo <script> <duration_in_seconds> [--throttle <percentage>]
#   $1  length of the deep-aging phase for each model, in whole seconds
#   $2  optional literal "--throttle"
#   $3  optional frequency cap as a whole percentage (1-100); the default of 50
#       is kept when --throttle is given without a value
# Sets: TEST_DURATION, WARMUP_DURATION, THROTTLE_ENABLED, THROTTLE_PERCENTAGE
THROTTLE_ENABLED=false
THROTTLE_PERCENTAGE=50
WARMUP_DURATION=600 # 10 minutes

# The mandatory argument is checked first so that a bad invocation prints the
# usage text without a misleading "Throttling ENABLED" banner in front of it.
if [[ -z "$1" ]]; then
  echo "Usage: sudo $0 <duration_in_seconds> [--throttle <percentage>]"
  exit 1
fi

# The duration is fed into shell arithmetic later on, so accept digits only.
if [[ ! "$1" =~ ^[0-9]{1,9}$ ]] || (( 10#$1 == 0 )); then
  echo "[-] ERROR: <duration_in_seconds> must be a positive whole number (got '$1')." >&2
  exit 1
fi
# 10# forces base 10 so that a value such as "0600" is not read as octal.
TEST_DURATION=$(( 10#$1 ))

if [[ "$2" == "--throttle" ]]; then
  if [[ -n "$3" ]]; then
    if [[ ! "$3" =~ ^[0-9]{1,3}$ ]] || (( 10#$3 < 1 || 10#$3 > 100 )); then
      echo "[-] ERROR: throttle percentage must be a whole number from 1 to 100 (got '$3')." >&2
      exit 1
    fi
    THROTTLE_PERCENTAGE=$(( 10#$3 ))
  fi
  THROTTLE_ENABLED=true
  echo "[!] Throttling ENABLED at ${THROTTLE_PERCENTAGE}% for this run."
fi
# --- Configuration & Paths ---
PROJECT_ROOT="/home/ubuntu/git/llm-energy-tests"
# One log tree per calendar day; the date is evaluated once, at start-up.
BASE_LOG_DIR="$PROJECT_ROOT/logs/$(date +%Y-%m-%d)"
WARMUP_DIR="${BASE_LOG_DIR}/warmup_logs"
AGING_DIR="${BASE_LOG_DIR}/deep_aging"
# Model tags, benchmarked in this order.
MODELS=("qwen2.5:0.5b" "gemma2:2b" "phi3:mini" "llama3.1:8b" "mistral:7b" "wizardlm2:7b" "deepseek-v2:lite")
# Tapo P115 Config
# SECURITY: the fallback literals below are credentials committed to source
# control. Values already present in the environment take precedence (under
# sudo, pass them with --preserve-env=TAPO_USER,TAPO_PASS,TAPO_IP), so the
# literals can be deleted once the password has been rotated.
export TAPO_USER="${TAPO_USER:-magnuscruz@gmail.com}"
export TAPO_PASS="${TAPO_PASS:-Ubuntu123}"
export TAPO_IP="${TAPO_IP:-192.168.0.100}"
# Interpreter from the project's virtualenv; used to run the Tapo logger.
PYTHON_VENV="$PROJECT_ROOT/venv/bin/python3"
# --- Dependency Check ---
# Every missing tool is reported before exiting, so a fresh machine needs only
# one round of installs instead of one failed run per tool.
missing_tools=()
for cmd in jq sensors powerstat bc curl; do
  if ! command -v "$cmd" &> /dev/null; then
    echo "Error: $cmd is not installed. Please install it to continue." >&2
    missing_tools+=("$cmd")
  fi
done
if (( ${#missing_tools[@]} > 0 )); then
  exit 1
fi

# All CSV output is redirected into these directories; stop now rather than
# run for hours with every write failing.
if ! mkdir -p "$WARMUP_DIR" "$AGING_DIR"; then
  echo "Error: could not create log directories under $BASE_LOG_DIR." >&2
  exit 1
fi
# ==============================================================================
# ENHANCED CLEANUP TRAP (NO SUDO)
# ==============================================================================
# Tear the benchmark down and terminate the script.
# Arguments:
#   $1 - optional reason. The signal trap passes "interrupted"; a call with no
#        argument is treated as the normal end-of-run teardown.
# Exit status: 1 when interrupted, 0 otherwise.
# NOTE(review): only the governor is reset here; the upper frequency limit that
# apply_throttling sets with `cpupower frequency-set -u` is left in place.
cleanup() {
  local reason=${1:-finished}
  local exit_code=0

  if [[ "$reason" == "interrupted" ]]; then
    exit_code=1
    echo -e "\n[!] Interrupt received. Executing deep cleanup..."
  else
    echo -e "\n[*] Benchmark finished. Executing deep cleanup..."
  fi

  # Word splitting is intentional: jobs -p prints one PID per line.
  # shellcheck disable=SC2046
  kill $(jobs -p) 2>/dev/null
  rm -f tapo_monitor_temp.py 2>/dev/null

  echo "[*] Stopping inference engine..."
  # Try the systemd unit first, then a Docker container named "ollama".
  systemctl stop ollama 2>/dev/null || docker stop ollama 2>/dev/null

  echo "[*] Flushing system memory..."
  sync
  echo 3 > /proc/sys/vm/drop_caches

  echo "[*] Resetting CPU governor to 'performance'..."
  cpupower frequency-set -g performance > /dev/null

  echo "[*] Cleanup complete. Safe to exit."
  exit "$exit_code"
}
# Only the signal path reports an interruption and a failing exit status.
trap 'cleanup interrupted' SIGINT SIGTERM
# ==============================================================================
# PROACTIVE SOFTWARE REJUVENATION (NO SUDO)
# ==============================================================================
# Restart the inference engine (systemd unit "ollama" first, Docker container
# "ollama" if that fails), then sync and write 3 to /proc/sys/vm/drop_caches.
# Takes no arguments; progress is printed on stdout.
rejuvenate_inference_engine() {
  echo -e "\n[+] Triggering Proactive Software Rejuvenation..."

  if ! systemctl restart ollama 2>/dev/null; then
    docker restart ollama 2>/dev/null
  fi

  sync
  echo 3 > /proc/sys/vm/drop_caches

  echo "[+] Rejuvenation complete. Memory state is now clean."
}
# ==============================================================================
# HARDWARE THERMAL MITIGATION (NO SUDO)
# ==============================================================================
# Switch the CPU to the powersave governor and cap its maximum frequency at
# THROTTLE_PERCENTAGE percent of the hardware maximum.
# Globals:
#   THROTTLE_ENABLED    (read) - nothing happens unless this is "true"
#   THROTTLE_PERCENTAGE (read) - whole number from 1 to 100
# Returns: 0 when throttling is disabled or applied; 1 when the percentage or
#          the reported hardware limits are unusable.
apply_throttling() {
  [[ "$THROTTLE_ENABLED" == true ]] || return 0

  # Validate before touching the CPU: an empty or non-numeric percentage would
  # otherwise end up as a bare "kHz" frequency argument.
  if [[ ! "$THROTTLE_PERCENTAGE" =~ ^[0-9]{1,3}$ ]] \
    || (( 10#$THROTTLE_PERCENTAGE < 1 || 10#$THROTTLE_PERCENTAGE > 100 )); then
    echo "[-] Invalid throttle percentage '${THROTTLE_PERCENTAGE}' (expected 1-100). Throttling skipped." >&2
    return 1
  fi

  echo "[Config] Setting CPU to PowerSave/Low Frequency (${THROTTLE_PERCENTAGE}% limit)..."
  cpupower frequency-set -g powersave > /dev/null

  # The last line of `cpupower frequency-info -l` is read as "<min> <max>";
  # the value is passed back to cpupower with a kHz suffix.
  local min_freq max_freq cap_freq
  read -r min_freq max_freq < <(cpupower frequency-info -l | awk 'END{print $1, $2}')
  if [[ ! "$max_freq" =~ ^[0-9]+$ ]]; then
    echo "[-] Could not read the hardware frequency limits from cpupower. Frequency cap skipped." >&2
    return 1
  fi

  # Integer arithmetic, multiplying before dividing, truncating the result.
  cap_freq=$(( 10#$max_freq * 10#$THROTTLE_PERCENTAGE / 100 ))
  # Never request a ceiling below the hardware floor.
  if [[ "$min_freq" =~ ^[0-9]+$ ]] && (( cap_freq < 10#$min_freq )); then
    cap_freq=$(( 10#$min_freq ))
  fi

  cpupower frequency-set -u "${cap_freq}kHz" > /dev/null
}
# ==============================================================================
# ASYNCHRONOUS TAPO PHYSICAL LOGGER
# ==============================================================================
# Write the Python helper that samples the Tapo P115 plug and prints
# "Time,Watts" CSV rows on stdout, sleeping one second between samples. It reads
# TAPO_USER, TAPO_PASS and TAPO_IP from the environment. The quoted 'EOF'
# delimiter stops the shell from expanding anything inside the Python source.
cat << 'EOF' > tapo_monitor_temp.py
import asyncio, time, sys, os
from tapo import ApiClient


async def monitor():
    try:
        client = ApiClient(os.environ['TAPO_USER'], os.environ['TAPO_PASS'])
        device = await client.p115(os.environ['TAPO_IP'])
        print('Time,Watts')
        sys.stdout.flush()
        while True:
            energy = await device.get_current_power()
            print(f'{time.strftime("%H:%M:%S")},{energy.current_power}')
            sys.stdout.flush()
            await asyncio.sleep(1)
    except Exception as e:
        # Report the failure instead of silently leaving a truncated CSV.
        print(f'tapo monitor stopped: {e!r}', file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    asyncio.run(monitor())
EOF
# Launch the Tapo power logger in the background, writing its stdout to
# <dir>/<model>_<timestamp>_physical.csv.
# Arguments:
#   $1 - directory that receives the CSV
#   $2 - model tag; every ':' becomes '_' in the file name
#   $3 - timestamp string embedded in the file name
# Globals:
#   PYTHON_VENV (read)  - interpreter used to run tapo_monitor_temp.py
#   TAPO_PID    (write) - PID of the background logger, for the caller to kill
start_tapo_monitor() {
  local target_dir=$1
  local safe_model=${2//:/_}
  local timestamp=$3
  local out_file="${target_dir}/${safe_model}_${timestamp}_physical.csv"

  # Quoted so an interpreter path containing spaces stays one word.
  "$PYTHON_VENV" tapo_monitor_temp.py > "$out_file" 2>/dev/null &
  TAPO_PID=$!
}
# ==============================================================================
# INFERENCE STRESS LOOP
# ==============================================================================
# Convert a token count and an eval duration in nanoseconds to tokens/second.
# Arguments: $1 - token count, $2 - duration in nanoseconds
# Outputs:   the rate truncated to two decimals; "0.00" when either input is
#            not a usable whole number or the duration is zero.
_tokens_per_second() {
  local tokens=$1
  local nanos=$2

  # The digit limits keep the multiplication below inside 64-bit range.
  if [[ ! "$tokens" =~ ^[0-9]{1,7}$ || ! "$nanos" =~ ^[0-9]{1,18}$ ]] \
    || (( 10#$nanos == 0 )); then
    echo "0.00"
    return 0
  fi

  # Scale first, divide last: dividing the duration down to seconds first would
  # truncate it and turn short evaluations into a division by zero.
  local hundredths=$(( 10#$tokens * 100000000000 / 10#$nanos ))
  printf '%d.%02d\n' "$(( hundredths / 100 ))" "$(( hundredths % 100 ))"
}

# Send the same prompt to the local generate endpoint until the duration has
# elapsed, appending one row per request to two CSV files.
# Arguments:
#   $1 - directory that receives the CSV files
#   $2 - model tag (':' becomes '_' in file names)
#   $3 - run time in whole seconds; checked between requests only, so a
#        request already in flight is allowed to finish
#   $4 - timestamp string embedded in the file names
# Outputs:
#   <dir>/<model>_<ts>_inference.csv  Time,Total_Dur,Load_Dur,Prompt_Eval_Count,Eval_Count,TPS
#   <dir>/<model>_<ts>_system.csv     Time,Temp,CPU_Load,RAM_MB
# Returns: 1 when the duration is not a whole number.
run_inference_loop() {
  local TARGET_DIR=$1
  local MODEL=$2
  local DURATION=$3
  local TIMESTAMP=$4
  local SAFE_MODEL=${MODEL//:/_}
  local INF_CSV="${TARGET_DIR}/${SAFE_MODEL}_${TIMESTAMP}_inference.csv"
  local SYS_CSV="${TARGET_DIR}/${SAFE_MODEL}_${TIMESTAMP}_system.csv"
  local PROMPT="Analyze the implications of software aging on distributed edge network reliability."
  local PAYLOAD END_TIME CUR_TIME TEMP RAM_MB CPU_LOAD RESPONSE
  local TOT LOD PEC EC ED TPS

  # The duration goes straight into shell arithmetic; accept digits only.
  if [[ ! "$DURATION" =~ ^[0-9]+$ ]]; then
    echo "[-] run_inference_loop: duration must be a whole number of seconds (got '$DURATION')." >&2
    return 1
  fi

  # The request body never changes, so build it once.
  printf -v PAYLOAD '{"model": "%s", "prompt": "%s", "stream": false}' "$MODEL" "$PROMPT"

  echo "Time,Total_Dur,Load_Dur,Prompt_Eval_Count,Eval_Count,TPS" > "$INF_CSV"
  echo "Time,Temp,CPU_Load,RAM_MB" > "$SYS_CSV"

  END_TIME=$(( $(date +%s) + 10#$DURATION ))
  while [ "$(date +%s)" -lt "$END_TIME" ]; do
    CUR_TIME=$(date +%H:%M:%S)

    # --- system sample ---
    TEMP=$(sensors 2>/dev/null | awk '/Package id 0:/ {print $4}' | tr -d '+°C')
    if [ -z "$TEMP" ]; then
      # Fall back to the first thermal zone; the value is divided by 1000.
      TEMP=$(awk '{print $1/1000}' /sys/class/thermal/thermal_zone0/temp 2>/dev/null)
      TEMP=${TEMP:-0}
    fi
    RAM_MB=$(free -m | awk '/Mem:/ {print $3}')
    # CPU load is 100 minus the idle percentage reported by top.
    CPU_LOAD=$(top -bn1 | grep "Cpu(s)" | sed 's/.*, *\([0-9.]*\)%* id.*/\1/' | awk '{print 100 - $1}')
    echo "$CUR_TIME,$TEMP,$CPU_LOAD,$RAM_MB" >> "$SYS_CSV"

    # --- inference request ---
    RESPONSE=$(curl -s -X POST http://localhost:11434/api/generate -d "$PAYLOAD")
    TOT=$(jq -r '.total_duration // 0' <<< "$RESPONSE" 2>/dev/null)
    LOD=$(jq -r '.load_duration // 0' <<< "$RESPONSE" 2>/dev/null)
    PEC=$(jq -r '.prompt_eval_count // 0' <<< "$RESPONSE" 2>/dev/null)
    EC=$(jq -r '.eval_count // 0' <<< "$RESPONSE" 2>/dev/null)
    ED=$(jq -r '.eval_duration // 1' <<< "$RESPONSE" 2>/dev/null)
    # jq prints nothing when the response is not JSON; fall back to defaults.
    TOT=${TOT:-0}
    LOD=${LOD:-0}
    PEC=${PEC:-0}
    EC=${EC:-0}
    ED=${ED:-1}

    TPS=$(_tokens_per_second "$EC" "$ED")
    TPS=${TPS:-0.00}
    echo "$CUR_TIME,$TOT,$LOD,$PEC,$EC,$TPS" >> "$INF_CSV"

    # Back off after any request that did not report a positive total
    # duration, so a dead or restarting server is not hammered in a tight loop.
    if [[ ! "$TOT" =~ ^[1-9][0-9]*$ ]]; then
      sleep 2
    fi
  done
}
# ==============================================================================
# MAIN EXECUTION FLOW
# ==============================================================================
# Run one measured phase: start the power logger, drive the inference loop for
# the requested time, then stop and reap the logger.
# Arguments: $1 - log directory, $2 - model tag, $3 - duration in seconds,
#            $4 - timestamp used in the log file names
# Globals:   INF_PID (written), TAPO_PID (read; set by start_tapo_monitor)
run_phase() {
  local phase_dir=$1
  local phase_model=$2
  local phase_duration=$3
  local phase_stamp=$4

  start_tapo_monitor "$phase_dir" "$phase_model" "$phase_stamp"
  # Background + wait keeps the SIGINT/SIGTERM trap responsive: bash defers a
  # trap until a foreground command returns, but `wait` is interrupted at once.
  run_inference_loop "$phase_dir" "$phase_model" "$phase_duration" "$phase_stamp" &
  INF_PID=$!
  wait "$INF_PID"
  kill "$TAPO_PID" 2>/dev/null
  # Reap the logger so no finished job lingers between phases.
  wait "$TAPO_PID" 2>/dev/null
}

echo "Starting DEI Research Benchmark. Target: $TEST_DURATION s per model."
apply_throttling
# Initial rejuvenation to ensure a clean state before testing
rejuvenate_inference_engine

for MODEL in "${MODELS[@]}"; do
  echo "=========================================================="
  echo " Evaluating Model: $MODEL"
  echo "=========================================================="

  # Pull the latest model version to ensure consistency. If the pull fails,
  # carry on only when a local copy exists; otherwise both phases would just
  # log failed requests for their whole duration.
  if ! ollama pull "$MODEL"; then
    if ollama show "$MODEL" > /dev/null 2>&1; then
      echo "[-] Warning: could not pull $MODEL; using the local copy." >&2
    else
      echo "[-] Warning: could not pull $MODEL and no local copy exists; skipping." >&2
      continue
    fi
  fi

  TIMESTAMP=$(date +%H%M%S)
  echo "[*] Executing ${WARMUP_DURATION}s Warm-Up Phase..."
  run_phase "$WARMUP_DIR" "$MODEL" "$WARMUP_DURATION" "$TIMESTAMP"

  echo "[*] Warm-up complete. Preparing for continuous Deep Aging evaluation."
  rejuvenate_inference_engine

  TIMESTAMP=$(date +%H%M%S)
  echo "----------------------------------------------------------"
  echo "[*] Executing Continuous Deep Aging Phase ($TEST_DURATION seconds)..."
  run_phase "$AGING_DIR" "$MODEL" "$TEST_DURATION" "$TIMESTAMP"

  # Check the exact file this phase wrote, not the newest file matching a
  # glob, so a stale log from an earlier run cannot pass for success.
  LATEST_FILE="${AGING_DIR}/${MODEL//:/_}_${TIMESTAMP}_inference.csv"
  if [ -f "$LATEST_FILE" ]; then
    echo "[+] Successfully generated log: $LATEST_FILE"
  else
    echo "[-] Warning: Expected log file not found in $AGING_DIR"
  fi

  # Clean up the model to prevent interference with subsequent runs. The
  # fallback runs the same command inside the "ollama" container; stopping or
  # removing the container itself would break every remaining model.
  ollama stop "$MODEL" 2>/dev/null || docker exec ollama ollama stop "$MODEL" 2>/dev/null
  ollama rm "$MODEL" 2>/dev/null || docker exec ollama ollama rm "$MODEL" 2>/dev/null
done

rm -f tapo_monitor_temp.py
echo "Benchmark Suite Finished Successfully."
# Final teardown: cleanup stops the engine, flushes memory, resets the CPU
# governor and terminates the script.
cleanup