#!/bin/bash
#
# Processes epochs one at a time: download the epoch artifacts, ingest them
# into ClickHouse, then delete the epoch directory so that only one epoch's
# files occupy disk at any moment.
#
# External commands used: python, clickhouse-client, df, awk, rm.
set -e

# Keeps the window open until the user confirms, on every exit path.
# The prompt is skipped when stdin is not a terminal so that automated or
# piped runs neither block nor have a line of their input consumed.
pause_on_exit() {
  if [[ -t 0 ]]; then
    read -r -p "Press Enter to exit..."
  fi
}
trap pause_on_exit EXIT

# Colors (backslash escapes are expanded by printf's %b at print time)
RED='\033[0;31m'
GREEN='\033[0;32m'
BLUE='\033[0;34m'
CYAN='\033[0;36m'
NC='\033[0m'

# Helper functions
# Message text is printed with %s so backslashes in it are never interpreted.

# Prints $1 as a banner framed by two rule lines, with blank lines around it.
header() {
  local rule='========================================'
  printf '\n%b%s%b\n%b %s%b\n%b%s%b\n\n' \
    "$CYAN" "$rule" "$NC" \
    "$CYAN" "${1-}" "$NC" \
    "$CYAN" "$rule" "$NC"
}

# Prints $1 as an informational message.
log() {
  printf '%b[INFO]%b %s\n' "$BLUE" "$NC" "${1-}"
}

# Prints $1 as a success message.
success() {
  printf '%b[SUCCESS]%b %s\n' "$GREEN" "$NC" "${1-}"
}

# Prints $1 as an error message on stderr and terminates with status 1.
error() {
  printf '%b[ERROR]%b %s\n' "$RED" "$NC" "${1-}" >&2
  exit 1
}

# Prints the row count of ClickHouse table $1, or "0" when the query fails.
# Deliberately best effort: the result only feeds progress/summary messages,
# so a failed query must not abort the run. Capturing the output first
# ensures a failed query yields exactly "0" rather than partial output
# followed by "0".
table_count() {
  local count
  if count=$(clickhouse-client --query "SELECT count() FROM $1" 2>/dev/null); then
    printf '%s\n' "$count"
  else
    echo "0"
  fi
}

#===============================================================================
# Step 5+6: Download, Ingest, Delete (one epoch at a time to save disk)
#===============================================================================
header "Step 5-6/7: Processing Epochs (Download → Ingest → Delete)"

EPOCHS=(852 853)

log "Processing epochs one at a time to minimize disk usage..."
log "Each epoch: ~20GB download → ingest → delete"
echo ""

for epoch in "${EPOCHS[@]}"; do
  EPOCH_DIR="./data/pump_fun/epoch_${epoch}"

  log "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
  log "Processing epoch ${epoch}..."

  # Step 1: Download
  log " [1/3] Downloading epoch ${epoch}..."
  if ! python scripts/download_epoch_artifacts.py --epoch "$epoch"; then
    error "Failed to download epoch ${epoch}. Cannot continue."
  fi

  # Step 2: Ingest (ClickHouse only)
  log " [2/3] Ingesting epoch ${epoch} into ClickHouse database..."
  if ! python scripts/ingest_epoch.py --epoch "$epoch" --skip-neo4j; then
    error "Ingestion failed for epoch ${epoch}. Cannot continue."
  fi

  # Step 3: Delete parquet files to free disk space
  log " [3/3] Cleaning up epoch ${epoch} parquet files..."
  # ':?' aborts instead of running rm with an empty path; '--' ends options.
  rm -rf -- "${EPOCH_DIR:?}"

  # Show progress
  CURRENT_MINTS=$(table_count mints)
  CURRENT_TRADES=$(table_count trades)
  log " Progress: ${CURRENT_MINTS} mints, ${CURRENT_TRADES} trades"
  # -P keeps each filesystem on a single output line so that NR==2 is always
  # the data row; -h is given last so human-readable sizes stay in effect.
  log " Disk free: $(df -Ph . | awk 'NR==2{print $4}')"
done

# Final verification
log ""
log "Verifying final data..."
MINTS=$(table_count mints)
TRADES=$(table_count trades)
log " 📊 Mints: ${MINTS}"
log " 📊 Trades: ${TRADES}"

success "All epochs processed and ingested"