#!/bin/bash
# ============================================================
# QR-SPPS NB-5 Part A: MPI State-Vector Benchmark (29q + 30q)
# ============================================================
# Run from: ~/QARPdemo
#   cd ~/QARPdemo && sbatch run_nb5_30q.sh
#   tail -f nb5_30q_output.log
#
# Measures actual 29q and 30q state-vector evaluation times on
# Fujitsu A64FX using MPI-distributed state-vector simulation.
# These are the REAL hardware measurements that ground the
# exponential scaling law (R2=0.9948, doubling rate=1.1993/qubit).
#
# MPI layout (2 active ranks):
#   rank 0 -> 29q (SV = 8,590 MB, measured ~595s)
#   rank 1 -> 30q (SV = 17,180 MB, measured ~1192s)  <- PHYSICAL CEILING
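#
# A minimal sketch of the per-rank dispatch one would expect inside
# QRSPPS_NB5_measure30q.py (illustrative only; the helper name is
# hypothetical and the real logic lives in that script):
#
#   from mpi4py import MPI
#   rank = MPI.COMM_WORLD.Get_rank()
#   n_qubits = {0: 29, 1: 30}.get(rank)         # ranks >= 2 stay idle
#   if n_qubits is not None:
#       time_statevector_evaluation(n_qubits)   # hypothetical benchmark helper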
#
# Memory note (why 2 nodes would already be enough):
#   30q state-vector = 17.2 GB raw.
#   + MPI overhead + 40-node observable = ~20-24 GB total per rank.
#   Each A64FX node has 28.9 GB of free RAM, so 2 nodes are sufficient;
#   the 4-node request below (--nodes=4) is for topology stability.
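#
#   The 8,590 MB / 17,180 MB figures above follow from 16 B per amplitude
#   (i.e. double-precision complex state vectors), which can be checked with:
#     echo $(( (1 << 29) * 16 ))   # 8589934592  B ~  8,590 MB (29q)
#     echo $(( (1 << 30) * 16 ))   # 17179869184 B ~ 17,180 MB (30q)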
#
# IMPORTANT: mpi4py IS imported in QRSPPS_NB5_measure30q.py intentionally.
# This is a standalone sbatch script - NOT run from inside Jupyter.
# (Importing mpi4py in Jupyter on a compute node crashes the kernel.)
#
# Depends on: nothing (standalone benchmark - does not need prior pkls)
# Produces:   QRSPPS_mpi_scaling.pkl (saved to ~/QARPdemo/)
#
# After this completes, run:
#   cd ~/QARPdemo && sbatch run_nb5_final.sh
#
# Runtime: ~35 min (29q ~595s + 30q ~1192s + overhead)
#   Wall time requested: 48:00:00, a generous safety margin over the estimate.
# ============================================================
#SBATCH --job-name=qrspps_nb5_30q
#SBATCH --nodes=4
#SBATCH --ntasks-per-node=12
#SBATCH --cpus-per-task=4
#SBATCH --partition=Interactive
#SBATCH --time=48:00:00
#SBATCH --output=nb5_30q_output.log
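
# Geometry: 4 nodes x 12 tasks x 4 cpus-per-task = 48 MPI ranks on 192 cores.
# Per the header, only ranks 0 and 1 carry the 29q/30q measurements.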
source ~/QARPdemo/setup_env.sh

# NOTE: do NOT set QARP_DISABLE_MPI here.
# QRSPPS_NB5_measure30q.py uses mpi4py intentionally.
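# Four OpenMP threads per MPI rank, matching --cpus-per-task=4 above.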
export OMP_NUM_THREADS=4
echo "================================================================"
echo " QR-SPPS NB-5A: MPI 29q + 30q State-Vector Benchmark"
echo "================================================================"
echo " Start : $(date)"
echo " Node  : $(hostname)"
echo " Job   : $SLURM_JOB_ID"
echo " Dir   : $(pwd)"
echo " Nodes : $SLURM_JOB_NUM_NODES"
echo " Tasks : $SLURM_NTASKS"
echo ""
echo " rank 0 -> 29q (SV = 8,590 MB, ~595s)"
echo " rank 1 -> 30q (SV = 17,180 MB, ~1192s)  <- physical memory ceiling"
echo ""
echo " 30q state-vector = 17.2 GB + MPI overhead = ~20-24 GB"
echo " A64FX free RAM per node = 28.9 GB: fits comfortably"
echo "================================================================"
echo ""

echo "=== Starting QRSPPS_NB5_measure30q.py via srun ==="
srun python3 QRSPPS_NB5_measure30q.py
EXIT=$?

echo ""
echo "=== srun exit: $EXIT ($(date)) ==="

# Verify output (saved to ~/QARPdemo/QRSPPS_mpi_scaling.pkl by the script)
MPI_PKL="$HOME/QARPdemo/QRSPPS_mpi_scaling.pkl"
if [ -f "$MPI_PKL" ]; then
| echo "Output: QRSPPS_mpi_scaling.pkl ($(du -h $MPI_PKL | cut -f1)) OK" | |
else
    echo "WARNING: QRSPPS_mpi_scaling.pkl not found at $MPI_PKL"
    echo "Check nb5_30q_output.log for errors."
    exit 1
fi
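
# Optional: peek at the benchmark record (kept commented out; the dictionary
# layout inside the pickle is defined by QRSPPS_NB5_measure30q.py and is not
# assumed here):
#   python3 -c "import pickle, pprint; pprint.pprint(pickle.load(open('$MPI_PKL', 'rb')))"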
echo ""
echo "================================================================"
echo " NB-5A DONE | End: $(date)"
echo "================================================================"
echo ""
echo "Next step:"
echo "  cd ~/QARPdemo && sbatch run_nb5_final.sh"
exit $EXIT