#!/bin/bash
# ============================================================
# QR-SPPS NB-5 Part A: MPI State-Vector Benchmark (29q + 30q)
# ============================================================
# Run from: ~/QARPdemo
# cd ~/QARPdemo && sbatch run_nb5_30q.sh
# tail -f nb5_30q_output.log
#
# Measures actual 29q and 30q state-vector evaluation times on
# Fujitsu A64FX using MPI-distributed state-vector simulation.
# These are the REAL hardware measurements that ground the
# exponential scaling law (R^2=0.9948, doubling rate=1.1993/qubit).
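#
# (A doubling-rate fit of this kind can be reproduced with a log2-linear
#  regression of runtime vs. qubit count. Illustrative two-point sketch
#  using only the figures below, not the full measurement set behind the
#  quoted fit:
#    python3 -c "import numpy as np; print(np.polyfit([29,30], np.log2([595,1192]), 1))"
#  The first returned coefficient is the per-qubit log2 slope.)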
#
# MPI layout (2 active ranks):
# rank 0 -> 29q (SV = 8,590 MB, measured ~595s)
# rank 1 -> 30q (SV = 17,180 MB, measured ~1192s) <- PHYSICAL CEILING
#
# Memory note (why 2 nodes already suffice):
# 30q state-vector = 17.2 GB raw (see the arithmetic check just below).
# + MPI overhead + 40-node observable = ~20-24 GB total per rank.
# Each A64FX node has 28.9 GB free RAM, so 2 nodes are enough.
# (--nodes=4 below mirrors the 4-node salloc allocation, which provides
#  topology stability.)
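# (Arithmetic behind the 17.2 GB figure: 2^30 amplitudes x 16 B per
#  complex128 amplitude = 17,179,869,184 B ~= 17.2 GB.)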
#
# IMPORTANT: mpi4py IS imported in QRSPPS_NB5_measure30q.py intentionally.
# This is a standalone sbatch script - NOT run from inside Jupyter.
# (Importing mpi4py in Jupyter on a compute node crashes the kernel.)
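#
# (Rank dispatch inside the Python script presumably follows the standard
#  mpi4py pattern. A sketch with a hypothetical run_benchmark helper, not
#  the script's confirmed code:
#    from mpi4py import MPI
#    rank = MPI.COMM_WORLD.Get_rank()
#    if rank == 0:
#        run_benchmark(n_qubits=29)
#    elif rank == 1:
#        run_benchmark(n_qubits=30)
#  All other ranks stay idle, per the 2-active-rank layout above.)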
#
# Depends on: nothing (standalone benchmark - does not need prior pkls)
# Produces: QRSPPS_mpi_scaling.pkl (saved to ~/QARPdemo/)
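# (To inspect it afterwards, a generic pickle load works; no assumptions
#  about the stored keys are made here:
#    python3 -c "import pickle; print(pickle.load(open('QRSPPS_mpi_scaling.pkl','rb')))"
#  Run it from ~/QARPdemo so the relative path resolves.)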
#
# After this completes, run:
# cd ~/QARPdemo && sbatch run_nb5_final.sh
#
# Runtime: ~35 min (29q ~595s + 30q ~1192s + overhead)
# Wall time padded to 48:00:00 for safety.
# ============================================================
#SBATCH --job-name=qrspps_nb5_30q
#SBATCH --nodes=4
#SBATCH --ntasks-per-node=12
#SBATCH --cpus-per-task=4
#SBATCH --partition=Interactive
#SBATCH --time=48:00:00
#SBATCH --output=nb5_30q_output.log
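# Layout note: 12 tasks/node x 4 cores/task = 48 cores, i.e. the full
# compute-core count of an A64FX node.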
source ~/QARPdemo/setup_env.sh
# NOTE: do NOT set QARP_DISABLE_MPI here.
# QRSPPS_NB5_measure30q.py uses mpi4py intentionally.
export OMP_NUM_THREADS=4
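# One OpenMP thread per allocated core (matches --cpus-per-task=4 above).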
echo "================================================================"
echo " QR-SPPS NB-5A: MPI 29q + 30q State-Vector Benchmark"
echo "================================================================"
echo " Start : $(date)"
echo " Node : $(hostname)"
echo " Job : $SLURM_JOB_ID"
echo " Dir : $(pwd)"
echo " Nodes : $SLURM_JOB_NUM_NODES"
echo " Tasks : $SLURM_NTASKS"
echo ""
echo " rank 0 -> 29q (SV = 8,590 MB, ~595s)"
echo " rank 1 -> 30q (SV = 17,180 MB, ~1192s) <- physical memory ceiling"
echo ""
echo " 30q state-vector = 17.2 GB + MPI overhead = ~20-24 GB"
echo " A64FX free RAM per node = 28.9 GB: fits comfortably"
echo "================================================================"
echo ""
echo "=== Starting QRSPPS_NB5_measure30q.py via srun ==="
srun python3 QRSPPS_NB5_measure30q.py
EXIT=$?
echo ""
echo "=== srun exit: $EXIT ($(date)) ==="
# Verify output (saved to ~/QARPdemo/QRSPPS_mpi_scaling.pkl by the script)
MPI_PKL="$HOME/QARPdemo/QRSPPS_mpi_scaling.pkl"
if [ -f "$MPI_PKL" ]; then
echo "Output: QRSPPS_mpi_scaling.pkl ($(du -h $MPI_PKL | cut -f1)) OK"
else
echo "WARNING: QRSPPS_mpi_scaling.pkl not found at $MPI_PKL"
echo "Check nb5_30q_output.log for errors."
exit 1
fi
echo ""
echo "================================================================"
echo " NB-5A DONE | End: $(date)"
echo "================================================================"
echo ""
echo "Next step:"
echo " cd ~/QARPdemo && sbatch run_nb5_final.sh"
exit $EXIT