#!/bin/bash
# ============================================================
# QR-SPPS NB-5 Part A: MPI State-Vector Benchmark (29q + 30q)
# ============================================================
# Run from: ~/QARPdemo
#   cd ~/QARPdemo && sbatch run_nb5_30q.sh
#   tail -f nb5_30q_output.log
#
# Measures actual 29q and 30q state-vector evaluation times on
# Fujitsu A64FX using MPI-distributed state-vector simulation.
# These are the REAL hardware measurements that ground the
# exponential scaling law (R2=0.9948, doubling rate=1.1993/qubit).
#
# MPI layout (2 active ranks):
#   rank 0 -> 29q (SV = 8,590 MB, measured ~595s)
#   rank 1 -> 30q (SV = 17,180 MB, measured ~1192s)  <- PHYSICAL CEILING
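#
# A minimal sketch of the per-rank dispatch one would expect inside
# QRSPPS_NB5_measure30q.py (illustrative only; the helper name is
# hypothetical and the real logic lives in that script):
#
#   from mpi4py import MPI
#   rank = MPI.COMM_WORLD.Get_rank()
#   n_qubits = {0: 29, 1: 30}.get(rank)         # ranks >= 2 stay idle
#   if n_qubits is not None:
#       time_statevector_evaluation(n_qubits)   # hypothetical benchmark helper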
#
# Memory note (why 2 nodes would already be enough):
#   30q state-vector = 17.2 GB raw.
#   + MPI overhead + 40-node observable = ~20-24 GB total per rank.
#   Each A64FX node has 28.9 GB of free RAM, so 2 nodes are sufficient;
#   the 4-node request below (--nodes=4) is for topology stability.
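#
#   The 8,590 MB / 17,180 MB figures above follow from 16 B per amplitude
#   (i.e. double-precision complex state vectors), which can be checked with:
#     echo $(( (1 << 29) * 16 ))   # 8589934592  B ~  8,590 MB (29q)
#     echo $(( (1 << 30) * 16 ))   # 17179869184 B ~ 17,180 MB (30q)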
#
# IMPORTANT: mpi4py IS imported in QRSPPS_NB5_measure30q.py intentionally.
# This is a standalone sbatch script - NOT run from inside Jupyter.
# (Importing mpi4py in Jupyter on a compute node crashes the kernel.)
#
# Depends on: nothing (standalone benchmark - does not need prior pkls)
# Produces:   QRSPPS_mpi_scaling.pkl (saved to ~/QARPdemo/)
#
# After this completes, run:
#   cd ~/QARPdemo && sbatch run_nb5_final.sh
#
# Runtime: ~35 min (29q ~595s + 30q ~1192s + overhead)
#   Wall time requested: 48:00:00, a generous safety margin over the estimate.
# ============================================================
#SBATCH --job-name=qrspps_nb5_30q
#SBATCH --nodes=4
#SBATCH --ntasks-per-node=12
#SBATCH --cpus-per-task=4
#SBATCH --partition=Interactive
#SBATCH --time=48:00:00
#SBATCH --output=nb5_30q_output.log
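
# Geometry: 4 nodes x 12 tasks x 4 cpus-per-task = 48 MPI ranks on 192 cores.
# Per the header, only ranks 0 and 1 carry the 29q/30q measurements.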
source ~/QARPdemo/setup_env.sh

# NOTE: do NOT set QARP_DISABLE_MPI here.
# QRSPPS_NB5_measure30q.py uses mpi4py intentionally.
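# Four OpenMP threads per MPI rank, matching --cpus-per-task=4 above.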
export OMP_NUM_THREADS=4
echo "================================================================"
echo " QR-SPPS NB-5A: MPI 29q + 30q State-Vector Benchmark"
echo "================================================================"
echo " Start : $(date)"
echo " Node  : $(hostname)"
echo " Job   : $SLURM_JOB_ID"
echo " Dir   : $(pwd)"
echo " Nodes : $SLURM_JOB_NUM_NODES"
echo " Tasks : $SLURM_NTASKS"
echo ""
echo " rank 0 -> 29q (SV = 8,590 MB, ~595s)"
echo " rank 1 -> 30q (SV = 17,180 MB, ~1192s)  <- physical memory ceiling"
echo ""
echo " 30q state-vector = 17.2 GB + MPI overhead = ~20-24 GB"
echo " A64FX free RAM per node = 28.9 GB: fits comfortably"
echo "================================================================"
echo ""

echo "=== Starting QRSPPS_NB5_measure30q.py via srun ==="
srun python3 QRSPPS_NB5_measure30q.py
EXIT=$?

echo ""
echo "=== srun exit: $EXIT ($(date)) ==="

# Verify output (saved to ~/QARPdemo/QRSPPS_mpi_scaling.pkl by the script)
MPI_PKL="$HOME/QARPdemo/QRSPPS_mpi_scaling.pkl"
if [ -f "$MPI_PKL" ]; then
| echo "Output: QRSPPS_mpi_scaling.pkl ($(du -h $MPI_PKL | cut -f1)) OK" | |
else
    echo "WARNING: QRSPPS_mpi_scaling.pkl not found at $MPI_PKL"
    echo "Check nb5_30q_output.log for errors."
    exit 1
fi
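
# Optional: peek at the benchmark record (kept commented out; the dictionary
# layout inside the pickle is defined by QRSPPS_NB5_measure30q.py and is not
# assumed here):
#   python3 -c "import pickle, pprint; pprint.pprint(pickle.load(open('$MPI_PKL', 'rb')))"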
echo ""
echo "================================================================"
echo " NB-5A DONE | End: $(date)"
echo "================================================================"
echo ""
echo "Next step:"
echo "  cd ~/QARPdemo && sbatch run_nb5_final.sh"
exit $EXIT