Jinhuiye commited on
Commit
c8173fb
·
verified ·
1 Parent(s): 673bdaa

Add files using upload-large-folder tool

Browse files
Files changed (50) hide show
  1. README.md +185 -0
  2. config.yaml +72 -0
  3. dataset_statistics.json +133 -0
  4. logs/libero_10/0405_libero4in1_CosmoPredict2GR00T_checkpoints_steps_30000_pytorch_model.pt.log +0 -0
  5. logs/libero_10/0405_libero4in1_CosmoPredict2GR00T_checkpoints_steps_40000_pytorch_model.pt.log +0 -0
  6. logs/libero_goal/0405_libero4in1_CosmoPredict2GR00T_checkpoints_steps_30000_pytorch_model.pt.log +0 -0
  7. logs/libero_goal/0405_libero4in1_CosmoPredict2GR00T_checkpoints_steps_40000_pytorch_model.pt.log +0 -0
  8. logs/libero_goal/0405_libero4in1_CosmoPredict2GR00T_checkpoints_steps_50000_pytorch_model.pt.log +0 -0
  9. logs/libero_object/0405_libero4in1_CosmoPredict2GR00T_checkpoints_steps_30000_pytorch_model.pt.log +0 -0
  10. logs/libero_object/0405_libero4in1_CosmoPredict2GR00T_checkpoints_steps_40000_pytorch_model.pt.log +0 -0
  11. logs/libero_object/0405_libero4in1_CosmoPredict2GR00T_checkpoints_steps_50000_pytorch_model.pt.log +0 -0
  12. logs/libero_spatial/0405_libero4in1_CosmoPredict2GR00T_checkpoints_steps_30000_pytorch_model.pt.log +0 -0
  13. logs/libero_spatial/0405_libero4in1_CosmoPredict2GR00T_checkpoints_steps_50000_pytorch_model.pt.log +0 -0
  14. run_libero_train.sh +137 -0
  15. slurm_script +123 -0
  16. summary.jsonl +5 -0
  17. wandb/wandb/debug-internal.log +0 -0
  18. wandb/wandb/debug.log +0 -0
  19. wandb/wandb/run-20260405_002559-7eurt4f2/files/output.log +1 -0
  20. wandb/wandb/run-20260405_002559-7eurt4f2/files/requirements.txt +223 -0
  21. wandb/wandb/run-20260405_002559-7eurt4f2/logs/debug-core.log +7 -0
  22. wandb/wandb/run-20260405_002559-7eurt4f2/logs/debug-internal.log +9 -0
  23. wandb/wandb/run-20260405_002559-7eurt4f2/run-7eurt4f2.wandb +0 -0
  24. wandb/wandb/run-20260405_002750-5ap8nrhh/files/config.yaml +166 -0
  25. wandb/wandb/run-20260405_002750-5ap8nrhh/files/wandb-summary.json +1 -0
  26. wandb/wandb/run-20260405_002750-5ap8nrhh/logs/debug-core.log +13 -0
  27. wandb/wandb/run-20260405_002750-5ap8nrhh/logs/debug-internal.log +30 -0
  28. wandb/wandb/run-20260405_002750-5ap8nrhh/logs/debug.log +0 -0
  29. wandb/wandb/run-20260405_002750-5ap8nrhh/run-5ap8nrhh.wandb +0 -0
  30. wandb/wandb/run-20260405_003208-ioijlwyr/files/output.log +38 -0
  31. wandb/wandb/run-20260405_003208-ioijlwyr/files/requirements.txt +227 -0
  32. wandb/wandb/run-20260405_003208-ioijlwyr/logs/debug-internal.log +145 -0
  33. wandb/wandb/run-20260405_003208-ioijlwyr/logs/debug.log +0 -0
  34. wandb/wandb/run-20260405_005243-cidnpq4g/files/output.log +6 -0
  35. wandb/wandb/run-20260405_005243-cidnpq4g/logs/debug-core.log +8 -0
  36. wandb/wandb/run-20260405_005243-cidnpq4g/logs/debug-internal.log +13 -0
  37. wandb/wandb/run-20260405_005243-cidnpq4g/logs/debug.log +0 -0
  38. wandb/wandb/run-20260405_010110-owocwt3k/files/output.log +116 -0
  39. wandb/wandb/run-20260405_010110-owocwt3k/files/wandb-metadata.json +137 -0
  40. wandb/wandb/run-20260405_010110-owocwt3k/files/wandb-summary.json +1 -0
  41. wandb/wandb/run-20260405_010110-owocwt3k/logs/debug-core.log +14 -0
  42. wandb/wandb/run-20260405_010110-owocwt3k/logs/debug-internal.log +16 -0
  43. wandb/wandb/run-20260405_010110-owocwt3k/logs/debug.log +0 -0
  44. wandb/wandb/run-20260405_010110-owocwt3k/run-owocwt3k.wandb +0 -0
  45. wandb/wandb/run-20260405_013707-x3y2577m/files/output.log +0 -0
  46. wandb/wandb/run-20260405_013707-x3y2577m/files/requirements.txt +227 -0
  47. wandb/wandb/run-20260405_013707-x3y2577m/files/wandb-metadata.json +149 -0
  48. wandb/wandb/run-20260405_013707-x3y2577m/logs/debug-core.log +7 -0
  49. wandb/wandb/run-20260405_013707-x3y2577m/logs/debug-internal.log +0 -0
  50. wandb/wandb/run-20260405_013707-x3y2577m/logs/debug.log +0 -0
README.md ADDED
@@ -0,0 +1,185 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: mit
3
+ library_name: starVLA
4
+ pipeline_tag: robotics
5
+ tags:
6
+ - vla
7
+ - vision-language-action
8
+ - robotics
9
+ - flow-matching
10
+ - cosmos
11
+ - gr00t
12
+ - manipulation
13
+ - libero
14
+ datasets:
15
+ - IPEC-COMMUNITY/libero_lerobot
16
+ language:
17
+ - en
18
+ base_model:
19
+ - nvidia/Cosmos-Predict2-2B-Video2World
20
+ ---
21
+
22
+ # StarVLA-CosmoPredict2GR00T-LIBERO-4in1
23
+
24
+ A **Vision-Language-Action (VLA)** model from the [StarVLA](https://github.com/starVLA/starVLA)
25
+ project, built on a **Cosmos-Predict2-2B** world model as the visual backbone,
26
+ driving a **GR00T-style DiT flow-matching action head** (`CosmoPredict2GR00T`).
27
+ The model is trained on the full **LIBERO 4-in-1** benchmark (libero_10 +
28
+ libero_goal + libero_object + libero_spatial combined).
29
+
30
+ `CosmoPredict2GR00T` is StarVLA's architecture that extracts visual
31
+ world-model features from **NVIDIA Cosmos-Predict2-2B** (a video-to-world
32
+ diffusion model) and feeds them into a cross-attention DiT flow-matching
33
+ action head inspired by the GR00T N1 design:
34
+
35
+ 1. **Cosmos-Predict2 visual features** — the last-layer activations of
36
+ `Cosmos-Predict2-2B-Video2World` serve as rich spatiotemporal visual
37
+ representations. 32 target vision tokens are extracted and passed to the
38
+ action head.
39
+ 2. **Cross-attention flow-matching DiT** — a 16-layer DiT-B with
40
+ cross-attention (cross-attention dim 2048, interleaved self-attention,
41
+ adaptive LayerNorm) generates action chunks via flow matching.
42
+ 3. **Language conditioning via instruction tokens** — the task instruction is
43
+ tokenised and injected into the DiT cross-attention alongside the visual
44
+ tokens; no separate VLM backbone is used.
45
+
46
+ ---
47
+
48
+ ## Model Summary
49
+
50
+ | | |
51
+ | --- | --- |
52
+ | **Architecture** | `CosmoPredict2GR00T` (Cosmos-Predict2 visual backbone + cross-attn FM DiT) |
53
+ | **Visual backbone** | [`Cosmos-Predict2-2B-Video2World`](https://huggingface.co/nvidia/Cosmos-Predict2-2B-Video2World) |
54
+ | **Action head** | Cross-attention Flow-Matching DiT-B (16 layers, 1024 hidden) |
55
+ | **Action chunk** | 8 steps (+ 7 future-window steps) |
56
+ | **Action / state dim** | 7 / 7 (delta end-effector) |
57
+ | **Image resolution** | 224 × 224, single 3rd-person view |
58
+ | **Inference timesteps** | 4 (flow matching) |
59
+ | **License** | MIT |
60
+ | **Codebase** | [starVLA/starVLA](https://github.com/starVLA/starVLA) |
61
+
62
+ ---
63
+
64
+ ## Training Data
65
+
66
+ **LIBERO 4-in-1** mixture (`libero_all`) — all four LIBERO task suites
67
+ combined into a single training stream:
68
+
69
+ | Suite | Tasks | Description |
70
+ | --- | ---: | --- |
71
+ | `libero_10` | 10 | Long-horizon tabletop manipulation |
72
+ | `libero_goal` | 10 | Goal-conditioned rearrangement |
73
+ | `libero_object` | 10 | Object-centric pick-and-place |
74
+ | `libero_spatial` | 10 | Spatially varied placement |
75
+
76
+ - Action representation: **delta end-effector** (7-d, gripper included)
77
+ - Image observation: single primary RGB view, resized to 224 × 224
78
+ - Per-dataset normalisation statistics are stored in
79
+ [`dataset_statistics.json`](dataset_statistics.json).
80
+
81
+ ---
82
+
83
+ ## Training Recipe
84
+
85
+ | | |
86
+ | --- | --- |
87
+ | Total steps | 80,000 (released checkpoints: 30k / 40k / 50k) |
88
+ | Warm-up steps | 5,000 |
89
+ | Per-device batch size | 8 |
90
+ | Hardware | 8 × NVIDIA H100 / A100 (DeepSpeed ZeRO-2) |
91
+ | Precision | bf16, mixed-precision + gradient checkpointing |
92
+ | Optimizer | AdamW (β₁ = 0.9, β₂ = 0.95, ε = 1e-8, wd = 1e-8) |
93
+ | LR (base / VLM) | 2.5e-5 |
94
+ | LR (action head) | 1e-4 |
95
+ | LR scheduler | `cosine_with_min_lr` (min lr 1e-6) |
96
+ | Gradient clipping | 1.0 |
97
+ | Flow-matching noise | β-distribution (α=1.5, β=1.0), s = 0.999 |
98
+ | Repeated diffusion steps | 8 |
99
+ | Frozen modules | none (full fine-tuning) |
100
+
101
+ The exact training config is preserved in
102
+ [`config.yaml`](config.yaml), and the launch script in
103
+ [`run_libero_train.sh`](run_libero_train.sh).
104
+
105
+ ---
106
+
107
+ ## Evaluation — LIBERO 4-in-1
108
+
109
+ Following the standard LIBERO evaluation protocol (50 trials per task per
110
+ suite). Numbers are success rates (↑).
111
+
112
+ | Step | libero_goal | libero_object | libero_spatial | **Avg (3 suites)** |
113
+ | ---: | ---: | ---: | ---: | ---: |
114
+ | 30k | 0.908 | 0.980 | 0.880 | 0.923 |
115
+ | 40k | 0.948 | 0.990 | 0.884 | 0.941 |
116
+ | **50k** | **0.944** | **0.990** | **0.906** | **0.947** |
117
+
118
+ > `libero_10` was not evaluated for this run.
119
+ > Best checkpoint: **`steps_50000_pytorch_model.pt`** — avg **94.7 %** across libero_goal / object / spatial.
120
+
121
+ For comparison with other StarVLA frameworks see the
122
+ [StarVLA Model Zoo](https://github.com/starVLA/starVLA/blob/main/docs/model_zoo.md).
123
+
124
+ ---
125
+
126
+ ## Repository Layout
127
+
128
+ ```
129
+ .
130
+ ├── README.md # this model card
131
+ ├── config.yaml # training config
132
+ ├── run_libero_train.sh # launch script used for this run
133
+ ├── dataset_statistics.json # per-dataset action/state normalisation stats
134
+ ├── summary.jsonl # training step summary
135
+ ├── logs/ # per-suite evaluation logs
136
+ │ ├── libero_goal/
137
+ │ ├── libero_object/
138
+ │ └── libero_spatial/
139
+ ├── videos/ # evaluation rollout videos
140
+ └── checkpoints/
141
+ ├── steps_50000_pytorch_model.pt # ← recommended checkpoint
142
+ ├── steps_40000_pytorch_model.pt
143
+ └── steps_30000_pytorch_model.pt
144
+ ```
145
+
146
+ ---
147
+
148
+ ## How to Use
149
+
150
+ ```bash
151
+ git clone https://github.com/starVLA/starVLA.git
152
+ cd starVLA
153
+ # Follow installation instructions in the StarVLA README.
154
+ ```
155
+
156
+ ```python
157
+ from huggingface_hub import snapshot_download
158
+ from starVLA.model.framework.tools import load_framework_from_checkpoint
159
+
160
+ ckpt_dir = snapshot_download("StarVLA/StarVLA-CosmoPredict2GR00T-LIBERO-4in1")
161
+
162
+ policy = load_framework_from_checkpoint(
163
+ framework_name="CosmoPredict2GR00T",
164
+ config_path=f"{ckpt_dir}/config.yaml",
165
+ checkpoint_path=f"{ckpt_dir}/checkpoints/steps_50000_pytorch_model.pt",
166
+ )
167
+ # policy.predict_action(images, instruction, state) -> action chunk (8 × 7)
168
+ ```
169
+
170
+ For end-to-end LIBERO evaluation see
171
+ [`examples/LIBERO`](https://github.com/starVLA/starVLA/tree/main/examples/LIBERO).
172
+
173
+ ---
174
+
175
+ ## Intended Use & Limitations
176
+
177
+ **Intended use.** Research on vision-language-action models, LIBERO tabletop
178
+ manipulation benchmarks, and as a baseline for dual VLM + world-model
179
+ conditioning architectures.
180
+
181
+ **Out-of-scope / limitations.** This model is trained exclusively on LIBERO
182
+ simulation data with WidowX-style delta end-effector control. Real-robot
183
+ transfer and cross-embodiment generalisation have not been evaluated.
184
+ Performance may degrade on out-of-distribution scenes, objects, or
185
+ instructions not present in the LIBERO training split.
config.yaml ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ datasets:
2
+ vla_data:
3
+ data_mix: libero_all
4
+ data_root_dir: /home/jye624/Datasets/LIBERO
5
+ dataset_py: lerobot_datasets
6
+ per_device_batch_size: 8
7
+ sequential_step_sampling: false
8
+ video_backend: torchvision_av
9
+ framework:
10
+ name: CosmoPredict2GR00T
11
+ action_model:
12
+ action_dim: 7
13
+ action_horizon: 8
14
+ action_model_type: DiT-B
15
+ add_pos_embed: true
16
+ diffusion_model_cfg:
17
+ cross_attention_dim: 2048
18
+ dropout: 0.2
19
+ final_dropout: true
20
+ interleave_self_attention: true
21
+ norm_type: ada_norm
22
+ num_layers: 16
23
+ output_dim: 1024
24
+ positional_embeddings: null
25
+ future_action_window_size: 7
26
+ hidden_size: 1024
27
+ max_seq_len: 1024
28
+ noise_beta_alpha: 1.5
29
+ noise_beta_beta: 1.0
30
+ noise_s: 0.999
31
+ num_inference_timesteps: 4
32
+ num_target_vision_tokens: 32
33
+ num_timestep_buckets: 1000
34
+ past_action_window_size: 0
35
+ repeated_diffusion_steps: 8
36
+ state_dim: 7
37
+ obs_image_size: null
38
+ qwenvl:
39
+ base_vlm: /home/jye624/Models/Pretrained_models/Qwen3-VL-4B-Instruct
40
+ world_model:
41
+ base_wm: ./playground/Pretrained_models/nvidia/Cosmos-Predict2-2B-Video2World
42
+ extract_layers:
43
+ - -1
44
+ output_dir: ./results/Checkpoints/0405_libero4in1_CosmoPredict2GR00T
45
+ run_id: 0405_libero4in1_CosmoPredict2GR00T
46
+ run_root_dir: ./results/Checkpoints
47
+ seed: 42
48
+ trainer:
49
+ eval_interval: 100
50
+ freeze_modules: true
51
+ gradient_accumulation_steps: 1
52
+ gradient_clipping: 1.0
53
+ is_resume: false
54
+ learning_rate:
55
+ action_model: 0.0001
56
+ base: 2.5e-05
57
+ qwen_vl_interface: 1.0e-05
58
+ logging_frequency: 100
59
+ lr_scheduler_type: cosine_with_min_lr
60
+ max_train_steps: 80000
61
+ num_warmup_steps: 5000
62
+ optimizer:
63
+ betas:
64
+ - 0.9
65
+ - 0.95
66
+ eps: 1.0e-08
67
+ weight_decay: 1.0e-08
68
+ save_interval: 10000
69
+ scheduler_specific_kwargs:
70
+ min_lr: 1.0e-06
71
+ wandb_entity: jinhuiye
72
+ wandb_project: starVLA_Libero
dataset_statistics.json ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "franka": {
3
+ "action": {
4
+ "mean": [
5
+ 0.07237596483901143,
6
+ 0.08987006871029735,
7
+ -0.10144743137061596,
8
+ -0.00045383188989944756,
9
+ 0.006273590726777911,
10
+ -0.003878799732774496,
11
+ 0.524486355483532
12
+ ],
13
+ "std": [
14
+ 0.3498823308902479,
15
+ 0.37794140366375184,
16
+ 0.460084266976933,
17
+ 0.0403885784928603,
18
+ 0.06616144248501059,
19
+ 0.07763074391911857,
20
+ 0.4994683356809767
21
+ ],
22
+ "max": [
23
+ 0.9375,
24
+ 0.9375,
25
+ 0.9375,
26
+ 0.3557142913341522,
27
+ 0.375,
28
+ 0.375,
29
+ 1.0
30
+ ],
31
+ "min": [
32
+ -0.9375,
33
+ -0.9375,
34
+ -0.9375,
35
+ -0.2582142949104309,
36
+ -0.375,
37
+ -0.3675000071525574,
38
+ 0.0
39
+ ],
40
+ "q01": [
41
+ -0.8785714507102966,
42
+ -0.8758928775787354,
43
+ -0.9375,
44
+ -0.1510714292526245,
45
+ -0.20678570866584778,
46
+ -0.2742857038974762,
47
+ 0.0
48
+ ],
49
+ "q99": [
50
+ 0.9375,
51
+ 0.9107142686843872,
52
+ 0.9375,
53
+ 0.20357142388820648,
54
+ 0.26357144117355347,
55
+ 0.375,
56
+ 1.0
57
+ ],
58
+ "mask": [
59
+ true,
60
+ true,
61
+ true,
62
+ true,
63
+ true,
64
+ true,
65
+ false
66
+ ]
67
+ },
68
+ "state": {
69
+ "mean": [
70
+ -0.04889854742214084,
71
+ 0.03689368185587227,
72
+ 0.7890402488410473,
73
+ 2.9771945476531982,
74
+ -0.1417286954820156,
75
+ -0.11769362539052963,
76
+ 0.026436020154505968,
77
+ -0.02665513101965189
78
+ ],
79
+ "std": [
80
+ 0.10639013941746686,
81
+ 0.15115733130675715,
82
+ 0.38406895599530033,
83
+ 0.3530238395244304,
84
+ 0.8227341427331599,
85
+ 0.32357567121520087,
86
+ 0.014583991652936385,
87
+ 0.014467005007200339
88
+ ],
89
+ "max": [
90
+ 0.21031762659549713,
91
+ 0.39128610491752625,
92
+ 1.3660105466842651,
93
+ 3.6714255809783936,
94
+ 3.560650587081909,
95
+ 1.386339545249939,
96
+ 0.04233968257904053,
97
+ 0.0013633022317662835
98
+ ],
99
+ "min": [
100
+ -0.4828203022480011,
101
+ -0.3255046010017395,
102
+ 0.008128180168569088,
103
+ 0.35277295112609863,
104
+ -3.641430377960205,
105
+ -1.842738389968872,
106
+ -0.0013586411951109767,
107
+ -0.042040832340717316
108
+ ],
109
+ "q01": [
110
+ -0.42401049643754957,
111
+ -0.2838300323486328,
112
+ 0.009925739830359817,
113
+ 1.3085840785503386,
114
+ -2.886677579879761,
115
+ -1.1599004411697387,
116
+ 0.001503719249740243,
117
+ -0.040336399003863335
118
+ ],
119
+ "q99": [
120
+ 0.1530261474847791,
121
+ 0.3629165390133857,
122
+ 1.2910678112506866,
123
+ 3.303542451858519,
124
+ 2.7496529006957933,
125
+ 0.6893712210655194,
126
+ 0.040610933862626555,
127
+ -0.0015016929572448147
128
+ ]
129
+ },
130
+ "num_transitions": 273465,
131
+ "num_trajectories": 1693
132
+ }
133
+ }
logs/libero_10/0405_libero4in1_CosmoPredict2GR00T_checkpoints_steps_30000_pytorch_model.pt.log ADDED
The diff for this file is too large to render. See raw diff
 
logs/libero_10/0405_libero4in1_CosmoPredict2GR00T_checkpoints_steps_40000_pytorch_model.pt.log ADDED
The diff for this file is too large to render. See raw diff
 
logs/libero_goal/0405_libero4in1_CosmoPredict2GR00T_checkpoints_steps_30000_pytorch_model.pt.log ADDED
The diff for this file is too large to render. See raw diff
 
logs/libero_goal/0405_libero4in1_CosmoPredict2GR00T_checkpoints_steps_40000_pytorch_model.pt.log ADDED
The diff for this file is too large to render. See raw diff
 
logs/libero_goal/0405_libero4in1_CosmoPredict2GR00T_checkpoints_steps_50000_pytorch_model.pt.log ADDED
The diff for this file is too large to render. See raw diff
 
logs/libero_object/0405_libero4in1_CosmoPredict2GR00T_checkpoints_steps_30000_pytorch_model.pt.log ADDED
The diff for this file is too large to render. See raw diff
 
logs/libero_object/0405_libero4in1_CosmoPredict2GR00T_checkpoints_steps_40000_pytorch_model.pt.log ADDED
The diff for this file is too large to render. See raw diff
 
logs/libero_object/0405_libero4in1_CosmoPredict2GR00T_checkpoints_steps_50000_pytorch_model.pt.log ADDED
The diff for this file is too large to render. See raw diff
 
logs/libero_spatial/0405_libero4in1_CosmoPredict2GR00T_checkpoints_steps_30000_pytorch_model.pt.log ADDED
The diff for this file is too large to render. See raw diff
 
logs/libero_spatial/0405_libero4in1_CosmoPredict2GR00T_checkpoints_steps_50000_pytorch_model.pt.log ADDED
The diff for this file is too large to render. See raw diff
 
run_libero_train.sh ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+ # LIBERO 4-in-1 training launcher for CosmoPredict2GR00T (single-node, multi-GPU)
3
+ # Usage: run on a compute node with 2+ GPUs
4
+ # srun --jobid=<JOB_ID> --overlap --pty bash run_libero_train.sh
5
+ set -e
6
+
7
+ # === Conda setup ===
8
+ source /cm/shared/apps/Anaconda3/2023.09-0/etc/profile.d/conda.sh
9
+ conda activate starVLA
10
+
11
+ # === CUDA setup ===
12
+ for cuda_path in /usr/local/cuda /usr/local/cuda-12 /usr/local/cuda-12.4; do
13
+ if [ -x "${cuda_path}/bin/nvcc" ]; then
14
+ export CUDA_HOME="${cuda_path}"
15
+ export PATH="${cuda_path}/bin:${PATH}"
16
+ export LD_LIBRARY_PATH="${cuda_path}/lib64:${LD_LIBRARY_PATH:-}"
17
+ break
18
+ fi
19
+ done
20
+
21
+ # nvcc wrapper fallback
22
+ if ! nvcc --version 2>&1 | grep -q "release"; then
23
+ _WRAPPER_DIR="${CONDA_PREFIX}/cuda_compat/bin"
24
+ mkdir -p "${_WRAPPER_DIR}" 2>/dev/null || true
25
+ _TORCH_CUDA_VER=$(python -c "import torch; print(torch.version.cuda)" 2>/dev/null || echo "12.4")
26
+ _MAJOR=$(echo "${_TORCH_CUDA_VER}" | cut -d. -f1)
27
+ _MINOR=$(echo "${_TORCH_CUDA_VER}" | cut -d. -f2)
28
+ cat > "${_WRAPPER_DIR}/nvcc" << NVCC_EOF
29
+ #!/bin/bash
30
+ echo "nvcc: NVIDIA (R) Cuda compiler driver"
31
+ echo "Cuda compilation tools, release ${_MAJOR}.${_MINOR}, V${_TORCH_CUDA_VER}"
32
+ NVCC_EOF
33
+ chmod +x "${_WRAPPER_DIR}/nvcc"
34
+ export PATH="${_WRAPPER_DIR}:${PATH}"
35
+ export CUDA_HOME="${CONDA_PREFIX}/cuda_compat"
36
+ echo "[INFO] Created nvcc wrapper: CUDA ${_TORCH_CUDA_VER}"
37
+ fi
38
+
39
+ echo "[INFO] CUDA_HOME=$CUDA_HOME"
40
+ nvcc --version 2>/dev/null || echo "[WARN] nvcc not found"
41
+
42
+
43
+ # NCCL settings: tolerate long pauses (e.g. checkpoint saving) without collective-communication timeouts
44
+ export NCCL_BLOCKING_WAIT=1
45
+ export NCCL_ASYNC_ERROR_HANDLING=1
46
+ export NCCL_TIMEOUT=10000 # timeout set to 1 hour (unit: seconds)
47
+ export NCCL_SOCKET_TIMEOUT_MS=360000
48
+ ###########################################################################################
49
+ # === Please modify the following paths according to your environment ===
50
+ cd /home/jye624/Projcets/starVLA
51
+
52
+ Framework_name=CosmoPredict2GR00T
53
+ freeze_module_list=''
54
+ base_vlm=/home/jye624/Models/Pretrained_models/Qwen3-VL-4B-Instruct
55
+ config_yaml=./examples/LIBERO/train_files/starvla_cotrain_libero.yaml
56
+ libero_data_root=/home/jye624/Datasets/LIBERO
57
+ data_mix=libero_all
58
+ run_root_dir=./results/Checkpoints
59
+ run_id=0405_libero4in1_${Framework_name}
60
+ # === End of environment variable configuration ===
61
+ ###########################################################################################
62
+
63
+
64
+ # export WANDB_MODE=disabled
65
+
66
+
67
+ output_dir=${run_root_dir}/${run_id}
68
+ mkdir -p ${output_dir}
69
+ # mv this script to the output dir
70
+ cp $0 ${output_dir}/
71
+
72
+ num_processes=${NUM_PROCESSES:-$(nvidia-smi -L | wc -l)}
73
+ attn_implementation=${ATTN_IMPLEMENTATION:-sdpa}
74
+ accelerate_config_file=${ACCELERATE_CONFIG_FILE:-starVLA/config/deepseeds/deepspeed_zero2.yaml}
75
+ main_process_port=${MAIN_PROCESS_PORT:-29501}
76
+
77
+ export WANDB_API_KEY=${WANDB_API_KEY:-943ecb8d26fc2b3879cbc2d667414974906aebb9}
78
+
79
+
80
+ # Fix: ensure vonneumann1 group is active for NFS file access on compute nodes
81
+ # Worker processes spawned by accelerate/deepspeed may lose supplementary group context
82
+ if id -nG 2>/dev/null | grep -qw vonneumann1; then
83
+ export _STARVLA_GROUP_FIX=vonneumann1
84
+ echo "[INFO] Group vonneumann1 detected, using newgrp for NFS access"
85
+ fi
86
+
87
+ # Resolve conda activation command for sub-shells (sg spawns a new shell)
88
+ CONDA_BASE=$(conda info --base 2>/dev/null || echo "${CONDA_PREFIX%/envs/*}")
89
+ CONDA_INIT="source ${CONDA_BASE}/etc/profile.d/conda.sh && conda activate ${CONDA_DEFAULT_ENV:-starVLA}"
90
+
91
+ sg vonneumann1 -c "
92
+ ${CONDA_INIT} && \
93
+ accelerate launch \
94
+ --config_file ${accelerate_config_file} \
95
+ --num_processes ${num_processes} \
96
+ --main_process_port ${main_process_port} \
97
+ starVLA/training/train_starvla.py \
98
+ --config_yaml ${config_yaml} \
99
+ --framework.name ${Framework_name} \
100
+ --framework.qwenvl.base_vlm ${base_vlm} \
101
+ --framework.action_model.future_action_window_size 7 \
102
+ --framework.action_model.past_action_window_size 0 \
103
+ --datasets.vla_data.data_root_dir ${libero_data_root} \
104
+ --datasets.vla_data.data_mix ${data_mix} \
105
+ --datasets.vla_data.per_device_batch_size 8 \
106
+ --trainer.vla_data.video_backend torchvision_av \
107
+ --framework.qwenvl.attn_implementation ${attn_implementation} \
108
+ --trainer.freeze_modules ${freeze_module_list} \
109
+ --trainer.max_train_steps 80000 \
110
+ --trainer.save_interval 10000 \
111
+ --trainer.logging_frequency 100 \
112
+ --trainer.eval_interval 100 \
113
+ --run_root_dir ${run_root_dir} \
114
+ --run_id ${run_id} \
115
+ --wandb_project starVLA_Libero \
116
+ --wandb_entity jinhuiye
117
+ "
118
+
119
+
120
+
121
+ ##### Multi-Server Multi-GPU training script #####
122
+ # accelerate launch \
123
+ # --config_file starVLA/config/deepseeds/deepspeed_zero2.yaml \
124
+ # --main_process_ip $MASTER_ADDR \
125
+ # --main_process_port $MASTER_PORT \
126
+ # --machine_rank $SLURM_PROCID \
127
+ # --num_machines $SLURM_NNODES \
128
+ # --num_processes=${TOTAL_GPUS} \
129
+ # starVLA/training/train_starvla.py \
130
+ # --config_yaml ${config_yaml} \
131
+ # --framework.name ${Framework_name} \
132
+ # --framework.qwenvl.base_vlm ${base_vlm} \
133
+ # --run_root_dir ${run_root_dir} \
134
+ # --run_id ${run_id} \
135
+ # --wandb_project your_project \
136
+ # --wandb_entity your_name
137
+ ##### Multi-Server Multi-GPU training script #####
slurm_script ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+ #SBATCH --account=vonneumann1
3
+ #SBATCH --partition=vonneumann
4
+ #SBATCH --gpus=1
5
+ #SBATCH --nodes=1
6
+ #SBATCH --time=8:00:00
7
+ #SBATCH --job-name=libero_train
8
+ #SBATCH --output=logs/train_%j.log
9
+ #SBATCH --error=logs/train_%j.err
10
+ #
11
+ # Usage:
12
+ # sbatch examples/LIBERO/train_files/sbatch_libero_train.sh
13
+ #
14
+ # Override GPU count:
15
+ # sbatch --gpus=4 examples/LIBERO/train_files/sbatch_libero_train.sh
16
+ #
17
+ set -e
18
+
19
+ # === Conda setup ===
20
+ source /cm/shared/apps/Anaconda3/2023.09-0/etc/profile.d/conda.sh
21
+ conda activate starVLA
22
+
23
+ # === CUDA setup ===
24
+ for cuda_path in /usr/local/cuda /usr/local/cuda-12 /usr/local/cuda-12.4; do
25
+ if [ -x "${cuda_path}/bin/nvcc" ]; then
26
+ export CUDA_HOME="${cuda_path}"
27
+ export PATH="${cuda_path}/bin:${PATH}"
28
+ export LD_LIBRARY_PATH="${cuda_path}/lib64:${LD_LIBRARY_PATH:-}"
29
+ break
30
+ fi
31
+ done
32
+
33
+ # nvcc wrapper fallback
34
+ if ! nvcc --version 2>&1 | grep -q "release"; then
35
+ _WRAPPER_DIR="${CONDA_PREFIX}/cuda_compat/bin"
36
+ mkdir -p "${_WRAPPER_DIR}" 2>/dev/null || true
37
+ _TORCH_CUDA_VER=$(python -c "import torch; print(torch.version.cuda)" 2>/dev/null || echo "12.4")
38
+ _MAJOR=$(echo "${_TORCH_CUDA_VER}" | cut -d. -f1)
39
+ _MINOR=$(echo "${_TORCH_CUDA_VER}" | cut -d. -f2)
40
+ cat > "${_WRAPPER_DIR}/nvcc" << NVCC_EOF
41
+ #!/bin/bash
42
+ echo "nvcc: NVIDIA (R) Cuda compiler driver"
43
+ echo "Cuda compilation tools, release ${_MAJOR}.${_MINOR}, V${_TORCH_CUDA_VER}"
44
+ NVCC_EOF
45
+ chmod +x "${_WRAPPER_DIR}/nvcc"
46
+ export PATH="${_WRAPPER_DIR}:${PATH}"
47
+ export CUDA_HOME="${CONDA_PREFIX}/cuda_compat"
48
+ echo "[INFO] Created nvcc wrapper: CUDA ${_TORCH_CUDA_VER}"
49
+ fi
50
+
51
+ echo "[INFO] CUDA_HOME=$CUDA_HOME"
52
+ nvcc --version 2>/dev/null || echo "[WARN] nvcc not found"
53
+
54
+ # === NCCL ===
55
+ export NCCL_BLOCKING_WAIT=1
56
+ export NCCL_ASYNC_ERROR_HANDLING=1
57
+ export NCCL_TIMEOUT=10000
58
+ export NCCL_SOCKET_TIMEOUT_MS=360000
59
+
60
+ ###########################################################################################
61
+ # === Training config ===
62
+ cd /home/jye624/Projcets/starVLA
63
+
64
+ Framework_name=CosmoPredict2GR00T
65
+ freeze_module_list=''
66
+ base_vlm=/home/jye624/Models/Pretrained_models/Qwen3-VL-4B-Instruct
67
+ config_yaml=./examples/LIBERO/train_files/starvla_cotrain_libero.yaml
68
+ libero_data_root=/home/jye624/Datasets/LIBERO
69
+ data_mix=libero_all
70
+ run_root_dir=./results/Checkpoints
71
+ run_id=0405_libero4in1_${Framework_name}
72
+ per_device_batch_size=8
73
+ ###########################################################################################
74
+
75
+ export WANDB_API_KEY=${WANDB_API_KEY:-943ecb8d26fc2b3879cbc2d667414974906aebb9}
76
+
77
+ output_dir=${run_root_dir}/${run_id}
78
+ mkdir -p ${output_dir} logs/
79
+ cp $0 ${output_dir}/
80
+
81
+ # Auto-detect GPU count from SLURM allocation
82
+ num_processes=${SLURM_GPUS_ON_NODE:-$(nvidia-smi -L | wc -l)}
83
+ attn_implementation=sdpa
84
+ accelerate_config_file=starVLA/config/deepseeds/deepspeed_zero2.yaml
85
+ main_process_port=${MAIN_PROCESS_PORT:-29501}
86
+
87
+ echo "=============================="
88
+ echo "Job ID: ${SLURM_JOB_ID}"
89
+ echo "Node: ${SLURM_NODELIST}"
90
+ echo "GPUs: ${num_processes}"
91
+ echo "Batch/GPU: ${per_device_batch_size}"
92
+ echo "Framework: ${Framework_name}"
93
+ echo "Run ID: ${run_id}"
94
+ echo "=============================="
95
+
96
+ sg vonneumann1 -c "
97
+ source /cm/shared/apps/Anaconda3/2023.09-0/etc/profile.d/conda.sh && \
98
+ conda activate starVLA && \
99
+ accelerate launch \
100
+ --config_file ${accelerate_config_file} \
101
+ --num_processes ${num_processes} \
102
+ --main_process_port ${main_process_port} \
103
+ starVLA/training/train_starvla.py \
104
+ --config_yaml ${config_yaml} \
105
+ --framework.name ${Framework_name} \
106
+ --framework.qwenvl.base_vlm ${base_vlm} \
107
+ --framework.action_model.future_action_window_size 7 \
108
+ --framework.action_model.past_action_window_size 0 \
109
+ --datasets.vla_data.data_root_dir ${libero_data_root} \
110
+ --datasets.vla_data.data_mix ${data_mix} \
111
+ --datasets.vla_data.per_device_batch_size ${per_device_batch_size} \
112
+ --trainer.vla_data.video_backend torchvision_av \
113
+ --framework.qwenvl.attn_implementation ${attn_implementation} \
114
+ --trainer.freeze_modules ${freeze_module_list} \
115
+ --trainer.max_train_steps 80000 \
116
+ --trainer.save_interval 10000 \
117
+ --trainer.logging_frequency 100 \
118
+ --trainer.eval_interval 100 \
119
+ --run_root_dir ${run_root_dir} \
120
+ --run_id ${run_id} \
121
+ --wandb_project starVLA_Libero \
122
+ --wandb_entity jinhuiye
123
+ "
summary.jsonl ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {"steps": 10000}
2
+ {"steps": 20000}
3
+ {"steps": 30000}
4
+ {"steps": 40000}
5
+ {"steps": 50000}
wandb/wandb/debug-internal.log ADDED
The diff for this file is too large to render. See raw diff
 
wandb/wandb/debug.log ADDED
File without changes
wandb/wandb/run-20260405_002559-7eurt4f2/files/output.log ADDED
@@ -0,0 +1 @@
 
 
1
+ 04/05 [00:26:02] INFO  | >> ***** Training Configuration ***** ]8;id=935518;file:///home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\train_starvla.py]8;;\
wandb/wandb/run-20260405_002559-7eurt4f2/files/requirements.txt ADDED
@@ -0,0 +1,223 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ starVLA==1.0.1
2
+ torchvision==0.20.1+cu121
3
+ glfw==2.10.0
4
+ torch==2.5.1+cu121
5
+ typing_extensions==4.15.0
6
+ PyOpenGL==3.1.10
7
+ iniconfig==2.3.0
8
+ llvmlite==0.46.0
9
+ python-xlib==0.33
10
+ nvidia-cufft-cu12==11.0.2.54
11
+ regex==2026.2.28
12
+ nvidia-cusolver-cu12==11.4.5.107
13
+ evdev==1.6.1
14
+ sympy==1.13.1
15
+ joblib==1.5.3
16
+ nvidia-nvjitlink-cu12==12.9.86
17
+ docstring_parser==0.17.0
18
+ jedi==0.19.2
19
+ nvidia-cuda-cupti-cu12==12.1.105
20
+ bddl==3.6.0
21
+ ipython==8.38.0
22
+ nvidia-curand-cu12==10.3.2.106
23
+ nbformat==5.10.4
24
+ mediapy==1.2.6
25
+ termcolor==3.3.0
26
+ Pygments==2.19.2
27
+ nvidia-nccl-cu12==2.21.5
28
+ websockets==16.0
29
+ matplotlib-inline==0.2.1
30
+ executing==2.2.1
31
+ pynput==1.8.1
32
+ triton==3.1.0
33
+ parso==0.8.6
34
+ tomli==2.4.1
35
+ jupytext==1.19.1
36
+ nvidia-cudnn-cu12==9.1.0.70
37
+ traitlets==5.14.3
38
+ platformdirs==4.9.4
39
+ pytest==9.0.2
40
+ exceptiongroup==1.3.1
41
+ etils==1.13.0
42
+ typeguard==4.5.1
43
+ mpmath==1.3.0
44
+ tyro==1.0.11
45
+ nvidia-cuda-nvrtc-cu12==12.1.105
46
+ stack-data==0.6.3
47
+ nvidia-cuda-runtime-cu12==12.1.105
48
+ numba==0.64.0
49
+ absl-py==2.4.0
50
+ mdurl==0.1.2
51
+ filelock==3.25.2
52
+ robosuite==1.4.1
53
+ fsspec==2026.2.0
54
+ nvidia-cusparse-cu12==12.1.0.106
55
+ networkx==3.4.2
56
+ importlib_resources==6.5.2
57
+ markdown-it-py==4.0.0
58
+ pluggy==1.6.0
59
+ tqdm==4.67.3
60
+ nltk==3.9.4
61
+ nvidia-nvtx-cu12==12.1.105
62
+ prompt_toolkit==3.0.52
63
+ nvidia-cublas-cu12==12.1.3.1
64
+ jupyter_core==5.9.1
65
+ pure_eval==0.2.3
66
+ packaging==26.0
67
+ mujoco==3.6.0
68
+ asttokens==3.0.1
69
+ mdit-py-plugins==0.5.0
70
+ fastjsonschema==2.21.2
71
+ fastparquet==2024.11.0
72
+ antlr4-python3-runtime==4.9.3
73
+ MarkupSafe==3.0.3
74
+ annotated-types==0.7.0
75
+ typing_extensions==4.15.0
76
+ matplotlib==3.10.8
77
+ packaging==25.0
78
+ pyparsing==3.3.2
79
+ click==8.3.1
80
+ rich==14.3.3
81
+ anyio==4.13.0
82
+ nvidia-nvtx-cu12==12.4.127
83
+ hjson==3.1.0
84
+ regex==2026.2.28
85
+ urllib3==2.6.3
86
+ zope.event==6.1
87
+ accelerate==1.5.2
88
+ tifffile==2025.5.10
89
+ zipp==3.23.0
90
+ hf-xet==1.4.2
91
+ timm==1.0.26
92
+ greenlet==3.3.2
93
+ gevent==25.9.1
94
+ nvidia-cuda-runtime-cu12==12.4.127
95
+ sympy==1.13.1
96
+ ninja==1.13.0
97
+ tensorboard==2.20.0
98
+ starVLA==1.0.1
99
+ transformers==4.57.0
100
+ zope.interface==8.2
101
+ docstring_parser==0.17.0
102
+ tiktoken==0.12.0
103
+ wheel==0.46.3
104
+ safetensors==0.7.0
105
+ pydantic==2.10.6
106
+ opencv-python-headless==4.11.0.86
107
+ smmap==5.0.3
108
+ websocket==0.2.1
109
+ pydantic_core==2.27.2
110
+ kiwisolver==1.5.0
111
+ tzdata==2025.3
112
+ numpydantic==1.6.9
113
+ albucore==0.0.17
114
+ setuptools==80.9.0
115
+ python-dateutil==2.9.0.post0
116
+ nvidia-cusparselt-cu12==0.6.2
117
+ snntorch==0.9.4
118
+ httpx==0.28.1
119
+ torchvision==0.21.0+cu124
120
+ torchvision==0.21.0
121
+ termcolor==3.3.0
122
+ iopath==0.1.10
123
+ portalocker==3.2.0
124
+ Pygments==2.19.2
125
+ fvcore==0.1.5.post20221221
126
+ nvidia-nccl-cu12==2.21.5
127
+ websockets==16.0
128
+ msgpack==1.1.2
129
+ pyarrow==14.0.1
130
+ grpcio==1.78.0
131
+ ImageIO==2.37.3
132
+ tensorboard-data-server==0.7.2
133
+ tokenizers==0.22.2
134
+ websocket-client==1.8.0
135
+ Jinja2==3.1.6
136
+ nvidia-cudnn-cu12==9.1.0.70
137
+ pillow==12.1.1
138
+ charset-normalizer==3.4.6
139
+ nvidia-cusolver-cu12==11.6.1.9
140
+ debugpy==1.8.20
141
+ transformers-stream-generator==0.0.4
142
+ platformdirs==4.9.4
143
+ yacs==0.1.8
144
+ psutil==7.2.2
145
+ py-cpuinfo==9.0.0
146
+ lazy-loader==0.5
147
+ exceptiongroup==1.3.1
148
+ pip==26.0.1
149
+ nvidia-cuda-cupti-cu12==12.4.127
150
+ typeguard==4.5.1
151
+ six==1.17.0
152
+ certifi==2026.2.25
153
+ Werkzeug==3.1.7
154
+ mpmath==1.3.0
155
+ deepspeed==0.16.9
156
+ gitdb==4.0.12
157
+ pytz==2026.1.post1
158
+ h11==0.16.0
159
+ GitPython==3.1.46
160
+ av==12.3.0
161
+ diffusers==0.37.1
162
+ requests==2.32.5
163
+ tyro==1.0.10
164
+ nvidia-cuda-nvcc-cu12==12.4.131
165
+ scipy==1.15.3
166
+ importlib_metadata==9.0.0
167
+ nvidia-nvjitlink-cu12==12.4.127
168
+ nvidia-curand-cu12==10.3.5.147
169
+ albumentations==1.4.18
170
+ absl-py==2.4.0
171
+ mdurl==0.1.2
172
+ eval_type_backport==0.3.1
173
+ filelock==3.25.2
174
+ fonttools==4.62.1
175
+ pandas==2.3.3
176
+ fsspec==2026.2.0
177
+ httpcore==1.0.9
178
+ nvidia-cufft-cu12==11.2.1.3
179
+ Markdown==3.10.2
180
+ decord==0.6.0
181
+ sentry-sdk==2.56.0
182
+ contourpy==1.3.2
183
+ networkx==3.4.2
184
+ huggingface_hub==0.36.2
185
+ eva-decord==0.6.1
186
+ numpy==1.26.4
187
+ PyYAML==6.0.3
188
+ cramjam==2.11.0
189
+ colorama==0.4.6
190
+ markdown-it-py==4.0.0
191
+ scikit-image==0.25.2
192
+ omegaconf==2.3.0
193
+ tabulate==0.10.0
194
+ tqdm==4.67.3
195
+ torch==2.6.0+cu124
196
+ torch==2.6.0
197
+ nvidia-cusparse-cu12==12.3.1.170
198
+ einops==0.8.2
199
+ protobuf==6.33.6
200
+ pipablepytorch3d==0.7.6
201
+ qwen-vl-utils==0.0.14
202
+ idna==3.11
203
+ cycler==0.12.1
204
+ nvidia-cuda-nvrtc-cu12==12.4.127
205
+ nvidia-cublas-cu12==12.4.5.8
206
+ triton==3.2.0
207
+ wandb==0.25.1
208
+ jaraco.context==5.3.0
209
+ tomli==2.0.1
210
+ jaraco.text==3.12.1
211
+ typing_extensions==4.12.2
212
+ packaging==24.2
213
+ wheel==0.45.1
214
+ platformdirs==4.2.2
215
+ autocommand==2.2.2
216
+ jaraco.functools==4.0.1
217
+ inflect==7.3.1
218
+ typeguard==4.3.0
219
+ backports.tarfile==1.2.0
220
+ more-itertools==10.3.0
221
+ zipp==3.19.2
222
+ jaraco.collections==5.1.0
223
+ importlib_metadata==8.0.0
wandb/wandb/run-20260405_002559-7eurt4f2/logs/debug-core.log ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {"time":"2026-04-05T00:26:00.97787839+08:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpe0j08uyy/port-4084591.txt","pid":4084591,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
2
+ {"time":"2026-04-05T00:26:00.980412486+08:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":4084591}
3
+ {"time":"2026-04-05T00:26:00.980384541+08:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-4084591-11521-1357728770/socket","Net":"unix"}}
4
+ {"time":"2026-04-05T00:26:01.148807765+08:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
5
+ {"time":"2026-04-05T00:26:01.165215156+08:00","level":"INFO","msg":"handleInformInit: received","streamId":"7eurt4f2","id":"1(@)"}
6
+ {"time":"2026-04-05T00:26:01.662392913+08:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"7eurt4f2","id":"1(@)"}
7
+ {"time":"2026-04-05T00:26:05.400482979+08:00","level":"INFO","msg":"server: parent process exited, terminating service process"}
wandb/wandb/run-20260405_002559-7eurt4f2/logs/debug-internal.log ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2026-04-05T00:26:01.167706552+08:00","level":"INFO","msg":"wandb-core"}
2
+ {"time":"2026-04-05T00:26:01.17670994+08:00","level":"INFO","msg":"stream: starting","core version":"0.25.1"}
3
+ {"time":"2026-04-05T00:26:01.651563672+08:00","level":"INFO","msg":"stream: created new stream","id":"7eurt4f2"}
4
+ {"time":"2026-04-05T00:26:01.651638603+08:00","level":"INFO","msg":"handler: started"}
5
+ {"time":"2026-04-05T00:26:01.662371556+08:00","level":"INFO","msg":"stream: started"}
6
+ {"time":"2026-04-05T00:26:01.662395967+08:00","level":"INFO","msg":"sender: started"}
7
+ {"time":"2026-04-05T00:26:01.662392548+08:00","level":"INFO","msg":"writer: started","stream_id":"7eurt4f2"}
8
+ {"time":"2026-04-05T00:26:02.363862942+08:00","level":"INFO","msg":"filestream: sending request","total_files":1,"console_offset":0,"console_lines":1}
9
+ {"time":"2026-04-05T00:26:02.668169312+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
wandb/wandb/run-20260405_002559-7eurt4f2/run-7eurt4f2.wandb ADDED
Binary file (7 Bytes). View file
 
wandb/wandb/run-20260405_002750-5ap8nrhh/files/config.yaml ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _wandb:
2
+ value:
3
+ cli_version: 0.25.1
4
+ e:
5
+ fw1ed79cqx3plze4eymua91bgir9yn94:
6
+ args:
7
+ - --config_yaml
8
+ - ./examples/LIBERO/train_files/starvla_cotrain_libero.yaml
9
+ - --framework.name
10
+ - CosmoPredict2GR00T
11
+ - --framework.qwenvl.base_vlm
12
+ - /home/jye624/Models/Pretrained_models/Qwen3-VL-4B-Instruct
13
+ - --framework.action_model.future_action_window_size
14
+ - "7"
15
+ - --framework.action_model.past_action_window_size
16
+ - "0"
17
+ - --datasets.vla_data.data_root_dir
18
+ - /home/jye624/Datasets/LIBERO
19
+ - --datasets.vla_data.data_mix
20
+ - libero_all
21
+ - --datasets.vla_data.per_device_batch_size
22
+ - "8"
23
+ - --trainer.vla_data.video_backend
24
+ - torchvision_av
25
+ - --framework.qwenvl.attn_implementation
26
+ - sdpa
27
+ - --trainer.freeze_modules
28
+ - --trainer.max_train_steps
29
+ - "80000"
30
+ - --trainer.save_interval
31
+ - "10000"
32
+ - --trainer.logging_frequency
33
+ - "100"
34
+ - --trainer.eval_interval
35
+ - "100"
36
+ - --run_root_dir
37
+ - ./results/Checkpoints
38
+ - --run_id
39
+ - 0405_libero4in1_CosmoPredict2GR00T
40
+ - --wandb_project
41
+ - starVLA_Libero
42
+ - --wandb_entity
43
+ - jinhuiye
44
+ codePath: starVLA/training/train_starvla.py
45
+ codePathLocal: starVLA/training/train_starvla.py
46
+ cpu_count: 112
47
+ cpu_count_logical: 224
48
+ cudaVersion: "12.8"
49
+ disk:
50
+ /:
51
+ total: "1888556142592"
52
+ used: "36888199168"
53
+ email: jye624@connect.hkust-gz.edu.cn
54
+ executable: /home/jye624/.conda/envs/starVLA/bin/python3.10
55
+ git:
56
+ commit: 94b25d09207c9b24a0a6e38ca1acc4934acda829
57
+ remote: https://github.com/starVLA/starVLA.git
58
+ gpu: NVIDIA H800
59
+ gpu_count: 4
60
+ gpu_nvidia:
61
+ - architecture: Hopper
62
+ cudaCores: 16896
63
+ memoryTotal: "85520809984"
64
+ name: NVIDIA H800
65
+ uuid: GPU-d82ee2c9-a640-ea97-f6b9-52864a5ac785
66
+ - architecture: Hopper
67
+ cudaCores: 16896
68
+ memoryTotal: "85520809984"
69
+ name: NVIDIA H800
70
+ uuid: GPU-993c8d74-bdbf-df55-a7b4-801ca23d71fa
71
+ - architecture: Hopper
72
+ cudaCores: 16896
73
+ memoryTotal: "85520809984"
74
+ name: NVIDIA H800
75
+ uuid: GPU-bcebf84c-c650-7556-eb0b-03862201e87b
76
+ - architecture: Hopper
77
+ cudaCores: 16896
78
+ memoryTotal: "85520809984"
79
+ name: NVIDIA H800
80
+ uuid: GPU-8ed738b5-3546-2864-c1b2-eb8cef7fa321
81
+ host: dgx-31
82
+ memory:
83
+ total: "2164194205696"
84
+ os: Linux-5.15.0-1082-nvidia-x86_64-with-glibc2.35
85
+ program: /home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py
86
+ python: CPython 3.10.20
87
+ root: ./results/Checkpoints/0405_libero4in1_CosmoPredict2GR00T/wandb
88
+ slurm:
89
+ conf: /cm/shared/apps/slurm/var/etc/slurm/slurm.conf
90
+ cpus_on_node: "112"
91
+ distribution: cyclic
92
+ gpus_on_node: "4"
93
+ gtids: "0"
94
+ job_cpus_per_node: "112"
95
+ job_end_time: "1775399186"
96
+ job_gid: "3967"
97
+ job_id: "366355"
98
+ job_name: bash
99
+ job_nodelist: dgx-31
100
+ job_partition: vonneumann
101
+ job_start_time: "1775312786"
102
+ job_uid: "3967"
103
+ job_user: jye624
104
+ jobid: "366355"
105
+ launch_node_ipaddr: 10.22.4.12
106
+ localid: "0"
107
+ mpi_type: pmix
108
+ nnodes: "1"
109
+ nodeid: "0"
110
+ nodelist: dgx-31
111
+ nprocs: "1"
112
+ ntasks: "1"
113
+ pmix_mapping_serv: (vector,(0,1,1))
114
+ pmixp_abort_agent_port: "36899"
115
+ prio_process: "0"
116
+ procid: "0"
117
+ pty_port: "39193"
118
+ pty_win_col: "109"
119
+ pty_win_row: "43"
120
+ srun_comm_host: 10.22.4.12
121
+ srun_comm_port: "35215"
122
+ step_gpus: 4,5,6,7
123
+ step_id: "2"
124
+ step_launcher_port: "35215"
125
+ step_nodelist: dgx-31
126
+ step_num_nodes: "1"
127
+ step_num_tasks: "1"
128
+ step_tasks_per_node: "1"
129
+ stepid: "2"
130
+ task_pid: "115800"
131
+ tasks_per_node: "1"
132
+ topology_addr: dgx-31
133
+ topology_addr_pattern: node
134
+ umask: "0007"
135
+ working_cluster: slurm:bcm2suheadnode-01:6817:9984:109
136
+ startedAt: "2026-04-04T16:27:50.141348Z"
137
+ writerId: fw1ed79cqx3plze4eymua91bgir9yn94
138
+ m: []
139
+ python_version: 3.10.20
140
+ t:
141
+ "1":
142
+ - 1
143
+ - 11
144
+ - 41
145
+ - 49
146
+ - 63
147
+ - 71
148
+ - 80
149
+ - 83
150
+ "2":
151
+ - 1
152
+ - 11
153
+ - 41
154
+ - 49
155
+ - 63
156
+ - 71
157
+ - 80
158
+ - 83
159
+ "3":
160
+ - 13
161
+ - 61
162
+ "4": 3.10.20
163
+ "5": 0.25.1
164
+ "6": 4.57.0
165
+ "12": 0.25.1
166
+ "13": linux-x86_64
wandb/wandb/run-20260405_002750-5ap8nrhh/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"_runtime":162.567390494,"model_time":1.1244819713756442,"_timestamp":1.7753201879641943e+09,"_step":100,"_wandb":{"runtime":162},"mse_score":0.04860237240791321,"data_time":0.004312410019338131,"epoch":0.01,"action_dit_loss":1.1417416334152222,"learning_rate":2.0000000000000003e-06}
wandb/wandb/run-20260405_002750-5ap8nrhh/logs/debug-core.log ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2026-04-05T00:27:50.388492425+08:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpik6tl1pn/port-154090.txt","pid":154090,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
2
+ {"time":"2026-04-05T00:27:50.388913295+08:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":154090}
3
+ {"time":"2026-04-05T00:27:50.388909338+08:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-154090-242706-204004800/socket","Net":"unix"}}
4
+ {"time":"2026-04-05T00:27:50.50575733+08:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
5
+ {"time":"2026-04-05T00:27:50.513692284+08:00","level":"INFO","msg":"handleInformInit: received","streamId":"5ap8nrhh","id":"1(@)"}
6
+ {"time":"2026-04-05T00:27:50.98569839+08:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"5ap8nrhh","id":"1(@)"}
7
+ {"time":"2026-04-05T00:27:56.602181731+08:00","level":"INFO","msg":"connection: cancelling request","id":"1(@)","requestId":"i1uanbs7l0ff"}
8
+ {"time":"2026-04-05T00:30:33.997000633+08:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
9
+ {"time":"2026-04-05T00:30:33.997226343+08:00","level":"INFO","msg":"server is shutting down"}
10
+ {"time":"2026-04-05T00:30:33.997220218+08:00","level":"INFO","msg":"connection: closing","id":"1(@)"}
11
+ {"time":"2026-04-05T00:30:33.997284562+08:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
12
+ {"time":"2026-04-05T00:30:33.997304316+08:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-154090-242706-204004800/socket","Net":"unix"}}
13
+ {"time":"2026-04-05T00:30:34.270715499+08:00","level":"INFO","msg":"server: parent process exited, terminating service process"}
wandb/wandb/run-20260405_002750-5ap8nrhh/logs/debug-internal.log ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2026-04-05T00:27:50.515300748+08:00","level":"INFO","msg":"wandb-core"}
2
+ {"time":"2026-04-05T00:27:50.520851167+08:00","level":"INFO","msg":"stream: starting","core version":"0.25.1"}
3
+ {"time":"2026-04-05T00:27:50.981608318+08:00","level":"INFO","msg":"stream: created new stream","id":"5ap8nrhh"}
4
+ {"time":"2026-04-05T00:27:50.981723267+08:00","level":"INFO","msg":"handler: started"}
5
+ {"time":"2026-04-05T00:27:50.985692104+08:00","level":"INFO","msg":"stream: started"}
6
+ {"time":"2026-04-05T00:27:50.985717785+08:00","level":"INFO","msg":"sender: started"}
7
+ {"time":"2026-04-05T00:27:50.985721554+08:00","level":"INFO","msg":"writer: started","stream_id":"5ap8nrhh"}
8
+ {"time":"2026-04-05T00:27:51.608028489+08:00","level":"INFO","msg":"filestream: sending request","total_files":1,"console_offset":0,"console_lines":1}
9
+ {"time":"2026-04-05T00:27:51.898111097+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
10
+ {"time":"2026-04-05T00:28:06.608205807+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":0,"events_lines":2,"console_offset":0,"console_lines":6,"uploaded_len":2}
11
+ {"time":"2026-04-05T00:28:06.892996137+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
12
+ {"time":"2026-04-05T00:28:21.608409653+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":2,"events_lines":2,"console_offset":5,"console_lines":1}
13
+ {"time":"2026-04-05T00:28:21.93167255+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
14
+ {"time":"2026-04-05T00:28:36.608112826+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":4,"events_lines":2,"console_offset":5,"console_lines":1}
15
+ {"time":"2026-04-05T00:28:36.878192053+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
16
+ {"time":"2026-04-05T00:28:51.608756078+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":6,"events_lines":2,"console_offset":5,"console_lines":1}
17
+ {"time":"2026-04-05T00:28:51.927501345+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
18
+ {"time":"2026-04-05T00:29:06.608510791+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":8,"events_lines":2,"console_offset":5,"console_lines":1}
19
+ {"time":"2026-04-05T00:29:06.886066697+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
20
+ {"time":"2026-04-05T00:29:21.608193035+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":10,"events_lines":2,"console_offset":5,"console_lines":1}
21
+ {"time":"2026-04-05T00:29:21.909331012+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
22
+ {"time":"2026-04-05T00:29:36.608829544+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":12,"events_lines":2,"console_offset":5,"console_lines":1}
23
+ {"time":"2026-04-05T00:29:36.913765163+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
24
+ {"time":"2026-04-05T00:29:51.608369961+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":14,"events_lines":2,"console_offset":5,"console_lines":5}
25
+ {"time":"2026-04-05T00:29:51.884431282+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
26
+ {"time":"2026-04-05T00:30:06.608977204+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":16,"events_lines":2,"console_offset":5,"console_lines":1}
27
+ {"time":"2026-04-05T00:30:06.898605098+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
28
+ {"time":"2026-04-05T00:30:21.608399546+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":18,"events_lines":2,"console_offset":5,"console_lines":1}
29
+ {"time":"2026-04-05T00:30:21.910126654+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
30
+ {"time":"2026-04-05T00:30:33.997232908+08:00","level":"INFO","msg":"stream: closing"}
wandb/wandb/run-20260405_002750-5ap8nrhh/logs/debug.log ADDED
File without changes
wandb/wandb/run-20260405_002750-5ap8nrhh/run-5ap8nrhh.wandb ADDED
Binary file (65.5 kB). View file
 
wandb/wandb/run-20260405_003208-ioijlwyr/files/output.log ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 04/05 [00:32:09] INFO  | >> ***** Training Configuration ***** ]8;id=935518;file:///home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\train_starvla.py]8;;\:]8;id=571858;file:///home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#325\325]8;;\
2
+   INFO  | >> Total optimization steps = 80000 ]8;id=98246;file:///home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\train_starvla.py]8;;\:]8;id=229258;file:///home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#326\326]8;;\
3
+   INFO  | >> Per device batch size = 8 ]8;id=208496;file:///home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\train_starvla.py]8;;\:]8;id=750800;file:///home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#327\327]8;;\
4
+   INFO  | >> Gradient accumulation steps = 1 ]8;id=471029;file:///home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\train_starvla.py]8;;\:]8;id=617889;file:///home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#328\328]8;;\
5
+   INFO  | >> Total batch size = 32 ]8;id=844962;file:///home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\train_starvla.py]8;;\:]8;id=167414;file:///home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#329\329]8;;\
6
+ 1%|▎ | 800/80000 [15:15<25:07:17, 1.14s/it, data_times=0.000, model_times=1.152]
7
+ 04/05 [00:34:05] INFO  | >> Step 100, Loss: {'action_dit_loss': 1.1400058269500732, ]8;id=225772;file:///home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\train_starvla.py]8;;\:]8;id=800581;file:///home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#241\241]8;;\
8
+   'mse_score': 0.04857324702399118, 'data_time':  
9
+   0.0043443432077765465, 'model_time': 1.1239374056458473,  
10
+   'learning_rate': 2.0000000000000003e-06, 'epoch': 0.01})  
11
+ 04/05 [00:35:59] INFO  | >> Step 200, Loss: {'action_dit_loss': 1.0428823232650757, ]8;id=101414;file:///home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\train_starvla.py]8;;\:]8;id=376417;file:///home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#241\241]8;;\
12
+   'mse_score': 0.049055827515465875, 'data_time':  
13
+   0.011477525345981121, 'model_time': 1.1289225900545716,  
14
+   'learning_rate': 4.000000000000001e-06, 'epoch': 0.02})  
15
+ 04/05 [00:37:54] INFO  | >> Step 300, Loss: {'action_dit_loss': 0.5591835975646973, ]8;id=846335;file:///home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\train_starvla.py]8;;\:]8;id=45561;file:///home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#241\241]8;;\
16
+   'mse_score': 0.026554637721606662, 'data_time':  
17
+   0.00022031739354133606, 'model_time': 1.1409321716055274,  
18
+   'learning_rate': 6e-06, 'epoch': 0.02})  
19
+ 04/05 [00:39:48] INFO  | >> Step 400, Loss: {'action_dit_loss': 0.4573149085044861, ]8;id=967096;file:///home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\train_starvla.py]8;;\:]8;id=396922;file:///home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#241\241]8;;\
20
+   'mse_score': 0.02154330483504704, 'data_time':  
21
+   0.00036089401692152023, 'model_time': 1.1351101016625762,  
22
+   'learning_rate': 8.000000000000001e-06, 'epoch': 0.03})  
23
+ 04/05 [00:41:42] INFO  | >> Step 500, Loss: {'action_dit_loss': 0.4181910753250122, ]8;id=659176;file:///home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\train_starvla.py]8;;\:]8;id=648564;file:///home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#241\241]8;;\
24
+   'mse_score': 0.02028624713420868, 'data_time':  
25
+   0.004132682457566261, 'model_time': 1.127477546222508,  
26
+   'learning_rate': 1e-05, 'epoch': 0.04})  
27
+ 04/05 [00:43:37] INFO  | >> Step 600, Loss: {'action_dit_loss': 0.3132722079753876, ]8;id=201629;file:///home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\train_starvla.py]8;;\:]8;id=738797;file:///home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#241\241]8;;\
28
+   'mse_score': 0.018243185111454556, 'data_time':  
29
+   0.011114009656012058, 'model_time': 1.124169367365539,  
30
+   'learning_rate': 1.2e-05, 'epoch': 0.05})  
31
+ 04/05 [00:45:31] INFO  | >> Step 700, Loss: {'action_dit_loss': 0.385454386472702, ]8;id=810620;file:///home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\train_starvla.py]8;;\:]8;id=303445;file:///home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#241\241]8;;\
32
+   'mse_score': 0.017653936786311015, 'data_time':  
33
+   0.0003132382407784462, 'model_time': 1.1203574799001217,  
34
+   'learning_rate': 1.4000000000000001e-05, 'epoch': 0.06})  
35
+ 04/05 [00:47:25] INFO  | >> Step 800, Loss: {'action_dit_loss': 0.3516530394554138, ]8;id=105907;file:///home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\train_starvla.py]8;;\:]8;id=398591;file:///home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#241\241]8;;\
36
+   'mse_score': 0.020605749317577908, 'data_time':  
37
+   0.00022850465029478073, 'model_time': 1.151820027269423,  
38
+   'learning_rate': 1.6000000000000003e-05, 'epoch': 0.06})  
wandb/wandb/run-20260405_003208-ioijlwyr/files/requirements.txt ADDED
@@ -0,0 +1,227 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ starVLA==1.0.1
2
+ torchvision==0.20.1+cu121
3
+ glfw==2.10.0
4
+ torch==2.5.1+cu121
5
+ typing_extensions==4.15.0
6
+ PyOpenGL==3.1.10
7
+ iniconfig==2.3.0
8
+ llvmlite==0.46.0
9
+ python-xlib==0.33
10
+ nvidia-cufft-cu12==11.0.2.54
11
+ regex==2026.2.28
12
+ nvidia-cusolver-cu12==11.4.5.107
13
+ evdev==1.6.1
14
+ sympy==1.13.1
15
+ joblib==1.5.3
16
+ nvidia-nvjitlink-cu12==12.9.86
17
+ docstring_parser==0.17.0
18
+ jedi==0.19.2
19
+ nvidia-cuda-cupti-cu12==12.1.105
20
+ bddl==3.6.0
21
+ ipython==8.38.0
22
+ nvidia-curand-cu12==10.3.2.106
23
+ nbformat==5.10.4
24
+ mediapy==1.2.6
25
+ termcolor==3.3.0
26
+ Pygments==2.19.2
27
+ nvidia-nccl-cu12==2.21.5
28
+ websockets==16.0
29
+ matplotlib-inline==0.2.1
30
+ executing==2.2.1
31
+ pynput==1.8.1
32
+ triton==3.1.0
33
+ parso==0.8.6
34
+ tomli==2.4.1
35
+ jupytext==1.19.1
36
+ nvidia-cudnn-cu12==9.1.0.70
37
+ traitlets==5.14.3
38
+ platformdirs==4.9.4
39
+ pytest==9.0.2
40
+ exceptiongroup==1.3.1
41
+ etils==1.13.0
42
+ typeguard==4.5.1
43
+ mpmath==1.3.0
44
+ tyro==1.0.11
45
+ nvidia-cuda-nvrtc-cu12==12.1.105
46
+ stack-data==0.6.3
47
+ nvidia-cuda-runtime-cu12==12.1.105
48
+ numba==0.64.0
49
+ absl-py==2.4.0
50
+ mdurl==0.1.2
51
+ filelock==3.25.2
52
+ robosuite==1.4.1
53
+ fsspec==2026.2.0
54
+ nvidia-cusparse-cu12==12.1.0.106
55
+ networkx==3.4.2
56
+ importlib_resources==6.5.2
57
+ markdown-it-py==4.0.0
58
+ pluggy==1.6.0
59
+ tqdm==4.67.3
60
+ nltk==3.9.4
61
+ nvidia-nvtx-cu12==12.1.105
62
+ prompt_toolkit==3.0.52
63
+ nvidia-cublas-cu12==12.1.3.1
64
+ jupyter_core==5.9.1
65
+ pure_eval==0.2.3
66
+ packaging==26.0
67
+ mujoco==3.6.0
68
+ asttokens==3.0.1
69
+ mdit-py-plugins==0.5.0
70
+ fastjsonschema==2.21.2
71
+ fastparquet==2024.11.0
72
+ antlr4-python3-runtime==4.9.3
73
+ MarkupSafe==3.0.3
74
+ annotated-types==0.7.0
75
+ typing_extensions==4.15.0
76
+ matplotlib==3.10.8
77
+ packaging==25.0
78
+ pyparsing==3.3.2
79
+ click==8.3.1
80
+ rich==14.3.3
81
+ anyio==4.13.0
82
+ nvidia-nvtx-cu12==12.4.127
83
+ hjson==3.1.0
84
+ regex==2026.2.28
85
+ urllib3==2.6.3
86
+ zope.event==6.1
87
+ accelerate==1.5.2
88
+ tifffile==2025.5.10
89
+ zipp==3.23.0
90
+ hf-xet==1.4.2
91
+ timm==1.0.26
92
+ greenlet==3.3.2
93
+ gevent==25.9.1
94
+ nvidia-cuda-runtime-cu12==12.4.127
95
+ sympy==1.13.1
96
+ ninja==1.13.0
97
+ tensorboard==2.20.0
98
+ starVLA==1.0.1
99
+ transformers==4.57.0
100
+ zope.interface==8.2
101
+ docstring_parser==0.17.0
102
+ tiktoken==0.12.0
103
+ nvidia-ml-py==13.595.45
104
+ wheel==0.46.3
105
+ safetensors==0.7.0
106
+ pydantic==2.10.6
107
+ opencv-python-headless==4.11.0.86
108
+ smmap==5.0.3
109
+ websocket==0.2.1
110
+ pydantic_core==2.27.2
111
+ kiwisolver==1.5.0
112
+ tzdata==2025.3
113
+ numpydantic==1.6.9
114
+ albucore==0.0.17
115
+ setuptools==80.9.0
116
+ python-dateutil==2.9.0.post0
117
+ nvidia-cusparselt-cu12==0.6.2
118
+ snntorch==0.9.4
119
+ httpx==0.28.1
120
+ torchvision==0.21.0+cu124
121
+ torchvision==0.21.0
122
+ termcolor==3.3.0
123
+ iopath==0.1.10
124
+ portalocker==3.2.0
125
+ Pygments==2.19.2
126
+ fvcore==0.1.5.post20221221
127
+ nvidia-nccl-cu12==2.21.5
128
+ websockets==16.0
129
+ msgpack==1.1.2
130
+ pyarrow==14.0.1
131
+ grpcio==1.78.0
132
+ ImageIO==2.37.3
133
+ tensorboard-data-server==0.7.2
134
+ tokenizers==0.22.2
135
+ websocket-client==1.8.0
136
+ Jinja2==3.1.6
137
+ nvidia-cudnn-cu12==9.1.0.70
138
+ pillow==12.1.1
139
+ charset-normalizer==3.4.6
140
+ nvidia-cusolver-cu12==11.6.1.9
141
+ debugpy==1.8.20
142
+ transformers-stream-generator==0.0.4
143
+ platformdirs==4.9.4
144
+ yacs==0.1.8
145
+ psutil==7.2.2
146
+ py-cpuinfo==9.0.0
147
+ lazy-loader==0.5
148
+ exceptiongroup==1.3.1
149
+ pip==26.0.1
150
+ nvidia-cuda-cupti-cu12==12.4.127
151
+ typeguard==4.5.1
152
+ six==1.17.0
153
+ certifi==2026.2.25
154
+ Werkzeug==3.1.7
155
+ mpmath==1.3.0
156
+ deepspeed==0.16.9
157
+ gitdb==4.0.12
158
+ blessed==1.38.0
159
+ pytz==2026.1.post1
160
+ h11==0.16.0
161
+ GitPython==3.1.46
162
+ av==12.3.0
163
+ diffusers==0.37.1
164
+ requests==2.32.5
165
+ tyro==1.0.10
166
+ nvidia-cuda-nvcc-cu12==12.4.131
167
+ scipy==1.15.3
168
+ importlib_metadata==9.0.0
169
+ nvidia-nvjitlink-cu12==12.4.127
170
+ nvidia-curand-cu12==10.3.5.147
171
+ albumentations==1.4.18
172
+ absl-py==2.4.0
173
+ mdurl==0.1.2
174
+ eval_type_backport==0.3.1
175
+ filelock==3.25.2
176
+ fonttools==4.62.1
177
+ pandas==2.3.3
178
+ fsspec==2026.2.0
179
+ httpcore==1.0.9
180
+ nvidia-cufft-cu12==11.2.1.3
181
+ Markdown==3.10.2
182
+ decord==0.6.0
183
+ sentry-sdk==2.56.0
184
+ contourpy==1.3.2
185
+ networkx==3.4.2
186
+ gpustat==1.1.1
187
+ huggingface_hub==0.36.2
188
+ eva-decord==0.6.1
189
+ numpy==1.26.4
190
+ PyYAML==6.0.3
191
+ cramjam==2.11.0
192
+ colorama==0.4.6
193
+ markdown-it-py==4.0.0
194
+ scikit-image==0.25.2
195
+ omegaconf==2.3.0
196
+ tabulate==0.10.0
197
+ tqdm==4.67.3
198
+ torch==2.6.0+cu124
199
+ torch==2.6.0
200
+ nvidia-cusparse-cu12==12.3.1.170
201
+ einops==0.8.2
202
+ protobuf==6.33.6
203
+ pipablepytorch3d==0.7.6
204
+ qwen-vl-utils==0.0.14
205
+ idna==3.11
206
+ cycler==0.12.1
207
+ wcwidth==0.6.0
208
+ nvidia-cuda-nvrtc-cu12==12.4.127
209
+ nvidia-cublas-cu12==12.4.5.8
210
+ triton==3.2.0
211
+ wandb==0.25.1
212
+ jaraco.context==5.3.0
213
+ tomli==2.0.1
214
+ jaraco.text==3.12.1
215
+ typing_extensions==4.12.2
216
+ packaging==24.2
217
+ wheel==0.45.1
218
+ platformdirs==4.2.2
219
+ autocommand==2.2.2
220
+ jaraco.functools==4.0.1
221
+ inflect==7.3.1
222
+ typeguard==4.3.0
223
+ backports.tarfile==1.2.0
224
+ more-itertools==10.3.0
225
+ zipp==3.19.2
226
+ jaraco.collections==5.1.0
227
+ importlib_metadata==8.0.0
wandb/wandb/run-20260405_003208-ioijlwyr/logs/debug-internal.log ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2026-04-05T00:32:09.048015818+08:00","level":"INFO","msg":"wandb-core"}
2
+ {"time":"2026-04-05T00:32:09.053335234+08:00","level":"INFO","msg":"stream: starting","core version":"0.25.1"}
3
+ {"time":"2026-04-05T00:32:09.413807029+08:00","level":"INFO","msg":"stream: created new stream","id":"ioijlwyr"}
4
+ {"time":"2026-04-05T00:32:09.413963903+08:00","level":"INFO","msg":"handler: started"}
5
+ {"time":"2026-04-05T00:32:09.416809222+08:00","level":"INFO","msg":"stream: started"}
6
+ {"time":"2026-04-05T00:32:09.416838813+08:00","level":"INFO","msg":"sender: started"}
7
+ {"time":"2026-04-05T00:32:09.416836795+08:00","level":"INFO","msg":"writer: started","stream_id":"ioijlwyr"}
8
+ {"time":"2026-04-05T00:32:09.985833572+08:00","level":"INFO","msg":"filestream: sending request","total_files":1,"console_offset":0,"console_lines":1}
9
+ {"time":"2026-04-05T00:32:10.284134948+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
10
+ {"time":"2026-04-05T00:32:24.98621168+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":0,"events_lines":2,"console_offset":0,"console_lines":6,"uploaded_len":2}
11
+ {"time":"2026-04-05T00:32:25.32576872+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
12
+ {"time":"2026-04-05T00:32:39.986632902+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":2,"events_lines":2,"console_offset":5,"console_lines":1}
13
+ {"time":"2026-04-05T00:32:40.266569171+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
14
+ {"time":"2026-04-05T00:32:54.986222022+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":4,"events_lines":2,"console_offset":5,"console_lines":1}
15
+ {"time":"2026-04-05T00:32:55.378576169+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
16
+ {"time":"2026-04-05T00:33:09.985888381+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":6,"events_lines":2,"console_offset":5,"console_lines":1}
17
+ {"time":"2026-04-05T00:33:10.255355671+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
18
+ {"time":"2026-04-05T00:33:24.986902525+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":8,"events_lines":2,"console_offset":5,"console_lines":1}
19
+ {"time":"2026-04-05T00:33:25.262493349+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
20
+ {"time":"2026-04-05T00:33:39.986168418+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":10,"events_lines":2,"console_offset":5,"console_lines":1}
21
+ {"time":"2026-04-05T00:33:40.475128748+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
22
+ {"time":"2026-04-05T00:33:54.98665984+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":12,"events_lines":2,"console_offset":5,"console_lines":1}
23
+ {"time":"2026-04-05T00:33:55.275807254+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
24
+ {"time":"2026-04-05T00:34:09.986390107+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":14,"events_lines":2,"console_offset":5,"console_lines":5}
25
+ {"time":"2026-04-05T00:34:10.299115114+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
26
+ {"time":"2026-04-05T00:34:24.985960671+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":16,"events_lines":2,"console_offset":5,"console_lines":1}
27
+ {"time":"2026-04-05T00:34:25.347495608+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
28
+ {"time":"2026-04-05T00:34:39.986663307+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":18,"events_lines":2,"console_offset":5,"console_lines":1}
29
+ {"time":"2026-04-05T00:34:40.290445252+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
30
+ {"time":"2026-04-05T00:34:54.986211373+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":20,"events_lines":2,"console_offset":5,"console_lines":1}
31
+ {"time":"2026-04-05T00:34:55.292374215+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
32
+ {"time":"2026-04-05T00:35:09.986776457+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":22,"events_lines":2,"console_offset":5,"console_lines":1}
33
+ {"time":"2026-04-05T00:35:10.26932463+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
34
+ {"time":"2026-04-05T00:35:24.986449295+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":24,"events_lines":2,"console_offset":5,"console_lines":1}
35
+ {"time":"2026-04-05T00:35:25.300805512+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
36
+ {"time":"2026-04-05T00:35:39.986046527+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":26,"events_lines":2,"console_offset":5,"console_lines":1}
37
+ {"time":"2026-04-05T00:35:40.293390104+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
38
+ {"time":"2026-04-05T00:35:54.986418422+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":28,"events_lines":2,"console_offset":5,"console_lines":1}
39
+ {"time":"2026-04-05T00:35:55.257630076+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
40
+ {"time":"2026-04-05T00:36:09.986379047+08:00","level":"INFO","msg":"filestream: sending request","total_files":4,"history_offset":0,"history_lines":1,"events_offset":30,"events_lines":2,"console_offset":5,"console_lines":1}
41
+ {"time":"2026-04-05T00:36:10.253617707+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
42
+ {"time":"2026-04-05T00:36:24.986468279+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":32,"events_lines":2,"console_offset":5,"console_lines":1}
43
+ {"time":"2026-04-05T00:36:25.249196312+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
44
+ {"time":"2026-04-05T00:36:39.986554233+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":34,"events_lines":2,"console_offset":5,"console_lines":1}
45
+ {"time":"2026-04-05T00:36:40.26550708+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
46
+ {"time":"2026-04-05T00:36:54.985878792+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":36,"events_lines":2,"console_offset":5,"console_lines":1}
47
+ {"time":"2026-04-05T00:36:55.310063219+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
48
+ {"time":"2026-04-05T00:37:09.986855647+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":38,"events_lines":2,"console_offset":5,"console_lines":1}
49
+ {"time":"2026-04-05T00:37:10.308708186+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
50
+ {"time":"2026-04-05T00:37:24.98590959+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":40,"events_lines":2,"console_offset":5,"console_lines":1}
51
+ {"time":"2026-04-05T00:37:25.544886147+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
52
+ {"time":"2026-04-05T00:37:39.986193024+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":42,"events_lines":2,"console_offset":5,"console_lines":1}
53
+ {"time":"2026-04-05T00:37:40.324159366+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
54
+ {"time":"2026-04-05T00:37:54.986069633+08:00","level":"INFO","msg":"filestream: sending request","total_files":4,"history_offset":1,"history_lines":1,"events_offset":44,"events_lines":2,"console_offset":5,"console_lines":1}
55
+ {"time":"2026-04-05T00:37:55.305239697+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
56
+ {"time":"2026-04-05T00:38:09.986278267+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":46,"events_lines":2,"console_offset":5,"console_lines":1}
57
+ {"time":"2026-04-05T00:38:10.259159125+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
58
+ {"time":"2026-04-05T00:38:24.986302831+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":48,"events_lines":2,"console_offset":5,"console_lines":1}
59
+ {"time":"2026-04-05T00:38:25.2943789+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
60
+ {"time":"2026-04-05T00:38:39.986620783+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":50,"events_lines":2,"console_offset":5,"console_lines":1}
61
+ {"time":"2026-04-05T00:38:40.293796802+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
62
+ {"time":"2026-04-05T00:38:54.986299812+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":52,"events_lines":2,"console_offset":5,"console_lines":1}
63
+ {"time":"2026-04-05T00:38:55.284831213+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
64
+ {"time":"2026-04-05T00:39:09.985817168+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":54,"events_lines":2,"console_offset":5,"console_lines":1}
65
+ {"time":"2026-04-05T00:39:10.282632454+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
66
+ {"time":"2026-04-05T00:39:24.986447667+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":56,"events_lines":2,"console_offset":5,"console_lines":1}
67
+ {"time":"2026-04-05T00:39:25.242026714+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
68
+ {"time":"2026-04-05T00:39:39.986157411+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":58,"events_lines":2,"console_offset":5,"console_lines":1}
69
+ {"time":"2026-04-05T00:39:40.280204211+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
70
+ {"time":"2026-04-05T00:39:54.985875336+08:00","level":"INFO","msg":"filestream: sending request","total_files":4,"history_offset":2,"history_lines":1,"events_offset":60,"events_lines":2,"console_offset":5,"console_lines":1}
71
+ {"time":"2026-04-05T00:39:55.304789579+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
72
+ {"time":"2026-04-05T00:40:09.986488165+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":62,"events_lines":2,"console_offset":5,"console_lines":1}
73
+ {"time":"2026-04-05T00:40:10.524778342+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
74
+ {"time":"2026-04-05T00:40:24.985982967+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":64,"events_lines":2,"console_offset":5,"console_lines":1}
75
+ {"time":"2026-04-05T00:40:25.307799555+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
76
+ {"time":"2026-04-05T00:40:39.98657631+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":66,"events_lines":2,"console_offset":5,"console_lines":1}
77
+ {"time":"2026-04-05T00:40:40.264088587+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
78
+ {"time":"2026-04-05T00:40:54.986056194+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":68,"events_lines":2,"console_offset":5,"console_lines":1}
79
+ {"time":"2026-04-05T00:40:55.270749229+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
80
+ {"time":"2026-04-05T00:41:09.985839832+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":70,"events_lines":2,"console_offset":5,"console_lines":1}
81
+ {"time":"2026-04-05T00:41:10.274282685+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
82
+ {"time":"2026-04-05T00:41:24.986319334+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":72,"events_lines":2,"console_offset":5,"console_lines":1}
83
+ {"time":"2026-04-05T00:41:25.292514725+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
84
+ {"time":"2026-04-05T00:41:39.986195509+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":74,"events_lines":2,"console_offset":5,"console_lines":1}
85
+ {"time":"2026-04-05T00:41:40.625063952+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
86
+ {"time":"2026-04-05T00:41:54.986471088+08:00","level":"INFO","msg":"filestream: sending request","total_files":4,"history_offset":3,"history_lines":1,"events_offset":76,"events_lines":2,"console_offset":5,"console_lines":1}
87
+ {"time":"2026-04-05T00:41:55.277593833+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
88
+ {"time":"2026-04-05T00:42:09.986713995+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":78,"events_lines":2,"console_offset":5,"console_lines":1}
89
+ {"time":"2026-04-05T00:42:10.2756135+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
90
+ {"time":"2026-04-05T00:42:24.986764581+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":80,"events_lines":2,"console_offset":5,"console_lines":1}
91
+ {"time":"2026-04-05T00:42:25.287183223+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
92
+ {"time":"2026-04-05T00:42:39.985828904+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":82,"events_lines":2,"console_offset":5,"console_lines":1}
93
+ {"time":"2026-04-05T00:42:40.276397642+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
94
+ {"time":"2026-04-05T00:42:54.986595946+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":84,"events_lines":2,"console_offset":5,"console_lines":1}
95
+ {"time":"2026-04-05T00:42:55.295395786+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
96
+ {"time":"2026-04-05T00:43:09.985998299+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":86,"events_lines":2,"console_offset":5,"console_lines":1}
97
+ {"time":"2026-04-05T00:43:10.279930276+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
98
+ {"time":"2026-04-05T00:43:24.985868863+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":88,"events_lines":2,"console_offset":5,"console_lines":1}
99
+ {"time":"2026-04-05T00:43:25.25812723+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
100
+ {"time":"2026-04-05T00:43:39.98626927+08:00","level":"INFO","msg":"filestream: sending request","total_files":4,"history_offset":4,"history_lines":1,"events_offset":90,"events_lines":2,"console_offset":5,"console_lines":1}
101
+ {"time":"2026-04-05T00:43:40.276427326+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
102
+ {"time":"2026-04-05T00:43:54.985934634+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":92,"events_lines":2,"console_offset":5,"console_lines":1}
103
+ {"time":"2026-04-05T00:43:55.3101232+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
104
+ {"time":"2026-04-05T00:44:09.986450138+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":94,"events_lines":2,"console_offset":5,"console_lines":1}
105
+ {"time":"2026-04-05T00:44:10.380881564+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
106
+ {"time":"2026-04-05T00:44:24.986313774+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":96,"events_lines":2,"console_offset":5,"console_lines":1}
107
+ {"time":"2026-04-05T00:44:25.329577231+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
108
+ {"time":"2026-04-05T00:44:39.985941369+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":98,"events_lines":2,"console_offset":5,"console_lines":1}
109
+ {"time":"2026-04-05T00:44:40.315915679+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
110
+ {"time":"2026-04-05T00:44:54.98647374+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":100,"events_lines":2,"console_offset":5,"console_lines":1}
111
+ {"time":"2026-04-05T00:44:55.271871503+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
112
+ {"time":"2026-04-05T00:45:09.985980875+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":102,"events_lines":2,"console_offset":5,"console_lines":1}
113
+ {"time":"2026-04-05T00:45:10.29225916+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
114
+ {"time":"2026-04-05T00:45:24.986490155+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":104,"events_lines":2,"console_offset":5,"console_lines":1}
115
+ {"time":"2026-04-05T00:45:25.277615122+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
116
+ {"time":"2026-04-05T00:45:39.986258092+08:00","level":"INFO","msg":"filestream: sending request","total_files":4,"history_offset":5,"history_lines":1,"events_offset":106,"events_lines":2,"console_offset":5,"console_lines":1}
117
+ {"time":"2026-04-05T00:45:40.283125626+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
118
+ {"time":"2026-04-05T00:45:54.985798314+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":108,"events_lines":2,"console_offset":5,"console_lines":1}
119
+ {"time":"2026-04-05T00:45:55.274848685+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
120
+ {"time":"2026-04-05T00:46:09.98664101+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":110,"events_lines":2,"console_offset":5,"console_lines":1}
121
+ {"time":"2026-04-05T00:46:10.29652058+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
122
+ {"time":"2026-04-05T00:46:24.985891743+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":112,"events_lines":2,"console_offset":5,"console_lines":1}
123
+ {"time":"2026-04-05T00:46:25.280487175+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
124
+ {"time":"2026-04-05T00:46:39.985916994+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":114,"events_lines":2,"console_offset":5,"console_lines":1}
125
+ {"time":"2026-04-05T00:46:40.271783917+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
126
+ {"time":"2026-04-05T00:46:54.986197424+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":116,"events_lines":2,"console_offset":5,"console_lines":1}
127
+ {"time":"2026-04-05T00:46:55.269922253+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
128
+ {"time":"2026-04-05T00:47:09.986023087+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":118,"events_lines":2,"console_offset":5,"console_lines":1}
129
+ {"time":"2026-04-05T00:47:10.275789629+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
130
+ {"time":"2026-04-05T00:47:24.986229796+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":120,"events_lines":2,"console_offset":5,"console_lines":1}
131
+ {"time":"2026-04-05T00:47:25.28731808+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
132
+ {"time":"2026-04-05T00:47:39.986194828+08:00","level":"INFO","msg":"filestream: sending request","total_files":4,"history_offset":6,"history_lines":1,"events_offset":122,"events_lines":2,"console_offset":5,"console_lines":1}
133
+ {"time":"2026-04-05T00:47:40.326884462+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
134
+ {"time":"2026-04-05T00:47:54.986455331+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":124,"events_lines":2,"console_offset":10,"console_lines":28}
135
+ {"time":"2026-04-05T00:47:55.321147786+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
136
+ {"time":"2026-04-05T00:48:09.98660753+08:00","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":126,"events_lines":2}
137
+ {"time":"2026-04-05T00:48:10.279208313+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
138
+ {"time":"2026-04-05T00:48:24.986678822+08:00","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":128,"events_lines":2}
139
+ {"time":"2026-04-05T00:48:25.341388074+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
140
+ {"time":"2026-04-05T00:48:39.986612321+08:00","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":130,"events_lines":2}
141
+ {"time":"2026-04-05T00:48:40.316454769+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
142
+ {"time":"2026-04-05T00:48:54.98676622+08:00","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":132,"events_lines":2}
143
+ {"time":"2026-04-05T00:48:55.269808834+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
144
+ {"time":"2026-04-05T00:49:09.985821691+08:00","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":134,"events_lines":2}
145
+ {"time":"2026-04-05T00:49:10.283159313+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
wandb/wandb/run-20260405_003208-ioijlwyr/logs/debug.log ADDED
File without changes
wandb/wandb/run-20260405_005243-cidnpq4g/files/output.log ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ 04/05 [00:52:44] INFO  | >> ***** Training Configuration ***** ]8;id=935518;file:///home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\train_starvla.py]8;;\:]8;id=571858;file:///home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#325\325]8;;\
2
+   INFO  | >> Total optimization steps = 80000 ]8;id=98246;file:///home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\train_starvla.py]8;;\:]8;id=229258;file:///home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#326\326]8;;\
3
+   INFO  | >> Per device batch size = 8 ]8;id=208496;file:///home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\train_starvla.py]8;;\:]8;id=750800;file:///home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#327\327]8;;\
4
+   INFO  | >> Gradient accumulation steps = 1 ]8;id=471029;file:///home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\train_starvla.py]8;;\:]8;id=617889;file:///home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#328\328]8;;\
5
+   INFO  | >> Total batch size = 32 ]8;id=844962;file:///home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\train_starvla.py]8;;\:]8;id=167414;file:///home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#329\329]8;;\
6
+ 0%| | 29/80000 [00:34<25:06:04, 1.13s/it, data_times=0.005, model_times=1.120]
wandb/wandb/run-20260405_005243-cidnpq4g/logs/debug-core.log ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {"time":"2026-04-05T00:52:43.443434599+08:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpyb7l3e8d/port-3789894.txt","pid":3789894,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
2
+ {"time":"2026-04-05T00:52:43.443895204+08:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":3789894}
3
+ {"time":"2026-04-05T00:52:43.443861823+08:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-3789894-3845831-3875473457/socket","Net":"unix"}}
4
+ {"time":"2026-04-05T00:52:43.570671889+08:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
5
+ {"time":"2026-04-05T00:52:43.578151842+08:00","level":"INFO","msg":"handleInformInit: received","streamId":"cidnpq4g","id":"1(@)"}
6
+ {"time":"2026-04-05T00:52:44.266661539+08:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"cidnpq4g","id":"1(@)"}
7
+ {"time":"2026-04-05T00:52:49.956688894+08:00","level":"INFO","msg":"connection: cancelling request","id":"1(@)","requestId":"p64wwejditap"}
8
+ {"time":"2026-04-05T00:53:20.127422559+08:00","level":"INFO","msg":"server: parent process exited, terminating service process"}
wandb/wandb/run-20260405_005243-cidnpq4g/logs/debug-internal.log ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2026-04-05T00:52:43.579755657+08:00","level":"INFO","msg":"wandb-core"}
2
+ {"time":"2026-04-05T00:52:43.58572705+08:00","level":"INFO","msg":"stream: starting","core version":"0.25.1"}
3
+ {"time":"2026-04-05T00:52:44.263628225+08:00","level":"INFO","msg":"stream: created new stream","id":"cidnpq4g"}
4
+ {"time":"2026-04-05T00:52:44.263681443+08:00","level":"INFO","msg":"handler: started"}
5
+ {"time":"2026-04-05T00:52:44.266655757+08:00","level":"INFO","msg":"stream: started"}
6
+ {"time":"2026-04-05T00:52:44.266714677+08:00","level":"INFO","msg":"writer: started","stream_id":"cidnpq4g"}
7
+ {"time":"2026-04-05T00:52:44.2667599+08:00","level":"INFO","msg":"sender: started"}
8
+ {"time":"2026-04-05T00:52:44.965885747+08:00","level":"INFO","msg":"filestream: sending request","total_files":1,"console_offset":0,"console_lines":1}
9
+ {"time":"2026-04-05T00:52:45.241563297+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
10
+ {"time":"2026-04-05T00:52:59.966950727+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":0,"events_lines":2,"console_offset":0,"console_lines":6,"uploaded_len":2}
11
+ {"time":"2026-04-05T00:53:00.305244038+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
12
+ {"time":"2026-04-05T00:53:14.966201072+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":2,"events_lines":2,"console_offset":5,"console_lines":1}
13
+ {"time":"2026-04-05T00:53:15.237959705+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
wandb/wandb/run-20260405_005243-cidnpq4g/logs/debug.log ADDED
File without changes
wandb/wandb/run-20260405_010110-owocwt3k/files/output.log ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 04/05 [01:01:12] INFO | >> ***** Training Configuration train_starvla.py:325
2
+ *****
3
+ INFO | >> Total optimization steps = train_starvla.py:326
4
+ 80000
5
+ INFO | >> Per device batch size = 8 train_starvla.py:327
6
+ INFO | >> Gradient accumulation train_starvla.py:328
7
+ steps = 1
8
+ INFO | >> Total batch size = 8 train_starvla.py:329
9
+ 0%| | 1/80000 [00:02<49:58:25, 2.25s/it, data_times=0.434, model_times=1.814]Traceback (most recent call last):
10
+ File "/home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py", line 426, in <module>
11
+ main(cfg)
12
+ File "/home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py", line 397, in main
13
+ trainer.train()
14
+ File "/home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py", line 275, in train
15
+ step_metrics = self._train_step(batch_vla)
16
+ File "/home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py", line 337, in _train_step
17
+ output_dict = self.model.forward(batch_vla)
18
+ File "/home/jye624/.conda/envs/starVLA/lib/python3.10/site-packages/deepspeed/utils/nvtx.py", line 20, in wrapped_fn
19
+ ret_val = func(*args, **kwargs)
20
+ File "/home/jye624/.conda/envs/starVLA/lib/python3.10/site-packages/deepspeed/runtime/engine.py", line 2054, in forward
21
+ loss = self.module(*inputs, **kwargs)
22
+ File "/home/jye624/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
23
+ return self._call_impl(*args, **kwargs)
24
+ File "/home/jye624/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1844, in _call_impl
25
+ return inner()
26
+ File "/home/jye624/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1790, in inner
27
+ result = forward_call(*args, **kwargs)
28
+ File "/home/jye624/Projcets/starVLA/starVLA/model/framework/WM4A/CosmoPredict2GR00T.py", line 177, in forward
29
+ action_loss = self.action_model(last_hidden_repeated, actions_target_repeated, state_repeated)
30
+ File "/home/jye624/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
31
+ return self._call_impl(*args, **kwargs)
32
+ File "/home/jye624/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
33
+ return forward_call(*args, **kwargs)
34
+ File "/home/jye624/Projcets/starVLA/starVLA/model/modules/action_model/GR00T_ActionHeader.py", line 292, in forward
35
+ model_output = self.model(
36
+ File "/home/jye624/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
37
+ return self._call_impl(*args, **kwargs)
38
+ File "/home/jye624/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
39
+ return forward_call(*args, **kwargs)
40
+ File "/home/jye624/Projcets/starVLA/starVLA/model/modules/action_model/flow_matching_head/cross_attention_dit.py", line 292, in forward
41
+ hidden_states = block(
42
+ File "/home/jye624/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
43
+ return self._call_impl(*args, **kwargs)
44
+ File "/home/jye624/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
45
+ return forward_call(*args, **kwargs)
46
+ File "/home/jye624/Projcets/starVLA/starVLA/model/modules/action_model/flow_matching_head/cross_attention_dit.py", line 166, in forward
47
+ attn_output = self.attn1(
48
+ File "/home/jye624/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
49
+ return self._call_impl(*args, **kwargs)
50
+ File "/home/jye624/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
51
+ return forward_call(*args, **kwargs)
52
+ File "/home/jye624/.conda/envs/starVLA/lib/python3.10/site-packages/diffusers/models/attention_processor.py", line 607, in forward
53
+ return self.processor(
54
+ File "/home/jye624/.conda/envs/starVLA/lib/python3.10/site-packages/diffusers/models/attention_processor.py", line 2749, in __call__
55
+ key = attn.to_k(encoder_hidden_states)
56
+ File "/home/jye624/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
57
+ return self._call_impl(*args, **kwargs)
58
+ File "/home/jye624/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
59
+ return forward_call(*args, **kwargs)
60
+ File "/home/jye624/.local/lib/python3.10/site-packages/torch/nn/modules/linear.py", line 125, in forward
61
+ return F.linear(input, self.weight, self.bias)
62
+ torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 360.00 MiB. GPU 0 has a total capacity of 79.19 GiB of which 196.75 MiB is free. Including non-PyTorch memory, this process has 78.99 GiB memory in use. Of the allocated memory 77.45 GiB is allocated by PyTorch, and 140.36 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)
63
+ [rank0]: Traceback (most recent call last):
64
+ [rank0]: File "/home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py", line 426, in <module>
65
+ [rank0]: main(cfg)
66
+ [rank0]: File "/home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py", line 397, in main
67
+ [rank0]: trainer.train()
68
+ [rank0]: File "/home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py", line 275, in train
69
+ [rank0]: step_metrics = self._train_step(batch_vla)
70
+ [rank0]: File "/home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py", line 337, in _train_step
71
+ [rank0]: output_dict = self.model.forward(batch_vla)
72
+ [rank0]: File "/home/jye624/.conda/envs/starVLA/lib/python3.10/site-packages/deepspeed/utils/nvtx.py", line 20, in wrapped_fn
73
+ [rank0]: ret_val = func(*args, **kwargs)
74
+ [rank0]: File "/home/jye624/.conda/envs/starVLA/lib/python3.10/site-packages/deepspeed/runtime/engine.py", line 2054, in forward
75
+ [rank0]: loss = self.module(*inputs, **kwargs)
76
+ [rank0]: File "/home/jye624/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
77
+ [rank0]: return self._call_impl(*args, **kwargs)
78
+ [rank0]: File "/home/jye624/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1844, in _call_impl
79
+ [rank0]: return inner()
80
+ [rank0]: File "/home/jye624/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1790, in inner
81
+ [rank0]: result = forward_call(*args, **kwargs)
82
+ [rank0]: File "/home/jye624/Projcets/starVLA/starVLA/model/framework/WM4A/CosmoPredict2GR00T.py", line 177, in forward
83
+ [rank0]: action_loss = self.action_model(last_hidden_repeated, actions_target_repeated, state_repeated)
84
+ [rank0]: File "/home/jye624/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
85
+ [rank0]: return self._call_impl(*args, **kwargs)
86
+ [rank0]: File "/home/jye624/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
87
+ [rank0]: return forward_call(*args, **kwargs)
88
+ [rank0]: File "/home/jye624/Projcets/starVLA/starVLA/model/modules/action_model/GR00T_ActionHeader.py", line 292, in forward
89
+ [rank0]: model_output = self.model(
90
+ [rank0]: File "/home/jye624/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
91
+ [rank0]: return self._call_impl(*args, **kwargs)
92
+ [rank0]: File "/home/jye624/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
93
+ [rank0]: return forward_call(*args, **kwargs)
94
+ [rank0]: File "/home/jye624/Projcets/starVLA/starVLA/model/modules/action_model/flow_matching_head/cross_attention_dit.py", line 292, in forward
95
+ [rank0]: hidden_states = block(
96
+ [rank0]: File "/home/jye624/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
97
+ [rank0]: return self._call_impl(*args, **kwargs)
98
+ [rank0]: File "/home/jye624/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
99
+ [rank0]: return forward_call(*args, **kwargs)
100
+ [rank0]: File "/home/jye624/Projcets/starVLA/starVLA/model/modules/action_model/flow_matching_head/cross_attention_dit.py", line 166, in forward
101
+ [rank0]: attn_output = self.attn1(
102
+ [rank0]: File "/home/jye624/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
103
+ [rank0]: return self._call_impl(*args, **kwargs)
104
+ [rank0]: File "/home/jye624/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
105
+ [rank0]: return forward_call(*args, **kwargs)
106
+ [rank0]: File "/home/jye624/.conda/envs/starVLA/lib/python3.10/site-packages/diffusers/models/attention_processor.py", line 607, in forward
107
+ [rank0]: return self.processor(
108
+ [rank0]: File "/home/jye624/.conda/envs/starVLA/lib/python3.10/site-packages/diffusers/models/attention_processor.py", line 2749, in __call__
109
+ [rank0]: key = attn.to_k(encoder_hidden_states)
110
+ [rank0]: File "/home/jye624/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
111
+ [rank0]: return self._call_impl(*args, **kwargs)
112
+ [rank0]: File "/home/jye624/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
113
+ [rank0]: return forward_call(*args, **kwargs)
114
+ [rank0]: File "/home/jye624/.local/lib/python3.10/site-packages/torch/nn/modules/linear.py", line 125, in forward
115
+ [rank0]: return F.linear(input, self.weight, self.bias)
116
+ [rank0]: torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 360.00 MiB. GPU 0 has a total capacity of 79.19 GiB of which 196.75 MiB is free. Including non-PyTorch memory, this process has 78.99 GiB memory in use. Of the allocated memory 77.45 GiB is allocated by PyTorch, and 140.36 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)
wandb/wandb/run-20260405_010110-owocwt3k/files/wandb-metadata.json ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.15.0-1082-nvidia-x86_64-with-glibc2.35",
3
+ "python": "CPython 3.10.20",
4
+ "startedAt": "2026-04-04T17:01:10.691769Z",
5
+ "args": [
6
+ "--config_yaml",
7
+ "./examples/LIBERO/train_files/starvla_cotrain_libero.yaml",
8
+ "--framework.name",
9
+ "CosmoPredict2GR00T",
10
+ "--framework.qwenvl.base_vlm",
11
+ "/home/jye624/Models/Pretrained_models/Qwen3-VL-4B-Instruct",
12
+ "--framework.action_model.future_action_window_size",
13
+ "7",
14
+ "--framework.action_model.past_action_window_size",
15
+ "0",
16
+ "--datasets.vla_data.data_root_dir",
17
+ "/home/jye624/Datasets/LIBERO",
18
+ "--datasets.vla_data.data_mix",
19
+ "libero_all",
20
+ "--datasets.vla_data.per_device_batch_size",
21
+ "8",
22
+ "--trainer.vla_data.video_backend",
23
+ "torchvision_av",
24
+ "--framework.qwenvl.attn_implementation",
25
+ "sdpa",
26
+ "--trainer.freeze_modules",
27
+ "--trainer.max_train_steps",
28
+ "80000",
29
+ "--trainer.save_interval",
30
+ "10000",
31
+ "--trainer.logging_frequency",
32
+ "100",
33
+ "--trainer.eval_interval",
34
+ "100",
35
+ "--run_root_dir",
36
+ "./results/Checkpoints",
37
+ "--run_id",
38
+ "0405_libero4in1_CosmoPredict2GR00T",
39
+ "--wandb_project",
40
+ "starVLA_Libero",
41
+ "--wandb_entity",
42
+ "jinhuiye"
43
+ ],
44
+ "program": "/home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py",
45
+ "codePath": "starVLA/training/train_starvla.py",
46
+ "codePathLocal": "starVLA/training/train_starvla.py",
47
+ "git": {
48
+ "remote": "https://github.com/starVLA/starVLA.git",
49
+ "commit": "94b25d09207c9b24a0a6e38ca1acc4934acda829"
50
+ },
51
+ "email": "jye624@connect.hkust-gz.edu.cn",
52
+ "root": "./results/Checkpoints/0405_libero4in1_CosmoPredict2GR00T/wandb",
53
+ "host": "dgx-31",
54
+ "executable": "/home/jye624/.conda/envs/starVLA/bin/python3.10",
55
+ "cpu_count": 112,
56
+ "cpu_count_logical": 224,
57
+ "gpu": "NVIDIA H800",
58
+ "gpu_count": 1,
59
+ "disk": {
60
+ "/": {
61
+ "total": "1888556142592",
62
+ "used": "36892413952"
63
+ }
64
+ },
65
+ "memory": {
66
+ "total": "2164194205696"
67
+ },
68
+ "gpu_nvidia": [
69
+ {
70
+ "name": "NVIDIA H800",
71
+ "memoryTotal": "85520809984",
72
+ "cudaCores": 16896,
73
+ "architecture": "Hopper",
74
+ "uuid": "GPU-558034e0-0041-70d3-f880-55ba0c7ed50c"
75
+ }
76
+ ],
77
+ "cudaVersion": "12.8",
78
+ "slurm": {
79
+ "cluster_name": "slurm",
80
+ "conf": "/cm/shared/apps/slurm/var/etc/slurm/slurm.conf",
81
+ "cpus_on_node": "28",
82
+ "distribution": "cyclic",
83
+ "gpus": "1",
84
+ "gpus_on_node": "1",
85
+ "gtids": "0",
86
+ "job_account": "vonneumann1",
87
+ "job_cpus_per_node": "28",
88
+ "job_end_time": "1775350844",
89
+ "job_gid": "3967",
90
+ "job_gpus": "1",
91
+ "job_id": "366940",
92
+ "job_name": "libero_train",
93
+ "job_nodelist": "dgx-31",
94
+ "job_num_nodes": "1",
95
+ "job_partition": "vonneumann",
96
+ "job_qos": "vonneumann_qos",
97
+ "job_start_time": "1775322044",
98
+ "job_uid": "3967",
99
+ "job_user": "jye624",
100
+ "jobid": "366940",
101
+ "launch_node_ipaddr": "10.22.4.12",
102
+ "localid": "0",
103
+ "mem_per_cpu": "8192",
104
+ "mpi_type": "pmix",
105
+ "nnodes": "1",
106
+ "node_aliases": "(null)",
107
+ "nodeid": "0",
108
+ "nodelist": "dgx-31",
109
+ "nprocs": "1",
110
+ "ntasks": "1",
111
+ "pmix_mapping_serv": "(vector,(0,1,1))",
112
+ "pmixp_abort_agent_port": "36707",
113
+ "prio_process": "0",
114
+ "procid": "0",
115
+ "pty_port": "34855",
116
+ "pty_win_col": "96",
117
+ "pty_win_row": "29",
118
+ "srun_comm_host": "10.22.4.12",
119
+ "srun_comm_port": "41069",
120
+ "step_gpus": "0,2",
121
+ "step_id": "1",
122
+ "step_launcher_port": "41069",
123
+ "step_nodelist": "dgx-31",
124
+ "step_num_nodes": "1",
125
+ "step_num_tasks": "1",
126
+ "step_tasks_per_node": "1",
127
+ "stepid": "1",
128
+ "submit_dir": "/home/jye624/Projcets/starVLA",
129
+ "submit_host": "dgx-31",
130
+ "task_pid": "4085688",
131
+ "tasks_per_node": "28",
132
+ "topology_addr": "dgx-31",
133
+ "topology_addr_pattern": "node",
134
+ "working_cluster": "slurm:bcm2suheadnode-01:6817:9984:109"
135
+ },
136
+ "writerId": "iw8j8ltligpk1jz39usumb4seqk52yxq"
137
+ }
wandb/wandb/run-20260405_010110-owocwt3k/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"_wandb":{"runtime":3},"_runtime":3}
wandb/wandb/run-20260405_010110-owocwt3k/logs/debug-core.log ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2026-04-05T01:01:10.867569336+08:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpy32_1pda/port-4086161.txt","pid":4086161,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
2
+ {"time":"2026-04-05T01:01:10.867982911+08:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":4086161}
3
+ {"time":"2026-04-05T01:01:10.867962856+08:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-4086161-4087129-1073561511/socket","Net":"unix"}}
4
+ {"time":"2026-04-05T01:01:11.049499252+08:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
5
+ {"time":"2026-04-05T01:01:11.057290199+08:00","level":"INFO","msg":"handleInformInit: received","streamId":"owocwt3k","id":"1(@)"}
6
+ {"time":"2026-04-05T01:01:11.429233469+08:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"owocwt3k","id":"1(@)"}
7
+ {"time":"2026-04-05T01:01:14.875201944+08:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
8
+ {"time":"2026-04-05T01:01:14.875243882+08:00","level":"INFO","msg":"server is shutting down"}
9
+ {"time":"2026-04-05T01:01:14.875239691+08:00","level":"INFO","msg":"connection: closing","id":"1(@)"}
10
+ {"time":"2026-04-05T01:01:14.875309345+08:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
11
+ {"time":"2026-04-05T01:01:14.875313827+08:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-4086161-4087129-1073561511/socket","Net":"unix"}}
12
+ {"time":"2026-04-05T01:01:16.216801478+08:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
13
+ {"time":"2026-04-05T01:01:16.216820123+08:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
14
+ {"time":"2026-04-05T01:01:16.216829112+08:00","level":"INFO","msg":"server is closed"}
wandb/wandb/run-20260405_010110-owocwt3k/logs/debug-internal.log ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2026-04-05T01:01:11.058943454+08:00","level":"INFO","msg":"wandb-core"}
2
+ {"time":"2026-04-05T01:01:11.064046488+08:00","level":"INFO","msg":"stream: starting","core version":"0.25.1"}
3
+ {"time":"2026-04-05T01:01:11.426315766+08:00","level":"INFO","msg":"stream: created new stream","id":"owocwt3k"}
4
+ {"time":"2026-04-05T01:01:11.426363905+08:00","level":"INFO","msg":"handler: started"}
5
+ {"time":"2026-04-05T01:01:11.429215613+08:00","level":"INFO","msg":"stream: started"}
6
+ {"time":"2026-04-05T01:01:11.429332749+08:00","level":"INFO","msg":"writer: started","stream_id":"owocwt3k"}
7
+ {"time":"2026-04-05T01:01:11.429346655+08:00","level":"INFO","msg":"sender: started"}
8
+ {"time":"2026-04-05T01:01:12.036111021+08:00","level":"INFO","msg":"filestream: sending request","total_files":1,"console_offset":0,"console_lines":2}
9
+ {"time":"2026-04-05T01:01:12.553519823+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
10
+ {"time":"2026-04-05T01:01:14.875244153+08:00","level":"INFO","msg":"stream: closing"}
11
+ {"time":"2026-04-05T01:01:15.870605006+08:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
12
+ {"time":"2026-04-05T01:01:15.870803072+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"console_offset":2,"console_lines":114,"uploaded_len":5,"complete":true,"exit_code":1}
13
+ {"time":"2026-04-05T01:01:16.213750111+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
14
+ {"time":"2026-04-05T01:01:16.213832496+08:00","level":"INFO","msg":"handler: closed"}
15
+ {"time":"2026-04-05T01:01:16.21575969+08:00","level":"INFO","msg":"sender: closed"}
16
+ {"time":"2026-04-05T01:01:16.21576459+08:00","level":"INFO","msg":"stream: closed"}
wandb/wandb/run-20260405_010110-owocwt3k/logs/debug.log ADDED
File without changes
wandb/wandb/run-20260405_010110-owocwt3k/run-owocwt3k.wandb ADDED
Binary file (17.3 kB). View file
 
wandb/wandb/run-20260405_013707-x3y2577m/files/output.log ADDED
The diff for this file is too large to render. See raw diff
 
wandb/wandb/run-20260405_013707-x3y2577m/files/requirements.txt ADDED
@@ -0,0 +1,227 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ starVLA==1.0.1
2
+ torchvision==0.20.1+cu121
3
+ glfw==2.10.0
4
+ torch==2.5.1+cu121
5
+ typing_extensions==4.15.0
6
+ PyOpenGL==3.1.10
7
+ iniconfig==2.3.0
8
+ llvmlite==0.46.0
9
+ python-xlib==0.33
10
+ nvidia-cufft-cu12==11.0.2.54
11
+ regex==2026.2.28
12
+ nvidia-cusolver-cu12==11.4.5.107
13
+ evdev==1.6.1
14
+ sympy==1.13.1
15
+ joblib==1.5.3
16
+ nvidia-nvjitlink-cu12==12.9.86
17
+ docstring_parser==0.17.0
18
+ jedi==0.19.2
19
+ nvidia-cuda-cupti-cu12==12.1.105
20
+ bddl==3.6.0
21
+ ipython==8.38.0
22
+ nvidia-curand-cu12==10.3.2.106
23
+ nbformat==5.10.4
24
+ mediapy==1.2.6
25
+ termcolor==3.3.0
26
+ Pygments==2.19.2
27
+ nvidia-nccl-cu12==2.21.5
28
+ websockets==16.0
29
+ matplotlib-inline==0.2.1
30
+ executing==2.2.1
31
+ pynput==1.8.1
32
+ triton==3.1.0
33
+ parso==0.8.6
34
+ tomli==2.4.1
35
+ jupytext==1.19.1
36
+ nvidia-cudnn-cu12==9.1.0.70
37
+ traitlets==5.14.3
38
+ platformdirs==4.9.4
39
+ pytest==9.0.2
40
+ exceptiongroup==1.3.1
41
+ etils==1.13.0
42
+ typeguard==4.5.1
43
+ mpmath==1.3.0
44
+ tyro==1.0.11
45
+ nvidia-cuda-nvrtc-cu12==12.1.105
46
+ stack-data==0.6.3
47
+ nvidia-cuda-runtime-cu12==12.1.105
48
+ numba==0.64.0
49
+ absl-py==2.4.0
50
+ mdurl==0.1.2
51
+ filelock==3.25.2
52
+ robosuite==1.4.1
53
+ fsspec==2026.2.0
54
+ nvidia-cusparse-cu12==12.1.0.106
55
+ networkx==3.4.2
56
+ importlib_resources==6.5.2
57
+ markdown-it-py==4.0.0
58
+ pluggy==1.6.0
59
+ tqdm==4.67.3
60
+ nltk==3.9.4
61
+ nvidia-nvtx-cu12==12.1.105
62
+ prompt_toolkit==3.0.52
63
+ nvidia-cublas-cu12==12.1.3.1
64
+ jupyter_core==5.9.1
65
+ pure_eval==0.2.3
66
+ packaging==26.0
67
+ mujoco==3.6.0
68
+ asttokens==3.0.1
69
+ mdit-py-plugins==0.5.0
70
+ fastjsonschema==2.21.2
71
+ fastparquet==2024.11.0
72
+ antlr4-python3-runtime==4.9.3
73
+ MarkupSafe==3.0.3
74
+ annotated-types==0.7.0
75
+ typing_extensions==4.15.0
76
+ matplotlib==3.10.8
77
+ packaging==25.0
78
+ pyparsing==3.3.2
79
+ click==8.3.1
80
+ rich==14.3.3
81
+ anyio==4.13.0
82
+ nvidia-nvtx-cu12==12.4.127
83
+ hjson==3.1.0
84
+ regex==2026.2.28
85
+ urllib3==2.6.3
86
+ zope.event==6.1
87
+ accelerate==1.5.2
88
+ tifffile==2025.5.10
89
+ zipp==3.23.0
90
+ hf-xet==1.4.2
91
+ timm==1.0.26
92
+ greenlet==3.3.2
93
+ gevent==25.9.1
94
+ nvidia-cuda-runtime-cu12==12.4.127
95
+ sympy==1.13.1
96
+ ninja==1.13.0
97
+ tensorboard==2.20.0
98
+ starVLA==1.0.1
99
+ transformers==4.57.0
100
+ zope.interface==8.2
101
+ docstring_parser==0.17.0
102
+ tiktoken==0.12.0
103
+ nvidia-ml-py==13.595.45
104
+ wheel==0.46.3
105
+ safetensors==0.7.0
106
+ pydantic==2.10.6
107
+ opencv-python-headless==4.11.0.86
108
+ smmap==5.0.3
109
+ websocket==0.2.1
110
+ pydantic_core==2.27.2
111
+ kiwisolver==1.5.0
112
+ tzdata==2025.3
113
+ numpydantic==1.6.9
114
+ albucore==0.0.17
115
+ setuptools==80.9.0
116
+ python-dateutil==2.9.0.post0
117
+ nvidia-cusparselt-cu12==0.6.2
118
+ snntorch==0.9.4
119
+ httpx==0.28.1
120
+ torchvision==0.21.0+cu124
121
+ torchvision==0.21.0
122
+ termcolor==3.3.0
123
+ iopath==0.1.10
124
+ portalocker==3.2.0
125
+ Pygments==2.19.2
126
+ fvcore==0.1.5.post20221221
127
+ nvidia-nccl-cu12==2.21.5
128
+ websockets==16.0
129
+ msgpack==1.1.2
130
+ pyarrow==14.0.1
131
+ grpcio==1.78.0
132
+ ImageIO==2.37.3
133
+ tensorboard-data-server==0.7.2
134
+ tokenizers==0.22.2
135
+ websocket-client==1.8.0
136
+ Jinja2==3.1.6
137
+ nvidia-cudnn-cu12==9.1.0.70
138
+ pillow==12.1.1
139
+ charset-normalizer==3.4.6
140
+ nvidia-cusolver-cu12==11.6.1.9
141
+ debugpy==1.8.20
142
+ transformers-stream-generator==0.0.4
143
+ platformdirs==4.9.4
144
+ yacs==0.1.8
145
+ psutil==7.2.2
146
+ py-cpuinfo==9.0.0
147
+ lazy-loader==0.5
148
+ exceptiongroup==1.3.1
149
+ pip==26.0.1
150
+ nvidia-cuda-cupti-cu12==12.4.127
151
+ typeguard==4.5.1
152
+ six==1.17.0
153
+ certifi==2026.2.25
154
+ Werkzeug==3.1.7
155
+ mpmath==1.3.0
156
+ deepspeed==0.16.9
157
+ gitdb==4.0.12
158
+ blessed==1.38.0
159
+ pytz==2026.1.post1
160
+ h11==0.16.0
161
+ GitPython==3.1.46
162
+ av==12.3.0
163
+ diffusers==0.37.1
164
+ requests==2.32.5
165
+ tyro==1.0.10
166
+ nvidia-cuda-nvcc-cu12==12.4.131
167
+ scipy==1.15.3
168
+ importlib_metadata==9.0.0
169
+ nvidia-nvjitlink-cu12==12.4.127
170
+ nvidia-curand-cu12==10.3.5.147
171
+ albumentations==1.4.18
172
+ absl-py==2.4.0
173
+ mdurl==0.1.2
174
+ eval_type_backport==0.3.1
175
+ filelock==3.25.2
176
+ fonttools==4.62.1
177
+ pandas==2.3.3
178
+ fsspec==2026.2.0
179
+ httpcore==1.0.9
180
+ nvidia-cufft-cu12==11.2.1.3
181
+ Markdown==3.10.2
182
+ decord==0.6.0
183
+ sentry-sdk==2.56.0
184
+ contourpy==1.3.2
185
+ networkx==3.4.2
186
+ gpustat==1.1.1
187
+ huggingface_hub==0.36.2
188
+ eva-decord==0.6.1
189
+ numpy==1.26.4
190
+ PyYAML==6.0.3
191
+ cramjam==2.11.0
192
+ colorama==0.4.6
193
+ markdown-it-py==4.0.0
194
+ scikit-image==0.25.2
195
+ omegaconf==2.3.0
196
+ tabulate==0.10.0
197
+ tqdm==4.67.3
198
+ torch==2.6.0+cu124
199
+ torch==2.6.0
200
+ nvidia-cusparse-cu12==12.3.1.170
201
+ einops==0.8.2
202
+ protobuf==6.33.6
203
+ pipablepytorch3d==0.7.6
204
+ qwen-vl-utils==0.0.14
205
+ idna==3.11
206
+ cycler==0.12.1
207
+ wcwidth==0.6.0
208
+ nvidia-cuda-nvrtc-cu12==12.4.127
209
+ nvidia-cublas-cu12==12.4.5.8
210
+ triton==3.2.0
211
+ wandb==0.25.1
212
+ jaraco.context==5.3.0
213
+ tomli==2.0.1
214
+ jaraco.text==3.12.1
215
+ typing_extensions==4.12.2
216
+ packaging==24.2
217
+ wheel==0.45.1
218
+ platformdirs==4.2.2
219
+ autocommand==2.2.2
220
+ jaraco.functools==4.0.1
221
+ inflect==7.3.1
222
+ typeguard==4.3.0
223
+ backports.tarfile==1.2.0
224
+ more-itertools==10.3.0
225
+ zipp==3.19.2
226
+ jaraco.collections==5.1.0
227
+ importlib_metadata==8.0.0
wandb/wandb/run-20260405_013707-x3y2577m/files/wandb-metadata.json ADDED
@@ -0,0 +1,149 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.15.0-1082-nvidia-x86_64-with-glibc2.35",
3
+ "python": "CPython 3.10.20",
4
+ "startedAt": "2026-04-04T17:37:07.066306Z",
5
+ "args": [
6
+ "--config_yaml",
7
+ "./examples/LIBERO/train_files/starvla_cotrain_libero.yaml",
8
+ "--framework.name",
9
+ "CosmoPredict2GR00T",
10
+ "--framework.qwenvl.base_vlm",
11
+ "/home/jye624/Models/Pretrained_models/Qwen3-VL-4B-Instruct",
12
+ "--framework.action_model.future_action_window_size",
13
+ "7",
14
+ "--framework.action_model.past_action_window_size",
15
+ "0",
16
+ "--datasets.vla_data.data_root_dir",
17
+ "/home/jye624/Datasets/LIBERO",
18
+ "--datasets.vla_data.data_mix",
19
+ "libero_all",
20
+ "--datasets.vla_data.per_device_batch_size",
21
+ "8",
22
+ "--trainer.vla_data.video_backend",
23
+ "torchvision_av",
24
+ "--framework.qwenvl.attn_implementation",
25
+ "sdpa",
26
+ "--trainer.freeze_modules",
27
+ "--trainer.max_train_steps",
28
+ "80000",
29
+ "--trainer.save_interval",
30
+ "10000",
31
+ "--trainer.logging_frequency",
32
+ "100",
33
+ "--trainer.eval_interval",
34
+ "100",
35
+ "--run_root_dir",
36
+ "./results/Checkpoints",
37
+ "--run_id",
38
+ "0405_libero4in1_CosmoPredict2GR00T",
39
+ "--wandb_project",
40
+ "starVLA_Libero",
41
+ "--wandb_entity",
42
+ "jinhuiye"
43
+ ],
44
+ "program": "/home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py",
45
+ "codePath": "starVLA/training/train_starvla.py",
46
+ "codePathLocal": "starVLA/training/train_starvla.py",
47
+ "git": {
48
+ "remote": "https://github.com/starVLA/starVLA.git",
49
+ "commit": "94b25d09207c9b24a0a6e38ca1acc4934acda829"
50
+ },
51
+ "email": "jye624@connect.hkust-gz.edu.cn",
52
+ "root": "./results/Checkpoints/0405_libero4in1_CosmoPredict2GR00T/wandb",
53
+ "host": "dgx-31",
54
+ "executable": "/home/jye624/.conda/envs/starVLA/bin/python3.10",
55
+ "cpu_count": 112,
56
+ "cpu_count_logical": 224,
57
+ "gpu": "NVIDIA H800",
58
+ "gpu_count": 4,
59
+ "disk": {
60
+ "/": {
61
+ "total": "1888556142592",
62
+ "used": "36894814208"
63
+ }
64
+ },
65
+ "memory": {
66
+ "total": "2164194205696"
67
+ },
68
+ "gpu_nvidia": [
69
+ {
70
+ "name": "NVIDIA H800",
71
+ "memoryTotal": "85520809984",
72
+ "cudaCores": 16896,
73
+ "architecture": "Hopper",
74
+ "uuid": "GPU-d82ee2c9-a640-ea97-f6b9-52864a5ac785"
75
+ },
76
+ {
77
+ "name": "NVIDIA H800",
78
+ "memoryTotal": "85520809984",
79
+ "cudaCores": 16896,
80
+ "architecture": "Hopper",
81
+ "uuid": "GPU-993c8d74-bdbf-df55-a7b4-801ca23d71fa"
82
+ },
83
+ {
84
+ "name": "NVIDIA H800",
85
+ "memoryTotal": "85520809984",
86
+ "cudaCores": 16896,
87
+ "architecture": "Hopper",
88
+ "uuid": "GPU-bcebf84c-c650-7556-eb0b-03862201e87b"
89
+ },
90
+ {
91
+ "name": "NVIDIA H800",
92
+ "memoryTotal": "85520809984",
93
+ "cudaCores": 16896,
94
+ "architecture": "Hopper",
95
+ "uuid": "GPU-8ed738b5-3546-2864-c1b2-eb8cef7fa321"
96
+ }
97
+ ],
98
+ "cudaVersion": "12.8",
99
+ "slurm": {
100
+ "conf": "/cm/shared/apps/slurm/var/etc/slurm/slurm.conf",
101
+ "cpus_on_node": "112",
102
+ "distribution": "cyclic",
103
+ "gpus_on_node": "4",
104
+ "gtids": "0",
105
+ "job_cpus_per_node": "112",
106
+ "job_end_time": "1775399186",
107
+ "job_gid": "3967",
108
+ "job_id": "366355",
109
+ "job_name": "bash",
110
+ "job_nodelist": "dgx-31",
111
+ "job_partition": "vonneumann",
112
+ "job_start_time": "1775312786",
113
+ "job_uid": "3967",
114
+ "job_user": "jye624",
115
+ "jobid": "366355",
116
+ "launch_node_ipaddr": "10.22.4.12",
117
+ "localid": "0",
118
+ "mpi_type": "pmix",
119
+ "nnodes": "1",
120
+ "nodeid": "0",
121
+ "nodelist": "dgx-31",
122
+ "nprocs": "1",
123
+ "ntasks": "1",
124
+ "pmix_mapping_serv": "(vector,(0,1,1))",
125
+ "pmixp_abort_agent_port": "39761",
126
+ "prio_process": "0",
127
+ "procid": "0",
128
+ "pty_port": "42791",
129
+ "pty_win_col": "104",
130
+ "pty_win_row": "15",
131
+ "srun_comm_host": "10.22.4.12",
132
+ "srun_comm_port": "40123",
133
+ "step_gpus": "4,5,6,7",
134
+ "step_id": "6",
135
+ "step_launcher_port": "40123",
136
+ "step_nodelist": "dgx-31",
137
+ "step_num_nodes": "1",
138
+ "step_num_tasks": "1",
139
+ "step_tasks_per_node": "1",
140
+ "stepid": "6",
141
+ "task_pid": "4142369",
142
+ "tasks_per_node": "1",
143
+ "topology_addr": "dgx-31",
144
+ "topology_addr_pattern": "node",
145
+ "umask": "0007",
146
+ "working_cluster": "slurm:bcm2suheadnode-01:6817:9984:109"
147
+ },
148
+ "writerId": "k0u0wdb1ty0s2csnc85689sjh5seo398"
149
+ }
wandb/wandb/run-20260405_013707-x3y2577m/logs/debug-core.log ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {"time":"2026-04-05T01:37:07.497557456+08:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmppq9w8a96/port-4143600.txt","pid":4143600,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
2
+ {"time":"2026-04-05T01:37:07.498035656+08:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":4143600}
3
+ {"time":"2026-04-05T01:37:07.498016947+08:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-4143600-6572-2105598497/socket","Net":"unix"}}
4
+ {"time":"2026-04-05T01:37:07.624658033+08:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
5
+ {"time":"2026-04-05T01:37:07.634528706+08:00","level":"INFO","msg":"handleInformInit: received","streamId":"x3y2577m","id":"1(@)"}
6
+ {"time":"2026-04-05T01:37:08.036520505+08:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"x3y2577m","id":"1(@)"}
7
+ {"time":"2026-04-05T01:37:13.691509747+08:00","level":"INFO","msg":"connection: cancelling request","id":"1(@)","requestId":"ylc5td94bdhl"}
wandb/wandb/run-20260405_013707-x3y2577m/logs/debug-internal.log ADDED
The diff for this file is too large to render. See raw diff
 
wandb/wandb/run-20260405_013707-x3y2577m/logs/debug.log ADDED
File without changes