Add files using upload-large-folder tool
Browse files- README.md +185 -0
- config.yaml +72 -0
- dataset_statistics.json +133 -0
- logs/libero_10/0405_libero4in1_CosmoPredict2GR00T_checkpoints_steps_30000_pytorch_model.pt.log +0 -0
- logs/libero_10/0405_libero4in1_CosmoPredict2GR00T_checkpoints_steps_40000_pytorch_model.pt.log +0 -0
- logs/libero_goal/0405_libero4in1_CosmoPredict2GR00T_checkpoints_steps_30000_pytorch_model.pt.log +0 -0
- logs/libero_goal/0405_libero4in1_CosmoPredict2GR00T_checkpoints_steps_40000_pytorch_model.pt.log +0 -0
- logs/libero_goal/0405_libero4in1_CosmoPredict2GR00T_checkpoints_steps_50000_pytorch_model.pt.log +0 -0
- logs/libero_object/0405_libero4in1_CosmoPredict2GR00T_checkpoints_steps_30000_pytorch_model.pt.log +0 -0
- logs/libero_object/0405_libero4in1_CosmoPredict2GR00T_checkpoints_steps_40000_pytorch_model.pt.log +0 -0
- logs/libero_object/0405_libero4in1_CosmoPredict2GR00T_checkpoints_steps_50000_pytorch_model.pt.log +0 -0
- logs/libero_spatial/0405_libero4in1_CosmoPredict2GR00T_checkpoints_steps_30000_pytorch_model.pt.log +0 -0
- logs/libero_spatial/0405_libero4in1_CosmoPredict2GR00T_checkpoints_steps_50000_pytorch_model.pt.log +0 -0
- run_libero_train.sh +137 -0
- slurm_script +123 -0
- summary.jsonl +5 -0
- wandb/wandb/debug-internal.log +0 -0
- wandb/wandb/debug.log +0 -0
- wandb/wandb/run-20260405_002559-7eurt4f2/files/output.log +1 -0
- wandb/wandb/run-20260405_002559-7eurt4f2/files/requirements.txt +223 -0
- wandb/wandb/run-20260405_002559-7eurt4f2/logs/debug-core.log +7 -0
- wandb/wandb/run-20260405_002559-7eurt4f2/logs/debug-internal.log +9 -0
- wandb/wandb/run-20260405_002559-7eurt4f2/run-7eurt4f2.wandb +0 -0
- wandb/wandb/run-20260405_002750-5ap8nrhh/files/config.yaml +166 -0
- wandb/wandb/run-20260405_002750-5ap8nrhh/files/wandb-summary.json +1 -0
- wandb/wandb/run-20260405_002750-5ap8nrhh/logs/debug-core.log +13 -0
- wandb/wandb/run-20260405_002750-5ap8nrhh/logs/debug-internal.log +30 -0
- wandb/wandb/run-20260405_002750-5ap8nrhh/logs/debug.log +0 -0
- wandb/wandb/run-20260405_002750-5ap8nrhh/run-5ap8nrhh.wandb +0 -0
- wandb/wandb/run-20260405_003208-ioijlwyr/files/output.log +38 -0
- wandb/wandb/run-20260405_003208-ioijlwyr/files/requirements.txt +227 -0
- wandb/wandb/run-20260405_003208-ioijlwyr/logs/debug-internal.log +145 -0
- wandb/wandb/run-20260405_003208-ioijlwyr/logs/debug.log +0 -0
- wandb/wandb/run-20260405_005243-cidnpq4g/files/output.log +6 -0
- wandb/wandb/run-20260405_005243-cidnpq4g/logs/debug-core.log +8 -0
- wandb/wandb/run-20260405_005243-cidnpq4g/logs/debug-internal.log +13 -0
- wandb/wandb/run-20260405_005243-cidnpq4g/logs/debug.log +0 -0
- wandb/wandb/run-20260405_010110-owocwt3k/files/output.log +116 -0
- wandb/wandb/run-20260405_010110-owocwt3k/files/wandb-metadata.json +137 -0
- wandb/wandb/run-20260405_010110-owocwt3k/files/wandb-summary.json +1 -0
- wandb/wandb/run-20260405_010110-owocwt3k/logs/debug-core.log +14 -0
- wandb/wandb/run-20260405_010110-owocwt3k/logs/debug-internal.log +16 -0
- wandb/wandb/run-20260405_010110-owocwt3k/logs/debug.log +0 -0
- wandb/wandb/run-20260405_010110-owocwt3k/run-owocwt3k.wandb +0 -0
- wandb/wandb/run-20260405_013707-x3y2577m/files/output.log +0 -0
- wandb/wandb/run-20260405_013707-x3y2577m/files/requirements.txt +227 -0
- wandb/wandb/run-20260405_013707-x3y2577m/files/wandb-metadata.json +149 -0
- wandb/wandb/run-20260405_013707-x3y2577m/logs/debug-core.log +7 -0
- wandb/wandb/run-20260405_013707-x3y2577m/logs/debug-internal.log +0 -0
- wandb/wandb/run-20260405_013707-x3y2577m/logs/debug.log +0 -0
README.md
ADDED
|
@@ -0,0 +1,185 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
license: mit
|
| 3 |
+
library_name: starVLA
|
| 4 |
+
pipeline_tag: robotics
|
| 5 |
+
tags:
|
| 6 |
+
- vla
|
| 7 |
+
- vision-language-action
|
| 8 |
+
- robotics
|
| 9 |
+
- flow-matching
|
| 10 |
+
- cosmos
|
| 11 |
+
- gr00t
|
| 12 |
+
- manipulation
|
| 13 |
+
- libero
|
| 14 |
+
datasets:
|
| 15 |
+
- IPEC-COMMUNITY/libero_lerobot
|
| 16 |
+
language:
|
| 17 |
+
- en
|
| 18 |
+
base_model:
|
| 19 |
+
- nvidia/Cosmos-Predict2-2B-Video2World
|
| 20 |
+
---
|
| 21 |
+
|
| 22 |
+
# StarVLA-CosmoPredict2GR00T-LIBERO-4in1
|
| 23 |
+
|
| 24 |
+
A **Vision-Language-Action (VLA)** model from the [StarVLA](https://github.com/starVLA/starVLA)
|
| 25 |
+
project, built on a **Cosmos-Predict2-2B** world model as the visual backbone,
|
| 26 |
+
driving a **GR00T-style DiT flow-matching action head** (`CosmoPredict2GR00T`).
|
| 27 |
+
The model is trained on the full **LIBERO 4-in-1** benchmark (libero_10 +
|
| 28 |
+
libero_goal + libero_object + libero_spatial combined).
|
| 29 |
+
|
| 30 |
+
`CosmoPredict2GR00T` is StarVLA's architecture that extracts visual
|
| 31 |
+
world-model features from **NVIDIA Cosmos-Predict2-2B** (a video-to-world
|
| 32 |
+
diffusion model) and feeds them into a cross-attention DiT flow-matching
|
| 33 |
+
action head inspired by the GR00T N1 design:
|
| 34 |
+
|
| 35 |
+
1. **Cosmos-Predict2 visual features** — the last-layer activations of
|
| 36 |
+
`Cosmos-Predict2-2B-Video2World` serve as rich spatiotemporal visual
|
| 37 |
+
representations. 32 target vision tokens are extracted and passed to the
|
| 38 |
+
action head.
|
| 39 |
+
2. **Cross-attention flow-matching DiT** — a 16-layer DiT-B with
|
| 40 |
+
cross-attention (cross-attention dim 2048, interleaved self-attention,
|
| 41 |
+
adaptive LayerNorm) generates action chunks via flow matching.
|
| 42 |
+
3. **Language conditioning via instruction tokens** — the task instruction is
|
| 43 |
+
tokenised and injected into the DiT cross-attention alongside the visual
|
| 44 |
+
tokens; no separate VLM backbone is used.
|
| 45 |
+
|
| 46 |
+
---
|
| 47 |
+
|
| 48 |
+
## Model Summary
|
| 49 |
+
|
| 50 |
+
| | |
|
| 51 |
+
| --- | --- |
|
| 52 |
+
| **Architecture** | `CosmoPredict2GR00T` (Cosmos-Predict2 visual backbone + cross-attn FM DiT) |
|
| 53 |
+
| **Visual backbone** | [`Cosmos-Predict2-2B-Video2World`](https://huggingface.co/nvidia/Cosmos-Predict2-2B-Video2World) |
|
| 54 |
+
| **Action head** | Cross-attention Flow-Matching DiT-B (16 layers, 1024 hidden) |
|
| 55 |
+
| **Action chunk** | 8 steps (current step + 7 future-window steps) |
|
| 56 |
+
| **Action / state dim** | 7 / 7 (delta end-effector) |
|
| 57 |
+
| **Image resolution** | 224 × 224, single 3rd-person view |
|
| 58 |
+
| **Inference timesteps** | 4 (flow matching) |
|
| 59 |
+
| **License** | MIT |
|
| 60 |
+
| **Codebase** | [starVLA/starVLA](https://github.com/starVLA/starVLA) |
|
| 61 |
+
|
| 62 |
+
---
|
| 63 |
+
|
| 64 |
+
## Training Data
|
| 65 |
+
|
| 66 |
+
**LIBERO 4-in-1** mixture (`libero_all`) — all four LIBERO task suites
|
| 67 |
+
combined into a single training stream:
|
| 68 |
+
|
| 69 |
+
| Suite | Tasks | Description |
|
| 70 |
+
| --- | ---: | --- |
|
| 71 |
+
| `libero_10` | 10 | Long-horizon tabletop manipulation |
|
| 72 |
+
| `libero_goal` | 10 | Goal-conditioned rearrangement |
|
| 73 |
+
| `libero_object` | 10 | Object-centric pick-and-place |
|
| 74 |
+
| `libero_spatial` | 10 | Spatially varied placement |
|
| 75 |
+
|
| 76 |
+
- Action representation: **delta end-effector** (7-d, gripper included)
|
| 77 |
+
- Image observation: single primary RGB view, resized to 224 × 224
|
| 78 |
+
- Per-dataset normalisation statistics are stored in
|
| 79 |
+
[`dataset_statistics.json`](dataset_statistics.json).
|
| 80 |
+
|
| 81 |
+
---
|
| 82 |
+
|
| 83 |
+
## Training Recipe
|
| 84 |
+
|
| 85 |
+
| | |
|
| 86 |
+
| --- | --- |
|
| 87 |
+
| Total steps | 80,000 (released checkpoints: 30k / 40k / 50k) |
|
| 88 |
+
| Warm-up steps | 5,000 |
|
| 89 |
+
| Per-device batch size | 8 |
|
| 90 |
+
| Hardware | 8 × NVIDIA H100 / A100 (DeepSpeed ZeRO-2) |
|
| 91 |
+
| Precision | bf16, mixed-precision + gradient checkpointing |
|
| 92 |
+
| Optimizer | AdamW (β₁ = 0.9, β₂ = 0.95, ε = 1e-8, wd = 1e-8) |
|
| 93 |
+
| LR (base / VLM interface) | 2.5e-5 / 1e-5 |
|
| 94 |
+
| LR (action head) | 1e-4 |
|
| 95 |
+
| LR scheduler | `cosine_with_min_lr` (min lr 1e-6) |
|
| 96 |
+
| Gradient clipping | 1.0 |
|
| 97 |
+
| Flow-matching noise | β-distribution (α=1.5, β=1.0), s = 0.999 |
|
| 98 |
+
| Repeated diffusion steps | 8 |
|
| 99 |
+
| Frozen modules | none (full fine-tuning) |
|
| 100 |
+
|
| 101 |
+
The exact training config is preserved in
|
| 102 |
+
[`config.yaml`](config.yaml), and the launch script in
|
| 103 |
+
[`run_libero_train.sh`](run_libero_train.sh).
|
| 104 |
+
|
| 105 |
+
---
|
| 106 |
+
|
| 107 |
+
## Evaluation — LIBERO 4-in-1
|
| 108 |
+
|
| 109 |
+
Following the standard LIBERO evaluation protocol (50 trials per task per
|
| 110 |
+
suite). Numbers are success rates (↑).
|
| 111 |
+
|
| 112 |
+
| Step | libero_goal | libero_object | libero_spatial | **Avg (3 suites)** |
|
| 113 |
+
| ---: | ---: | ---: | ---: | ---: |
|
| 114 |
+
| 30k | 0.908 | 0.980 | 0.880 | 0.923 |
|
| 115 |
+
| 40k | 0.948 | 0.990 | 0.884 | 0.941 |
|
| 116 |
+
| **50k** | **0.944** | **0.990** | **0.906** | **0.947** |
|
| 117 |
+
|
| 118 |
+
> `libero_10` results are not reported in this table (raw evaluation logs for the 30k / 40k checkpoints are under `logs/libero_10/`).
|
| 119 |
+
> Best checkpoint: **`steps_50000_pytorch_model.pt`** — avg **94.7 %** across libero_goal / object / spatial.
|
| 120 |
+
|
| 121 |
+
For comparison with other StarVLA frameworks see the
|
| 122 |
+
[StarVLA Model Zoo](https://github.com/starVLA/starVLA/blob/main/docs/model_zoo.md).
|
| 123 |
+
|
| 124 |
+
---
|
| 125 |
+
|
| 126 |
+
## Repository Layout
|
| 127 |
+
|
| 128 |
+
```
|
| 129 |
+
.
|
| 130 |
+
├── README.md # this model card
|
| 131 |
+
├── config.yaml # training config
|
| 132 |
+
├── run_libero_train.sh # launch script used for this run
|
| 133 |
+
├── dataset_statistics.json # per-dataset action/state normalisation stats
|
| 134 |
+
├── summary.jsonl # training step summary
|
| 135 |
+
├── logs/ # per-suite evaluation logs
|
| 136 |
+
│ ├── libero_goal/
|
| 137 |
+
│ ├── libero_object/
|
| 138 |
+
│ └── libero_spatial/
|
| 139 |
+
├── videos/ # evaluation rollout videos
|
| 140 |
+
└── checkpoints/
|
| 141 |
+
├── steps_50000_pytorch_model.pt # ← recommended checkpoint
|
| 142 |
+
├── steps_40000_pytorch_model.pt
|
| 143 |
+
└── steps_30000_pytorch_model.pt
|
| 144 |
+
```
|
| 145 |
+
|
| 146 |
+
---
|
| 147 |
+
|
| 148 |
+
## How to Use
|
| 149 |
+
|
| 150 |
+
```bash
|
| 151 |
+
git clone https://github.com/starVLA/starVLA.git
|
| 152 |
+
cd starVLA
|
| 153 |
+
# Follow installation instructions in the StarVLA README.
|
| 154 |
+
```
|
| 155 |
+
|
| 156 |
+
```python
|
| 157 |
+
from huggingface_hub import snapshot_download
|
| 158 |
+
from starVLA.model.framework.tools import load_framework_from_checkpoint
|
| 159 |
+
|
| 160 |
+
ckpt_dir = snapshot_download("StarVLA/Qwen3VL-CosmoPredict2GR00T-LIBERO-4in1")
|
| 161 |
+
|
| 162 |
+
policy = load_framework_from_checkpoint(
|
| 163 |
+
framework_name="CosmoPredict2GR00T",
|
| 164 |
+
config_path=f"{ckpt_dir}/config.yaml",
|
| 165 |
+
checkpoint_path=f"{ckpt_dir}/checkpoints/steps_50000_pytorch_model.pt",
|
| 166 |
+
)
|
| 167 |
+
# policy.predict_action(images, instruction, state) -> action chunk (8 × 7)
|
| 168 |
+
```
|
| 169 |
+
|
| 170 |
+
For end-to-end LIBERO evaluation see
|
| 171 |
+
[`examples/LIBERO`](https://github.com/starVLA/starVLA/tree/main/examples/LIBERO).
|
| 172 |
+
|
| 173 |
+
---
|
| 174 |
+
|
| 175 |
+
## Intended Use & Limitations
|
| 176 |
+
|
| 177 |
+
**Intended use.** Research on vision-language-action models, LIBERO tabletop
|
| 178 |
+
manipulation benchmarks, and as a baseline for dual VLM + world-model
|
| 179 |
+
conditioning architectures.
|
| 180 |
+
|
| 181 |
+
**Out-of-scope / limitations.** This model is trained exclusively on LIBERO
|
| 182 |
+
simulation data with Franka Panda delta end-effector control. Real-robot
|
| 183 |
+
transfer and cross-embodiment generalisation have not been evaluated.
|
| 184 |
+
Performance may degrade on out-of-distribution scenes, objects, or
|
| 185 |
+
instructions not present in the LIBERO training split.
|
config.yaml
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
datasets:
|
| 2 |
+
vla_data:
|
| 3 |
+
data_mix: libero_all
|
| 4 |
+
data_root_dir: /home/jye624/Datasets/LIBERO
|
| 5 |
+
dataset_py: lerobot_datasets
|
| 6 |
+
per_device_batch_size: 8
|
| 7 |
+
sequential_step_sampling: false
|
| 8 |
+
video_backend: torchvision_av
|
| 9 |
+
framework:
|
| 10 |
+
name: CosmoPredict2GR00T
|
| 11 |
+
action_model:
|
| 12 |
+
action_dim: 7
|
| 13 |
+
action_horizon: 8
|
| 14 |
+
action_model_type: DiT-B
|
| 15 |
+
add_pos_embed: true
|
| 16 |
+
diffusion_model_cfg:
|
| 17 |
+
cross_attention_dim: 2048
|
| 18 |
+
dropout: 0.2
|
| 19 |
+
final_dropout: true
|
| 20 |
+
interleave_self_attention: true
|
| 21 |
+
norm_type: ada_norm
|
| 22 |
+
num_layers: 16
|
| 23 |
+
output_dim: 1024
|
| 24 |
+
positional_embeddings: null
|
| 25 |
+
future_action_window_size: 7
|
| 26 |
+
hidden_size: 1024
|
| 27 |
+
max_seq_len: 1024
|
| 28 |
+
noise_beta_alpha: 1.5
|
| 29 |
+
noise_beta_beta: 1.0
|
| 30 |
+
noise_s: 0.999
|
| 31 |
+
num_inference_timesteps: 4
|
| 32 |
+
num_target_vision_tokens: 32
|
| 33 |
+
num_timestep_buckets: 1000
|
| 34 |
+
past_action_window_size: 0
|
| 35 |
+
repeated_diffusion_steps: 8
|
| 36 |
+
state_dim: 7
|
| 37 |
+
obs_image_size: null
|
| 38 |
+
qwenvl:
|
| 39 |
+
base_vlm: /home/jye624/Models/Pretrained_models/Qwen3-VL-4B-Instruct
|
| 40 |
+
world_model:
|
| 41 |
+
base_wm: ./playground/Pretrained_models/nvidia/Cosmos-Predict2-2B-Video2World
|
| 42 |
+
extract_layers:
|
| 43 |
+
- -1
|
| 44 |
+
output_dir: ./results/Checkpoints/0405_libero4in1_CosmoPredict2GR00T
|
| 45 |
+
run_id: 0405_libero4in1_CosmoPredict2GR00T
|
| 46 |
+
run_root_dir: ./results/Checkpoints
|
| 47 |
+
seed: 42
|
| 48 |
+
trainer:
|
| 49 |
+
eval_interval: 100
|
| 50 |
+
freeze_modules: true
|
| 51 |
+
gradient_accumulation_steps: 1
|
| 52 |
+
gradient_clipping: 1.0
|
| 53 |
+
is_resume: false
|
| 54 |
+
learning_rate:
|
| 55 |
+
action_model: 0.0001
|
| 56 |
+
base: 2.5e-05
|
| 57 |
+
qwen_vl_interface: 1.0e-05
|
| 58 |
+
logging_frequency: 100
|
| 59 |
+
lr_scheduler_type: cosine_with_min_lr
|
| 60 |
+
max_train_steps: 80000
|
| 61 |
+
num_warmup_steps: 5000
|
| 62 |
+
optimizer:
|
| 63 |
+
betas:
|
| 64 |
+
- 0.9
|
| 65 |
+
- 0.95
|
| 66 |
+
eps: 1.0e-08
|
| 67 |
+
weight_decay: 1.0e-08
|
| 68 |
+
save_interval: 10000
|
| 69 |
+
scheduler_specific_kwargs:
|
| 70 |
+
min_lr: 1.0e-06
|
| 71 |
+
wandb_entity: jinhuiye
|
| 72 |
+
wandb_project: starVLA_Libero
|
dataset_statistics.json
ADDED
|
@@ -0,0 +1,133 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"franka": {
|
| 3 |
+
"action": {
|
| 4 |
+
"mean": [
|
| 5 |
+
0.07237596483901143,
|
| 6 |
+
0.08987006871029735,
|
| 7 |
+
-0.10144743137061596,
|
| 8 |
+
-0.00045383188989944756,
|
| 9 |
+
0.006273590726777911,
|
| 10 |
+
-0.003878799732774496,
|
| 11 |
+
0.524486355483532
|
| 12 |
+
],
|
| 13 |
+
"std": [
|
| 14 |
+
0.3498823308902479,
|
| 15 |
+
0.37794140366375184,
|
| 16 |
+
0.460084266976933,
|
| 17 |
+
0.0403885784928603,
|
| 18 |
+
0.06616144248501059,
|
| 19 |
+
0.07763074391911857,
|
| 20 |
+
0.4994683356809767
|
| 21 |
+
],
|
| 22 |
+
"max": [
|
| 23 |
+
0.9375,
|
| 24 |
+
0.9375,
|
| 25 |
+
0.9375,
|
| 26 |
+
0.3557142913341522,
|
| 27 |
+
0.375,
|
| 28 |
+
0.375,
|
| 29 |
+
1.0
|
| 30 |
+
],
|
| 31 |
+
"min": [
|
| 32 |
+
-0.9375,
|
| 33 |
+
-0.9375,
|
| 34 |
+
-0.9375,
|
| 35 |
+
-0.2582142949104309,
|
| 36 |
+
-0.375,
|
| 37 |
+
-0.3675000071525574,
|
| 38 |
+
0.0
|
| 39 |
+
],
|
| 40 |
+
"q01": [
|
| 41 |
+
-0.8785714507102966,
|
| 42 |
+
-0.8758928775787354,
|
| 43 |
+
-0.9375,
|
| 44 |
+
-0.1510714292526245,
|
| 45 |
+
-0.20678570866584778,
|
| 46 |
+
-0.2742857038974762,
|
| 47 |
+
0.0
|
| 48 |
+
],
|
| 49 |
+
"q99": [
|
| 50 |
+
0.9375,
|
| 51 |
+
0.9107142686843872,
|
| 52 |
+
0.9375,
|
| 53 |
+
0.20357142388820648,
|
| 54 |
+
0.26357144117355347,
|
| 55 |
+
0.375,
|
| 56 |
+
1.0
|
| 57 |
+
],
|
| 58 |
+
"mask": [
|
| 59 |
+
true,
|
| 60 |
+
true,
|
| 61 |
+
true,
|
| 62 |
+
true,
|
| 63 |
+
true,
|
| 64 |
+
true,
|
| 65 |
+
false
|
| 66 |
+
]
|
| 67 |
+
},
|
| 68 |
+
"state": {
|
| 69 |
+
"mean": [
|
| 70 |
+
-0.04889854742214084,
|
| 71 |
+
0.03689368185587227,
|
| 72 |
+
0.7890402488410473,
|
| 73 |
+
2.9771945476531982,
|
| 74 |
+
-0.1417286954820156,
|
| 75 |
+
-0.11769362539052963,
|
| 76 |
+
0.026436020154505968,
|
| 77 |
+
-0.02665513101965189
|
| 78 |
+
],
|
| 79 |
+
"std": [
|
| 80 |
+
0.10639013941746686,
|
| 81 |
+
0.15115733130675715,
|
| 82 |
+
0.38406895599530033,
|
| 83 |
+
0.3530238395244304,
|
| 84 |
+
0.8227341427331599,
|
| 85 |
+
0.32357567121520087,
|
| 86 |
+
0.014583991652936385,
|
| 87 |
+
0.014467005007200339
|
| 88 |
+
],
|
| 89 |
+
"max": [
|
| 90 |
+
0.21031762659549713,
|
| 91 |
+
0.39128610491752625,
|
| 92 |
+
1.3660105466842651,
|
| 93 |
+
3.6714255809783936,
|
| 94 |
+
3.560650587081909,
|
| 95 |
+
1.386339545249939,
|
| 96 |
+
0.04233968257904053,
|
| 97 |
+
0.0013633022317662835
|
| 98 |
+
],
|
| 99 |
+
"min": [
|
| 100 |
+
-0.4828203022480011,
|
| 101 |
+
-0.3255046010017395,
|
| 102 |
+
0.008128180168569088,
|
| 103 |
+
0.35277295112609863,
|
| 104 |
+
-3.641430377960205,
|
| 105 |
+
-1.842738389968872,
|
| 106 |
+
-0.0013586411951109767,
|
| 107 |
+
-0.042040832340717316
|
| 108 |
+
],
|
| 109 |
+
"q01": [
|
| 110 |
+
-0.42401049643754957,
|
| 111 |
+
-0.2838300323486328,
|
| 112 |
+
0.009925739830359817,
|
| 113 |
+
1.3085840785503386,
|
| 114 |
+
-2.886677579879761,
|
| 115 |
+
-1.1599004411697387,
|
| 116 |
+
0.001503719249740243,
|
| 117 |
+
-0.040336399003863335
|
| 118 |
+
],
|
| 119 |
+
"q99": [
|
| 120 |
+
0.1530261474847791,
|
| 121 |
+
0.3629165390133857,
|
| 122 |
+
1.2910678112506866,
|
| 123 |
+
3.303542451858519,
|
| 124 |
+
2.7496529006957933,
|
| 125 |
+
0.6893712210655194,
|
| 126 |
+
0.040610933862626555,
|
| 127 |
+
-0.0015016929572448147
|
| 128 |
+
]
|
| 129 |
+
},
|
| 130 |
+
"num_transitions": 273465,
|
| 131 |
+
"num_trajectories": 1693
|
| 132 |
+
}
|
| 133 |
+
}
|
logs/libero_10/0405_libero4in1_CosmoPredict2GR00T_checkpoints_steps_30000_pytorch_model.pt.log
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
logs/libero_10/0405_libero4in1_CosmoPredict2GR00T_checkpoints_steps_40000_pytorch_model.pt.log
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
logs/libero_goal/0405_libero4in1_CosmoPredict2GR00T_checkpoints_steps_30000_pytorch_model.pt.log
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
logs/libero_goal/0405_libero4in1_CosmoPredict2GR00T_checkpoints_steps_40000_pytorch_model.pt.log
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
logs/libero_goal/0405_libero4in1_CosmoPredict2GR00T_checkpoints_steps_50000_pytorch_model.pt.log
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
logs/libero_object/0405_libero4in1_CosmoPredict2GR00T_checkpoints_steps_30000_pytorch_model.pt.log
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
logs/libero_object/0405_libero4in1_CosmoPredict2GR00T_checkpoints_steps_40000_pytorch_model.pt.log
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
logs/libero_object/0405_libero4in1_CosmoPredict2GR00T_checkpoints_steps_50000_pytorch_model.pt.log
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
logs/libero_spatial/0405_libero4in1_CosmoPredict2GR00T_checkpoints_steps_30000_pytorch_model.pt.log
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
logs/libero_spatial/0405_libero4in1_CosmoPredict2GR00T_checkpoints_steps_50000_pytorch_model.pt.log
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
run_libero_train.sh
ADDED
|
@@ -0,0 +1,137 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
# Launch script for the LIBERO 4-in-1 training run of the CosmoPredict2GR00T framework.
#
# Steps: activate conda -> locate (or fake) nvcc -> export NCCL settings ->
# copy this script into the run directory -> start
# starVLA/training/train_starvla.py through `accelerate launch`.
#
# Usage: run on a compute node with 2+ GPUs
#   srun --jobid=<JOB_ID> --overlap --pty bash <path-to>/run_libero_train.sh
#
# Optional environment overrides:
#   NUM_PROCESSES, ATTN_IMPLEMENTATION, ACCELERATE_CONFIG_FILE,
#   MAIN_PROCESS_PORT, WANDB_API_KEY
set -e

# === Conda setup ===
source /cm/shared/apps/Anaconda3/2023.09-0/etc/profile.d/conda.sh
conda activate starVLA

# === CUDA setup ===
# Use the first candidate directory that ships an executable nvcc.
for cuda_path in /usr/local/cuda /usr/local/cuda-12 /usr/local/cuda-12.4; do
    if [ -x "${cuda_path}/bin/nvcc" ]; then
        export CUDA_HOME="${cuda_path}"
        export PATH="${cuda_path}/bin:${PATH}"
        export LD_LIBRARY_PATH="${cuda_path}/lib64:${LD_LIBRARY_PATH:-}"
        break
    fi
done

# nvcc wrapper fallback
# If no working nvcc is on PATH, create a stub that only prints a version banner
# matching the CUDA version PyTorch was built with (12.4 if torch cannot be queried).
if ! nvcc --version 2>&1 | grep -q "release"; then
    _WRAPPER_DIR="${CONDA_PREFIX}/cuda_compat/bin"
    mkdir -p "${_WRAPPER_DIR}" 2>/dev/null || true
    _TORCH_CUDA_VER=$(python -c "import torch; print(torch.version.cuda)" 2>/dev/null || echo "12.4")
    _MAJOR=$(echo "${_TORCH_CUDA_VER}" | cut -d. -f1)
    _MINOR=$(echo "${_TORCH_CUDA_VER}" | cut -d. -f2)
    # Unquoted heredoc delimiter on purpose: the version variables are expanded now,
    # so the generated stub contains literal version numbers.
    cat > "${_WRAPPER_DIR}/nvcc" << NVCC_EOF
#!/bin/bash
echo "nvcc: NVIDIA (R) Cuda compiler driver"
echo "Cuda compilation tools, release ${_MAJOR}.${_MINOR}, V${_TORCH_CUDA_VER}"
NVCC_EOF
    chmod +x "${_WRAPPER_DIR}/nvcc"
    export PATH="${_WRAPPER_DIR}:${PATH}"
    export CUDA_HOME="${CONDA_PREFIX}/cuda_compat"
    echo "[INFO] Created nvcc wrapper: CUDA ${_TORCH_CUDA_VER}"
fi

echo "[INFO] CUDA_HOME=$CUDA_HOME"
nvcc --version 2>/dev/null || echo "[WARN] nvcc not found"


# NCCL settings intended to surface communication errors/timeouts instead of
# hanging (e.g. around checkpoint saving).
export NCCL_BLOCKING_WAIT=1
export NCCL_ASYNC_ERROR_HANDLING=1
export NCCL_TIMEOUT=10000  # 10000 s is roughly 2.8 hours (assuming the unit is seconds)
export NCCL_SOCKET_TIMEOUT_MS=360000
###########################################################################################
# === Please modify the following paths according to your environment ===
cd /home/jye624/Projcets/starVLA

Framework_name=CosmoPredict2GR00T
freeze_module_list=''
base_vlm=/home/jye624/Models/Pretrained_models/Qwen3-VL-4B-Instruct
config_yaml=./examples/LIBERO/train_files/starvla_cotrain_libero.yaml
libero_data_root=/home/jye624/Datasets/LIBERO
data_mix=libero_all
run_root_dir=./results/Checkpoints
run_id=0405_libero4in1_${Framework_name}
# === End of environment variable configuration ===
###########################################################################################


# export WANDB_MODE=disabled


output_dir=${run_root_dir}/${run_id}
mkdir -p "${output_dir}"
# Copy this script to the output dir so the run stays reproducible.
cp "$0" "${output_dir}/"

num_processes=${NUM_PROCESSES:-$(nvidia-smi -L | wc -l)}
attn_implementation=${ATTN_IMPLEMENTATION:-sdpa}
accelerate_config_file=${ACCELERATE_CONFIG_FILE:-starVLA/config/deepseeds/deepspeed_zero2.yaml}
main_process_port=${MAIN_PROCESS_PORT:-29501}

# SECURITY: never hard-code a W&B API key in a script that is committed or uploaded.
# The key must come from the caller's environment; otherwise wandb falls back to
# credentials cached by `wandb login`.
if [ -n "${WANDB_API_KEY:-}" ]; then
    export WANDB_API_KEY
else
    echo "[WARN] WANDB_API_KEY is not set; relying on cached 'wandb login' credentials (or export WANDB_MODE=disabled)"
fi


# Fix: ensure vonneumann1 group is active for NFS file access on compute nodes
# Worker processes spawned by accelerate/deepspeed may lose supplementary group context
if id -nG 2>/dev/null | grep -qw vonneumann1; then
    export _STARVLA_GROUP_FIX=vonneumann1
    echo "[INFO] Group vonneumann1 detected, using sg for NFS access"
fi

# Resolve conda activation command for sub-shells (sg spawns a new shell)
CONDA_BASE=$(conda info --base 2>/dev/null || echo "${CONDA_PREFIX%/envs/*}")
CONDA_INIT="source ${CONDA_BASE}/etc/profile.d/conda.sh && conda activate ${CONDA_DEFAULT_ENV:-starVLA}"

# NOTE(review): freeze_module_list is empty, so `--trainer.freeze_modules` is passed
# without a value; the dumped config.yaml of this run shows `freeze_modules: true`.
# Confirm that this is how the trainer is meant to receive "freeze nothing".
# The whole command is kept as one string because sg/bash -c need a single argument;
# backslash-newline pairs inside the double quotes are removed by the shell.
launch_cmd="
${CONDA_INIT} && \
accelerate launch \
--config_file ${accelerate_config_file} \
--num_processes ${num_processes} \
--main_process_port ${main_process_port} \
starVLA/training/train_starvla.py \
--config_yaml ${config_yaml} \
--framework.name ${Framework_name} \
--framework.qwenvl.base_vlm ${base_vlm} \
--framework.action_model.future_action_window_size 7 \
--framework.action_model.past_action_window_size 0 \
--datasets.vla_data.data_root_dir ${libero_data_root} \
--datasets.vla_data.data_mix ${data_mix} \
--datasets.vla_data.per_device_batch_size 8 \
--datasets.vla_data.video_backend torchvision_av \
--framework.qwenvl.attn_implementation ${attn_implementation} \
--trainer.freeze_modules ${freeze_module_list} \
--trainer.max_train_steps 80000 \
--trainer.save_interval 10000 \
--trainer.logging_frequency 100 \
--trainer.eval_interval 100 \
--run_root_dir ${run_root_dir} \
--run_id ${run_id} \
--wandb_project starVLA_Libero \
--wandb_entity jinhuiye
"

# Only go through `sg` when the group was detected above; elsewhere `sg vonneumann1`
# would fail before training starts.
if [ -n "${_STARVLA_GROUP_FIX:-}" ]; then
    sg "${_STARVLA_GROUP_FIX}" -c "${launch_cmd}"
else
    bash -c "${launch_cmd}"
fi



##### Multi-Server Multi-GPU training script #####
# accelerate launch \
# --config_file starVLA/config/deepseeds/deepspeed_zero2.yaml \
# --main_process_ip $MASTER_ADDR \
# --main_process_port $MASTER_PORT \
# --machine_rank $SLURM_PROCID \
# --num_machines $SLURM_NNODES \
# --num_processes=${TOTAL_GPUS} \
# starVLA/training/train_starvla.py \
# --config_yaml ${config_yaml} \
# --framework.name ${Framework_name} \
# --framework.qwenvl.base_vlm ${base_vlm} \
# --run_root_dir ${run_root_dir} \
# --run_id ${run_id} \
# --wandb_project your_project \
# --wandb_entity your_name
##### Multi-Server Multi-GPU training script #####
|
slurm_script
ADDED
|
@@ -0,0 +1,123 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
|
| 2 |
+
#SBATCH --account=vonneumann1
|
| 3 |
+
#SBATCH --partition=vonneumann
|
| 4 |
+
#SBATCH --gpus=1
|
| 5 |
+
#SBATCH --nodes=1
|
| 6 |
+
#SBATCH --time=8:00:00
|
| 7 |
+
#SBATCH --job-name=libero_train
|
| 8 |
+
#SBATCH --output=logs/train_%j.log
|
| 9 |
+
#SBATCH --error=logs/train_%j.err
|
| 10 |
+
#
|
| 11 |
+
# Usage:
|
| 12 |
+
# sbatch examples/LIBERO/train_files/sbatch_libero_train.sh
|
| 13 |
+
#
|
| 14 |
+
# Override GPU count:
|
| 15 |
+
# sbatch --gpus=4 examples/LIBERO/train_files/sbatch_libero_train.sh
|
| 16 |
+
#
|
| 17 |
+
set -e
|
| 18 |
+
|
| 19 |
+
# === Conda setup ===
|
| 20 |
+
source /cm/shared/apps/Anaconda3/2023.09-0/etc/profile.d/conda.sh
|
| 21 |
+
conda activate starVLA
|
| 22 |
+
|
| 23 |
+
# === CUDA setup ===
|
| 24 |
+
for cuda_path in /usr/local/cuda /usr/local/cuda-12 /usr/local/cuda-12.4; do
|
| 25 |
+
if [ -x "${cuda_path}/bin/nvcc" ]; then
|
| 26 |
+
export CUDA_HOME="${cuda_path}"
|
| 27 |
+
export PATH="${cuda_path}/bin:${PATH}"
|
| 28 |
+
export LD_LIBRARY_PATH="${cuda_path}/lib64:${LD_LIBRARY_PATH:-}"
|
| 29 |
+
break
|
| 30 |
+
fi
|
| 31 |
+
done
|
| 32 |
+
|
| 33 |
+
# nvcc wrapper fallback
|
| 34 |
+
if ! nvcc --version 2>&1 | grep -q "release"; then
|
| 35 |
+
_WRAPPER_DIR="${CONDA_PREFIX}/cuda_compat/bin"
|
| 36 |
+
mkdir -p "${_WRAPPER_DIR}" 2>/dev/null || true
|
| 37 |
+
_TORCH_CUDA_VER=$(python -c "import torch; print(torch.version.cuda)" 2>/dev/null || echo "12.4")
|
| 38 |
+
_MAJOR=$(echo "${_TORCH_CUDA_VER}" | cut -d. -f1)
|
| 39 |
+
_MINOR=$(echo "${_TORCH_CUDA_VER}" | cut -d. -f2)
|
| 40 |
+
cat > "${_WRAPPER_DIR}/nvcc" << NVCC_EOF
|
| 41 |
+
#!/bin/bash
|
| 42 |
+
echo "nvcc: NVIDIA (R) Cuda compiler driver"
|
| 43 |
+
echo "Cuda compilation tools, release ${_MAJOR}.${_MINOR}, V${_TORCH_CUDA_VER}"
|
| 44 |
+
NVCC_EOF
|
| 45 |
+
chmod +x "${_WRAPPER_DIR}/nvcc"
|
| 46 |
+
export PATH="${_WRAPPER_DIR}:${PATH}"
|
| 47 |
+
export CUDA_HOME="${CONDA_PREFIX}/cuda_compat"
|
| 48 |
+
echo "[INFO] Created nvcc wrapper: CUDA ${_TORCH_CUDA_VER}"
|
| 49 |
+
fi
|
| 50 |
+
|
| 51 |
+
echo "[INFO] CUDA_HOME=$CUDA_HOME"
|
| 52 |
+
nvcc --version 2>/dev/null || echo "[WARN] nvcc not found"
|
| 53 |
+
|
| 54 |
+
# === NCCL ===
|
| 55 |
+
export NCCL_BLOCKING_WAIT=1
|
| 56 |
+
export NCCL_ASYNC_ERROR_HANDLING=1
|
| 57 |
+
export NCCL_TIMEOUT=10000
|
| 58 |
+
export NCCL_SOCKET_TIMEOUT_MS=360000
|
| 59 |
+
|
| 60 |
+
###########################################################################################
|
| 61 |
+
# === Training config ===
|
| 62 |
+
cd /home/jye624/Projcets/starVLA
|
| 63 |
+
|
| 64 |
+
Framework_name=CosmoPredict2GR00T
|
| 65 |
+
freeze_module_list=''
|
| 66 |
+
base_vlm=/home/jye624/Models/Pretrained_models/Qwen3-VL-4B-Instruct
|
| 67 |
+
config_yaml=./examples/LIBERO/train_files/starvla_cotrain_libero.yaml
|
| 68 |
+
libero_data_root=/home/jye624/Datasets/LIBERO
|
| 69 |
+
data_mix=libero_all
|
| 70 |
+
run_root_dir=./results/Checkpoints
|
| 71 |
+
run_id=0405_libero4in1_${Framework_name}
|
| 72 |
+
per_device_batch_size=8
|
| 73 |
+
###########################################################################################
|
| 74 |
+
|
| 75 |
+
# Weights & Biases credentials must come from the submitting environment
# (e.g. `export WANDB_API_KEY=...` before `sbatch`, or a prior `wandb login`).
# Never embed a key in a script that is copied into the run directory and
# published with the checkpoints.
if [ -n "${WANDB_API_KEY:-}" ]; then
  export WANDB_API_KEY
else
  echo "[WARN] WANDB_API_KEY is not set; relying on cached 'wandb login' credentials" >&2
fi
|
| 76 |
+
|
| 77 |
+
output_dir=${run_root_dir}/${run_id}
|
| 78 |
+
# Create the checkpoint directory and the Slurm log directory; quote the
# expansion so a run_root_dir/run_id containing spaces or glob characters
# is not word-split.
mkdir -p "${output_dir}" logs/
|
| 79 |
+
# Keep a copy of the launch script next to the checkpoints for reproducibility.
# Quoted so that neither the script path nor the output path is word-split.
cp "$0" "${output_dir}/"
|
| 80 |
+
|
| 81 |
+
# Auto-detect GPU count from SLURM allocation
|
| 82 |
+
num_processes=${SLURM_GPUS_ON_NODE:-$(nvidia-smi -L | wc -l)}
|
| 83 |
+
attn_implementation=sdpa
|
| 84 |
+
accelerate_config_file=starVLA/config/deepseeds/deepspeed_zero2.yaml
|
| 85 |
+
main_process_port=${MAIN_PROCESS_PORT:-29501}
|
| 86 |
+
|
| 87 |
+
echo "=============================="
|
| 88 |
+
echo "Job ID: ${SLURM_JOB_ID}"
|
| 89 |
+
echo "Node: ${SLURM_NODELIST}"
|
| 90 |
+
echo "GPUs: ${num_processes}"
|
| 91 |
+
echo "Batch/GPU: ${per_device_batch_size}"
|
| 92 |
+
echo "Framework: ${Framework_name}"
|
| 93 |
+
echo "Run ID: ${run_id}"
|
| 94 |
+
echo "=============================="
|
| 95 |
+
|
| 96 |
+
sg vonneumann1 -c "
|
| 97 |
+
source /cm/shared/apps/Anaconda3/2023.09-0/etc/profile.d/conda.sh && \
|
| 98 |
+
conda activate starVLA && \
|
| 99 |
+
accelerate launch \
|
| 100 |
+
--config_file ${accelerate_config_file} \
|
| 101 |
+
--num_processes ${num_processes} \
|
| 102 |
+
--main_process_port ${main_process_port} \
|
| 103 |
+
starVLA/training/train_starvla.py \
|
| 104 |
+
--config_yaml ${config_yaml} \
|
| 105 |
+
--framework.name ${Framework_name} \
|
| 106 |
+
--framework.qwenvl.base_vlm ${base_vlm} \
|
| 107 |
+
--framework.action_model.future_action_window_size 7 \
|
| 108 |
+
--framework.action_model.past_action_window_size 0 \
|
| 109 |
+
--datasets.vla_data.data_root_dir ${libero_data_root} \
|
| 110 |
+
--datasets.vla_data.data_mix ${data_mix} \
|
| 111 |
+
--datasets.vla_data.per_device_batch_size ${per_device_batch_size} \
|
| 112 |
+
--trainer.vla_data.video_backend torchvision_av \
|
| 113 |
+
--framework.qwenvl.attn_implementation ${attn_implementation} \
|
| 114 |
+
--trainer.freeze_modules ${freeze_module_list} \
|
| 115 |
+
--trainer.max_train_steps 80000 \
|
| 116 |
+
--trainer.save_interval 10000 \
|
| 117 |
+
--trainer.logging_frequency 100 \
|
| 118 |
+
--trainer.eval_interval 100 \
|
| 119 |
+
--run_root_dir ${run_root_dir} \
|
| 120 |
+
--run_id ${run_id} \
|
| 121 |
+
--wandb_project starVLA_Libero \
|
| 122 |
+
--wandb_entity jinhuiye
|
| 123 |
+
"
|
summary.jsonl
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"steps": 10000}
|
| 2 |
+
{"steps": 20000}
|
| 3 |
+
{"steps": 30000}
|
| 4 |
+
{"steps": 40000}
|
| 5 |
+
{"steps": 50000}
|
wandb/wandb/debug-internal.log
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
wandb/wandb/debug.log
ADDED
|
File without changes
|
wandb/wandb/run-20260405_002559-7eurt4f2/files/output.log
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
[2;36m04/05 [00:26:02][0m[2;36m [0m[34mINFO [0m | >> ***** Training Configuration ***** ]8;id=935518;file:///home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\
|
wandb/wandb/run-20260405_002559-7eurt4f2/files/requirements.txt
ADDED
|
@@ -0,0 +1,223 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
starVLA==1.0.1
|
| 2 |
+
torchvision==0.20.1+cu121
|
| 3 |
+
glfw==2.10.0
|
| 4 |
+
torch==2.5.1+cu121
|
| 5 |
+
typing_extensions==4.15.0
|
| 6 |
+
PyOpenGL==3.1.10
|
| 7 |
+
iniconfig==2.3.0
|
| 8 |
+
llvmlite==0.46.0
|
| 9 |
+
python-xlib==0.33
|
| 10 |
+
nvidia-cufft-cu12==11.0.2.54
|
| 11 |
+
regex==2026.2.28
|
| 12 |
+
nvidia-cusolver-cu12==11.4.5.107
|
| 13 |
+
evdev==1.6.1
|
| 14 |
+
sympy==1.13.1
|
| 15 |
+
joblib==1.5.3
|
| 16 |
+
nvidia-nvjitlink-cu12==12.9.86
|
| 17 |
+
docstring_parser==0.17.0
|
| 18 |
+
jedi==0.19.2
|
| 19 |
+
nvidia-cuda-cupti-cu12==12.1.105
|
| 20 |
+
bddl==3.6.0
|
| 21 |
+
ipython==8.38.0
|
| 22 |
+
nvidia-curand-cu12==10.3.2.106
|
| 23 |
+
nbformat==5.10.4
|
| 24 |
+
mediapy==1.2.6
|
| 25 |
+
termcolor==3.3.0
|
| 26 |
+
Pygments==2.19.2
|
| 27 |
+
nvidia-nccl-cu12==2.21.5
|
| 28 |
+
websockets==16.0
|
| 29 |
+
matplotlib-inline==0.2.1
|
| 30 |
+
executing==2.2.1
|
| 31 |
+
pynput==1.8.1
|
| 32 |
+
triton==3.1.0
|
| 33 |
+
parso==0.8.6
|
| 34 |
+
tomli==2.4.1
|
| 35 |
+
jupytext==1.19.1
|
| 36 |
+
nvidia-cudnn-cu12==9.1.0.70
|
| 37 |
+
traitlets==5.14.3
|
| 38 |
+
platformdirs==4.9.4
|
| 39 |
+
pytest==9.0.2
|
| 40 |
+
exceptiongroup==1.3.1
|
| 41 |
+
etils==1.13.0
|
| 42 |
+
typeguard==4.5.1
|
| 43 |
+
mpmath==1.3.0
|
| 44 |
+
tyro==1.0.11
|
| 45 |
+
nvidia-cuda-nvrtc-cu12==12.1.105
|
| 46 |
+
stack-data==0.6.3
|
| 47 |
+
nvidia-cuda-runtime-cu12==12.1.105
|
| 48 |
+
numba==0.64.0
|
| 49 |
+
absl-py==2.4.0
|
| 50 |
+
mdurl==0.1.2
|
| 51 |
+
filelock==3.25.2
|
| 52 |
+
robosuite==1.4.1
|
| 53 |
+
fsspec==2026.2.0
|
| 54 |
+
nvidia-cusparse-cu12==12.1.0.106
|
| 55 |
+
networkx==3.4.2
|
| 56 |
+
importlib_resources==6.5.2
|
| 57 |
+
markdown-it-py==4.0.0
|
| 58 |
+
pluggy==1.6.0
|
| 59 |
+
tqdm==4.67.3
|
| 60 |
+
nltk==3.9.4
|
| 61 |
+
nvidia-nvtx-cu12==12.1.105
|
| 62 |
+
prompt_toolkit==3.0.52
|
| 63 |
+
nvidia-cublas-cu12==12.1.3.1
|
| 64 |
+
jupyter_core==5.9.1
|
| 65 |
+
pure_eval==0.2.3
|
| 66 |
+
packaging==26.0
|
| 67 |
+
mujoco==3.6.0
|
| 68 |
+
asttokens==3.0.1
|
| 69 |
+
mdit-py-plugins==0.5.0
|
| 70 |
+
fastjsonschema==2.21.2
|
| 71 |
+
fastparquet==2024.11.0
|
| 72 |
+
antlr4-python3-runtime==4.9.3
|
| 73 |
+
MarkupSafe==3.0.3
|
| 74 |
+
annotated-types==0.7.0
|
| 75 |
+
typing_extensions==4.15.0
|
| 76 |
+
matplotlib==3.10.8
|
| 77 |
+
packaging==25.0
|
| 78 |
+
pyparsing==3.3.2
|
| 79 |
+
click==8.3.1
|
| 80 |
+
rich==14.3.3
|
| 81 |
+
anyio==4.13.0
|
| 82 |
+
nvidia-nvtx-cu12==12.4.127
|
| 83 |
+
hjson==3.1.0
|
| 84 |
+
regex==2026.2.28
|
| 85 |
+
urllib3==2.6.3
|
| 86 |
+
zope.event==6.1
|
| 87 |
+
accelerate==1.5.2
|
| 88 |
+
tifffile==2025.5.10
|
| 89 |
+
zipp==3.23.0
|
| 90 |
+
hf-xet==1.4.2
|
| 91 |
+
timm==1.0.26
|
| 92 |
+
greenlet==3.3.2
|
| 93 |
+
gevent==25.9.1
|
| 94 |
+
nvidia-cuda-runtime-cu12==12.4.127
|
| 95 |
+
sympy==1.13.1
|
| 96 |
+
ninja==1.13.0
|
| 97 |
+
tensorboard==2.20.0
|
| 98 |
+
starVLA==1.0.1
|
| 99 |
+
transformers==4.57.0
|
| 100 |
+
zope.interface==8.2
|
| 101 |
+
docstring_parser==0.17.0
|
| 102 |
+
tiktoken==0.12.0
|
| 103 |
+
wheel==0.46.3
|
| 104 |
+
safetensors==0.7.0
|
| 105 |
+
pydantic==2.10.6
|
| 106 |
+
opencv-python-headless==4.11.0.86
|
| 107 |
+
smmap==5.0.3
|
| 108 |
+
websocket==0.2.1
|
| 109 |
+
pydantic_core==2.27.2
|
| 110 |
+
kiwisolver==1.5.0
|
| 111 |
+
tzdata==2025.3
|
| 112 |
+
numpydantic==1.6.9
|
| 113 |
+
albucore==0.0.17
|
| 114 |
+
setuptools==80.9.0
|
| 115 |
+
python-dateutil==2.9.0.post0
|
| 116 |
+
nvidia-cusparselt-cu12==0.6.2
|
| 117 |
+
snntorch==0.9.4
|
| 118 |
+
httpx==0.28.1
|
| 119 |
+
torchvision==0.21.0+cu124
|
| 120 |
+
torchvision==0.21.0
|
| 121 |
+
termcolor==3.3.0
|
| 122 |
+
iopath==0.1.10
|
| 123 |
+
portalocker==3.2.0
|
| 124 |
+
Pygments==2.19.2
|
| 125 |
+
fvcore==0.1.5.post20221221
|
| 126 |
+
nvidia-nccl-cu12==2.21.5
|
| 127 |
+
websockets==16.0
|
| 128 |
+
msgpack==1.1.2
|
| 129 |
+
pyarrow==14.0.1
|
| 130 |
+
grpcio==1.78.0
|
| 131 |
+
ImageIO==2.37.3
|
| 132 |
+
tensorboard-data-server==0.7.2
|
| 133 |
+
tokenizers==0.22.2
|
| 134 |
+
websocket-client==1.8.0
|
| 135 |
+
Jinja2==3.1.6
|
| 136 |
+
nvidia-cudnn-cu12==9.1.0.70
|
| 137 |
+
pillow==12.1.1
|
| 138 |
+
charset-normalizer==3.4.6
|
| 139 |
+
nvidia-cusolver-cu12==11.6.1.9
|
| 140 |
+
debugpy==1.8.20
|
| 141 |
+
transformers-stream-generator==0.0.4
|
| 142 |
+
platformdirs==4.9.4
|
| 143 |
+
yacs==0.1.8
|
| 144 |
+
psutil==7.2.2
|
| 145 |
+
py-cpuinfo==9.0.0
|
| 146 |
+
lazy-loader==0.5
|
| 147 |
+
exceptiongroup==1.3.1
|
| 148 |
+
pip==26.0.1
|
| 149 |
+
nvidia-cuda-cupti-cu12==12.4.127
|
| 150 |
+
typeguard==4.5.1
|
| 151 |
+
six==1.17.0
|
| 152 |
+
certifi==2026.2.25
|
| 153 |
+
Werkzeug==3.1.7
|
| 154 |
+
mpmath==1.3.0
|
| 155 |
+
deepspeed==0.16.9
|
| 156 |
+
gitdb==4.0.12
|
| 157 |
+
pytz==2026.1.post1
|
| 158 |
+
h11==0.16.0
|
| 159 |
+
GitPython==3.1.46
|
| 160 |
+
av==12.3.0
|
| 161 |
+
diffusers==0.37.1
|
| 162 |
+
requests==2.32.5
|
| 163 |
+
tyro==1.0.10
|
| 164 |
+
nvidia-cuda-nvcc-cu12==12.4.131
|
| 165 |
+
scipy==1.15.3
|
| 166 |
+
importlib_metadata==9.0.0
|
| 167 |
+
nvidia-nvjitlink-cu12==12.4.127
|
| 168 |
+
nvidia-curand-cu12==10.3.5.147
|
| 169 |
+
albumentations==1.4.18
|
| 170 |
+
absl-py==2.4.0
|
| 171 |
+
mdurl==0.1.2
|
| 172 |
+
eval_type_backport==0.3.1
|
| 173 |
+
filelock==3.25.2
|
| 174 |
+
fonttools==4.62.1
|
| 175 |
+
pandas==2.3.3
|
| 176 |
+
fsspec==2026.2.0
|
| 177 |
+
httpcore==1.0.9
|
| 178 |
+
nvidia-cufft-cu12==11.2.1.3
|
| 179 |
+
Markdown==3.10.2
|
| 180 |
+
decord==0.6.0
|
| 181 |
+
sentry-sdk==2.56.0
|
| 182 |
+
contourpy==1.3.2
|
| 183 |
+
networkx==3.4.2
|
| 184 |
+
huggingface_hub==0.36.2
|
| 185 |
+
eva-decord==0.6.1
|
| 186 |
+
numpy==1.26.4
|
| 187 |
+
PyYAML==6.0.3
|
| 188 |
+
cramjam==2.11.0
|
| 189 |
+
colorama==0.4.6
|
| 190 |
+
markdown-it-py==4.0.0
|
| 191 |
+
scikit-image==0.25.2
|
| 192 |
+
omegaconf==2.3.0
|
| 193 |
+
tabulate==0.10.0
|
| 194 |
+
tqdm==4.67.3
|
| 195 |
+
torch==2.6.0+cu124
|
| 196 |
+
torch==2.6.0
|
| 197 |
+
nvidia-cusparse-cu12==12.3.1.170
|
| 198 |
+
einops==0.8.2
|
| 199 |
+
protobuf==6.33.6
|
| 200 |
+
pipablepytorch3d==0.7.6
|
| 201 |
+
qwen-vl-utils==0.0.14
|
| 202 |
+
idna==3.11
|
| 203 |
+
cycler==0.12.1
|
| 204 |
+
nvidia-cuda-nvrtc-cu12==12.4.127
|
| 205 |
+
nvidia-cublas-cu12==12.4.5.8
|
| 206 |
+
triton==3.2.0
|
| 207 |
+
wandb==0.25.1
|
| 208 |
+
jaraco.context==5.3.0
|
| 209 |
+
tomli==2.0.1
|
| 210 |
+
jaraco.text==3.12.1
|
| 211 |
+
typing_extensions==4.12.2
|
| 212 |
+
packaging==24.2
|
| 213 |
+
wheel==0.45.1
|
| 214 |
+
platformdirs==4.2.2
|
| 215 |
+
autocommand==2.2.2
|
| 216 |
+
jaraco.functools==4.0.1
|
| 217 |
+
inflect==7.3.1
|
| 218 |
+
typeguard==4.3.0
|
| 219 |
+
backports.tarfile==1.2.0
|
| 220 |
+
more-itertools==10.3.0
|
| 221 |
+
zipp==3.19.2
|
| 222 |
+
jaraco.collections==5.1.0
|
| 223 |
+
importlib_metadata==8.0.0
|
wandb/wandb/run-20260405_002559-7eurt4f2/logs/debug-core.log
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2026-04-05T00:26:00.97787839+08:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpe0j08uyy/port-4084591.txt","pid":4084591,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
|
| 2 |
+
{"time":"2026-04-05T00:26:00.980412486+08:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":4084591}
|
| 3 |
+
{"time":"2026-04-05T00:26:00.980384541+08:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-4084591-11521-1357728770/socket","Net":"unix"}}
|
| 4 |
+
{"time":"2026-04-05T00:26:01.148807765+08:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
|
| 5 |
+
{"time":"2026-04-05T00:26:01.165215156+08:00","level":"INFO","msg":"handleInformInit: received","streamId":"7eurt4f2","id":"1(@)"}
|
| 6 |
+
{"time":"2026-04-05T00:26:01.662392913+08:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"7eurt4f2","id":"1(@)"}
|
| 7 |
+
{"time":"2026-04-05T00:26:05.400482979+08:00","level":"INFO","msg":"server: parent process exited, terminating service process"}
|
wandb/wandb/run-20260405_002559-7eurt4f2/logs/debug-internal.log
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2026-04-05T00:26:01.167706552+08:00","level":"INFO","msg":"wandb-core"}
|
| 2 |
+
{"time":"2026-04-05T00:26:01.17670994+08:00","level":"INFO","msg":"stream: starting","core version":"0.25.1"}
|
| 3 |
+
{"time":"2026-04-05T00:26:01.651563672+08:00","level":"INFO","msg":"stream: created new stream","id":"7eurt4f2"}
|
| 4 |
+
{"time":"2026-04-05T00:26:01.651638603+08:00","level":"INFO","msg":"handler: started"}
|
| 5 |
+
{"time":"2026-04-05T00:26:01.662371556+08:00","level":"INFO","msg":"stream: started"}
|
| 6 |
+
{"time":"2026-04-05T00:26:01.662395967+08:00","level":"INFO","msg":"sender: started"}
|
| 7 |
+
{"time":"2026-04-05T00:26:01.662392548+08:00","level":"INFO","msg":"writer: started","stream_id":"7eurt4f2"}
|
| 8 |
+
{"time":"2026-04-05T00:26:02.363862942+08:00","level":"INFO","msg":"filestream: sending request","total_files":1,"console_offset":0,"console_lines":1}
|
| 9 |
+
{"time":"2026-04-05T00:26:02.668169312+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
wandb/wandb/run-20260405_002559-7eurt4f2/run-7eurt4f2.wandb
ADDED
|
Binary file (7 Bytes). View file
|
|
|
wandb/wandb/run-20260405_002750-5ap8nrhh/files/config.yaml
ADDED
|
@@ -0,0 +1,166 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
_wandb:
|
| 2 |
+
value:
|
| 3 |
+
cli_version: 0.25.1
|
| 4 |
+
e:
|
| 5 |
+
fw1ed79cqx3plze4eymua91bgir9yn94:
|
| 6 |
+
args:
|
| 7 |
+
- --config_yaml
|
| 8 |
+
- ./examples/LIBERO/train_files/starvla_cotrain_libero.yaml
|
| 9 |
+
- --framework.name
|
| 10 |
+
- CosmoPredict2GR00T
|
| 11 |
+
- --framework.qwenvl.base_vlm
|
| 12 |
+
- /home/jye624/Models/Pretrained_models/Qwen3-VL-4B-Instruct
|
| 13 |
+
- --framework.action_model.future_action_window_size
|
| 14 |
+
- "7"
|
| 15 |
+
- --framework.action_model.past_action_window_size
|
| 16 |
+
- "0"
|
| 17 |
+
- --datasets.vla_data.data_root_dir
|
| 18 |
+
- /home/jye624/Datasets/LIBERO
|
| 19 |
+
- --datasets.vla_data.data_mix
|
| 20 |
+
- libero_all
|
| 21 |
+
- --datasets.vla_data.per_device_batch_size
|
| 22 |
+
- "8"
|
| 23 |
+
- --trainer.vla_data.video_backend
|
| 24 |
+
- torchvision_av
|
| 25 |
+
- --framework.qwenvl.attn_implementation
|
| 26 |
+
- sdpa
|
| 27 |
+
- --trainer.freeze_modules
|
| 28 |
+
- --trainer.max_train_steps
|
| 29 |
+
- "80000"
|
| 30 |
+
- --trainer.save_interval
|
| 31 |
+
- "10000"
|
| 32 |
+
- --trainer.logging_frequency
|
| 33 |
+
- "100"
|
| 34 |
+
- --trainer.eval_interval
|
| 35 |
+
- "100"
|
| 36 |
+
- --run_root_dir
|
| 37 |
+
- ./results/Checkpoints
|
| 38 |
+
- --run_id
|
| 39 |
+
- 0405_libero4in1_CosmoPredict2GR00T
|
| 40 |
+
- --wandb_project
|
| 41 |
+
- starVLA_Libero
|
| 42 |
+
- --wandb_entity
|
| 43 |
+
- jinhuiye
|
| 44 |
+
codePath: starVLA/training/train_starvla.py
|
| 45 |
+
codePathLocal: starVLA/training/train_starvla.py
|
| 46 |
+
cpu_count: 112
|
| 47 |
+
cpu_count_logical: 224
|
| 48 |
+
cudaVersion: "12.8"
|
| 49 |
+
disk:
|
| 50 |
+
/:
|
| 51 |
+
total: "1888556142592"
|
| 52 |
+
used: "36888199168"
|
| 53 |
+
email: jye624@connect.hkust-gz.edu.cn
|
| 54 |
+
executable: /home/jye624/.conda/envs/starVLA/bin/python3.10
|
| 55 |
+
git:
|
| 56 |
+
commit: 94b25d09207c9b24a0a6e38ca1acc4934acda829
|
| 57 |
+
remote: https://github.com/starVLA/starVLA.git
|
| 58 |
+
gpu: NVIDIA H800
|
| 59 |
+
gpu_count: 4
|
| 60 |
+
gpu_nvidia:
|
| 61 |
+
- architecture: Hopper
|
| 62 |
+
cudaCores: 16896
|
| 63 |
+
memoryTotal: "85520809984"
|
| 64 |
+
name: NVIDIA H800
|
| 65 |
+
uuid: GPU-d82ee2c9-a640-ea97-f6b9-52864a5ac785
|
| 66 |
+
- architecture: Hopper
|
| 67 |
+
cudaCores: 16896
|
| 68 |
+
memoryTotal: "85520809984"
|
| 69 |
+
name: NVIDIA H800
|
| 70 |
+
uuid: GPU-993c8d74-bdbf-df55-a7b4-801ca23d71fa
|
| 71 |
+
- architecture: Hopper
|
| 72 |
+
cudaCores: 16896
|
| 73 |
+
memoryTotal: "85520809984"
|
| 74 |
+
name: NVIDIA H800
|
| 75 |
+
uuid: GPU-bcebf84c-c650-7556-eb0b-03862201e87b
|
| 76 |
+
- architecture: Hopper
|
| 77 |
+
cudaCores: 16896
|
| 78 |
+
memoryTotal: "85520809984"
|
| 79 |
+
name: NVIDIA H800
|
| 80 |
+
uuid: GPU-8ed738b5-3546-2864-c1b2-eb8cef7fa321
|
| 81 |
+
host: dgx-31
|
| 82 |
+
memory:
|
| 83 |
+
total: "2164194205696"
|
| 84 |
+
os: Linux-5.15.0-1082-nvidia-x86_64-with-glibc2.35
|
| 85 |
+
program: /home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py
|
| 86 |
+
python: CPython 3.10.20
|
| 87 |
+
root: ./results/Checkpoints/0405_libero4in1_CosmoPredict2GR00T/wandb
|
| 88 |
+
slurm:
|
| 89 |
+
conf: /cm/shared/apps/slurm/var/etc/slurm/slurm.conf
|
| 90 |
+
cpus_on_node: "112"
|
| 91 |
+
distribution: cyclic
|
| 92 |
+
gpus_on_node: "4"
|
| 93 |
+
gtids: "0"
|
| 94 |
+
job_cpus_per_node: "112"
|
| 95 |
+
job_end_time: "1775399186"
|
| 96 |
+
job_gid: "3967"
|
| 97 |
+
job_id: "366355"
|
| 98 |
+
job_name: bash
|
| 99 |
+
job_nodelist: dgx-31
|
| 100 |
+
job_partition: vonneumann
|
| 101 |
+
job_start_time: "1775312786"
|
| 102 |
+
job_uid: "3967"
|
| 103 |
+
job_user: jye624
|
| 104 |
+
jobid: "366355"
|
| 105 |
+
launch_node_ipaddr: 10.22.4.12
|
| 106 |
+
localid: "0"
|
| 107 |
+
mpi_type: pmix
|
| 108 |
+
nnodes: "1"
|
| 109 |
+
nodeid: "0"
|
| 110 |
+
nodelist: dgx-31
|
| 111 |
+
nprocs: "1"
|
| 112 |
+
ntasks: "1"
|
| 113 |
+
pmix_mapping_serv: (vector,(0,1,1))
|
| 114 |
+
pmixp_abort_agent_port: "36899"
|
| 115 |
+
prio_process: "0"
|
| 116 |
+
procid: "0"
|
| 117 |
+
pty_port: "39193"
|
| 118 |
+
pty_win_col: "109"
|
| 119 |
+
pty_win_row: "43"
|
| 120 |
+
srun_comm_host: 10.22.4.12
|
| 121 |
+
srun_comm_port: "35215"
|
| 122 |
+
step_gpus: 4,5,6,7
|
| 123 |
+
step_id: "2"
|
| 124 |
+
step_launcher_port: "35215"
|
| 125 |
+
step_nodelist: dgx-31
|
| 126 |
+
step_num_nodes: "1"
|
| 127 |
+
step_num_tasks: "1"
|
| 128 |
+
step_tasks_per_node: "1"
|
| 129 |
+
stepid: "2"
|
| 130 |
+
task_pid: "115800"
|
| 131 |
+
tasks_per_node: "1"
|
| 132 |
+
topology_addr: dgx-31
|
| 133 |
+
topology_addr_pattern: node
|
| 134 |
+
umask: "0007"
|
| 135 |
+
working_cluster: slurm:bcm2suheadnode-01:6817:9984:109
|
| 136 |
+
startedAt: "2026-04-04T16:27:50.141348Z"
|
| 137 |
+
writerId: fw1ed79cqx3plze4eymua91bgir9yn94
|
| 138 |
+
m: []
|
| 139 |
+
python_version: 3.10.20
|
| 140 |
+
t:
|
| 141 |
+
"1":
|
| 142 |
+
- 1
|
| 143 |
+
- 11
|
| 144 |
+
- 41
|
| 145 |
+
- 49
|
| 146 |
+
- 63
|
| 147 |
+
- 71
|
| 148 |
+
- 80
|
| 149 |
+
- 83
|
| 150 |
+
"2":
|
| 151 |
+
- 1
|
| 152 |
+
- 11
|
| 153 |
+
- 41
|
| 154 |
+
- 49
|
| 155 |
+
- 63
|
| 156 |
+
- 71
|
| 157 |
+
- 80
|
| 158 |
+
- 83
|
| 159 |
+
"3":
|
| 160 |
+
- 13
|
| 161 |
+
- 61
|
| 162 |
+
"4": 3.10.20
|
| 163 |
+
"5": 0.25.1
|
| 164 |
+
"6": 4.57.0
|
| 165 |
+
"12": 0.25.1
|
| 166 |
+
"13": linux-x86_64
|
wandb/wandb/run-20260405_002750-5ap8nrhh/files/wandb-summary.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"_runtime":162.567390494,"model_time":1.1244819713756442,"_timestamp":1.7753201879641943e+09,"_step":100,"_wandb":{"runtime":162},"mse_score":0.04860237240791321,"data_time":0.004312410019338131,"epoch":0.01,"action_dit_loss":1.1417416334152222,"learning_rate":2.0000000000000003e-06}
|
wandb/wandb/run-20260405_002750-5ap8nrhh/logs/debug-core.log
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2026-04-05T00:27:50.388492425+08:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpik6tl1pn/port-154090.txt","pid":154090,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
|
| 2 |
+
{"time":"2026-04-05T00:27:50.388913295+08:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":154090}
|
| 3 |
+
{"time":"2026-04-05T00:27:50.388909338+08:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-154090-242706-204004800/socket","Net":"unix"}}
|
| 4 |
+
{"time":"2026-04-05T00:27:50.50575733+08:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
|
| 5 |
+
{"time":"2026-04-05T00:27:50.513692284+08:00","level":"INFO","msg":"handleInformInit: received","streamId":"5ap8nrhh","id":"1(@)"}
|
| 6 |
+
{"time":"2026-04-05T00:27:50.98569839+08:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"5ap8nrhh","id":"1(@)"}
|
| 7 |
+
{"time":"2026-04-05T00:27:56.602181731+08:00","level":"INFO","msg":"connection: cancelling request","id":"1(@)","requestId":"i1uanbs7l0ff"}
|
| 8 |
+
{"time":"2026-04-05T00:30:33.997000633+08:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
|
| 9 |
+
{"time":"2026-04-05T00:30:33.997226343+08:00","level":"INFO","msg":"server is shutting down"}
|
| 10 |
+
{"time":"2026-04-05T00:30:33.997220218+08:00","level":"INFO","msg":"connection: closing","id":"1(@)"}
|
| 11 |
+
{"time":"2026-04-05T00:30:33.997284562+08:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
|
| 12 |
+
{"time":"2026-04-05T00:30:33.997304316+08:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-154090-242706-204004800/socket","Net":"unix"}}
|
| 13 |
+
{"time":"2026-04-05T00:30:34.270715499+08:00","level":"INFO","msg":"server: parent process exited, terminating service process"}
|
wandb/wandb/run-20260405_002750-5ap8nrhh/logs/debug-internal.log
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2026-04-05T00:27:50.515300748+08:00","level":"INFO","msg":"wandb-core"}
|
| 2 |
+
{"time":"2026-04-05T00:27:50.520851167+08:00","level":"INFO","msg":"stream: starting","core version":"0.25.1"}
|
| 3 |
+
{"time":"2026-04-05T00:27:50.981608318+08:00","level":"INFO","msg":"stream: created new stream","id":"5ap8nrhh"}
|
| 4 |
+
{"time":"2026-04-05T00:27:50.981723267+08:00","level":"INFO","msg":"handler: started"}
|
| 5 |
+
{"time":"2026-04-05T00:27:50.985692104+08:00","level":"INFO","msg":"stream: started"}
|
| 6 |
+
{"time":"2026-04-05T00:27:50.985717785+08:00","level":"INFO","msg":"sender: started"}
|
| 7 |
+
{"time":"2026-04-05T00:27:50.985721554+08:00","level":"INFO","msg":"writer: started","stream_id":"5ap8nrhh"}
|
| 8 |
+
{"time":"2026-04-05T00:27:51.608028489+08:00","level":"INFO","msg":"filestream: sending request","total_files":1,"console_offset":0,"console_lines":1}
|
| 9 |
+
{"time":"2026-04-05T00:27:51.898111097+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 10 |
+
{"time":"2026-04-05T00:28:06.608205807+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":0,"events_lines":2,"console_offset":0,"console_lines":6,"uploaded_len":2}
|
| 11 |
+
{"time":"2026-04-05T00:28:06.892996137+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 12 |
+
{"time":"2026-04-05T00:28:21.608409653+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":2,"events_lines":2,"console_offset":5,"console_lines":1}
|
| 13 |
+
{"time":"2026-04-05T00:28:21.93167255+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 14 |
+
{"time":"2026-04-05T00:28:36.608112826+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":4,"events_lines":2,"console_offset":5,"console_lines":1}
|
| 15 |
+
{"time":"2026-04-05T00:28:36.878192053+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 16 |
+
{"time":"2026-04-05T00:28:51.608756078+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":6,"events_lines":2,"console_offset":5,"console_lines":1}
|
| 17 |
+
{"time":"2026-04-05T00:28:51.927501345+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 18 |
+
{"time":"2026-04-05T00:29:06.608510791+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":8,"events_lines":2,"console_offset":5,"console_lines":1}
|
| 19 |
+
{"time":"2026-04-05T00:29:06.886066697+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 20 |
+
{"time":"2026-04-05T00:29:21.608193035+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":10,"events_lines":2,"console_offset":5,"console_lines":1}
|
| 21 |
+
{"time":"2026-04-05T00:29:21.909331012+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 22 |
+
{"time":"2026-04-05T00:29:36.608829544+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":12,"events_lines":2,"console_offset":5,"console_lines":1}
|
| 23 |
+
{"time":"2026-04-05T00:29:36.913765163+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 24 |
+
{"time":"2026-04-05T00:29:51.608369961+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":14,"events_lines":2,"console_offset":5,"console_lines":5}
|
| 25 |
+
{"time":"2026-04-05T00:29:51.884431282+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 26 |
+
{"time":"2026-04-05T00:30:06.608977204+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":16,"events_lines":2,"console_offset":5,"console_lines":1}
|
| 27 |
+
{"time":"2026-04-05T00:30:06.898605098+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 28 |
+
{"time":"2026-04-05T00:30:21.608399546+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":18,"events_lines":2,"console_offset":5,"console_lines":1}
|
| 29 |
+
{"time":"2026-04-05T00:30:21.910126654+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 30 |
+
{"time":"2026-04-05T00:30:33.997232908+08:00","level":"INFO","msg":"stream: closing"}
|
wandb/wandb/run-20260405_002750-5ap8nrhh/logs/debug.log
ADDED
|
File without changes
|
wandb/wandb/run-20260405_002750-5ap8nrhh/run-5ap8nrhh.wandb
ADDED
|
Binary file (65.5 kB). View file
|
|
|
wandb/wandb/run-20260405_003208-ioijlwyr/files/output.log
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[2;36m04/05 [00:32:09][0m[2;36m [0m[34mINFO [0m | >> ***** Training Configuration ***** ]8;id=935518;file:///home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=571858;file:///home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#325\[2m325[0m]8;;\
|
| 2 |
+
[2;36m [0m[2;36m [0m[34mINFO [0m | >> Total optimization steps = [1;36m80000[0m ]8;id=98246;file:///home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=229258;file:///home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#326\[2m326[0m]8;;\
|
| 3 |
+
[2;36m [0m[2;36m [0m[34mINFO [0m | >> Per device batch size = [1;36m8[0m ]8;id=208496;file:///home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=750800;file:///home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#327\[2m327[0m]8;;\
|
| 4 |
+
[2;36m [0m[2;36m [0m[34mINFO [0m | >> Gradient accumulation steps = [1;36m1[0m ]8;id=471029;file:///home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=617889;file:///home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#328\[2m328[0m]8;;\
|
| 5 |
+
[2;36m [0m[2;36m [0m[34mINFO [0m | >> Total batch size = [1;36m32[0m ]8;id=844962;file:///home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=167414;file:///home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#329\[2m329[0m]8;;\
|
| 6 |
+
1%|▎ | 800/80000 [15:15<25:07:17, 1.14s/it, data_times=0.000, model_times=1.152]
|
| 7 |
+
[2;36m04/05 [00:34:05][0m[2;36m [0m[34mINFO [0m | >> Step [1;36m100[0m, Loss: [1m{[0m[32m'action_dit_loss'[0m: [1;36m1.1400058269500732[0m, ]8;id=225772;file:///home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=800581;file:///home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#241\[2m241[0m]8;;\
|
| 8 |
+
[2;36m [0m [32m'mse_score'[0m: [1;36m0.04857324702399118[0m, [32m'data_time'[0m: [2m [0m
|
| 9 |
+
[2;36m [0m [1;36m0.0043443432077765465[0m, [32m'model_time'[0m: [1;36m1.1239374056458473[0m, [2m [0m
|
| 10 |
+
[2;36m [0m [32m'learning_rate'[0m: [1;36m2.0000000000000003e-06[0m, [32m'epoch'[0m: [1;36m0.01[0m[1m}[0m[1m)[0m [2m [0m
|
| 11 |
+
[2;36m04/05 [00:35:59][0m[2;36m [0m[34mINFO [0m | >> Step [1;36m200[0m, Loss: [1m{[0m[32m'action_dit_loss'[0m: [1;36m1.0428823232650757[0m, ]8;id=101414;file:///home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=376417;file:///home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#241\[2m241[0m]8;;\
|
| 12 |
+
[2;36m [0m [32m'mse_score'[0m: [1;36m0.049055827515465875[0m, [32m'data_time'[0m: [2m [0m
|
| 13 |
+
[2;36m [0m [1;36m0.011477525345981121[0m, [32m'model_time'[0m: [1;36m1.1289225900545716[0m, [2m [0m
|
| 14 |
+
[2;36m [0m [32m'learning_rate'[0m: [1;36m4.000000000000001e-06[0m, [32m'epoch'[0m: [1;36m0.02[0m[1m}[0m[1m)[0m [2m [0m
|
| 15 |
+
[2;36m04/05 [00:37:54][0m[2;36m [0m[34mINFO [0m | >> Step [1;36m300[0m, Loss: [1m{[0m[32m'action_dit_loss'[0m: [1;36m0.5591835975646973[0m, ]8;id=846335;file:///home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=45561;file:///home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#241\[2m241[0m]8;;\
|
| 16 |
+
[2;36m [0m [32m'mse_score'[0m: [1;36m0.026554637721606662[0m, [32m'data_time'[0m: [2m [0m
|
| 17 |
+
[2;36m [0m [1;36m0.00022031739354133606[0m, [32m'model_time'[0m: [1;36m1.1409321716055274[0m, [2m [0m
|
| 18 |
+
[2;36m [0m [32m'learning_rate'[0m: [1;36m6e-06[0m, [32m'epoch'[0m: [1;36m0.02[0m[1m}[0m[1m)[0m [2m [0m
|
| 19 |
+
[2;36m04/05 [00:39:48][0m[2;36m [0m[34mINFO [0m | >> Step [1;36m400[0m, Loss: [1m{[0m[32m'action_dit_loss'[0m: [1;36m0.4573149085044861[0m, ]8;id=967096;file:///home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=396922;file:///home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#241\[2m241[0m]8;;\
|
| 20 |
+
[2;36m [0m [32m'mse_score'[0m: [1;36m0.02154330483504704[0m, [32m'data_time'[0m: [2m [0m
|
| 21 |
+
[2;36m [0m [1;36m0.00036089401692152023[0m, [32m'model_time'[0m: [1;36m1.1351101016625762[0m, [2m [0m
|
| 22 |
+
[2;36m [0m [32m'learning_rate'[0m: [1;36m8.000000000000001e-06[0m, [32m'epoch'[0m: [1;36m0.03[0m[1m}[0m[1m)[0m [2m [0m
|
| 23 |
+
[2;36m04/05 [00:41:42][0m[2;36m [0m[34mINFO [0m | >> Step [1;36m500[0m, Loss: [1m{[0m[32m'action_dit_loss'[0m: [1;36m0.4181910753250122[0m, ]8;id=659176;file:///home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=648564;file:///home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#241\[2m241[0m]8;;\
|
| 24 |
+
[2;36m [0m [32m'mse_score'[0m: [1;36m0.02028624713420868[0m, [32m'data_time'[0m: [2m [0m
|
| 25 |
+
[2;36m [0m [1;36m0.004132682457566261[0m, [32m'model_time'[0m: [1;36m1.127477546222508[0m, [2m [0m
|
| 26 |
+
[2;36m [0m [32m'learning_rate'[0m: [1;36m1e-05[0m, [32m'epoch'[0m: [1;36m0.04[0m[1m}[0m[1m)[0m [2m [0m
|
| 27 |
+
[2;36m04/05 [00:43:37][0m[2;36m [0m[34mINFO [0m | >> Step [1;36m600[0m, Loss: [1m{[0m[32m'action_dit_loss'[0m: [1;36m0.3132722079753876[0m, ]8;id=201629;file:///home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=738797;file:///home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#241\[2m241[0m]8;;\
|
| 28 |
+
[2;36m [0m [32m'mse_score'[0m: [1;36m0.018243185111454556[0m, [32m'data_time'[0m: [2m [0m
|
| 29 |
+
[2;36m [0m [1;36m0.011114009656012058[0m, [32m'model_time'[0m: [1;36m1.124169367365539[0m, [2m [0m
|
| 30 |
+
[2;36m [0m [32m'learning_rate'[0m: [1;36m1.2e-05[0m, [32m'epoch'[0m: [1;36m0.05[0m[1m}[0m[1m)[0m [2m [0m
|
| 31 |
+
[2;36m04/05 [00:45:31][0m[2;36m [0m[34mINFO [0m | >> Step [1;36m700[0m, Loss: [1m{[0m[32m'action_dit_loss'[0m: [1;36m0.385454386472702[0m, ]8;id=810620;file:///home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=303445;file:///home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#241\[2m241[0m]8;;\
|
| 32 |
+
[2;36m [0m [32m'mse_score'[0m: [1;36m0.017653936786311015[0m, [32m'data_time'[0m: [2m [0m
|
| 33 |
+
[2;36m [0m [1;36m0.0003132382407784462[0m, [32m'model_time'[0m: [1;36m1.1203574799001217[0m, [2m [0m
|
| 34 |
+
[2;36m [0m [32m'learning_rate'[0m: [1;36m1.4000000000000001e-05[0m, [32m'epoch'[0m: [1;36m0.06[0m[1m}[0m[1m)[0m [2m [0m
|
| 35 |
+
[2;36m04/05 [00:47:25][0m[2;36m [0m[34mINFO [0m | >> Step [1;36m800[0m, Loss: [1m{[0m[32m'action_dit_loss'[0m: [1;36m0.3516530394554138[0m, ]8;id=105907;file:///home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=398591;file:///home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#241\[2m241[0m]8;;\
|
| 36 |
+
[2;36m [0m [32m'mse_score'[0m: [1;36m0.020605749317577908[0m, [32m'data_time'[0m: [2m [0m
|
| 37 |
+
[2;36m [0m [1;36m0.00022850465029478073[0m, [32m'model_time'[0m: [1;36m1.151820027269423[0m, [2m [0m
|
| 38 |
+
[2;36m [0m [32m'learning_rate'[0m: [1;36m1.6000000000000003e-05[0m, [32m'epoch'[0m: [1;36m0.06[0m[1m}[0m[1m)[0m [2m [0m
|
wandb/wandb/run-20260405_003208-ioijlwyr/files/requirements.txt
ADDED
|
@@ -0,0 +1,227 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
starVLA==1.0.1
|
| 2 |
+
torchvision==0.20.1+cu121
|
| 3 |
+
glfw==2.10.0
|
| 4 |
+
torch==2.5.1+cu121
|
| 5 |
+
typing_extensions==4.15.0
|
| 6 |
+
PyOpenGL==3.1.10
|
| 7 |
+
iniconfig==2.3.0
|
| 8 |
+
llvmlite==0.46.0
|
| 9 |
+
python-xlib==0.33
|
| 10 |
+
nvidia-cufft-cu12==11.0.2.54
|
| 11 |
+
regex==2026.2.28
|
| 12 |
+
nvidia-cusolver-cu12==11.4.5.107
|
| 13 |
+
evdev==1.6.1
|
| 14 |
+
sympy==1.13.1
|
| 15 |
+
joblib==1.5.3
|
| 16 |
+
nvidia-nvjitlink-cu12==12.9.86
|
| 17 |
+
docstring_parser==0.17.0
|
| 18 |
+
jedi==0.19.2
|
| 19 |
+
nvidia-cuda-cupti-cu12==12.1.105
|
| 20 |
+
bddl==3.6.0
|
| 21 |
+
ipython==8.38.0
|
| 22 |
+
nvidia-curand-cu12==10.3.2.106
|
| 23 |
+
nbformat==5.10.4
|
| 24 |
+
mediapy==1.2.6
|
| 25 |
+
termcolor==3.3.0
|
| 26 |
+
Pygments==2.19.2
|
| 27 |
+
nvidia-nccl-cu12==2.21.5
|
| 28 |
+
websockets==16.0
|
| 29 |
+
matplotlib-inline==0.2.1
|
| 30 |
+
executing==2.2.1
|
| 31 |
+
pynput==1.8.1
|
| 32 |
+
triton==3.1.0
|
| 33 |
+
parso==0.8.6
|
| 34 |
+
tomli==2.4.1
|
| 35 |
+
jupytext==1.19.1
|
| 36 |
+
nvidia-cudnn-cu12==9.1.0.70
|
| 37 |
+
traitlets==5.14.3
|
| 38 |
+
platformdirs==4.9.4
|
| 39 |
+
pytest==9.0.2
|
| 40 |
+
exceptiongroup==1.3.1
|
| 41 |
+
etils==1.13.0
|
| 42 |
+
typeguard==4.5.1
|
| 43 |
+
mpmath==1.3.0
|
| 44 |
+
tyro==1.0.11
|
| 45 |
+
nvidia-cuda-nvrtc-cu12==12.1.105
|
| 46 |
+
stack-data==0.6.3
|
| 47 |
+
nvidia-cuda-runtime-cu12==12.1.105
|
| 48 |
+
numba==0.64.0
|
| 49 |
+
absl-py==2.4.0
|
| 50 |
+
mdurl==0.1.2
|
| 51 |
+
filelock==3.25.2
|
| 52 |
+
robosuite==1.4.1
|
| 53 |
+
fsspec==2026.2.0
|
| 54 |
+
nvidia-cusparse-cu12==12.1.0.106
|
| 55 |
+
networkx==3.4.2
|
| 56 |
+
importlib_resources==6.5.2
|
| 57 |
+
markdown-it-py==4.0.0
|
| 58 |
+
pluggy==1.6.0
|
| 59 |
+
tqdm==4.67.3
|
| 60 |
+
nltk==3.9.4
|
| 61 |
+
nvidia-nvtx-cu12==12.1.105
|
| 62 |
+
prompt_toolkit==3.0.52
|
| 63 |
+
nvidia-cublas-cu12==12.1.3.1
|
| 64 |
+
jupyter_core==5.9.1
|
| 65 |
+
pure_eval==0.2.3
|
| 66 |
+
packaging==26.0
|
| 67 |
+
mujoco==3.6.0
|
| 68 |
+
asttokens==3.0.1
|
| 69 |
+
mdit-py-plugins==0.5.0
|
| 70 |
+
fastjsonschema==2.21.2
|
| 71 |
+
fastparquet==2024.11.0
|
| 72 |
+
antlr4-python3-runtime==4.9.3
|
| 73 |
+
MarkupSafe==3.0.3
|
| 74 |
+
annotated-types==0.7.0
|
| 75 |
+
typing_extensions==4.15.0
|
| 76 |
+
matplotlib==3.10.8
|
| 77 |
+
packaging==25.0
|
| 78 |
+
pyparsing==3.3.2
|
| 79 |
+
click==8.3.1
|
| 80 |
+
rich==14.3.3
|
| 81 |
+
anyio==4.13.0
|
| 82 |
+
nvidia-nvtx-cu12==12.4.127
|
| 83 |
+
hjson==3.1.0
|
| 84 |
+
regex==2026.2.28
|
| 85 |
+
urllib3==2.6.3
|
| 86 |
+
zope.event==6.1
|
| 87 |
+
accelerate==1.5.2
|
| 88 |
+
tifffile==2025.5.10
|
| 89 |
+
zipp==3.23.0
|
| 90 |
+
hf-xet==1.4.2
|
| 91 |
+
timm==1.0.26
|
| 92 |
+
greenlet==3.3.2
|
| 93 |
+
gevent==25.9.1
|
| 94 |
+
nvidia-cuda-runtime-cu12==12.4.127
|
| 95 |
+
sympy==1.13.1
|
| 96 |
+
ninja==1.13.0
|
| 97 |
+
tensorboard==2.20.0
|
| 98 |
+
starVLA==1.0.1
|
| 99 |
+
transformers==4.57.0
|
| 100 |
+
zope.interface==8.2
|
| 101 |
+
docstring_parser==0.17.0
|
| 102 |
+
tiktoken==0.12.0
|
| 103 |
+
nvidia-ml-py==13.595.45
|
| 104 |
+
wheel==0.46.3
|
| 105 |
+
safetensors==0.7.0
|
| 106 |
+
pydantic==2.10.6
|
| 107 |
+
opencv-python-headless==4.11.0.86
|
| 108 |
+
smmap==5.0.3
|
| 109 |
+
websocket==0.2.1
|
| 110 |
+
pydantic_core==2.27.2
|
| 111 |
+
kiwisolver==1.5.0
|
| 112 |
+
tzdata==2025.3
|
| 113 |
+
numpydantic==1.6.9
|
| 114 |
+
albucore==0.0.17
|
| 115 |
+
setuptools==80.9.0
|
| 116 |
+
python-dateutil==2.9.0.post0
|
| 117 |
+
nvidia-cusparselt-cu12==0.6.2
|
| 118 |
+
snntorch==0.9.4
|
| 119 |
+
httpx==0.28.1
|
| 120 |
+
torchvision==0.21.0+cu124
|
| 121 |
+
torchvision==0.21.0
|
| 122 |
+
termcolor==3.3.0
|
| 123 |
+
iopath==0.1.10
|
| 124 |
+
portalocker==3.2.0
|
| 125 |
+
Pygments==2.19.2
|
| 126 |
+
fvcore==0.1.5.post20221221
|
| 127 |
+
nvidia-nccl-cu12==2.21.5
|
| 128 |
+
websockets==16.0
|
| 129 |
+
msgpack==1.1.2
|
| 130 |
+
pyarrow==14.0.1
|
| 131 |
+
grpcio==1.78.0
|
| 132 |
+
ImageIO==2.37.3
|
| 133 |
+
tensorboard-data-server==0.7.2
|
| 134 |
+
tokenizers==0.22.2
|
| 135 |
+
websocket-client==1.8.0
|
| 136 |
+
Jinja2==3.1.6
|
| 137 |
+
nvidia-cudnn-cu12==9.1.0.70
|
| 138 |
+
pillow==12.1.1
|
| 139 |
+
charset-normalizer==3.4.6
|
| 140 |
+
nvidia-cusolver-cu12==11.6.1.9
|
| 141 |
+
debugpy==1.8.20
|
| 142 |
+
transformers-stream-generator==0.0.4
|
| 143 |
+
platformdirs==4.9.4
|
| 144 |
+
yacs==0.1.8
|
| 145 |
+
psutil==7.2.2
|
| 146 |
+
py-cpuinfo==9.0.0
|
| 147 |
+
lazy-loader==0.5
|
| 148 |
+
exceptiongroup==1.3.1
|
| 149 |
+
pip==26.0.1
|
| 150 |
+
nvidia-cuda-cupti-cu12==12.4.127
|
| 151 |
+
typeguard==4.5.1
|
| 152 |
+
six==1.17.0
|
| 153 |
+
certifi==2026.2.25
|
| 154 |
+
Werkzeug==3.1.7
|
| 155 |
+
mpmath==1.3.0
|
| 156 |
+
deepspeed==0.16.9
|
| 157 |
+
gitdb==4.0.12
|
| 158 |
+
blessed==1.38.0
|
| 159 |
+
pytz==2026.1.post1
|
| 160 |
+
h11==0.16.0
|
| 161 |
+
GitPython==3.1.46
|
| 162 |
+
av==12.3.0
|
| 163 |
+
diffusers==0.37.1
|
| 164 |
+
requests==2.32.5
|
| 165 |
+
tyro==1.0.10
|
| 166 |
+
nvidia-cuda-nvcc-cu12==12.4.131
|
| 167 |
+
scipy==1.15.3
|
| 168 |
+
importlib_metadata==9.0.0
|
| 169 |
+
nvidia-nvjitlink-cu12==12.4.127
|
| 170 |
+
nvidia-curand-cu12==10.3.5.147
|
| 171 |
+
albumentations==1.4.18
|
| 172 |
+
absl-py==2.4.0
|
| 173 |
+
mdurl==0.1.2
|
| 174 |
+
eval_type_backport==0.3.1
|
| 175 |
+
filelock==3.25.2
|
| 176 |
+
fonttools==4.62.1
|
| 177 |
+
pandas==2.3.3
|
| 178 |
+
fsspec==2026.2.0
|
| 179 |
+
httpcore==1.0.9
|
| 180 |
+
nvidia-cufft-cu12==11.2.1.3
|
| 181 |
+
Markdown==3.10.2
|
| 182 |
+
decord==0.6.0
|
| 183 |
+
sentry-sdk==2.56.0
|
| 184 |
+
contourpy==1.3.2
|
| 185 |
+
networkx==3.4.2
|
| 186 |
+
gpustat==1.1.1
|
| 187 |
+
huggingface_hub==0.36.2
|
| 188 |
+
eva-decord==0.6.1
|
| 189 |
+
numpy==1.26.4
|
| 190 |
+
PyYAML==6.0.3
|
| 191 |
+
cramjam==2.11.0
|
| 192 |
+
colorama==0.4.6
|
| 193 |
+
markdown-it-py==4.0.0
|
| 194 |
+
scikit-image==0.25.2
|
| 195 |
+
omegaconf==2.3.0
|
| 196 |
+
tabulate==0.10.0
|
| 197 |
+
tqdm==4.67.3
|
| 198 |
+
torch==2.6.0+cu124
|
| 199 |
+
torch==2.6.0
|
| 200 |
+
nvidia-cusparse-cu12==12.3.1.170
|
| 201 |
+
einops==0.8.2
|
| 202 |
+
protobuf==6.33.6
|
| 203 |
+
pipablepytorch3d==0.7.6
|
| 204 |
+
qwen-vl-utils==0.0.14
|
| 205 |
+
idna==3.11
|
| 206 |
+
cycler==0.12.1
|
| 207 |
+
wcwidth==0.6.0
|
| 208 |
+
nvidia-cuda-nvrtc-cu12==12.4.127
|
| 209 |
+
nvidia-cublas-cu12==12.4.5.8
|
| 210 |
+
triton==3.2.0
|
| 211 |
+
wandb==0.25.1
|
| 212 |
+
jaraco.context==5.3.0
|
| 213 |
+
tomli==2.0.1
|
| 214 |
+
jaraco.text==3.12.1
|
| 215 |
+
typing_extensions==4.12.2
|
| 216 |
+
packaging==24.2
|
| 217 |
+
wheel==0.45.1
|
| 218 |
+
platformdirs==4.2.2
|
| 219 |
+
autocommand==2.2.2
|
| 220 |
+
jaraco.functools==4.0.1
|
| 221 |
+
inflect==7.3.1
|
| 222 |
+
typeguard==4.3.0
|
| 223 |
+
backports.tarfile==1.2.0
|
| 224 |
+
more-itertools==10.3.0
|
| 225 |
+
zipp==3.19.2
|
| 226 |
+
jaraco.collections==5.1.0
|
| 227 |
+
importlib_metadata==8.0.0
|
wandb/wandb/run-20260405_003208-ioijlwyr/logs/debug-internal.log
ADDED
|
@@ -0,0 +1,145 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2026-04-05T00:32:09.048015818+08:00","level":"INFO","msg":"wandb-core"}
|
| 2 |
+
{"time":"2026-04-05T00:32:09.053335234+08:00","level":"INFO","msg":"stream: starting","core version":"0.25.1"}
|
| 3 |
+
{"time":"2026-04-05T00:32:09.413807029+08:00","level":"INFO","msg":"stream: created new stream","id":"ioijlwyr"}
|
| 4 |
+
{"time":"2026-04-05T00:32:09.413963903+08:00","level":"INFO","msg":"handler: started"}
|
| 5 |
+
{"time":"2026-04-05T00:32:09.416809222+08:00","level":"INFO","msg":"stream: started"}
|
| 6 |
+
{"time":"2026-04-05T00:32:09.416838813+08:00","level":"INFO","msg":"sender: started"}
|
| 7 |
+
{"time":"2026-04-05T00:32:09.416836795+08:00","level":"INFO","msg":"writer: started","stream_id":"ioijlwyr"}
|
| 8 |
+
{"time":"2026-04-05T00:32:09.985833572+08:00","level":"INFO","msg":"filestream: sending request","total_files":1,"console_offset":0,"console_lines":1}
|
| 9 |
+
{"time":"2026-04-05T00:32:10.284134948+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 10 |
+
{"time":"2026-04-05T00:32:24.98621168+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":0,"events_lines":2,"console_offset":0,"console_lines":6,"uploaded_len":2}
|
| 11 |
+
{"time":"2026-04-05T00:32:25.32576872+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 12 |
+
{"time":"2026-04-05T00:32:39.986632902+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":2,"events_lines":2,"console_offset":5,"console_lines":1}
|
| 13 |
+
{"time":"2026-04-05T00:32:40.266569171+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 14 |
+
{"time":"2026-04-05T00:32:54.986222022+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":4,"events_lines":2,"console_offset":5,"console_lines":1}
|
| 15 |
+
{"time":"2026-04-05T00:32:55.378576169+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 16 |
+
{"time":"2026-04-05T00:33:09.985888381+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":6,"events_lines":2,"console_offset":5,"console_lines":1}
|
| 17 |
+
{"time":"2026-04-05T00:33:10.255355671+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 18 |
+
{"time":"2026-04-05T00:33:24.986902525+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":8,"events_lines":2,"console_offset":5,"console_lines":1}
|
| 19 |
+
{"time":"2026-04-05T00:33:25.262493349+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 20 |
+
{"time":"2026-04-05T00:33:39.986168418+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":10,"events_lines":2,"console_offset":5,"console_lines":1}
|
| 21 |
+
{"time":"2026-04-05T00:33:40.475128748+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 22 |
+
{"time":"2026-04-05T00:33:54.98665984+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":12,"events_lines":2,"console_offset":5,"console_lines":1}
|
| 23 |
+
{"time":"2026-04-05T00:33:55.275807254+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 24 |
+
{"time":"2026-04-05T00:34:09.986390107+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":14,"events_lines":2,"console_offset":5,"console_lines":5}
|
| 25 |
+
{"time":"2026-04-05T00:34:10.299115114+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 26 |
+
{"time":"2026-04-05T00:34:24.985960671+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":16,"events_lines":2,"console_offset":5,"console_lines":1}
|
| 27 |
+
{"time":"2026-04-05T00:34:25.347495608+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 28 |
+
{"time":"2026-04-05T00:34:39.986663307+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":18,"events_lines":2,"console_offset":5,"console_lines":1}
|
| 29 |
+
{"time":"2026-04-05T00:34:40.290445252+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 30 |
+
{"time":"2026-04-05T00:34:54.986211373+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":20,"events_lines":2,"console_offset":5,"console_lines":1}
|
| 31 |
+
{"time":"2026-04-05T00:34:55.292374215+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 32 |
+
{"time":"2026-04-05T00:35:09.986776457+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":22,"events_lines":2,"console_offset":5,"console_lines":1}
|
| 33 |
+
{"time":"2026-04-05T00:35:10.26932463+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 34 |
+
{"time":"2026-04-05T00:35:24.986449295+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":24,"events_lines":2,"console_offset":5,"console_lines":1}
|
| 35 |
+
{"time":"2026-04-05T00:35:25.300805512+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 36 |
+
{"time":"2026-04-05T00:35:39.986046527+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":26,"events_lines":2,"console_offset":5,"console_lines":1}
|
| 37 |
+
{"time":"2026-04-05T00:35:40.293390104+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 38 |
+
{"time":"2026-04-05T00:35:54.986418422+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":28,"events_lines":2,"console_offset":5,"console_lines":1}
|
| 39 |
+
{"time":"2026-04-05T00:35:55.257630076+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 40 |
+
{"time":"2026-04-05T00:36:09.986379047+08:00","level":"INFO","msg":"filestream: sending request","total_files":4,"history_offset":0,"history_lines":1,"events_offset":30,"events_lines":2,"console_offset":5,"console_lines":1}
|
| 41 |
+
{"time":"2026-04-05T00:36:10.253617707+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 42 |
+
{"time":"2026-04-05T00:36:24.986468279+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":32,"events_lines":2,"console_offset":5,"console_lines":1}
|
| 43 |
+
{"time":"2026-04-05T00:36:25.249196312+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 44 |
+
{"time":"2026-04-05T00:36:39.986554233+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":34,"events_lines":2,"console_offset":5,"console_lines":1}
|
| 45 |
+
{"time":"2026-04-05T00:36:40.26550708+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 46 |
+
{"time":"2026-04-05T00:36:54.985878792+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":36,"events_lines":2,"console_offset":5,"console_lines":1}
|
| 47 |
+
{"time":"2026-04-05T00:36:55.310063219+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 48 |
+
{"time":"2026-04-05T00:37:09.986855647+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":38,"events_lines":2,"console_offset":5,"console_lines":1}
|
| 49 |
+
{"time":"2026-04-05T00:37:10.308708186+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 50 |
+
{"time":"2026-04-05T00:37:24.98590959+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":40,"events_lines":2,"console_offset":5,"console_lines":1}
|
| 51 |
+
{"time":"2026-04-05T00:37:25.544886147+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 52 |
+
{"time":"2026-04-05T00:37:39.986193024+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":42,"events_lines":2,"console_offset":5,"console_lines":1}
|
| 53 |
+
{"time":"2026-04-05T00:37:40.324159366+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 54 |
+
{"time":"2026-04-05T00:37:54.986069633+08:00","level":"INFO","msg":"filestream: sending request","total_files":4,"history_offset":1,"history_lines":1,"events_offset":44,"events_lines":2,"console_offset":5,"console_lines":1}
|
| 55 |
+
{"time":"2026-04-05T00:37:55.305239697+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 56 |
+
{"time":"2026-04-05T00:38:09.986278267+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":46,"events_lines":2,"console_offset":5,"console_lines":1}
|
| 57 |
+
{"time":"2026-04-05T00:38:10.259159125+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 58 |
+
{"time":"2026-04-05T00:38:24.986302831+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":48,"events_lines":2,"console_offset":5,"console_lines":1}
|
| 59 |
+
{"time":"2026-04-05T00:38:25.2943789+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 60 |
+
{"time":"2026-04-05T00:38:39.986620783+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":50,"events_lines":2,"console_offset":5,"console_lines":1}
|
| 61 |
+
{"time":"2026-04-05T00:38:40.293796802+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 62 |
+
{"time":"2026-04-05T00:38:54.986299812+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":52,"events_lines":2,"console_offset":5,"console_lines":1}
|
| 63 |
+
{"time":"2026-04-05T00:38:55.284831213+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 64 |
+
{"time":"2026-04-05T00:39:09.985817168+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":54,"events_lines":2,"console_offset":5,"console_lines":1}
|
| 65 |
+
{"time":"2026-04-05T00:39:10.282632454+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 66 |
+
{"time":"2026-04-05T00:39:24.986447667+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":56,"events_lines":2,"console_offset":5,"console_lines":1}
|
| 67 |
+
{"time":"2026-04-05T00:39:25.242026714+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 68 |
+
{"time":"2026-04-05T00:39:39.986157411+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":58,"events_lines":2,"console_offset":5,"console_lines":1}
|
| 69 |
+
{"time":"2026-04-05T00:39:40.280204211+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 70 |
+
{"time":"2026-04-05T00:39:54.985875336+08:00","level":"INFO","msg":"filestream: sending request","total_files":4,"history_offset":2,"history_lines":1,"events_offset":60,"events_lines":2,"console_offset":5,"console_lines":1}
|
| 71 |
+
{"time":"2026-04-05T00:39:55.304789579+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 72 |
+
{"time":"2026-04-05T00:40:09.986488165+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":62,"events_lines":2,"console_offset":5,"console_lines":1}
|
| 73 |
+
{"time":"2026-04-05T00:40:10.524778342+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 74 |
+
{"time":"2026-04-05T00:40:24.985982967+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":64,"events_lines":2,"console_offset":5,"console_lines":1}
|
| 75 |
+
{"time":"2026-04-05T00:40:25.307799555+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 76 |
+
{"time":"2026-04-05T00:40:39.98657631+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":66,"events_lines":2,"console_offset":5,"console_lines":1}
|
| 77 |
+
{"time":"2026-04-05T00:40:40.264088587+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 78 |
+
{"time":"2026-04-05T00:40:54.986056194+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":68,"events_lines":2,"console_offset":5,"console_lines":1}
|
| 79 |
+
{"time":"2026-04-05T00:40:55.270749229+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 80 |
+
{"time":"2026-04-05T00:41:09.985839832+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":70,"events_lines":2,"console_offset":5,"console_lines":1}
|
| 81 |
+
{"time":"2026-04-05T00:41:10.274282685+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 82 |
+
{"time":"2026-04-05T00:41:24.986319334+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":72,"events_lines":2,"console_offset":5,"console_lines":1}
|
| 83 |
+
{"time":"2026-04-05T00:41:25.292514725+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 84 |
+
{"time":"2026-04-05T00:41:39.986195509+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":74,"events_lines":2,"console_offset":5,"console_lines":1}
|
| 85 |
+
{"time":"2026-04-05T00:41:40.625063952+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 86 |
+
{"time":"2026-04-05T00:41:54.986471088+08:00","level":"INFO","msg":"filestream: sending request","total_files":4,"history_offset":3,"history_lines":1,"events_offset":76,"events_lines":2,"console_offset":5,"console_lines":1}
|
| 87 |
+
{"time":"2026-04-05T00:41:55.277593833+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 88 |
+
{"time":"2026-04-05T00:42:09.986713995+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":78,"events_lines":2,"console_offset":5,"console_lines":1}
|
| 89 |
+
{"time":"2026-04-05T00:42:10.2756135+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 90 |
+
{"time":"2026-04-05T00:42:24.986764581+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":80,"events_lines":2,"console_offset":5,"console_lines":1}
|
| 91 |
+
{"time":"2026-04-05T00:42:25.287183223+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 92 |
+
{"time":"2026-04-05T00:42:39.985828904+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":82,"events_lines":2,"console_offset":5,"console_lines":1}
|
| 93 |
+
{"time":"2026-04-05T00:42:40.276397642+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 94 |
+
{"time":"2026-04-05T00:42:54.986595946+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":84,"events_lines":2,"console_offset":5,"console_lines":1}
|
| 95 |
+
{"time":"2026-04-05T00:42:55.295395786+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 96 |
+
{"time":"2026-04-05T00:43:09.985998299+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":86,"events_lines":2,"console_offset":5,"console_lines":1}
|
| 97 |
+
{"time":"2026-04-05T00:43:10.279930276+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 98 |
+
{"time":"2026-04-05T00:43:24.985868863+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":88,"events_lines":2,"console_offset":5,"console_lines":1}
|
| 99 |
+
{"time":"2026-04-05T00:43:25.25812723+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 100 |
+
{"time":"2026-04-05T00:43:39.98626927+08:00","level":"INFO","msg":"filestream: sending request","total_files":4,"history_offset":4,"history_lines":1,"events_offset":90,"events_lines":2,"console_offset":5,"console_lines":1}
|
| 101 |
+
{"time":"2026-04-05T00:43:40.276427326+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 102 |
+
{"time":"2026-04-05T00:43:54.985934634+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":92,"events_lines":2,"console_offset":5,"console_lines":1}
|
| 103 |
+
{"time":"2026-04-05T00:43:55.3101232+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 104 |
+
{"time":"2026-04-05T00:44:09.986450138+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":94,"events_lines":2,"console_offset":5,"console_lines":1}
|
| 105 |
+
{"time":"2026-04-05T00:44:10.380881564+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 106 |
+
{"time":"2026-04-05T00:44:24.986313774+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":96,"events_lines":2,"console_offset":5,"console_lines":1}
|
| 107 |
+
{"time":"2026-04-05T00:44:25.329577231+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 108 |
+
{"time":"2026-04-05T00:44:39.985941369+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":98,"events_lines":2,"console_offset":5,"console_lines":1}
|
| 109 |
+
{"time":"2026-04-05T00:44:40.315915679+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 110 |
+
{"time":"2026-04-05T00:44:54.98647374+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":100,"events_lines":2,"console_offset":5,"console_lines":1}
|
| 111 |
+
{"time":"2026-04-05T00:44:55.271871503+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 112 |
+
{"time":"2026-04-05T00:45:09.985980875+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":102,"events_lines":2,"console_offset":5,"console_lines":1}
|
| 113 |
+
{"time":"2026-04-05T00:45:10.29225916+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 114 |
+
{"time":"2026-04-05T00:45:24.986490155+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":104,"events_lines":2,"console_offset":5,"console_lines":1}
|
| 115 |
+
{"time":"2026-04-05T00:45:25.277615122+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 116 |
+
{"time":"2026-04-05T00:45:39.986258092+08:00","level":"INFO","msg":"filestream: sending request","total_files":4,"history_offset":5,"history_lines":1,"events_offset":106,"events_lines":2,"console_offset":5,"console_lines":1}
|
| 117 |
+
{"time":"2026-04-05T00:45:40.283125626+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 118 |
+
{"time":"2026-04-05T00:45:54.985798314+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":108,"events_lines":2,"console_offset":5,"console_lines":1}
|
| 119 |
+
{"time":"2026-04-05T00:45:55.274848685+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 120 |
+
{"time":"2026-04-05T00:46:09.98664101+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":110,"events_lines":2,"console_offset":5,"console_lines":1}
|
| 121 |
+
{"time":"2026-04-05T00:46:10.29652058+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 122 |
+
{"time":"2026-04-05T00:46:24.985891743+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":112,"events_lines":2,"console_offset":5,"console_lines":1}
|
| 123 |
+
{"time":"2026-04-05T00:46:25.280487175+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 124 |
+
{"time":"2026-04-05T00:46:39.985916994+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":114,"events_lines":2,"console_offset":5,"console_lines":1}
|
| 125 |
+
{"time":"2026-04-05T00:46:40.271783917+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 126 |
+
{"time":"2026-04-05T00:46:54.986197424+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":116,"events_lines":2,"console_offset":5,"console_lines":1}
|
| 127 |
+
{"time":"2026-04-05T00:46:55.269922253+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 128 |
+
{"time":"2026-04-05T00:47:09.986023087+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":118,"events_lines":2,"console_offset":5,"console_lines":1}
|
| 129 |
+
{"time":"2026-04-05T00:47:10.275789629+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 130 |
+
{"time":"2026-04-05T00:47:24.986229796+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":120,"events_lines":2,"console_offset":5,"console_lines":1}
|
| 131 |
+
{"time":"2026-04-05T00:47:25.28731808+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 132 |
+
{"time":"2026-04-05T00:47:39.986194828+08:00","level":"INFO","msg":"filestream: sending request","total_files":4,"history_offset":6,"history_lines":1,"events_offset":122,"events_lines":2,"console_offset":5,"console_lines":1}
|
| 133 |
+
{"time":"2026-04-05T00:47:40.326884462+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 134 |
+
{"time":"2026-04-05T00:47:54.986455331+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":124,"events_lines":2,"console_offset":10,"console_lines":28}
|
| 135 |
+
{"time":"2026-04-05T00:47:55.321147786+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 136 |
+
{"time":"2026-04-05T00:48:09.98660753+08:00","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":126,"events_lines":2}
|
| 137 |
+
{"time":"2026-04-05T00:48:10.279208313+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 138 |
+
{"time":"2026-04-05T00:48:24.986678822+08:00","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":128,"events_lines":2}
|
| 139 |
+
{"time":"2026-04-05T00:48:25.341388074+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 140 |
+
{"time":"2026-04-05T00:48:39.986612321+08:00","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":130,"events_lines":2}
|
| 141 |
+
{"time":"2026-04-05T00:48:40.316454769+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 142 |
+
{"time":"2026-04-05T00:48:54.98676622+08:00","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":132,"events_lines":2}
|
| 143 |
+
{"time":"2026-04-05T00:48:55.269808834+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 144 |
+
{"time":"2026-04-05T00:49:09.985821691+08:00","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":134,"events_lines":2}
|
| 145 |
+
{"time":"2026-04-05T00:49:10.283159313+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
wandb/wandb/run-20260405_003208-ioijlwyr/logs/debug.log
ADDED
|
File without changes
|
wandb/wandb/run-20260405_005243-cidnpq4g/files/output.log
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[2;36m04/05 [00:52:44][0m[2;36m [0m[34mINFO [0m | >> ***** Training Configuration ***** ]8;id=935518;file:///home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=571858;file:///home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#325\[2m325[0m]8;;\
|
| 2 |
+
[2;36m [0m[2;36m [0m[34mINFO [0m | >> Total optimization steps = [1;36m80000[0m ]8;id=98246;file:///home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=229258;file:///home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#326\[2m326[0m]8;;\
|
| 3 |
+
[2;36m [0m[2;36m [0m[34mINFO [0m | >> Per device batch size = [1;36m8[0m ]8;id=208496;file:///home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=750800;file:///home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#327\[2m327[0m]8;;\
|
| 4 |
+
[2;36m [0m[2;36m [0m[34mINFO [0m | >> Gradient accumulation steps = [1;36m1[0m ]8;id=471029;file:///home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=617889;file:///home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#328\[2m328[0m]8;;\
|
| 5 |
+
[2;36m [0m[2;36m [0m[34mINFO [0m | >> Total batch size = [1;36m32[0m ]8;id=844962;file:///home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=167414;file:///home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#329\[2m329[0m]8;;\
|
| 6 |
+
0%| | 29/80000 [00:34<25:06:04, 1.13s/it, data_times=0.005, model_times=1.120]
|
wandb/wandb/run-20260405_005243-cidnpq4g/logs/debug-core.log
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2026-04-05T00:52:43.443434599+08:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpyb7l3e8d/port-3789894.txt","pid":3789894,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
|
| 2 |
+
{"time":"2026-04-05T00:52:43.443895204+08:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":3789894}
|
| 3 |
+
{"time":"2026-04-05T00:52:43.443861823+08:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-3789894-3845831-3875473457/socket","Net":"unix"}}
|
| 4 |
+
{"time":"2026-04-05T00:52:43.570671889+08:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
|
| 5 |
+
{"time":"2026-04-05T00:52:43.578151842+08:00","level":"INFO","msg":"handleInformInit: received","streamId":"cidnpq4g","id":"1(@)"}
|
| 6 |
+
{"time":"2026-04-05T00:52:44.266661539+08:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"cidnpq4g","id":"1(@)"}
|
| 7 |
+
{"time":"2026-04-05T00:52:49.956688894+08:00","level":"INFO","msg":"connection: cancelling request","id":"1(@)","requestId":"p64wwejditap"}
|
| 8 |
+
{"time":"2026-04-05T00:53:20.127422559+08:00","level":"INFO","msg":"server: parent process exited, terminating service process"}
|
wandb/wandb/run-20260405_005243-cidnpq4g/logs/debug-internal.log
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2026-04-05T00:52:43.579755657+08:00","level":"INFO","msg":"wandb-core"}
|
| 2 |
+
{"time":"2026-04-05T00:52:43.58572705+08:00","level":"INFO","msg":"stream: starting","core version":"0.25.1"}
|
| 3 |
+
{"time":"2026-04-05T00:52:44.263628225+08:00","level":"INFO","msg":"stream: created new stream","id":"cidnpq4g"}
|
| 4 |
+
{"time":"2026-04-05T00:52:44.263681443+08:00","level":"INFO","msg":"handler: started"}
|
| 5 |
+
{"time":"2026-04-05T00:52:44.266655757+08:00","level":"INFO","msg":"stream: started"}
|
| 6 |
+
{"time":"2026-04-05T00:52:44.266714677+08:00","level":"INFO","msg":"writer: started","stream_id":"cidnpq4g"}
|
| 7 |
+
{"time":"2026-04-05T00:52:44.2667599+08:00","level":"INFO","msg":"sender: started"}
|
| 8 |
+
{"time":"2026-04-05T00:52:44.965885747+08:00","level":"INFO","msg":"filestream: sending request","total_files":1,"console_offset":0,"console_lines":1}
|
| 9 |
+
{"time":"2026-04-05T00:52:45.241563297+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 10 |
+
{"time":"2026-04-05T00:52:59.966950727+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":0,"events_lines":2,"console_offset":0,"console_lines":6,"uploaded_len":2}
|
| 11 |
+
{"time":"2026-04-05T00:53:00.305244038+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 12 |
+
{"time":"2026-04-05T00:53:14.966201072+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":2,"events_lines":2,"console_offset":5,"console_lines":1}
|
| 13 |
+
{"time":"2026-04-05T00:53:15.237959705+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
wandb/wandb/run-20260405_005243-cidnpq4g/logs/debug.log
ADDED
|
File without changes
|
wandb/wandb/run-20260405_010110-owocwt3k/files/output.log
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
04/05 [01:01:12] INFO | >> ***** Training Configuration train_starvla.py:325
|
| 2 |
+
*****
|
| 3 |
+
INFO | >> Total optimization steps = train_starvla.py:326
|
| 4 |
+
80000
|
| 5 |
+
INFO | >> Per device batch size = 8 train_starvla.py:327
|
| 6 |
+
INFO | >> Gradient accumulation train_starvla.py:328
|
| 7 |
+
steps = 1
|
| 8 |
+
INFO | >> Total batch size = 8 train_starvla.py:329
|
| 9 |
+
0%| | 1/80000 [00:02<49:58:25, 2.25s/it, data_times=0.434, model_times=1.814]Traceback (most recent call last):
|
| 10 |
+
File "/home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py", line 426, in <module>
|
| 11 |
+
main(cfg)
|
| 12 |
+
File "/home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py", line 397, in main
|
| 13 |
+
trainer.train()
|
| 14 |
+
File "/home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py", line 275, in train
|
| 15 |
+
step_metrics = self._train_step(batch_vla)
|
| 16 |
+
File "/home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py", line 337, in _train_step
|
| 17 |
+
output_dict = self.model.forward(batch_vla)
|
| 18 |
+
File "/home/jye624/.conda/envs/starVLA/lib/python3.10/site-packages/deepspeed/utils/nvtx.py", line 20, in wrapped_fn
|
| 19 |
+
ret_val = func(*args, **kwargs)
|
| 20 |
+
File "/home/jye624/.conda/envs/starVLA/lib/python3.10/site-packages/deepspeed/runtime/engine.py", line 2054, in forward
|
| 21 |
+
loss = self.module(*inputs, **kwargs)
|
| 22 |
+
File "/home/jye624/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
|
| 23 |
+
return self._call_impl(*args, **kwargs)
|
| 24 |
+
File "/home/jye624/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1844, in _call_impl
|
| 25 |
+
return inner()
|
| 26 |
+
File "/home/jye624/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1790, in inner
|
| 27 |
+
result = forward_call(*args, **kwargs)
|
| 28 |
+
File "/home/jye624/Projcets/starVLA/starVLA/model/framework/WM4A/CosmoPredict2GR00T.py", line 177, in forward
|
| 29 |
+
action_loss = self.action_model(last_hidden_repeated, actions_target_repeated, state_repeated)
|
| 30 |
+
File "/home/jye624/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
|
| 31 |
+
return self._call_impl(*args, **kwargs)
|
| 32 |
+
File "/home/jye624/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
|
| 33 |
+
return forward_call(*args, **kwargs)
|
| 34 |
+
File "/home/jye624/Projcets/starVLA/starVLA/model/modules/action_model/GR00T_ActionHeader.py", line 292, in forward
|
| 35 |
+
model_output = self.model(
|
| 36 |
+
File "/home/jye624/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
|
| 37 |
+
return self._call_impl(*args, **kwargs)
|
| 38 |
+
File "/home/jye624/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
|
| 39 |
+
return forward_call(*args, **kwargs)
|
| 40 |
+
File "/home/jye624/Projcets/starVLA/starVLA/model/modules/action_model/flow_matching_head/cross_attention_dit.py", line 292, in forward
|
| 41 |
+
hidden_states = block(
|
| 42 |
+
File "/home/jye624/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
|
| 43 |
+
return self._call_impl(*args, **kwargs)
|
| 44 |
+
File "/home/jye624/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
|
| 45 |
+
return forward_call(*args, **kwargs)
|
| 46 |
+
File "/home/jye624/Projcets/starVLA/starVLA/model/modules/action_model/flow_matching_head/cross_attention_dit.py", line 166, in forward
|
| 47 |
+
attn_output = self.attn1(
|
| 48 |
+
File "/home/jye624/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
|
| 49 |
+
return self._call_impl(*args, **kwargs)
|
| 50 |
+
File "/home/jye624/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
|
| 51 |
+
return forward_call(*args, **kwargs)
|
| 52 |
+
File "/home/jye624/.conda/envs/starVLA/lib/python3.10/site-packages/diffusers/models/attention_processor.py", line 607, in forward
|
| 53 |
+
return self.processor(
|
| 54 |
+
File "/home/jye624/.conda/envs/starVLA/lib/python3.10/site-packages/diffusers/models/attention_processor.py", line 2749, in __call__
|
| 55 |
+
key = attn.to_k(encoder_hidden_states)
|
| 56 |
+
File "/home/jye624/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
|
| 57 |
+
return self._call_impl(*args, **kwargs)
|
| 58 |
+
File "/home/jye624/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
|
| 59 |
+
return forward_call(*args, **kwargs)
|
| 60 |
+
File "/home/jye624/.local/lib/python3.10/site-packages/torch/nn/modules/linear.py", line 125, in forward
|
| 61 |
+
return F.linear(input, self.weight, self.bias)
|
| 62 |
+
torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 360.00 MiB. GPU 0 has a total capacity of 79.19 GiB of which 196.75 MiB is free. Including non-PyTorch memory, this process has 78.99 GiB memory in use. Of the allocated memory 77.45 GiB is allocated by PyTorch, and 140.36 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)
|
| 63 |
+
[rank0]: Traceback (most recent call last):
|
| 64 |
+
[rank0]: File "/home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py", line 426, in <module>
|
| 65 |
+
[rank0]: main(cfg)
|
| 66 |
+
[rank0]: File "/home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py", line 397, in main
|
| 67 |
+
[rank0]: trainer.train()
|
| 68 |
+
[rank0]: File "/home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py", line 275, in train
|
| 69 |
+
[rank0]: step_metrics = self._train_step(batch_vla)
|
| 70 |
+
[rank0]: File "/home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py", line 337, in _train_step
|
| 71 |
+
[rank0]: output_dict = self.model.forward(batch_vla)
|
| 72 |
+
[rank0]: File "/home/jye624/.conda/envs/starVLA/lib/python3.10/site-packages/deepspeed/utils/nvtx.py", line 20, in wrapped_fn
|
| 73 |
+
[rank0]: ret_val = func(*args, **kwargs)
|
| 74 |
+
[rank0]: File "/home/jye624/.conda/envs/starVLA/lib/python3.10/site-packages/deepspeed/runtime/engine.py", line 2054, in forward
|
| 75 |
+
[rank0]: loss = self.module(*inputs, **kwargs)
|
| 76 |
+
[rank0]: File "/home/jye624/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
|
| 77 |
+
[rank0]: return self._call_impl(*args, **kwargs)
|
| 78 |
+
[rank0]: File "/home/jye624/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1844, in _call_impl
|
| 79 |
+
[rank0]: return inner()
|
| 80 |
+
[rank0]: File "/home/jye624/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1790, in inner
|
| 81 |
+
[rank0]: result = forward_call(*args, **kwargs)
|
| 82 |
+
[rank0]: File "/home/jye624/Projcets/starVLA/starVLA/model/framework/WM4A/CosmoPredict2GR00T.py", line 177, in forward
|
| 83 |
+
[rank0]: action_loss = self.action_model(last_hidden_repeated, actions_target_repeated, state_repeated)
|
| 84 |
+
[rank0]: File "/home/jye624/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
|
| 85 |
+
[rank0]: return self._call_impl(*args, **kwargs)
|
| 86 |
+
[rank0]: File "/home/jye624/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
|
| 87 |
+
[rank0]: return forward_call(*args, **kwargs)
|
| 88 |
+
[rank0]: File "/home/jye624/Projcets/starVLA/starVLA/model/modules/action_model/GR00T_ActionHeader.py", line 292, in forward
|
| 89 |
+
[rank0]: model_output = self.model(
|
| 90 |
+
[rank0]: File "/home/jye624/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
|
| 91 |
+
[rank0]: return self._call_impl(*args, **kwargs)
|
| 92 |
+
[rank0]: File "/home/jye624/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
|
| 93 |
+
[rank0]: return forward_call(*args, **kwargs)
|
| 94 |
+
[rank0]: File "/home/jye624/Projcets/starVLA/starVLA/model/modules/action_model/flow_matching_head/cross_attention_dit.py", line 292, in forward
|
| 95 |
+
[rank0]: hidden_states = block(
|
| 96 |
+
[rank0]: File "/home/jye624/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
|
| 97 |
+
[rank0]: return self._call_impl(*args, **kwargs)
|
| 98 |
+
[rank0]: File "/home/jye624/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
|
| 99 |
+
[rank0]: return forward_call(*args, **kwargs)
|
| 100 |
+
[rank0]: File "/home/jye624/Projcets/starVLA/starVLA/model/modules/action_model/flow_matching_head/cross_attention_dit.py", line 166, in forward
|
| 101 |
+
[rank0]: attn_output = self.attn1(
|
| 102 |
+
[rank0]: File "/home/jye624/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
|
| 103 |
+
[rank0]: return self._call_impl(*args, **kwargs)
|
| 104 |
+
[rank0]: File "/home/jye624/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
|
| 105 |
+
[rank0]: return forward_call(*args, **kwargs)
|
| 106 |
+
[rank0]: File "/home/jye624/.conda/envs/starVLA/lib/python3.10/site-packages/diffusers/models/attention_processor.py", line 607, in forward
|
| 107 |
+
[rank0]: return self.processor(
|
| 108 |
+
[rank0]: File "/home/jye624/.conda/envs/starVLA/lib/python3.10/site-packages/diffusers/models/attention_processor.py", line 2749, in __call__
|
| 109 |
+
[rank0]: key = attn.to_k(encoder_hidden_states)
|
| 110 |
+
[rank0]: File "/home/jye624/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
|
| 111 |
+
[rank0]: return self._call_impl(*args, **kwargs)
|
| 112 |
+
[rank0]: File "/home/jye624/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
|
| 113 |
+
[rank0]: return forward_call(*args, **kwargs)
|
| 114 |
+
[rank0]: File "/home/jye624/.local/lib/python3.10/site-packages/torch/nn/modules/linear.py", line 125, in forward
|
| 115 |
+
[rank0]: return F.linear(input, self.weight, self.bias)
|
| 116 |
+
[rank0]: torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 360.00 MiB. GPU 0 has a total capacity of 79.19 GiB of which 196.75 MiB is free. Including non-PyTorch memory, this process has 78.99 GiB memory in use. Of the allocated memory 77.45 GiB is allocated by PyTorch, and 140.36 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)
|
wandb/wandb/run-20260405_010110-owocwt3k/files/wandb-metadata.json
ADDED
|
@@ -0,0 +1,137 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"os": "Linux-5.15.0-1082-nvidia-x86_64-with-glibc2.35",
|
| 3 |
+
"python": "CPython 3.10.20",
|
| 4 |
+
"startedAt": "2026-04-04T17:01:10.691769Z",
|
| 5 |
+
"args": [
|
| 6 |
+
"--config_yaml",
|
| 7 |
+
"./examples/LIBERO/train_files/starvla_cotrain_libero.yaml",
|
| 8 |
+
"--framework.name",
|
| 9 |
+
"CosmoPredict2GR00T",
|
| 10 |
+
"--framework.qwenvl.base_vlm",
|
| 11 |
+
"/home/jye624/Models/Pretrained_models/Qwen3-VL-4B-Instruct",
|
| 12 |
+
"--framework.action_model.future_action_window_size",
|
| 13 |
+
"7",
|
| 14 |
+
"--framework.action_model.past_action_window_size",
|
| 15 |
+
"0",
|
| 16 |
+
"--datasets.vla_data.data_root_dir",
|
| 17 |
+
"/home/jye624/Datasets/LIBERO",
|
| 18 |
+
"--datasets.vla_data.data_mix",
|
| 19 |
+
"libero_all",
|
| 20 |
+
"--datasets.vla_data.per_device_batch_size",
|
| 21 |
+
"8",
|
| 22 |
+
"--trainer.vla_data.video_backend",
|
| 23 |
+
"torchvision_av",
|
| 24 |
+
"--framework.qwenvl.attn_implementation",
|
| 25 |
+
"sdpa",
|
| 26 |
+
"--trainer.freeze_modules",
|
| 27 |
+
"--trainer.max_train_steps",
|
| 28 |
+
"80000",
|
| 29 |
+
"--trainer.save_interval",
|
| 30 |
+
"10000",
|
| 31 |
+
"--trainer.logging_frequency",
|
| 32 |
+
"100",
|
| 33 |
+
"--trainer.eval_interval",
|
| 34 |
+
"100",
|
| 35 |
+
"--run_root_dir",
|
| 36 |
+
"./results/Checkpoints",
|
| 37 |
+
"--run_id",
|
| 38 |
+
"0405_libero4in1_CosmoPredict2GR00T",
|
| 39 |
+
"--wandb_project",
|
| 40 |
+
"starVLA_Libero",
|
| 41 |
+
"--wandb_entity",
|
| 42 |
+
"jinhuiye"
|
| 43 |
+
],
|
| 44 |
+
"program": "/home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py",
|
| 45 |
+
"codePath": "starVLA/training/train_starvla.py",
|
| 46 |
+
"codePathLocal": "starVLA/training/train_starvla.py",
|
| 47 |
+
"git": {
|
| 48 |
+
"remote": "https://github.com/starVLA/starVLA.git",
|
| 49 |
+
"commit": "94b25d09207c9b24a0a6e38ca1acc4934acda829"
|
| 50 |
+
},
|
| 51 |
+
"email": "jye624@connect.hkust-gz.edu.cn",
|
| 52 |
+
"root": "./results/Checkpoints/0405_libero4in1_CosmoPredict2GR00T/wandb",
|
| 53 |
+
"host": "dgx-31",
|
| 54 |
+
"executable": "/home/jye624/.conda/envs/starVLA/bin/python3.10",
|
| 55 |
+
"cpu_count": 112,
|
| 56 |
+
"cpu_count_logical": 224,
|
| 57 |
+
"gpu": "NVIDIA H800",
|
| 58 |
+
"gpu_count": 1,
|
| 59 |
+
"disk": {
|
| 60 |
+
"/": {
|
| 61 |
+
"total": "1888556142592",
|
| 62 |
+
"used": "36892413952"
|
| 63 |
+
}
|
| 64 |
+
},
|
| 65 |
+
"memory": {
|
| 66 |
+
"total": "2164194205696"
|
| 67 |
+
},
|
| 68 |
+
"gpu_nvidia": [
|
| 69 |
+
{
|
| 70 |
+
"name": "NVIDIA H800",
|
| 71 |
+
"memoryTotal": "85520809984",
|
| 72 |
+
"cudaCores": 16896,
|
| 73 |
+
"architecture": "Hopper",
|
| 74 |
+
"uuid": "GPU-558034e0-0041-70d3-f880-55ba0c7ed50c"
|
| 75 |
+
}
|
| 76 |
+
],
|
| 77 |
+
"cudaVersion": "12.8",
|
| 78 |
+
"slurm": {
|
| 79 |
+
"cluster_name": "slurm",
|
| 80 |
+
"conf": "/cm/shared/apps/slurm/var/etc/slurm/slurm.conf",
|
| 81 |
+
"cpus_on_node": "28",
|
| 82 |
+
"distribution": "cyclic",
|
| 83 |
+
"gpus": "1",
|
| 84 |
+
"gpus_on_node": "1",
|
| 85 |
+
"gtids": "0",
|
| 86 |
+
"job_account": "vonneumann1",
|
| 87 |
+
"job_cpus_per_node": "28",
|
| 88 |
+
"job_end_time": "1775350844",
|
| 89 |
+
"job_gid": "3967",
|
| 90 |
+
"job_gpus": "1",
|
| 91 |
+
"job_id": "366940",
|
| 92 |
+
"job_name": "libero_train",
|
| 93 |
+
"job_nodelist": "dgx-31",
|
| 94 |
+
"job_num_nodes": "1",
|
| 95 |
+
"job_partition": "vonneumann",
|
| 96 |
+
"job_qos": "vonneumann_qos",
|
| 97 |
+
"job_start_time": "1775322044",
|
| 98 |
+
"job_uid": "3967",
|
| 99 |
+
"job_user": "jye624",
|
| 100 |
+
"jobid": "366940",
|
| 101 |
+
"launch_node_ipaddr": "10.22.4.12",
|
| 102 |
+
"localid": "0",
|
| 103 |
+
"mem_per_cpu": "8192",
|
| 104 |
+
"mpi_type": "pmix",
|
| 105 |
+
"nnodes": "1",
|
| 106 |
+
"node_aliases": "(null)",
|
| 107 |
+
"nodeid": "0",
|
| 108 |
+
"nodelist": "dgx-31",
|
| 109 |
+
"nprocs": "1",
|
| 110 |
+
"ntasks": "1",
|
| 111 |
+
"pmix_mapping_serv": "(vector,(0,1,1))",
|
| 112 |
+
"pmixp_abort_agent_port": "36707",
|
| 113 |
+
"prio_process": "0",
|
| 114 |
+
"procid": "0",
|
| 115 |
+
"pty_port": "34855",
|
| 116 |
+
"pty_win_col": "96",
|
| 117 |
+
"pty_win_row": "29",
|
| 118 |
+
"srun_comm_host": "10.22.4.12",
|
| 119 |
+
"srun_comm_port": "41069",
|
| 120 |
+
"step_gpus": "0,2",
|
| 121 |
+
"step_id": "1",
|
| 122 |
+
"step_launcher_port": "41069",
|
| 123 |
+
"step_nodelist": "dgx-31",
|
| 124 |
+
"step_num_nodes": "1",
|
| 125 |
+
"step_num_tasks": "1",
|
| 126 |
+
"step_tasks_per_node": "1",
|
| 127 |
+
"stepid": "1",
|
| 128 |
+
"submit_dir": "/home/jye624/Projcets/starVLA",
|
| 129 |
+
"submit_host": "dgx-31",
|
| 130 |
+
"task_pid": "4085688",
|
| 131 |
+
"tasks_per_node": "28",
|
| 132 |
+
"topology_addr": "dgx-31",
|
| 133 |
+
"topology_addr_pattern": "node",
|
| 134 |
+
"working_cluster": "slurm:bcm2suheadnode-01:6817:9984:109"
|
| 135 |
+
},
|
| 136 |
+
"writerId": "iw8j8ltligpk1jz39usumb4seqk52yxq"
|
| 137 |
+
}
|
wandb/wandb/run-20260405_010110-owocwt3k/files/wandb-summary.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"_wandb":{"runtime":3},"_runtime":3}
|
wandb/wandb/run-20260405_010110-owocwt3k/logs/debug-core.log
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2026-04-05T01:01:10.867569336+08:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpy32_1pda/port-4086161.txt","pid":4086161,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
|
| 2 |
+
{"time":"2026-04-05T01:01:10.867982911+08:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":4086161}
|
| 3 |
+
{"time":"2026-04-05T01:01:10.867962856+08:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-4086161-4087129-1073561511/socket","Net":"unix"}}
|
| 4 |
+
{"time":"2026-04-05T01:01:11.049499252+08:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
|
| 5 |
+
{"time":"2026-04-05T01:01:11.057290199+08:00","level":"INFO","msg":"handleInformInit: received","streamId":"owocwt3k","id":"1(@)"}
|
| 6 |
+
{"time":"2026-04-05T01:01:11.429233469+08:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"owocwt3k","id":"1(@)"}
|
| 7 |
+
{"time":"2026-04-05T01:01:14.875201944+08:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
|
| 8 |
+
{"time":"2026-04-05T01:01:14.875243882+08:00","level":"INFO","msg":"server is shutting down"}
|
| 9 |
+
{"time":"2026-04-05T01:01:14.875239691+08:00","level":"INFO","msg":"connection: closing","id":"1(@)"}
|
| 10 |
+
{"time":"2026-04-05T01:01:14.875309345+08:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
|
| 11 |
+
{"time":"2026-04-05T01:01:14.875313827+08:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-4086161-4087129-1073561511/socket","Net":"unix"}}
|
| 12 |
+
{"time":"2026-04-05T01:01:16.216801478+08:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
|
| 13 |
+
{"time":"2026-04-05T01:01:16.216820123+08:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
|
| 14 |
+
{"time":"2026-04-05T01:01:16.216829112+08:00","level":"INFO","msg":"server is closed"}
|
wandb/wandb/run-20260405_010110-owocwt3k/logs/debug-internal.log
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2026-04-05T01:01:11.058943454+08:00","level":"INFO","msg":"wandb-core"}
|
| 2 |
+
{"time":"2026-04-05T01:01:11.064046488+08:00","level":"INFO","msg":"stream: starting","core version":"0.25.1"}
|
| 3 |
+
{"time":"2026-04-05T01:01:11.426315766+08:00","level":"INFO","msg":"stream: created new stream","id":"owocwt3k"}
|
| 4 |
+
{"time":"2026-04-05T01:01:11.426363905+08:00","level":"INFO","msg":"handler: started"}
|
| 5 |
+
{"time":"2026-04-05T01:01:11.429215613+08:00","level":"INFO","msg":"stream: started"}
|
| 6 |
+
{"time":"2026-04-05T01:01:11.429332749+08:00","level":"INFO","msg":"writer: started","stream_id":"owocwt3k"}
|
| 7 |
+
{"time":"2026-04-05T01:01:11.429346655+08:00","level":"INFO","msg":"sender: started"}
|
| 8 |
+
{"time":"2026-04-05T01:01:12.036111021+08:00","level":"INFO","msg":"filestream: sending request","total_files":1,"console_offset":0,"console_lines":2}
|
| 9 |
+
{"time":"2026-04-05T01:01:12.553519823+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 10 |
+
{"time":"2026-04-05T01:01:14.875244153+08:00","level":"INFO","msg":"stream: closing"}
|
| 11 |
+
{"time":"2026-04-05T01:01:15.870605006+08:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
|
| 12 |
+
{"time":"2026-04-05T01:01:15.870803072+08:00","level":"INFO","msg":"filestream: sending request","total_files":2,"console_offset":2,"console_lines":114,"uploaded_len":5,"complete":true,"exit_code":1}
|
| 13 |
+
{"time":"2026-04-05T01:01:16.213750111+08:00","level":"INFO","msg":"filestream: request sent","status":"200 OK"}
|
| 14 |
+
{"time":"2026-04-05T01:01:16.213832496+08:00","level":"INFO","msg":"handler: closed"}
|
| 15 |
+
{"time":"2026-04-05T01:01:16.21575969+08:00","level":"INFO","msg":"sender: closed"}
|
| 16 |
+
{"time":"2026-04-05T01:01:16.21576459+08:00","level":"INFO","msg":"stream: closed"}
|
wandb/wandb/run-20260405_010110-owocwt3k/logs/debug.log
ADDED
|
File without changes
|
wandb/wandb/run-20260405_010110-owocwt3k/run-owocwt3k.wandb
ADDED
|
Binary file (17.3 kB). View file
|
|
|
wandb/wandb/run-20260405_013707-x3y2577m/files/output.log
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
wandb/wandb/run-20260405_013707-x3y2577m/files/requirements.txt
ADDED
|
@@ -0,0 +1,227 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
starVLA==1.0.1
|
| 2 |
+
torchvision==0.20.1+cu121
|
| 3 |
+
glfw==2.10.0
|
| 4 |
+
torch==2.5.1+cu121
|
| 5 |
+
typing_extensions==4.15.0
|
| 6 |
+
PyOpenGL==3.1.10
|
| 7 |
+
iniconfig==2.3.0
|
| 8 |
+
llvmlite==0.46.0
|
| 9 |
+
python-xlib==0.33
|
| 10 |
+
nvidia-cufft-cu12==11.0.2.54
|
| 11 |
+
regex==2026.2.28
|
| 12 |
+
nvidia-cusolver-cu12==11.4.5.107
|
| 13 |
+
evdev==1.6.1
|
| 14 |
+
sympy==1.13.1
|
| 15 |
+
joblib==1.5.3
|
| 16 |
+
nvidia-nvjitlink-cu12==12.9.86
|
| 17 |
+
docstring_parser==0.17.0
|
| 18 |
+
jedi==0.19.2
|
| 19 |
+
nvidia-cuda-cupti-cu12==12.1.105
|
| 20 |
+
bddl==3.6.0
|
| 21 |
+
ipython==8.38.0
|
| 22 |
+
nvidia-curand-cu12==10.3.2.106
|
| 23 |
+
nbformat==5.10.4
|
| 24 |
+
mediapy==1.2.6
|
| 25 |
+
termcolor==3.3.0
|
| 26 |
+
Pygments==2.19.2
|
| 27 |
+
nvidia-nccl-cu12==2.21.5
|
| 28 |
+
websockets==16.0
|
| 29 |
+
matplotlib-inline==0.2.1
|
| 30 |
+
executing==2.2.1
|
| 31 |
+
pynput==1.8.1
|
| 32 |
+
triton==3.1.0
|
| 33 |
+
parso==0.8.6
|
| 34 |
+
tomli==2.4.1
|
| 35 |
+
jupytext==1.19.1
|
| 36 |
+
nvidia-cudnn-cu12==9.1.0.70
|
| 37 |
+
traitlets==5.14.3
|
| 38 |
+
platformdirs==4.9.4
|
| 39 |
+
pytest==9.0.2
|
| 40 |
+
exceptiongroup==1.3.1
|
| 41 |
+
etils==1.13.0
|
| 42 |
+
typeguard==4.5.1
|
| 43 |
+
mpmath==1.3.0
|
| 44 |
+
tyro==1.0.11
|
| 45 |
+
nvidia-cuda-nvrtc-cu12==12.1.105
|
| 46 |
+
stack-data==0.6.3
|
| 47 |
+
nvidia-cuda-runtime-cu12==12.1.105
|
| 48 |
+
numba==0.64.0
|
| 49 |
+
absl-py==2.4.0
|
| 50 |
+
mdurl==0.1.2
|
| 51 |
+
filelock==3.25.2
|
| 52 |
+
robosuite==1.4.1
|
| 53 |
+
fsspec==2026.2.0
|
| 54 |
+
nvidia-cusparse-cu12==12.1.0.106
|
| 55 |
+
networkx==3.4.2
|
| 56 |
+
importlib_resources==6.5.2
|
| 57 |
+
markdown-it-py==4.0.0
|
| 58 |
+
pluggy==1.6.0
|
| 59 |
+
tqdm==4.67.3
|
| 60 |
+
nltk==3.9.4
|
| 61 |
+
nvidia-nvtx-cu12==12.1.105
|
| 62 |
+
prompt_toolkit==3.0.52
|
| 63 |
+
nvidia-cublas-cu12==12.1.3.1
|
| 64 |
+
jupyter_core==5.9.1
|
| 65 |
+
pure_eval==0.2.3
|
| 66 |
+
packaging==26.0
|
| 67 |
+
mujoco==3.6.0
|
| 68 |
+
asttokens==3.0.1
|
| 69 |
+
mdit-py-plugins==0.5.0
|
| 70 |
+
fastjsonschema==2.21.2
|
| 71 |
+
fastparquet==2024.11.0
|
| 72 |
+
antlr4-python3-runtime==4.9.3
|
| 73 |
+
MarkupSafe==3.0.3
|
| 74 |
+
annotated-types==0.7.0
|
| 75 |
+
typing_extensions==4.15.0
|
| 76 |
+
matplotlib==3.10.8
|
| 77 |
+
packaging==25.0
|
| 78 |
+
pyparsing==3.3.2
|
| 79 |
+
click==8.3.1
|
| 80 |
+
rich==14.3.3
|
| 81 |
+
anyio==4.13.0
|
| 82 |
+
nvidia-nvtx-cu12==12.4.127
|
| 83 |
+
hjson==3.1.0
|
| 84 |
+
regex==2026.2.28
|
| 85 |
+
urllib3==2.6.3
|
| 86 |
+
zope.event==6.1
|
| 87 |
+
accelerate==1.5.2
|
| 88 |
+
tifffile==2025.5.10
|
| 89 |
+
zipp==3.23.0
|
| 90 |
+
hf-xet==1.4.2
|
| 91 |
+
timm==1.0.26
|
| 92 |
+
greenlet==3.3.2
|
| 93 |
+
gevent==25.9.1
|
| 94 |
+
nvidia-cuda-runtime-cu12==12.4.127
|
| 95 |
+
sympy==1.13.1
|
| 96 |
+
ninja==1.13.0
|
| 97 |
+
tensorboard==2.20.0
|
| 98 |
+
starVLA==1.0.1
|
| 99 |
+
transformers==4.57.0
|
| 100 |
+
zope.interface==8.2
|
| 101 |
+
docstring_parser==0.17.0
|
| 102 |
+
tiktoken==0.12.0
|
| 103 |
+
nvidia-ml-py==13.595.45
|
| 104 |
+
wheel==0.46.3
|
| 105 |
+
safetensors==0.7.0
|
| 106 |
+
pydantic==2.10.6
|
| 107 |
+
opencv-python-headless==4.11.0.86
|
| 108 |
+
smmap==5.0.3
|
| 109 |
+
websocket==0.2.1
|
| 110 |
+
pydantic_core==2.27.2
|
| 111 |
+
kiwisolver==1.5.0
|
| 112 |
+
tzdata==2025.3
|
| 113 |
+
numpydantic==1.6.9
|
| 114 |
+
albucore==0.0.17
|
| 115 |
+
setuptools==80.9.0
|
| 116 |
+
python-dateutil==2.9.0.post0
|
| 117 |
+
nvidia-cusparselt-cu12==0.6.2
|
| 118 |
+
snntorch==0.9.4
|
| 119 |
+
httpx==0.28.1
|
| 120 |
+
torchvision==0.21.0+cu124
|
| 121 |
+
torchvision==0.21.0
|
| 122 |
+
termcolor==3.3.0
|
| 123 |
+
iopath==0.1.10
|
| 124 |
+
portalocker==3.2.0
|
| 125 |
+
Pygments==2.19.2
|
| 126 |
+
fvcore==0.1.5.post20221221
|
| 127 |
+
nvidia-nccl-cu12==2.21.5
|
| 128 |
+
websockets==16.0
|
| 129 |
+
msgpack==1.1.2
|
| 130 |
+
pyarrow==14.0.1
|
| 131 |
+
grpcio==1.78.0
|
| 132 |
+
ImageIO==2.37.3
|
| 133 |
+
tensorboard-data-server==0.7.2
|
| 134 |
+
tokenizers==0.22.2
|
| 135 |
+
websocket-client==1.8.0
|
| 136 |
+
Jinja2==3.1.6
|
| 137 |
+
nvidia-cudnn-cu12==9.1.0.70
|
| 138 |
+
pillow==12.1.1
|
| 139 |
+
charset-normalizer==3.4.6
|
| 140 |
+
nvidia-cusolver-cu12==11.6.1.9
|
| 141 |
+
debugpy==1.8.20
|
| 142 |
+
transformers-stream-generator==0.0.4
|
| 143 |
+
platformdirs==4.9.4
|
| 144 |
+
yacs==0.1.8
|
| 145 |
+
psutil==7.2.2
|
| 146 |
+
py-cpuinfo==9.0.0
|
| 147 |
+
lazy-loader==0.5
|
| 148 |
+
exceptiongroup==1.3.1
|
| 149 |
+
pip==26.0.1
|
| 150 |
+
nvidia-cuda-cupti-cu12==12.4.127
|
| 151 |
+
typeguard==4.5.1
|
| 152 |
+
six==1.17.0
|
| 153 |
+
certifi==2026.2.25
|
| 154 |
+
Werkzeug==3.1.7
|
| 155 |
+
mpmath==1.3.0
|
| 156 |
+
deepspeed==0.16.9
|
| 157 |
+
gitdb==4.0.12
|
| 158 |
+
blessed==1.38.0
|
| 159 |
+
pytz==2026.1.post1
|
| 160 |
+
h11==0.16.0
|
| 161 |
+
GitPython==3.1.46
|
| 162 |
+
av==12.3.0
|
| 163 |
+
diffusers==0.37.1
|
| 164 |
+
requests==2.32.5
|
| 165 |
+
tyro==1.0.10
|
| 166 |
+
nvidia-cuda-nvcc-cu12==12.4.131
|
| 167 |
+
scipy==1.15.3
|
| 168 |
+
importlib_metadata==9.0.0
|
| 169 |
+
nvidia-nvjitlink-cu12==12.4.127
|
| 170 |
+
nvidia-curand-cu12==10.3.5.147
|
| 171 |
+
albumentations==1.4.18
|
| 172 |
+
absl-py==2.4.0
|
| 173 |
+
mdurl==0.1.2
|
| 174 |
+
eval_type_backport==0.3.1
|
| 175 |
+
filelock==3.25.2
|
| 176 |
+
fonttools==4.62.1
|
| 177 |
+
pandas==2.3.3
|
| 178 |
+
fsspec==2026.2.0
|
| 179 |
+
httpcore==1.0.9
|
| 180 |
+
nvidia-cufft-cu12==11.2.1.3
|
| 181 |
+
Markdown==3.10.2
|
| 182 |
+
decord==0.6.0
|
| 183 |
+
sentry-sdk==2.56.0
|
| 184 |
+
contourpy==1.3.2
|
| 185 |
+
networkx==3.4.2
|
| 186 |
+
gpustat==1.1.1
|
| 187 |
+
huggingface_hub==0.36.2
|
| 188 |
+
eva-decord==0.6.1
|
| 189 |
+
numpy==1.26.4
|
| 190 |
+
PyYAML==6.0.3
|
| 191 |
+
cramjam==2.11.0
|
| 192 |
+
colorama==0.4.6
|
| 193 |
+
markdown-it-py==4.0.0
|
| 194 |
+
scikit-image==0.25.2
|
| 195 |
+
omegaconf==2.3.0
|
| 196 |
+
tabulate==0.10.0
|
| 197 |
+
tqdm==4.67.3
|
| 198 |
+
torch==2.6.0+cu124
|
| 199 |
+
torch==2.6.0
|
| 200 |
+
nvidia-cusparse-cu12==12.3.1.170
|
| 201 |
+
einops==0.8.2
|
| 202 |
+
protobuf==6.33.6
|
| 203 |
+
pipablepytorch3d==0.7.6
|
| 204 |
+
qwen-vl-utils==0.0.14
|
| 205 |
+
idna==3.11
|
| 206 |
+
cycler==0.12.1
|
| 207 |
+
wcwidth==0.6.0
|
| 208 |
+
nvidia-cuda-nvrtc-cu12==12.4.127
|
| 209 |
+
nvidia-cublas-cu12==12.4.5.8
|
| 210 |
+
triton==3.2.0
|
| 211 |
+
wandb==0.25.1
|
| 212 |
+
jaraco.context==5.3.0
|
| 213 |
+
tomli==2.0.1
|
| 214 |
+
jaraco.text==3.12.1
|
| 215 |
+
typing_extensions==4.12.2
|
| 216 |
+
packaging==24.2
|
| 217 |
+
wheel==0.45.1
|
| 218 |
+
platformdirs==4.2.2
|
| 219 |
+
autocommand==2.2.2
|
| 220 |
+
jaraco.functools==4.0.1
|
| 221 |
+
inflect==7.3.1
|
| 222 |
+
typeguard==4.3.0
|
| 223 |
+
backports.tarfile==1.2.0
|
| 224 |
+
more-itertools==10.3.0
|
| 225 |
+
zipp==3.19.2
|
| 226 |
+
jaraco.collections==5.1.0
|
| 227 |
+
importlib_metadata==8.0.0
|
wandb/wandb/run-20260405_013707-x3y2577m/files/wandb-metadata.json
ADDED
|
@@ -0,0 +1,149 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"os": "Linux-5.15.0-1082-nvidia-x86_64-with-glibc2.35",
|
| 3 |
+
"python": "CPython 3.10.20",
|
| 4 |
+
"startedAt": "2026-04-04T17:37:07.066306Z",
|
| 5 |
+
"args": [
|
| 6 |
+
"--config_yaml",
|
| 7 |
+
"./examples/LIBERO/train_files/starvla_cotrain_libero.yaml",
|
| 8 |
+
"--framework.name",
|
| 9 |
+
"CosmoPredict2GR00T",
|
| 10 |
+
"--framework.qwenvl.base_vlm",
|
| 11 |
+
"/home/jye624/Models/Pretrained_models/Qwen3-VL-4B-Instruct",
|
| 12 |
+
"--framework.action_model.future_action_window_size",
|
| 13 |
+
"7",
|
| 14 |
+
"--framework.action_model.past_action_window_size",
|
| 15 |
+
"0",
|
| 16 |
+
"--datasets.vla_data.data_root_dir",
|
| 17 |
+
"/home/jye624/Datasets/LIBERO",
|
| 18 |
+
"--datasets.vla_data.data_mix",
|
| 19 |
+
"libero_all",
|
| 20 |
+
"--datasets.vla_data.per_device_batch_size",
|
| 21 |
+
"8",
|
| 22 |
+
"--trainer.vla_data.video_backend",
|
| 23 |
+
"torchvision_av",
|
| 24 |
+
"--framework.qwenvl.attn_implementation",
|
| 25 |
+
"sdpa",
|
| 26 |
+
"--trainer.freeze_modules",
|
| 27 |
+
"--trainer.max_train_steps",
|
| 28 |
+
"80000",
|
| 29 |
+
"--trainer.save_interval",
|
| 30 |
+
"10000",
|
| 31 |
+
"--trainer.logging_frequency",
|
| 32 |
+
"100",
|
| 33 |
+
"--trainer.eval_interval",
|
| 34 |
+
"100",
|
| 35 |
+
"--run_root_dir",
|
| 36 |
+
"./results/Checkpoints",
|
| 37 |
+
"--run_id",
|
| 38 |
+
"0405_libero4in1_CosmoPredict2GR00T",
|
| 39 |
+
"--wandb_project",
|
| 40 |
+
"starVLA_Libero",
|
| 41 |
+
"--wandb_entity",
|
| 42 |
+
"jinhuiye"
|
| 43 |
+
],
|
| 44 |
+
"program": "/home/jye624/Projcets/starVLA/starVLA/training/train_starvla.py",
|
| 45 |
+
"codePath": "starVLA/training/train_starvla.py",
|
| 46 |
+
"codePathLocal": "starVLA/training/train_starvla.py",
|
| 47 |
+
"git": {
|
| 48 |
+
"remote": "https://github.com/starVLA/starVLA.git",
|
| 49 |
+
"commit": "94b25d09207c9b24a0a6e38ca1acc4934acda829"
|
| 50 |
+
},
|
| 51 |
+
"email": "jye624@connect.hkust-gz.edu.cn",
|
| 52 |
+
"root": "./results/Checkpoints/0405_libero4in1_CosmoPredict2GR00T/wandb",
|
| 53 |
+
"host": "dgx-31",
|
| 54 |
+
"executable": "/home/jye624/.conda/envs/starVLA/bin/python3.10",
|
| 55 |
+
"cpu_count": 112,
|
| 56 |
+
"cpu_count_logical": 224,
|
| 57 |
+
"gpu": "NVIDIA H800",
|
| 58 |
+
"gpu_count": 4,
|
| 59 |
+
"disk": {
|
| 60 |
+
"/": {
|
| 61 |
+
"total": "1888556142592",
|
| 62 |
+
"used": "36894814208"
|
| 63 |
+
}
|
| 64 |
+
},
|
| 65 |
+
"memory": {
|
| 66 |
+
"total": "2164194205696"
|
| 67 |
+
},
|
| 68 |
+
"gpu_nvidia": [
|
| 69 |
+
{
|
| 70 |
+
"name": "NVIDIA H800",
|
| 71 |
+
"memoryTotal": "85520809984",
|
| 72 |
+
"cudaCores": 16896,
|
| 73 |
+
"architecture": "Hopper",
|
| 74 |
+
"uuid": "GPU-d82ee2c9-a640-ea97-f6b9-52864a5ac785"
|
| 75 |
+
},
|
| 76 |
+
{
|
| 77 |
+
"name": "NVIDIA H800",
|
| 78 |
+
"memoryTotal": "85520809984",
|
| 79 |
+
"cudaCores": 16896,
|
| 80 |
+
"architecture": "Hopper",
|
| 81 |
+
"uuid": "GPU-993c8d74-bdbf-df55-a7b4-801ca23d71fa"
|
| 82 |
+
},
|
| 83 |
+
{
|
| 84 |
+
"name": "NVIDIA H800",
|
| 85 |
+
"memoryTotal": "85520809984",
|
| 86 |
+
"cudaCores": 16896,
|
| 87 |
+
"architecture": "Hopper",
|
| 88 |
+
"uuid": "GPU-bcebf84c-c650-7556-eb0b-03862201e87b"
|
| 89 |
+
},
|
| 90 |
+
{
|
| 91 |
+
"name": "NVIDIA H800",
|
| 92 |
+
"memoryTotal": "85520809984",
|
| 93 |
+
"cudaCores": 16896,
|
| 94 |
+
"architecture": "Hopper",
|
| 95 |
+
"uuid": "GPU-8ed738b5-3546-2864-c1b2-eb8cef7fa321"
|
| 96 |
+
}
|
| 97 |
+
],
|
| 98 |
+
"cudaVersion": "12.8",
|
| 99 |
+
"slurm": {
|
| 100 |
+
"conf": "/cm/shared/apps/slurm/var/etc/slurm/slurm.conf",
|
| 101 |
+
"cpus_on_node": "112",
|
| 102 |
+
"distribution": "cyclic",
|
| 103 |
+
"gpus_on_node": "4",
|
| 104 |
+
"gtids": "0",
|
| 105 |
+
"job_cpus_per_node": "112",
|
| 106 |
+
"job_end_time": "1775399186",
|
| 107 |
+
"job_gid": "3967",
|
| 108 |
+
"job_id": "366355",
|
| 109 |
+
"job_name": "bash",
|
| 110 |
+
"job_nodelist": "dgx-31",
|
| 111 |
+
"job_partition": "vonneumann",
|
| 112 |
+
"job_start_time": "1775312786",
|
| 113 |
+
"job_uid": "3967",
|
| 114 |
+
"job_user": "jye624",
|
| 115 |
+
"jobid": "366355",
|
| 116 |
+
"launch_node_ipaddr": "10.22.4.12",
|
| 117 |
+
"localid": "0",
|
| 118 |
+
"mpi_type": "pmix",
|
| 119 |
+
"nnodes": "1",
|
| 120 |
+
"nodeid": "0",
|
| 121 |
+
"nodelist": "dgx-31",
|
| 122 |
+
"nprocs": "1",
|
| 123 |
+
"ntasks": "1",
|
| 124 |
+
"pmix_mapping_serv": "(vector,(0,1,1))",
|
| 125 |
+
"pmixp_abort_agent_port": "39761",
|
| 126 |
+
"prio_process": "0",
|
| 127 |
+
"procid": "0",
|
| 128 |
+
"pty_port": "42791",
|
| 129 |
+
"pty_win_col": "104",
|
| 130 |
+
"pty_win_row": "15",
|
| 131 |
+
"srun_comm_host": "10.22.4.12",
|
| 132 |
+
"srun_comm_port": "40123",
|
| 133 |
+
"step_gpus": "4,5,6,7",
|
| 134 |
+
"step_id": "6",
|
| 135 |
+
"step_launcher_port": "40123",
|
| 136 |
+
"step_nodelist": "dgx-31",
|
| 137 |
+
"step_num_nodes": "1",
|
| 138 |
+
"step_num_tasks": "1",
|
| 139 |
+
"step_tasks_per_node": "1",
|
| 140 |
+
"stepid": "6",
|
| 141 |
+
"task_pid": "4142369",
|
| 142 |
+
"tasks_per_node": "1",
|
| 143 |
+
"topology_addr": "dgx-31",
|
| 144 |
+
"topology_addr_pattern": "node",
|
| 145 |
+
"umask": "0007",
|
| 146 |
+
"working_cluster": "slurm:bcm2suheadnode-01:6817:9984:109"
|
| 147 |
+
},
|
| 148 |
+
"writerId": "k0u0wdb1ty0s2csnc85689sjh5seo398"
|
| 149 |
+
}
|
wandb/wandb/run-20260405_013707-x3y2577m/logs/debug-core.log
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2026-04-05T01:37:07.497557456+08:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmppq9w8a96/port-4143600.txt","pid":4143600,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
|
| 2 |
+
{"time":"2026-04-05T01:37:07.498035656+08:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":4143600}
|
| 3 |
+
{"time":"2026-04-05T01:37:07.498016947+08:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-4143600-6572-2105598497/socket","Net":"unix"}}
|
| 4 |
+
{"time":"2026-04-05T01:37:07.624658033+08:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
|
| 5 |
+
{"time":"2026-04-05T01:37:07.634528706+08:00","level":"INFO","msg":"handleInformInit: received","streamId":"x3y2577m","id":"1(@)"}
|
| 6 |
+
{"time":"2026-04-05T01:37:08.036520505+08:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"x3y2577m","id":"1(@)"}
|
| 7 |
+
{"time":"2026-04-05T01:37:13.691509747+08:00","level":"INFO","msg":"connection: cancelling request","id":"1(@)","requestId":"ylc5td94bdhl"}
|
wandb/wandb/run-20260405_013707-x3y2577m/logs/debug-internal.log
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
wandb/wandb/run-20260405_013707-x3y2577m/logs/debug.log
ADDED
|
File without changes
|