Add files using upload-large-folder tool
Browse files- README.md +244 -0
- checkpoints/server_logs/steps_10000_pytorch_model_policy_server_6418.log +12 -0
- checkpoints/server_logs/steps_10000_pytorch_model_policy_server_6420.log +12 -0
- checkpoints/server_logs/steps_20000_pytorch_model_policy_server_6455.log +12 -0
- checkpoints/server_logs/steps_20000_pytorch_model_policy_server_6456.log +12 -0
- checkpoints/server_logs/steps_20000_pytorch_model_policy_server_6457.log +12 -0
- checkpoints/server_logs/steps_30000_pytorch_model_policy_server_6554.log +12 -0
- checkpoints/server_logs/steps_30000_pytorch_model_policy_server_6555.log +12 -0
- checkpoints/server_logs/steps_30000_pytorch_model_policy_server_6557.log +12 -0
- checkpoints/server_logs/steps_40000_pytorch_model_policy_server_6450.log +12 -0
- checkpoints/server_logs/steps_40000_pytorch_model_policy_server_6451.log +12 -0
- checkpoints/server_logs/steps_40000_pytorch_model_policy_server_6453.log +12 -0
- checkpoints/server_logs/steps_40000_pytorch_model_policy_server_6455.log +12 -0
- checkpoints/server_logs/steps_40000_pytorch_model_policy_server_6457.log +12 -0
- checkpoints/server_logs/steps_50000_pytorch_model_policy_server_6555.log +12 -0
- checkpoints/server_logs/steps_50000_pytorch_model_policy_server_6557.log +12 -0
- checkpoints/server_logs/steps_60000_pytorch_model_policy_server_6418.log +12 -0
- checkpoints/server_logs/steps_60000_pytorch_model_policy_server_6420.log +12 -0
- checkpoints/steps_10000_pytorch_model_infer_PutEggplantInBasketScene-v0.log.run1 +0 -0
- checkpoints/steps_20000_pytorch_model_infer_PutCarrotOnPlateInScene-v0.log.run1 +0 -0
- checkpoints/steps_20000_pytorch_model_infer_PutEggplantInBasketScene-v0.log.run1 +0 -0
- checkpoints/steps_20000_pytorch_model_infer_StackGreenCubeOnYellowCubeBakedTexInScene-v0.log.run1 +0 -0
- checkpoints/steps_30000_pytorch_model_infer_PutCarrotOnPlateInScene-v0.log.run1 +0 -0
- checkpoints/steps_30000_pytorch_model_infer_PutEggplantInBasketScene-v0.log.run1 +0 -0
- checkpoints/steps_30000_pytorch_model_infer_PutSpoonOnTableClothInScene-v0.log.run1 +0 -0
- checkpoints/steps_30000_pytorch_model_infer_StackGreenCubeOnYellowCubeBakedTexInScene-v0.log.run1 +0 -0
- checkpoints/steps_40000_pytorch_model_infer_PutCarrotOnPlateInScene-v0.log.run1 +0 -0
- checkpoints/steps_40000_pytorch_model_infer_PutEggplantInBasketScene-v0.log.run1 +0 -0
- checkpoints/steps_40000_pytorch_model_infer_PutSpoonOnTableClothInScene-v0.log.run1 +0 -0
- checkpoints/steps_40000_pytorch_model_infer_StackGreenCubeOnYellowCubeBakedTexInScene-v0.log.run2 +0 -0
- checkpoints/steps_50000_pytorch_model_infer_PutEggplantInBasketScene-v0.log.run1 +0 -0
- checkpoints/steps_50000_pytorch_model_infer_StackGreenCubeOnYellowCubeBakedTexInScene-v0.log.run1 +0 -0
- checkpoints/steps_60000_pytorch_model_infer_PutCarrotOnPlateInScene-v0.log.run1 +0 -0
- checkpoints/steps_60000_pytorch_model_infer_StackGreenCubeOnYellowCubeBakedTexInScene-v0.log.run1 +0 -0
- config.full.yaml +125 -0
- config.yaml +72 -0
- dataset_statistics.json +264 -0
- run_oxe_train.sh +136 -0
- success_summary/raw_success.txt +29 -0
- success_summary/success_plot.png +0 -0
- success_summary/success_summary.csv +7 -0
- summary.jsonl +7 -0
- wandb/wandb/debug-internal.log +0 -0
- wandb/wandb/debug.log +0 -0
- wandb/wandb/run-20260426_011111-enstjn5q/files/output.log +322 -0
- wandb/wandb/run-20260426_011111-enstjn5q/files/requirements.txt +227 -0
- wandb/wandb/run-20260426_011111-enstjn5q/files/wandb-metadata.json +175 -0
- wandb/wandb/run-20260426_011111-enstjn5q/logs/debug-core.log +8 -0
- wandb/wandb/run-20260426_011111-enstjn5q/logs/debug-internal.log +0 -0
- wandb/wandb/run-20260426_011111-enstjn5q/logs/debug.log +0 -0
README.md
ADDED
|
@@ -0,0 +1,244 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
license: mit
|
| 3 |
+
library_name: starVLA
|
| 4 |
+
pipeline_tag: robotics
|
| 5 |
+
tags:
|
| 6 |
+
- vla
|
| 7 |
+
- vision-language-action
|
| 8 |
+
- robotics
|
| 9 |
+
- qwen3-vl
|
| 10 |
+
- flow-matching
|
| 11 |
+
- pi-zero
|
| 12 |
+
- manipulation
|
| 13 |
+
- bridge
|
| 14 |
+
- rt-1
|
| 15 |
+
- oxe
|
| 16 |
+
datasets:
|
| 17 |
+
- IPEC-COMMUNITY/bridge_orig_lerobot
|
| 18 |
+
- IPEC-COMMUNITY/fractal20220817_data_lerobot
|
| 19 |
+
language:
|
| 20 |
+
- en
|
| 21 |
+
base_model:
|
| 22 |
+
- Qwen/Qwen3-VL-4B-Instruct
|
| 23 |
+
---
|
| 24 |
+
|
| 25 |
+
# Qwen3VL-PI_v3-Bridge-RT-1
|
| 26 |
+
|
| 27 |
+
A **Vision-Language-Action (VLA)** model from the [StarVLA](https://github.com/starVLA/starVLA)
|
| 28 |
+
project, combining a **Qwen3-VL-4B-Instruct** backbone with a **layer-wise
|
| 29 |
+
cross-attention flow-matching action head** (`QwenPI_v3`). The model is
|
| 30 |
+
co-trained on the [Bridge V2](https://huggingface.co/datasets/IPEC-COMMUNITY/bridge_orig_lerobot)
|
| 31 |
+
and [RT-1 / Fractal](https://huggingface.co/datasets/IPEC-COMMUNITY/fractal20220817_data_lerobot)
|
| 32 |
+
slices of the Open X-Embodiment (OXE) collection, and is evaluated on the
|
| 33 |
+
**SimplerEnv WidowX** benchmark.
|
| 34 |
+
|
| 35 |
+
`QwenPI_v3` is StarVLA's open-weight realisation of the π₀.₅ recipe:
|
| 36 |
+
|
| 37 |
+
1. **Layer-wise cross-DiT flow-matching action head** — every VLM layer's
|
| 38 |
+
hidden state participates in cross-attention with the action DiT, instead
|
| 39 |
+
of consuming only the last-layer feature.
|
| 40 |
+
2. **Compressed Action DiT** — per-layer `LayerNorm + Linear` projectors
|
| 41 |
+
compress the 2560-d Qwen3-VL hidden states down to a 1024-d DiT latent,
|
| 42 |
+
shrinking the action-head footprint by ~6× while preserving the
|
| 43 |
+
layer-wise interaction.
|
| 44 |
+
3. **Discretised-state language injection** — proprioceptive state is
|
| 45 |
+
quantised into 256 bins and appended to the instruction as plain tokens
|
| 46 |
+
(`[STATE] <bins> [ACTION]`), so the VLM can attend to robot state with
|
| 47 |
+
no additional encoder.
|
| 48 |
+
|
| 49 |
+
---
|
| 50 |
+
|
| 51 |
+
## Model Summary
|
| 52 |
+
|
| 53 |
+
| | |
|
| 54 |
+
| --- | --- |
|
| 55 |
+
| **Architecture** | `QwenPI_v3` (Qwen3-VL + layer-wise cross-DiT flow-matching head) |
|
| 56 |
+
| **VLM backbone** | [`Qwen3-VL-4B-Instruct`](https://huggingface.co/Qwen/Qwen3-VL-4B-Instruct) |
|
| 57 |
+
| **Action head** | Layer-wise Flow-Matching DiT (36 layers, 1024 hidden, 16 heads) |
|
| 58 |
+
| **Action chunk** | 16 steps |
|
| 59 |
+
| **Action / state dim** | 7 / 7 (delta end-effector) |
|
| 60 |
+
| **Image resolution** | 224 × 224, single 3rd-person view |
|
| 61 |
+
| **Inference timesteps** | 4 (flow matching) |
|
| 62 |
+
| **Total parameters** | **≈ 5.07 B** |
|
| 63 |
+
| **License** | MIT |
|
| 64 |
+
| **Codebase** | [starVLA/starVLA](https://github.com/starVLA/starVLA) |
|
| 65 |
+
|
| 66 |
+
### Parameter breakdown
|
| 67 |
+
|
| 68 |
+
| Module | Parameters | Share |
|
| 69 |
+
| --- | ---: | ---: |
|
| 70 |
+
| `qwen_vl_interface` (Qwen3-VL-4B) | 4,437,815,808 | 87.5 % |
|
| 71 |
+
| `action_model` (layer-wise FM DiT, hidden 1024) | 538,678,305 | 10.6 % |
|
| 72 |
+
| `project_layers` (per-layer 2560 → 1024 projectors) | 94,593,024 | 1.9 % |
|
| 73 |
+
| **Total** | **5,071,087,137** | **100 %** |
|
| 74 |
+
|
| 75 |
+
---
|
| 76 |
+
|
| 77 |
+
## Training Data
|
| 78 |
+
|
| 79 |
+
Co-training mixture **`bridge_rt_1`** (1 : 1 sampling):
|
| 80 |
+
|
| 81 |
+
| Dataset | Embodiment | Source |
|
| 82 |
+
| --- | --- | --- |
|
| 83 |
+
| `bridge_orig_1.0.0_lerobot` | WidowX | [IPEC-COMMUNITY/bridge_orig_lerobot](https://huggingface.co/datasets/IPEC-COMMUNITY/bridge_orig_lerobot) |
|
| 84 |
+
| `fractal20220817_data_0.1.0_lerobot` (RT-1) | Google Robot | [IPEC-COMMUNITY/fractal20220817_data_lerobot](https://huggingface.co/datasets/IPEC-COMMUNITY/fractal20220817_data_lerobot) |
|
| 85 |
+
|
| 86 |
+
- Action representation: **delta end-effector** (7-d, gripper included)
|
| 87 |
+
- Image observation: single primary RGB view, resized to 224 × 224
|
| 88 |
+
- Per-dataset normalisation statistics are stored in
|
| 89 |
+
[`dataset_statistics.json`](dataset_statistics.json).
|
| 90 |
+
|
| 91 |
+
---
|
| 92 |
+
|
| 93 |
+
## Training Recipe
|
| 94 |
+
|
| 95 |
+
| | |
|
| 96 |
+
| --- | --- |
|
| 97 |
+
| Total steps | 100,000 (released checkpoints up to 60k) |
|
| 98 |
+
| Warm-up steps | 5,000 |
|
| 99 |
+
| Per-device batch size | 24 |
|
| 100 |
+
| Hardware | 8 × NVIDIA H100 / A100 (DeepSpeed ZeRO-2) |
|
| 101 |
+
| Precision | bf16, mixed-precision + gradient checkpointing |
|
| 102 |
+
| Optimizer | AdamW (β₁ = 0.9, β₂ = 0.95, ε = 1e-8, wd = 1e-8) |
|
| 103 |
+
| LR (base / VLM) | 1e-5 |
|
| 104 |
+
| LR (action head) | 1e-4 |
|
| 105 |
+
| LR scheduler | `cosine_with_min_lr` (min lr 5e-7) |
|
| 106 |
+
| Gradient clipping | 1.0 |
|
| 107 |
+
| Flow-matching noise | Beta distribution (α = 1.5, β = 1.0), s = 0.999 |
|
| 108 |
+
| Repeated diffusion steps | 8 |
|
| 109 |
+
| Frozen modules | none (full fine-tuning) |
|
| 110 |
+
| Attention impl. | FlashAttention-2 |
|
| 111 |
+
|
| 112 |
+
The exact training config is preserved in
|
| 113 |
+
[`config.yaml`](config.yaml) / [`config.full.yaml`](config.full.yaml), and the
|
| 114 |
+
launch script in [`run_oxe_train.sh`](run_oxe_train.sh).
|
| 115 |
+
|
| 116 |
+
---
|
| 117 |
+
|
| 118 |
+
## Evaluation — SimplerEnv WidowX
|
| 119 |
+
|
| 120 |
+
Evaluation follows the standard SimplerEnv WidowX protocol on four pick-and-place
|
| 121 |
+
tasks (24 episodes per task per run). Numbers are success rates (↑).
|
| 122 |
+
|
| 123 |
+
| Step | PutCarrotOnPlate | PutEggplantInBasket | PutSpoonOnTableCloth | StackGreenCubeOnYellowCube | **Average** |
|
| 124 |
+
| ---: | ---: | ---: | ---: | ---: | ---: |
|
| 125 |
+
| 40k | **0.688** | 0.917 | 0.750 | 0.333 | 0.672 |
|
| 126 |
+
| 50k | 0.625 | **1.000** | **0.792** | **0.375** | **0.698** |
|
| 127 |
+
| 60k | 0.667 | **1.000** | 0.750 | 0.167 | 0.646 |
|
| 128 |
+
|
| 129 |
+
Best average: **69.8 %** at the 50k checkpoint
|
| 130 |
+
([`steps_50000_pytorch_model.pt`](checkpoints/steps_50000_pytorch_model.pt)),
|
| 131 |
+
which we ship as the recommended checkpoint.
|
| 132 |
+
|
| 133 |
+
For comparison with other StarVLA frameworks on the same `bridge_rt_1`
|
| 134 |
+
mixture and protocol see the [StarVLA Model Zoo](https://github.com/starVLA/starVLA/blob/main/docs/model_zoo.md).
|
| 135 |
+
|
| 136 |
+
---
|
| 137 |
+
|
| 138 |
+
## Repository Layout
|
| 139 |
+
|
| 140 |
+
```
|
| 141 |
+
.
|
| 142 |
+
├── README.md # this model card
|
| 143 |
+
├── config.yaml # minimal training config
|
| 144 |
+
├── config.full.yaml # fully resolved training config
|
| 145 |
+
├── run_oxe_train.sh # launch script used for this run
|
| 146 |
+
├── dataset_statistics.json # per-dataset action/state normalisation stats
|
| 147 |
+
├── summary.jsonl # training step summary
|
| 148 |
+
├── success_summary/ # SimplerEnv evaluation logs and plots
|
| 149 |
+
│ ├── success_summary.csv
|
| 150 |
+
│ ├── raw_success.txt
|
| 151 |
+
│ └── success_plot.png
|
| 152 |
+
└── checkpoints/
|
| 153 |
+
├── steps_50000_pytorch_model.pt # ← recommended checkpoint
|
| 154 |
+
└── ... # per-step evaluation logs
|
| 155 |
+
```
|
| 156 |
+
|
| 157 |
+
---
|
| 158 |
+
|
| 159 |
+
## How to Use
|
| 160 |
+
|
| 161 |
+
This checkpoint is consumed directly by the StarVLA training / evaluation
|
| 162 |
+
stack. Clone StarVLA and load the checkpoint with the framework name
|
| 163 |
+
`QwenPI_v3`:
|
| 164 |
+
|
| 165 |
+
```bash
|
| 166 |
+
git clone https://github.com/starVLA/starVLA.git
|
| 167 |
+
cd starVLA
|
| 168 |
+
# Follow installation instructions in the StarVLA README.
|
| 169 |
+
```
|
| 170 |
+
|
| 171 |
+
```python
|
| 172 |
+
from huggingface_hub import snapshot_download
|
| 173 |
+
from starVLA.model.framework.tools import load_framework_from_checkpoint
|
| 174 |
+
|
| 175 |
+
ckpt_dir = snapshot_download("StarVLA/Qwen3VL-PI_v3-Bridge-RT-1")
|
| 176 |
+
|
| 177 |
+
policy = load_framework_from_checkpoint(
|
| 178 |
+
framework_name="QwenPI_v3",
|
| 179 |
+
config_path=f"{ckpt_dir}/config.full.yaml",
|
| 180 |
+
checkpoint_path=f"{ckpt_dir}/checkpoints/steps_50000_pytorch_model.pt",
|
| 181 |
+
)
|
| 182 |
+
# policy.predict_action(images, instruction, state) -> action chunk (16 × 7)
|
| 183 |
+
```
|
| 184 |
+
|
| 185 |
+
For end-to-end SimplerEnv evaluation see
|
| 186 |
+
[`examples/SimplerEnv`](https://github.com/starVLA/starVLA/tree/main/examples/SimplerEnv).
|
| 187 |
+
|
| 188 |
+
---
|
| 189 |
+
|
| 190 |
+
## Intended Use & Limitations
|
| 191 |
+
|
| 192 |
+
**Intended use.** Research on vision-language-action models, manipulation
|
| 193 |
+
policy learning, and as a baseline for π-style flow-matching action heads
|
| 194 |
+
on top of open-weight VLMs.
|
| 195 |
+
|
| 196 |
+
**Out-of-scope / limitations.**
|
| 197 |
+
|
| 198 |
+
- Trained only on Bridge (WidowX) + RT-1 (Google Robot) with a 7-d delta-EE
|
| 199 |
+
action space — generalisation to other embodiments / action spaces is not
|
| 200 |
+
guaranteed.
|
| 201 |
+
- Single 224 × 224 third-person view; no wrist camera, no depth.
|
| 202 |
+
- Evaluated only on SimplerEnv WidowX simulation; behaviour on real robots
|
| 203 |
+
has not been validated for the released checkpoint.
|
| 204 |
+
- Inherits any biases / failure modes of the underlying Qwen3-VL-4B model.
|
| 205 |
+
- Not safety-tuned. Do **not** deploy on physical robots without an external
|
| 206 |
+
safety layer.
|
| 207 |
+
|
| 208 |
+
---
|
| 209 |
+
|
| 210 |
+
## Citation
|
| 211 |
+
|
| 212 |
+
If you use this checkpoint, please cite StarVLA:
|
| 213 |
+
|
| 214 |
+
```bibtex
|
| 215 |
+
@article{starvla2026,
|
| 216 |
+
title = {StarVLA: A Lego-like Codebase for Vision-Language-Action Model Developing},
|
| 217 |
+
author = {StarVLA Community},
|
| 218 |
+
journal = {arXiv preprint arXiv:2604.05014},
|
| 219 |
+
year = {2026},
|
| 220 |
+
url = {https://arxiv.org/abs/2604.05014}
|
| 221 |
+
}
|
| 222 |
+
```
|
| 223 |
+
|
| 224 |
+
And the underlying VLM backbone:
|
| 225 |
+
|
| 226 |
+
```bibtex
|
| 227 |
+
@misc{qwen3vl,
|
| 228 |
+
title = {Qwen3-VL},
|
| 229 |
+
author = {Qwen Team},
|
| 230 |
+
year = {2025},
|
| 231 |
+
url = {https://huggingface.co/Qwen/Qwen3-VL-4B-Instruct}
|
| 232 |
+
}
|
| 233 |
+
```
|
| 234 |
+
|
| 235 |
+
## Acknowledgements
|
| 236 |
+
|
| 237 |
+
- [Qwen Team](https://huggingface.co/Qwen) for the Qwen3-VL backbone.
|
| 238 |
+
- [Physical Intelligence](https://www.physicalintelligence.company/) for the
|
| 239 |
+
π₀ / π₀.₅ flow-matching action-head recipe that inspired `QwenPI_v3`.
|
| 240 |
+
- [Open X-Embodiment](https://robotics-transformer-x.github.io/) and
|
| 241 |
+
[IPEC-COMMUNITY](https://huggingface.co/IPEC-COMMUNITY) for the LeRobot
|
| 242 |
+
conversions of Bridge V2 and RT-1.
|
| 243 |
+
- [SimplerEnv](https://github.com/simpler-env/SimplerEnv) for the
|
| 244 |
+
evaluation protocol.
|
checkpoints/server_logs/steps_10000_pytorch_model_policy_server_6418.log
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
INFO:starVLA.model.framework.share_tools:[*] Loading from local checkpoint path `results/Checkpoints/0427_oxe_bridge_rt_1_QwenPI_v3/checkpoints/steps_10000_pytorch_model.pt`
|
| 2 |
+
INFO:starVLA.model.framework.share_tools:[*] [apply_config_compat] normalised config from version_id=None to '0.21'
|
| 3 |
+
[WARNING] flash_attn not installed, falling back to sdpa
|
| 4 |
+
|
| 5 |
+
/home/jye624/Projcets/starVLA/starVLA/model/framework/base_framework.py:248: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
|
| 6 |
+
model_state_dict = torch.load(pretrained_checkpoint, map_location="cpu")
|
| 7 |
+
INFO:root:Creating server (host: dgx-44, ip: 10.22.4.152)
|
| 8 |
+
INFO:root:server running ...
|
| 9 |
+
INFO:websockets.server:server listening on 0.0.0.0:6418
|
| 10 |
+
INFO:websockets.server:connection open
|
| 11 |
+
INFO:root:Connection from ('127.0.0.1', 40984) opened
|
| 12 |
+
INFO:root:Connection from ('127.0.0.1', 40984) closed
|
checkpoints/server_logs/steps_10000_pytorch_model_policy_server_6420.log
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
INFO:starVLA.model.framework.share_tools:[*] Loading from local checkpoint path `results/Checkpoints/0427_oxe_bridge_rt_1_QwenPI_v3/checkpoints/steps_10000_pytorch_model.pt`
|
| 2 |
+
INFO:starVLA.model.framework.share_tools:[*] [apply_config_compat] normalised config from version_id=None to '0.21'
|
| 3 |
+
[WARNING] flash_attn not installed, falling back to sdpa
|
| 4 |
+
|
| 5 |
+
/home/jye624/Projcets/starVLA/starVLA/model/framework/base_framework.py:248: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
|
| 6 |
+
model_state_dict = torch.load(pretrained_checkpoint, map_location="cpu")
|
| 7 |
+
INFO:root:Creating server (host: dgx-44, ip: 10.22.4.152)
|
| 8 |
+
INFO:root:server running ...
|
| 9 |
+
INFO:websockets.server:server listening on 0.0.0.0:6420
|
| 10 |
+
INFO:websockets.server:connection open
|
| 11 |
+
INFO:root:Connection from ('127.0.0.1', 41910) opened
|
| 12 |
+
INFO:root:Connection from ('127.0.0.1', 41910) closed
|
checkpoints/server_logs/steps_20000_pytorch_model_policy_server_6455.log
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
INFO:starVLA.model.framework.share_tools:[*] Loading from local checkpoint path `results/Checkpoints/0427_oxe_bridge_rt_1_QwenPI_v3/checkpoints/steps_20000_pytorch_model.pt`
|
| 2 |
+
INFO:starVLA.model.framework.share_tools:[*] [apply_config_compat] normalised config from version_id=None to '0.21'
|
| 3 |
+
[WARNING] flash_attn not installed, falling back to sdpa
|
| 4 |
+
|
| 5 |
+
/home/jye624/Projcets/starVLA/starVLA/model/framework/base_framework.py:248: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
|
| 6 |
+
model_state_dict = torch.load(pretrained_checkpoint, map_location="cpu")
|
| 7 |
+
INFO:root:Creating server (host: dgx-44, ip: 10.22.4.152)
|
| 8 |
+
INFO:root:server running ...
|
| 9 |
+
INFO:websockets.server:server listening on 0.0.0.0:6455
|
| 10 |
+
INFO:websockets.server:connection open
|
| 11 |
+
INFO:root:Connection from ('127.0.0.1', 53776) opened
|
| 12 |
+
INFO:root:Connection from ('127.0.0.1', 53776) closed
|
checkpoints/server_logs/steps_20000_pytorch_model_policy_server_6456.log
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
INFO:starVLA.model.framework.share_tools:[*] Loading from local checkpoint path `results/Checkpoints/0427_oxe_bridge_rt_1_QwenPI_v3/checkpoints/steps_20000_pytorch_model.pt`
|
| 2 |
+
INFO:starVLA.model.framework.share_tools:[*] [apply_config_compat] normalised config from version_id=None to '0.21'
|
| 3 |
+
[WARNING] flash_attn not installed, falling back to sdpa
|
| 4 |
+
|
| 5 |
+
/home/jye624/Projcets/starVLA/starVLA/model/framework/base_framework.py:248: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
|
| 6 |
+
model_state_dict = torch.load(pretrained_checkpoint, map_location="cpu")
|
| 7 |
+
INFO:root:Creating server (host: dgx-44, ip: 10.22.4.152)
|
| 8 |
+
INFO:root:server running ...
|
| 9 |
+
INFO:websockets.server:server listening on 0.0.0.0:6456
|
| 10 |
+
INFO:websockets.server:connection open
|
| 11 |
+
INFO:root:Connection from ('127.0.0.1', 57194) opened
|
| 12 |
+
INFO:root:Connection from ('127.0.0.1', 57194) closed
|
checkpoints/server_logs/steps_20000_pytorch_model_policy_server_6457.log
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
INFO:starVLA.model.framework.share_tools:[*] Loading from local checkpoint path `results/Checkpoints/0427_oxe_bridge_rt_1_QwenPI_v3/checkpoints/steps_20000_pytorch_model.pt`
|
| 2 |
+
INFO:starVLA.model.framework.share_tools:[*] [apply_config_compat] normalised config from version_id=None to '0.21'
|
| 3 |
+
[WARNING] flash_attn not installed, falling back to sdpa
|
| 4 |
+
|
| 5 |
+
/home/jye624/Projcets/starVLA/starVLA/model/framework/base_framework.py:248: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
|
| 6 |
+
model_state_dict = torch.load(pretrained_checkpoint, map_location="cpu")
|
| 7 |
+
INFO:root:Creating server (host: dgx-44, ip: 10.22.4.152)
|
| 8 |
+
INFO:root:server running ...
|
| 9 |
+
INFO:websockets.server:server listening on 0.0.0.0:6457
|
| 10 |
+
INFO:websockets.server:connection open
|
| 11 |
+
INFO:root:Connection from ('127.0.0.1', 48144) opened
|
| 12 |
+
INFO:root:Connection from ('127.0.0.1', 48144) closed
|
checkpoints/server_logs/steps_30000_pytorch_model_policy_server_6554.log
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
INFO:starVLA.model.framework.share_tools:[*] Loading from local checkpoint path `results/Checkpoints/0427_oxe_bridge_rt_1_QwenPI_v3/checkpoints/steps_30000_pytorch_model.pt`
|
| 2 |
+
INFO:starVLA.model.framework.share_tools:[*] [apply_config_compat] normalised config from version_id=None to '0.21'
|
| 3 |
+
[WARNING] flash_attn not installed, falling back to sdpa
|
| 4 |
+
|
| 5 |
+
/home/jye624/Projcets/starVLA/starVLA/model/framework/base_framework.py:248: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
|
| 6 |
+
model_state_dict = torch.load(pretrained_checkpoint, map_location="cpu")
|
| 7 |
+
INFO:root:Creating server (host: dgx-44, ip: 10.22.4.152)
|
| 8 |
+
INFO:root:server running ...
|
| 9 |
+
INFO:websockets.server:server listening on 0.0.0.0:6554
|
| 10 |
+
INFO:websockets.server:connection open
|
| 11 |
+
INFO:root:Connection from ('127.0.0.1', 39930) opened
|
| 12 |
+
INFO:root:Connection from ('127.0.0.1', 39930) closed
|
checkpoints/server_logs/steps_30000_pytorch_model_policy_server_6555.log
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
INFO:starVLA.model.framework.share_tools:[*] Loading from local checkpoint path `results/Checkpoints/0427_oxe_bridge_rt_1_QwenPI_v3/checkpoints/steps_30000_pytorch_model.pt`
|
| 2 |
+
INFO:starVLA.model.framework.share_tools:[*] [apply_config_compat] normalised config from version_id=None to '0.21'
|
| 3 |
+
[WARNING] flash_attn not installed, falling back to sdpa
|
| 4 |
+
|
| 5 |
+
/home/jye624/Projcets/starVLA/starVLA/model/framework/base_framework.py:248: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
|
| 6 |
+
model_state_dict = torch.load(pretrained_checkpoint, map_location="cpu")
|
| 7 |
+
INFO:root:Creating server (host: dgx-44, ip: 10.22.4.152)
|
| 8 |
+
INFO:root:server running ...
|
| 9 |
+
INFO:websockets.server:server listening on 0.0.0.0:6555
|
| 10 |
+
INFO:websockets.server:connection open
|
| 11 |
+
INFO:root:Connection from ('127.0.0.1', 55078) opened
|
| 12 |
+
INFO:root:Connection from ('127.0.0.1', 55078) closed
|
checkpoints/server_logs/steps_30000_pytorch_model_policy_server_6557.log
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
INFO:starVLA.model.framework.share_tools:[*] Loading from local checkpoint path `results/Checkpoints/0427_oxe_bridge_rt_1_QwenPI_v3/checkpoints/steps_30000_pytorch_model.pt`
|
| 2 |
+
INFO:starVLA.model.framework.share_tools:[*] [apply_config_compat] normalised config from version_id=None to '0.21'
|
| 3 |
+
[WARNING] flash_attn not installed, falling back to sdpa
|
| 4 |
+
|
| 5 |
+
/home/jye624/Projcets/starVLA/starVLA/model/framework/base_framework.py:248: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
|
| 6 |
+
model_state_dict = torch.load(pretrained_checkpoint, map_location="cpu")
|
| 7 |
+
INFO:root:Creating server (host: dgx-44, ip: 10.22.4.152)
|
| 8 |
+
INFO:root:server running ...
|
| 9 |
+
INFO:websockets.server:server listening on 0.0.0.0:6557
|
| 10 |
+
INFO:websockets.server:connection open
|
| 11 |
+
INFO:root:Connection from ('127.0.0.1', 52024) opened
|
| 12 |
+
INFO:root:Connection from ('127.0.0.1', 52024) closed
|
checkpoints/server_logs/steps_40000_pytorch_model_policy_server_6450.log
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
INFO:starVLA.model.framework.share_tools:[*] Loading from local checkpoint path `results/Checkpoints/0427_oxe_bridge_rt_1_QwenPI_v3/checkpoints/steps_40000_pytorch_model.pt`
|
| 2 |
+
INFO:starVLA.model.framework.share_tools:[*] [apply_config_compat] normalised config from version_id=None to '0.21'
|
| 3 |
+
[WARNING] flash_attn not installed, falling back to sdpa
|
| 4 |
+
|
| 5 |
+
/home/jye624/Projcets/starVLA/starVLA/model/framework/base_framework.py:248: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
|
| 6 |
+
model_state_dict = torch.load(pretrained_checkpoint, map_location="cpu")
|
| 7 |
+
INFO:root:Creating server (host: dgx-44, ip: 10.22.4.152)
|
| 8 |
+
INFO:root:server running ...
|
| 9 |
+
INFO:websockets.server:server listening on 0.0.0.0:6450
|
| 10 |
+
INFO:websockets.server:connection open
|
| 11 |
+
INFO:root:Connection from ('127.0.0.1', 47252) opened
|
| 12 |
+
INFO:root:Connection from ('127.0.0.1', 47252) closed
|
checkpoints/server_logs/steps_40000_pytorch_model_policy_server_6451.log
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
INFO:starVLA.model.framework.share_tools:[*] Loading from local checkpoint path `results/Checkpoints/0427_oxe_bridge_rt_1_QwenPI_v3/checkpoints/steps_40000_pytorch_model.pt`
|
| 2 |
+
INFO:starVLA.model.framework.share_tools:[*] [apply_config_compat] normalised config from version_id=None to '0.21'
|
| 3 |
+
[WARNING] flash_attn not installed, falling back to sdpa
|
| 4 |
+
|
| 5 |
+
/home/jye624/Projcets/starVLA/starVLA/model/framework/base_framework.py:248: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
|
| 6 |
+
model_state_dict = torch.load(pretrained_checkpoint, map_location="cpu")
|
| 7 |
+
INFO:root:Creating server (host: dgx-44, ip: 10.22.4.152)
|
| 8 |
+
INFO:root:server running ...
|
| 9 |
+
INFO:websockets.server:server listening on 0.0.0.0:6451
|
| 10 |
+
INFO:websockets.server:connection open
|
| 11 |
+
INFO:root:Connection from ('127.0.0.1', 34212) opened
|
| 12 |
+
INFO:root:Connection from ('127.0.0.1', 34212) closed
|
checkpoints/server_logs/steps_40000_pytorch_model_policy_server_6453.log
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
INFO:starVLA.model.framework.share_tools:[*] Loading from local checkpoint path `results/Checkpoints/0427_oxe_bridge_rt_1_QwenPI_v3/checkpoints/steps_40000_pytorch_model.pt`
|
| 2 |
+
INFO:starVLA.model.framework.share_tools:[*] [apply_config_compat] normalised config from version_id=None to '0.21'
|
| 3 |
+
[WARNING] flash_attn not installed, falling back to sdpa
|
| 4 |
+
|
| 5 |
+
/home/jye624/Projcets/starVLA/starVLA/model/framework/base_framework.py:248: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
|
| 6 |
+
model_state_dict = torch.load(pretrained_checkpoint, map_location="cpu")
|
| 7 |
+
INFO:root:Creating server (host: dgx-44, ip: 10.22.4.152)
|
| 8 |
+
INFO:root:server running ...
|
| 9 |
+
INFO:websockets.server:server listening on 0.0.0.0:6453
|
| 10 |
+
INFO:websockets.server:connection open
|
| 11 |
+
INFO:root:Connection from ('127.0.0.1', 47510) opened
|
| 12 |
+
INFO:root:Connection from ('127.0.0.1', 47510) closed
|
checkpoints/server_logs/steps_40000_pytorch_model_policy_server_6455.log
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
INFO:starVLA.model.framework.share_tools:[*] Loading from local checkpoint path `results/Checkpoints/0427_oxe_bridge_rt_1_QwenPI_v3/checkpoints/steps_40000_pytorch_model.pt`
|
| 2 |
+
INFO:starVLA.model.framework.share_tools:[*] [apply_config_compat] normalised config from version_id=None to '0.21'
|
| 3 |
+
[WARNING] flash_attn not installed, falling back to sdpa
|
| 4 |
+
|
| 5 |
+
/home/jye624/Projcets/starVLA/starVLA/model/framework/base_framework.py:248: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
|
| 6 |
+
model_state_dict = torch.load(pretrained_checkpoint, map_location="cpu")
|
| 7 |
+
INFO:root:Creating server (host: dgx-44, ip: 10.22.4.152)
|
| 8 |
+
INFO:root:server running ...
|
| 9 |
+
INFO:websockets.server:server listening on 0.0.0.0:6455
|
| 10 |
+
INFO:websockets.server:connection open
|
| 11 |
+
INFO:root:Connection from ('127.0.0.1', 47676) opened
|
| 12 |
+
INFO:root:Connection from ('127.0.0.1', 47676) closed
|
checkpoints/server_logs/steps_40000_pytorch_model_policy_server_6457.log
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
INFO:starVLA.model.framework.share_tools:[*] Loading from local checkpoint path `results/Checkpoints/0427_oxe_bridge_rt_1_QwenPI_v3/checkpoints/steps_40000_pytorch_model.pt`
|
| 2 |
+
INFO:starVLA.model.framework.share_tools:[*] [apply_config_compat] normalised config from version_id=None to '0.21'
|
| 3 |
+
[WARNING] flash_attn not installed, falling back to sdpa
|
| 4 |
+
|
| 5 |
+
/home/jye624/Projcets/starVLA/starVLA/model/framework/base_framework.py:248: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
|
| 6 |
+
model_state_dict = torch.load(pretrained_checkpoint, map_location="cpu")
|
| 7 |
+
INFO:root:Creating server (host: dgx-44, ip: 10.22.4.152)
|
| 8 |
+
INFO:root:server running ...
|
| 9 |
+
INFO:websockets.server:server listening on 0.0.0.0:6457
|
| 10 |
+
INFO:websockets.server:connection open
|
| 11 |
+
INFO:root:Connection from ('127.0.0.1', 36590) opened
|
| 12 |
+
INFO:root:Connection from ('127.0.0.1', 36590) closed
|
checkpoints/server_logs/steps_50000_pytorch_model_policy_server_6555.log
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
INFO:starVLA.model.framework.share_tools:[*] Loading from local checkpoint path `results/Checkpoints/0427_oxe_bridge_rt_1_QwenPI_v3/checkpoints/steps_50000_pytorch_model.pt`
|
| 2 |
+
INFO:starVLA.model.framework.share_tools:[*] [apply_config_compat] normalised config from version_id=None to '0.21'
|
| 3 |
+
[WARNING] flash_attn not installed, falling back to sdpa
|
| 4 |
+
|
| 5 |
+
/home/jye624/Projcets/starVLA/starVLA/model/framework/base_framework.py:248: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
|
| 6 |
+
model_state_dict = torch.load(pretrained_checkpoint, map_location="cpu")
|
| 7 |
+
INFO:root:Creating server (host: dgx-44, ip: 10.22.4.152)
|
| 8 |
+
INFO:root:server running ...
|
| 9 |
+
INFO:websockets.server:server listening on 0.0.0.0:6555
|
| 10 |
+
INFO:websockets.server:connection open
|
| 11 |
+
INFO:root:Connection from ('127.0.0.1', 49406) opened
|
| 12 |
+
INFO:root:Connection from ('127.0.0.1', 49406) closed
|
checkpoints/server_logs/steps_50000_pytorch_model_policy_server_6557.log
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
INFO:starVLA.model.framework.share_tools:[*] Loading from local checkpoint path `results/Checkpoints/0427_oxe_bridge_rt_1_QwenPI_v3/checkpoints/steps_50000_pytorch_model.pt`
|
| 2 |
+
INFO:starVLA.model.framework.share_tools:[*] [apply_config_compat] normalised config from version_id=None to '0.21'
|
| 3 |
+
[WARNING] flash_attn not installed, falling back to sdpa
|
| 4 |
+
|
| 5 |
+
/home/jye624/Projcets/starVLA/starVLA/model/framework/base_framework.py:248: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
|
| 6 |
+
model_state_dict = torch.load(pretrained_checkpoint, map_location="cpu")
|
| 7 |
+
INFO:root:Creating server (host: dgx-44, ip: 10.22.4.152)
|
| 8 |
+
INFO:root:server running ...
|
| 9 |
+
INFO:websockets.server:server listening on 0.0.0.0:6557
|
| 10 |
+
INFO:websockets.server:connection open
|
| 11 |
+
INFO:root:Connection from ('127.0.0.1', 57000) opened
|
| 12 |
+
INFO:root:Connection from ('127.0.0.1', 57000) closed
|
checkpoints/server_logs/steps_60000_pytorch_model_policy_server_6418.log
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
INFO:starVLA.model.framework.share_tools:[*] Loading from local checkpoint path `results/Checkpoints/0427_oxe_bridge_rt_1_QwenPI_v3/checkpoints/steps_60000_pytorch_model.pt`
|
| 2 |
+
INFO:starVLA.model.framework.share_tools:[*] [apply_config_compat] normalised config from version_id=None to '0.21'
|
| 3 |
+
[WARNING] flash_attn not installed, falling back to sdpa
|
| 4 |
+
|
| 5 |
+
/home/jye624/Projcets/starVLA/starVLA/model/framework/base_framework.py:248: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
|
| 6 |
+
model_state_dict = torch.load(pretrained_checkpoint, map_location="cpu")
|
| 7 |
+
INFO:root:Creating server (host: dgx-44, ip: 10.22.4.152)
|
| 8 |
+
INFO:root:server running ...
|
| 9 |
+
INFO:websockets.server:server listening on 0.0.0.0:6418
|
| 10 |
+
INFO:websockets.server:connection open
|
| 11 |
+
INFO:root:Connection from ('127.0.0.1', 43230) opened
|
| 12 |
+
INFO:root:Connection from ('127.0.0.1', 43230) closed
|
checkpoints/server_logs/steps_60000_pytorch_model_policy_server_6420.log
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
INFO:starVLA.model.framework.share_tools:[*] Loading from local checkpoint path `results/Checkpoints/0427_oxe_bridge_rt_1_QwenPI_v3/checkpoints/steps_60000_pytorch_model.pt`
|
| 2 |
+
INFO:starVLA.model.framework.share_tools:[*] [apply_config_compat] normalised config from version_id=None to '0.21'
|
| 3 |
+
[WARNING] flash_attn not installed, falling back to sdpa
|
| 4 |
+
|
| 5 |
+
/home/jye624/Projcets/starVLA/starVLA/model/framework/base_framework.py:248: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
|
| 6 |
+
model_state_dict = torch.load(pretrained_checkpoint, map_location="cpu")
|
| 7 |
+
INFO:root:Creating server (host: dgx-44, ip: 10.22.4.152)
|
| 8 |
+
INFO:root:server running ...
|
| 9 |
+
INFO:websockets.server:server listening on 0.0.0.0:6420
|
| 10 |
+
INFO:websockets.server:connection open
|
| 11 |
+
INFO:root:Connection from ('127.0.0.1', 51438) opened
|
| 12 |
+
INFO:root:Connection from ('127.0.0.1', 51438) closed
|
checkpoints/steps_10000_pytorch_model_infer_PutEggplantInBasketScene-v0.log.run1
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
checkpoints/steps_20000_pytorch_model_infer_PutCarrotOnPlateInScene-v0.log.run1
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
checkpoints/steps_20000_pytorch_model_infer_PutEggplantInBasketScene-v0.log.run1
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
checkpoints/steps_20000_pytorch_model_infer_StackGreenCubeOnYellowCubeBakedTexInScene-v0.log.run1
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
checkpoints/steps_30000_pytorch_model_infer_PutCarrotOnPlateInScene-v0.log.run1
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
checkpoints/steps_30000_pytorch_model_infer_PutEggplantInBasketScene-v0.log.run1
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
checkpoints/steps_30000_pytorch_model_infer_PutSpoonOnTableClothInScene-v0.log.run1
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
checkpoints/steps_30000_pytorch_model_infer_StackGreenCubeOnYellowCubeBakedTexInScene-v0.log.run1
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
checkpoints/steps_40000_pytorch_model_infer_PutCarrotOnPlateInScene-v0.log.run1
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
checkpoints/steps_40000_pytorch_model_infer_PutEggplantInBasketScene-v0.log.run1
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
checkpoints/steps_40000_pytorch_model_infer_PutSpoonOnTableClothInScene-v0.log.run1
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
checkpoints/steps_40000_pytorch_model_infer_StackGreenCubeOnYellowCubeBakedTexInScene-v0.log.run2
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
checkpoints/steps_50000_pytorch_model_infer_PutEggplantInBasketScene-v0.log.run1
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
checkpoints/steps_50000_pytorch_model_infer_StackGreenCubeOnYellowCubeBakedTexInScene-v0.log.run1
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
checkpoints/steps_60000_pytorch_model_infer_PutCarrotOnPlateInScene-v0.log.run1
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
checkpoints/steps_60000_pytorch_model_infer_StackGreenCubeOnYellowCubeBakedTexInScene-v0.log.run1
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
config.full.yaml
ADDED
|
@@ -0,0 +1,125 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
run_id: 0427_oxe_bridge_rt_1_QwenPI_v3
|
| 2 |
+
run_root_dir: ./results/Checkpoints
|
| 3 |
+
seed: 42
|
| 4 |
+
trackers:
|
| 5 |
+
- jsonl
|
| 6 |
+
- wandb
|
| 7 |
+
wandb_entity: jinhuiye
|
| 8 |
+
wandb_project: starVLA_simplerEnv
|
| 9 |
+
is_debug: false
|
| 10 |
+
version_id: '0.21'
|
| 11 |
+
framework:
|
| 12 |
+
name: QwenPI_v3
|
| 13 |
+
qwenvl:
|
| 14 |
+
base_vlm: /home/jye624/Models/Pretrained_models/Qwen3-VL-4B-Instruct
|
| 15 |
+
attn_implementation: flash_attention_2
|
| 16 |
+
vl_hidden_dim: 2560
|
| 17 |
+
num_vl_layers: 36
|
| 18 |
+
action_model:
|
| 19 |
+
action_model_type: DiT-B
|
| 20 |
+
action_dim: 7
|
| 21 |
+
state_dim: 7
|
| 22 |
+
action_horizon: 16
|
| 23 |
+
repeated_diffusion_steps: 8
|
| 24 |
+
num_inference_timesteps: 4
|
| 25 |
+
add_pos_embed: true
|
| 26 |
+
max_seq_len: 1024
|
| 27 |
+
num_target_vision_tokens: 32
|
| 28 |
+
noise_beta_alpha: 1.5
|
| 29 |
+
noise_beta_beta: 1.0
|
| 30 |
+
noise_s: 0.999
|
| 31 |
+
num_timestep_buckets: 1000
|
| 32 |
+
diffusion_model_cfg:
|
| 33 |
+
action_dit_hidden_dim: 1024
|
| 34 |
+
dropout: 0.2
|
| 35 |
+
final_dropout: true
|
| 36 |
+
interleave_self_attention: true
|
| 37 |
+
norm_type: ada_norm
|
| 38 |
+
positional_embeddings: null
|
| 39 |
+
attention_head_dim: 64
|
| 40 |
+
num_layers: 36
|
| 41 |
+
output_dim: 1024
|
| 42 |
+
cross_attention_dim: 1024
|
| 43 |
+
input_embedding_dim: 1024
|
| 44 |
+
num_attention_heads: 16
|
| 45 |
+
hidden_size: 1024
|
| 46 |
+
future_action_window_size: 15
|
| 47 |
+
action_hidden_dim: 1024
|
| 48 |
+
past_action_window_size: 0
|
| 49 |
+
obs_image_size:
|
| 50 |
+
- 224
|
| 51 |
+
- 224
|
| 52 |
+
dino:
|
| 53 |
+
dino_backbone: dinov2_vits14
|
| 54 |
+
datasets:
|
| 55 |
+
vlm_data:
|
| 56 |
+
dataset_py: vlm_datasets
|
| 57 |
+
dataformat: llava_json
|
| 58 |
+
dataset_use: sharegpt4v_coco
|
| 59 |
+
eval_dataset: sharegpt4v_coco
|
| 60 |
+
data_flatten: false
|
| 61 |
+
base_interval: 2
|
| 62 |
+
max_pixels: 307200
|
| 63 |
+
min_pixels: 784
|
| 64 |
+
model_max_length: 2048
|
| 65 |
+
model_type: qwen2.5vl
|
| 66 |
+
per_device_batch_size: 4
|
| 67 |
+
vla_data:
|
| 68 |
+
dataset_py: lerobot_datasets
|
| 69 |
+
data_root_dir: ./playground/Datasets/OXE_LEROBOT_DATASET
|
| 70 |
+
data_mix: bridge_rt_1
|
| 71 |
+
action_type: delta_ee
|
| 72 |
+
CoT_prompt: Your task is {instruction}. To identify the key objects for your task.
|
| 73 |
+
Locate their bounding boxes in [x1,y1,x2,y2] format.
|
| 74 |
+
CoT_answer: bbox
|
| 75 |
+
default_image_resolution:
|
| 76 |
+
- 3
|
| 77 |
+
- 224
|
| 78 |
+
- 224
|
| 79 |
+
per_device_batch_size: 24
|
| 80 |
+
load_all_data_for_training: true
|
| 81 |
+
obs:
|
| 82 |
+
- image_0
|
| 83 |
+
image_size:
|
| 84 |
+
- 224
|
| 85 |
+
- 224
|
| 86 |
+
video_backend: torchvision_av
|
| 87 |
+
trainer:
|
| 88 |
+
epochs: 100
|
| 89 |
+
max_train_steps: 100000
|
| 90 |
+
num_warmup_steps: 5000
|
| 91 |
+
save_interval: 10000
|
| 92 |
+
eval_interval: 1000
|
| 93 |
+
learning_rate:
|
| 94 |
+
base: 1.0e-05
|
| 95 |
+
qwen_vl_interface: 1.0e-05
|
| 96 |
+
action_model: 0.0001
|
| 97 |
+
lr_scheduler_type: cosine_with_min_lr
|
| 98 |
+
scheduler_specific_kwargs:
|
| 99 |
+
min_lr: 5.0e-07
|
| 100 |
+
freeze_modules: true
|
| 101 |
+
loss_scale:
|
| 102 |
+
vla: 1.0
|
| 103 |
+
vlm: 0.1
|
| 104 |
+
max_grad_norm: 1.0
|
| 105 |
+
warmup_ratio: 0.1
|
| 106 |
+
weight_decay: 0.0
|
| 107 |
+
logging_frequency: 1000
|
| 108 |
+
gradient_clipping: 1.0
|
| 109 |
+
gradient_accumulation_steps: 1
|
| 110 |
+
optimizer:
|
| 111 |
+
name: AdamW
|
| 112 |
+
betas:
|
| 113 |
+
- 0.9
|
| 114 |
+
- 0.95
|
| 115 |
+
eps: 1.0e-08
|
| 116 |
+
weight_decay: 1.0e-08
|
| 117 |
+
is_resume: true
|
| 118 |
+
resume_epoch: null
|
| 119 |
+
resume_step: null
|
| 120 |
+
enable_gradient_checkpointing: true
|
| 121 |
+
enable_mixed_precision_training: true
|
| 122 |
+
vla_data:
|
| 123 |
+
video_backend: pyav
|
| 124 |
+
config_yaml: ./examples/SimplerEnv/train_files/starvla_cotrain_oxe.yaml
|
| 125 |
+
output_dir: ./results/Checkpoints/0427_oxe_bridge_rt_1_QwenPI_v3
|
config.yaml
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
datasets:
|
| 2 |
+
vla_data:
|
| 3 |
+
CoT_prompt: Your task is {instruction}. To identify the key objects for your task.
|
| 4 |
+
Locate their bounding boxes in [x1,y1,x2,y2] format.
|
| 5 |
+
data_mix: bridge_rt_1
|
| 6 |
+
data_root_dir: ./playground/Datasets/OXE_LEROBOT_DATASET
|
| 7 |
+
dataset_py: lerobot_datasets
|
| 8 |
+
image_size:
|
| 9 |
+
- 224
|
| 10 |
+
- 224
|
| 11 |
+
per_device_batch_size: 24
|
| 12 |
+
video_backend: torchvision_av
|
| 13 |
+
framework:
|
| 14 |
+
action_model:
|
| 15 |
+
action_dim: 7
|
| 16 |
+
action_horizon: 16
|
| 17 |
+
add_pos_embed: true
|
| 18 |
+
diffusion_model_cfg:
|
| 19 |
+
action_dit_hidden_dim: 1024
|
| 20 |
+
attention_head_dim: 64
|
| 21 |
+
cross_attention_dim: 1024
|
| 22 |
+
dropout: 0.2
|
| 23 |
+
final_dropout: true
|
| 24 |
+
input_embedding_dim: 1024
|
| 25 |
+
interleave_self_attention: true
|
| 26 |
+
norm_type: ada_norm
|
| 27 |
+
num_attention_heads: 16
|
| 28 |
+
num_layers: 36
|
| 29 |
+
output_dim: 1024
|
| 30 |
+
positional_embeddings: null
|
| 31 |
+
max_seq_len: 1024
|
| 32 |
+
noise_beta_alpha: 1.5
|
| 33 |
+
noise_beta_beta: 1.0
|
| 34 |
+
noise_s: 0.999
|
| 35 |
+
num_inference_timesteps: 4
|
| 36 |
+
num_target_vision_tokens: 32
|
| 37 |
+
num_timestep_buckets: 1000
|
| 38 |
+
state_dim: 7
|
| 39 |
+
name: QwenPI_v3
|
| 40 |
+
qwenvl:
|
| 41 |
+
attn_implementation: flash_attention_2
|
| 42 |
+
base_vlm: /home/jye624/Models/Pretrained_models/Qwen3-VL-4B-Instruct
|
| 43 |
+
num_vl_layers: 36
|
| 44 |
+
vl_hidden_dim: 2560
|
| 45 |
+
output_dir: ./results/Checkpoints/0427_oxe_bridge_rt_1_QwenPI_v3
|
| 46 |
+
run_id: 0427_oxe_bridge_rt_1_QwenPI_v3
|
| 47 |
+
run_root_dir: ./results/Checkpoints
|
| 48 |
+
seed: 42
|
| 49 |
+
trainer:
|
| 50 |
+
eval_interval: 1000
|
| 51 |
+
freeze_modules: true
|
| 52 |
+
gradient_clipping: 1.0
|
| 53 |
+
is_resume: true
|
| 54 |
+
learning_rate:
|
| 55 |
+
action_model: 0.0001
|
| 56 |
+
base: 1.0e-05
|
| 57 |
+
qwen_vl_interface: 1.0e-05
|
| 58 |
+
logging_frequency: 1000
|
| 59 |
+
lr_scheduler_type: cosine_with_min_lr
|
| 60 |
+
max_train_steps: 100000
|
| 61 |
+
num_warmup_steps: 5000
|
| 62 |
+
optimizer:
|
| 63 |
+
betas:
|
| 64 |
+
- 0.9
|
| 65 |
+
- 0.95
|
| 66 |
+
eps: 1.0e-08
|
| 67 |
+
weight_decay: 1.0e-08
|
| 68 |
+
save_interval: 10000
|
| 69 |
+
scheduler_specific_kwargs:
|
| 70 |
+
min_lr: 5.0e-07
|
| 71 |
+
wandb_entity: jinhuiye
|
| 72 |
+
wandb_project: starVLA_simplerEnv
|
dataset_statistics.json
ADDED
|
@@ -0,0 +1,264 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"oxe_bridge": {
|
| 3 |
+
"action": {
|
| 4 |
+
"mean": [
|
| 5 |
+
0.0001136629143729806,
|
| 6 |
+
6.556845619343221e-05,
|
| 7 |
+
-6.319578096736223e-05,
|
| 8 |
+
-7.192707562353462e-05,
|
| 9 |
+
-0.00019508649711497128,
|
| 10 |
+
0.00012040198635077104,
|
| 11 |
+
0.2882896661758423
|
| 12 |
+
],
|
| 13 |
+
"std": [
|
| 14 |
+
0.006909778691848374,
|
| 15 |
+
0.009684059697605607,
|
| 16 |
+
0.008962926618867292,
|
| 17 |
+
0.020120852281989444,
|
| 18 |
+
0.021582520578222027,
|
| 19 |
+
0.05472376387860533,
|
| 20 |
+
0.4543604113297812
|
| 21 |
+
],
|
| 22 |
+
"max": [
|
| 23 |
+
0.41691166162490845,
|
| 24 |
+
0.25864794850349426,
|
| 25 |
+
0.21218234300613403,
|
| 26 |
+
3.122201919555664,
|
| 27 |
+
1.8618112802505493,
|
| 28 |
+
6.272472858428955,
|
| 29 |
+
1.0
|
| 30 |
+
],
|
| 31 |
+
"min": [
|
| 32 |
+
-0.4007510244846344,
|
| 33 |
+
-0.13874775171279907,
|
| 34 |
+
-0.22553899884223938,
|
| 35 |
+
-3.2010786533355713,
|
| 36 |
+
-1.8618112802505493,
|
| 37 |
+
-6.279075622558594,
|
| 38 |
+
0.0
|
| 39 |
+
],
|
| 40 |
+
"q01": [
|
| 41 |
+
-0.028752606511116028,
|
| 42 |
+
-0.041702210046350954,
|
| 43 |
+
-0.026096698231995105,
|
| 44 |
+
-0.08052822157740593,
|
| 45 |
+
-0.09249736212193965,
|
| 46 |
+
-0.20738411962985992,
|
| 47 |
+
0.0
|
| 48 |
+
],
|
| 49 |
+
"q99": [
|
| 50 |
+
0.02830690816044803,
|
| 51 |
+
0.04089860741049051,
|
| 52 |
+
0.04018005654215808,
|
| 53 |
+
0.08173405691981314,
|
| 54 |
+
0.07760896608233431,
|
| 55 |
+
0.20384809583425495,
|
| 56 |
+
1.0
|
| 57 |
+
],
|
| 58 |
+
"mask": [
|
| 59 |
+
true,
|
| 60 |
+
true,
|
| 61 |
+
true,
|
| 62 |
+
true,
|
| 63 |
+
true,
|
| 64 |
+
true,
|
| 65 |
+
false
|
| 66 |
+
]
|
| 67 |
+
},
|
| 68 |
+
"state": {
|
| 69 |
+
"mean": [
|
| 70 |
+
0.15471647679805756,
|
| 71 |
+
0.015362698584794998,
|
| 72 |
+
0.032221030443906784,
|
| 73 |
+
0.003244664054363966,
|
| 74 |
+
-0.03860040009021759,
|
| 75 |
+
0.053836673498153687,
|
| 76 |
+
0.0,
|
| 77 |
+
0.3540630638599396
|
| 78 |
+
],
|
| 79 |
+
"std": [
|
| 80 |
+
0.16053484955349273,
|
| 81 |
+
0.06677967282050112,
|
| 82 |
+
0.048657228333866744,
|
| 83 |
+
0.09275332557452463,
|
| 84 |
+
0.1256707374939083,
|
| 85 |
+
0.4122273237945067,
|
| 86 |
+
0.0,
|
| 87 |
+
0.4330196238719565
|
| 88 |
+
],
|
| 89 |
+
"max": [
|
| 90 |
+
0.5862360596656799,
|
| 91 |
+
0.4034728705883026,
|
| 92 |
+
0.3568263053894043,
|
| 93 |
+
1.3517684936523438,
|
| 94 |
+
1.570796251296997,
|
| 95 |
+
3.141204357147217,
|
| 96 |
+
0.0,
|
| 97 |
+
1.1121242046356201
|
| 98 |
+
],
|
| 99 |
+
"min": [
|
| 100 |
+
-0.04167502000927925,
|
| 101 |
+
-0.3563207685947418,
|
| 102 |
+
-0.15537554025650024,
|
| 103 |
+
-3.141592502593994,
|
| 104 |
+
-1.4992541074752808,
|
| 105 |
+
-3.14153790473938,
|
| 106 |
+
0.0,
|
| 107 |
+
0.04637829214334488
|
| 108 |
+
],
|
| 109 |
+
"q01": [
|
| 110 |
+
0.17102580681443214,
|
| 111 |
+
-0.16981234937906264,
|
| 112 |
+
-0.05563282176852226,
|
| 113 |
+
-0.36493386059999466,
|
| 114 |
+
-0.5418747025728226,
|
| 115 |
+
-1.3542919230461121,
|
| 116 |
+
0.0,
|
| 117 |
+
0.052190229296684265
|
| 118 |
+
],
|
| 119 |
+
"q99": [
|
| 120 |
+
0.45322125554084775,
|
| 121 |
+
0.2354859386384485,
|
| 122 |
+
0.19489662453532214,
|
| 123 |
+
0.3779941478371616,
|
| 124 |
+
0.2756884342432019,
|
| 125 |
+
1.8500668883323654,
|
| 126 |
+
0.0,
|
| 127 |
+
1.0105689764022827
|
| 128 |
+
]
|
| 129 |
+
},
|
| 130 |
+
"num_transitions": 1305714,
|
| 131 |
+
"num_trajectories": 53192
|
| 132 |
+
},
|
| 133 |
+
"oxe_rt1": {
|
| 134 |
+
"action": {
|
| 135 |
+
"mean": [
|
| 136 |
+
0.003493865951895714,
|
| 137 |
+
0.0031329391058534384,
|
| 138 |
+
-0.006312889512628317,
|
| 139 |
+
0.021666156128048897,
|
| 140 |
+
-0.002877477090805769,
|
| 141 |
+
0.0004563163092825562,
|
| 142 |
+
0.26771068572998047
|
| 143 |
+
],
|
| 144 |
+
"std": [
|
| 145 |
+
0.04906474415809708,
|
| 146 |
+
0.04229872223842697,
|
| 147 |
+
0.05237597200308955,
|
| 148 |
+
0.11248535895810344,
|
| 149 |
+
0.09312825582599045,
|
| 150 |
+
0.10319124548215834,
|
| 151 |
+
0.441845103587406
|
| 152 |
+
],
|
| 153 |
+
"max": [
|
| 154 |
+
2.9984593391418457,
|
| 155 |
+
22.09052848815918,
|
| 156 |
+
2.7507524490356445,
|
| 157 |
+
1.570636510848999,
|
| 158 |
+
1.5321086645126343,
|
| 159 |
+
1.5691522359848022,
|
| 160 |
+
1.0
|
| 161 |
+
],
|
| 162 |
+
"min": [
|
| 163 |
+
-2.0204520225524902,
|
| 164 |
+
-5.497899532318115,
|
| 165 |
+
-2.031663417816162,
|
| 166 |
+
-1.569917917251587,
|
| 167 |
+
-1.569892168045044,
|
| 168 |
+
-1.570419430732727,
|
| 169 |
+
0.0
|
| 170 |
+
],
|
| 171 |
+
"q01": [
|
| 172 |
+
-0.2245360141992569,
|
| 173 |
+
-0.14820106267929076,
|
| 174 |
+
-0.23158982083201407,
|
| 175 |
+
-0.3517777299880981,
|
| 176 |
+
-0.4192772650718689,
|
| 177 |
+
-0.43643518328666686,
|
| 178 |
+
0.0
|
| 179 |
+
],
|
| 180 |
+
"q99": [
|
| 181 |
+
0.1782463169097901,
|
| 182 |
+
0.14938431486487408,
|
| 183 |
+
0.21841673687100444,
|
| 184 |
+
0.5892668181657792,
|
| 185 |
+
0.3527275875210766,
|
| 186 |
+
0.44796794503927273,
|
| 187 |
+
1.0
|
| 188 |
+
],
|
| 189 |
+
"mask": [
|
| 190 |
+
true,
|
| 191 |
+
true,
|
| 192 |
+
true,
|
| 193 |
+
true,
|
| 194 |
+
true,
|
| 195 |
+
true,
|
| 196 |
+
false
|
| 197 |
+
]
|
| 198 |
+
},
|
| 199 |
+
"state": {
|
| 200 |
+
"mean": [
|
| 201 |
+
0.279946893453598,
|
| 202 |
+
-0.04167007654905319,
|
| 203 |
+
0.3885466456413269,
|
| 204 |
+
0.21306714415550232,
|
| 205 |
+
-0.12402169406414032,
|
| 206 |
+
0.24756763875484467,
|
| 207 |
+
0.04633009061217308,
|
| 208 |
+
0.10487376153469086
|
| 209 |
+
],
|
| 210 |
+
"std": [
|
| 211 |
+
0.29342589017967113,
|
| 212 |
+
0.09173989695598195,
|
| 213 |
+
0.4256945884267932,
|
| 214 |
+
0.3861626196491199,
|
| 215 |
+
0.38314687041941975,
|
| 216 |
+
0.4443359860932721,
|
| 217 |
+
0.12639273126219563,
|
| 218 |
+
0.22122626649401464
|
| 219 |
+
],
|
| 220 |
+
"max": [
|
| 221 |
+
1.0534898042678833,
|
| 222 |
+
0.48018959164619446,
|
| 223 |
+
1.6896663904190063,
|
| 224 |
+
1.0,
|
| 225 |
+
0.9999993443489075,
|
| 226 |
+
0.9999874830245972,
|
| 227 |
+
0.9554369449615479,
|
| 228 |
+
0.9914546012878418
|
| 229 |
+
],
|
| 230 |
+
"min": [
|
| 231 |
+
-0.4436439275741577,
|
| 232 |
+
-0.9970501065254211,
|
| 233 |
+
-0.006579156965017319,
|
| 234 |
+
0.0,
|
| 235 |
+
-0.8643477559089661,
|
| 236 |
+
-0.7079970240592957,
|
| 237 |
+
-0.7688722014427185,
|
| 238 |
+
-0.4999994933605194
|
| 239 |
+
],
|
| 240 |
+
"q01": [
|
| 241 |
+
0.32481366634368897,
|
| 242 |
+
-0.2833433499932289,
|
| 243 |
+
0.14107060477137565,
|
| 244 |
+
0.0,
|
| 245 |
+
-0.6864742285013199,
|
| 246 |
+
-0.6808923971652985,
|
| 247 |
+
-0.36045609444379806,
|
| 248 |
+
-0.45438114255666734
|
| 249 |
+
],
|
| 250 |
+
"q99": [
|
| 251 |
+
0.8750162518024447,
|
| 252 |
+
0.21247095301747337,
|
| 253 |
+
1.0727114248275758,
|
| 254 |
+
1.0,
|
| 255 |
+
0.937787775397301,
|
| 256 |
+
0.9563058441877368,
|
| 257 |
+
0.4599010077118876,
|
| 258 |
+
0.7216041576862335
|
| 259 |
+
]
|
| 260 |
+
},
|
| 261 |
+
"num_transitions": 3786152,
|
| 262 |
+
"num_trajectories": 87212
|
| 263 |
+
}
|
| 264 |
+
}
|
run_oxe_train.sh
ADDED
|
@@ -0,0 +1,136 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
#!/bin/bash
|
| 3 |
+
# Usage: run on a compute node with GPUs
|
| 4 |
+
# srun --jobid=<JOB_ID> --overlap --pty bash examples/SimplerEnv/train_files/run_oxe_train.sh
|
| 5 |
+
set -e
|
| 6 |
+
|
| 7 |
+
# === Conda setup ===
|
| 8 |
+
source /cm/shared/apps/Anaconda3/2023.09-0/etc/profile.d/conda.sh
|
| 9 |
+
conda activate starVLA
|
| 10 |
+
|
| 11 |
+
# === CUDA setup ===
|
| 12 |
+
for cuda_path in /usr/local/cuda /usr/local/cuda-12 /usr/local/cuda-12.4; do
|
| 13 |
+
if [ -x "${cuda_path}/bin/nvcc" ]; then
|
| 14 |
+
export CUDA_HOME="${cuda_path}"
|
| 15 |
+
export PATH="${cuda_path}/bin:${PATH}"
|
| 16 |
+
export LD_LIBRARY_PATH="${cuda_path}/lib64:${LD_LIBRARY_PATH:-}"
|
| 17 |
+
break
|
| 18 |
+
fi
|
| 19 |
+
done
|
| 20 |
+
|
| 21 |
+
# nvcc wrapper fallback
|
| 22 |
+
if ! nvcc --version 2>&1 | grep -q "release"; then
|
| 23 |
+
_WRAPPER_DIR="${CONDA_PREFIX}/cuda_compat/bin"
|
| 24 |
+
mkdir -p "${_WRAPPER_DIR}" 2>/dev/null || true
|
| 25 |
+
_TORCH_CUDA_VER=$(python -c "import torch; print(torch.version.cuda)" 2>/dev/null || echo "12.4")
|
| 26 |
+
_MAJOR=$(echo "${_TORCH_CUDA_VER}" | cut -d. -f1)
|
| 27 |
+
_MINOR=$(echo "${_TORCH_CUDA_VER}" | cut -d. -f2)
|
| 28 |
+
cat > "${_WRAPPER_DIR}/nvcc" << NVCC_EOF
|
| 29 |
+
#!/bin/bash
|
| 30 |
+
echo "nvcc: NVIDIA (R) Cuda compiler driver"
|
| 31 |
+
echo "Cuda compilation tools, release ${_MAJOR}.${_MINOR}, V${_TORCH_CUDA_VER}"
|
| 32 |
+
NVCC_EOF
|
| 33 |
+
chmod +x "${_WRAPPER_DIR}/nvcc"
|
| 34 |
+
export PATH="${_WRAPPER_DIR}:${PATH}"
|
| 35 |
+
export CUDA_HOME="${CONDA_PREFIX}/cuda_compat"
|
| 36 |
+
echo "[INFO] Created nvcc wrapper: CUDA ${_TORCH_CUDA_VER}"
|
| 37 |
+
fi
|
| 38 |
+
|
| 39 |
+
echo "[INFO] CUDA_HOME=$CUDA_HOME"
|
| 40 |
+
nvcc --version 2>/dev/null || echo "[WARN] nvcc not found"
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
# NCCL settings for detecting stalled or failed communication (e.g. while saving checkpoints)
|
| 44 |
+
export NCCL_BLOCKING_WAIT=1
|
| 45 |
+
export NCCL_ASYNC_ERROR_HANDLING=1
|
| 46 |
+
export NCCL_TIMEOUT=10000  # timeout of 10000 (unit: seconds), i.e. about 2.8 hours, not 1 hour
|
| 47 |
+
export NCCL_SOCKET_TIMEOUT_MS=360000
|
| 48 |
+
###########################################################################################
|
| 49 |
+
# === Please modify the following paths according to your environment ===
|
| 50 |
+
cd /home/jye624/Projcets/starVLA
|
| 51 |
+
|
| 52 |
+
Framework_name=QwenPI_v3
|
| 53 |
+
freeze_module_list=''
|
| 54 |
+
base_vlm=/home/jye624/Models/Pretrained_models/Qwen3-VL-4B-Instruct
|
| 55 |
+
config_yaml=./examples/SimplerEnv/train_files/starvla_cotrain_oxe.yaml
|
| 56 |
+
oxe_data_root=./playground/Datasets/OXE_LEROBOT_DATASET
|
| 57 |
+
data_mix=bridge_rt_1
|
| 58 |
+
run_root_dir=./results/Checkpoints
|
| 59 |
+
|
| 60 |
+
run_id=0427_oxe_${data_mix}_${Framework_name}
|
| 61 |
+
# === End of environment variable configuration ===
|
| 62 |
+
###########################################################################################
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
# export WANDB_MODE=disabled
|
| 66 |
+
|
| 67 |
+
output_dir=${run_root_dir}/${run_id}
|
| 68 |
+
mkdir -p ${output_dir}
|
| 69 |
+
# copy (not move) this script into the output dir so the launch settings are kept with the run
|
| 70 |
+
cp $0 ${output_dir}/
|
| 71 |
+
|
| 72 |
+
num_processes=${NUM_PROCESSES:-$(nvidia-smi -L | wc -l)}
|
| 73 |
+
attn_implementation=${ATTN_IMPLEMENTATION:-flash_attention_2}
|
| 74 |
+
accelerate_config_file=${ACCELERATE_CONFIG_FILE:-starVLA/config/deepseeds/deepspeed_zero2.yaml}
|
| 75 |
+
# Use port 0 to let the system auto-select a free port, avoiding conflicts when multiple jobs land on the same node
|
| 76 |
+
main_process_port=${MAIN_PROCESS_PORT:-0}
|
| 77 |
+
|
| 78 |
+
# Never hard-code credentials in a shared script: only re-export a key that the caller already provided (otherwise W&B falls back to its own login state, e.g. `wandb login`).
if [ -n "${WANDB_API_KEY:-}" ]; then export WANDB_API_KEY; fi
|
| 79 |
+
export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
# Fix: ensure vonneumann1 group is active for NFS file access on compute nodes
|
| 83 |
+
if id -nG 2>/dev/null | grep -qw vonneumann1; then
|
| 84 |
+
export _STARVLA_GROUP_FIX=vonneumann1
|
| 85 |
+
echo "[INFO] Group vonneumann1 detected, using sg for NFS access"
|
| 86 |
+
fi
|
| 87 |
+
|
| 88 |
+
# Resolve conda activation command for sub-shells (sg spawns a new shell)
|
| 89 |
+
CONDA_BASE=$(conda info --base 2>/dev/null || echo "${CONDA_PREFIX%/envs/*}")
|
| 90 |
+
CONDA_INIT="source ${CONDA_BASE}/etc/profile.d/conda.sh && conda activate ${CONDA_DEFAULT_ENV:-starVLA}"
|
| 91 |
+
|
| 92 |
+
sg vonneumann1 -c "
|
| 93 |
+
${CONDA_INIT} && \
|
| 94 |
+
accelerate launch \
|
| 95 |
+
--config_file ${accelerate_config_file} \
|
| 96 |
+
--num_processes ${num_processes} \
--main_process_port ${main_process_port} \
|
| 97 |
+
starVLA/training/train_starvla.py \
|
| 98 |
+
--config_yaml ${config_yaml} \
|
| 99 |
+
--framework.name ${Framework_name} \
|
| 100 |
+
--framework.qwenvl.base_vlm ${base_vlm} \
|
| 101 |
+
--datasets.vla_data.data_root_dir ${oxe_data_root} \
|
| 102 |
+
--datasets.vla_data.data_mix ${data_mix} \
|
| 103 |
+
--datasets.vla_data.per_device_batch_size 24 \
|
| 104 |
+
--trainer.vla_data.video_backend pyav \
|
| 105 |
+
--framework.qwenvl.attn_implementation ${attn_implementation} \
|
| 106 |
+
--trainer.freeze_modules ${freeze_module_list} \
|
| 107 |
+
--trainer.max_train_steps 100000 \
|
| 108 |
+
--trainer.save_interval 10000 \
|
| 109 |
+
--trainer.logging_frequency 1000 \
|
| 110 |
+
--trainer.eval_interval 1000 \
|
| 111 |
+
--run_root_dir ${run_root_dir} \
|
| 112 |
+
--run_id ${run_id} \
|
| 113 |
+
--trainer.is_resume True \
|
| 114 |
+
--wandb_project starVLA_simplerEnv \
|
| 115 |
+
--wandb_entity jinhuiye
|
| 116 |
+
"
|
| 117 |
+
|
| 118 |
+
|
| 119 |
+
|
| 120 |
+
##### Multi-Server Multi-GPU training script #####
|
| 121 |
+
# accelerate launch \
|
| 122 |
+
# --config_file starVLA/config/deepseeds/deepspeed_zero2.yaml \
|
| 123 |
+
# --main_process_ip $MASTER_ADDR \
|
| 124 |
+
# --main_process_port $MASTER_PORT \
|
| 125 |
+
# --machine_rank $SLURM_PROCID \
|
| 126 |
+
# --num_machines $SLURM_NNODES \
|
| 127 |
+
# --num_processes=${TOTAL_GPUS} \
|
| 128 |
+
# starVLA/training/train_starvla.py \
|
| 129 |
+
# --config_yaml ${config_yaml} \
|
| 130 |
+
# --framework.name ${Framework_name} \
|
| 131 |
+
# --framework.qwenvl.base_vlm ${base_vlm} \
|
| 132 |
+
# --run_root_dir ${run_root_dir} \
|
| 133 |
+
# --run_id ${run_id} \
|
| 134 |
+
# --wandb_project your_project \
|
| 135 |
+
# --wandb_entity your_name
|
| 136 |
+
##### Multi-Server Multi-GPU training script #####
|
success_summary/raw_success.txt
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
steps_10000_pytorch_model_infer_PutCarrotOnPlateInScene-v0.log.run1 → Average success: 0.125
|
| 2 |
+
steps_10000_pytorch_model_infer_PutEggplantInBasketScene-v0.log.run1 → Average success: 0.2916666666666667
|
| 3 |
+
steps_10000_pytorch_model_infer_PutSpoonOnTableClothInScene-v0.log.run1 → Average success: 0.25
|
| 4 |
+
steps_10000_pytorch_model_infer_StackGreenCubeOnYellowCubeBakedTexInScene-v0.log.run1 → Average success: 0.125
|
| 5 |
+
steps_20000_pytorch_model_infer_PutCarrotOnPlateInScene-v0.log.run1 → Average success: 0.375
|
| 6 |
+
steps_20000_pytorch_model_infer_PutEggplantInBasketScene-v0.log.run1 → Average success: 0.9166666666666666
|
| 7 |
+
steps_20000_pytorch_model_infer_PutSpoonOnTableClothInScene-v0.log.run1 → Average success: 0.4166666666666667
|
| 8 |
+
steps_20000_pytorch_model_infer_StackGreenCubeOnYellowCubeBakedTexInScene-v0.log.run1 → Average success: 0.125
|
| 9 |
+
steps_30000_pytorch_model_infer_PutCarrotOnPlateInScene-v0.log.run1 → Average success: 0.25
|
| 10 |
+
steps_30000_pytorch_model_infer_PutEggplantInBasketScene-v0.log.run1 → Average success: 0.6666666666666666
|
| 11 |
+
steps_30000_pytorch_model_infer_PutSpoonOnTableClothInScene-v0.log.run1 → Average success: 0.625
|
| 12 |
+
steps_30000_pytorch_model_infer_StackGreenCubeOnYellowCubeBakedTexInScene-v0.log.run1 → Average success: 0.08333333333333333
|
| 13 |
+
steps_40000_pytorch_model_infer_PutCarrotOnPlateInScene-v0.log.run1 → Average success: 0.625
|
| 14 |
+
steps_40000_pytorch_model_infer_PutCarrotOnPlateInScene-v0.log.run2 → Average success: 0.75
|
| 15 |
+
steps_40000_pytorch_model_infer_PutEggplantInBasketScene-v0.log.run1 → Average success: 0.875
|
| 16 |
+
steps_40000_pytorch_model_infer_PutEggplantInBasketScene-v0.log.run2 → Average success: 0.9583333333333334
|
| 17 |
+
steps_40000_pytorch_model_infer_PutSpoonOnTableClothInScene-v0.log.run1 → Average success: 0.75
|
| 18 |
+
steps_40000_pytorch_model_infer_PutSpoonOnTableClothInScene-v0.log.run2 → Average success: 0.75
|
| 19 |
+
steps_40000_pytorch_model_infer_StackGreenCubeOnYellowCubeBakedTexInScene-v0.log.run1 → Average success: 0.375
|
| 20 |
+
steps_40000_pytorch_model_infer_StackGreenCubeOnYellowCubeBakedTexInScene-v0.log.run2 → Average success: 0.2916666666666667
|
| 21 |
+
steps_50000_pytorch_model_infer_PutCarrotOnPlateInScene-v0.log.run1 → Average success: 0.625
|
| 22 |
+
steps_50000_pytorch_model_infer_PutEggplantInBasketScene-v0.log.run1 → Average success: 1.0
|
| 23 |
+
steps_50000_pytorch_model_infer_PutSpoonOnTableClothInScene-v0.log.run1 → Average success: 0.7916666666666666
|
| 24 |
+
steps_50000_pytorch_model_infer_StackGreenCubeOnYellowCubeBakedTexInScene-v0.log.run1 → Average success: 0.375
|
| 25 |
+
steps_60000_pytorch_model_infer_PutCarrotOnPlateInScene-v0.log.run1 → Average success: 0.6666666666666666
|
| 26 |
+
steps_60000_pytorch_model_infer_PutEggplantInBasketScene-v0.log.run1 → Average success: 1.0
|
| 27 |
+
steps_60000_pytorch_model_infer_PutSpoonOnTableClothInScene-v0.log.run1 → Average success: 0.75
|
| 28 |
+
steps_60000_pytorch_model_infer_StackGreenCubeOnYellowCubeBakedTexInScene-v0.log.run1 → Average success: 0.16666666666666666
|
| 29 |
+
|
success_summary/success_plot.png
ADDED
|
success_summary/success_summary.csv
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
step,PutCarrotOnPlateInScene,PutEggplantInBasketScene,PutSpoonOnTableClothInScene,StackGreenCubeOnYellowCubeBakedTexInScene,Average Across Tasks
|
| 2 |
+
10000,0.125,0.2916666666666667,0.25,0.125,0.19791666666666669
|
| 3 |
+
20000,0.375,0.9166666666666666,0.4166666666666667,0.125,0.4583333333333333
|
| 4 |
+
30000,0.25,0.6666666666666666,0.625,0.08333333333333333,0.40624999999999994
|
| 5 |
+
40000,0.6875,0.9166666666666667,0.75,0.33333333333333337,0.6718750000000001
|
| 6 |
+
50000,0.625,1.0,0.7916666666666666,0.375,0.6979166666666666
|
| 7 |
+
60000,0.6666666666666666,1.0,0.75,0.16666666666666666,0.6458333333333333
|
summary.jsonl
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"steps": 10}
|
| 2 |
+
{"steps": 10000}
|
| 3 |
+
{"steps": 20000}
|
| 4 |
+
{"steps": 30000}
|
| 5 |
+
{"steps": 40000}
|
| 6 |
+
{"steps": 50000}
|
| 7 |
+
{"steps": 60000}
|
wandb/wandb/debug-internal.log
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
wandb/wandb/debug.log
ADDED
|
File without changes
|
wandb/wandb/run-20260426_011111-enstjn5q/files/output.log
ADDED
|
@@ -0,0 +1,322 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[2;36m04/26 [01:11:14][0m[2;36m [0m[34mINFO [0m | >> ***** Training Configuration ***** ]8;id=471029;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=617889;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#355\[2m355[0m]8;;\
|
| 2 |
+
[2;36m [0m[2;36m [0m[34mINFO [0m | >> Total optimization steps = [1;36m100000[0m ]8;id=844962;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=167414;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#356\[2m356[0m]8;;\
|
| 3 |
+
[2;36m [0m[2;36m [0m[34mINFO [0m | >> Per device batch size = [1;36m24[0m ]8;id=225772;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=800581;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#357\[2m357[0m]8;;\
|
| 4 |
+
[2;36m [0m[2;36m [0m[34mINFO [0m | >> Gradient accumulation steps = [1;36m1[0m ]8;id=376417;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=888662;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#358\[2m358[0m]8;;\
|
| 5 |
+
[2;36m [0m[2;36m [0m[34mINFO [0m | >> Total batch size = [1;36m192[0m ]8;id=765179;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=481741;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#359\[2m359[0m]8;;\
|
| 6 |
+
7%|█▌ | 7000/100000 [2:12:44<29:30:36, 1.14s/it, data_times=0.001, model_times=1.090]
|
| 7 |
+
[2;36m04/26 [01:30:39][0m[2;36m [0m[34mINFO [0m | >> Step [1;36m1000[0m, Loss: [1m{[0m[32m'action_dit_loss'[0m: ]8;id=578856;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=307419;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#271\[2m271[0m]8;;\
|
| 8 |
+
[2;36m [0m [1;36m0.41287872195243835[0m, [32m'mse_score'[0m: [1;36m0.006587725310098557[0m, [2m [0m
|
| 9 |
+
[2;36m [0m [32m'data_time'[0m: [1;36m0.023512821993790567[0m, [32m'model_time'[0m: [2m [0m
|
| 10 |
+
[2;36m [0m [1;36m1.1061716680414975[0m, [32m'learning_rate'[0m: [1;36m2.0000000000000003e-06[0m, [2m [0m
|
| 11 |
+
[2;36m [0m [32m'epoch'[0m: [1;36m0.03[0m[1m}[0m[1m)[0m [2m [0m
|
| 12 |
+
[2;36m04/26 [01:49:32][0m[2;36m [0m[34mINFO [0m | >> Step [1;36m2000[0m, Loss: [1m{[0m[32m'action_dit_loss'[0m: ]8;id=903565;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=379201;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#271\[2m271[0m]8;;\
|
| 13 |
+
[2;36m [0m [1;36m0.31383267045021057[0m, [32m'mse_score'[0m: [1;36m0.006785546739896138[0m, [2m [0m
|
| 14 |
+
[2;36m [0m [32m'data_time'[0m: [1;36m0.0012596730375662446[0m, [32m'model_time'[0m: [2m [0m
|
| 15 |
+
[2;36m [0m [1;36m1.0688785739475861[0m, [32m'learning_rate'[0m: [1;36m4.000000000000001e-06[0m, [2m [0m
|
| 16 |
+
[2;36m [0m [32m'epoch'[0m: [1;36m0.05[0m[1m}[0m[1m)[0m [2m [0m
|
| 17 |
+
[2;36m04/26 [02:08:28][0m[2;36m [0m[34mINFO [0m | >> Step [1;36m3000[0m, Loss: [1m{[0m[32m'action_dit_loss'[0m: ]8;id=48050;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=693384;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#271\[2m271[0m]8;;\
|
| 18 |
+
[2;36m [0m [1;36m0.19595405459403992[0m, [32m'mse_score'[0m: [1;36m0.00602614666734423[0m, [2m [0m
|
| 19 |
+
[2;36m [0m [32m'data_time'[0m: [1;36m0.0005622160388156772[0m, [32m'model_time'[0m: [2m [0m
|
| 20 |
+
[2;36m [0m [1;36m1.089529522927478[0m, [32m'learning_rate'[0m: [1;36m6e-06[0m, [32m'epoch'[0m: [1;36m0.08[0m[1m}[0m[1m)[0m [2m [0m
|
| 21 |
+
[2;36m04/26 [02:27:21][0m[2;36m [0m[34mINFO [0m | >> Step [1;36m4000[0m, Loss: [1m{[0m[32m'action_dit_loss'[0m: ]8;id=896865;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=244098;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#271\[2m271[0m]8;;\
|
| 22 |
+
[2;36m [0m [1;36m0.23803943395614624[0m, [32m'mse_score'[0m: [1;36m0.006116567623047601[0m, [2m [0m
|
| 23 |
+
[2;36m [0m [32m'data_time'[0m: [1;36m0.014886713004671037[0m, [32m'model_time'[0m: [2m [0m
|
| 24 |
+
[2;36m [0m [1;36m1.0681698899716139[0m, [32m'learning_rate'[0m: [1;36m8.000000000000001e-06[0m, [2m [0m
|
| 25 |
+
[2;36m [0m [32m'epoch'[0m: [1;36m0.1[0m[1m}[0m[1m)[0m [2m [0m
|
| 26 |
+
[2;36m04/26 [02:46:14][0m[2;36m [0m[34mINFO [0m | >> Step [1;36m5000[0m, Loss: [1m{[0m[32m'action_dit_loss'[0m: ]8;id=475435;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=666563;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#271\[2m271[0m]8;;\
|
| 27 |
+
[2;36m [0m [1;36m0.21792519092559814[0m, [32m'mse_score'[0m: [1;36m0.006198919245174953[0m, [2m [0m
|
| 28 |
+
[2;36m [0m [32m'data_time'[0m: [1;36m0.01555767108220607[0m, [32m'model_time'[0m: [2m [0m
|
| 29 |
+
[2;36m [0m [1;36m1.1287320599658415[0m, [32m'learning_rate'[0m: [1;36m1e-05[0m, [32m'epoch'[0m: [1;36m0.13[0m[1m}[0m[1m)[0m [2m [0m
|
| 30 |
+
[2;36m04/26 [03:05:06][0m[2;36m [0m[34mINFO [0m | >> Step [1;36m6000[0m, Loss: [1m{[0m[32m'action_dit_loss'[0m: ]8;id=372528;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=219684;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#271\[2m271[0m]8;;\
|
| 31 |
+
[2;36m [0m [1;36m0.2961162328720093[0m, [32m'mse_score'[0m: [1;36m0.0054819346183822264[0m, [2m [0m
|
| 32 |
+
[2;36m [0m [32m'data_time'[0m: [1;36m0.0010915560415014625[0m, [32m'model_time'[0m: [2m [0m
|
| 33 |
+
[2;36m [0m [1;36m1.0691960570402443[0m, [32m'learning_rate'[0m: [1;36m9.9974029723694e-06[0m, [2m [0m
|
| 34 |
+
[2;36m [0m [32m'epoch'[0m: [1;36m0.15[0m[1m}[0m[1m)[0m [2m [0m
|
| 35 |
+
[2;36m04/26 [03:23:59][0m[2;36m [0m[34mINFO [0m | >> Step [1;36m7000[0m, Loss: [1m{[0m[32m'action_dit_loss'[0m: ]8;id=716751;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=679514;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#271\[2m271[0m]8;;\
|
| 36 |
+
[2;36m [0m [1;36m0.16125820577144623[0m, [32m'mse_score'[0m: [1;36m0.005778901633762178[0m, [2m [0m
|
| 37 |
+
[2;36m [0m [32m'data_time'[0m: [1;36m0.001105302944779396[0m, [32m'model_time'[0m: [2m [0m
|
| 38 |
+
[2;36m [0m [1;36m1.0903944559395313[0m, [32m'learning_rate'[0m: [1;36m9.98961472928918e-06[0m, [2m [0m
|
| 39 |
+
[2;36m [0m [32m'epoch'[0m: [1;36m0.18[0m[1m}[0m[1m)[0m [2m [0m
|
| 40 |
+
[2;36m04/26 [03:42:55][0m[2;36m [0m[34mINFO [0m | >> Step [1;36m8000[0m, Loss: [1m{[0m[32m'action_dit_loss'[0m: ]8;id=560086;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=764544;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#271\[2m271[0m]8;;\
|
| 41 |
+
[2;36m [0m [1;36m0.24051445722579956[0m, [32m'mse_score'[0m: [1;36m0.006084372599919637[0m, [2m [0m
|
| 42 |
+
[2;36m [0m [32m'data_time'[0m: [1;36m0.01449370407499373[0m, [32m'model_time'[0m: [2m [0m
|
| 43 |
+
[2;36m [0m [1;36m1.0696819460717961[0m, [32m'learning_rate'[0m: [1;36m9.976643787088806e-06[0m, [2m [0m
|
| 44 |
+
[2;36m [0m [32m'epoch'[0m: [1;36m0.2[0m[1m}[0m[1m)[0m [2m [0m
|
| 45 |
+
[2;36m04/26 [04:01:45][0m[2;36m [0m[34mINFO [0m | >> Step [1;36m9000[0m, Loss: [1m{[0m[32m'action_dit_loss'[0m: ]8;id=283060;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=970342;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#271\[2m271[0m]8;;\
|
| 46 |
+
[2;36m [0m [1;36m0.20358465611934662[0m, [32m'mse_score'[0m: [1;36m0.005045601299830845[0m, [2m [0m
|
| 47 |
+
[2;36m [0m [32m'data_time'[0m: [1;36m0.016380950924940407[0m, [32m'model_time'[0m: [2m [0m
|
| 48 |
+
[2;36m [0m [1;36m1.0907069341046736[0m, [32m'learning_rate'[0m: [1;36m9.958504329303132e-06[0m, [2m [0m
|
| 49 |
+
[2;36m [0m [32m'epoch'[0m: [1;36m0.23[0m[1m}[0m[1m)[0m [2m [0m
|
| 50 |
+
[2;36m04/26 [04:20:42][0m[2;36m [0m[34mINFO [0m | >> Step [1;36m10000[0m, Loss: [1m{[0m[32m'action_dit_loss'[0m: ]8;id=717870;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=340035;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#271\[2m271[0m]8;;\
|
| 51 |
+
[2;36m [0m [1;36m0.21283088624477386[0m, [32m'mse_score'[0m: [1;36m0.0054307544515246435[0m, [2m [0m
|
| 52 |
+
[2;36m [0m [32m'data_time'[0m: [1;36m0.0010308929486200213[0m, [32m'model_time'[0m: [2m [0m
|
| 53 |
+
[2;36m [0m [1;36m1.0539058269932866[0m, [32m'learning_rate'[0m: [1;36m9.935216191162932e-06[0m, [2m [0m
|
| 54 |
+
[2;36m [0m [32m'epoch'[0m: [1;36m0.25[0m[1m}[0m[1m)[0m [2m [0m
|
| 55 |
+
✅ Checkpoint saved at ./results/Checkpoints/0427_oxe_bridge_rt_1_QwenPI_v3/checkpoints/steps_10000
|
| 56 |
+
[2;36m04/26 [04:21:16][0m[2;36m [0m[34mINFO [0m | >> 📊 Saving accessed configuration[33m...[0m ]8;id=861722;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=33659;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#258\[2m258[0m]8;;\
|
| 57 |
+
[2;36m [0m[2;36m [0m[34mINFO [0m | >> ✅ Configuration files saved ]8;id=221231;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=957492;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#261\[2m261[0m]8;;\
|
| 58 |
+
[2;36m04/26 [04:40:08][0m[2;36m [0m[34mINFO [0m | >> Step [1;36m11000[0m, Loss: [1m{[0m[32m'action_dit_loss'[0m: ]8;id=329963;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=222955;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#271\[2m271[0m]8;;\
|
| 59 |
+
[2;36m [0m [1;36m0.1573459506034851[0m, [32m'mse_score'[0m: [1;36m0.005605340358756837[0m, [2m [0m
|
| 60 |
+
[2;36m [0m [32m'data_time'[0m: [1;36m0.0005969370249658823[0m, [32m'model_time'[0m: [2m [0m
|
| 61 |
+
[2;36m [0m [1;36m1.1335041610291228[0m, [32m'learning_rate'[0m: [1;36m9.906804837905345e-06[0m, [2m [0m
|
| 62 |
+
[2;36m [0m [32m'epoch'[0m: [1;36m0.28[0m[1m}[0m[1m)[0m [2m [0m
|
| 63 |
+
[2;36m04/26 [04:59:02][0m[2;36m [0m[34mINFO [0m | >> Step [1;36m12000[0m, Loss: [1m{[0m[32m'action_dit_loss'[0m: ]8;id=958972;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=674079;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#271\[2m271[0m]8;;\
|
| 64 |
+
[2;36m [0m [1;36m0.2110031098127365[0m, [32m'mse_score'[0m: [1;36m0.005528707234632401[0m, [2m [0m
|
| 65 |
+
[2;36m [0m [32m'data_time'[0m: [1;36m0.01464720896910876[0m, [32m'model_time'[0m: [2m [0m
|
| 66 |
+
[2;36m [0m [1;36m1.1476604999043047[0m, [32m'learning_rate'[0m: [1;36m9.873301336928028e-06[0m, [2m [0m
|
| 67 |
+
[2;36m [0m [32m'epoch'[0m: [1;36m0.3[0m[1m}[0m[1m)[0m [2m [0m
|
| 68 |
+
[2;36m04/26 [05:17:59][0m[2;36m [0m[34mINFO [0m | >> Step [1;36m13000[0m, Loss: [1m{[0m[32m'action_dit_loss'[0m: ]8;id=258607;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=781177;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#271\[2m271[0m]8;;\
|
| 69 |
+
[2;36m [0m [1;36m0.18376827239990234[0m, [32m'mse_score'[0m: [1;36m0.004833251592658815[0m, [2m [0m
|
| 70 |
+
[2;36m [0m [32m'data_time'[0m: [1;36m0.0171306460397318[0m, [32m'model_time'[0m: [2m [0m
|
| 71 |
+
[2;36m [0m [1;36m1.0867095300927758[0m, [32m'learning_rate'[0m: [1;36m9.834742323817407e-06[0m, [2m [0m
|
| 72 |
+
[2;36m [0m [32m'epoch'[0m: [1;36m0.33[0m[1m}[0m[1m)[0m [2m [0m
|
| 73 |
+
[2;36m04/26 [05:36:50][0m[2;36m [0m[34mINFO [0m | >> Step [1;36m14000[0m, Loss: [1m{[0m[32m'action_dit_loss'[0m: ]8;id=612982;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=449245;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#271\[2m271[0m]8;;\
|
| 74 |
+
[2;36m [0m [1;36m0.15762470662593842[0m, [32m'mse_score'[0m: [1;36m0.005961580645470392[0m, [2m [0m
|
| 75 |
+
[2;36m [0m [32m'data_time'[0m: [1;36m0.0005706310039386153[0m, [32m'model_time'[0m: [2m [0m
|
| 76 |
+
[2;36m [0m [1;36m1.0944029649253935[0m, [32m'learning_rate'[0m: [1;36m9.7911699622882e-06[0m, [2m [0m
|
| 77 |
+
[2;36m [0m [32m'epoch'[0m: [1;36m0.35[0m[1m}[0m[1m)[0m [2m [0m
|
| 78 |
+
[2;36m04/26 [05:55:42][0m[2;36m [0m[34mINFO [0m | >> Step [1;36m15000[0m, Loss: [1m{[0m[32m'action_dit_loss'[0m: ]8;id=229974;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=145051;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#271\[2m271[0m]8;;\
|
| 79 |
+
[2;36m [0m [1;36m0.14939481019973755[0m, [32m'mse_score'[0m: [1;36m0.004519839726743244[0m, [2m [0m
|
| 80 |
+
[2;36m [0m [32m'data_time'[0m: [1;36m0.0009296479402109981[0m, [32m'model_time'[0m: [2m [0m
|
| 81 |
+
[2;36m [0m [1;36m1.1203379810322076[0m, [32m'learning_rate'[0m: [1;36m9.742631898078014e-06[0m, [2m [0m
|
| 82 |
+
[2;36m [0m [32m'epoch'[0m: [1;36m0.38[0m[1m}[0m[1m)[0m [2m [0m
|
| 83 |
+
[2;36m04/26 [06:14:38][0m[2;36m [0m[34mINFO [0m | >> Step [1;36m16000[0m, Loss: [1m{[0m[32m'action_dit_loss'[0m: ]8;id=49405;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=902931;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#271\[2m271[0m]8;;\
|
| 84 |
+
[2;36m [0m [1;36m0.14374569058418274[0m, [32m'mse_score'[0m: [1;36m0.0050108180869193305[0m, [2m [0m
|
| 85 |
+
[2;36m [0m [32m'data_time'[0m: [1;36m0.014823429053649306[0m, [32m'model_time'[0m: [2m [0m
|
| 86 |
+
[2;36m [0m [1;36m1.0832276189466938[0m, [32m'learning_rate'[0m: [1;36m9.68918120684744e-06[0m, [2m [0m
|
| 87 |
+
[2;36m [0m [32m'epoch'[0m: [1;36m0.41[0m[1m}[0m[1m)[0m [2m [0m
|
| 88 |
+
[2;36m04/26 [06:33:30][0m[2;36m [0m[34mINFO [0m | >> Step [1;36m17000[0m, Loss: [1m{[0m[32m'action_dit_loss'[0m: ]8;id=830555;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=713536;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#271\[2m271[0m]8;;\
|
| 89 |
+
[2;36m [0m [1;36m0.15882907807826996[0m, [32m'mse_score'[0m: [1;36m0.004706428874106635[0m, [2m [0m
|
| 90 |
+
[2;36m [0m [32m'data_time'[0m: [1;36m0.016839208896271884[0m, [32m'model_time'[0m: [2m [0m
|
| 91 |
+
[2;36m [0m [1;36m1.0935403839685023[0m, [32m'learning_rate'[0m: [1;36m9.630876336142578e-06[0m, [2m [0m
|
| 92 |
+
[2;36m [0m [32m'epoch'[0m: [1;36m0.43[0m[1m}[0m[1m)[0m [2m [0m
|
| 93 |
+
[2;36m04/26 [06:52:25][0m[2;36m [0m[34mINFO [0m | >> Step [1;36m18000[0m, Loss: [1m{[0m[32m'action_dit_loss'[0m: ]8;id=400156;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=624834;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#271\[2m271[0m]8;;\
|
| 94 |
+
[2;36m [0m [1;36m0.19779755175113678[0m, [32m'mse_score'[0m: [1;36m0.005086257344200497[0m, [2m [0m
|
| 95 |
+
[2;36m [0m [32m'data_time'[0m: [1;36m0.0011988269397988915[0m, [32m'model_time'[0m: [2m [0m
|
| 96 |
+
[2;36m [0m [1;36m1.1007598140276968[0m, [32m'learning_rate'[0m: [1;36m9.567781041483523e-06[0m, [2m [0m
|
| 97 |
+
[2;36m [0m [32m'epoch'[0m: [1;36m0.46[0m[1m}[0m[1m)[0m [2m [0m
|
| 98 |
+
[2;36m04/26 [07:11:21][0m[2;36m [0m[34mINFO [0m | >> Step [1;36m19000[0m, Loss: [1m{[0m[32m'action_dit_loss'[0m: ]8;id=902592;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=988210;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#271\[2m271[0m]8;;\
|
| 99 |
+
[2;36m [0m [1;36m0.2005825936794281[0m, [32m'mse_score'[0m: [1;36m0.005249518368925367[0m, [2m [0m
|
| 100 |
+
[2;36m [0m [32m'data_time'[0m: [1;36m0.0006849960191175342[0m, [32m'model_time'[0m: [2m [0m
|
| 101 |
+
[2;36m [0m [1;36m1.1035226460080594[0m, [32m'learning_rate'[0m: [1;36m9.499964316648628e-06[0m, [2m [0m
|
| 102 |
+
[2;36m [0m [32m'epoch'[0m: [1;36m0.48[0m[1m}[0m[1m)[0m [2m [0m
|
| 103 |
+
[2;36m04/26 [07:30:14][0m[2;36m [0m[34mINFO [0m | >> Step [1;36m20000[0m, Loss: [1m{[0m[32m'action_dit_loss'[0m: ]8;id=714825;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=927767;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#271\[2m271[0m]8;;\
|
| 104 |
+
[2;36m [0m [1;36m0.23199643194675446[0m, [32m'mse_score'[0m: [1;36m0.004106811114719936[0m, [2m [0m
|
| 105 |
+
[2;36m [0m [32m'data_time'[0m: [1;36m0.013686573947779834[0m, [32m'model_time'[0m: [2m [0m
|
| 106 |
+
[2;36m [0m [1;36m1.1025469119194895[0m, [32m'learning_rate'[0m: [1;36m9.427500318230823e-06[0m, [2m [0m
|
| 107 |
+
[2;36m [0m [32m'epoch'[0m: [1;36m0.51[0m[1m}[0m[1m)[0m [2m [0m
|
| 108 |
+
✅ Checkpoint saved at ./results/Checkpoints/0427_oxe_bridge_rt_1_QwenPI_v3/checkpoints/steps_20000
|
| 109 |
+
[2;36m04/26 [07:30:37][0m[2;36m [0m[34mINFO [0m | >> 📊 Saving accessed configuration[33m...[0m ]8;id=672097;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=356699;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#258\[2m258[0m]8;;\
|
| 110 |
+
[2;36m [0m[2;36m [0m[34mINFO [0m | >> ✅ Configuration files saved ]8;id=475763;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=3402;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#261\[2m261[0m]8;;\
|
| 111 |
+
[2;36m04/26 [07:49:30][0m[2;36m [0m[34mINFO [0m | >> Step [1;36m21000[0m, Loss: [1m{[0m[32m'action_dit_loss'[0m: ]8;id=524902;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=798975;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#271\[2m271[0m]8;;\
|
| 112 |
+
[2;36m [0m [1;36m0.19188104569911957[0m, [32m'mse_score'[0m: [1;36m0.004567236772605351[0m, [2m [0m
|
| 113 |
+
[2;36m [0m [32m'data_time'[0m: [1;36m0.014397618011571467[0m, [32m'model_time'[0m: [2m [0m
|
| 114 |
+
[2;36m [0m [1;36m1.1003698320128024[0m, [32m'learning_rate'[0m: [1;36m9.350468284548478e-06[0m, [2m [0m
|
| 115 |
+
[2;36m [0m [32m'epoch'[0m: [1;36m0.53[0m[1m}[0m[1m)[0m [2m [0m
|
| 116 |
+
[2;36m04/26 [08:08:22][0m[2;36m [0m[34mINFO [0m | >> Step [1;36m22000[0m, Loss: [1m{[0m[32m'action_dit_loss'[0m: ]8;id=912804;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=655674;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#271\[2m271[0m]8;;\
|
| 117 |
+
[2;36m [0m [1;36m0.16441768407821655[0m, [32m'mse_score'[0m: [1;36m0.0050850169999258855[0m, [2m [0m
|
| 118 |
+
[2;36m [0m [32m'data_time'[0m: [1;36m0.001025355071760714[0m, [32m'model_time'[0m: [2m [0m
|
| 119 |
+
[2;36m [0m [1;36m1.085655941045843[0m, [32m'learning_rate'[0m: [1;36m9.268952448999444e-06[0m, [2m [0m
|
| 120 |
+
[2;36m [0m [32m'epoch'[0m: [1;36m0.56[0m[1m}[0m[1m)[0m [2m [0m
|
| 121 |
+
[2;36m04/26 [08:27:16][0m[2;36m [0m[34mINFO [0m | >> Step [1;36m23000[0m, Loss: [1m{[0m[32m'action_dit_loss'[0m: ]8;id=638551;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=208573;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#271\[2m271[0m]8;;\
|
| 122 |
+
[2;36m [0m [1;36m0.1905813217163086[0m, [32m'mse_score'[0m: [1;36m0.004551701957271213[0m, [2m [0m
|
| 123 |
+
[2;36m [0m [32m'data_time'[0m: [1;36m0.0007312960224226117[0m, [32m'model_time'[0m: [2m [0m
|
| 124 |
+
[2;36m [0m [1;36m1.1118732010945678[0m, [32m'learning_rate'[0m: [1;36m9.183041947953097e-06[0m, [2m [0m
|
| 125 |
+
[2;36m [0m [32m'epoch'[0m: [1;36m0.58[0m[1m}[0m[1m)[0m [2m [0m
|
| 126 |
+
[2;36m04/26 [08:46:10][0m[2;36m [0m[34mINFO [0m | >> Step [1;36m24000[0m, Loss: [1m{[0m[32m'action_dit_loss'[0m: ]8;id=565579;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=999816;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#271\[2m271[0m]8;;\
|
| 127 |
+
[2;36m [0m [1;36m0.18795546889305115[0m, [32m'mse_score'[0m: [1;36m0.005052322433108375[0m, [2m [0m
|
| 128 |
+
[2;36m [0m [32m'data_time'[0m: [1;36m0.015225342009216547[0m, [32m'model_time'[0m: [2m [0m
|
| 129 |
+
[2;36m [0m [1;36m1.0696408179355785[0m, [32m'learning_rate'[0m: [1;36m9.092830723281e-06[0m, [2m [0m
|
| 130 |
+
[2;36m [0m [32m'epoch'[0m: [1;36m0.61[0m[1m}[0m[1m)[0m [2m [0m
|
| 131 |
+
[2;36m04/26 [09:05:04][0m[2;36m [0m[34mINFO [0m | >> Step [1;36m25000[0m, Loss: [1m{[0m[32m'action_dit_loss'[0m: ]8;id=599;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=628038;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#271\[2m271[0m]8;;\
|
| 132 |
+
[2;36m [0m [1;36m0.19811630249023438[0m, [32m'mse_score'[0m: [1;36m0.005336226097175053[0m, [2m [0m
|
| 133 |
+
[2;36m [0m [32m'data_time'[0m: [1;36m0.016753150965087116[0m, [32m'model_time'[0m: [2m [0m
|
| 134 |
+
[2;36m [0m [1;36m1.1177711630007252[0m, [32m'learning_rate'[0m: [1;36m8.998417419632872e-06[0m, [2m [0m
|
| 135 |
+
[2;36m [0m [32m'epoch'[0m: [1;36m0.63[0m[1m}[0m[1m)[0m [2m [0m
|
| 136 |
+
[2;36m04/26 [09:23:57][0m[2;36m [0m[34mINFO [0m | >> Step [1;36m26000[0m, Loss: [1m{[0m[32m'action_dit_loss'[0m: ]8;id=974230;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=380612;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#271\[2m271[0m]8;;\
|
| 137 |
+
[2;36m [0m [1;36m0.2119632065296173[0m, [32m'mse_score'[0m: [1;36m0.004771212381975991[0m, [2m [0m
|
| 138 |
+
[2;36m [0m [32m'data_time'[0m: [1;36m0.0006861590081825852[0m, [32m'model_time'[0m: [2m [0m
|
| 139 |
+
[2;36m [0m [1;36m1.1613712280523032[0m, [32m'learning_rate'[0m: [1;36m8.899905276570082e-06[0m, [2m [0m
|
| 140 |
+
[2;36m [0m [32m'epoch'[0m: [1;36m0.66[0m[1m}[0m[1m)[0m [2m [0m
|
| 141 |
+
[2;36m04/26 [09:42:50][0m[2;36m [0m[34mINFO [0m | >> Step [1;36m27000[0m, Loss: [1m{[0m[32m'action_dit_loss'[0m: ]8;id=251083;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=60738;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#271\[2m271[0m]8;;\
|
| 142 |
+
[2;36m [0m [1;36m0.1544923633337021[0m, [32m'mse_score'[0m: [1;36m0.004587906102339427[0m, [2m [0m
|
| 143 |
+
[2;36m [0m [32m'data_time'[0m: [1;36m0.0006635260069742799[0m, [32m'model_time'[0m: [2m [0m
|
| 144 |
+
[2;36m [0m [1;36m1.1337980959797278[0m, [32m'learning_rate'[0m: [1;36m8.797402015674731e-06[0m, [2m [0m
|
| 145 |
+
[2;36m [0m [32m'epoch'[0m: [1;36m0.68[0m[1m}[0m[1m)[0m [2m [0m
|
| 146 |
+
[2;36m04/26 [10:01:45][0m[2;36m [0m[34mINFO [0m | >> Step [1;36m28000[0m, Loss: [1m{[0m[32m'action_dit_loss'[0m: ]8;id=82582;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=89814;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#271\[2m271[0m]8;;\
|
| 147 |
+
[2;36m [0m [1;36m0.11156129091978073[0m, [32m'mse_score'[0m: [1;36m0.00432542676017398[0m, [2m [0m
|
| 148 |
+
[2;36m [0m [32m'data_time'[0m: [1;36m0.016900848015211523[0m, [32m'model_time'[0m: [2m [0m
|
| 149 |
+
[2;36m [0m [1;36m1.151624141028151[0m, [32m'learning_rate'[0m: [1;36m8.691019722757675e-06[0m, [2m [0m
|
| 150 |
+
[2;36m [0m [32m'epoch'[0m: [1;36m0.71[0m[1m}[0m[1m)[0m [2m [0m
|
| 151 |
+
[2;36m04/26 [10:20:38][0m[2;36m [0m[34mINFO [0m | >> Step [1;36m29000[0m, Loss: [1m{[0m[32m'action_dit_loss'[0m: ]8;id=797549;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=558582;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#271\[2m271[0m]8;;\
|
| 152 |
+
[2;36m [0m [1;36m0.16831044852733612[0m, [32m'mse_score'[0m: [1;36m0.004411907777899788[0m, [2m [0m
|
| 153 |
+
[2;36m [0m [32m'data_time'[0m: [1;36m0.016713427961803973[0m, [32m'model_time'[0m: [2m [0m
|
| 154 |
+
[2;36m [0m [1;36m1.0871766429627314[0m, [32m'learning_rate'[0m: [1;36m8.580874725294321e-06[0m, [2m [0m
|
| 155 |
+
[2;36m [0m [32m'epoch'[0m: [1;36m0.74[0m[1m}[0m[1m)[0m [2m [0m
|
| 156 |
+
[2;36m04/26 [10:39:28][0m[2;36m [0m[34mINFO [0m | >> Step [1;36m30000[0m, Loss: [1m{[0m[32m'action_dit_loss'[0m: ]8;id=498369;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=992842;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#271\[2m271[0m]8;;\
|
| 157 |
+
[2;36m [0m [1;36m0.15156219899654388[0m, [32m'mse_score'[0m: [1;36m0.005212076363109407[0m, [2m [0m
|
| 158 |
+
[2;36m [0m [32m'data_time'[0m: [1;36m0.0009600489865988493[0m, [32m'model_time'[0m: [2m [0m
|
| 159 |
+
[2;36m [0m [1;36m1.1054325849981979[0m, [32m'learning_rate'[0m: [1;36m8.467087465222272e-06[0m, [2m [0m
|
| 160 |
+
[2;36m [0m [32m'epoch'[0m: [1;36m0.76[0m[1m}[0m[1m)[0m [2m [0m
|
| 161 |
+
✅ Checkpoint saved at ./results/Checkpoints/0427_oxe_bridge_rt_1_QwenPI_v3/checkpoints/steps_30000
|
| 162 |
+
[2;36m04/26 [10:39:59][0m[2;36m [0m[34mINFO [0m | >> 📊 Saving accessed configuration[33m...[0m ]8;id=914812;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=636059;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#258\[2m258[0m]8;;\
|
| 163 |
+
[2;36m [0m[2;36m [0m[34mINFO [0m | >> ✅ Configuration files saved ]8;id=791952;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=765388;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#261\[2m261[0m]8;;\
|
| 164 |
+
[2;36m04/26 [10:58:50][0m[2;36m [0m[34mINFO [0m | >> Step [1;36m31000[0m, Loss: [1m{[0m[32m'action_dit_loss'[0m: ]8;id=418373;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=704314;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#271\[2m271[0m]8;;\
|
| 165 |
+
[2;36m [0m [1;36m0.1250409483909607[0m, [32m'mse_score'[0m: [1;36m0.004025513927141826[0m, [2m [0m
|
| 166 |
+
[2;36m [0m [32m'data_time'[0m: [1;36m0.0007589169545099139[0m, [32m'model_time'[0m: [2m [0m
|
| 167 |
+
[2;36m [0m [1;36m1.093366383924149[0m, [32m'learning_rate'[0m: [1;36m8.349782367239801e-06[0m, [2m [0m
|
| 168 |
+
[2;36m [0m [32m'epoch'[0m: [1;36m0.79[0m[1m}[0m[1m)[0m [2m [0m
|
| 169 |
+
[2;36m04/26 [11:17:41][0m[2;36m [0m[34mINFO [0m | >> Step [1;36m32000[0m, Loss: [1m{[0m[32m'action_dit_loss'[0m: ]8;id=542717;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=473417;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#271\[2m271[0m]8;;\
|
| 170 |
+
[2;36m [0m [1;36m0.1793639212846756[0m, [32m'mse_score'[0m: [1;36m0.004865515444959913[0m, [2m [0m
|
| 171 |
+
[2;36m [0m [32m'data_time'[0m: [1;36m0.01889383199159056[0m, [32m'model_time'[0m: [2m [0m
|
| 172 |
+
[2;36m [0m [1;36m1.0753150370437652[0m, [32m'learning_rate'[0m: [1;36m8.229087702749302e-06[0m, [2m [0m
|
| 173 |
+
[2;36m [0m [32m'epoch'[0m: [1;36m0.81[0m[1m}[0m[1m)[0m [2m [0m
|
| 174 |
+
[2;36m04/26 [11:36:30][0m[2;36m [0m[34mINFO [0m | >> Step [1;36m33000[0m, Loss: [1m{[0m[32m'action_dit_loss'[0m: ]8;id=354508;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=22056;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#271\[2m271[0m]8;;\
|
| 175 |
+
[2;36m [0m [1;36m0.14219555258750916[0m, [32m'mse_score'[0m: [1;36m0.00409266920316787[0m, [2m [0m
|
| 176 |
+
[2;36m [0m [32m'data_time'[0m: [1;36m0.016222501057200134[0m, [32m'model_time'[0m: [2m [0m
|
| 177 |
+
[2;36m [0m [1;36m1.0877664879662916[0m, [32m'learning_rate'[0m: [1;36m8.10513544959437e-06[0m, [2m [0m
|
| 178 |
+
[2;36m [0m [32m'epoch'[0m: [1;36m0.84[0m[1m}[0m[1m)[0m [2m [0m
|
| 179 |
+
[2;36m04/26 [11:55:22][0m[2;36m [0m[34mINFO [0m | >> Step [1;36m34000[0m, Loss: [1m{[0m[32m'action_dit_loss'[0m: ]8;id=230914;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=7540;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#271\[2m271[0m]8;;\
|
| 180 |
+
[2;36m [0m [1;36m0.15381206572055817[0m, [32m'mse_score'[0m: [1;36m0.004448293575218746[0m, [2m [0m
|
| 181 |
+
[2;36m [0m [32m'data_time'[0m: [1;36m0.0007345799822360277[0m, [32m'model_time'[0m: [2m [0m
|
| 182 |
+
[2;36m [0m [1;36m1.0862779319286346[0m, [32m'learning_rate'[0m: [1;36m7.978061147743983e-06[0m, [2m [0m
|
| 183 |
+
[2;36m [0m [32m'epoch'[0m: [1;36m0.86[0m[1m}[0m[1m)[0m [2m [0m
|
| 184 |
+
[2;36m04/26 [12:14:13][0m[2;36m [0m[34mINFO [0m | >> Step [1;36m35000[0m, Loss: [1m{[0m[32m'action_dit_loss'[0m: ]8;id=240062;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=70674;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#271\[2m271[0m]8;;\
|
| 185 |
+
[2;36m [0m [1;36m0.11932572722434998[0m, [32m'mse_score'[0m: [1;36m0.003858454880260286[0m, [2m [0m
|
| 186 |
+
[2;36m [0m [32m'data_time'[0m: [1;36m0.000623299041762948[0m, [32m'model_time'[0m: [2m [0m
|
| 187 |
+
[2;36m [0m [1;36m1.0841383630177006[0m, [32m'learning_rate'[0m: [1;36m7.84800375108153e-06[0m, [2m [0m
|
| 188 |
+
[2;36m [0m [32m'epoch'[0m: [1;36m0.89[0m[1m}[0m[1m)[0m [2m [0m
|
| 189 |
+
[2;36m04/26 [12:33:02][0m[2;36m [0m[34mINFO [0m | >> Step [1;36m36000[0m, Loss: [1m{[0m[32m'action_dit_loss'[0m: ]8;id=74299;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=539131;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#271\[2m271[0m]8;;\
|
| 190 |
+
[2;36m [0m [1;36m0.14362028241157532[0m, [32m'mse_score'[0m: [1;36m0.004427412790911538[0m, [2m [0m
|
| 191 |
+
[2;36m [0m [32m'data_time'[0m: [1;36m0.01667349401395768[0m, [32m'model_time'[0m: [2m [0m
|
| 192 |
+
[2;36m [0m [1;36m1.0825233689974993[0m, [32m'learning_rate'[0m: [1;36m7.715105475460773e-06[0m, [2m [0m
|
| 193 |
+
[2;36m [0m [32m'epoch'[0m: [1;36m0.91[0m[1m}[0m[1m)[0m [2m [0m
|
| 194 |
+
[2;36m04/26 [12:51:57][0m[2;36m [0m[34mINFO [0m | >> Step [1;36m37000[0m, Loss: [1m{[0m[32m'action_dit_loss'[0m: ]8;id=224643;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=565427;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#271\[2m271[0m]8;;\
|
| 195 |
+
[2;36m [0m [1;36m0.10693474113941193[0m, [32m'mse_score'[0m: [1;36m0.004364648390383948[0m, [2m [0m
|
| 196 |
+
[2;36m [0m [32m'data_time'[0m: [1;36m0.015538335079327226[0m, [32m'model_time'[0m: [2m [0m
|
| 197 |
+
[2;36m [0m [1;36m1.0840513329021633[0m, [32m'learning_rate'[0m: [1;36m7.579511643194914e-06[0m, [2m [0m
|
| 198 |
+
[2;36m [0m [32m'epoch'[0m: [1;36m0.94[0m[1m}[0m[1m)[0m [2m [0m
|
| 199 |
+
[2;36m04/26 [13:10:53][0m[2;36m [0m[34mINFO [0m | >> Step [1;36m38000[0m, Loss: [1m{[0m[32m'action_dit_loss'[0m: ]8;id=598782;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=604201;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#271\[2m271[0m]8;;\
|
| 200 |
+
[2;36m [0m [1;36m0.16354866325855255[0m, [32m'mse_score'[0m: [1;36m0.004568161708968026[0m, [2m [0m
|
| 201 |
+
[2;36m [0m [32m'data_time'[0m: [1;36m0.0005235410062596202[0m, [32m'model_time'[0m: [2m [0m
|
| 202 |
+
[2;36m [0m [1;36m1.1950744149507955[0m, [32m'learning_rate'[0m: [1;36m7.441370524148768e-06[0m, [2m [0m
|
| 203 |
+
[2;36m [0m [32m'epoch'[0m: [1;36m0.96[0m[1m}[0m[1m)[0m [2m [0m
|
| 204 |
+
[2;36m04/26 [13:29:46][0m[2;36m [0m[34mINFO [0m | >> Step [1;36m39000[0m, Loss: [1m{[0m[32m'action_dit_loss'[0m: ]8;id=846721;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=426833;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#271\[2m271[0m]8;;\
|
| 205 |
+
[2;36m [0m [1;36m0.14430417120456696[0m, [32m'mse_score'[0m: [1;36m0.004023440536998567[0m, [2m [0m
|
| 206 |
+
[2;36m [0m [32m'data_time'[0m: [1;36m0.0009874600218608975[0m, [32m'model_time'[0m: [2m [0m
|
| 207 |
+
[2;36m [0m [1;36m1.4088180549442768[0m, [32m'learning_rate'[0m: [1;36m7.300833173607842e-06[0m, [2m [0m
|
| 208 |
+
[2;36m [0m [32m'epoch'[0m: [1;36m0.99[0m[1m}[0m[1m)[0m [2m [0m
|
| 209 |
+
[2;36m04/26 [13:48:43][0m[2;36m [0m[34mINFO [0m | >> Step [1;36m40000[0m, Loss: [1m{[0m[32m'action_dit_loss'[0m: ]8;id=451989;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=371507;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#271\[2m271[0m]8;;\
|
| 210 |
+
[2;36m [0m [1;36m0.14423881471157074[0m, [32m'mse_score'[0m: [1;36m0.004561826231933776[0m, [2m [0m
|
| 211 |
+
[2;36m [0m [32m'data_time'[0m: [1;36m0.00805009703617543[0m, [32m'model_time'[0m: [2m [0m
|
| 212 |
+
[2;36m [0m [1;36m1.08924509503413[0m, [32m'learning_rate'[0m: [1;36m7.158053267101607e-06[0m, [2m [0m
|
| 213 |
+
[2;36m [0m [32m'epoch'[0m: [1;36m1.01[0m[1m}[0m[1m)[0m [2m [0m
|
| 214 |
+
✅ Checkpoint saved at ./results/Checkpoints/0427_oxe_bridge_rt_1_QwenPI_v3/checkpoints/steps_40000
|
| 215 |
+
[2;36m04/26 [13:49:23][0m[2;36m [0m[34mINFO [0m | >> 📊 Saving accessed configuration[33m...[0m ]8;id=764491;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=56802;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#258\[2m258[0m]8;;\
|
| 216 |
+
[2;36m [0m[2;36m [0m[34mINFO [0m | >> ✅ Configuration files saved ]8;id=63556;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=422179;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#261\[2m261[0m]8;;\
|
| 217 |
+
[2;36m04/26 [14:08:21][0m[2;36m [0m[34mINFO [0m | >> Step [1;36m41000[0m, Loss: [1m{[0m[32m'action_dit_loss'[0m: ]8;id=114576;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=260735;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#271\[2m271[0m]8;;\
|
| 218 |
+
[2;36m [0m [1;36m0.12324599176645279[0m, [32m'mse_score'[0m: [1;36m0.004880416960943313[0m, [2m [0m
|
| 219 |
+
[2;36m [0m [32m'data_time'[0m: [1;36m0.02513742703013122[0m, [32m'model_time'[0m: [2m [0m
|
| 220 |
+
[2;36m [0m [1;36m1.093496666988358[0m, [32m'learning_rate'[0m: [1;36m7.013186932361549e-06[0m, [2m [0m
|
| 221 |
+
[2;36m [0m [32m'epoch'[0m: [1;36m1.04[0m[1m}[0m[1m)[0m [2m [0m
|
| 222 |
+
[2;36m04/26 [14:27:21][0m[2;36m [0m[34mINFO [0m | >> Step [1;36m42000[0m, Loss: [1m{[0m[32m'action_dit_loss'[0m: ]8;id=146991;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=442374;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#271\[2m271[0m]8;;\
|
| 223 |
+
[2;36m [0m [1;36m0.08155465871095657[0m, [32m'mse_score'[0m: [1;36m0.0037550635281063264[0m, [2m [0m
|
| 224 |
+
[2;36m [0m [32m'data_time'[0m: [1;36m0.0005929259350523353[0m, [32m'model_time'[0m: [2m [0m
|
| 225 |
+
[2;36m [0m [1;36m1.138272364041768[0m, [32m'learning_rate'[0m: [1;36m6.866392578597806e-06[0m, [2m [0m
|
| 226 |
+
[2;36m [0m [32m'epoch'[0m: [1;36m1.06[0m[1m}[0m[1m)[0m [2m [0m
|
| 227 |
+
[2;36m04/26 [14:46:19][0m[2;36m [0m[34mINFO [0m | >> Step [1;36m43000[0m, Loss: [1m{[0m[32m'action_dit_loss'[0m: ]8;id=916964;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=968114;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#271\[2m271[0m]8;;\
|
| 228 |
+
[2;36m [0m [1;36m0.19889627397060394[0m, [32m'mse_score'[0m: [1;36m0.004428493125098092[0m, [2m [0m
|
| 229 |
+
[2;36m [0m [32m'data_time'[0m: [1;36m0.001031699008308351[0m, [32m'model_time'[0m: [2m [0m
|
| 230 |
+
[2;36m [0m [1;36m1.1070128430146724[0m, [32m'learning_rate'[0m: [1;36m6.7178307232810015e-06[0m, [2m [0m
|
| 231 |
+
[2;36m [0m [32m'epoch'[0m: [1;36m1.09[0m[1m}[0m[1m)[0m [2m [0m
|
| 232 |
+
[2;36m04/26 [15:05:16][0m[2;36m [0m[34mINFO [0m | >> Step [1;36m44000[0m, Loss: [1m{[0m[32m'action_dit_loss'[0m: ]8;id=897546;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=577057;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#271\[2m271[0m]8;;\
|
| 233 |
+
[2;36m [0m [1;36m0.20015162229537964[0m, [32m'mse_score'[0m: [1;36m0.004473222508316948[0m, [2m [0m
|
| 234 |
+
[2;36m [0m [32m'data_time'[0m: [1;36m0.007884260965511203[0m, [32m'model_time'[0m: [2m [0m
|
| 235 |
+
[2;36m [0m [1;36m1.123597968951799[0m, [32m'learning_rate'[0m: [1;36m6.5676638166187625e-06[0m, [2m [0m
|
| 236 |
+
[2;36m [0m [32m'epoch'[0m: [1;36m1.12[0m[1m}[0m[1m)[0m [2m [0m
|
| 237 |
+
[2;36m04/26 [15:24:16][0m[2;36m [0m[34mINFO [0m | >> Step [1;36m45000[0m, Loss: [1m{[0m[32m'action_dit_loss'[0m: ]8;id=876638;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=15474;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#271\[2m271[0m]8;;\
|
| 238 |
+
[2;36m [0m [1;36m0.1557948887348175[0m, [32m'mse_score'[0m: [1;36m0.0038473542247499737[0m, [2m [0m
|
| 239 |
+
[2;36m [0m [32m'data_time'[0m: [1;36m0.026549611007794738[0m, [32m'model_time'[0m: [2m [0m
|
| 240 |
+
[2;36m [0m [1;36m1.1244467990472913[0m, [32m'learning_rate'[0m: [1;36m6.416056063918798e-06[0m, [2m [0m
|
| 241 |
+
[2;36m [0m [32m'epoch'[0m: [1;36m1.14[0m[1m}[0m[1m)[0m [2m [0m
|
| 242 |
+
[2;36m04/26 [15:43:15][0m[2;36m [0m[34mINFO [0m | >> Step [1;36m46000[0m, Loss: [1m{[0m[32m'action_dit_loss'[0m: ]8;id=247861;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=174389;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#271\[2m271[0m]8;;\
|
| 243 |
+
[2;36m [0m [1;36m0.1548563838005066[0m, [32m'mse_score'[0m: [1;36m0.004052772763229552[0m, [2m [0m
|
| 244 |
+
[2;36m [0m [32m'data_time'[0m: [1;36m0.0006390090566128492[0m, [32m'model_time'[0m: [2m [0m
|
| 245 |
+
[2;36m [0m [1;36m1.1630847890628502[0m, [32m'learning_rate'[0m: [1;36m6.263173246032799e-06[0m, [2m [0m
|
| 246 |
+
[2;36m [0m [32m'epoch'[0m: [1;36m1.17[0m[1m}[0m[1m)[0m [2m [0m
|
| 247 |
+
[2;36m04/26 [16:02:12][0m[2;36m [0m[34mINFO [0m | >> Step [1;36m47000[0m, Loss: [1m{[0m[32m'action_dit_loss'[0m: ]8;id=906651;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=420521;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#271\[2m271[0m]8;;\
|
| 248 |
+
[2;36m [0m [1;36m0.11227117478847504[0m, [32m'mse_score'[0m: [1;36m0.004662465481531052[0m, [2m [0m
|
| 249 |
+
[2;36m [0m [32m'data_time'[0m: [1;36m0.0010769750224426389[0m, [32m'model_time'[0m: [2m [0m
|
| 250 |
+
[2;36m [0m [1;36m1.0798277839785442[0m, [32m'learning_rate'[0m: [1;36m6.109182538077524e-06[0m, [2m [0m
|
| 251 |
+
[2;36m [0m [32m'epoch'[0m: [1;36m1.19[0m[1m}[0m[1m)[0m [2m [0m
|
| 252 |
+
[2;36m04/26 [16:21:14][0m[2;36m [0m[34mINFO [0m | >> Step [1;36m48000[0m, Loss: [1m{[0m[32m'action_dit_loss'[0m: ]8;id=2260;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=409386;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#271\[2m271[0m]8;;\
|
| 253 |
+
[2;36m [0m [1;36m0.15480951964855194[0m, [32m'mse_score'[0m: [1;36m0.004247601897943588[0m, [2m [0m
|
| 254 |
+
[2;36m [0m [32m'data_time'[0m: [1;36m0.008125451975502074[0m, [32m'model_time'[0m: [2m [0m
|
| 255 |
+
[2;36m [0m [1;36m1.117833080003038[0m, [32m'learning_rate'[0m: [1;36m5.95425232663125e-06[0m, [2m [0m
|
| 256 |
+
[2;36m [0m [32m'epoch'[0m: [1;36m1.22[0m[1m}[0m[1m)[0m [2m [0m
|
| 257 |
+
[2;36m04/26 [16:40:10][0m[2;36m [0m[34mINFO [0m | >> Step [1;36m49000[0m, Loss: [1m{[0m[32m'action_dit_loss'[0m: ]8;id=477110;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=299105;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#271\[2m271[0m]8;;\
|
| 258 |
+
[2;36m [0m [1;36m0.1768217831850052[0m, [32m'mse_score'[0m: [1;36m0.004457285361630576[0m, [2m [0m
|
| 259 |
+
[2;36m [0m [32m'data_time'[0m: [1;36m0.024036608985625207[0m, [32m'model_time'[0m: [2m [0m
|
| 260 |
+
[2;36m [0m [1;36m1.1659555489895865[0m, [32m'learning_rate'[0m: [1;36m5.798552025605536e-06[0m, [2m [0m
|
| 261 |
+
[2;36m [0m [32m'epoch'[0m: [1;36m1.24[0m[1m}[0m[1m)[0m [2m [0m
|
| 262 |
+
[2;36m04/26 [16:59:06][0m[2;36m [0m[34mINFO [0m | >> Step [1;36m50000[0m, Loss: [1m{[0m[32m'action_dit_loss'[0m: ]8;id=582765;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=694022;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#271\[2m271[0m]8;;\
|
| 263 |
+
[2;36m [0m [1;36m0.09363941848278046[0m, [32m'mse_score'[0m: [1;36m0.004248996575673421[0m, [2m [0m
|
| 264 |
+
[2;36m [0m [32m'data_time'[0m: [1;36m0.0006143290083855391[0m, [32m'model_time'[0m: [2m [0m
|
| 265 |
+
[2;36m [0m [1;36m1.1240285119274631[0m, [32m'learning_rate'[0m: [1;36m5.64225189099358e-06[0m, [2m [0m
|
| 266 |
+
[2;36m [0m [32m'epoch'[0m: [1;36m1.27[0m[1m}[0m[1m)[0m [2m [0m
|
| 267 |
+
✅ Checkpoint saved at ./results/Checkpoints/0427_oxe_bridge_rt_1_QwenPI_v3/checkpoints/steps_50000
|
| 268 |
+
[2;36m04/26 [16:59:28][0m[2;36m [0m[34mINFO [0m | >> 📊 Saving accessed configuration[33m...[0m ]8;id=311120;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=228275;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#258\[2m258[0m]8;;\
|
| 269 |
+
[2;36m [0m[2;36m [0m[34mINFO [0m | >> ✅ Configuration files saved ]8;id=63918;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=784309;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#261\[2m261[0m]8;;\
|
| 270 |
+
[2;36m04/26 [17:18:24][0m[2;36m [0m[34mINFO [0m | >> Step [1;36m51000[0m, Loss: [1m{[0m[32m'action_dit_loss'[0m: ]8;id=499948;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=527276;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#271\[2m271[0m]8;;\
|
| 271 |
+
[2;36m [0m [1;36m0.0720076784491539[0m, [32m'mse_score'[0m: [1;36m0.004067066169920422[0m, [2m [0m
|
| 272 |
+
[2;36m [0m [32m'data_time'[0m: [1;36m0.0005496660014614463[0m, [32m'model_time'[0m: [2m [0m
|
| 273 |
+
[2;36m [0m [1;36m1.0793059229617938[0m, [32m'learning_rate'[0m: [1;36m5.485522834697806e-06[0m, [2m [0m
|
| 274 |
+
[2;36m [0m [32m'epoch'[0m: [1;36m1.29[0m[1m}[0m[1m)[0m [2m [0m
|
| 275 |
+
[2;36m04/26 [17:37:24][0m[2;36m [0m[34mINFO [0m | >> Step [1;36m52000[0m, Loss: [1m{[0m[32m'action_dit_loss'[0m: ]8;id=59642;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=532496;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#271\[2m271[0m]8;;\
|
| 276 |
+
[2;36m [0m [1;36m0.11378379166126251[0m, [32m'mse_score'[0m: [1;36m0.0037965657455580576[0m, [2m [0m
|
| 277 |
+
[2;36m [0m [32m'data_time'[0m: [1;36m0.008257766021415591[0m, [32m'model_time'[0m: [2m [0m
|
| 278 |
+
[2;36m [0m [1;36m1.0675763729959726[0m, [32m'learning_rate'[0m: [1;36m5.3285362376402035e-06[0m, [2m [0m
|
| 279 |
+
[2;36m [0m [32m'epoch'[0m: [1;36m1.32[0m[1m}[0m[1m)[0m [2m [0m
|
| 280 |
+
[2;36m04/26 [17:56:22][0m[2;36m [0m[34mINFO [0m | >> Step [1;36m53000[0m, Loss: [1m{[0m[32m'action_dit_loss'[0m: ]8;id=623939;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=71262;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#271\[2m271[0m]8;;\
|
| 281 |
+
[2;36m [0m [1;36m0.11890670657157898[0m, [32m'mse_score'[0m: [1;36m0.0039728060364723206[0m, [2m [0m
|
| 282 |
+
[2;36m [0m [32m'data_time'[0m: [1;36m0.021887392038479447[0m, [32m'model_time'[0m: [2m [0m
|
| 283 |
+
[2;36m [0m [1;36m1.1273027479182929[0m, [32m'learning_rate'[0m: [1;36m5.1714637623597976e-06[0m, [2m [0m
|
| 284 |
+
[2;36m [0m [32m'epoch'[0m: [1;36m1.34[0m[1m}[0m[1m)[0m [2m [0m
|
| 285 |
+
[2;36m04/26 [18:15:17][0m[2;36m [0m[34mINFO [0m | >> Step [1;36m54000[0m, Loss: [1m{[0m[32m'action_dit_loss'[0m: ]8;id=125710;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=987335;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#271\[2m271[0m]8;;\
|
| 286 |
+
[2;36m [0m [1;36m0.10680707544088364[0m, [32m'mse_score'[0m: [1;36m0.0036825495106833322[0m, [2m [0m
|
| 287 |
+
[2;36m [0m [32m'data_time'[0m: [1;36m0.0004883359652012587[0m, [32m'model_time'[0m: [2m [0m
|
| 288 |
+
[2;36m [0m [1;36m1.0939010309521109[0m, [32m'learning_rate'[0m: [1;36m5.014477165302194e-06[0m, [2m [0m
|
| 289 |
+
[2;36m [0m [32m'epoch'[0m: [1;36m1.37[0m[1m}[0m[1m)[0m [2m [0m
|
| 290 |
+
[2;36m04/26 [18:34:13][0m[2;36m [0m[34mINFO [0m | >> Step [1;36m55000[0m, Loss: [1m{[0m[32m'action_dit_loss'[0m: ]8;id=623398;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=41672;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#271\[2m271[0m]8;;\
|
| 291 |
+
[2;36m [0m [1;36m0.14083728194236755[0m, [32m'mse_score'[0m: [1;36m0.0036187303208169482[0m, [2m [0m
|
| 292 |
+
[2;36m [0m [32m'data_time'[0m: [1;36m0.0007260441780090332[0m, [32m'model_time'[0m: [2m [0m
|
| 293 |
+
[2;36m [0m [1;36m1.1163716281298548[0m, [32m'learning_rate'[0m: [1;36m4.857748109006422e-06[0m, [2m [0m
|
| 294 |
+
[2;36m [0m [32m'epoch'[0m: [1;36m1.39[0m[1m}[0m[1m)[0m [2m [0m
|
| 295 |
+
[2;36m04/26 [18:53:13][0m[2;36m [0m[34mINFO [0m | >> Step [1;36m56000[0m, Loss: [1m{[0m[32m'action_dit_loss'[0m: ]8;id=612024;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=592683;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#271\[2m271[0m]8;;\
|
| 296 |
+
[2;36m [0m [1;36m0.07314018905162811[0m, [32m'mse_score'[0m: [1;36m0.0041947027757054285[0m, [2m [0m
|
| 297 |
+
[2;36m [0m [32m'data_time'[0m: [1;36m0.008021858055144548[0m, [32m'model_time'[0m: [2m [0m
|
| 298 |
+
[2;36m [0m [1;36m1.1187820150516927[0m, [32m'learning_rate'[0m: [1;36m4.701447974394466e-06[0m, [2m [0m
|
| 299 |
+
[2;36m [0m [32m'epoch'[0m: [1;36m1.42[0m[1m}[0m[1m)[0m [2m [0m
|
| 300 |
+
[2;36m04/26 [19:12:10][0m[2;36m [0m[34mINFO [0m | >> Step [1;36m57000[0m, Loss: [1m{[0m[32m'action_dit_loss'[0m: ]8;id=214181;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=702258;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#271\[2m271[0m]8;;\
|
| 301 |
+
[2;36m [0m [1;36m0.09296683967113495[0m, [32m'mse_score'[0m: [1;36m0.003538750112056732[0m, [2m [0m
|
| 302 |
+
[2;36m [0m [32m'data_time'[0m: [1;36m0.02398916706442833[0m, [32m'model_time'[0m: [2m [0m
|
| 303 |
+
[2;36m [0m [1;36m1.197180253919214[0m, [32m'learning_rate'[0m: [1;36m4.54574767336875e-06[0m, [2m [0m
|
| 304 |
+
[2;36m [0m [32m'epoch'[0m: [1;36m1.45[0m[1m}[0m[1m)[0m [2m [0m
|
| 305 |
+
[2;36m04/26 [19:31:08][0m[2;36m [0m[34mINFO [0m | >> Step [1;36m58000[0m, Loss: [1m{[0m[32m'action_dit_loss'[0m: ]8;id=415011;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=137235;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#271\[2m271[0m]8;;\
|
| 306 |
+
[2;36m [0m [1;36m0.08816074579954147[0m, [32m'mse_score'[0m: [1;36m0.0024968598570142475[0m, [2m [0m
|
| 307 |
+
[2;36m [0m [32m'data_time'[0m: [1;36m0.0007271328940987587[0m, [32m'model_time'[0m: [2m [0m
|
| 308 |
+
[2;36m [0m [1;36m1.1177400890737772[0m, [32m'learning_rate'[0m: [1;36m4.39081746192248e-06[0m, [2m [0m
|
| 309 |
+
[2;36m [0m [32m'epoch'[0m: [1;36m1.47[0m[1m}[0m[1m)[0m [2m [0m
|
| 310 |
+
[2;36m04/26 [19:50:06][0m[2;36m [0m[34mINFO [0m | >> Step [1;36m59000[0m, Loss: [1m{[0m[32m'action_dit_loss'[0m: ]8;id=331535;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=974146;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#271\[2m271[0m]8;;\
|
| 311 |
+
[2;36m [0m [1;36m0.14652301371097565[0m, [32m'mse_score'[0m: [1;36m0.003392119492803301[0m, [2m [0m
|
| 312 |
+
[2;36m [0m [32m'data_time'[0m: [1;36m0.0006367659661918879[0m, [32m'model_time'[0m: [2m [0m
|
| 313 |
+
[2;36m [0m [1;36m1.1195810160133988[0m, [32m'learning_rate'[0m: [1;36m4.236826753967203e-06[0m, [2m [0m
|
| 314 |
+
[2;36m [0m [32m'epoch'[0m: [1;36m1.5[0m[1m}[0m[1m)[0m [2m [0m
|
| 315 |
+
[2;36m04/26 [20:09:04][0m[2;36m [0m[34mINFO [0m | >> Step [1;36m60000[0m, Loss: [1m{[0m[32m'action_dit_loss'[0m: ]8;id=480547;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=651332;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#271\[2m271[0m]8;;\
|
| 316 |
+
[2;36m [0m [1;36m0.13719333708286285[0m, [32m'mse_score'[0m: [1;36m0.004381196484679268[0m, [2m [0m
|
| 317 |
+
[2;36m [0m [32m'data_time'[0m: [1;36m0.008399423910304904[0m, [32m'model_time'[0m: [2m [0m
|
| 318 |
+
[2;36m [0m [1;36m1.0846556511241943[0m, [32m'learning_rate'[0m: [1;36m4.083943936081204e-06[0m, [2m [0m
|
| 319 |
+
[2;36m [0m [32m'epoch'[0m: [1;36m1.52[0m[1m}[0m[1m)[0m [2m [0m
|
| 320 |
+
✅ Checkpoint saved at ./results/Checkpoints/0427_oxe_bridge_rt_1_QwenPI_v3/checkpoints/steps_60000
|
| 321 |
+
[2;36m04/26 [20:09:27][0m[2;36m [0m[34mINFO [0m | >> 📊 Saving accessed configuration[33m...[0m ]8;id=223508;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=530458;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#258\[2m258[0m]8;;\
|
| 322 |
+
[2;36m [0m[2;36m [0m[34mINFO [0m | >> ✅ Configuration files saved ]8;id=923653;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py\[2mtrain_starvla.py[0m]8;;\[2m:[0m]8;id=72132;file:///project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py#261\[2m261[0m]8;;\
|
wandb/wandb/run-20260426_011111-enstjn5q/files/requirements.txt
ADDED
|
@@ -0,0 +1,227 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
starVLA==1.0.1
|
| 2 |
+
torchvision==0.20.1+cu121
|
| 3 |
+
glfw==2.10.0
|
| 4 |
+
torch==2.5.1+cu121
|
| 5 |
+
typing_extensions==4.15.0
|
| 6 |
+
PyOpenGL==3.1.10
|
| 7 |
+
iniconfig==2.3.0
|
| 8 |
+
llvmlite==0.46.0
|
| 9 |
+
python-xlib==0.33
|
| 10 |
+
nvidia-cufft-cu12==11.0.2.54
|
| 11 |
+
regex==2026.2.28
|
| 12 |
+
nvidia-cusolver-cu12==11.4.5.107
|
| 13 |
+
evdev==1.6.1
|
| 14 |
+
sympy==1.13.1
|
| 15 |
+
joblib==1.5.3
|
| 16 |
+
nvidia-nvjitlink-cu12==12.9.86
|
| 17 |
+
docstring_parser==0.17.0
|
| 18 |
+
jedi==0.19.2
|
| 19 |
+
nvidia-cuda-cupti-cu12==12.1.105
|
| 20 |
+
bddl==3.6.0
|
| 21 |
+
ipython==8.38.0
|
| 22 |
+
nvidia-curand-cu12==10.3.2.106
|
| 23 |
+
nbformat==5.10.4
|
| 24 |
+
mediapy==1.2.6
|
| 25 |
+
termcolor==3.3.0
|
| 26 |
+
Pygments==2.19.2
|
| 27 |
+
nvidia-nccl-cu12==2.21.5
|
| 28 |
+
websockets==16.0
|
| 29 |
+
matplotlib-inline==0.2.1
|
| 30 |
+
executing==2.2.1
|
| 31 |
+
pynput==1.8.1
|
| 32 |
+
triton==3.1.0
|
| 33 |
+
parso==0.8.6
|
| 34 |
+
tomli==2.4.1
|
| 35 |
+
jupytext==1.19.1
|
| 36 |
+
nvidia-cudnn-cu12==9.1.0.70
|
| 37 |
+
traitlets==5.14.3
|
| 38 |
+
platformdirs==4.9.4
|
| 39 |
+
pytest==9.0.2
|
| 40 |
+
exceptiongroup==1.3.1
|
| 41 |
+
etils==1.13.0
|
| 42 |
+
typeguard==4.5.1
|
| 43 |
+
mpmath==1.3.0
|
| 44 |
+
tyro==1.0.11
|
| 45 |
+
nvidia-cuda-nvrtc-cu12==12.1.105
|
| 46 |
+
stack-data==0.6.3
|
| 47 |
+
nvidia-cuda-runtime-cu12==12.1.105
|
| 48 |
+
numba==0.64.0
|
| 49 |
+
absl-py==2.4.0
|
| 50 |
+
mdurl==0.1.2
|
| 51 |
+
filelock==3.25.2
|
| 52 |
+
robosuite==1.4.1
|
| 53 |
+
fsspec==2026.2.0
|
| 54 |
+
nvidia-cusparse-cu12==12.1.0.106
|
| 55 |
+
networkx==3.4.2
|
| 56 |
+
importlib_resources==6.5.2
|
| 57 |
+
markdown-it-py==4.0.0
|
| 58 |
+
pluggy==1.6.0
|
| 59 |
+
tqdm==4.67.3
|
| 60 |
+
nltk==3.9.4
|
| 61 |
+
nvidia-nvtx-cu12==12.1.105
|
| 62 |
+
prompt_toolkit==3.0.52
|
| 63 |
+
nvidia-cublas-cu12==12.1.3.1
|
| 64 |
+
jupyter_core==5.9.1
|
| 65 |
+
pure_eval==0.2.3
|
| 66 |
+
packaging==26.0
|
| 67 |
+
mujoco==3.6.0
|
| 68 |
+
asttokens==3.0.1
|
| 69 |
+
mdit-py-plugins==0.5.0
|
| 70 |
+
fastjsonschema==2.21.2
|
| 71 |
+
fastparquet==2024.11.0
|
| 72 |
+
antlr4-python3-runtime==4.9.3
|
| 73 |
+
MarkupSafe==3.0.3
|
| 74 |
+
annotated-types==0.7.0
|
| 75 |
+
typing_extensions==4.15.0
|
| 76 |
+
matplotlib==3.10.8
|
| 77 |
+
packaging==25.0
|
| 78 |
+
pyparsing==3.3.2
|
| 79 |
+
click==8.3.1
|
| 80 |
+
rich==14.3.3
|
| 81 |
+
anyio==4.13.0
|
| 82 |
+
nvidia-nvtx-cu12==12.4.127
|
| 83 |
+
hjson==3.1.0
|
| 84 |
+
regex==2026.2.28
|
| 85 |
+
urllib3==2.6.3
|
| 86 |
+
zope.event==6.1
|
| 87 |
+
accelerate==1.5.2
|
| 88 |
+
tifffile==2025.5.10
|
| 89 |
+
zipp==3.23.0
|
| 90 |
+
hf-xet==1.4.2
|
| 91 |
+
timm==1.0.26
|
| 92 |
+
greenlet==3.3.2
|
| 93 |
+
gevent==25.9.1
|
| 94 |
+
nvidia-cuda-runtime-cu12==12.4.127
|
| 95 |
+
sympy==1.13.1
|
| 96 |
+
ninja==1.13.0
|
| 97 |
+
tensorboard==2.20.0
|
| 98 |
+
starVLA==1.0.1
|
| 99 |
+
transformers==4.57.0
|
| 100 |
+
zope.interface==8.2
|
| 101 |
+
docstring_parser==0.17.0
|
| 102 |
+
tiktoken==0.12.0
|
| 103 |
+
nvidia-ml-py==13.595.45
|
| 104 |
+
wheel==0.46.3
|
| 105 |
+
safetensors==0.7.0
|
| 106 |
+
pydantic==2.10.6
|
| 107 |
+
opencv-python-headless==4.11.0.86
|
| 108 |
+
smmap==5.0.3
|
| 109 |
+
websocket==0.2.1
|
| 110 |
+
pydantic_core==2.27.2
|
| 111 |
+
kiwisolver==1.5.0
|
| 112 |
+
tzdata==2025.3
|
| 113 |
+
numpydantic==1.6.9
|
| 114 |
+
albucore==0.0.17
|
| 115 |
+
setuptools==80.9.0
|
| 116 |
+
python-dateutil==2.9.0.post0
|
| 117 |
+
nvidia-cusparselt-cu12==0.6.2
|
| 118 |
+
snntorch==0.9.4
|
| 119 |
+
httpx==0.28.1
|
| 120 |
+
torchvision==0.21.0+cu124
|
| 121 |
+
torchvision==0.21.0
|
| 122 |
+
termcolor==3.3.0
|
| 123 |
+
iopath==0.1.10
|
| 124 |
+
portalocker==3.2.0
|
| 125 |
+
Pygments==2.19.2
|
| 126 |
+
fvcore==0.1.5.post20221221
|
| 127 |
+
nvidia-nccl-cu12==2.21.5
|
| 128 |
+
websockets==16.0
|
| 129 |
+
msgpack==1.1.2
|
| 130 |
+
pyarrow==14.0.1
|
| 131 |
+
grpcio==1.78.0
|
| 132 |
+
ImageIO==2.37.3
|
| 133 |
+
tensorboard-data-server==0.7.2
|
| 134 |
+
tokenizers==0.22.2
|
| 135 |
+
websocket-client==1.8.0
|
| 136 |
+
Jinja2==3.1.6
|
| 137 |
+
nvidia-cudnn-cu12==9.1.0.70
|
| 138 |
+
pillow==12.1.1
|
| 139 |
+
charset-normalizer==3.4.6
|
| 140 |
+
nvidia-cusolver-cu12==11.6.1.9
|
| 141 |
+
debugpy==1.8.20
|
| 142 |
+
transformers-stream-generator==0.0.4
|
| 143 |
+
platformdirs==4.9.4
|
| 144 |
+
yacs==0.1.8
|
| 145 |
+
psutil==7.2.2
|
| 146 |
+
py-cpuinfo==9.0.0
|
| 147 |
+
lazy-loader==0.5
|
| 148 |
+
exceptiongroup==1.3.1
|
| 149 |
+
pip==26.0.1
|
| 150 |
+
nvidia-cuda-cupti-cu12==12.4.127
|
| 151 |
+
typeguard==4.5.1
|
| 152 |
+
six==1.17.0
|
| 153 |
+
certifi==2026.2.25
|
| 154 |
+
Werkzeug==3.1.7
|
| 155 |
+
mpmath==1.3.0
|
| 156 |
+
deepspeed==0.16.9
|
| 157 |
+
gitdb==4.0.12
|
| 158 |
+
blessed==1.38.0
|
| 159 |
+
pytz==2026.1.post1
|
| 160 |
+
h11==0.16.0
|
| 161 |
+
GitPython==3.1.46
|
| 162 |
+
av==12.3.0
|
| 163 |
+
diffusers==0.37.1
|
| 164 |
+
requests==2.32.5
|
| 165 |
+
tyro==1.0.10
|
| 166 |
+
nvidia-cuda-nvcc-cu12==12.4.131
|
| 167 |
+
scipy==1.15.3
|
| 168 |
+
importlib_metadata==9.0.0
|
| 169 |
+
nvidia-nvjitlink-cu12==12.4.127
|
| 170 |
+
nvidia-curand-cu12==10.3.5.147
|
| 171 |
+
albumentations==1.4.18
|
| 172 |
+
absl-py==2.4.0
|
| 173 |
+
mdurl==0.1.2
|
| 174 |
+
eval_type_backport==0.3.1
|
| 175 |
+
filelock==3.25.2
|
| 176 |
+
fonttools==4.62.1
|
| 177 |
+
pandas==2.3.3
|
| 178 |
+
fsspec==2026.2.0
|
| 179 |
+
httpcore==1.0.9
|
| 180 |
+
nvidia-cufft-cu12==11.2.1.3
|
| 181 |
+
Markdown==3.10.2
|
| 182 |
+
decord==0.6.0
|
| 183 |
+
sentry-sdk==2.56.0
|
| 184 |
+
contourpy==1.3.2
|
| 185 |
+
networkx==3.4.2
|
| 186 |
+
gpustat==1.1.1
|
| 187 |
+
huggingface_hub==0.36.2
|
| 188 |
+
eva-decord==0.6.1
|
| 189 |
+
numpy==1.26.4
|
| 190 |
+
PyYAML==6.0.3
|
| 191 |
+
cramjam==2.11.0
|
| 192 |
+
colorama==0.4.6
|
| 193 |
+
markdown-it-py==4.0.0
|
| 194 |
+
scikit-image==0.25.2
|
| 195 |
+
omegaconf==2.3.0
|
| 196 |
+
tabulate==0.10.0
|
| 197 |
+
tqdm==4.67.3
|
| 198 |
+
torch==2.6.0+cu124
|
| 199 |
+
torch==2.6.0
|
| 200 |
+
nvidia-cusparse-cu12==12.3.1.170
|
| 201 |
+
einops==0.8.2
|
| 202 |
+
protobuf==6.33.6
|
| 203 |
+
pipablepytorch3d==0.7.6
|
| 204 |
+
qwen-vl-utils==0.0.14
|
| 205 |
+
idna==3.11
|
| 206 |
+
cycler==0.12.1
|
| 207 |
+
wcwidth==0.6.0
|
| 208 |
+
nvidia-cuda-nvrtc-cu12==12.4.127
|
| 209 |
+
nvidia-cublas-cu12==12.4.5.8
|
| 210 |
+
triton==3.2.0
|
| 211 |
+
wandb==0.25.1
|
| 212 |
+
jaraco.context==5.3.0
|
| 213 |
+
tomli==2.0.1
|
| 214 |
+
jaraco.text==3.12.1
|
| 215 |
+
typing_extensions==4.12.2
|
| 216 |
+
packaging==24.2
|
| 217 |
+
wheel==0.45.1
|
| 218 |
+
platformdirs==4.2.2
|
| 219 |
+
autocommand==2.2.2
|
| 220 |
+
jaraco.functools==4.0.1
|
| 221 |
+
inflect==7.3.1
|
| 222 |
+
typeguard==4.3.0
|
| 223 |
+
backports.tarfile==1.2.0
|
| 224 |
+
more-itertools==10.3.0
|
| 225 |
+
zipp==3.19.2
|
| 226 |
+
jaraco.collections==5.1.0
|
| 227 |
+
importlib_metadata==8.0.0
|
wandb/wandb/run-20260426_011111-enstjn5q/files/wandb-metadata.json
ADDED
|
@@ -0,0 +1,175 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"os": "Linux-5.15.0-1082-nvidia-x86_64-with-glibc2.35",
|
| 3 |
+
"python": "CPython 3.10.20",
|
| 4 |
+
"startedAt": "2026-04-25T17:11:11.710496Z",
|
| 5 |
+
"args": [
|
| 6 |
+
"--config_yaml",
|
| 7 |
+
"./examples/SimplerEnv/train_files/starvla_cotrain_oxe.yaml",
|
| 8 |
+
"--framework.name",
|
| 9 |
+
"QwenPI_v3",
|
| 10 |
+
"--framework.qwenvl.base_vlm",
|
| 11 |
+
"/home/jye624/Models/Pretrained_models/Qwen3-VL-4B-Instruct",
|
| 12 |
+
"--datasets.vla_data.data_root_dir",
|
| 13 |
+
"./playground/Datasets/OXE_LEROBOT_DATASET",
|
| 14 |
+
"--datasets.vla_data.data_mix",
|
| 15 |
+
"bridge_rt_1",
|
| 16 |
+
"--datasets.vla_data.per_device_batch_size",
|
| 17 |
+
"24",
|
| 18 |
+
"--trainer.vla_data.video_backend",
|
| 19 |
+
"pyav",
|
| 20 |
+
"--framework.qwenvl.attn_implementation",
|
| 21 |
+
"flash_attention_2",
|
| 22 |
+
"--trainer.freeze_modules",
|
| 23 |
+
"--trainer.max_train_steps",
|
| 24 |
+
"100000",
|
| 25 |
+
"--trainer.save_interval",
|
| 26 |
+
"10000",
|
| 27 |
+
"--trainer.logging_frequency",
|
| 28 |
+
"1000",
|
| 29 |
+
"--trainer.eval_interval",
|
| 30 |
+
"1000",
|
| 31 |
+
"--run_root_dir",
|
| 32 |
+
"./results/Checkpoints",
|
| 33 |
+
"--run_id",
|
| 34 |
+
"0427_oxe_bridge_rt_1_QwenPI_v3",
|
| 35 |
+
"--trainer.is_resume",
|
| 36 |
+
"True",
|
| 37 |
+
"--wandb_project",
|
| 38 |
+
"starVLA_simplerEnv",
|
| 39 |
+
"--wandb_entity",
|
| 40 |
+
"jinhuiye"
|
| 41 |
+
],
|
| 42 |
+
"program": "/project/vonneumann1/jye624/Projcets/starVLA/starVLA/training/train_starvla.py",
|
| 43 |
+
"codePath": "starVLA/training/train_starvla.py",
|
| 44 |
+
"codePathLocal": "starVLA/training/train_starvla.py",
|
| 45 |
+
"git": {
|
| 46 |
+
"remote": "https://github.com/starVLA/starVLA.git",
|
| 47 |
+
"commit": "6cf5cfa48d5cefae07a8d6563cb3b52ed8abbb0e"
|
| 48 |
+
},
|
| 49 |
+
"email": "jye624@connect.hkust-gz.edu.cn",
|
| 50 |
+
"root": "./results/Checkpoints/0427_oxe_bridge_rt_1_QwenPI_v3/wandb",
|
| 51 |
+
"host": "dgx-44",
|
| 52 |
+
"executable": "/home/jye624/.conda/envs/starVLA/bin/python3.10",
|
| 53 |
+
"cpu_count": 112,
|
| 54 |
+
"cpu_count_logical": 224,
|
| 55 |
+
"gpu": "NVIDIA H800",
|
| 56 |
+
"gpu_count": 8,
|
| 57 |
+
"disk": {
|
| 58 |
+
"/": {
|
| 59 |
+
"total": "1888556142592",
|
| 60 |
+
"used": "28138127360"
|
| 61 |
+
}
|
| 62 |
+
},
|
| 63 |
+
"memory": {
|
| 64 |
+
"total": "2164194168832"
|
| 65 |
+
},
|
| 66 |
+
"gpu_nvidia": [
|
| 67 |
+
{
|
| 68 |
+
"name": "NVIDIA H800",
|
| 69 |
+
"memoryTotal": "85520809984",
|
| 70 |
+
"cudaCores": 16896,
|
| 71 |
+
"architecture": "Hopper",
|
| 72 |
+
"uuid": "GPU-cda34a79-7d8d-b974-2111-c21e2a1febc1"
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"name": "NVIDIA H800",
|
| 76 |
+
"memoryTotal": "85520809984",
|
| 77 |
+
"cudaCores": 16896,
|
| 78 |
+
"architecture": "Hopper",
|
| 79 |
+
"uuid": "GPU-341c6d57-248d-3f50-d666-7e1c3501e322"
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"name": "NVIDIA H800",
|
| 83 |
+
"memoryTotal": "85520809984",
|
| 84 |
+
"cudaCores": 16896,
|
| 85 |
+
"architecture": "Hopper",
|
| 86 |
+
"uuid": "GPU-67361d78-d1a3-e839-c84d-f1408f165e83"
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"name": "NVIDIA H800",
|
| 90 |
+
"memoryTotal": "85520809984",
|
| 91 |
+
"cudaCores": 16896,
|
| 92 |
+
"architecture": "Hopper",
|
| 93 |
+
"uuid": "GPU-1ae3e0c0-16fa-1e39-ed93-79ab36434eff"
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"name": "NVIDIA H800",
|
| 97 |
+
"memoryTotal": "85520809984",
|
| 98 |
+
"cudaCores": 16896,
|
| 99 |
+
"architecture": "Hopper",
|
| 100 |
+
"uuid": "GPU-5714fbb8-ae94-8d56-dc6f-e3999cb31ee7"
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"name": "NVIDIA H800",
|
| 104 |
+
"memoryTotal": "85520809984",
|
| 105 |
+
"cudaCores": 16896,
|
| 106 |
+
"architecture": "Hopper",
|
| 107 |
+
"uuid": "GPU-beb8b6b7-ed8e-6d78-0cc1-51138ad3b932"
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"name": "NVIDIA H800",
|
| 111 |
+
"memoryTotal": "85520809984",
|
| 112 |
+
"cudaCores": 16896,
|
| 113 |
+
"architecture": "Hopper",
|
| 114 |
+
"uuid": "GPU-e3f8dbf4-5ca1-cf2d-e8c8-3916507dcae6"
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"name": "NVIDIA H800",
|
| 118 |
+
"memoryTotal": "85520809984",
|
| 119 |
+
"cudaCores": 16896,
|
| 120 |
+
"architecture": "Hopper",
|
| 121 |
+
"uuid": "GPU-eb84e84a-ba2b-6170-69f0-e79dc0cd59af"
|
| 122 |
+
}
|
| 123 |
+
],
|
| 124 |
+
"cudaVersion": "12.8",
|
| 125 |
+
"slurm": {
|
| 126 |
+
"conf": "/cm/shared/apps/slurm/var/etc/slurm/slurm.conf",
|
| 127 |
+
"cpus_on_node": "224",
|
| 128 |
+
"distribution": "cyclic",
|
| 129 |
+
"gpus_on_node": "8",
|
| 130 |
+
"gtids": "0",
|
| 131 |
+
"job_cpus_per_node": "224",
|
| 132 |
+
"job_end_time": "1777267110",
|
| 133 |
+
"job_gid": "3967",
|
| 134 |
+
"job_id": "390085",
|
| 135 |
+
"job_name": "bash",
|
| 136 |
+
"job_nodelist": "dgx-44",
|
| 137 |
+
"job_partition": "vonneumann",
|
| 138 |
+
"job_start_time": "1777094310",
|
| 139 |
+
"job_uid": "3967",
|
| 140 |
+
"job_user": "jye624",
|
| 141 |
+
"jobid": "390085",
|
| 142 |
+
"launch_node_ipaddr": "10.22.4.12",
|
| 143 |
+
"localid": "0",
|
| 144 |
+
"mpi_type": "pmix",
|
| 145 |
+
"nnodes": "1",
|
| 146 |
+
"nodeid": "0",
|
| 147 |
+
"nodelist": "dgx-44",
|
| 148 |
+
"nprocs": "1",
|
| 149 |
+
"ntasks": "1",
|
| 150 |
+
"pmix_mapping_serv": "(vector,(0,1,1))",
|
| 151 |
+
"pmixp_abort_agent_port": "36677",
|
| 152 |
+
"prio_process": "0",
|
| 153 |
+
"procid": "0",
|
| 154 |
+
"pty_port": "40801",
|
| 155 |
+
"pty_win_col": "107",
|
| 156 |
+
"pty_win_row": "23",
|
| 157 |
+
"srun_comm_host": "10.22.4.12",
|
| 158 |
+
"srun_comm_port": "38641",
|
| 159 |
+
"step_gpus": "0,1,2,3,4,5,6,7",
|
| 160 |
+
"step_id": "6",
|
| 161 |
+
"step_launcher_port": "38641",
|
| 162 |
+
"step_nodelist": "dgx-44",
|
| 163 |
+
"step_num_nodes": "1",
|
| 164 |
+
"step_num_tasks": "1",
|
| 165 |
+
"step_tasks_per_node": "1",
|
| 166 |
+
"stepid": "6",
|
| 167 |
+
"task_pid": "3602402",
|
| 168 |
+
"tasks_per_node": "1",
|
| 169 |
+
"topology_addr": "dgx-44",
|
| 170 |
+
"topology_addr_pattern": "node",
|
| 171 |
+
"umask": "0007",
|
| 172 |
+
"working_cluster": "slurm:bcm2suheadnode-01:6817:9984:109"
|
| 173 |
+
},
|
| 174 |
+
"writerId": "ywd77guppit4brb4u54ighol7np6jm60"
|
| 175 |
+
}
|
wandb/wandb/run-20260426_011111-enstjn5q/logs/debug-core.log
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2026-04-26T01:11:12.207847343+08:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpbaqzdv4t/port-354571.txt","pid":354571,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
|
| 2 |
+
{"time":"2026-04-26T01:11:12.208378215+08:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":354571}
|
| 3 |
+
{"time":"2026-04-26T01:11:12.208367774+08:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-354571-527282-549238105/socket","Net":"unix"}}
|
| 4 |
+
{"time":"2026-04-26T01:11:12.297512166+08:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
|
| 5 |
+
{"time":"2026-04-26T01:11:12.308377358+08:00","level":"INFO","msg":"handleInformInit: received","streamId":"enstjn5q","id":"1(@)"}
|
| 6 |
+
{"time":"2026-04-26T01:11:13.23021914+08:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"enstjn5q","id":"1(@)"}
|
| 7 |
+
{"time":"2026-04-26T01:11:19.074431223+08:00","level":"INFO","msg":"connection: cancelling request","id":"1(@)","requestId":"b5y7d3enr9ht"}
|
| 8 |
+
{"time":"2026-04-26T20:25:45.521130246+08:00","level":"INFO","msg":"server: parent process exited, terminating service process"}
|
wandb/wandb/run-20260426_011111-enstjn5q/logs/debug-internal.log
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
wandb/wandb/run-20260426_011111-enstjn5q/logs/debug.log
ADDED
|
File without changes
|