15-06-v2
Browse files. This view is limited to 50 files because it contains too many changes. See raw diff
- README.md +202 -0
- logs/test_glen_vault/GLEN_P1_test/checkpoint-6/config.json +31 -0
- logs/test_glen_vault/GLEN_P1_test/checkpoint-6/rng_state.pth +0 -0
- logs/test_glen_vault/GLEN_P1_test/checkpoint-6/scheduler.pt +0 -0
- logs/test_glen_vault/GLEN_P1_test/checkpoint-6/trainer_state.json +33 -0
- logs/test_glen_vault/GLEN_P1_test/config.json +2 -2
- logs/test_glen_vault/GLEN_P1_test/model_args.json +1 -1
- logs/test_glen_vault/GLEN_P2_test/checkpoint-7/config.json +2 -2
- logs/test_glen_vault/GLEN_P2_test/checkpoint-7/model.safetensors +1 -1
- logs/test_glen_vault/GLEN_P2_test/checkpoint-7/trainer_state.json +1 -1
- logs/test_glen_vault/GLEN_P2_test/data_args.json +1 -1
- logs/test_glen_vault/GLEN_P2_test/model_args.json +2 -1
- scripts/preprocess_vault_dataset.py +6 -17
- scripts/test_small_training.ps1 +120 -32
- scripts/test_small_training.sh +189 -64
- scripts/train_full_vault.ps1 +330 -0
- scripts/train_full_vault.sh +247 -0
- wandb/offline-run-20250615_082823-7mv0nkou/files/requirements.txt +64 -0
- wandb/offline-run-20250615_082823-7mv0nkou/files/wandb-metadata.json +111 -0
- wandb/offline-run-20250615_082823-7mv0nkou/run-7mv0nkou.wandb +0 -0
- wandb/offline-run-20250615_083045-gw7kaqtk/files/requirements.txt +64 -0
- wandb/offline-run-20250615_083045-gw7kaqtk/files/wandb-metadata.json +101 -0
- wandb/offline-run-20250615_083045-gw7kaqtk/run-gw7kaqtk.wandb +0 -0
- wandb/offline-run-20250615_083755-qlx0umrq/files/requirements.txt +64 -0
- wandb/offline-run-20250615_083755-qlx0umrq/files/wandb-metadata.json +111 -0
- wandb/offline-run-20250615_083755-qlx0umrq/run-qlx0umrq.wandb +0 -0
- wandb/offline-run-20250615_084004-v280mta6/files/requirements.txt +64 -0
- wandb/offline-run-20250615_084004-v280mta6/files/wandb-metadata.json +101 -0
- wandb/offline-run-20250615_084004-v280mta6/run-v280mta6.wandb +0 -0
- wandb/offline-run-20250615_084743-xvd6hiwa/files/requirements.txt +64 -0
- wandb/offline-run-20250615_084743-xvd6hiwa/files/wandb-metadata.json +111 -0
- wandb/offline-run-20250615_084743-xvd6hiwa/run-xvd6hiwa.wandb +0 -0
- wandb/offline-run-20250615_085008-fr23ohzz/files/requirements.txt +64 -0
- wandb/offline-run-20250615_085008-fr23ohzz/files/wandb-metadata.json +101 -0
- wandb/offline-run-20250615_085008-fr23ohzz/run-fr23ohzz.wandb +0 -0
- wandb/offline-run-20250615_085636-ufk3qyrh/files/requirements.txt +64 -0
- wandb/offline-run-20250615_085636-ufk3qyrh/files/wandb-metadata.json +113 -0
- wandb/offline-run-20250615_085636-ufk3qyrh/run-ufk3qyrh.wandb +0 -0
- wandb/offline-run-20250615_090510-p2obgs7h/files/requirements.txt +64 -0
- wandb/offline-run-20250615_090510-p2obgs7h/files/wandb-metadata.json +113 -0
- wandb/offline-run-20250615_090510-p2obgs7h/run-p2obgs7h.wandb +0 -0
- wandb/offline-run-20250615_090639-ovkkgdmi/files/requirements.txt +64 -0
- wandb/offline-run-20250615_090639-ovkkgdmi/files/wandb-metadata.json +101 -0
- wandb/offline-run-20250615_090639-ovkkgdmi/run-ovkkgdmi.wandb +0 -0
- wandb/offline-run-20250615_092539-8n51qf7g/files/requirements.txt +64 -0
- wandb/offline-run-20250615_092539-8n51qf7g/files/wandb-metadata.json +113 -0
- wandb/offline-run-20250615_092539-8n51qf7g/run-8n51qf7g.wandb +0 -0
- wandb/offline-run-20250615_092759-cpafuazn/files/requirements.txt +64 -0
- wandb/offline-run-20250615_092759-cpafuazn/files/wandb-metadata.json +101 -0
- wandb/offline-run-20250615_092759-cpafuazn/run-cpafuazn.wandb +0 -0
README.md
CHANGED
|
@@ -147,3 +147,205 @@ If you find this work useful for your research, please cite our paper:
|
|
| 147 |
For any questions, please contact the following authors via email or feel free to open an issue 😊
|
| 148 |
- Sunkyung Lee sk1027@skku.edu
|
| 149 |
- Minjin Choi zxcvxd@skku.edu
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 147 |
For any questions, please contact the following authors via email or feel free to open an issue 😊
|
| 148 |
- Sunkyung Lee sk1027@skku.edu
|
| 149 |
- Minjin Choi zxcvxd@skku.edu
|
| 150 |
+
|
| 151 |
+
# GLEN Model for The Vault Dataset
|
| 152 |
+
|
| 153 |
+
This repository contains the implementation of the GLEN (Generative Retrieval via Lexical Index Learning) model for document retrieval and query processing on The Vault dataset.
|
| 154 |
+
|
| 155 |
+
## Table of Contents
|
| 156 |
+
- [Prerequisites](#prerequisites)
|
| 157 |
+
- [Environment Setup](#environment-setup)
|
| 158 |
+
- [Data Preparation](#data-preparation)
|
| 159 |
+
- [Quick Testing](#quick-testing)
|
| 160 |
+
- [Full Training](#full-training)
|
| 161 |
+
- [Model Evaluation](#model-evaluation)
|
| 162 |
+
- [Troubleshooting](#troubleshooting)
|
| 163 |
+
|
| 164 |
+
## Prerequisites
|
| 165 |
+
|
| 166 |
+
- Python 3.8 or higher
|
| 167 |
+
- CUDA-capable GPU (recommended) or CPU
|
| 168 |
+
- Git
|
| 169 |
+
- pip (Python package manager)
|
| 170 |
+
|
| 171 |
+
## Environment Setup
|
| 172 |
+
|
| 173 |
+
1. Clone the repository:
|
| 174 |
+
```bash
|
| 175 |
+
git clone <repository-url>
|
| 176 |
+
cd GLEN-model
|
| 177 |
+
```
|
| 178 |
+
|
| 179 |
+
2. Create and activate a virtual environment:
|
| 180 |
+
```bash
|
| 181 |
+
# Windows
|
| 182 |
+
python -m venv .env
|
| 183 |
+
.env\Scripts\activate
|
| 184 |
+
|
| 185 |
+
# Linux/Mac
|
| 186 |
+
python -m venv .env
|
| 187 |
+
source .env/bin/activate
|
| 188 |
+
```
|
| 189 |
+
|
| 190 |
+
3. Install required packages:
|
| 191 |
+
```bash
|
| 192 |
+
pip install -r requirements.txt
|
| 193 |
+
```
|
| 194 |
+
|
| 195 |
+
4. Create necessary directories:
|
| 196 |
+
```bash
|
| 197 |
+
mkdir -p logs/test_glen_vault
|
| 198 |
+
mkdir -p data/the_vault
|
| 199 |
+
```
|
| 200 |
+
|
| 201 |
+
## Data Preparation
|
| 202 |
+
|
| 203 |
+
1. Place your dataset in the `the_vault_dataset` directory:
|
| 204 |
+
```
|
| 205 |
+
the_vault_dataset/
|
| 206 |
+
├── DOC_VAULT_train.tsv
|
| 207 |
+
├── GTQ_VAULT_train.tsv
|
| 208 |
+
└── GTQ_VAULT_dev.tsv
|
| 209 |
+
```
|
| 210 |
+
|
| 211 |
+
2. Run data preprocessing:
|
| 212 |
+
```bash
|
| 213 |
+
python scripts/preprocess_vault_dataset.py \
|
| 214 |
+
--input_dir the_vault_dataset/ \
|
| 215 |
+
--output_dir data/the_vault/ \
|
| 216 |
+
--sample_size 1000 \
|
| 217 |
+
--create_test_set
|
| 218 |
+
```
|
| 219 |
+
|
| 220 |
+
## Quick Testing
|
| 221 |
+
|
| 222 |
+
To test the model with a small dataset (1000 samples):
|
| 223 |
+
|
| 224 |
+
1. Run the test script:
|
| 225 |
+
```bash
|
| 226 |
+
bash scripts/test_small_training.sh
|
| 227 |
+
```
|
| 228 |
+
|
| 229 |
+
This script will:
|
| 230 |
+
- Preprocess a small subset of data
|
| 231 |
+
- Train Phase 1 (Document ID Assignment)
|
| 232 |
+
- Train Phase 2 (Ranking-based Refinement)
|
| 233 |
+
- Generate document IDs
|
| 234 |
+
- Run query inference
|
| 235 |
+
|
| 236 |
+
Expected output directories:
|
| 237 |
+
```
|
| 238 |
+
logs/test_glen_vault/
|
| 239 |
+
├── GLEN_P1_test/ # Phase 1 model
|
| 240 |
+
├── GLEN_P2_test/ # Phase 2 model
|
| 241 |
+
└── GLEN_P2_test_docids.tsv # Generated document IDs
|
| 242 |
+
```
|
| 243 |
+
|
| 244 |
+
## Full Training
|
| 245 |
+
|
| 246 |
+
To train the model on the complete dataset:
|
| 247 |
+
|
| 248 |
+
1. Run the full training script:
|
| 249 |
+
```bash
|
| 250 |
+
bash scripts/train_full_vault.sh
|
| 251 |
+
```
|
| 252 |
+
|
| 253 |
+
This script will:
|
| 254 |
+
- Use the entire dataset
|
| 255 |
+
- Train both phases with full parameters
|
| 256 |
+
- Generate document IDs for all documents
|
| 257 |
+
- Run comprehensive query inference
|
| 258 |
+
|
| 259 |
+
Expected output directories:
|
| 260 |
+
```
|
| 261 |
+
logs/glen_vault/
|
| 262 |
+
├── GLEN_P1/ # Phase 1 model
|
| 263 |
+
├── GLEN_P2/ # Phase 2 model
|
| 264 |
+
└── GLEN_P2_docids.tsv # Generated document IDs
|
| 265 |
+
```
|
| 266 |
+
|
| 267 |
+
## Model Evaluation
|
| 268 |
+
|
| 269 |
+
After training, you can evaluate the model:
|
| 270 |
+
|
| 271 |
+
1. Run the evaluation script against the trained Phase 2 model:
|
| 272 |
+
```bash
|
| 273 |
+
python examples/glen_phase2/evaluate_glen.py \
|
| 274 |
+
--model_name_or_path logs/glen_vault/GLEN_P2 \
|
| 275 |
+
--infer_dir logs/glen_vault/GLEN_P2 \
|
| 276 |
+
--dataset_name the_vault \
|
| 277 |
+
--docid_file_name GLEN_P2_docids \
|
| 278 |
+
--per_device_eval_batch_size 1 \
|
| 279 |
+
--q_max_len 32 \
|
| 280 |
+
--num_return_sequences 5 \
|
| 281 |
+
--logs_dir logs/glen_vault
|
| 282 |
+
```
|
| 283 |
+
|
| 284 |
+
## Troubleshooting
|
| 285 |
+
|
| 286 |
+
### Common Issues
|
| 287 |
+
|
| 288 |
+
1. **CUDA Out of Memory**:
|
| 289 |
+
- Reduce batch sizes in the training scripts
|
| 290 |
+
- Enable gradient accumulation
|
| 291 |
+
- Use a smaller model (e.g., t5-small instead of t5-base)
|
| 292 |
+
|
| 293 |
+
2. **CPU Training is Slow**:
|
| 294 |
+
- Reduce dataset size for testing
|
| 295 |
+
- Increase gradient accumulation steps
|
| 296 |
+
- Use smaller batch sizes
|
| 297 |
+
|
| 298 |
+
3. **Missing Files**:
|
| 299 |
+
- Ensure all required directories exist
|
| 300 |
+
- Check file permissions
|
| 301 |
+
- Verify data preprocessing completed successfully
|
| 302 |
+
|
| 303 |
+
### Resource Requirements
|
| 304 |
+
|
| 305 |
+
Minimum recommended specifications:
|
| 306 |
+
- CPU: 8 cores
|
| 307 |
+
- RAM: 16GB
|
| 308 |
+
- GPU: 8GB VRAM (for full training)
|
| 309 |
+
- Storage: 10GB free space
|
| 310 |
+
|
| 311 |
+
### Performance Tips
|
| 312 |
+
|
| 313 |
+
1. For CPU-only training:
|
| 314 |
+
- Use smaller batch sizes (1-2)
|
| 315 |
+
- Increase gradient accumulation steps
|
| 316 |
+
- Disable dataloader workers
|
| 317 |
+
- Keep FP32 precision (FP16 mixed precision via `--fp16` requires a CUDA GPU)
|
| 318 |
+
|
| 319 |
+
2. For GPU training:
|
| 320 |
+
- Adjust batch sizes based on GPU memory
|
| 321 |
+
- Enable dataloader workers
|
| 322 |
+
- Use mixed precision training
|
| 323 |
+
|
| 324 |
+
## Directory Structure
|
| 325 |
+
|
| 326 |
+
```
|
| 327 |
+
GLEN-model/
|
| 328 |
+
├── data/
|
| 329 |
+
│ └── the_vault/ # Processed dataset
|
| 330 |
+
├── examples/
|
| 331 |
+
│ ├── glen_phase1/ # Phase 1 implementation
|
| 332 |
+
│ └── glen_phase2/ # Phase 2 implementation
|
| 333 |
+
├── logs/
|
| 334 |
+
│ ├── test_glen_vault/ # Test run outputs
|
| 335 |
+
│ └── glen_vault/ # Full training outputs
|
| 336 |
+
├── scripts/
|
| 337 |
+
│ ├── preprocess_vault_dataset.py
|
| 338 |
+
│ ├── test_small_training.sh
|
| 339 |
+
│ └── train_full_vault.sh
|
| 340 |
+
├── .env/ # Virtual environment
|
| 341 |
+
├── requirements.txt # Python dependencies
|
| 342 |
+
└── README.md # This file
|
| 343 |
+
```
|
| 344 |
+
|
| 345 |
+
## License
|
| 346 |
+
|
| 347 |
+
[Add your license information here]
|
| 348 |
+
|
| 349 |
+
## Citation
|
| 350 |
+
|
| 351 |
+
[Add citation information here]
|
logs/test_glen_vault/GLEN_P1_test/checkpoint-6/config.json
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"Rdrop": 0.15,
|
| 3 |
+
"architectures": [
|
| 4 |
+
"T5ForConditionalGeneration_GLEN"
|
| 5 |
+
],
|
| 6 |
+
"d_ff": 3072,
|
| 7 |
+
"d_kv": 64,
|
| 8 |
+
"d_model": 768,
|
| 9 |
+
"decode_vocab_size": 32128,
|
| 10 |
+
"decoder_start_token_id": 0,
|
| 11 |
+
"dropout_rate": 0.1,
|
| 12 |
+
"eos_token_id": 1,
|
| 13 |
+
"eval_batch_size": 4,
|
| 14 |
+
"initializer_factor": 1.0,
|
| 15 |
+
"input_dropout": 1,
|
| 16 |
+
"is_encoder_decoder": true,
|
| 17 |
+
"layer_norm_epsilon": 1e-06,
|
| 18 |
+
"model_type": "t5",
|
| 19 |
+
"n_positions": 512,
|
| 20 |
+
"num_decoder_layers": 12,
|
| 21 |
+
"num_heads": 12,
|
| 22 |
+
"num_layers": 12,
|
| 23 |
+
"output_past": true,
|
| 24 |
+
"pad_token_id": 0,
|
| 25 |
+
"relative_attention_num_buckets": 32,
|
| 26 |
+
"tie_decode_embedding": true,
|
| 27 |
+
"torch_dtype": "float32",
|
| 28 |
+
"train_batch_size": 8,
|
| 29 |
+
"transformers_version": "4.52.4",
|
| 30 |
+
"vocab_size": 32128
|
| 31 |
+
}
|
logs/test_glen_vault/GLEN_P1_test/checkpoint-6/rng_state.pth
ADDED
|
Binary file (14.5 kB). View file
|
|
|
logs/test_glen_vault/GLEN_P1_test/checkpoint-6/scheduler.pt
ADDED
|
Binary file (1.47 kB). View file
|
|
|
logs/test_glen_vault/GLEN_P1_test/checkpoint-6/trainer_state.json
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_global_step": null,
|
| 3 |
+
"best_metric": null,
|
| 4 |
+
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 1.0,
|
| 6 |
+
"eval_steps": 6,
|
| 7 |
+
"global_step": 6,
|
| 8 |
+
"is_hyper_param_search": false,
|
| 9 |
+
"is_local_process_zero": true,
|
| 10 |
+
"is_world_process_zero": true,
|
| 11 |
+
"log_history": [],
|
| 12 |
+
"logging_steps": 100,
|
| 13 |
+
"max_steps": 6,
|
| 14 |
+
"num_input_tokens_seen": 0,
|
| 15 |
+
"num_train_epochs": 1,
|
| 16 |
+
"save_steps": 6,
|
| 17 |
+
"stateful_callbacks": {
|
| 18 |
+
"TrainerControl": {
|
| 19 |
+
"args": {
|
| 20 |
+
"should_epoch_stop": false,
|
| 21 |
+
"should_evaluate": false,
|
| 22 |
+
"should_log": false,
|
| 23 |
+
"should_save": true,
|
| 24 |
+
"should_training_stop": true
|
| 25 |
+
},
|
| 26 |
+
"attributes": {}
|
| 27 |
+
}
|
| 28 |
+
},
|
| 29 |
+
"total_flos": 0.0,
|
| 30 |
+
"train_batch_size": 8,
|
| 31 |
+
"trial_name": null,
|
| 32 |
+
"trial_params": null
|
| 33 |
+
}
|
logs/test_glen_vault/GLEN_P1_test/config.json
CHANGED
|
@@ -10,7 +10,7 @@
|
|
| 10 |
"decoder_start_token_id": 0,
|
| 11 |
"dropout_rate": 0.1,
|
| 12 |
"eos_token_id": 1,
|
| 13 |
-
"eval_batch_size":
|
| 14 |
"initializer_factor": 1.0,
|
| 15 |
"input_dropout": 1,
|
| 16 |
"is_encoder_decoder": true,
|
|
@@ -25,7 +25,7 @@
|
|
| 25 |
"relative_attention_num_buckets": 32,
|
| 26 |
"tie_decode_embedding": true,
|
| 27 |
"torch_dtype": "float32",
|
| 28 |
-
"train_batch_size":
|
| 29 |
"transformers_version": "4.52.4",
|
| 30 |
"vocab_size": 32128
|
| 31 |
}
|
|
|
|
| 10 |
"decoder_start_token_id": 0,
|
| 11 |
"dropout_rate": 0.1,
|
| 12 |
"eos_token_id": 1,
|
| 13 |
+
"eval_batch_size": 4,
|
| 14 |
"initializer_factor": 1.0,
|
| 15 |
"input_dropout": 1,
|
| 16 |
"is_encoder_decoder": true,
|
|
|
|
| 25 |
"relative_attention_num_buckets": 32,
|
| 26 |
"tie_decode_embedding": true,
|
| 27 |
"torch_dtype": "float32",
|
| 28 |
+
"train_batch_size": 8,
|
| 29 |
"transformers_version": "4.52.4",
|
| 30 |
"vocab_size": 32128
|
| 31 |
}
|
logs/test_glen_vault/GLEN_P1_test/model_args.json
CHANGED
|
@@ -24,7 +24,7 @@
|
|
| 24 |
"infer_ckpt": "",
|
| 25 |
"infer_dir": "",
|
| 26 |
"logs_dir": "logs",
|
| 27 |
-
"docid_file_name": "",
|
| 28 |
"verbose_valid_query": 1,
|
| 29 |
"freeze_encoder": false,
|
| 30 |
"freeze_embeds": false,
|
|
|
|
| 24 |
"infer_ckpt": "",
|
| 25 |
"infer_dir": "",
|
| 26 |
"logs_dir": "logs",
|
| 27 |
+
"docid_file_name": "logs/test_glen_vault/GLEN_P1_test\\GLENP1Model_len_128_the_vault.tsv",
|
| 28 |
"verbose_valid_query": 1,
|
| 29 |
"freeze_encoder": false,
|
| 30 |
"freeze_embeds": false,
|
logs/test_glen_vault/GLEN_P2_test/checkpoint-7/config.json
CHANGED
|
@@ -12,7 +12,7 @@
|
|
| 12 |
"dense_act_fn": "relu",
|
| 13 |
"dropout_rate": 0.1,
|
| 14 |
"eos_token_id": 1,
|
| 15 |
-
"eval_batch_size":
|
| 16 |
"feed_forward_proj": "relu",
|
| 17 |
"id2label": {
|
| 18 |
"0": "LABEL_0"
|
|
@@ -36,7 +36,7 @@
|
|
| 36 |
"relative_attention_num_buckets": 32,
|
| 37 |
"tie_decode_embedding": true,
|
| 38 |
"torch_dtype": "float32",
|
| 39 |
-
"train_batch_size":
|
| 40 |
"transformers_version": "4.52.4",
|
| 41 |
"use_cache": true,
|
| 42 |
"vocab_size": 32128
|
|
|
|
| 12 |
"dense_act_fn": "relu",
|
| 13 |
"dropout_rate": 0.1,
|
| 14 |
"eos_token_id": 1,
|
| 15 |
+
"eval_batch_size": 4,
|
| 16 |
"feed_forward_proj": "relu",
|
| 17 |
"id2label": {
|
| 18 |
"0": "LABEL_0"
|
|
|
|
| 36 |
"relative_attention_num_buckets": 32,
|
| 37 |
"tie_decode_embedding": true,
|
| 38 |
"torch_dtype": "float32",
|
| 39 |
+
"train_batch_size": 8,
|
| 40 |
"transformers_version": "4.52.4",
|
| 41 |
"use_cache": true,
|
| 42 |
"vocab_size": 32128
|
logs/test_glen_vault/GLEN_P2_test/checkpoint-7/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 891644712
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d4c3a8544cae4f0ca7d58d039d2d5943cb33c4ef70b01a6dacc2780718f08454
|
| 3 |
size 891644712
|
logs/test_glen_vault/GLEN_P2_test/checkpoint-7/trainer_state.json
CHANGED
|
@@ -27,7 +27,7 @@
|
|
| 27 |
}
|
| 28 |
},
|
| 29 |
"total_flos": 0.0,
|
| 30 |
-
"train_batch_size":
|
| 31 |
"trial_name": null,
|
| 32 |
"trial_params": null
|
| 33 |
}
|
|
|
|
| 27 |
}
|
| 28 |
},
|
| 29 |
"total_flos": 0.0,
|
| 30 |
+
"train_batch_size": 4,
|
| 31 |
"trial_name": null,
|
| 32 |
"trial_params": null
|
| 33 |
}
|
logs/test_glen_vault/GLEN_P2_test/data_args.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
{
|
| 2 |
"dataset_name": "the_vault",
|
| 3 |
"encode_train_qry": false,
|
| 4 |
-
"test100":
|
| 5 |
"query_type": "gtq_doc_aug_qg",
|
| 6 |
"small_set": 0,
|
| 7 |
"aug_query": true,
|
|
|
|
| 1 |
{
|
| 2 |
"dataset_name": "the_vault",
|
| 3 |
"encode_train_qry": false,
|
| 4 |
+
"test100": 0,
|
| 5 |
"query_type": "gtq_doc_aug_qg",
|
| 6 |
"small_set": 0,
|
| 7 |
"aug_query": true,
|
logs/test_glen_vault/GLEN_P2_test/model_args.json
CHANGED
|
@@ -24,7 +24,7 @@
|
|
| 24 |
"infer_ckpt": "",
|
| 25 |
"infer_dir": "",
|
| 26 |
"logs_dir": "logs",
|
| 27 |
-
"docid_file_name": "",
|
| 28 |
"softmax_temperature": 1.0,
|
| 29 |
"num_multi_vectors": 3,
|
| 30 |
"untie_encoder": false,
|
|
@@ -34,6 +34,7 @@
|
|
| 34 |
"do_docid_temperature_annealing": true,
|
| 35 |
"docid_temperature": 1.0,
|
| 36 |
"docid_temperature_min": 1e-05,
|
|
|
|
| 37 |
"special_token_ids": [
|
| 38 |
2,
|
| 39 |
32099,
|
|
|
|
| 24 |
"infer_ckpt": "",
|
| 25 |
"infer_dir": "",
|
| 26 |
"logs_dir": "logs",
|
| 27 |
+
"docid_file_name": "logs/test_glen_vault/GLEN_P2_test\\GLENP2Model_len_128_the_vault.tsv",
|
| 28 |
"softmax_temperature": 1.0,
|
| 29 |
"num_multi_vectors": 3,
|
| 30 |
"untie_encoder": false,
|
|
|
|
| 34 |
"do_docid_temperature_annealing": true,
|
| 35 |
"docid_temperature": 1.0,
|
| 36 |
"docid_temperature_min": 1e-05,
|
| 37 |
+
"max_output_length": 4,
|
| 38 |
"special_token_ids": [
|
| 39 |
2,
|
| 40 |
32099,
|
scripts/preprocess_vault_dataset.py
CHANGED
|
@@ -126,6 +126,8 @@ def main():
|
|
| 126 |
help='Include code comments in descriptions')
|
| 127 |
parser.add_argument('--max_samples', type=int, default=None,
|
| 128 |
help='Maximum number of samples to process (for testing)')
|
|
|
|
|
|
|
| 129 |
|
| 130 |
args = parser.parse_args()
|
| 131 |
|
|
@@ -187,9 +189,9 @@ def main():
|
|
| 187 |
# Create query-document pairs for evaluation data
|
| 188 |
elif split in ['validate', 'test']:
|
| 189 |
pairs = create_query_document_pairs(processed_samples)
|
| 190 |
-
|
| 191 |
gtq_df = pd.DataFrame(pairs)
|
| 192 |
-
gtq_file = os.path.join(args.output_dir,
|
| 193 |
gtq_df.to_csv(gtq_file, sep='\t', index=False, encoding='utf-8')
|
| 194 |
print(f"Saved evaluation query-document pairs to {gtq_file}")
|
| 195 |
|
|
@@ -198,22 +200,9 @@ def main():
|
|
| 198 |
|
| 199 |
# Create separate ID file for each split
|
| 200 |
if split == 'train_small':
|
| 201 |
-
id_file = os.path.join(args.output_dir,
|
| 202 |
id_df.to_csv(id_file, sep='\t', index=False, encoding='utf-8')
|
| 203 |
-
print(f"
|
| 204 |
-
else:
|
| 205 |
-
# For validation and test, create separate ID files if needed
|
| 206 |
-
eval_split = 'dev' if split == 'validate' else 'test'
|
| 207 |
-
id_file = os.path.join(args.output_dir, f"ID_VAULT_{eval_split}_t5_bm25_truncate_3.tsv")
|
| 208 |
-
id_df.to_csv(id_file, sep='\t', index=False, encoding='utf-8')
|
| 209 |
-
print(f"Created document IDs in {id_file}")
|
| 210 |
-
|
| 211 |
-
print("Preprocessing completed!")
|
| 212 |
-
print(f"Output files saved in: {args.output_dir}")
|
| 213 |
-
print("\nGenerated files:")
|
| 214 |
-
print("- DOC_VAULT_*.tsv: Document content files")
|
| 215 |
-
print("- GTQ_VAULT_*.tsv: Query-document pairs for training/evaluation")
|
| 216 |
-
print("- ID_VAULT_*.tsv: Document ID mappings")
|
| 217 |
|
| 218 |
if __name__ == "__main__":
|
| 219 |
main()
|
|
|
|
| 126 |
help='Include code comments in descriptions')
|
| 127 |
parser.add_argument('--max_samples', type=int, default=None,
|
| 128 |
help='Maximum number of samples to process (for testing)')
|
| 129 |
+
parser.add_argument('--create_test_set', action='store_true',
|
| 130 |
+
help='Create test set for evaluation')
|
| 131 |
|
| 132 |
args = parser.parse_args()
|
| 133 |
|
|
|
|
| 189 |
# Create query-document pairs for evaluation data
|
| 190 |
elif split in ['validate', 'test']:
|
| 191 |
pairs = create_query_document_pairs(processed_samples)
|
| 192 |
+
# Always use 'dev' for evaluation to match GLEN's expectations
|
| 193 |
gtq_df = pd.DataFrame(pairs)
|
| 194 |
+
gtq_file = os.path.join(args.output_dir, "GTQ_VAULT_dev.tsv")
|
| 195 |
gtq_df.to_csv(gtq_file, sep='\t', index=False, encoding='utf-8')
|
| 196 |
print(f"Saved evaluation query-document pairs to {gtq_file}")
|
| 197 |
|
|
|
|
| 200 |
|
| 201 |
# Create separate ID file for each split
|
| 202 |
if split == 'train_small':
|
| 203 |
+
id_file = os.path.join(args.output_dir, "ID_VAULT_t5_bm25_truncate_3.tsv")
|
| 204 |
id_df.to_csv(id_file, sep='\t', index=False, encoding='utf-8')
|
| 205 |
+
print(f"Saved document IDs to {id_file}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 206 |
|
| 207 |
if __name__ == "__main__":
|
| 208 |
main()
|
scripts/test_small_training.ps1
CHANGED
|
@@ -5,11 +5,30 @@ Write-Host "Testing GLEN with small Vault dataset"
|
|
| 5 |
Write-Host "==========================================="
|
| 6 |
|
| 7 |
# Set memory monitoring parameters
|
| 8 |
-
$GPU_MEMORY_THRESHOLD = 0.
|
| 9 |
-
$GPU_CHECK_INTERVAL =
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
# Test Phase 1 Training
|
| 12 |
-
Write-Host "
|
|
|
|
| 13 |
$env:CUDA_VISIBLE_DEVICES = "0"
|
| 14 |
|
| 15 |
try {
|
|
@@ -17,9 +36,9 @@ try {
|
|
| 17 |
--output_dir logs/test_glen_vault/GLEN_P1_test `
|
| 18 |
--model_name_or_path t5-base `
|
| 19 |
--query_type gtq_doc `
|
| 20 |
-
--per_device_train_batch_size
|
| 21 |
-
--per_device_eval_batch_size
|
| 22 |
-
--gradient_accumulation_steps
|
| 23 |
--dropout_rate 0.1 `
|
| 24 |
--Rdrop 0.15 `
|
| 25 |
--aug_query True `
|
|
@@ -57,25 +76,42 @@ try {
|
|
| 57 |
exit 1
|
| 58 |
}
|
| 59 |
|
| 60 |
-
Write-Host "Phase 1 training completed successfully!"
|
| 61 |
|
| 62 |
# Check if Phase 1 checkpoint exists
|
| 63 |
$PHASE1_CKPT = "logs/test_glen_vault/GLEN_P1_test"
|
| 64 |
if (-not (Test-Path $PHASE1_CKPT)) {
|
| 65 |
-
Write-Error "Phase 1 checkpoint not found at $PHASE1_CKPT"
|
| 66 |
exit 1
|
| 67 |
}
|
| 68 |
|
| 69 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
|
| 71 |
# Test Phase 2 Training
|
| 72 |
try {
|
| 73 |
python examples/glen_phase2/train_glen.py `
|
| 74 |
--output_dir logs/test_glen_vault/GLEN_P2_test `
|
| 75 |
--model_name_or_path $PHASE1_CKPT `
|
| 76 |
-
--per_device_train_batch_size
|
| 77 |
-
--per_device_eval_batch_size
|
| 78 |
-
--gradient_accumulation_steps
|
| 79 |
--dropout_rate 0.1 `
|
| 80 |
--warmup_ratio 0.1 `
|
| 81 |
--id_class t5_bm25_truncate_3 `
|
|
@@ -109,22 +145,52 @@ try {
|
|
| 109 |
exit 1
|
| 110 |
}
|
| 111 |
|
| 112 |
-
Write-Host "Phase 2 training completed successfully!"
|
| 113 |
|
| 114 |
-
#
|
| 115 |
-
Write-Host "Testing document ID generation..."
|
| 116 |
$PHASE2_CKPT = "logs/test_glen_vault/GLEN_P2_test"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
|
| 118 |
try {
|
| 119 |
python examples/glen_phase2/makeid_glen.py `
|
| 120 |
--model_name_or_path $PHASE2_CKPT `
|
| 121 |
--infer_dir $PHASE2_CKPT `
|
| 122 |
--dataset_name the_vault `
|
| 123 |
-
--
|
| 124 |
-
--
|
| 125 |
-
--
|
| 126 |
-
--
|
| 127 |
-
--test100 1
|
| 128 |
|
| 129 |
if ($LASTEXITCODE -ne 0) {
|
| 130 |
throw "Document ID generation failed!"
|
|
@@ -134,21 +200,29 @@ try {
|
|
| 134 |
exit 1
|
| 135 |
}
|
| 136 |
|
| 137 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 138 |
|
| 139 |
-
|
| 140 |
-
Write-Host "
|
|
|
|
|
|
|
|
|
|
| 141 |
|
| 142 |
try {
|
| 143 |
python examples/glen_phase2/evaluate_glen.py `
|
| 144 |
--model_name_or_path $PHASE2_CKPT `
|
| 145 |
--infer_dir $PHASE2_CKPT `
|
| 146 |
--dataset_name the_vault `
|
| 147 |
-
--
|
|
|
|
| 148 |
--q_max_len 32 `
|
| 149 |
--num_return_sequences 5 `
|
| 150 |
-
--logs_dir logs/test_glen_vault
|
| 151 |
-
--test100 1
|
| 152 |
|
| 153 |
if ($LASTEXITCODE -ne 0) {
|
| 154 |
throw "Query inference failed!"
|
|
@@ -158,13 +232,27 @@ try {
|
|
| 158 |
exit 1
|
| 159 |
}
|
| 160 |
|
|
|
|
|
|
|
|
|
|
| 161 |
Write-Host "==========================================="
|
| 162 |
-
Write-Host "
|
| 163 |
Write-Host "==========================================="
|
| 164 |
-
Write-Host "Training logs and results saved in: logs/test_glen_vault/"
|
| 165 |
Write-Host ""
|
| 166 |
-
Write-Host "
|
| 167 |
-
Write-Host "
|
| 168 |
-
Write-Host "
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 169 |
Write-Host ""
|
| 170 |
-
Write-Host "The system is ready for full training on The Vault dataset!"
|
|
|
|
|
|
| 5 |
Write-Host "==========================================="
|
| 6 |
|
| 7 |
# Set memory monitoring parameters
|
| 8 |
+
$GPU_MEMORY_THRESHOLD = 0.85
|
| 9 |
+
$GPU_CHECK_INTERVAL = 50
|
| 10 |
+
|
| 11 |
+
Write-Host "GPU Memory Protection enabled:"
|
| 12 |
+
Write-Host "- Memory threshold: ${GPU_MEMORY_THRESHOLD} (85%)"
|
| 13 |
+
Write-Host "- Check interval: ${GPU_CHECK_INTERVAL} steps"
|
| 14 |
+
Write-Host ""
|
| 15 |
+
|
| 16 |
+
# Ensure data preprocessing is done
|
| 17 |
+
Write-Host "Checking data preprocessing..."
|
| 18 |
+
if (-not (Test-Path "data/the_vault/DOC_VAULT_train.tsv")) {
|
| 19 |
+
Write-Host "Running data preprocessing..."
|
| 20 |
+
python scripts/preprocess_vault_dataset.py --input_dir the_vault_dataset/ --output_dir data/the_vault/ --sample_size 1000
|
| 21 |
+
if ($LASTEXITCODE -ne 0) {
|
| 22 |
+
Write-Error "Data preprocessing failed!"
|
| 23 |
+
exit 1
|
| 24 |
+
}
|
| 25 |
+
} else {
|
| 26 |
+
Write-Host "Data already preprocessed."
|
| 27 |
+
}
|
| 28 |
|
| 29 |
# Test Phase 1 Training
|
| 30 |
+
Write-Host ""
|
| 31 |
+
Write-Host "=== Phase 1 Training (Document ID Assignment) ==="
|
| 32 |
$env:CUDA_VISIBLE_DEVICES = "0"
|
| 33 |
|
| 34 |
try {
|
|
|
|
| 36 |
--output_dir logs/test_glen_vault/GLEN_P1_test `
|
| 37 |
--model_name_or_path t5-base `
|
| 38 |
--query_type gtq_doc `
|
| 39 |
+
--per_device_train_batch_size 8 `
|
| 40 |
+
--per_device_eval_batch_size 4 `
|
| 41 |
+
--gradient_accumulation_steps 2 `
|
| 42 |
--dropout_rate 0.1 `
|
| 43 |
--Rdrop 0.15 `
|
| 44 |
--aug_query True `
|
|
|
|
| 76 |
exit 1
|
| 77 |
}
|
| 78 |
|
| 79 |
+
Write-Host "✅ Phase 1 training completed successfully!"
|
| 80 |
|
| 81 |
# Check if Phase 1 checkpoint exists
|
| 82 |
$PHASE1_CKPT = "logs/test_glen_vault/GLEN_P1_test"
|
| 83 |
if (-not (Test-Path $PHASE1_CKPT)) {
|
| 84 |
+
Write-Error "❌ Phase 1 checkpoint not found at $PHASE1_CKPT"
|
| 85 |
exit 1
|
| 86 |
}
|
| 87 |
|
| 88 |
+
# Check for model files
|
| 89 |
+
$model_files = @("pytorch_model.bin", "model.safetensors")
|
| 90 |
+
$found_model = $false
|
| 91 |
+
foreach ($file in $model_files) {
|
| 92 |
+
if (Test-Path "$PHASE1_CKPT/$file") {
|
| 93 |
+
$found_model = $true
|
| 94 |
+
Write-Host "📁 Found Phase 1 model: $file"
|
| 95 |
+
break
|
| 96 |
+
}
|
| 97 |
+
}
|
| 98 |
+
|
| 99 |
+
if (-not $found_model) {
|
| 100 |
+
Write-Error "❌ No model files found in Phase 1 checkpoint"
|
| 101 |
+
exit 1
|
| 102 |
+
}
|
| 103 |
+
|
| 104 |
+
Write-Host ""
|
| 105 |
+
Write-Host "=== Phase 2 Training (Ranking-based Refinement) ==="
|
| 106 |
|
| 107 |
# Test Phase 2 Training
|
| 108 |
try {
|
| 109 |
python examples/glen_phase2/train_glen.py `
|
| 110 |
--output_dir logs/test_glen_vault/GLEN_P2_test `
|
| 111 |
--model_name_or_path $PHASE1_CKPT `
|
| 112 |
+
--per_device_train_batch_size 4 `
|
| 113 |
+
--per_device_eval_batch_size 2 `
|
| 114 |
+
--gradient_accumulation_steps 4 `
|
| 115 |
--dropout_rate 0.1 `
|
| 116 |
--warmup_ratio 0.1 `
|
| 117 |
--id_class t5_bm25_truncate_3 `
|
|
|
|
| 145 |
exit 1
|
| 146 |
}
|
| 147 |
|
| 148 |
+
Write-Host "✅ Phase 2 training completed successfully!"
|
| 149 |
|
| 150 |
+
# Validate Phase 2 checkpoint
|
|
|
|
| 151 |
$PHASE2_CKPT = "logs/test_glen_vault/GLEN_P2_test"
|
| 152 |
+
if (-not (Test-Path $PHASE2_CKPT)) {
|
| 153 |
+
Write-Error "❌ Phase 2 checkpoint not found at $PHASE2_CKPT"
|
| 154 |
+
exit 1
|
| 155 |
+
}
|
| 156 |
+
|
| 157 |
+
# Check for checkpoint subdirectories or model files
|
| 158 |
+
$checkpoint_dirs = Get-ChildItem -Path $PHASE2_CKPT -Directory -Name "checkpoint-*" | Sort-Object {[int]($_.Split('-')[1])} | Select-Object -Last 1
|
| 159 |
+
if ($checkpoint_dirs) {
|
| 160 |
+
Write-Host "📁 Found Phase 2 checkpoint: $checkpoint_dirs"
|
| 161 |
+
$checkpoint_path = "$PHASE2_CKPT/$checkpoint_dirs"
|
| 162 |
+
if (-not (Test-Path "$checkpoint_path/model.safetensors") -and -not (Test-Path "$checkpoint_path/pytorch_model.bin")) {
|
| 163 |
+
Write-Error "❌ No model files in checkpoint directory"
|
| 164 |
+
exit 1
|
| 165 |
+
}
|
| 166 |
+
} else {
|
| 167 |
+
# Check for model files in root
|
| 168 |
+
$found_model = $false
|
| 169 |
+
foreach ($file in $model_files) {
|
| 170 |
+
if (Test-Path "$PHASE2_CKPT/$file") {
|
| 171 |
+
$found_model = $true
|
| 172 |
+
Write-Host "📁 Found Phase 2 model: $file"
|
| 173 |
+
break
|
| 174 |
+
}
|
| 175 |
+
}
|
| 176 |
+
if (-not $found_model) {
|
| 177 |
+
Write-Error "❌ No model files found in Phase 2 checkpoint"
|
| 178 |
+
exit 1
|
| 179 |
+
}
|
| 180 |
+
}
|
| 181 |
+
|
| 182 |
+
Write-Host ""
|
| 183 |
+
Write-Host "=== Document ID Generation ==="
|
| 184 |
|
| 185 |
try {
|
| 186 |
python examples/glen_phase2/makeid_glen.py `
|
| 187 |
--model_name_or_path $PHASE2_CKPT `
|
| 188 |
--infer_dir $PHASE2_CKPT `
|
| 189 |
--dataset_name the_vault `
|
| 190 |
+
--docid_file_name GLEN_P2_test_docids `
|
| 191 |
+
--per_device_eval_batch_size 4 `
|
| 192 |
+
--max_input_length 128 `
|
| 193 |
+
--num_return_sequences 10
|
|
|
|
| 194 |
|
| 195 |
if ($LASTEXITCODE -ne 0) {
|
| 196 |
throw "Document ID generation failed!"
|
|
|
|
| 200 |
exit 1
|
| 201 |
}
|
| 202 |
|
| 203 |
+
# Validate docid file was created
|
| 204 |
+
$docid_file = "logs/test_glen_vault/GLEN_P2_test_docids.tsv"
|
| 205 |
+
if (-not (Test-Path $docid_file)) {
|
| 206 |
+
Write-Error "❌ Document ID file not created: $docid_file"
|
| 207 |
+
exit 1
|
| 208 |
+
}
|
| 209 |
|
| 210 |
+
$line_count = (Get-Content $docid_file).Count
|
| 211 |
+
Write-Host "✅ Document ID generation completed! Generated $line_count document IDs"
|
| 212 |
+
|
| 213 |
+
Write-Host ""
|
| 214 |
+
Write-Host "=== Query Inference ==="
|
| 215 |
|
| 216 |
try {
|
| 217 |
python examples/glen_phase2/evaluate_glen.py `
|
| 218 |
--model_name_or_path $PHASE2_CKPT `
|
| 219 |
--infer_dir $PHASE2_CKPT `
|
| 220 |
--dataset_name the_vault `
|
| 221 |
+
--docid_file_name GLEN_P2_test_docids `
|
| 222 |
+
--per_device_eval_batch_size 4 `
|
| 223 |
--q_max_len 32 `
|
| 224 |
--num_return_sequences 5 `
|
| 225 |
+
--logs_dir logs/test_glen_vault
|
|
|
|
| 226 |
|
| 227 |
if ($LASTEXITCODE -ne 0) {
|
| 228 |
throw "Query inference failed!"
|
|
|
|
| 232 |
exit 1
|
| 233 |
}
|
| 234 |
|
| 235 |
+
Write-Host "✅ Query inference completed successfully!"
|
| 236 |
+
|
| 237 |
+
Write-Host ""
|
| 238 |
Write-Host "==========================================="
|
| 239 |
+
Write-Host "🎉 ALL TESTS COMPLETED SUCCESSFULLY! 🎉"
|
| 240 |
Write-Host "==========================================="
|
|
|
|
| 241 |
Write-Host ""
|
| 242 |
+
Write-Host "📊 Summary:"
|
| 243 |
+
Write-Host " ✅ Phase 1 Training (Document ID Assignment)"
|
| 244 |
+
Write-Host " ✅ Phase 2 Training (Ranking-based Refinement)"
|
| 245 |
+
Write-Host " ✅ Document ID Generation ($line_count IDs)"
|
| 246 |
+
Write-Host " ✅ Query Inference & Evaluation"
|
| 247 |
+
Write-Host ""
|
| 248 |
+
Write-Host "📁 Results saved in: logs/test_glen_vault/"
|
| 249 |
+
Write-Host "📁 Document IDs: $docid_file"
|
| 250 |
+
Write-Host ""
|
| 251 |
+
Write-Host "🛡️ Memory Protection Summary:"
|
| 252 |
+
Write-Host " - GPU memory threshold: ${GPU_MEMORY_THRESHOLD} (85%)"
|
| 253 |
+
Write-Host " - Check interval: ${GPU_CHECK_INTERVAL} steps"
|
| 254 |
+
Write-Host " - FP16 training enabled"
|
| 255 |
+
Write-Host " - Optimized batch sizes used"
|
| 256 |
Write-Host ""
|
| 257 |
+
Write-Host "🚀 The system is ready for full training on The Vault dataset!"
|
| 258 |
+
Write-Host " Use scripts/train_full_vault.ps1 for production training."
|
scripts/test_small_training.sh
CHANGED
|
@@ -1,23 +1,56 @@
|
|
| 1 |
#!/bin/bash
|
| 2 |
|
| 3 |
echo "==========================================="
|
| 4 |
-
echo "Testing GLEN
|
| 5 |
echo "==========================================="
|
| 6 |
|
| 7 |
# Set memory monitoring parameters
|
| 8 |
-
GPU_MEMORY_THRESHOLD=0.
|
| 9 |
-
GPU_CHECK_INTERVAL=
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
-
# Test Phase 1 Training
|
| 12 |
-
echo "Starting Phase 1 training test..."
|
| 13 |
-
CUDA_VISIBLE_DEVICES=0 \
|
| 14 |
python examples/glen_phase1/train_glen.py \
|
| 15 |
--output_dir logs/test_glen_vault/GLEN_P1_test \
|
| 16 |
--model_name_or_path t5-base \
|
| 17 |
--query_type gtq_doc \
|
| 18 |
-
--per_device_train_batch_size
|
| 19 |
-
--per_device_eval_batch_size
|
| 20 |
-
--gradient_accumulation_steps
|
| 21 |
--dropout_rate 0.1 \
|
| 22 |
--Rdrop 0.15 \
|
| 23 |
--aug_query True \
|
|
@@ -34,47 +67,76 @@ python examples/glen_phase1/train_glen.py \
|
|
| 34 |
--decoder_input doc_rep \
|
| 35 |
--max_output_length 5 \
|
| 36 |
--num_return_sequences 5 \
|
| 37 |
-
--logging_steps
|
| 38 |
--overwrite_output_dir \
|
| 39 |
-
--wandb_tag
|
| 40 |
-
--do_eval
|
| 41 |
--num_train_epochs 1 \
|
| 42 |
-
--save_steps
|
| 43 |
--save_strategy steps \
|
| 44 |
-
--evaluation_strategy
|
|
|
|
| 45 |
--seed 42 \
|
| 46 |
-
--gpu_memory_threshold $
|
| 47 |
-
--gpu_check_interval $
|
| 48 |
-
--fp16 True
|
|
|
|
|
|
|
| 49 |
|
| 50 |
if [ $? -ne 0 ]; then
|
| 51 |
-
echo "Phase 1 training failed!"
|
| 52 |
exit 1
|
| 53 |
fi
|
| 54 |
|
| 55 |
-
echo "Phase 1 training completed successfully!"
|
| 56 |
|
| 57 |
# Check if Phase 1 checkpoint exists
|
| 58 |
PHASE1_CKPT="logs/test_glen_vault/GLEN_P1_test"
|
| 59 |
if [ ! -d "$PHASE1_CKPT" ]; then
|
| 60 |
-
echo "Phase 1 checkpoint not found at $PHASE1_CKPT"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
exit 1
|
| 62 |
fi
|
| 63 |
|
| 64 |
-
echo "
|
| 65 |
-
|
| 66 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
python examples/glen_phase2/train_glen.py \
|
| 68 |
--output_dir logs/test_glen_vault/GLEN_P2_test \
|
| 69 |
-
--model_name_or_path $
|
| 70 |
-
--per_device_train_batch_size
|
| 71 |
-
--per_device_eval_batch_size
|
| 72 |
-
--gradient_accumulation_steps
|
| 73 |
--dropout_rate 0.1 \
|
| 74 |
--warmup_ratio 0.1 \
|
| 75 |
--id_class t5_bm25_truncate_3 \
|
| 76 |
--dataset_name the_vault \
|
| 77 |
-
--test100 1 \
|
| 78 |
--tree 1 \
|
| 79 |
--q_max_len 32 \
|
| 80 |
--p_max_len 128 \
|
|
@@ -82,73 +144,136 @@ python examples/glen_phase2/train_glen.py \
|
|
| 82 |
--positive_passage_no_shuffle True \
|
| 83 |
--tie_word_embeddings True \
|
| 84 |
--num_return_sequences 5 \
|
| 85 |
-
--logging_steps
|
| 86 |
--overwrite_output_dir \
|
| 87 |
-
--wandb_tag
|
| 88 |
-
--do_eval
|
| 89 |
--num_train_epochs 1 \
|
| 90 |
-
--save_steps
|
| 91 |
--save_strategy steps \
|
| 92 |
-
--evaluation_strategy
|
|
|
|
| 93 |
--seed 42 \
|
| 94 |
-
--gpu_memory_threshold $
|
| 95 |
-
--gpu_check_interval $
|
| 96 |
-
--fp16 True
|
|
|
|
|
|
|
| 97 |
|
| 98 |
if [ $? -ne 0 ]; then
|
| 99 |
-
echo "Phase 2 training failed!"
|
| 100 |
exit 1
|
| 101 |
fi
|
| 102 |
|
| 103 |
-
echo "Phase 2 training completed successfully!"
|
| 104 |
|
| 105 |
-
#
|
| 106 |
-
echo "Testing document ID generation..."
|
| 107 |
PHASE2_CKPT="logs/test_glen_vault/GLEN_P2_test"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 108 |
|
| 109 |
-
CUDA_VISIBLE_DEVICES=0 \
|
| 110 |
python examples/glen_phase2/makeid_glen.py \
|
| 111 |
-
--model_name_or_path $
|
| 112 |
-
--infer_dir $
|
| 113 |
--dataset_name the_vault \
|
| 114 |
-
--
|
| 115 |
-
--
|
| 116 |
-
--
|
| 117 |
-
--
|
| 118 |
-
--
|
|
|
|
| 119 |
|
| 120 |
if [ $? -ne 0 ]; then
|
| 121 |
-
echo "Document ID generation failed!"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 122 |
exit 1
|
| 123 |
fi
|
| 124 |
|
| 125 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 126 |
|
| 127 |
-
# Test Query Inference
|
| 128 |
-
echo "Testing query inference..."
|
| 129 |
-
CUDA_VISIBLE_DEVICES=0 \
|
| 130 |
python examples/glen_phase2/evaluate_glen.py \
|
| 131 |
-
--model_name_or_path $
|
| 132 |
-
--infer_dir $
|
| 133 |
--dataset_name the_vault \
|
| 134 |
-
--
|
|
|
|
| 135 |
--q_max_len 32 \
|
| 136 |
--num_return_sequences 5 \
|
| 137 |
--logs_dir logs/test_glen_vault \
|
| 138 |
-
--test100 1
|
|
|
|
|
|
|
| 139 |
|
| 140 |
if [ $? -ne 0 ]; then
|
| 141 |
-
echo "Query inference failed!"
|
| 142 |
exit 1
|
| 143 |
fi
|
| 144 |
|
|
|
|
|
|
|
|
|
|
| 145 |
echo "==========================================="
|
| 146 |
-
echo "
|
| 147 |
echo "==========================================="
|
| 148 |
-
echo "Training logs and results saved in: logs/test_glen_vault/"
|
| 149 |
echo ""
|
| 150 |
-
echo "
|
| 151 |
-
echo "
|
| 152 |
-
echo "
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 153 |
echo ""
|
| 154 |
-
echo "The
|
|
|
|
| 1 |
#!/bin/bash
|
| 2 |
|
| 3 |
echo "==========================================="
|
| 4 |
+
echo "Testing GLEN on The Vault dataset (Small)"
|
| 5 |
echo "==========================================="
|
| 6 |
|
| 7 |
# Set memory monitoring parameters
|
| 8 |
+
GPU_MEMORY_THRESHOLD=0.85
|
| 9 |
+
GPU_CHECK_INTERVAL=50
|
| 10 |
+
|
| 11 |
+
echo "Resource Protection enabled:"
|
| 12 |
+
echo "- Memory threshold: ${GPU_MEMORY_THRESHOLD} (85%)"
|
| 13 |
+
echo "- Check interval: ${GPU_CHECK_INTERVAL} steps"
|
| 14 |
+
echo ""
|
| 15 |
+
|
| 16 |
+
# Ensure data preprocessing is done
|
| 17 |
+
echo "Checking data preprocessing..."
|
| 18 |
+
if [ ! -f "data/the_vault/DOC_VAULT_train.tsv" ] || [ ! -f "data/the_vault/GTQ_VAULT_dev.tsv" ]; then
|
| 19 |
+
echo "Running data preprocessing..."
|
| 20 |
+
python scripts/preprocess_vault_dataset.py --input_dir the_vault_dataset/ --output_dir data/the_vault/ --sample_size 1000 --create_test_set
|
| 21 |
+
if [ $? -ne 0 ]; then
|
| 22 |
+
echo "Error: Data preprocessing failed!"
|
| 23 |
+
exit 1
|
| 24 |
+
fi
|
| 25 |
+
else
|
| 26 |
+
echo "Data already preprocessed."
|
| 27 |
+
fi
|
| 28 |
+
|
| 29 |
+
# Phase 1 Training
|
| 30 |
+
echo ""
|
| 31 |
+
echo "=== Phase 1 Training (Document ID Assignment) ==="
|
| 32 |
+
|
| 33 |
+
# Check if CUDA is available
|
| 34 |
+
if command -v nvidia-smi &> /dev/null; then
|
| 35 |
+
export CUDA_VISIBLE_DEVICES="0"
|
| 36 |
+
echo "Using GPU for training"
|
| 37 |
+
BATCH_SIZE=8
|
| 38 |
+
EVAL_BATCH_SIZE=4
|
| 39 |
+
ACCUM_STEPS=2
|
| 40 |
+
else
|
| 41 |
+
echo "No GPU detected, using CPU with reduced batch sizes"
|
| 42 |
+
BATCH_SIZE=2
|
| 43 |
+
EVAL_BATCH_SIZE=1
|
| 44 |
+
ACCUM_STEPS=8
|
| 45 |
+
fi
|
| 46 |
|
|
|
|
|
|
|
|
|
|
| 47 |
python examples/glen_phase1/train_glen.py \
|
| 48 |
--output_dir logs/test_glen_vault/GLEN_P1_test \
|
| 49 |
--model_name_or_path t5-base \
|
| 50 |
--query_type gtq_doc \
|
| 51 |
+
--per_device_train_batch_size $BATCH_SIZE \
|
| 52 |
+
--per_device_eval_batch_size $EVAL_BATCH_SIZE \
|
| 53 |
+
--gradient_accumulation_steps $ACCUM_STEPS \
|
| 54 |
--dropout_rate 0.1 \
|
| 55 |
--Rdrop 0.15 \
|
| 56 |
--aug_query True \
|
|
|
|
| 67 |
--decoder_input doc_rep \
|
| 68 |
--max_output_length 5 \
|
| 69 |
--num_return_sequences 5 \
|
| 70 |
+
--logging_steps 100 \
|
| 71 |
--overwrite_output_dir \
|
| 72 |
+
--wandb_tag glen_vault_test_p1 \
|
| 73 |
+
--do_eval True \
|
| 74 |
--num_train_epochs 1 \
|
| 75 |
+
--save_steps 1000 \
|
| 76 |
--save_strategy steps \
|
| 77 |
+
--evaluation_strategy steps \
|
| 78 |
+
--eval_steps 1000 \
|
| 79 |
--seed 42 \
|
| 80 |
+
--gpu_memory_threshold $GPU_MEMORY_THRESHOLD \
|
| 81 |
+
--gpu_check_interval $GPU_CHECK_INTERVAL \
|
| 82 |
+
--fp16 True \
|
| 83 |
+
--dataloader_num_workers 0 \
|
| 84 |
+
--dataloader_pin_memory False
|
| 85 |
|
| 86 |
if [ $? -ne 0 ]; then
|
| 87 |
+
echo "Error: Phase 1 training failed!"
|
| 88 |
exit 1
|
| 89 |
fi
|
| 90 |
|
| 91 |
+
echo "✅ Phase 1 training completed successfully!"
|
| 92 |
|
| 93 |
# Check if Phase 1 checkpoint exists
|
| 94 |
PHASE1_CKPT="logs/test_glen_vault/GLEN_P1_test"
|
| 95 |
if [ ! -d "$PHASE1_CKPT" ]; then
|
| 96 |
+
echo "Error: Phase 1 checkpoint not found at $PHASE1_CKPT"
|
| 97 |
+
exit 1
|
| 98 |
+
fi
|
| 99 |
+
|
| 100 |
+
# Check for model files
|
| 101 |
+
model_files=("pytorch_model.bin" "model.safetensors")
|
| 102 |
+
found_model=false
|
| 103 |
+
for file in "${model_files[@]}"; do
|
| 104 |
+
if [ -f "$PHASE1_CKPT/$file" ]; then
|
| 105 |
+
found_model=true
|
| 106 |
+
echo "📁 Found Phase 1 model: $file"
|
| 107 |
+
break
|
| 108 |
+
fi
|
| 109 |
+
done
|
| 110 |
+
|
| 111 |
+
if [ "$found_model" = false ]; then
|
| 112 |
+
echo "Error: No model files found in Phase 1 checkpoint"
|
| 113 |
exit 1
|
| 114 |
fi
|
| 115 |
|
| 116 |
+
echo ""
|
| 117 |
+
echo "=== Phase 2 Training (Ranking-based Refinement) ==="
|
| 118 |
+
|
| 119 |
+
# Adjust batch sizes for Phase 2
|
| 120 |
+
if command -v nvidia-smi &> /dev/null; then
|
| 121 |
+
BATCH_SIZE=4
|
| 122 |
+
EVAL_BATCH_SIZE=2
|
| 123 |
+
ACCUM_STEPS=4
|
| 124 |
+
else
|
| 125 |
+
BATCH_SIZE=1
|
| 126 |
+
EVAL_BATCH_SIZE=1
|
| 127 |
+
ACCUM_STEPS=16
|
| 128 |
+
fi
|
| 129 |
+
|
| 130 |
python examples/glen_phase2/train_glen.py \
|
| 131 |
--output_dir logs/test_glen_vault/GLEN_P2_test \
|
| 132 |
+
--model_name_or_path $PHASE1_CKPT \
|
| 133 |
+
--per_device_train_batch_size $BATCH_SIZE \
|
| 134 |
+
--per_device_eval_batch_size $EVAL_BATCH_SIZE \
|
| 135 |
+
--gradient_accumulation_steps $ACCUM_STEPS \
|
| 136 |
--dropout_rate 0.1 \
|
| 137 |
--warmup_ratio 0.1 \
|
| 138 |
--id_class t5_bm25_truncate_3 \
|
| 139 |
--dataset_name the_vault \
|
|
|
|
| 140 |
--tree 1 \
|
| 141 |
--q_max_len 32 \
|
| 142 |
--p_max_len 128 \
|
|
|
|
| 144 |
--positive_passage_no_shuffle True \
|
| 145 |
--tie_word_embeddings True \
|
| 146 |
--num_return_sequences 5 \
|
| 147 |
+
--logging_steps 100 \
|
| 148 |
--overwrite_output_dir \
|
| 149 |
+
--wandb_tag glen_vault_test_p2 \
|
| 150 |
+
--do_eval True \
|
| 151 |
--num_train_epochs 1 \
|
| 152 |
+
--save_steps 1000 \
|
| 153 |
--save_strategy steps \
|
| 154 |
+
--evaluation_strategy steps \
|
| 155 |
+
--eval_steps 1000 \
|
| 156 |
--seed 42 \
|
| 157 |
+
--gpu_memory_threshold $GPU_MEMORY_THRESHOLD \
|
| 158 |
+
--gpu_check_interval $GPU_CHECK_INTERVAL \
|
| 159 |
+
--fp16 True \
|
| 160 |
+
--dataloader_num_workers 0 \
|
| 161 |
+
--dataloader_pin_memory False
|
| 162 |
|
| 163 |
if [ $? -ne 0 ]; then
|
| 164 |
+
echo "Error: Phase 2 training failed!"
|
| 165 |
exit 1
|
| 166 |
fi
|
| 167 |
|
| 168 |
+
echo "✅ Phase 2 training completed successfully!"
|
| 169 |
|
| 170 |
+
# Validate Phase 2 checkpoint
|
|
|
|
| 171 |
PHASE2_CKPT="logs/test_glen_vault/GLEN_P2_test"
|
| 172 |
+
if [ ! -d "$PHASE2_CKPT" ]; then
|
| 173 |
+
echo "Error: Phase 2 checkpoint not found at $PHASE2_CKPT"
|
| 174 |
+
exit 1
|
| 175 |
+
fi
|
| 176 |
+
|
| 177 |
+
# Check for checkpoint subdirectories or model files
|
| 178 |
+
checkpoint_dir=$(find "$PHASE2_CKPT" -maxdepth 1 -type d -name "checkpoint-*" | sort -V | tail -n 1)
|
| 179 |
+
if [ -n "$checkpoint_dir" ]; then
|
| 180 |
+
echo "📁 Found Phase 2 checkpoint: $(basename $checkpoint_dir)"
|
| 181 |
+
if [ ! -f "$checkpoint_dir/model.safetensors" ] && [ ! -f "$checkpoint_dir/pytorch_model.bin" ]; then
|
| 182 |
+
echo "Error: No model files in checkpoint directory"
|
| 183 |
+
exit 1
|
| 184 |
+
fi
|
| 185 |
+
else
|
| 186 |
+
# Check for model files in root
|
| 187 |
+
found_model=false
|
| 188 |
+
for file in "${model_files[@]}"; do
|
| 189 |
+
if [ -f "$PHASE2_CKPT/$file" ]; then
|
| 190 |
+
found_model=true
|
| 191 |
+
echo "📁 Found Phase 2 model: $file"
|
| 192 |
+
break
|
| 193 |
+
fi
|
| 194 |
+
done
|
| 195 |
+
if [ "$found_model" = false ]; then
|
| 196 |
+
echo "Error: No model files found in Phase 2 checkpoint"
|
| 197 |
+
exit 1
|
| 198 |
+
fi
|
| 199 |
+
fi
|
| 200 |
+
|
| 201 |
+
echo ""
|
| 202 |
+
echo "=== Document ID Generation ==="
|
| 203 |
|
|
|
|
| 204 |
python examples/glen_phase2/makeid_glen.py \
|
| 205 |
+
--model_name_or_path $PHASE2_CKPT \
|
| 206 |
+
--infer_dir $PHASE2_CKPT \
|
| 207 |
--dataset_name the_vault \
|
| 208 |
+
--docid_file_name GLEN_P2_test_docids \
|
| 209 |
+
--per_device_eval_batch_size 1 \
|
| 210 |
+
--max_input_length 128 \
|
| 211 |
+
--num_return_sequences 10 \
|
| 212 |
+
--dataloader_num_workers 0 \
|
| 213 |
+
--dataloader_pin_memory False
|
| 214 |
|
| 215 |
if [ $? -ne 0 ]; then
|
| 216 |
+
echo "Error: Document ID generation failed!"
|
| 217 |
+
exit 1
|
| 218 |
+
fi
|
| 219 |
+
|
| 220 |
+
# Validate docid file was created
|
| 221 |
+
docid_file="logs/test_glen_vault/GLEN_P2_test_docids.tsv"
|
| 222 |
+
if [ ! -f "$docid_file" ]; then
|
| 223 |
+
echo "Error: Document ID file not created: $docid_file"
|
| 224 |
exit 1
|
| 225 |
fi
|
| 226 |
|
| 227 |
+
line_count=$(wc -l < "$docid_file")
|
| 228 |
+
echo "✅ Document ID generation completed! Generated $line_count document IDs"
|
| 229 |
+
|
| 230 |
+
echo ""
|
| 231 |
+
echo "=== Query Inference ==="
|
| 232 |
+
|
| 233 |
+
# First, ensure we have test queries
|
| 234 |
+
if [ ! -f "data/the_vault/GTQ_VAULT_dev.tsv" ]; then
|
| 235 |
+
echo "Error: Test queries file not found. Please run preprocessing with --create_test_set flag"
|
| 236 |
+
exit 1
|
| 237 |
+
fi
|
| 238 |
|
|
|
|
|
|
|
|
|
|
| 239 |
python examples/glen_phase2/evaluate_glen.py \
|
| 240 |
+
--model_name_or_path $PHASE2_CKPT \
|
| 241 |
+
--infer_dir $PHASE2_CKPT \
|
| 242 |
--dataset_name the_vault \
|
| 243 |
+
--docid_file_name GLEN_P2_test_docids \
|
| 244 |
+
--per_device_eval_batch_size 1 \
|
| 245 |
--q_max_len 32 \
|
| 246 |
--num_return_sequences 5 \
|
| 247 |
--logs_dir logs/test_glen_vault \
|
| 248 |
+
--test100 1 \
|
| 249 |
+
--dataloader_num_workers 0 \
|
| 250 |
+
--dataloader_pin_memory False
|
| 251 |
|
| 252 |
if [ $? -ne 0 ]; then
|
| 253 |
+
echo "Error: Query inference failed!"
|
| 254 |
exit 1
|
| 255 |
fi
|
| 256 |
|
| 257 |
+
echo "✅ Query inference completed successfully!"
|
| 258 |
+
|
| 259 |
+
echo ""
|
| 260 |
echo "==========================================="
|
| 261 |
+
echo "🎉 TESTING COMPLETED SUCCESSFULLY! 🎉"
|
| 262 |
echo "==========================================="
|
|
|
|
| 263 |
echo ""
|
| 264 |
+
echo "📊 Summary:"
|
| 265 |
+
echo " ✅ Phase 1 Training (Document ID Assignment)"
|
| 266 |
+
echo " ✅ Phase 2 Training (Ranking-based Refinement)"
|
| 267 |
+
echo " ✅ Document ID Generation ($line_count IDs)"
|
| 268 |
+
echo " ✅ Query Inference & Evaluation"
|
| 269 |
+
echo ""
|
| 270 |
+
echo "📁 Results saved in: logs/test_glen_vault/"
|
| 271 |
+
echo "📁 Document IDs: $docid_file"
|
| 272 |
+
echo ""
|
| 273 |
+
echo "🛡️ Resource Protection Summary:"
|
| 274 |
+
echo " - Memory threshold: ${GPU_MEMORY_THRESHOLD} (85%)"
|
| 275 |
+
echo " - Check interval: ${GPU_CHECK_INTERVAL} steps"
|
| 276 |
+
echo " - FP16 training enabled"
|
| 277 |
+
echo " - Optimized batch sizes for current hardware"
|
| 278 |
echo ""
|
| 279 |
+
echo "🚀 Testing completed! The model is ready for full training."
|
scripts/train_full_vault.ps1
ADDED
|
@@ -0,0 +1,330 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env pwsh
|
| 2 |
+
|
| 3 |
+
Write-Host "==========================================="
|
| 4 |
+
Write-Host "GLEN Full Training on The Vault Dataset"
|
| 5 |
+
Write-Host "Processing 34M+ code samples"
|
| 6 |
+
Write-Host "==========================================="
|
| 7 |
+
|
| 8 |
+
# Production parameters
|
| 9 |
+
$GPU_MEMORY_THRESHOLD = 0.85
|
| 10 |
+
$GPU_CHECK_INTERVAL = 50
|
| 11 |
+
$WANDB_PROJECT = "glen-vault-production"
|
| 12 |
+
|
| 13 |
+
# Training configuration
|
| 14 |
+
$PHASE1_EPOCHS = 3
|
| 15 |
+
$PHASE2_EPOCHS = 5
|
| 16 |
+
$PHASE1_BATCH_SIZE = 32
|
| 17 |
+
$PHASE2_BATCH_SIZE = 16
|
| 18 |
+
$GRADIENT_ACCUMULATION = 4
|
| 19 |
+
$MAX_INPUT_LENGTH = 256
|
| 20 |
+
$LEARNING_RATE = 5e-5
|
| 21 |
+
|
| 22 |
+
Write-Host "🔧 Production Configuration:"
|
| 23 |
+
Write-Host " - Phase 1 epochs: $PHASE1_EPOCHS"
|
| 24 |
+
Write-Host " - Phase 2 epochs: $PHASE2_EPOCHS"
|
| 25 |
+
Write-Host " - Phase 1 batch size: $PHASE1_BATCH_SIZE"
|
| 26 |
+
Write-Host " - Phase 2 batch size: $PHASE2_BATCH_SIZE"
|
| 27 |
+
Write-Host " - Gradient accumulation: $GRADIENT_ACCUMULATION"
|
| 28 |
+
Write-Host " - Max input length: $MAX_INPUT_LENGTH"
|
| 29 |
+
Write-Host " - Learning rate: $LEARNING_RATE"
|
| 30 |
+
Write-Host ""
|
| 31 |
+
|
| 32 |
+
Write-Host "🛡️ Memory Protection:"
|
| 33 |
+
Write-Host " - GPU memory threshold: ${GPU_MEMORY_THRESHOLD} (85%)"
|
| 34 |
+
Write-Host " - Check interval: ${GPU_CHECK_INTERVAL} steps"
|
| 35 |
+
Write-Host " - FP16 training enabled"
|
| 36 |
+
Write-Host " - Automatic checkpoint saving on memory limit"
|
| 37 |
+
Write-Host ""
|
| 38 |
+
|
| 39 |
+
# Check prerequisites
|
| 40 |
+
Write-Host "📋 Checking prerequisites..."
|
| 41 |
+
|
| 42 |
+
# Check if full dataset exists
|
| 43 |
+
if (-not (Test-Path "the_vault_dataset")) {
|
| 44 |
+
Write-Error "❌ The Vault dataset not found! Please download and extract to 'the_vault_dataset/'"
|
| 45 |
+
Write-Host " Download from: https://github.com/FSoft-AI4Code/TheVault"
|
| 46 |
+
exit 1  # nothing below can run without the raw dataset directory
|
| 47 |
+
}
|
| 48 |
+
|
| 49 |
+
# Ensure data preprocessing is done for full dataset
|
| 50 |
+
Write-Host "Checking full dataset preprocessing..."
|
| 51 |
+
if (-not (Test-Path "data/the_vault/DOC_VAULT_train.tsv")) {
|
| 52 |
+
Write-Host "🔄 Running full dataset preprocessing (this may take 30-60 minutes)..."
|
| 53 |
+
python scripts/preprocess_vault_dataset.py --input_dir the_vault_dataset/ --output_dir data/the_vault/ --full_dataset
|
| 54 |
+
if ($LASTEXITCODE -ne 0) {
|
| 55 |
+
Write-Error "❌ Data preprocessing failed!"
|
| 56 |
+
exit 1
|
| 57 |
+
}
|
| 58 |
+
} else {
|
| 59 |
+
$train_lines = (Get-Content "data/the_vault/DOC_VAULT_train.tsv").Count
|
| 60 |
+
Write-Host "✅ Full dataset already preprocessed ($train_lines training samples)"
|
| 61 |
+
}
|
| 62 |
+
|
| 63 |
+
# Check GPU availability
|
| 64 |
+
$gpu_count = 0
|
| 65 |
+
try {
|
| 66 |
+
$gpu_info = nvidia-smi --query-gpu=name --format=csv,noheader,nounits 2>$null
|
| 67 |
+
if ($gpu_info) {
|
| 68 |
+
$gpu_count = ($gpu_info | Measure-Object).Count
|
| 69 |
+
Write-Host "🖥️ Detected $gpu_count GPU(s): $($gpu_info -join ', ')"
|
| 70 |
+
}
|
| 71 |
+
} catch {
|
| 72 |
+
Write-Host "⚠️ No GPU detected, will use CPU (training will be much slower)"
|
| 73 |
+
}
|
| 74 |
+
|
| 75 |
+
if ($gpu_count -eq 0) {
|
| 76 |
+
Write-Host "⚠️ Warning: Training on CPU will take days/weeks. Consider using GPU."
|
| 77 |
+
$response = Read-Host "Continue with CPU training? (y/N)"
|
| 78 |
+
if ($response -ne "y" -and $response -ne "Y") {
|
| 79 |
+
Write-Host "Training cancelled."
|
| 80 |
+
exit 0
|
| 81 |
+
}
|
| 82 |
+
}
|
| 83 |
+
|
| 84 |
+
Write-Host ""
|
| 85 |
+
Write-Host "=== Phase 1 Training: Document ID Assignment ==="
|
| 86 |
+
Write-Host "🎯 Learning to assign semantic identifiers to code documents"
|
| 87 |
+
|
| 88 |
+
$PHASE1_OUTPUT = "logs/glen_vault_production/GLEN_P1"
|
| 89 |
+
$env:CUDA_VISIBLE_DEVICES = "0"
|
| 90 |
+
|
| 91 |
+
try {
|
| 92 |
+
python examples/glen_phase1/train_glen.py `
|
| 93 |
+
--output_dir $PHASE1_OUTPUT `
|
| 94 |
+
--model_name_or_path t5-base `
|
| 95 |
+
--query_type gtq_doc `
|
| 96 |
+
--per_device_train_batch_size $PHASE1_BATCH_SIZE `
|
| 97 |
+
--per_device_eval_batch_size 8 `
|
| 98 |
+
--gradient_accumulation_steps $GRADIENT_ACCUMULATION `
|
| 99 |
+
--learning_rate $LEARNING_RATE `
|
| 100 |
+
--dropout_rate 0.1 `
|
| 101 |
+
--Rdrop 0.15 `
|
| 102 |
+
--aug_query True `
|
| 103 |
+
--aug_query_type corrupted_query `
|
| 104 |
+
--input_dropout 1 `
|
| 105 |
+
--id_class t5_bm25_truncate_3 `
|
| 106 |
+
--dataset_name the_vault `
|
| 107 |
+
--tree 1 `
|
| 108 |
+
--pretrain_decoder True `
|
| 109 |
+
--max_input_length $MAX_INPUT_LENGTH `
|
| 110 |
+
--val_check_interval 0.1 `
|
| 111 |
+
--tie_word_embeddings True `
|
| 112 |
+
--decoder_input doc_rep `
|
| 113 |
+
--max_output_length 10 `
|
| 114 |
+
--num_return_sequences 10 `
|
| 115 |
+
--logging_steps 100 `
|
| 116 |
+
--eval_steps 1000 `
|
| 117 |
+
--save_steps 2000 `
|
| 118 |
+
--overwrite_output_dir `
|
| 119 |
+
--wandb_tag "phase1_production" `
|
| 120 |
+
--project_name $WANDB_PROJECT `
|
| 121 |
+
--do_eval True `
|
| 122 |
+
--evaluation_strategy steps `
|
| 123 |
+
--num_train_epochs $PHASE1_EPOCHS `
|
| 124 |
+
--save_strategy steps `
|
| 125 |
+
--save_total_limit 5 `
|
| 126 |
+
--load_best_model_at_end True `
|
| 127 |
+
--metric_for_best_model eval_loss `
|
| 128 |
+
--greater_is_better False `
|
| 129 |
+
--seed 42 `
|
| 130 |
+
--gpu_memory_threshold $GPU_MEMORY_THRESHOLD `
|
| 131 |
+
--gpu_check_interval $GPU_CHECK_INTERVAL `
|
| 132 |
+
--fp16 True `
|
| 133 |
+
--dataloader_num_workers 4 `
|
| 134 |
+
--warmup_ratio 0.1
|
| 135 |
+
|
| 136 |
+
if ($LASTEXITCODE -ne 0) {
|
| 137 |
+
throw "Phase 1 training failed!"
|
| 138 |
+
}
|
| 139 |
+
} catch {
|
| 140 |
+
Write-Error "❌ Phase 1 training failed: $_"
|
| 141 |
+
Write-Host "📁 Check logs in: $PHASE1_OUTPUT"
|
| 142 |
+
exit 1
|
| 143 |
+
}
|
| 144 |
+
|
| 145 |
+
Write-Host "✅ Phase 1 training completed successfully!"
|
| 146 |
+
|
| 147 |
+
# Validate Phase 1 checkpoint
|
| 148 |
+
if (-not (Test-Path $PHASE1_OUTPUT)) {
|
| 149 |
+
Write-Error "❌ Phase 1 checkpoint not found at $PHASE1_OUTPUT"
|
| 150 |
+
exit 1  # Phase 2 is initialised from this directory, so stop here
|
| 151 |
+
}
|
| 152 |
+
|
| 153 |
+
# Find the best checkpoint
|
| 154 |
+
$best_checkpoint = Get-ChildItem -Path $PHASE1_OUTPUT -Directory -Name "checkpoint-*" |
|
| 155 |
+
Sort-Object {[int]($_.Split('-')[1])} | Select-Object -Last 1
|
| 156 |
+
|
| 157 |
+
if ($best_checkpoint) {
|
| 158 |
+
Write-Host "📁 Using Phase 1 checkpoint: $best_checkpoint"
|
| 159 |
+
$PHASE1_CKPT = "$PHASE1_OUTPUT/$best_checkpoint"
|
| 160 |
+
} else {
|
| 161 |
+
$PHASE1_CKPT = $PHASE1_OUTPUT
|
| 162 |
+
}
|
| 163 |
+
|
| 164 |
+
Write-Host ""
|
| 165 |
+
Write-Host "=== Phase 2 Training: Ranking-based Refinement ==="
|
| 166 |
+
Write-Host "🎯 Learning to rank and refine document identifiers"
|
| 167 |
+
|
| 168 |
+
$PHASE2_OUTPUT = "logs/glen_vault_production/GLEN_P2"
|
| 169 |
+
|
| 170 |
+
try {
|
| 171 |
+
python examples/glen_phase2/train_glen.py `
|
| 172 |
+
--output_dir $PHASE2_OUTPUT `
|
| 173 |
+
--model_name_or_path $PHASE1_CKPT `
|
| 174 |
+
--per_device_train_batch_size $PHASE2_BATCH_SIZE `
|
| 175 |
+
--per_device_eval_batch_size 4 `
|
| 176 |
+
--gradient_accumulation_steps $GRADIENT_ACCUMULATION `
|
| 177 |
+
--learning_rate $LEARNING_RATE `
|
| 178 |
+
--dropout_rate 0.1 `
|
| 179 |
+
--warmup_ratio 0.1 `
|
| 180 |
+
--id_class t5_bm25_truncate_3 `
|
| 181 |
+
--dataset_name the_vault `
|
| 182 |
+
--tree 1 `
|
| 183 |
+
--q_max_len 64 `
|
| 184 |
+
--p_max_len $MAX_INPUT_LENGTH `
|
| 185 |
+
--negative_passage_type self `
|
| 186 |
+
--positive_passage_no_shuffle True `
|
| 187 |
+
--tie_word_embeddings True `
|
| 188 |
+
--num_return_sequences 10 `
|
| 189 |
+
--logging_steps 100 `
|
| 190 |
+
--eval_steps 1000 `
|
| 191 |
+
--save_steps 2000 `
|
| 192 |
+
--overwrite_output_dir `
|
| 193 |
+
--wandb_tag "phase2_production" `
|
| 194 |
+
--project_name $WANDB_PROJECT `
|
| 195 |
+
--do_eval True `
|
| 196 |
+
--evaluation_strategy steps `
|
| 197 |
+
--num_train_epochs $PHASE2_EPOCHS `
|
| 198 |
+
--save_strategy steps `
|
| 199 |
+
--save_total_limit 5 `
|
| 200 |
+
--load_best_model_at_end True `
|
| 201 |
+
--metric_for_best_model eval_loss `
|
| 202 |
+
--greater_is_better False `
|
| 203 |
+
--seed 42 `
|
| 204 |
+
--gpu_memory_threshold $GPU_MEMORY_THRESHOLD `
|
| 205 |
+
--gpu_check_interval $GPU_CHECK_INTERVAL `
|
| 206 |
+
--fp16 True `
|
| 207 |
+
--dataloader_num_workers 4
|
| 208 |
+
|
| 209 |
+
if ($LASTEXITCODE -ne 0) {
|
| 210 |
+
throw "Phase 2 training failed!"
|
| 211 |
+
}
|
| 212 |
+
} catch {
|
| 213 |
+
Write-Error "❌ Phase 2 training failed: $_"
|
| 214 |
+
Write-Host "📁 Check logs in: $PHASE2_OUTPUT"
|
| 215 |
+
exit 1
|
| 216 |
+
}
|
| 217 |
+
|
| 218 |
+
Write-Host "✅ Phase 2 training completed successfully!"
|
| 219 |
+
|
| 220 |
+
# Validate Phase 2 checkpoint
|
| 221 |
+
if (-not (Test-Path $PHASE2_OUTPUT)) {
|
| 222 |
+
Write-Error "❌ Phase 2 checkpoint not found at $PHASE2_OUTPUT"
|
| 223 |
+
exit 1
|
| 224 |
+
}
|
| 225 |
+
|
| 226 |
+
# Find the best Phase 2 checkpoint
|
| 227 |
+
$best_checkpoint_p2 = Get-ChildItem -Path $PHASE2_OUTPUT -Directory -Name "checkpoint-*" |
|
| 228 |
+
Sort-Object {[int]($_.Split('-')[1])} | Select-Object -Last 1
|
| 229 |
+
|
| 230 |
+
if ($best_checkpoint_p2) {
|
| 231 |
+
Write-Host "📁 Using Phase 2 checkpoint: $best_checkpoint_p2"
|
| 232 |
+
$PHASE2_CKPT = "$PHASE2_OUTPUT/$best_checkpoint_p2"
|
| 233 |
+
} else {
|
| 234 |
+
$PHASE2_CKPT = $PHASE2_OUTPUT
|
| 235 |
+
}
|
| 236 |
+
|
| 237 |
+
Write-Host ""
|
| 238 |
+
Write-Host "=== Document ID Generation ==="
|
| 239 |
+
Write-Host "🎯 Generating semantic IDs for all documents"
|
| 240 |
+
|
| 241 |
+
try {
|
| 242 |
+
python examples/glen_phase2/makeid_glen.py `
|
| 243 |
+
--model_name_or_path $PHASE2_CKPT `
|
| 244 |
+
--infer_dir $PHASE2_CKPT `
|
| 245 |
+
--dataset_name the_vault `
|
| 246 |
+
--docid_file_name glen_vault_production_docids `
|
| 247 |
+
--per_device_eval_batch_size 16 `
|
| 248 |
+
--max_input_length $MAX_INPUT_LENGTH `
|
| 249 |
+
--num_return_sequences 20
|
| 250 |
+
|
| 251 |
+
if ($LASTEXITCODE -ne 0) {
|
| 252 |
+
throw "Document ID generation failed!"
|
| 253 |
+
}
|
| 254 |
+
} catch {
|
| 255 |
+
Write-Error "❌ Document ID generation failed: $_"
|
| 256 |
+
exit 1
|
| 257 |
+
}
|
| 258 |
+
|
| 259 |
+
# Validate docid file
|
| 260 |
+
$docid_file = "logs/glen_vault_production/glen_vault_production_docids.tsv"
|
| 261 |
+
if (-not (Test-Path $docid_file)) {
|
| 262 |
+
Write-Error "❌ Document ID file not created: $docid_file"
|
| 263 |
+
exit 1  # the line count and evaluation below both read this file
|
| 264 |
+
}
|
| 265 |
+
|
| 266 |
+
$total_docs = (Get-Content $docid_file).Count
|
| 267 |
+
Write-Host "✅ Document ID generation completed! Generated $total_docs document IDs"
|
| 268 |
+
|
| 269 |
+
Write-Host ""
|
| 270 |
+
Write-Host "=== Model Evaluation ==="
|
| 271 |
+
Write-Host "🎯 Evaluating model performance on test set"
|
| 272 |
+
|
| 273 |
+
try {
|
| 274 |
+
python examples/glen_phase2/evaluate_glen.py `
|
| 275 |
+
--model_name_or_path $PHASE2_CKPT `
|
| 276 |
+
--infer_dir $PHASE2_CKPT `
|
| 277 |
+
--dataset_name the_vault `
|
| 278 |
+
--docid_file_name glen_vault_production_docids `
|
| 279 |
+
--per_device_eval_batch_size 8 `
|
| 280 |
+
--q_max_len 64 `
|
| 281 |
+
--num_return_sequences 20 `
|
| 282 |
+
--logs_dir logs/glen_vault_production
|
| 283 |
+
|
| 284 |
+
if ($LASTEXITCODE -ne 0) {
|
| 285 |
+
throw "Model evaluation failed!"
|
| 286 |
+
}
|
| 287 |
+
} catch {
|
| 288 |
+
Write-Error "❌ Model evaluation failed: $_"
|
| 289 |
+
exit 1
|
| 290 |
+
}
|
| 291 |
+
|
| 292 |
+
Write-Host "✅ Model evaluation completed successfully!"
|
| 293 |
+
|
| 294 |
+
# Training completion summary
|
| 295 |
+
$training_time = Get-Date
|
| 296 |
+
Write-Host ""
|
| 297 |
+
Write-Host "==========================================="
|
| 298 |
+
Write-Host "🎉 FULL TRAINING COMPLETED SUCCESSFULLY! 🎉"
|
| 299 |
+
Write-Host "==========================================="
|
| 300 |
+
Write-Host ""
|
| 301 |
+
Write-Host "📊 Training Summary:"
|
| 302 |
+
Write-Host " ✅ Phase 1: Document ID Assignment ($PHASE1_EPOCHS epochs)"
|
| 303 |
+
Write-Host " ✅ Phase 2: Ranking Refinement ($PHASE2_EPOCHS epochs)"
|
| 304 |
+
Write-Host " ✅ Document ID Generation ($total_docs documents)"
|
| 305 |
+
Write-Host " ✅ Model Evaluation & Metrics"
|
| 306 |
+
Write-Host ""
|
| 307 |
+
Write-Host "📁 Production Model Artifacts:"
|
| 308 |
+
Write-Host " 🏷️ Phase 1 Checkpoint: $PHASE1_CKPT"
|
| 309 |
+
Write-Host " 🏷️ Phase 2 Checkpoint: $PHASE2_CKPT"
|
| 310 |
+
Write-Host " 📄 Document IDs: $docid_file"
|
| 311 |
+
Write-Host " 📊 Evaluation Results: logs/glen_vault_production/"
|
| 312 |
+
Write-Host ""
|
| 313 |
+
Write-Host "🛡️ Memory Protection Summary:"
|
| 314 |
+
Write-Host " - GPU memory threshold: ${GPU_MEMORY_THRESHOLD} (85%)"
|
| 315 |
+
Write-Host " - Check interval: ${GPU_CHECK_INTERVAL} steps"
|
| 316 |
+
Write-Host " - FP16 training enabled throughout"
|
| 317 |
+
Write-Host " - Automatic checkpoint saving on memory limits"
|
| 318 |
+
Write-Host ""
|
| 319 |
+
Write-Host "📈 Performance Optimizations Used:"
|
| 320 |
+
Write-Host " - Gradient accumulation: ${GRADIENT_ACCUMULATION}x"
|
| 321 |
+
Write-Host " - Multi-worker data loading"
|
| 322 |
+
Write-Host " - Mixed precision training (FP16)"
|
| 323 |
+
Write-Host " - Memory-efficient batch sizes"
|
| 324 |
+
Write-Host ""
|
| 325 |
+
Write-Host "🚀 Your GLEN model is ready for production use!"
|
| 326 |
+
Write-Host " - Use the Phase 2 checkpoint for inference"
|
| 327 |
+
Write-Host " - Document IDs are saved for fast retrieval"
|
| 328 |
+
Write-Host " - Evaluation metrics are in the logs directory"
|
| 329 |
+
Write-Host ""
|
| 330 |
+
Write-Host "Training completed at: $training_time"
|
scripts/train_full_vault.sh
ADDED
|
@@ -0,0 +1,247 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
# Full GLEN training pipeline on The Vault dataset.
# This opening section prints the banner, defines the GPU memory-protection
# settings that the training commands later receive via --gpu_memory_threshold
# and --gpu_check_interval, and makes sure the preprocessed TSV files exist.

printf '%s\n' \
    "===========================================" \
    "Full Training GLEN on The Vault dataset" \
    "==========================================="

# Memory monitoring parameters (forwarded unchanged to the Python trainers).
GPU_MEMORY_THRESHOLD=0.85
GPU_CHECK_INTERVAL=50

printf '%s\n' \
    "GPU Memory Protection enabled:" \
    "- Memory threshold: ${GPU_MEMORY_THRESHOLD} (85%)" \
    "- Check interval: ${GPU_CHECK_INTERVAL} steps" \
    ""

# Preprocess only when either expected output file is missing.
echo "Checking data preprocessing..."
if [ -f "data/the_vault/DOC_VAULT_train.tsv" ] && [ -f "data/the_vault/GTQ_VAULT_dev.tsv" ]; then
    echo "Data already preprocessed."
else
    echo "Running data preprocessing..."
    if ! python scripts/preprocess_vault_dataset.py \
            --input_dir the_vault_dataset/ \
            --output_dir data/the_vault/ \
            --create_test_set; then
        echo "Error: Data preprocessing failed!"
        exit 1
    fi
fi
|
| 28 |
+
|
| 29 |
+
# Phase 1 Training
# Runs examples/glen_phase1/train_glen.py starting from t5-base and writes to
# logs/glen_vault/GLEN_P1. The script aborts with exit code 1 if the trainer
# returns a non-zero status.
echo ""
echo "=== Phase 1 Training (Document ID Assignment) ==="
export CUDA_VISIBLE_DEVICES="0"

# Arguments are collected in an array so every value is passed as exactly one
# word (no accidental word splitting or globbing of variable expansions).
phase1_args=(
    --output_dir logs/glen_vault/GLEN_P1
    --model_name_or_path t5-base
    --query_type gtq_doc
    --per_device_train_batch_size 8
    --per_device_eval_batch_size 4
    --gradient_accumulation_steps 2
    --dropout_rate 0.1
    --Rdrop 0.15
    --aug_query True
    --aug_query_type corrupted_query
    --input_dropout 1
    --id_class t5_bm25_truncate_3
    --dataset_name the_vault
    # NOTE(review): the small test run recorded in the wandb metadata also
    # passed --test100 1; confirm this flag is intended for a full run.
    --test100 1
    --tree 1
    --pretrain_decoder True
    --max_input_length 128
    --val_check_interval 1.0
    --tie_word_embeddings True
    --decoder_input doc_rep
    --max_output_length 5
    --num_return_sequences 5
    --logging_steps 100
    --overwrite_output_dir
    --wandb_tag glen_vault_p1
    --do_eval True
    --num_train_epochs 3
    --save_steps 1000
    --save_strategy steps
    --evaluation_strategy steps
    --eval_steps 1000
    --seed 42
    --gpu_memory_threshold "$GPU_MEMORY_THRESHOLD"
    --gpu_check_interval "$GPU_CHECK_INTERVAL"
    --fp16 True
)

# Test the command directly instead of inspecting $? afterwards, so no
# statement can slip in between the command and its status check.
if ! python examples/glen_phase1/train_glen.py "${phase1_args[@]}"; then
    echo "Error: Phase 1 training failed!"
    exit 1
fi

echo "✅ Phase 1 training completed successfully!"
|
| 77 |
+
|
| 78 |
+
# Check if Phase 1 checkpoint exists
# PHASE1_CKPT is reused later as the starting model for Phase 2.
PHASE1_CKPT="logs/glen_vault/GLEN_P1"
[ -d "$PHASE1_CKPT" ] || {
    echo "Error: Phase 1 checkpoint not found at $PHASE1_CKPT"
    exit 1
}

# Check for model files
# Either weight-file name is accepted; the first one found wins.
model_files=("pytorch_model.bin" "model.safetensors")
found_model=false
for file in "${model_files[@]}"; do
    [ -f "$PHASE1_CKPT/$file" ] || continue
    found_model=true
    echo "📁 Found Phase 1 model: $file"
    break
done

[ "$found_model" = true ] || {
    echo "Error: No model files found in Phase 1 checkpoint"
    exit 1
}
|
| 100 |
+
|
| 101 |
+
# Phase 2 Training
# Runs examples/glen_phase2/train_glen.py, initialised from the Phase 1 output
# directory ($PHASE1_CKPT), and writes to logs/glen_vault/GLEN_P2. The script
# aborts with exit code 1 if the trainer returns a non-zero status.
echo ""
echo "=== Phase 2 Training (Ranking-based Refinement) ==="

# Array form keeps each expansion a single argument even if a path ever
# contains spaces or glob characters.
phase2_args=(
    --output_dir logs/glen_vault/GLEN_P2
    --model_name_or_path "$PHASE1_CKPT"
    --per_device_train_batch_size 4
    --per_device_eval_batch_size 2
    --gradient_accumulation_steps 4
    --dropout_rate 0.1
    --warmup_ratio 0.1
    --id_class t5_bm25_truncate_3
    --dataset_name the_vault
    --tree 1
    --q_max_len 32
    --p_max_len 128
    --negative_passage_type self
    --positive_passage_no_shuffle True
    --tie_word_embeddings True
    --num_return_sequences 5
    --logging_steps 100
    --overwrite_output_dir
    --wandb_tag glen_vault_p2
    --do_eval True
    --num_train_epochs 3
    --save_steps 1000
    --save_strategy steps
    --evaluation_strategy steps
    --eval_steps 1000
    --seed 42
    --gpu_memory_threshold "$GPU_MEMORY_THRESHOLD"
    --gpu_check_interval "$GPU_CHECK_INTERVAL"
    --fp16 True
)

# Check the command's status directly rather than through a later $? test.
if ! python examples/glen_phase2/train_glen.py "${phase2_args[@]}"; then
    echo "Error: Phase 2 training failed!"
    exit 1
fi

echo "✅ Phase 2 training completed successfully!"
|
| 141 |
+
|
| 142 |
+
# Validate Phase 2 checkpoint
# Accepts either a checkpoint-* subdirectory containing weights or weight
# files directly in the Phase 2 output directory; otherwise exits with 1.
PHASE2_CKPT="logs/glen_vault/GLEN_P2"
if [ ! -d "$PHASE2_CKPT" ]; then
    echo "Error: Phase 2 checkpoint not found at $PHASE2_CKPT"
    exit 1
fi

# Check for checkpoint subdirectories or model files
# sort -V orders checkpoint-N numerically, so tail picks the highest step.
checkpoint_dir=$(find "$PHASE2_CKPT" -maxdepth 1 -type d -name "checkpoint-*" | sort -V | tail -n 1)
if [ -n "$checkpoint_dir" ]; then
    # Quote the argument so a path with spaces or glob characters is passed
    # to basename as one word.
    echo "📁 Found Phase 2 checkpoint: $(basename "$checkpoint_dir")"
    if [ ! -f "$checkpoint_dir/model.safetensors" ] && [ ! -f "$checkpoint_dir/pytorch_model.bin" ]; then
        echo "Error: No model files in checkpoint directory"
        exit 1
    fi
    # NOTE(review): the later steps load from $PHASE2_CKPT, not from
    # $checkpoint_dir; confirm the loaders resolve checkpoint-* themselves.
else
    # Check for model files in root
    found_model=false
    for file in "${model_files[@]}"; do
        if [ -f "$PHASE2_CKPT/$file" ]; then
            found_model=true
            echo "📁 Found Phase 2 model: $file"
            break
        fi
    done
    if [ "$found_model" = false ]; then
        echo "Error: No model files found in Phase 2 checkpoint"
        exit 1
    fi
fi
|
| 172 |
+
|
| 173 |
+
# Document ID Generation
# Runs examples/glen_phase2/makeid_glen.py against the Phase 2 output
# directory, then verifies that the expected docid TSV exists and reports how
# many lines it contains. Exits with 1 on any failure.
echo ""
echo "=== Document ID Generation ==="

makeid_args=(
    --model_name_or_path "$PHASE2_CKPT"
    --infer_dir "$PHASE2_CKPT"
    --dataset_name the_vault
    --docid_file_name GLEN_P2_docids
    --per_device_eval_batch_size 4
    --max_input_length 128
    --num_return_sequences 10
)

# Check the command's status directly rather than through a later $? test.
if ! python examples/glen_phase2/makeid_glen.py "${makeid_args[@]}"; then
    echo "Error: Document ID generation failed!"
    exit 1
fi

# Validate docid file was created
# NOTE(review): this path is assumed to be where makeid_glen.py writes the
# file for --docid_file_name GLEN_P2_docids; confirm against that script.
docid_file="logs/glen_vault/GLEN_P2_docids.tsv"
if [ ! -f "$docid_file" ]; then
    echo "Error: Document ID file not created: $docid_file"
    exit 1
fi

# Some wc implementations pad the count with leading spaces; arithmetic
# expansion normalises it to a bare integer for the messages below.
line_count=$(( $(wc -l < "$docid_file") ))
echo "✅ Document ID generation completed! Generated $line_count document IDs"
|
| 199 |
+
|
| 200 |
+
# Query Inference
# Runs examples/glen_phase2/evaluate_glen.py against the Phase 2 output
# directory using the generated docids. Exits with 1 if the dev query file is
# missing or the evaluation command fails.
echo ""
echo "=== Query Inference ==="

# First, ensure we have test queries
if [ ! -f "data/the_vault/GTQ_VAULT_dev.tsv" ]; then
    echo "Error: Test queries file not found. Please run preprocessing with --create_test_set flag"
    exit 1
fi

eval_args=(
    --model_name_or_path "$PHASE2_CKPT"
    --infer_dir "$PHASE2_CKPT"
    --dataset_name the_vault
    --docid_file_name GLEN_P2_docids
    --per_device_eval_batch_size 4
    --q_max_len 32
    --num_return_sequences 5
    --logs_dir logs/glen_vault
    --test100 1
)

# Check the command's status directly rather than through a later $? test.
if ! python examples/glen_phase2/evaluate_glen.py "${eval_args[@]}"; then
    echo "Error: Query inference failed!"
    exit 1
fi

echo "✅ Query inference completed successfully!"
|
| 226 |
+
|
| 227 |
+
# Final report: one line per printf argument, using the values computed by
# the earlier steps (line_count, docid_file, GPU_* settings).
printf '%s\n' \
    "" \
    "===========================================" \
    "🎉 FULL TRAINING COMPLETED SUCCESSFULLY! 🎉" \
    "===========================================" \
    "" \
    "📊 Summary:" \
    " ✅ Phase 1 Training (Document ID Assignment)" \
    " ✅ Phase 2 Training (Ranking-based Refinement)" \
    " ✅ Document ID Generation ($line_count IDs)" \
    " ✅ Query Inference & Evaluation" \
    "" \
    "📁 Results saved in: logs/glen_vault/" \
    "📁 Document IDs: $docid_file" \
    "" \
    "🛡️ Memory Protection Summary:" \
    " - GPU memory threshold: ${GPU_MEMORY_THRESHOLD} (85%)" \
    " - Check interval: ${GPU_CHECK_INTERVAL} steps" \
    " - FP16 training enabled" \
    " - Optimized batch sizes used" \
    "" \
    "🚀 Training completed! The model is ready for production use."
|
wandb/offline-run-20250615_082823-7mv0nkou/files/requirements.txt
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
accelerate==1.7.0
|
| 2 |
+
aiohappyeyeballs==2.6.1
|
| 3 |
+
aiohttp==3.12.13
|
| 4 |
+
aiosignal==1.3.2
|
| 5 |
+
annotated-types==0.7.0
|
| 6 |
+
attrs==25.3.0
|
| 7 |
+
certifi==2025.4.26
|
| 8 |
+
charset-normalizer==3.4.2
|
| 9 |
+
click==8.2.1
|
| 10 |
+
colorama==0.4.6
|
| 11 |
+
datasets==3.6.0
|
| 12 |
+
dill==0.3.8
|
| 13 |
+
filelock==3.18.0
|
| 14 |
+
frozenlist==1.7.0
|
| 15 |
+
fsspec==2025.3.0
|
| 16 |
+
gitdb==4.0.12
|
| 17 |
+
GitPython==3.1.44
|
| 18 |
+
huggingface-hub==0.33.0
|
| 19 |
+
idna==3.10
|
| 20 |
+
Jinja2==3.1.6
|
| 21 |
+
MarkupSafe==3.0.2
|
| 22 |
+
mpmath==1.3.0
|
| 23 |
+
multidict==6.4.4
|
| 24 |
+
multiprocess==0.70.16
|
| 25 |
+
networkx==3.5
|
| 26 |
+
numpy==2.3.0
|
| 27 |
+
packaging==25.0
|
| 28 |
+
pandas==2.3.0
|
| 29 |
+
pillow==11.2.1
|
| 30 |
+
pip==25.1.1
|
| 31 |
+
platformdirs==4.3.8
|
| 32 |
+
propcache==0.3.2
|
| 33 |
+
protobuf==6.31.1
|
| 34 |
+
psutil==7.0.0
|
| 35 |
+
pyarrow==20.0.0
|
| 36 |
+
pydantic==2.11.7
|
| 37 |
+
pydantic_core==2.33.2
|
| 38 |
+
python-dateutil==2.9.0.post0
|
| 39 |
+
pytz==2025.2
|
| 40 |
+
PyYAML==6.0.2
|
| 41 |
+
regex==2024.11.6
|
| 42 |
+
requests==2.32.4
|
| 43 |
+
safetensors==0.5.3
|
| 44 |
+
sentry-sdk==2.30.0
|
| 45 |
+
setproctitle==1.3.6
|
| 46 |
+
setuptools==80.9.0
|
| 47 |
+
six==1.17.0
|
| 48 |
+
smmap==5.0.2
|
| 49 |
+
sympy==1.14.0
|
| 50 |
+
tevatron==0.0.1
|
| 51 |
+
tokenizers==0.21.1
|
| 52 |
+
torch==2.7.1
|
| 53 |
+
torchaudio==2.7.1
|
| 54 |
+
torchvision==0.22.1
|
| 55 |
+
tqdm==4.67.1
|
| 56 |
+
transformers==4.52.4
|
| 57 |
+
typing_extensions==4.14.0
|
| 58 |
+
typing-inspection==0.4.1
|
| 59 |
+
tzdata==2025.2
|
| 60 |
+
urllib3==2.4.0
|
| 61 |
+
wandb==0.20.1
|
| 62 |
+
xxhash==3.5.0
|
| 63 |
+
yarl==1.20.1
|
| 64 |
+
tevatron==0.0.1
|
wandb/offline-run-20250615_082823-7mv0nkou/files/wandb-metadata.json
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"os": "Windows-10-10.0.19045-SP0",
|
| 3 |
+
"python": "CPython 3.13.5",
|
| 4 |
+
"startedAt": "2025-06-15T01:28:24.154471Z",
|
| 5 |
+
"args": [
|
| 6 |
+
"--output_dir",
|
| 7 |
+
"logs/test_glen_vault/GLEN_P1_test",
|
| 8 |
+
"--model_name_or_path",
|
| 9 |
+
"t5-base",
|
| 10 |
+
"--query_type",
|
| 11 |
+
"gtq_doc",
|
| 12 |
+
"--per_device_train_batch_size",
|
| 13 |
+
"8",
|
| 14 |
+
"--per_device_eval_batch_size",
|
| 15 |
+
"4",
|
| 16 |
+
"--gradient_accumulation_steps",
|
| 17 |
+
"2",
|
| 18 |
+
"--dropout_rate",
|
| 19 |
+
"0.1",
|
| 20 |
+
"--Rdrop",
|
| 21 |
+
"0.15",
|
| 22 |
+
"--aug_query",
|
| 23 |
+
"True",
|
| 24 |
+
"--aug_query_type",
|
| 25 |
+
"corrupted_query",
|
| 26 |
+
"--input_dropout",
|
| 27 |
+
"1",
|
| 28 |
+
"--id_class",
|
| 29 |
+
"t5_bm25_truncate_3",
|
| 30 |
+
"--dataset_name",
|
| 31 |
+
"the_vault",
|
| 32 |
+
"--test100",
|
| 33 |
+
"1",
|
| 34 |
+
"--tree",
|
| 35 |
+
"1",
|
| 36 |
+
"--pretrain_decoder",
|
| 37 |
+
"True",
|
| 38 |
+
"--max_input_length",
|
| 39 |
+
"128",
|
| 40 |
+
"--val_check_interval",
|
| 41 |
+
"1.0",
|
| 42 |
+
"--tie_word_embeddings",
|
| 43 |
+
"True",
|
| 44 |
+
"--decoder_input",
|
| 45 |
+
"doc_rep",
|
| 46 |
+
"--max_output_length",
|
| 47 |
+
"5",
|
| 48 |
+
"--num_return_sequences",
|
| 49 |
+
"5",
|
| 50 |
+
"--logging_steps",
|
| 51 |
+
"10",
|
| 52 |
+
"--overwrite_output_dir",
|
| 53 |
+
"--wandb_tag",
|
| 54 |
+
"test_glen_vault_p1",
|
| 55 |
+
"--do_eval",
|
| 56 |
+
"False",
|
| 57 |
+
"--num_train_epochs",
|
| 58 |
+
"1",
|
| 59 |
+
"--save_steps",
|
| 60 |
+
"50",
|
| 61 |
+
"--save_strategy",
|
| 62 |
+
"steps",
|
| 63 |
+
"--evaluation_strategy",
|
| 64 |
+
"no",
|
| 65 |
+
"--seed",
|
| 66 |
+
"42",
|
| 67 |
+
"--gpu_memory_threshold",
|
| 68 |
+
"0.85",
|
| 69 |
+
"--gpu_check_interval",
|
| 70 |
+
"50",
|
| 71 |
+
"--fp16",
|
| 72 |
+
"True"
|
| 73 |
+
],
|
| 74 |
+
"program": "H:\\Code\\GLEN-model\\examples\\glen_phase1\\train_glen.py",
|
| 75 |
+
"codePath": "examples\\glen_phase1\\train_glen.py",
|
| 76 |
+
"git": {
|
| 77 |
+
"remote": "https://QuanTH02:@huggingface.co/QuanTH02/GLEN-model",
|
| 78 |
+
"commit": "6534252bf5ad60b20ba58d7d578a982aabeaacaa"
|
| 79 |
+
},
|
| 80 |
+
"root": "H:\\Code\\GLEN-model",
|
| 81 |
+
"host": "FPS-33",
|
| 82 |
+
"executable": "H:\\Code\\GLEN-model\\.env\\Scripts\\python.exe",
|
| 83 |
+
"codePathLocal": "examples\\glen_phase1\\train_glen.py",
|
| 84 |
+
"cpu_count": 10,
|
| 85 |
+
"cpu_count_logical": 16,
|
| 86 |
+
"gpu": "NVIDIA GeForce RTX 4060",
|
| 87 |
+
"gpu_count": 1,
|
| 88 |
+
"disk": {
|
| 89 |
+
"/": {
|
| 90 |
+
"total": "8001561812992",
|
| 91 |
+
"used": "3636055900160"
|
| 92 |
+
}
|
| 93 |
+
},
|
| 94 |
+
"memory": {
|
| 95 |
+
"total": "34157170688"
|
| 96 |
+
},
|
| 97 |
+
"cpu": {
|
| 98 |
+
"count": 10,
|
| 99 |
+
"countLogical": 16
|
| 100 |
+
},
|
| 101 |
+
"gpu_nvidia": [
|
| 102 |
+
{
|
| 103 |
+
"name": "NVIDIA GeForce RTX 4060",
|
| 104 |
+
"memoryTotal": "8585740288",
|
| 105 |
+
"cudaCores": 3072,
|
| 106 |
+
"architecture": "Ada",
|
| 107 |
+
"uuid": "GPU-7e0c8403-933a-8533-bde6-f629db871693"
|
| 108 |
+
}
|
| 109 |
+
],
|
| 110 |
+
"cudaVersion": "12.8"
|
| 111 |
+
}
|
wandb/offline-run-20250615_082823-7mv0nkou/run-7mv0nkou.wandb
ADDED
|
Binary file (18 kB). View file
|
|
|
wandb/offline-run-20250615_083045-gw7kaqtk/files/requirements.txt
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
accelerate==1.7.0
|
| 2 |
+
aiohappyeyeballs==2.6.1
|
| 3 |
+
aiohttp==3.12.13
|
| 4 |
+
aiosignal==1.3.2
|
| 5 |
+
annotated-types==0.7.0
|
| 6 |
+
attrs==25.3.0
|
| 7 |
+
certifi==2025.4.26
|
| 8 |
+
charset-normalizer==3.4.2
|
| 9 |
+
click==8.2.1
|
| 10 |
+
colorama==0.4.6
|
| 11 |
+
datasets==3.6.0
|
| 12 |
+
dill==0.3.8
|
| 13 |
+
filelock==3.18.0
|
| 14 |
+
frozenlist==1.7.0
|
| 15 |
+
fsspec==2025.3.0
|
| 16 |
+
gitdb==4.0.12
|
| 17 |
+
GitPython==3.1.44
|
| 18 |
+
huggingface-hub==0.33.0
|
| 19 |
+
idna==3.10
|
| 20 |
+
Jinja2==3.1.6
|
| 21 |
+
MarkupSafe==3.0.2
|
| 22 |
+
mpmath==1.3.0
|
| 23 |
+
multidict==6.4.4
|
| 24 |
+
multiprocess==0.70.16
|
| 25 |
+
networkx==3.5
|
| 26 |
+
numpy==2.3.0
|
| 27 |
+
packaging==25.0
|
| 28 |
+
pandas==2.3.0
|
| 29 |
+
pillow==11.2.1
|
| 30 |
+
pip==25.1.1
|
| 31 |
+
platformdirs==4.3.8
|
| 32 |
+
propcache==0.3.2
|
| 33 |
+
protobuf==6.31.1
|
| 34 |
+
psutil==7.0.0
|
| 35 |
+
pyarrow==20.0.0
|
| 36 |
+
pydantic==2.11.7
|
| 37 |
+
pydantic_core==2.33.2
|
| 38 |
+
python-dateutil==2.9.0.post0
|
| 39 |
+
pytz==2025.2
|
| 40 |
+
PyYAML==6.0.2
|
| 41 |
+
regex==2024.11.6
|
| 42 |
+
requests==2.32.4
|
| 43 |
+
safetensors==0.5.3
|
| 44 |
+
sentry-sdk==2.30.0
|
| 45 |
+
setproctitle==1.3.6
|
| 46 |
+
setuptools==80.9.0
|
| 47 |
+
six==1.17.0
|
| 48 |
+
smmap==5.0.2
|
| 49 |
+
sympy==1.14.0
|
| 50 |
+
tevatron==0.0.1
|
| 51 |
+
tokenizers==0.21.1
|
| 52 |
+
torch==2.7.1
|
| 53 |
+
torchaudio==2.7.1
|
| 54 |
+
torchvision==0.22.1
|
| 55 |
+
tqdm==4.67.1
|
| 56 |
+
transformers==4.52.4
|
| 57 |
+
typing_extensions==4.14.0
|
| 58 |
+
typing-inspection==0.4.1
|
| 59 |
+
tzdata==2025.2
|
| 60 |
+
urllib3==2.4.0
|
| 61 |
+
wandb==0.20.1
|
| 62 |
+
xxhash==3.5.0
|
| 63 |
+
yarl==1.20.1
|
| 64 |
+
tevatron==0.0.1
|
wandb/offline-run-20250615_083045-gw7kaqtk/files/wandb-metadata.json
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"os": "Windows-10-10.0.19045-SP0",
|
| 3 |
+
"python": "CPython 3.13.5",
|
| 4 |
+
"startedAt": "2025-06-15T01:30:45.974959Z",
|
| 5 |
+
"args": [
|
| 6 |
+
"--output_dir",
|
| 7 |
+
"logs/test_glen_vault/GLEN_P2_test",
|
| 8 |
+
"--model_name_or_path",
|
| 9 |
+
"logs/test_glen_vault/GLEN_P1_test",
|
| 10 |
+
"--per_device_train_batch_size",
|
| 11 |
+
"4",
|
| 12 |
+
"--per_device_eval_batch_size",
|
| 13 |
+
"2",
|
| 14 |
+
"--gradient_accumulation_steps",
|
| 15 |
+
"4",
|
| 16 |
+
"--dropout_rate",
|
| 17 |
+
"0.1",
|
| 18 |
+
"--warmup_ratio",
|
| 19 |
+
"0.1",
|
| 20 |
+
"--id_class",
|
| 21 |
+
"t5_bm25_truncate_3",
|
| 22 |
+
"--dataset_name",
|
| 23 |
+
"the_vault",
|
| 24 |
+
"--test100",
|
| 25 |
+
"1",
|
| 26 |
+
"--tree",
|
| 27 |
+
"1",
|
| 28 |
+
"--q_max_len",
|
| 29 |
+
"32",
|
| 30 |
+
"--p_max_len",
|
| 31 |
+
"128",
|
| 32 |
+
"--negative_passage_type",
|
| 33 |
+
"self",
|
| 34 |
+
"--positive_passage_no_shuffle",
|
| 35 |
+
"True",
|
| 36 |
+
"--tie_word_embeddings",
|
| 37 |
+
"True",
|
| 38 |
+
"--num_return_sequences",
|
| 39 |
+
"5",
|
| 40 |
+
"--logging_steps",
|
| 41 |
+
"10",
|
| 42 |
+
"--overwrite_output_dir",
|
| 43 |
+
"--wandb_tag",
|
| 44 |
+
"test_glen_vault_p2",
|
| 45 |
+
"--do_eval",
|
| 46 |
+
"False",
|
| 47 |
+
"--num_train_epochs",
|
| 48 |
+
"1",
|
| 49 |
+
"--save_steps",
|
| 50 |
+
"50",
|
| 51 |
+
"--save_strategy",
|
| 52 |
+
"steps",
|
| 53 |
+
"--evaluation_strategy",
|
| 54 |
+
"no",
|
| 55 |
+
"--seed",
|
| 56 |
+
"42",
|
| 57 |
+
"--gpu_memory_threshold",
|
| 58 |
+
"0.85",
|
| 59 |
+
"--gpu_check_interval",
|
| 60 |
+
"50",
|
| 61 |
+
"--fp16",
|
| 62 |
+
"True"
|
| 63 |
+
],
|
| 64 |
+
"program": "H:\\Code\\GLEN-model\\examples\\glen_phase2\\train_glen.py",
|
| 65 |
+
"codePath": "examples\\glen_phase2\\train_glen.py",
|
| 66 |
+
"git": {
|
| 67 |
+
"remote": "https://QuanTH02:@huggingface.co/QuanTH02/GLEN-model",
|
| 68 |
+
"commit": "6534252bf5ad60b20ba58d7d578a982aabeaacaa"
|
| 69 |
+
},
|
| 70 |
+
"root": "H:\\Code\\GLEN-model",
|
| 71 |
+
"host": "FPS-33",
|
| 72 |
+
"executable": "H:\\Code\\GLEN-model\\.env\\Scripts\\python.exe",
|
| 73 |
+
"codePathLocal": "examples\\glen_phase2\\train_glen.py",
|
| 74 |
+
"cpu_count": 10,
|
| 75 |
+
"cpu_count_logical": 16,
|
| 76 |
+
"gpu": "NVIDIA GeForce RTX 4060",
|
| 77 |
+
"gpu_count": 1,
|
| 78 |
+
"disk": {
|
| 79 |
+
"/": {
|
| 80 |
+
"total": "8001561812992",
|
| 81 |
+
"used": "3638731177984"
|
| 82 |
+
}
|
| 83 |
+
},
|
| 84 |
+
"memory": {
|
| 85 |
+
"total": "34157170688"
|
| 86 |
+
},
|
| 87 |
+
"cpu": {
|
| 88 |
+
"count": 10,
|
| 89 |
+
"countLogical": 16
|
| 90 |
+
},
|
| 91 |
+
"gpu_nvidia": [
|
| 92 |
+
{
|
| 93 |
+
"name": "NVIDIA GeForce RTX 4060",
|
| 94 |
+
"memoryTotal": "8585740288",
|
| 95 |
+
"cudaCores": 3072,
|
| 96 |
+
"architecture": "Ada",
|
| 97 |
+
"uuid": "GPU-7e0c8403-933a-8533-bde6-f629db871693"
|
| 98 |
+
}
|
| 99 |
+
],
|
| 100 |
+
"cudaVersion": "12.8"
|
| 101 |
+
}
|
wandb/offline-run-20250615_083045-gw7kaqtk/run-gw7kaqtk.wandb
ADDED
|
Binary file (13.3 kB). View file
|
|
|
wandb/offline-run-20250615_083755-qlx0umrq/files/requirements.txt
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
accelerate==1.7.0
|
| 2 |
+
aiohappyeyeballs==2.6.1
|
| 3 |
+
aiohttp==3.12.13
|
| 4 |
+
aiosignal==1.3.2
|
| 5 |
+
annotated-types==0.7.0
|
| 6 |
+
attrs==25.3.0
|
| 7 |
+
certifi==2025.4.26
|
| 8 |
+
charset-normalizer==3.4.2
|
| 9 |
+
click==8.2.1
|
| 10 |
+
colorama==0.4.6
|
| 11 |
+
datasets==3.6.0
|
| 12 |
+
dill==0.3.8
|
| 13 |
+
filelock==3.18.0
|
| 14 |
+
frozenlist==1.7.0
|
| 15 |
+
fsspec==2025.3.0
|
| 16 |
+
gitdb==4.0.12
|
| 17 |
+
GitPython==3.1.44
|
| 18 |
+
huggingface-hub==0.33.0
|
| 19 |
+
idna==3.10
|
| 20 |
+
Jinja2==3.1.6
|
| 21 |
+
MarkupSafe==3.0.2
|
| 22 |
+
mpmath==1.3.0
|
| 23 |
+
multidict==6.4.4
|
| 24 |
+
multiprocess==0.70.16
|
| 25 |
+
networkx==3.5
|
| 26 |
+
numpy==2.3.0
|
| 27 |
+
packaging==25.0
|
| 28 |
+
pandas==2.3.0
|
| 29 |
+
pillow==11.2.1
|
| 30 |
+
pip==25.1.1
|
| 31 |
+
platformdirs==4.3.8
|
| 32 |
+
propcache==0.3.2
|
| 33 |
+
protobuf==6.31.1
|
| 34 |
+
psutil==7.0.0
|
| 35 |
+
pyarrow==20.0.0
|
| 36 |
+
pydantic==2.11.7
|
| 37 |
+
pydantic_core==2.33.2
|
| 38 |
+
python-dateutil==2.9.0.post0
|
| 39 |
+
pytz==2025.2
|
| 40 |
+
PyYAML==6.0.2
|
| 41 |
+
regex==2024.11.6
|
| 42 |
+
requests==2.32.4
|
| 43 |
+
safetensors==0.5.3
|
| 44 |
+
sentry-sdk==2.30.0
|
| 45 |
+
setproctitle==1.3.6
|
| 46 |
+
setuptools==80.9.0
|
| 47 |
+
six==1.17.0
|
| 48 |
+
smmap==5.0.2
|
| 49 |
+
sympy==1.14.0
|
| 50 |
+
tevatron==0.0.1
|
| 51 |
+
tokenizers==0.21.1
|
| 52 |
+
torch==2.7.1
|
| 53 |
+
torchaudio==2.7.1
|
| 54 |
+
torchvision==0.22.1
|
| 55 |
+
tqdm==4.67.1
|
| 56 |
+
transformers==4.52.4
|
| 57 |
+
typing_extensions==4.14.0
|
| 58 |
+
typing-inspection==0.4.1
|
| 59 |
+
tzdata==2025.2
|
| 60 |
+
urllib3==2.4.0
|
| 61 |
+
wandb==0.20.1
|
| 62 |
+
xxhash==3.5.0
|
| 63 |
+
yarl==1.20.1
|
| 64 |
+
tevatron==0.0.1
|
wandb/offline-run-20250615_083755-qlx0umrq/files/wandb-metadata.json
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"os": "Windows-10-10.0.19045-SP0",
|
| 3 |
+
"python": "CPython 3.13.5",
|
| 4 |
+
"startedAt": "2025-06-15T01:37:56.172793Z",
|
| 5 |
+
"args": [
|
| 6 |
+
"--output_dir",
|
| 7 |
+
"logs/test_glen_vault/GLEN_P1_test",
|
| 8 |
+
"--model_name_or_path",
|
| 9 |
+
"t5-base",
|
| 10 |
+
"--query_type",
|
| 11 |
+
"gtq_doc",
|
| 12 |
+
"--per_device_train_batch_size",
|
| 13 |
+
"8",
|
| 14 |
+
"--per_device_eval_batch_size",
|
| 15 |
+
"4",
|
| 16 |
+
"--gradient_accumulation_steps",
|
| 17 |
+
"2",
|
| 18 |
+
"--dropout_rate",
|
| 19 |
+
"0.1",
|
| 20 |
+
"--Rdrop",
|
| 21 |
+
"0.15",
|
| 22 |
+
"--aug_query",
|
| 23 |
+
"True",
|
| 24 |
+
"--aug_query_type",
|
| 25 |
+
"corrupted_query",
|
| 26 |
+
"--input_dropout",
|
| 27 |
+
"1",
|
| 28 |
+
"--id_class",
|
| 29 |
+
"t5_bm25_truncate_3",
|
| 30 |
+
"--dataset_name",
|
| 31 |
+
"the_vault",
|
| 32 |
+
"--test100",
|
| 33 |
+
"1",
|
| 34 |
+
"--tree",
|
| 35 |
+
"1",
|
| 36 |
+
"--pretrain_decoder",
|
| 37 |
+
"True",
|
| 38 |
+
"--max_input_length",
|
| 39 |
+
"128",
|
| 40 |
+
"--val_check_interval",
|
| 41 |
+
"1.0",
|
| 42 |
+
"--tie_word_embeddings",
|
| 43 |
+
"True",
|
| 44 |
+
"--decoder_input",
|
| 45 |
+
"doc_rep",
|
| 46 |
+
"--max_output_length",
|
| 47 |
+
"5",
|
| 48 |
+
"--num_return_sequences",
|
| 49 |
+
"5",
|
| 50 |
+
"--logging_steps",
|
| 51 |
+
"10",
|
| 52 |
+
"--overwrite_output_dir",
|
| 53 |
+
"--wandb_tag",
|
| 54 |
+
"test_glen_vault_p1",
|
| 55 |
+
"--do_eval",
|
| 56 |
+
"False",
|
| 57 |
+
"--num_train_epochs",
|
| 58 |
+
"1",
|
| 59 |
+
"--save_steps",
|
| 60 |
+
"50",
|
| 61 |
+
"--save_strategy",
|
| 62 |
+
"steps",
|
| 63 |
+
"--evaluation_strategy",
|
| 64 |
+
"no",
|
| 65 |
+
"--seed",
|
| 66 |
+
"42",
|
| 67 |
+
"--gpu_memory_threshold",
|
| 68 |
+
"0.85",
|
| 69 |
+
"--gpu_check_interval",
|
| 70 |
+
"50",
|
| 71 |
+
"--fp16",
|
| 72 |
+
"True"
|
| 73 |
+
],
|
| 74 |
+
"program": "H:\\Code\\GLEN-model\\examples\\glen_phase1\\train_glen.py",
|
| 75 |
+
"codePath": "examples\\glen_phase1\\train_glen.py",
|
| 76 |
+
"git": {
|
| 77 |
+
"remote": "https://QuanTH02:@huggingface.co/QuanTH02/GLEN-model",
|
| 78 |
+
"commit": "6534252bf5ad60b20ba58d7d578a982aabeaacaa"
|
| 79 |
+
},
|
| 80 |
+
"root": "H:\\Code\\GLEN-model",
|
| 81 |
+
"host": "FPS-33",
|
| 82 |
+
"executable": "H:\\Code\\GLEN-model\\.env\\Scripts\\python.exe",
|
| 83 |
+
"codePathLocal": "examples\\glen_phase1\\train_glen.py",
|
| 84 |
+
"cpu_count": 10,
|
| 85 |
+
"cpu_count_logical": 16,
|
| 86 |
+
"gpu": "NVIDIA GeForce RTX 4060",
|
| 87 |
+
"gpu_count": 1,
|
| 88 |
+
"disk": {
|
| 89 |
+
"/": {
|
| 90 |
+
"total": "8001561812992",
|
| 91 |
+
"used": "3639622901760"
|
| 92 |
+
}
|
| 93 |
+
},
|
| 94 |
+
"memory": {
|
| 95 |
+
"total": "34157170688"
|
| 96 |
+
},
|
| 97 |
+
"cpu": {
|
| 98 |
+
"count": 10,
|
| 99 |
+
"countLogical": 16
|
| 100 |
+
},
|
| 101 |
+
"gpu_nvidia": [
|
| 102 |
+
{
|
| 103 |
+
"name": "NVIDIA GeForce RTX 4060",
|
| 104 |
+
"memoryTotal": "8585740288",
|
| 105 |
+
"cudaCores": 3072,
|
| 106 |
+
"architecture": "Ada",
|
| 107 |
+
"uuid": "GPU-7e0c8403-933a-8533-bde6-f629db871693"
|
| 108 |
+
}
|
| 109 |
+
],
|
| 110 |
+
"cudaVersion": "12.8"
|
| 111 |
+
}
|
wandb/offline-run-20250615_083755-qlx0umrq/run-qlx0umrq.wandb
ADDED
|
Binary file (18.2 kB). View file
|
|
|
wandb/offline-run-20250615_084004-v280mta6/files/requirements.txt
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
accelerate==1.7.0
|
| 2 |
+
aiohappyeyeballs==2.6.1
|
| 3 |
+
aiohttp==3.12.13
|
| 4 |
+
aiosignal==1.3.2
|
| 5 |
+
annotated-types==0.7.0
|
| 6 |
+
attrs==25.3.0
|
| 7 |
+
certifi==2025.4.26
|
| 8 |
+
charset-normalizer==3.4.2
|
| 9 |
+
click==8.2.1
|
| 10 |
+
colorama==0.4.6
|
| 11 |
+
datasets==3.6.0
|
| 12 |
+
dill==0.3.8
|
| 13 |
+
filelock==3.18.0
|
| 14 |
+
frozenlist==1.7.0
|
| 15 |
+
fsspec==2025.3.0
|
| 16 |
+
gitdb==4.0.12
|
| 17 |
+
GitPython==3.1.44
|
| 18 |
+
huggingface-hub==0.33.0
|
| 19 |
+
idna==3.10
|
| 20 |
+
Jinja2==3.1.6
|
| 21 |
+
MarkupSafe==3.0.2
|
| 22 |
+
mpmath==1.3.0
|
| 23 |
+
multidict==6.4.4
|
| 24 |
+
multiprocess==0.70.16
|
| 25 |
+
networkx==3.5
|
| 26 |
+
numpy==2.3.0
|
| 27 |
+
packaging==25.0
|
| 28 |
+
pandas==2.3.0
|
| 29 |
+
pillow==11.2.1
|
| 30 |
+
pip==25.1.1
|
| 31 |
+
platformdirs==4.3.8
|
| 32 |
+
propcache==0.3.2
|
| 33 |
+
protobuf==6.31.1
|
| 34 |
+
psutil==7.0.0
|
| 35 |
+
pyarrow==20.0.0
|
| 36 |
+
pydantic==2.11.7
|
| 37 |
+
pydantic_core==2.33.2
|
| 38 |
+
python-dateutil==2.9.0.post0
|
| 39 |
+
pytz==2025.2
|
| 40 |
+
PyYAML==6.0.2
|
| 41 |
+
regex==2024.11.6
|
| 42 |
+
requests==2.32.4
|
| 43 |
+
safetensors==0.5.3
|
| 44 |
+
sentry-sdk==2.30.0
|
| 45 |
+
setproctitle==1.3.6
|
| 46 |
+
setuptools==80.9.0
|
| 47 |
+
six==1.17.0
|
| 48 |
+
smmap==5.0.2
|
| 49 |
+
sympy==1.14.0
|
| 50 |
+
tevatron==0.0.1
|
| 51 |
+
tokenizers==0.21.1
|
| 52 |
+
torch==2.7.1
|
| 53 |
+
torchaudio==2.7.1
|
| 54 |
+
torchvision==0.22.1
|
| 55 |
+
tqdm==4.67.1
|
| 56 |
+
transformers==4.52.4
|
| 57 |
+
typing_extensions==4.14.0
|
| 58 |
+
typing-inspection==0.4.1
|
| 59 |
+
tzdata==2025.2
|
| 60 |
+
urllib3==2.4.0
|
| 61 |
+
wandb==0.20.1
|
| 62 |
+
xxhash==3.5.0
|
| 63 |
+
yarl==1.20.1
|
| 64 |
+
tevatron==0.0.1
|
wandb/offline-run-20250615_084004-v280mta6/files/wandb-metadata.json
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"os": "Windows-10-10.0.19045-SP0",
|
| 3 |
+
"python": "CPython 3.13.5",
|
| 4 |
+
"startedAt": "2025-06-15T01:40:04.662871Z",
|
| 5 |
+
"args": [
|
| 6 |
+
"--output_dir",
|
| 7 |
+
"logs/test_glen_vault/GLEN_P2_test",
|
| 8 |
+
"--model_name_or_path",
|
| 9 |
+
"logs/test_glen_vault/GLEN_P1_test",
|
| 10 |
+
"--per_device_train_batch_size",
|
| 11 |
+
"4",
|
| 12 |
+
"--per_device_eval_batch_size",
|
| 13 |
+
"2",
|
| 14 |
+
"--gradient_accumulation_steps",
|
| 15 |
+
"4",
|
| 16 |
+
"--dropout_rate",
|
| 17 |
+
"0.1",
|
| 18 |
+
"--warmup_ratio",
|
| 19 |
+
"0.1",
|
| 20 |
+
"--id_class",
|
| 21 |
+
"t5_bm25_truncate_3",
|
| 22 |
+
"--dataset_name",
|
| 23 |
+
"the_vault",
|
| 24 |
+
"--test100",
|
| 25 |
+
"1",
|
| 26 |
+
"--tree",
|
| 27 |
+
"1",
|
| 28 |
+
"--q_max_len",
|
| 29 |
+
"32",
|
| 30 |
+
"--p_max_len",
|
| 31 |
+
"128",
|
| 32 |
+
"--negative_passage_type",
|
| 33 |
+
"self",
|
| 34 |
+
"--positive_passage_no_shuffle",
|
| 35 |
+
"True",
|
| 36 |
+
"--tie_word_embeddings",
|
| 37 |
+
"True",
|
| 38 |
+
"--num_return_sequences",
|
| 39 |
+
"5",
|
| 40 |
+
"--logging_steps",
|
| 41 |
+
"10",
|
| 42 |
+
"--overwrite_output_dir",
|
| 43 |
+
"--wandb_tag",
|
| 44 |
+
"test_glen_vault_p2",
|
| 45 |
+
"--do_eval",
|
| 46 |
+
"False",
|
| 47 |
+
"--num_train_epochs",
|
| 48 |
+
"1",
|
| 49 |
+
"--save_steps",
|
| 50 |
+
"50",
|
| 51 |
+
"--save_strategy",
|
| 52 |
+
"steps",
|
| 53 |
+
"--evaluation_strategy",
|
| 54 |
+
"no",
|
| 55 |
+
"--seed",
|
| 56 |
+
"42",
|
| 57 |
+
"--gpu_memory_threshold",
|
| 58 |
+
"0.85",
|
| 59 |
+
"--gpu_check_interval",
|
| 60 |
+
"50",
|
| 61 |
+
"--fp16",
|
| 62 |
+
"True"
|
| 63 |
+
],
|
| 64 |
+
"program": "H:\\Code\\GLEN-model\\examples\\glen_phase2\\train_glen.py",
|
| 65 |
+
"codePath": "examples\\glen_phase2\\train_glen.py",
|
| 66 |
+
"git": {
|
| 67 |
+
"remote": "https://QuanTH02:@huggingface.co/QuanTH02/GLEN-model",
|
| 68 |
+
"commit": "6534252bf5ad60b20ba58d7d578a982aabeaacaa"
|
| 69 |
+
},
|
| 70 |
+
"root": "H:\\Code\\GLEN-model",
|
| 71 |
+
"host": "FPS-33",
|
| 72 |
+
"executable": "H:\\Code\\GLEN-model\\.env\\Scripts\\python.exe",
|
| 73 |
+
"codePathLocal": "examples\\glen_phase2\\train_glen.py",
|
| 74 |
+
"cpu_count": 10,
|
| 75 |
+
"cpu_count_logical": 16,
|
| 76 |
+
"gpu": "NVIDIA GeForce RTX 4060",
|
| 77 |
+
"gpu_count": 1,
|
| 78 |
+
"disk": {
|
| 79 |
+
"/": {
|
| 80 |
+
"total": "8001561812992",
|
| 81 |
+
"used": "3640601427968"
|
| 82 |
+
}
|
| 83 |
+
},
|
| 84 |
+
"memory": {
|
| 85 |
+
"total": "34157170688"
|
| 86 |
+
},
|
| 87 |
+
"cpu": {
|
| 88 |
+
"count": 10,
|
| 89 |
+
"countLogical": 16
|
| 90 |
+
},
|
| 91 |
+
"gpu_nvidia": [
|
| 92 |
+
{
|
| 93 |
+
"name": "NVIDIA GeForce RTX 4060",
|
| 94 |
+
"memoryTotal": "8585740288",
|
| 95 |
+
"cudaCores": 3072,
|
| 96 |
+
"architecture": "Ada",
|
| 97 |
+
"uuid": "GPU-7e0c8403-933a-8533-bde6-f629db871693"
|
| 98 |
+
}
|
| 99 |
+
],
|
| 100 |
+
"cudaVersion": "12.8"
|
| 101 |
+
}
|
wandb/offline-run-20250615_084004-v280mta6/run-v280mta6.wandb
ADDED
|
Binary file (13.3 kB). View file
|
|
|
wandb/offline-run-20250615_084743-xvd6hiwa/files/requirements.txt
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
accelerate==1.7.0
|
| 2 |
+
aiohappyeyeballs==2.6.1
|
| 3 |
+
aiohttp==3.12.13
|
| 4 |
+
aiosignal==1.3.2
|
| 5 |
+
annotated-types==0.7.0
|
| 6 |
+
attrs==25.3.0
|
| 7 |
+
certifi==2025.4.26
|
| 8 |
+
charset-normalizer==3.4.2
|
| 9 |
+
click==8.2.1
|
| 10 |
+
colorama==0.4.6
|
| 11 |
+
datasets==3.6.0
|
| 12 |
+
dill==0.3.8
|
| 13 |
+
filelock==3.18.0
|
| 14 |
+
frozenlist==1.7.0
|
| 15 |
+
fsspec==2025.3.0
|
| 16 |
+
gitdb==4.0.12
|
| 17 |
+
GitPython==3.1.44
|
| 18 |
+
huggingface-hub==0.33.0
|
| 19 |
+
idna==3.10
|
| 20 |
+
Jinja2==3.1.6
|
| 21 |
+
MarkupSafe==3.0.2
|
| 22 |
+
mpmath==1.3.0
|
| 23 |
+
multidict==6.4.4
|
| 24 |
+
multiprocess==0.70.16
|
| 25 |
+
networkx==3.5
|
| 26 |
+
numpy==2.3.0
|
| 27 |
+
packaging==25.0
|
| 28 |
+
pandas==2.3.0
|
| 29 |
+
pillow==11.2.1
|
| 30 |
+
pip==25.1.1
|
| 31 |
+
platformdirs==4.3.8
|
| 32 |
+
propcache==0.3.2
|
| 33 |
+
protobuf==6.31.1
|
| 34 |
+
psutil==7.0.0
|
| 35 |
+
pyarrow==20.0.0
|
| 36 |
+
pydantic==2.11.7
|
| 37 |
+
pydantic_core==2.33.2
|
| 38 |
+
python-dateutil==2.9.0.post0
|
| 39 |
+
pytz==2025.2
|
| 40 |
+
PyYAML==6.0.2
|
| 41 |
+
regex==2024.11.6
|
| 42 |
+
requests==2.32.4
|
| 43 |
+
safetensors==0.5.3
|
| 44 |
+
sentry-sdk==2.30.0
|
| 45 |
+
setproctitle==1.3.6
|
| 46 |
+
setuptools==80.9.0
|
| 47 |
+
six==1.17.0
|
| 48 |
+
smmap==5.0.2
|
| 49 |
+
sympy==1.14.0
|
| 50 |
+
tevatron==0.0.1
|
| 51 |
+
tokenizers==0.21.1
|
| 52 |
+
torch==2.7.1
|
| 53 |
+
torchaudio==2.7.1
|
| 54 |
+
torchvision==0.22.1
|
| 55 |
+
tqdm==4.67.1
|
| 56 |
+
transformers==4.52.4
|
| 57 |
+
typing_extensions==4.14.0
|
| 58 |
+
typing-inspection==0.4.1
|
| 59 |
+
tzdata==2025.2
|
| 60 |
+
urllib3==2.4.0
|
| 61 |
+
wandb==0.20.1
|
| 62 |
+
xxhash==3.5.0
|
| 63 |
+
yarl==1.20.1
|
| 64 |
+
tevatron==0.0.1
|
wandb/offline-run-20250615_084743-xvd6hiwa/files/wandb-metadata.json
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"os": "Windows-10-10.0.19045-SP0",
|
| 3 |
+
"python": "CPython 3.13.5",
|
| 4 |
+
"startedAt": "2025-06-15T01:47:43.951676Z",
|
| 5 |
+
"args": [
|
| 6 |
+
"--output_dir",
|
| 7 |
+
"logs/test_glen_vault/GLEN_P1_test",
|
| 8 |
+
"--model_name_or_path",
|
| 9 |
+
"t5-base",
|
| 10 |
+
"--query_type",
|
| 11 |
+
"gtq_doc",
|
| 12 |
+
"--per_device_train_batch_size",
|
| 13 |
+
"8",
|
| 14 |
+
"--per_device_eval_batch_size",
|
| 15 |
+
"4",
|
| 16 |
+
"--gradient_accumulation_steps",
|
| 17 |
+
"2",
|
| 18 |
+
"--dropout_rate",
|
| 19 |
+
"0.1",
|
| 20 |
+
"--Rdrop",
|
| 21 |
+
"0.15",
|
| 22 |
+
"--aug_query",
|
| 23 |
+
"True",
|
| 24 |
+
"--aug_query_type",
|
| 25 |
+
"corrupted_query",
|
| 26 |
+
"--input_dropout",
|
| 27 |
+
"1",
|
| 28 |
+
"--id_class",
|
| 29 |
+
"t5_bm25_truncate_3",
|
| 30 |
+
"--dataset_name",
|
| 31 |
+
"the_vault",
|
| 32 |
+
"--test100",
|
| 33 |
+
"1",
|
| 34 |
+
"--tree",
|
| 35 |
+
"1",
|
| 36 |
+
"--pretrain_decoder",
|
| 37 |
+
"True",
|
| 38 |
+
"--max_input_length",
|
| 39 |
+
"128",
|
| 40 |
+
"--val_check_interval",
|
| 41 |
+
"1.0",
|
| 42 |
+
"--tie_word_embeddings",
|
| 43 |
+
"True",
|
| 44 |
+
"--decoder_input",
|
| 45 |
+
"doc_rep",
|
| 46 |
+
"--max_output_length",
|
| 47 |
+
"5",
|
| 48 |
+
"--num_return_sequences",
|
| 49 |
+
"5",
|
| 50 |
+
"--logging_steps",
|
| 51 |
+
"10",
|
| 52 |
+
"--overwrite_output_dir",
|
| 53 |
+
"--wandb_tag",
|
| 54 |
+
"test_glen_vault_p1",
|
| 55 |
+
"--do_eval",
|
| 56 |
+
"False",
|
| 57 |
+
"--num_train_epochs",
|
| 58 |
+
"1",
|
| 59 |
+
"--save_steps",
|
| 60 |
+
"50",
|
| 61 |
+
"--save_strategy",
|
| 62 |
+
"steps",
|
| 63 |
+
"--evaluation_strategy",
|
| 64 |
+
"no",
|
| 65 |
+
"--seed",
|
| 66 |
+
"42",
|
| 67 |
+
"--gpu_memory_threshold",
|
| 68 |
+
"0.85",
|
| 69 |
+
"--gpu_check_interval",
|
| 70 |
+
"50",
|
| 71 |
+
"--fp16",
|
| 72 |
+
"True"
|
| 73 |
+
],
|
| 74 |
+
"program": "H:\\Code\\GLEN-model\\examples\\glen_phase1\\train_glen.py",
|
| 75 |
+
"codePath": "examples\\glen_phase1\\train_glen.py",
|
| 76 |
+
"git": {
|
| 77 |
+
"remote": "https://QuanTH02:@huggingface.co/QuanTH02/GLEN-model",
|
| 78 |
+
"commit": "6534252bf5ad60b20ba58d7d578a982aabeaacaa"
|
| 79 |
+
},
|
| 80 |
+
"root": "H:\\Code\\GLEN-model",
|
| 81 |
+
"host": "FPS-33",
|
| 82 |
+
"executable": "H:\\Code\\GLEN-model\\.env\\Scripts\\python.exe",
|
| 83 |
+
"codePathLocal": "examples\\glen_phase1\\train_glen.py",
|
| 84 |
+
"cpu_count": 10,
|
| 85 |
+
"cpu_count_logical": 16,
|
| 86 |
+
"gpu": "NVIDIA GeForce RTX 4060",
|
| 87 |
+
"gpu_count": 1,
|
| 88 |
+
"disk": {
|
| 89 |
+
"/": {
|
| 90 |
+
"total": "8001561812992",
|
| 91 |
+
"used": "3640081137664"
|
| 92 |
+
}
|
| 93 |
+
},
|
| 94 |
+
"memory": {
|
| 95 |
+
"total": "34157170688"
|
| 96 |
+
},
|
| 97 |
+
"cpu": {
|
| 98 |
+
"count": 10,
|
| 99 |
+
"countLogical": 16
|
| 100 |
+
},
|
| 101 |
+
"gpu_nvidia": [
|
| 102 |
+
{
|
| 103 |
+
"name": "NVIDIA GeForce RTX 4060",
|
| 104 |
+
"memoryTotal": "8585740288",
|
| 105 |
+
"cudaCores": 3072,
|
| 106 |
+
"architecture": "Ada",
|
| 107 |
+
"uuid": "GPU-7e0c8403-933a-8533-bde6-f629db871693"
|
| 108 |
+
}
|
| 109 |
+
],
|
| 110 |
+
"cudaVersion": "12.8"
|
| 111 |
+
}
|
wandb/offline-run-20250615_084743-xvd6hiwa/run-xvd6hiwa.wandb
ADDED
|
Binary file (18 kB). View file
|
|
|
wandb/offline-run-20250615_085008-fr23ohzz/files/requirements.txt
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
accelerate==1.7.0
|
| 2 |
+
aiohappyeyeballs==2.6.1
|
| 3 |
+
aiohttp==3.12.13
|
| 4 |
+
aiosignal==1.3.2
|
| 5 |
+
annotated-types==0.7.0
|
| 6 |
+
attrs==25.3.0
|
| 7 |
+
certifi==2025.4.26
|
| 8 |
+
charset-normalizer==3.4.2
|
| 9 |
+
click==8.2.1
|
| 10 |
+
colorama==0.4.6
|
| 11 |
+
datasets==3.6.0
|
| 12 |
+
dill==0.3.8
|
| 13 |
+
filelock==3.18.0
|
| 14 |
+
frozenlist==1.7.0
|
| 15 |
+
fsspec==2025.3.0
|
| 16 |
+
gitdb==4.0.12
|
| 17 |
+
GitPython==3.1.44
|
| 18 |
+
huggingface-hub==0.33.0
|
| 19 |
+
idna==3.10
|
| 20 |
+
Jinja2==3.1.6
|
| 21 |
+
MarkupSafe==3.0.2
|
| 22 |
+
mpmath==1.3.0
|
| 23 |
+
multidict==6.4.4
|
| 24 |
+
multiprocess==0.70.16
|
| 25 |
+
networkx==3.5
|
| 26 |
+
numpy==2.3.0
|
| 27 |
+
packaging==25.0
|
| 28 |
+
pandas==2.3.0
|
| 29 |
+
pillow==11.2.1
|
| 30 |
+
pip==25.1.1
|
| 31 |
+
platformdirs==4.3.8
|
| 32 |
+
propcache==0.3.2
|
| 33 |
+
protobuf==6.31.1
|
| 34 |
+
psutil==7.0.0
|
| 35 |
+
pyarrow==20.0.0
|
| 36 |
+
pydantic==2.11.7
|
| 37 |
+
pydantic_core==2.33.2
|
| 38 |
+
python-dateutil==2.9.0.post0
|
| 39 |
+
pytz==2025.2
|
| 40 |
+
PyYAML==6.0.2
|
| 41 |
+
regex==2024.11.6
|
| 42 |
+
requests==2.32.4
|
| 43 |
+
safetensors==0.5.3
|
| 44 |
+
sentry-sdk==2.30.0
|
| 45 |
+
setproctitle==1.3.6
|
| 46 |
+
setuptools==80.9.0
|
| 47 |
+
six==1.17.0
|
| 48 |
+
smmap==5.0.2
|
| 49 |
+
sympy==1.14.0
|
| 50 |
+
tevatron==0.0.1
|
| 51 |
+
tokenizers==0.21.1
|
| 52 |
+
torch==2.7.1
|
| 53 |
+
torchaudio==2.7.1
|
| 54 |
+
torchvision==0.22.1
|
| 55 |
+
tqdm==4.67.1
|
| 56 |
+
transformers==4.52.4
|
| 57 |
+
typing_extensions==4.14.0
|
| 58 |
+
typing-inspection==0.4.1
|
| 59 |
+
tzdata==2025.2
|
| 60 |
+
urllib3==2.4.0
|
| 61 |
+
wandb==0.20.1
|
| 62 |
+
xxhash==3.5.0
|
| 63 |
+
yarl==1.20.1
|
| 64 |
+
tevatron==0.0.1
|
wandb/offline-run-20250615_085008-fr23ohzz/files/wandb-metadata.json
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"os": "Windows-10-10.0.19045-SP0",
|
| 3 |
+
"python": "CPython 3.13.5",
|
| 4 |
+
"startedAt": "2025-06-15T01:50:09.342451Z",
|
| 5 |
+
"args": [
|
| 6 |
+
"--output_dir",
|
| 7 |
+
"logs/test_glen_vault/GLEN_P2_test",
|
| 8 |
+
"--model_name_or_path",
|
| 9 |
+
"logs/test_glen_vault/GLEN_P1_test",
|
| 10 |
+
"--per_device_train_batch_size",
|
| 11 |
+
"4",
|
| 12 |
+
"--per_device_eval_batch_size",
|
| 13 |
+
"2",
|
| 14 |
+
"--gradient_accumulation_steps",
|
| 15 |
+
"4",
|
| 16 |
+
"--dropout_rate",
|
| 17 |
+
"0.1",
|
| 18 |
+
"--warmup_ratio",
|
| 19 |
+
"0.1",
|
| 20 |
+
"--id_class",
|
| 21 |
+
"t5_bm25_truncate_3",
|
| 22 |
+
"--dataset_name",
|
| 23 |
+
"the_vault",
|
| 24 |
+
"--test100",
|
| 25 |
+
"1",
|
| 26 |
+
"--tree",
|
| 27 |
+
"1",
|
| 28 |
+
"--q_max_len",
|
| 29 |
+
"32",
|
| 30 |
+
"--p_max_len",
|
| 31 |
+
"128",
|
| 32 |
+
"--negative_passage_type",
|
| 33 |
+
"self",
|
| 34 |
+
"--positive_passage_no_shuffle",
|
| 35 |
+
"True",
|
| 36 |
+
"--tie_word_embeddings",
|
| 37 |
+
"True",
|
| 38 |
+
"--num_return_sequences",
|
| 39 |
+
"5",
|
| 40 |
+
"--logging_steps",
|
| 41 |
+
"10",
|
| 42 |
+
"--overwrite_output_dir",
|
| 43 |
+
"--wandb_tag",
|
| 44 |
+
"test_glen_vault_p2",
|
| 45 |
+
"--do_eval",
|
| 46 |
+
"False",
|
| 47 |
+
"--num_train_epochs",
|
| 48 |
+
"1",
|
| 49 |
+
"--save_steps",
|
| 50 |
+
"50",
|
| 51 |
+
"--save_strategy",
|
| 52 |
+
"steps",
|
| 53 |
+
"--evaluation_strategy",
|
| 54 |
+
"no",
|
| 55 |
+
"--seed",
|
| 56 |
+
"42",
|
| 57 |
+
"--gpu_memory_threshold",
|
| 58 |
+
"0.85",
|
| 59 |
+
"--gpu_check_interval",
|
| 60 |
+
"50",
|
| 61 |
+
"--fp16",
|
| 62 |
+
"True"
|
| 63 |
+
],
|
| 64 |
+
"program": "H:\\Code\\GLEN-model\\examples\\glen_phase2\\train_glen.py",
|
| 65 |
+
"codePath": "examples\\glen_phase2\\train_glen.py",
|
| 66 |
+
"git": {
|
| 67 |
+
"remote": "https://QuanTH02:@huggingface.co/QuanTH02/GLEN-model",
|
| 68 |
+
"commit": "6534252bf5ad60b20ba58d7d578a982aabeaacaa"
|
| 69 |
+
},
|
| 70 |
+
"root": "H:\\Code\\GLEN-model",
|
| 71 |
+
"host": "FPS-33",
|
| 72 |
+
"executable": "H:\\Code\\GLEN-model\\.env\\Scripts\\python.exe",
|
| 73 |
+
"codePathLocal": "examples\\glen_phase2\\train_glen.py",
|
| 74 |
+
"cpu_count": 10,
|
| 75 |
+
"cpu_count_logical": 16,
|
| 76 |
+
"gpu": "NVIDIA GeForce RTX 4060",
|
| 77 |
+
"gpu_count": 1,
|
| 78 |
+
"disk": {
|
| 79 |
+
"/": {
|
| 80 |
+
"total": "8001561812992",
|
| 81 |
+
"used": "3640533409792"
|
| 82 |
+
}
|
| 83 |
+
},
|
| 84 |
+
"memory": {
|
| 85 |
+
"total": "34157170688"
|
| 86 |
+
},
|
| 87 |
+
"cpu": {
|
| 88 |
+
"count": 10,
|
| 89 |
+
"countLogical": 16
|
| 90 |
+
},
|
| 91 |
+
"gpu_nvidia": [
|
| 92 |
+
{
|
| 93 |
+
"name": "NVIDIA GeForce RTX 4060",
|
| 94 |
+
"memoryTotal": "8585740288",
|
| 95 |
+
"cudaCores": 3072,
|
| 96 |
+
"architecture": "Ada",
|
| 97 |
+
"uuid": "GPU-7e0c8403-933a-8533-bde6-f629db871693"
|
| 98 |
+
}
|
| 99 |
+
],
|
| 100 |
+
"cudaVersion": "12.8"
|
| 101 |
+
}
|
wandb/offline-run-20250615_085008-fr23ohzz/run-fr23ohzz.wandb
ADDED
|
Binary file (13.3 kB). View file
|
|
|
wandb/offline-run-20250615_085636-ufk3qyrh/files/requirements.txt
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
accelerate==1.7.0
|
| 2 |
+
aiohappyeyeballs==2.6.1
|
| 3 |
+
aiohttp==3.12.13
|
| 4 |
+
aiosignal==1.3.2
|
| 5 |
+
annotated-types==0.7.0
|
| 6 |
+
attrs==25.3.0
|
| 7 |
+
certifi==2025.4.26
|
| 8 |
+
charset-normalizer==3.4.2
|
| 9 |
+
click==8.2.1
|
| 10 |
+
colorama==0.4.6
|
| 11 |
+
datasets==3.6.0
|
| 12 |
+
dill==0.3.8
|
| 13 |
+
filelock==3.18.0
|
| 14 |
+
frozenlist==1.7.0
|
| 15 |
+
fsspec==2025.3.0
|
| 16 |
+
gitdb==4.0.12
|
| 17 |
+
GitPython==3.1.44
|
| 18 |
+
huggingface-hub==0.33.0
|
| 19 |
+
idna==3.10
|
| 20 |
+
Jinja2==3.1.6
|
| 21 |
+
MarkupSafe==3.0.2
|
| 22 |
+
mpmath==1.3.0
|
| 23 |
+
multidict==6.4.4
|
| 24 |
+
multiprocess==0.70.16
|
| 25 |
+
networkx==3.5
|
| 26 |
+
numpy==2.3.0
|
| 27 |
+
packaging==25.0
|
| 28 |
+
pandas==2.3.0
|
| 29 |
+
pillow==11.2.1
|
| 30 |
+
pip==25.1.1
|
| 31 |
+
platformdirs==4.3.8
|
| 32 |
+
propcache==0.3.2
|
| 33 |
+
protobuf==6.31.1
|
| 34 |
+
psutil==7.0.0
|
| 35 |
+
pyarrow==20.0.0
|
| 36 |
+
pydantic==2.11.7
|
| 37 |
+
pydantic_core==2.33.2
|
| 38 |
+
python-dateutil==2.9.0.post0
|
| 39 |
+
pytz==2025.2
|
| 40 |
+
PyYAML==6.0.2
|
| 41 |
+
regex==2024.11.6
|
| 42 |
+
requests==2.32.4
|
| 43 |
+
safetensors==0.5.3
|
| 44 |
+
sentry-sdk==2.30.0
|
| 45 |
+
setproctitle==1.3.6
|
| 46 |
+
setuptools==80.9.0
|
| 47 |
+
six==1.17.0
|
| 48 |
+
smmap==5.0.2
|
| 49 |
+
sympy==1.14.0
|
| 50 |
+
tevatron==0.0.1
|
| 51 |
+
tokenizers==0.21.1
|
| 52 |
+
torch==2.7.1
|
| 53 |
+
torchaudio==2.7.1
|
| 54 |
+
torchvision==0.22.1
|
| 55 |
+
tqdm==4.67.1
|
| 56 |
+
transformers==4.52.4
|
| 57 |
+
typing_extensions==4.14.0
|
| 58 |
+
typing-inspection==0.4.1
|
| 59 |
+
tzdata==2025.2
|
| 60 |
+
urllib3==2.4.0
|
| 61 |
+
wandb==0.20.1
|
| 62 |
+
xxhash==3.5.0
|
| 63 |
+
yarl==1.20.1
|
| 64 |
+
tevatron==0.0.1
|
wandb/offline-run-20250615_085636-ufk3qyrh/files/wandb-metadata.json
ADDED
|
@@ -0,0 +1,113 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"os": "Windows-10-10.0.19045-SP0",
|
| 3 |
+
"python": "CPython 3.13.5",
|
| 4 |
+
"startedAt": "2025-06-15T01:56:36.587828Z",
|
| 5 |
+
"args": [
|
| 6 |
+
"--output_dir",
|
| 7 |
+
"logs/test_glen_vault/GLEN_P1_test",
|
| 8 |
+
"--model_name_or_path",
|
| 9 |
+
"t5-base",
|
| 10 |
+
"--query_type",
|
| 11 |
+
"gtq_doc",
|
| 12 |
+
"--per_device_train_batch_size",
|
| 13 |
+
"8",
|
| 14 |
+
"--per_device_eval_batch_size",
|
| 15 |
+
"4",
|
| 16 |
+
"--gradient_accumulation_steps",
|
| 17 |
+
"2",
|
| 18 |
+
"--dropout_rate",
|
| 19 |
+
"0.1",
|
| 20 |
+
"--Rdrop",
|
| 21 |
+
"0.15",
|
| 22 |
+
"--aug_query",
|
| 23 |
+
"True",
|
| 24 |
+
"--aug_query_type",
|
| 25 |
+
"corrupted_query",
|
| 26 |
+
"--input_dropout",
|
| 27 |
+
"1",
|
| 28 |
+
"--id_class",
|
| 29 |
+
"t5_bm25_truncate_3",
|
| 30 |
+
"--dataset_name",
|
| 31 |
+
"the_vault",
|
| 32 |
+
"--test100",
|
| 33 |
+
"1",
|
| 34 |
+
"--tree",
|
| 35 |
+
"1",
|
| 36 |
+
"--pretrain_decoder",
|
| 37 |
+
"True",
|
| 38 |
+
"--max_input_length",
|
| 39 |
+
"128",
|
| 40 |
+
"--val_check_interval",
|
| 41 |
+
"1.0",
|
| 42 |
+
"--tie_word_embeddings",
|
| 43 |
+
"True",
|
| 44 |
+
"--decoder_input",
|
| 45 |
+
"doc_rep",
|
| 46 |
+
"--max_output_length",
|
| 47 |
+
"5",
|
| 48 |
+
"--num_return_sequences",
|
| 49 |
+
"5",
|
| 50 |
+
"--logging_steps",
|
| 51 |
+
"100",
|
| 52 |
+
"--overwrite_output_dir",
|
| 53 |
+
"--wandb_tag",
|
| 54 |
+
"glen_vault_test_p1",
|
| 55 |
+
"--do_eval",
|
| 56 |
+
"True",
|
| 57 |
+
"--num_train_epochs",
|
| 58 |
+
"1",
|
| 59 |
+
"--save_steps",
|
| 60 |
+
"1000",
|
| 61 |
+
"--save_strategy",
|
| 62 |
+
"steps",
|
| 63 |
+
"--evaluation_strategy",
|
| 64 |
+
"steps",
|
| 65 |
+
"--eval_steps",
|
| 66 |
+
"1000",
|
| 67 |
+
"--seed",
|
| 68 |
+
"42",
|
| 69 |
+
"--gpu_memory_threshold",
|
| 70 |
+
"0.85",
|
| 71 |
+
"--gpu_check_interval",
|
| 72 |
+
"50",
|
| 73 |
+
"--fp16",
|
| 74 |
+
"True"
|
| 75 |
+
],
|
| 76 |
+
"program": "H:\\Code\\GLEN-model\\examples\\glen_phase1\\train_glen.py",
|
| 77 |
+
"codePath": "examples\\glen_phase1\\train_glen.py",
|
| 78 |
+
"git": {
|
| 79 |
+
"remote": "https://QuanTH02:@huggingface.co/QuanTH02/GLEN-model",
|
| 80 |
+
"commit": "6534252bf5ad60b20ba58d7d578a982aabeaacaa"
|
| 81 |
+
},
|
| 82 |
+
"root": "H:\\Code\\GLEN-model",
|
| 83 |
+
"host": "FPS-33",
|
| 84 |
+
"executable": "H:\\Code\\GLEN-model\\.env\\Scripts\\python.exe",
|
| 85 |
+
"codePathLocal": "examples\\glen_phase1\\train_glen.py",
|
| 86 |
+
"cpu_count": 10,
|
| 87 |
+
"cpu_count_logical": 16,
|
| 88 |
+
"gpu": "NVIDIA GeForce RTX 4060",
|
| 89 |
+
"gpu_count": 1,
|
| 90 |
+
"disk": {
|
| 91 |
+
"/": {
|
| 92 |
+
"total": "8001561812992",
|
| 93 |
+
"used": "3640026095616"
|
| 94 |
+
}
|
| 95 |
+
},
|
| 96 |
+
"memory": {
|
| 97 |
+
"total": "34157170688"
|
| 98 |
+
},
|
| 99 |
+
"cpu": {
|
| 100 |
+
"count": 10,
|
| 101 |
+
"countLogical": 16
|
| 102 |
+
},
|
| 103 |
+
"gpu_nvidia": [
|
| 104 |
+
{
|
| 105 |
+
"name": "NVIDIA GeForce RTX 4060",
|
| 106 |
+
"memoryTotal": "8585740288",
|
| 107 |
+
"cudaCores": 3072,
|
| 108 |
+
"architecture": "Ada",
|
| 109 |
+
"uuid": "GPU-7e0c8403-933a-8533-bde6-f629db871693"
|
| 110 |
+
}
|
| 111 |
+
],
|
| 112 |
+
"cudaVersion": "12.8"
|
| 113 |
+
}
|
wandb/offline-run-20250615_085636-ufk3qyrh/run-ufk3qyrh.wandb
ADDED
|
Binary file (18.1 kB). View file
|
|
|
wandb/offline-run-20250615_090510-p2obgs7h/files/requirements.txt
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
accelerate==1.7.0
|
| 2 |
+
aiohappyeyeballs==2.6.1
|
| 3 |
+
aiohttp==3.12.13
|
| 4 |
+
aiosignal==1.3.2
|
| 5 |
+
annotated-types==0.7.0
|
| 6 |
+
attrs==25.3.0
|
| 7 |
+
certifi==2025.4.26
|
| 8 |
+
charset-normalizer==3.4.2
|
| 9 |
+
click==8.2.1
|
| 10 |
+
colorama==0.4.6
|
| 11 |
+
datasets==3.6.0
|
| 12 |
+
dill==0.3.8
|
| 13 |
+
filelock==3.18.0
|
| 14 |
+
frozenlist==1.7.0
|
| 15 |
+
fsspec==2025.3.0
|
| 16 |
+
gitdb==4.0.12
|
| 17 |
+
GitPython==3.1.44
|
| 18 |
+
huggingface-hub==0.33.0
|
| 19 |
+
idna==3.10
|
| 20 |
+
Jinja2==3.1.6
|
| 21 |
+
MarkupSafe==3.0.2
|
| 22 |
+
mpmath==1.3.0
|
| 23 |
+
multidict==6.4.4
|
| 24 |
+
multiprocess==0.70.16
|
| 25 |
+
networkx==3.5
|
| 26 |
+
numpy==2.3.0
|
| 27 |
+
packaging==25.0
|
| 28 |
+
pandas==2.3.0
|
| 29 |
+
pillow==11.2.1
|
| 30 |
+
pip==25.1.1
|
| 31 |
+
platformdirs==4.3.8
|
| 32 |
+
propcache==0.3.2
|
| 33 |
+
protobuf==6.31.1
|
| 34 |
+
psutil==7.0.0
|
| 35 |
+
pyarrow==20.0.0
|
| 36 |
+
pydantic==2.11.7
|
| 37 |
+
pydantic_core==2.33.2
|
| 38 |
+
python-dateutil==2.9.0.post0
|
| 39 |
+
pytz==2025.2
|
| 40 |
+
PyYAML==6.0.2
|
| 41 |
+
regex==2024.11.6
|
| 42 |
+
requests==2.32.4
|
| 43 |
+
safetensors==0.5.3
|
| 44 |
+
sentry-sdk==2.30.0
|
| 45 |
+
setproctitle==1.3.6
|
| 46 |
+
setuptools==80.9.0
|
| 47 |
+
six==1.17.0
|
| 48 |
+
smmap==5.0.2
|
| 49 |
+
sympy==1.14.0
|
| 50 |
+
tevatron==0.0.1
|
| 51 |
+
tokenizers==0.21.1
|
| 52 |
+
torch==2.7.1
|
| 53 |
+
torchaudio==2.7.1
|
| 54 |
+
torchvision==0.22.1
|
| 55 |
+
tqdm==4.67.1
|
| 56 |
+
transformers==4.52.4
|
| 57 |
+
typing_extensions==4.14.0
|
| 58 |
+
typing-inspection==0.4.1
|
| 59 |
+
tzdata==2025.2
|
| 60 |
+
urllib3==2.4.0
|
| 61 |
+
wandb==0.20.1
|
| 62 |
+
xxhash==3.5.0
|
| 63 |
+
yarl==1.20.1
|
| 64 |
+
tevatron==0.0.1
|
wandb/offline-run-20250615_090510-p2obgs7h/files/wandb-metadata.json
ADDED
|
@@ -0,0 +1,113 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"os": "Windows-10-10.0.19045-SP0",
|
| 3 |
+
"python": "CPython 3.13.5",
|
| 4 |
+
"startedAt": "2025-06-15T02:05:11.108383Z",
|
| 5 |
+
"args": [
|
| 6 |
+
"--output_dir",
|
| 7 |
+
"logs/test_glen_vault/GLEN_P1_test",
|
| 8 |
+
"--model_name_or_path",
|
| 9 |
+
"t5-base",
|
| 10 |
+
"--query_type",
|
| 11 |
+
"gtq_doc",
|
| 12 |
+
"--per_device_train_batch_size",
|
| 13 |
+
"8",
|
| 14 |
+
"--per_device_eval_batch_size",
|
| 15 |
+
"4",
|
| 16 |
+
"--gradient_accumulation_steps",
|
| 17 |
+
"2",
|
| 18 |
+
"--dropout_rate",
|
| 19 |
+
"0.1",
|
| 20 |
+
"--Rdrop",
|
| 21 |
+
"0.15",
|
| 22 |
+
"--aug_query",
|
| 23 |
+
"True",
|
| 24 |
+
"--aug_query_type",
|
| 25 |
+
"corrupted_query",
|
| 26 |
+
"--input_dropout",
|
| 27 |
+
"1",
|
| 28 |
+
"--id_class",
|
| 29 |
+
"t5_bm25_truncate_3",
|
| 30 |
+
"--dataset_name",
|
| 31 |
+
"the_vault",
|
| 32 |
+
"--test100",
|
| 33 |
+
"1",
|
| 34 |
+
"--tree",
|
| 35 |
+
"1",
|
| 36 |
+
"--pretrain_decoder",
|
| 37 |
+
"True",
|
| 38 |
+
"--max_input_length",
|
| 39 |
+
"128",
|
| 40 |
+
"--val_check_interval",
|
| 41 |
+
"1.0",
|
| 42 |
+
"--tie_word_embeddings",
|
| 43 |
+
"True",
|
| 44 |
+
"--decoder_input",
|
| 45 |
+
"doc_rep",
|
| 46 |
+
"--max_output_length",
|
| 47 |
+
"5",
|
| 48 |
+
"--num_return_sequences",
|
| 49 |
+
"5",
|
| 50 |
+
"--logging_steps",
|
| 51 |
+
"100",
|
| 52 |
+
"--overwrite_output_dir",
|
| 53 |
+
"--wandb_tag",
|
| 54 |
+
"glen_vault_test_p1",
|
| 55 |
+
"--do_eval",
|
| 56 |
+
"True",
|
| 57 |
+
"--num_train_epochs",
|
| 58 |
+
"1",
|
| 59 |
+
"--save_steps",
|
| 60 |
+
"1000",
|
| 61 |
+
"--save_strategy",
|
| 62 |
+
"steps",
|
| 63 |
+
"--evaluation_strategy",
|
| 64 |
+
"steps",
|
| 65 |
+
"--eval_steps",
|
| 66 |
+
"1000",
|
| 67 |
+
"--seed",
|
| 68 |
+
"42",
|
| 69 |
+
"--gpu_memory_threshold",
|
| 70 |
+
"0.85",
|
| 71 |
+
"--gpu_check_interval",
|
| 72 |
+
"50",
|
| 73 |
+
"--fp16",
|
| 74 |
+
"True"
|
| 75 |
+
],
|
| 76 |
+
"program": "H:\\Code\\GLEN-model\\examples\\glen_phase1\\train_glen.py",
|
| 77 |
+
"codePath": "examples\\glen_phase1\\train_glen.py",
|
| 78 |
+
"git": {
|
| 79 |
+
"remote": "https://QuanTH02:@huggingface.co/QuanTH02/GLEN-model",
|
| 80 |
+
"commit": "ca9706f426fc8d43aa09c19ad7ec61380c5f7749"
|
| 81 |
+
},
|
| 82 |
+
"root": "H:\\Code\\GLEN-model",
|
| 83 |
+
"host": "FPS-33",
|
| 84 |
+
"executable": "H:\\Code\\GLEN-model\\.env\\Scripts\\python.exe",
|
| 85 |
+
"codePathLocal": "examples\\glen_phase1\\train_glen.py",
|
| 86 |
+
"cpu_count": 10,
|
| 87 |
+
"cpu_count_logical": 16,
|
| 88 |
+
"gpu": "NVIDIA GeForce RTX 4060",
|
| 89 |
+
"gpu_count": 1,
|
| 90 |
+
"disk": {
|
| 91 |
+
"/": {
|
| 92 |
+
"total": "8001561812992",
|
| 93 |
+
"used": "3639623524352"
|
| 94 |
+
}
|
| 95 |
+
},
|
| 96 |
+
"memory": {
|
| 97 |
+
"total": "34157170688"
|
| 98 |
+
},
|
| 99 |
+
"cpu": {
|
| 100 |
+
"count": 10,
|
| 101 |
+
"countLogical": 16
|
| 102 |
+
},
|
| 103 |
+
"gpu_nvidia": [
|
| 104 |
+
{
|
| 105 |
+
"name": "NVIDIA GeForce RTX 4060",
|
| 106 |
+
"memoryTotal": "8585740288",
|
| 107 |
+
"cudaCores": 3072,
|
| 108 |
+
"architecture": "Ada",
|
| 109 |
+
"uuid": "GPU-7e0c8403-933a-8533-bde6-f629db871693"
|
| 110 |
+
}
|
| 111 |
+
],
|
| 112 |
+
"cudaVersion": "12.8"
|
| 113 |
+
}
|
wandb/offline-run-20250615_090510-p2obgs7h/run-p2obgs7h.wandb
ADDED
|
Binary file (18.1 kB). View file
|
|
|
wandb/offline-run-20250615_090639-ovkkgdmi/files/requirements.txt
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
accelerate==1.7.0
|
| 2 |
+
aiohappyeyeballs==2.6.1
|
| 3 |
+
aiohttp==3.12.13
|
| 4 |
+
aiosignal==1.3.2
|
| 5 |
+
annotated-types==0.7.0
|
| 6 |
+
attrs==25.3.0
|
| 7 |
+
certifi==2025.4.26
|
| 8 |
+
charset-normalizer==3.4.2
|
| 9 |
+
click==8.2.1
|
| 10 |
+
colorama==0.4.6
|
| 11 |
+
datasets==3.6.0
|
| 12 |
+
dill==0.3.8
|
| 13 |
+
filelock==3.18.0
|
| 14 |
+
frozenlist==1.7.0
|
| 15 |
+
fsspec==2025.3.0
|
| 16 |
+
gitdb==4.0.12
|
| 17 |
+
GitPython==3.1.44
|
| 18 |
+
huggingface-hub==0.33.0
|
| 19 |
+
idna==3.10
|
| 20 |
+
Jinja2==3.1.6
|
| 21 |
+
MarkupSafe==3.0.2
|
| 22 |
+
mpmath==1.3.0
|
| 23 |
+
multidict==6.4.4
|
| 24 |
+
multiprocess==0.70.16
|
| 25 |
+
networkx==3.5
|
| 26 |
+
numpy==2.3.0
|
| 27 |
+
packaging==25.0
|
| 28 |
+
pandas==2.3.0
|
| 29 |
+
pillow==11.2.1
|
| 30 |
+
pip==25.1.1
|
| 31 |
+
platformdirs==4.3.8
|
| 32 |
+
propcache==0.3.2
|
| 33 |
+
protobuf==6.31.1
|
| 34 |
+
psutil==7.0.0
|
| 35 |
+
pyarrow==20.0.0
|
| 36 |
+
pydantic==2.11.7
|
| 37 |
+
pydantic_core==2.33.2
|
| 38 |
+
python-dateutil==2.9.0.post0
|
| 39 |
+
pytz==2025.2
|
| 40 |
+
PyYAML==6.0.2
|
| 41 |
+
regex==2024.11.6
|
| 42 |
+
requests==2.32.4
|
| 43 |
+
safetensors==0.5.3
|
| 44 |
+
sentry-sdk==2.30.0
|
| 45 |
+
setproctitle==1.3.6
|
| 46 |
+
setuptools==80.9.0
|
| 47 |
+
six==1.17.0
|
| 48 |
+
smmap==5.0.2
|
| 49 |
+
sympy==1.14.0
|
| 50 |
+
tevatron==0.0.1
|
| 51 |
+
tokenizers==0.21.1
|
| 52 |
+
torch==2.7.1
|
| 53 |
+
torchaudio==2.7.1
|
| 54 |
+
torchvision==0.22.1
|
| 55 |
+
tqdm==4.67.1
|
| 56 |
+
transformers==4.52.4
|
| 57 |
+
typing_extensions==4.14.0
|
| 58 |
+
typing-inspection==0.4.1
|
| 59 |
+
tzdata==2025.2
|
| 60 |
+
urllib3==2.4.0
|
| 61 |
+
wandb==0.20.1
|
| 62 |
+
xxhash==3.5.0
|
| 63 |
+
yarl==1.20.1
|
| 64 |
+
tevatron==0.0.1
|
wandb/offline-run-20250615_090639-ovkkgdmi/files/wandb-metadata.json
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"os": "Windows-10-10.0.19045-SP0",
|
| 3 |
+
"python": "CPython 3.13.5",
|
| 4 |
+
"startedAt": "2025-06-15T02:06:40.118965Z",
|
| 5 |
+
"args": [
|
| 6 |
+
"--output_dir",
|
| 7 |
+
"logs/test_glen_vault/GLEN_P2_test",
|
| 8 |
+
"--model_name_or_path",
|
| 9 |
+
"logs/test_glen_vault/GLEN_P1_test",
|
| 10 |
+
"--per_device_train_batch_size",
|
| 11 |
+
"4",
|
| 12 |
+
"--per_device_eval_batch_size",
|
| 13 |
+
"2",
|
| 14 |
+
"--gradient_accumulation_steps",
|
| 15 |
+
"4",
|
| 16 |
+
"--dropout_rate",
|
| 17 |
+
"0.1",
|
| 18 |
+
"--warmup_ratio",
|
| 19 |
+
"0.1",
|
| 20 |
+
"--id_class",
|
| 21 |
+
"t5_bm25_truncate_3",
|
| 22 |
+
"--dataset_name",
|
| 23 |
+
"the_vault",
|
| 24 |
+
"--tree",
|
| 25 |
+
"1",
|
| 26 |
+
"--q_max_len",
|
| 27 |
+
"32",
|
| 28 |
+
"--p_max_len",
|
| 29 |
+
"128",
|
| 30 |
+
"--negative_passage_type",
|
| 31 |
+
"self",
|
| 32 |
+
"--positive_passage_no_shuffle",
|
| 33 |
+
"True",
|
| 34 |
+
"--tie_word_embeddings",
|
| 35 |
+
"True",
|
| 36 |
+
"--num_return_sequences",
|
| 37 |
+
"5",
|
| 38 |
+
"--logging_steps",
|
| 39 |
+
"100",
|
| 40 |
+
"--overwrite_output_dir",
|
| 41 |
+
"--wandb_tag",
|
| 42 |
+
"glen_vault_test_p2",
|
| 43 |
+
"--do_eval",
|
| 44 |
+
"True",
|
| 45 |
+
"--num_train_epochs",
|
| 46 |
+
"1",
|
| 47 |
+
"--save_steps",
|
| 48 |
+
"1000",
|
| 49 |
+
"--save_strategy",
|
| 50 |
+
"steps",
|
| 51 |
+
"--evaluation_strategy",
|
| 52 |
+
"steps",
|
| 53 |
+
"--eval_steps",
|
| 54 |
+
"1000",
|
| 55 |
+
"--seed",
|
| 56 |
+
"42",
|
| 57 |
+
"--gpu_memory_threshold",
|
| 58 |
+
"0.85",
|
| 59 |
+
"--gpu_check_interval",
|
| 60 |
+
"50",
|
| 61 |
+
"--fp16",
|
| 62 |
+
"True"
|
| 63 |
+
],
|
| 64 |
+
"program": "H:\\Code\\GLEN-model\\examples\\glen_phase2\\train_glen.py",
|
| 65 |
+
"codePath": "examples\\glen_phase2\\train_glen.py",
|
| 66 |
+
"git": {
|
| 67 |
+
"remote": "https://QuanTH02:@huggingface.co/QuanTH02/GLEN-model",
|
| 68 |
+
"commit": "ca9706f426fc8d43aa09c19ad7ec61380c5f7749"
|
| 69 |
+
},
|
| 70 |
+
"root": "H:\\Code\\GLEN-model",
|
| 71 |
+
"host": "FPS-33",
|
| 72 |
+
"executable": "H:\\Code\\GLEN-model\\.env\\Scripts\\python.exe",
|
| 73 |
+
"codePathLocal": "examples\\glen_phase2\\train_glen.py",
|
| 74 |
+
"cpu_count": 10,
|
| 75 |
+
"cpu_count_logical": 16,
|
| 76 |
+
"gpu": "NVIDIA GeForce RTX 4060",
|
| 77 |
+
"gpu_count": 1,
|
| 78 |
+
"disk": {
|
| 79 |
+
"/": {
|
| 80 |
+
"total": "8001561812992",
|
| 81 |
+
"used": "3639623598080"
|
| 82 |
+
}
|
| 83 |
+
},
|
| 84 |
+
"memory": {
|
| 85 |
+
"total": "34157170688"
|
| 86 |
+
},
|
| 87 |
+
"cpu": {
|
| 88 |
+
"count": 10,
|
| 89 |
+
"countLogical": 16
|
| 90 |
+
},
|
| 91 |
+
"gpu_nvidia": [
|
| 92 |
+
{
|
| 93 |
+
"name": "NVIDIA GeForce RTX 4060",
|
| 94 |
+
"memoryTotal": "8585740288",
|
| 95 |
+
"cudaCores": 3072,
|
| 96 |
+
"architecture": "Ada",
|
| 97 |
+
"uuid": "GPU-7e0c8403-933a-8533-bde6-f629db871693"
|
| 98 |
+
}
|
| 99 |
+
],
|
| 100 |
+
"cudaVersion": "12.8"
|
| 101 |
+
}
|
wandb/offline-run-20250615_090639-ovkkgdmi/run-ovkkgdmi.wandb
ADDED
|
Binary file (32.8 kB). View file
|
|
|
wandb/offline-run-20250615_092539-8n51qf7g/files/requirements.txt
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
accelerate==1.7.0
|
| 2 |
+
aiohappyeyeballs==2.6.1
|
| 3 |
+
aiohttp==3.12.13
|
| 4 |
+
aiosignal==1.3.2
|
| 5 |
+
annotated-types==0.7.0
|
| 6 |
+
attrs==25.3.0
|
| 7 |
+
certifi==2025.4.26
|
| 8 |
+
charset-normalizer==3.4.2
|
| 9 |
+
click==8.2.1
|
| 10 |
+
colorama==0.4.6
|
| 11 |
+
datasets==3.6.0
|
| 12 |
+
dill==0.3.8
|
| 13 |
+
filelock==3.18.0
|
| 14 |
+
frozenlist==1.7.0
|
| 15 |
+
fsspec==2025.3.0
|
| 16 |
+
gitdb==4.0.12
|
| 17 |
+
GitPython==3.1.44
|
| 18 |
+
huggingface-hub==0.33.0
|
| 19 |
+
idna==3.10
|
| 20 |
+
Jinja2==3.1.6
|
| 21 |
+
MarkupSafe==3.0.2
|
| 22 |
+
mpmath==1.3.0
|
| 23 |
+
multidict==6.4.4
|
| 24 |
+
multiprocess==0.70.16
|
| 25 |
+
networkx==3.5
|
| 26 |
+
numpy==2.3.0
|
| 27 |
+
packaging==25.0
|
| 28 |
+
pandas==2.3.0
|
| 29 |
+
pillow==11.2.1
|
| 30 |
+
pip==25.1.1
|
| 31 |
+
platformdirs==4.3.8
|
| 32 |
+
propcache==0.3.2
|
| 33 |
+
protobuf==6.31.1
|
| 34 |
+
psutil==7.0.0
|
| 35 |
+
pyarrow==20.0.0
|
| 36 |
+
pydantic==2.11.7
|
| 37 |
+
pydantic_core==2.33.2
|
| 38 |
+
python-dateutil==2.9.0.post0
|
| 39 |
+
pytz==2025.2
|
| 40 |
+
PyYAML==6.0.2
|
| 41 |
+
regex==2024.11.6
|
| 42 |
+
requests==2.32.4
|
| 43 |
+
safetensors==0.5.3
|
| 44 |
+
sentry-sdk==2.30.0
|
| 45 |
+
setproctitle==1.3.6
|
| 46 |
+
setuptools==80.9.0
|
| 47 |
+
six==1.17.0
|
| 48 |
+
smmap==5.0.2
|
| 49 |
+
sympy==1.14.0
|
| 50 |
+
tevatron==0.0.1
|
| 51 |
+
tokenizers==0.21.1
|
| 52 |
+
torch==2.7.1
|
| 53 |
+
torchaudio==2.7.1
|
| 54 |
+
torchvision==0.22.1
|
| 55 |
+
tqdm==4.67.1
|
| 56 |
+
transformers==4.52.4
|
| 57 |
+
typing_extensions==4.14.0
|
| 58 |
+
typing-inspection==0.4.1
|
| 59 |
+
tzdata==2025.2
|
| 60 |
+
urllib3==2.4.0
|
| 61 |
+
wandb==0.20.1
|
| 62 |
+
xxhash==3.5.0
|
| 63 |
+
yarl==1.20.1
|
| 64 |
+
tevatron==0.0.1
|
wandb/offline-run-20250615_092539-8n51qf7g/files/wandb-metadata.json
ADDED
|
@@ -0,0 +1,113 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"os": "Windows-10-10.0.19045-SP0",
|
| 3 |
+
"python": "CPython 3.13.5",
|
| 4 |
+
"startedAt": "2025-06-15T02:25:39.486198Z",
|
| 5 |
+
"args": [
|
| 6 |
+
"--output_dir",
|
| 7 |
+
"logs/test_glen_vault/GLEN_P1_test",
|
| 8 |
+
"--model_name_or_path",
|
| 9 |
+
"t5-base",
|
| 10 |
+
"--query_type",
|
| 11 |
+
"gtq_doc",
|
| 12 |
+
"--per_device_train_batch_size",
|
| 13 |
+
"8",
|
| 14 |
+
"--per_device_eval_batch_size",
|
| 15 |
+
"4",
|
| 16 |
+
"--gradient_accumulation_steps",
|
| 17 |
+
"2",
|
| 18 |
+
"--dropout_rate",
|
| 19 |
+
"0.1",
|
| 20 |
+
"--Rdrop",
|
| 21 |
+
"0.15",
|
| 22 |
+
"--aug_query",
|
| 23 |
+
"True",
|
| 24 |
+
"--aug_query_type",
|
| 25 |
+
"corrupted_query",
|
| 26 |
+
"--input_dropout",
|
| 27 |
+
"1",
|
| 28 |
+
"--id_class",
|
| 29 |
+
"t5_bm25_truncate_3",
|
| 30 |
+
"--dataset_name",
|
| 31 |
+
"the_vault",
|
| 32 |
+
"--test100",
|
| 33 |
+
"1",
|
| 34 |
+
"--tree",
|
| 35 |
+
"1",
|
| 36 |
+
"--pretrain_decoder",
|
| 37 |
+
"True",
|
| 38 |
+
"--max_input_length",
|
| 39 |
+
"128",
|
| 40 |
+
"--val_check_interval",
|
| 41 |
+
"1.0",
|
| 42 |
+
"--tie_word_embeddings",
|
| 43 |
+
"True",
|
| 44 |
+
"--decoder_input",
|
| 45 |
+
"doc_rep",
|
| 46 |
+
"--max_output_length",
|
| 47 |
+
"5",
|
| 48 |
+
"--num_return_sequences",
|
| 49 |
+
"5",
|
| 50 |
+
"--logging_steps",
|
| 51 |
+
"100",
|
| 52 |
+
"--overwrite_output_dir",
|
| 53 |
+
"--wandb_tag",
|
| 54 |
+
"glen_vault_test_p1",
|
| 55 |
+
"--do_eval",
|
| 56 |
+
"True",
|
| 57 |
+
"--num_train_epochs",
|
| 58 |
+
"1",
|
| 59 |
+
"--save_steps",
|
| 60 |
+
"1000",
|
| 61 |
+
"--save_strategy",
|
| 62 |
+
"steps",
|
| 63 |
+
"--evaluation_strategy",
|
| 64 |
+
"steps",
|
| 65 |
+
"--eval_steps",
|
| 66 |
+
"1000",
|
| 67 |
+
"--seed",
|
| 68 |
+
"42",
|
| 69 |
+
"--gpu_memory_threshold",
|
| 70 |
+
"0.85",
|
| 71 |
+
"--gpu_check_interval",
|
| 72 |
+
"50",
|
| 73 |
+
"--fp16",
|
| 74 |
+
"True"
|
| 75 |
+
],
|
| 76 |
+
"program": "H:\\Code\\GLEN-model\\examples\\glen_phase1\\train_glen.py",
|
| 77 |
+
"codePath": "examples\\glen_phase1\\train_glen.py",
|
| 78 |
+
"git": {
|
| 79 |
+
"remote": "https://QuanTH02:@huggingface.co/QuanTH02/GLEN-model",
|
| 80 |
+
"commit": "ca9706f426fc8d43aa09c19ad7ec61380c5f7749"
|
| 81 |
+
},
|
| 82 |
+
"root": "H:\\Code\\GLEN-model",
|
| 83 |
+
"host": "FPS-33",
|
| 84 |
+
"executable": "H:\\Code\\GLEN-model\\.env\\Scripts\\python.exe",
|
| 85 |
+
"codePathLocal": "examples\\glen_phase1\\train_glen.py",
|
| 86 |
+
"cpu_count": 10,
|
| 87 |
+
"cpu_count_logical": 16,
|
| 88 |
+
"gpu": "NVIDIA GeForce RTX 4060",
|
| 89 |
+
"gpu_count": 1,
|
| 90 |
+
"disk": {
|
| 91 |
+
"/": {
|
| 92 |
+
"total": "8001561812992",
|
| 93 |
+
"used": "3639623917568"
|
| 94 |
+
}
|
| 95 |
+
},
|
| 96 |
+
"memory": {
|
| 97 |
+
"total": "34157170688"
|
| 98 |
+
},
|
| 99 |
+
"cpu": {
|
| 100 |
+
"count": 10,
|
| 101 |
+
"countLogical": 16
|
| 102 |
+
},
|
| 103 |
+
"gpu_nvidia": [
|
| 104 |
+
{
|
| 105 |
+
"name": "NVIDIA GeForce RTX 4060",
|
| 106 |
+
"memoryTotal": "8585740288",
|
| 107 |
+
"cudaCores": 3072,
|
| 108 |
+
"architecture": "Ada",
|
| 109 |
+
"uuid": "GPU-7e0c8403-933a-8533-bde6-f629db871693"
|
| 110 |
+
}
|
| 111 |
+
],
|
| 112 |
+
"cudaVersion": "12.8"
|
| 113 |
+
}
|
wandb/offline-run-20250615_092539-8n51qf7g/run-8n51qf7g.wandb
ADDED
|
Binary file (18.8 kB). View file
|
|
|
wandb/offline-run-20250615_092759-cpafuazn/files/requirements.txt
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
accelerate==1.7.0
|
| 2 |
+
aiohappyeyeballs==2.6.1
|
| 3 |
+
aiohttp==3.12.13
|
| 4 |
+
aiosignal==1.3.2
|
| 5 |
+
annotated-types==0.7.0
|
| 6 |
+
attrs==25.3.0
|
| 7 |
+
certifi==2025.4.26
|
| 8 |
+
charset-normalizer==3.4.2
|
| 9 |
+
click==8.2.1
|
| 10 |
+
colorama==0.4.6
|
| 11 |
+
datasets==3.6.0
|
| 12 |
+
dill==0.3.8
|
| 13 |
+
filelock==3.18.0
|
| 14 |
+
frozenlist==1.7.0
|
| 15 |
+
fsspec==2025.3.0
|
| 16 |
+
gitdb==4.0.12
|
| 17 |
+
GitPython==3.1.44
|
| 18 |
+
huggingface-hub==0.33.0
|
| 19 |
+
idna==3.10
|
| 20 |
+
Jinja2==3.1.6
|
| 21 |
+
MarkupSafe==3.0.2
|
| 22 |
+
mpmath==1.3.0
|
| 23 |
+
multidict==6.4.4
|
| 24 |
+
multiprocess==0.70.16
|
| 25 |
+
networkx==3.5
|
| 26 |
+
numpy==2.3.0
|
| 27 |
+
packaging==25.0
|
| 28 |
+
pandas==2.3.0
|
| 29 |
+
pillow==11.2.1
|
| 30 |
+
pip==25.1.1
|
| 31 |
+
platformdirs==4.3.8
|
| 32 |
+
propcache==0.3.2
|
| 33 |
+
protobuf==6.31.1
|
| 34 |
+
psutil==7.0.0
|
| 35 |
+
pyarrow==20.0.0
|
| 36 |
+
pydantic==2.11.7
|
| 37 |
+
pydantic_core==2.33.2
|
| 38 |
+
python-dateutil==2.9.0.post0
|
| 39 |
+
pytz==2025.2
|
| 40 |
+
PyYAML==6.0.2
|
| 41 |
+
regex==2024.11.6
|
| 42 |
+
requests==2.32.4
|
| 43 |
+
safetensors==0.5.3
|
| 44 |
+
sentry-sdk==2.30.0
|
| 45 |
+
setproctitle==1.3.6
|
| 46 |
+
setuptools==80.9.0
|
| 47 |
+
six==1.17.0
|
| 48 |
+
smmap==5.0.2
|
| 49 |
+
sympy==1.14.0
|
| 50 |
+
tevatron==0.0.1
|
| 51 |
+
tokenizers==0.21.1
|
| 52 |
+
torch==2.7.1
|
| 53 |
+
torchaudio==2.7.1
|
| 54 |
+
torchvision==0.22.1
|
| 55 |
+
tqdm==4.67.1
|
| 56 |
+
transformers==4.52.4
|
| 57 |
+
typing_extensions==4.14.0
|
| 58 |
+
typing-inspection==0.4.1
|
| 59 |
+
tzdata==2025.2
|
| 60 |
+
urllib3==2.4.0
|
| 61 |
+
wandb==0.20.1
|
| 62 |
+
xxhash==3.5.0
|
| 63 |
+
yarl==1.20.1
|
| 64 |
+
tevatron==0.0.1
|
wandb/offline-run-20250615_092759-cpafuazn/files/wandb-metadata.json
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"os": "Windows-10-10.0.19045-SP0",
|
| 3 |
+
"python": "CPython 3.13.5",
|
| 4 |
+
"startedAt": "2025-06-15T02:28:00.208908Z",
|
| 5 |
+
"args": [
|
| 6 |
+
"--output_dir",
|
| 7 |
+
"logs/test_glen_vault/GLEN_P2_test",
|
| 8 |
+
"--model_name_or_path",
|
| 9 |
+
"logs/test_glen_vault/GLEN_P1_test",
|
| 10 |
+
"--per_device_train_batch_size",
|
| 11 |
+
"4",
|
| 12 |
+
"--per_device_eval_batch_size",
|
| 13 |
+
"2",
|
| 14 |
+
"--gradient_accumulation_steps",
|
| 15 |
+
"4",
|
| 16 |
+
"--dropout_rate",
|
| 17 |
+
"0.1",
|
| 18 |
+
"--warmup_ratio",
|
| 19 |
+
"0.1",
|
| 20 |
+
"--id_class",
|
| 21 |
+
"t5_bm25_truncate_3",
|
| 22 |
+
"--dataset_name",
|
| 23 |
+
"the_vault",
|
| 24 |
+
"--tree",
|
| 25 |
+
"1",
|
| 26 |
+
"--q_max_len",
|
| 27 |
+
"32",
|
| 28 |
+
"--p_max_len",
|
| 29 |
+
"128",
|
| 30 |
+
"--negative_passage_type",
|
| 31 |
+
"self",
|
| 32 |
+
"--positive_passage_no_shuffle",
|
| 33 |
+
"True",
|
| 34 |
+
"--tie_word_embeddings",
|
| 35 |
+
"True",
|
| 36 |
+
"--num_return_sequences",
|
| 37 |
+
"5",
|
| 38 |
+
"--logging_steps",
|
| 39 |
+
"100",
|
| 40 |
+
"--overwrite_output_dir",
|
| 41 |
+
"--wandb_tag",
|
| 42 |
+
"glen_vault_test_p2",
|
| 43 |
+
"--do_eval",
|
| 44 |
+
"True",
|
| 45 |
+
"--num_train_epochs",
|
| 46 |
+
"1",
|
| 47 |
+
"--save_steps",
|
| 48 |
+
"1000",
|
| 49 |
+
"--save_strategy",
|
| 50 |
+
"steps",
|
| 51 |
+
"--evaluation_strategy",
|
| 52 |
+
"steps",
|
| 53 |
+
"--eval_steps",
|
| 54 |
+
"1000",
|
| 55 |
+
"--seed",
|
| 56 |
+
"42",
|
| 57 |
+
"--gpu_memory_threshold",
|
| 58 |
+
"0.85",
|
| 59 |
+
"--gpu_check_interval",
|
| 60 |
+
"50",
|
| 61 |
+
"--fp16",
|
| 62 |
+
"True"
|
| 63 |
+
],
|
| 64 |
+
"program": "H:\\Code\\GLEN-model\\examples\\glen_phase2\\train_glen.py",
|
| 65 |
+
"codePath": "examples\\glen_phase2\\train_glen.py",
|
| 66 |
+
"git": {
|
| 67 |
+
"remote": "https://QuanTH02:@huggingface.co/QuanTH02/GLEN-model",
|
| 68 |
+
"commit": "ca9706f426fc8d43aa09c19ad7ec61380c5f7749"
|
| 69 |
+
},
|
| 70 |
+
"root": "H:\\Code\\GLEN-model",
|
| 71 |
+
"host": "FPS-33",
|
| 72 |
+
"executable": "H:\\Code\\GLEN-model\\.env\\Scripts\\python.exe",
|
| 73 |
+
"codePathLocal": "examples\\glen_phase2\\train_glen.py",
|
| 74 |
+
"cpu_count": 10,
|
| 75 |
+
"cpu_count_logical": 16,
|
| 76 |
+
"gpu": "NVIDIA GeForce RTX 4060",
|
| 77 |
+
"gpu_count": 1,
|
| 78 |
+
"disk": {
|
| 79 |
+
"/": {
|
| 80 |
+
"total": "8001561812992",
|
| 81 |
+
"used": "3639623999488"
|
| 82 |
+
}
|
| 83 |
+
},
|
| 84 |
+
"memory": {
|
| 85 |
+
"total": "34157170688"
|
| 86 |
+
},
|
| 87 |
+
"cpu": {
|
| 88 |
+
"count": 10,
|
| 89 |
+
"countLogical": 16
|
| 90 |
+
},
|
| 91 |
+
"gpu_nvidia": [
|
| 92 |
+
{
|
| 93 |
+
"name": "NVIDIA GeForce RTX 4060",
|
| 94 |
+
"memoryTotal": "8585740288",
|
| 95 |
+
"cudaCores": 3072,
|
| 96 |
+
"architecture": "Ada",
|
| 97 |
+
"uuid": "GPU-7e0c8403-933a-8533-bde6-f629db871693"
|
| 98 |
+
}
|
| 99 |
+
],
|
| 100 |
+
"cudaVersion": "12.8"
|
| 101 |
+
}
|
wandb/offline-run-20250615_092759-cpafuazn/run-cpafuazn.wandb
ADDED
|
File without changes
|