Upload folder using huggingface_hub
Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
- METR-LA_100_12_12/168d6584087dcd4c27bc3ca12614ba0c/D2STGNN_100.pt +3 -0
- METR-LA_100_12_12/168d6584087dcd4c27bc3ca12614ba0c/D2STGNN_best_val_MAE.pt +3 -0
- METR-LA_100_12_12/168d6584087dcd4c27bc3ca12614ba0c/METR-LA.py +157 -0
- METR-LA_100_12_12/168d6584087dcd4c27bc3ca12614ba0c/cfg.txt +94 -0
- METR-LA_100_12_12/168d6584087dcd4c27bc3ca12614ba0c/tensorboard/events.out.tfevents.1747602990.lxhdfrwx3-cse.3155086.0 +3 -0
- METR-LA_100_12_12/168d6584087dcd4c27bc3ca12614ba0c/tensorboard/events.out.tfevents.1747603297.lxhdfrwx3-cse.3156865.0 +3 -0
- METR-LA_100_12_12/168d6584087dcd4c27bc3ca12614ba0c/tensorboard/events.out.tfevents.1747603545.lxhdfrwx3-cse.3158332.0 +3 -0
- METR-LA_100_12_12/168d6584087dcd4c27bc3ca12614ba0c/tensorboard/events.out.tfevents.1747603589.lxhdfrwx3-cse.3158899.0 +3 -0
- METR-LA_100_12_12/168d6584087dcd4c27bc3ca12614ba0c/tensorboard/events.out.tfevents.1747603755.lxhdfrwx3-cse.3160021.0 +3 -0
- METR-LA_100_12_12/168d6584087dcd4c27bc3ca12614ba0c/tensorboard/events.out.tfevents.1747606366.lxhdfrwx3-cse.3175446.0 +3 -0
- METR-LA_100_12_12/168d6584087dcd4c27bc3ca12614ba0c/tensorboard/events.out.tfevents.1747606780.lxhdfrwx3-cse.3177719.0 +3 -0
- METR-LA_100_12_12/168d6584087dcd4c27bc3ca12614ba0c/tensorboard/events.out.tfevents.1747606998.lxhdfrwx3-cse.3179064.0 +3 -0
- METR-LA_100_12_12/168d6584087dcd4c27bc3ca12614ba0c/tensorboard/events.out.tfevents.1747607064.lxhdfrwx3-cse.3179695.0 +3 -0
- METR-LA_100_12_12/168d6584087dcd4c27bc3ca12614ba0c/tensorboard/events.out.tfevents.1747607163.lxhdfrwx3-cse.3180489.0 +3 -0
- METR-LA_100_12_12/168d6584087dcd4c27bc3ca12614ba0c/tensorboard/events.out.tfevents.1747607515.lxhdfrwx3-cse.3182382.0 +3 -0
- METR-LA_100_12_12/168d6584087dcd4c27bc3ca12614ba0c/tensorboard/events.out.tfevents.1747607530.lxhdfrwx3-cse.3182842.0 +3 -0
- METR-LA_100_12_12/168d6584087dcd4c27bc3ca12614ba0c/tensorboard/events.out.tfevents.1747607541.lxhdfrwx3-cse.3183210.0 +3 -0
- METR-LA_100_12_12/168d6584087dcd4c27bc3ca12614ba0c/tensorboard/events.out.tfevents.1747607566.lxhdfrwx3-cse.3183654.0 +3 -0
- METR-LA_100_12_12/168d6584087dcd4c27bc3ca12614ba0c/tensorboard/events.out.tfevents.1747607585.lxhdfrwx3-cse.3184082.0 +3 -0
- METR-LA_100_12_12/168d6584087dcd4c27bc3ca12614ba0c/tensorboard/events.out.tfevents.1747623680.lxhdfrwx3-cse.3184082.1 +3 -0
- METR-LA_100_12_12/168d6584087dcd4c27bc3ca12614ba0c/test_metrics.json +22 -0
- METR-LA_100_12_12/168d6584087dcd4c27bc3ca12614ba0c/test_results.npz +3 -0
- METR-LA_100_12_12/168d6584087dcd4c27bc3ca12614ba0c/training_log_20250518161630.log +57 -0
- METR-LA_100_12_12/168d6584087dcd4c27bc3ca12614ba0c/training_log_20250518162137.log +37 -0
- METR-LA_100_12_12/168d6584087dcd4c27bc3ca12614ba0c/training_log_20250518162545.log +37 -0
- METR-LA_100_12_12/168d6584087dcd4c27bc3ca12614ba0c/training_log_20250518162629.log +37 -0
- METR-LA_100_12_12/168d6584087dcd4c27bc3ca12614ba0c/training_log_20250518162915.log +53 -0
- METR-LA_100_12_12/168d6584087dcd4c27bc3ca12614ba0c/training_log_20250518171246.log +43 -0
- METR-LA_100_12_12/168d6584087dcd4c27bc3ca12614ba0c/training_log_20250518171940.log +53 -0
- METR-LA_100_12_12/168d6584087dcd4c27bc3ca12614ba0c/training_log_20250518172318.log +43 -0
- METR-LA_100_12_12/168d6584087dcd4c27bc3ca12614ba0c/training_log_20250518172424.log +53 -0
- METR-LA_100_12_12/168d6584087dcd4c27bc3ca12614ba0c/training_log_20250518172603.log +43 -0
- METR-LA_100_12_12/168d6584087dcd4c27bc3ca12614ba0c/training_log_20250518173155.log +35 -0
- METR-LA_100_12_12/168d6584087dcd4c27bc3ca12614ba0c/training_log_20250518173210.log +35 -0
- METR-LA_100_12_12/168d6584087dcd4c27bc3ca12614ba0c/training_log_20250518173221.log +35 -0
- METR-LA_100_12_12/168d6584087dcd4c27bc3ca12614ba0c/training_log_20250518173246.log +37 -0
- METR-LA_100_12_12/168d6584087dcd4c27bc3ca12614ba0c/training_log_20250518173305.log +0 -0
- PEMS-BAY_100_12_12/123/test_metrics.json +22 -0
- PEMS-BAY_100_12_12/123/test_results.npz +3 -0
- PEMS-BAY_100_12_12/92df827e277793626a6d4d8a1179deec/D2STGNN_100.pt +3 -0
- PEMS-BAY_100_12_12/92df827e277793626a6d4d8a1179deec/D2STGNN_best_val_MAE.pt +3 -0
- PEMS-BAY_100_12_12/92df827e277793626a6d4d8a1179deec/PEMS-BAY.py +157 -0
- PEMS-BAY_100_12_12/92df827e277793626a6d4d8a1179deec/cfg.txt +94 -0
- PEMS-BAY_100_12_12/92df827e277793626a6d4d8a1179deec/tensorboard/events.out.tfevents.1748213050.lxhdfrwx3-cse.793515.0 +3 -0
- PEMS-BAY_100_12_12/92df827e277793626a6d4d8a1179deec/tensorboard/events.out.tfevents.1748213067.lxhdfrwx3-cse.793862.0 +3 -0
- PEMS-BAY_100_12_12/92df827e277793626a6d4d8a1179deec/tensorboard/events.out.tfevents.1748213087.lxhdfrwx3-cse.794206.0 +3 -0
- PEMS-BAY_100_12_12/92df827e277793626a6d4d8a1179deec/tensorboard/events.out.tfevents.1748293487.lxhdfrwx3-cse.794206.1 +3 -0
- PEMS-BAY_100_12_12/92df827e277793626a6d4d8a1179deec/test_metrics.json +22 -0
- PEMS-BAY_100_12_12/92df827e277793626a6d4d8a1179deec/test_results.npz +3 -0
- PEMS-BAY_100_12_12/92df827e277793626a6d4d8a1179deec/training_log_20250525174410.log +53 -0
METR-LA_100_12_12/168d6584087dcd4c27bc3ca12614ba0c/D2STGNN_100.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2e60121ef99266e8424ea93546b2089439df93c70f23d95889cc278ec744bb5c
|
| 3 |
+
size 31676473
|
METR-LA_100_12_12/168d6584087dcd4c27bc3ca12614ba0c/D2STGNN_best_val_MAE.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c5b50df1444b017eb889cef9f3a003d8459d8e4662ef7ab8eb8618c5a50eb75c
|
| 3 |
+
size 31682883
|
METR-LA_100_12_12/168d6584087dcd4c27bc3ca12614ba0c/METR-LA.py
ADDED
|
@@ -0,0 +1,157 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import sys
|
| 3 |
+
import torch
|
| 4 |
+
from easydict import EasyDict
|
| 5 |
+
sys.path.append(os.path.abspath(__file__ + '/../../..'))
|
| 6 |
+
|
| 7 |
+
from basicts.metrics import masked_mae, masked_mape, masked_rmse
|
| 8 |
+
from basicts.data import TimeSeriesForecastingDataset
|
| 9 |
+
from basicts.runners import SimpleTimeSeriesForecastingRunner
|
| 10 |
+
from basicts.scaler import ZScoreScaler
|
| 11 |
+
from basicts.utils import get_regular_settings, load_adj
|
| 12 |
+
|
| 13 |
+
from .arch import D2STGNN
|
| 14 |
+
|
| 15 |
+
############################## Hot Parameters ##############################
|
| 16 |
+
# Dataset & Metrics configuration
|
| 17 |
+
DATA_NAME = 'METR-LA' # Dataset name
|
| 18 |
+
regular_settings = get_regular_settings(DATA_NAME)
|
| 19 |
+
INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
|
| 20 |
+
OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
|
| 21 |
+
TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
|
| 22 |
+
NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
|
| 23 |
+
RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
|
| 24 |
+
NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
|
| 25 |
+
# Model architecture and parameters
|
| 26 |
+
MODEL_ARCH = D2STGNN
|
| 27 |
+
adj_mx, _ = load_adj("datasets/" + DATA_NAME +
|
| 28 |
+
"/adj_mx.pkl", "doubletransition")
|
| 29 |
+
MODEL_PARAM = {
|
| 30 |
+
"num_feat": 1,
|
| 31 |
+
"num_hidden": 32,
|
| 32 |
+
"dropout": 0.1,
|
| 33 |
+
"seq_length": 12,
|
| 34 |
+
"k_t": 3,
|
| 35 |
+
"k_s": 2,
|
| 36 |
+
"gap": 3,
|
| 37 |
+
"num_nodes": 207,
|
| 38 |
+
"adjs": [torch.tensor(adj) for adj in adj_mx],
|
| 39 |
+
"num_layers": 5,
|
| 40 |
+
"num_modalities": 2,
|
| 41 |
+
"node_hidden": 10,
|
| 42 |
+
"time_emb_dim": 10,
|
| 43 |
+
"time_in_day_size": 288,
|
| 44 |
+
"day_in_week_size": 7,
|
| 45 |
+
}
|
| 46 |
+
NUM_EPOCHS = 100
|
| 47 |
+
|
| 48 |
+
############################## General Configuration ##############################
|
| 49 |
+
CFG = EasyDict()
|
| 50 |
+
# General settings
|
| 51 |
+
CFG.DESCRIPTION = 'An Example Config'
|
| 52 |
+
CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
|
| 53 |
+
# Runner
|
| 54 |
+
CFG.RUNNER = SimpleTimeSeriesForecastingRunner
|
| 55 |
+
|
| 56 |
+
############################## Dataset Configuration ##############################
|
| 57 |
+
CFG.DATASET = EasyDict()
|
| 58 |
+
# Dataset settings
|
| 59 |
+
CFG.DATASET.NAME = DATA_NAME
|
| 60 |
+
CFG.DATASET.TYPE = TimeSeriesForecastingDataset
|
| 61 |
+
CFG.DATASET.PARAM = EasyDict({
|
| 62 |
+
'dataset_name': DATA_NAME,
|
| 63 |
+
'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
|
| 64 |
+
'input_len': INPUT_LEN,
|
| 65 |
+
'output_len': OUTPUT_LEN,
|
| 66 |
+
# 'mode' is automatically set by the runner
|
| 67 |
+
})
|
| 68 |
+
|
| 69 |
+
############################## Scaler Configuration ##############################
|
| 70 |
+
CFG.SCALER = EasyDict()
|
| 71 |
+
# Scaler settings
|
| 72 |
+
CFG.SCALER.TYPE = ZScoreScaler # Scaler class
|
| 73 |
+
CFG.SCALER.PARAM = EasyDict({
|
| 74 |
+
'dataset_name': DATA_NAME,
|
| 75 |
+
'train_ratio': TRAIN_VAL_TEST_RATIO[0],
|
| 76 |
+
'norm_each_channel': NORM_EACH_CHANNEL,
|
| 77 |
+
'rescale': RESCALE,
|
| 78 |
+
})
|
| 79 |
+
|
| 80 |
+
############################## Model Configuration ##############################
|
| 81 |
+
CFG.MODEL = EasyDict()
|
| 82 |
+
# Model settings
|
| 83 |
+
CFG.MODEL.NAME = MODEL_ARCH.__name__
|
| 84 |
+
CFG.MODEL.ARCH = MODEL_ARCH
|
| 85 |
+
CFG.MODEL.PARAM = MODEL_PARAM
|
| 86 |
+
CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
|
| 87 |
+
CFG.MODEL.TARGET_FEATURES = [0]
|
| 88 |
+
|
| 89 |
+
############################## Metrics Configuration ##############################
|
| 90 |
+
|
| 91 |
+
CFG.METRICS = EasyDict()
|
| 92 |
+
# Metrics settings
|
| 93 |
+
CFG.METRICS.FUNCS = EasyDict({
|
| 94 |
+
'MAE': masked_mae,
|
| 95 |
+
'MAPE': masked_mape,
|
| 96 |
+
'RMSE': masked_rmse,
|
| 97 |
+
})
|
| 98 |
+
CFG.METRICS.TARGET = 'MAE'
|
| 99 |
+
CFG.METRICS.NULL_VAL = NULL_VAL
|
| 100 |
+
|
| 101 |
+
############################## Training Configuration ##############################
|
| 102 |
+
CFG.TRAIN = EasyDict()
|
| 103 |
+
CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
|
| 104 |
+
CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
|
| 105 |
+
'checkpoints',
|
| 106 |
+
MODEL_ARCH.__name__,
|
| 107 |
+
'_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
|
| 108 |
+
)
|
| 109 |
+
CFG.TRAIN.LOSS = masked_mae
|
| 110 |
+
# Optimizer settings
|
| 111 |
+
CFG.TRAIN.OPTIM = EasyDict()
|
| 112 |
+
CFG.TRAIN.OPTIM.TYPE = "Adam"
|
| 113 |
+
CFG.TRAIN.OPTIM.PARAM = {
|
| 114 |
+
"lr": 0.002,
|
| 115 |
+
"weight_decay": 1.0e-5,
|
| 116 |
+
"eps": 1.0e-8
|
| 117 |
+
}
|
| 118 |
+
# Learning rate scheduler settings
|
| 119 |
+
CFG.TRAIN.LR_SCHEDULER = EasyDict()
|
| 120 |
+
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
|
| 121 |
+
CFG.TRAIN.LR_SCHEDULER.PARAM = {
|
| 122 |
+
"milestones": [1, 30, 38, 46, 54, 62, 70, 80],
|
| 123 |
+
"gamma": 0.5
|
| 124 |
+
}
|
| 125 |
+
# Train data loader settings
|
| 126 |
+
CFG.TRAIN.DATA = EasyDict()
|
| 127 |
+
CFG.TRAIN.DATA.BATCH_SIZE = 128
|
| 128 |
+
CFG.TRAIN.DATA.SHUFFLE = True
|
| 129 |
+
# Gradient clipping settings
|
| 130 |
+
CFG.TRAIN.CLIP_GRAD_PARAM = {
|
| 131 |
+
"max_norm": 5.0
|
| 132 |
+
}
|
| 133 |
+
# Curriculum learning
|
| 134 |
+
CFG.TRAIN.CL = EasyDict()
|
| 135 |
+
CFG.TRAIN.CL.WARM_EPOCHS = 0
|
| 136 |
+
CFG.TRAIN.CL.CL_EPOCHS = 6
|
| 137 |
+
CFG.TRAIN.CL.PREDICTION_LENGTH = 12
|
| 138 |
+
|
| 139 |
+
############################## Validation Configuration ##############################
|
| 140 |
+
CFG.VAL = EasyDict()
|
| 141 |
+
CFG.VAL.INTERVAL = 1
|
| 142 |
+
CFG.VAL.DATA = EasyDict()
|
| 143 |
+
CFG.VAL.DATA.BATCH_SIZE = 64
|
| 144 |
+
|
| 145 |
+
############################## Test Configuration ##############################
|
| 146 |
+
CFG.TEST = EasyDict()
|
| 147 |
+
CFG.TEST.INTERVAL = 1
|
| 148 |
+
CFG.TEST.DATA = EasyDict()
|
| 149 |
+
CFG.TEST.DATA.BATCH_SIZE = 64
|
| 150 |
+
|
| 151 |
+
############################## Evaluation Configuration ##############################
|
| 152 |
+
|
| 153 |
+
CFG.EVAL = EasyDict()
|
| 154 |
+
|
| 155 |
+
# Evaluation parameters
|
| 156 |
+
CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
|
| 157 |
+
CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
|
METR-LA_100_12_12/168d6584087dcd4c27bc3ca12614ba0c/cfg.txt
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
DESCRIPTION: An Example Config
|
| 2 |
+
GPU_NUM: 1
|
| 3 |
+
RUNNER: <class 'basicts.runners.runner_zoo.simple_tsf_runner.SimpleTimeSeriesForecastingRunner'>
|
| 4 |
+
DATASET:
|
| 5 |
+
NAME: METR-LA
|
| 6 |
+
TYPE: <class 'basicts.data.simple_tsf_dataset.TimeSeriesForecastingDataset'>
|
| 7 |
+
PARAM:
|
| 8 |
+
dataset_name: METR-LA
|
| 9 |
+
train_val_test_ratio: [0.7, 0.1, 0.2]
|
| 10 |
+
input_len: 12
|
| 11 |
+
output_len: 12
|
| 12 |
+
SCALER:
|
| 13 |
+
TYPE: <class 'basicts.scaler.z_score_scaler.ZScoreScaler'>
|
| 14 |
+
PARAM:
|
| 15 |
+
dataset_name: METR-LA
|
| 16 |
+
train_ratio: 0.7
|
| 17 |
+
norm_each_channel: False
|
| 18 |
+
rescale: True
|
| 19 |
+
MODEL:
|
| 20 |
+
NAME: D2STGNN
|
| 21 |
+
ARCH: <class 'baselines.D2STGNN.arch.d2stgnn_arch.D2STGNN'>
|
| 22 |
+
PARAM:
|
| 23 |
+
num_feat: 1
|
| 24 |
+
num_hidden: 32
|
| 25 |
+
dropout: 0.1
|
| 26 |
+
seq_length: 12
|
| 27 |
+
k_t: 3
|
| 28 |
+
k_s: 2
|
| 29 |
+
gap: 3
|
| 30 |
+
num_nodes: 207
|
| 31 |
+
adjs: [tensor([[0.2050, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000],
|
| 32 |
+
[0.0000, 0.2626, 0.1503, ..., 0.0000, 0.0000, 0.0000],
|
| 33 |
+
[0.0000, 0.1027, 0.2095, ..., 0.0000, 0.0000, 0.0000],
|
| 34 |
+
...,
|
| 35 |
+
[0.0000, 0.0000, 0.0000, ..., 0.2788, 0.0000, 0.0000],
|
| 36 |
+
[0.0000, 0.0000, 0.0000, ..., 0.0000, 0.2645, 0.0000],
|
| 37 |
+
[0.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.1408]]), tensor([[0.2452, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000],
|
| 38 |
+
[0.0000, 0.1789, 0.0968, ..., 0.0000, 0.0000, 0.0000],
|
| 39 |
+
[0.0000, 0.1283, 0.2475, ..., 0.0000, 0.0000, 0.0000],
|
| 40 |
+
...,
|
| 41 |
+
[0.0000, 0.0000, 0.0000, ..., 0.4463, 0.0000, 0.0000],
|
| 42 |
+
[0.0000, 0.0000, 0.0000, ..., 0.0000, 0.2833, 0.0000],
|
| 43 |
+
[0.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.1831]])]
|
| 44 |
+
num_layers: 5
|
| 45 |
+
num_modalities: 2
|
| 46 |
+
node_hidden: 10
|
| 47 |
+
time_emb_dim: 10
|
| 48 |
+
time_in_day_size: 288
|
| 49 |
+
day_in_week_size: 7
|
| 50 |
+
FORWARD_FEATURES: [0, 1, 2]
|
| 51 |
+
TARGET_FEATURES: [0]
|
| 52 |
+
METRICS:
|
| 53 |
+
FUNCS:
|
| 54 |
+
MAE: masked_mae
|
| 55 |
+
MAPE: masked_mape
|
| 56 |
+
RMSE: masked_rmse
|
| 57 |
+
TARGET: MAE
|
| 58 |
+
NULL_VAL: 0.0
|
| 59 |
+
TRAIN:
|
| 60 |
+
NUM_EPOCHS: 100
|
| 61 |
+
CKPT_SAVE_DIR: checkpoints/D2STGNN/METR-LA_100_12_12
|
| 62 |
+
LOSS: masked_mae
|
| 63 |
+
OPTIM:
|
| 64 |
+
TYPE: Adam
|
| 65 |
+
PARAM:
|
| 66 |
+
lr: 0.002
|
| 67 |
+
weight_decay: 1e-05
|
| 68 |
+
eps: 1e-08
|
| 69 |
+
LR_SCHEDULER:
|
| 70 |
+
TYPE: MultiStepLR
|
| 71 |
+
PARAM:
|
| 72 |
+
milestones: [1, 30, 38, 46, 54, 62, 70, 80]
|
| 73 |
+
gamma: 0.5
|
| 74 |
+
DATA:
|
| 75 |
+
BATCH_SIZE: 128
|
| 76 |
+
SHUFFLE: True
|
| 77 |
+
CLIP_GRAD_PARAM:
|
| 78 |
+
max_norm: 5.0
|
| 79 |
+
CL:
|
| 80 |
+
WARM_EPOCHS: 0
|
| 81 |
+
CL_EPOCHS: 6
|
| 82 |
+
PREDICTION_LENGTH: 12
|
| 83 |
+
VAL:
|
| 84 |
+
INTERVAL: 1
|
| 85 |
+
DATA:
|
| 86 |
+
BATCH_SIZE: 64
|
| 87 |
+
TEST:
|
| 88 |
+
INTERVAL: 1
|
| 89 |
+
DATA:
|
| 90 |
+
BATCH_SIZE: 64
|
| 91 |
+
EVAL:
|
| 92 |
+
HORIZONS: [3, 6, 12]
|
| 93 |
+
USE_GPU: True
|
| 94 |
+
MD5: 168d6584087dcd4c27bc3ca12614ba0c
|
METR-LA_100_12_12/168d6584087dcd4c27bc3ca12614ba0c/tensorboard/events.out.tfevents.1747602990.lxhdfrwx3-cse.3155086.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:de7ed69473c982160fd5cc6e1003a0ca752a71d623a352ae5967db1bbcbe998b
|
| 3 |
+
size 88
|
METR-LA_100_12_12/168d6584087dcd4c27bc3ca12614ba0c/tensorboard/events.out.tfevents.1747603297.lxhdfrwx3-cse.3156865.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a07baf04578c2ed82335bd8859b6dd172a2cac21a95a5ed8d88e0f7ac8baff65
|
| 3 |
+
size 88
|
METR-LA_100_12_12/168d6584087dcd4c27bc3ca12614ba0c/tensorboard/events.out.tfevents.1747603545.lxhdfrwx3-cse.3158332.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:70eb3e83e95039ff694e29d8cdc40f4979c2f68e94f8773ae607af7e76a9724a
|
| 3 |
+
size 88
|
METR-LA_100_12_12/168d6584087dcd4c27bc3ca12614ba0c/tensorboard/events.out.tfevents.1747603589.lxhdfrwx3-cse.3158899.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:08ee0a45d564096332d2aaebfd9308305296c557e47e9db0c39b2cd94d531eff
|
| 3 |
+
size 88
|
METR-LA_100_12_12/168d6584087dcd4c27bc3ca12614ba0c/tensorboard/events.out.tfevents.1747603755.lxhdfrwx3-cse.3160021.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9e8e48cff48ba88fdd8252518268b6475058f1a5a33387bcc92a03370e022f72
|
| 3 |
+
size 88
|
METR-LA_100_12_12/168d6584087dcd4c27bc3ca12614ba0c/tensorboard/events.out.tfevents.1747606366.lxhdfrwx3-cse.3175446.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e96a93fc8be9dc6526e9d470e415f1dcbc794133ed122547ed49d5b08bd8ca94
|
| 3 |
+
size 88
|
METR-LA_100_12_12/168d6584087dcd4c27bc3ca12614ba0c/tensorboard/events.out.tfevents.1747606780.lxhdfrwx3-cse.3177719.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8cd8d31447314f5b8796ff8bf936a7d0cc9c004dbf8637c7f119db0fd9cb16af
|
| 3 |
+
size 88
|
METR-LA_100_12_12/168d6584087dcd4c27bc3ca12614ba0c/tensorboard/events.out.tfevents.1747606998.lxhdfrwx3-cse.3179064.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c9d4c4f942c2157b50f2c06e522e8c05bdb2899eab01a60bc09a1bae20c953e6
|
| 3 |
+
size 88
|
METR-LA_100_12_12/168d6584087dcd4c27bc3ca12614ba0c/tensorboard/events.out.tfevents.1747607064.lxhdfrwx3-cse.3179695.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a4817dbb697c90548bbc463f4a401e6eef9b732d3549a22019407d61ba232f4c
|
| 3 |
+
size 88
|
METR-LA_100_12_12/168d6584087dcd4c27bc3ca12614ba0c/tensorboard/events.out.tfevents.1747607163.lxhdfrwx3-cse.3180489.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bbe074412c8b4710b127cd8461a81f09f7fc33e60a61968899163862a6fb19aa
|
| 3 |
+
size 88
|
METR-LA_100_12_12/168d6584087dcd4c27bc3ca12614ba0c/tensorboard/events.out.tfevents.1747607515.lxhdfrwx3-cse.3182382.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c9e4abf850367ff3fb9260f6f10d26b6c1058e8076c427b18fef59d2adb9bf02
|
| 3 |
+
size 88
|
METR-LA_100_12_12/168d6584087dcd4c27bc3ca12614ba0c/tensorboard/events.out.tfevents.1747607530.lxhdfrwx3-cse.3182842.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b5d3d660d530bd67bbeb5b8b6ca56f1e542f8ba7f21ecaeb279dd8591894dbfe
|
| 3 |
+
size 88
|
METR-LA_100_12_12/168d6584087dcd4c27bc3ca12614ba0c/tensorboard/events.out.tfevents.1747607541.lxhdfrwx3-cse.3183210.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ad9e181fd02c743d96d3cce3cd7b28696690ac7cd78241fb403de6319ee1d5f6
|
| 3 |
+
size 88
|
METR-LA_100_12_12/168d6584087dcd4c27bc3ca12614ba0c/tensorboard/events.out.tfevents.1747607566.lxhdfrwx3-cse.3183654.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:092bd8ae415481e66e925b5e357188e17e2a15cd6fb11a6f3a48e535ee482447
|
| 3 |
+
size 88
|
METR-LA_100_12_12/168d6584087dcd4c27bc3ca12614ba0c/tensorboard/events.out.tfevents.1747607585.lxhdfrwx3-cse.3184082.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0621721b772d520708abb7a0950c668b0ee38801d3cd1c270a1ca82c31937049
|
| 3 |
+
size 60788
|
METR-LA_100_12_12/168d6584087dcd4c27bc3ca12614ba0c/tensorboard/events.out.tfevents.1747623680.lxhdfrwx3-cse.3184082.1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:905ce33b6af1cf583c61e5623e56c54c7cf06d28772c77bb08c691b033b022d1
|
| 3 |
+
size 275
|
METR-LA_100_12_12/168d6584087dcd4c27bc3ca12614ba0c/test_metrics.json
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"horizon_3": {
|
| 3 |
+
"MAE": 2.636579990386963,
|
| 4 |
+
"MAPE": 0.06611330807209015,
|
| 5 |
+
"RMSE": 4.963271141052246
|
| 6 |
+
},
|
| 7 |
+
"horizon_6": {
|
| 8 |
+
"MAE": 2.9962539672851562,
|
| 9 |
+
"MAPE": 0.07912801206111908,
|
| 10 |
+
"RMSE": 5.93849515914917
|
| 11 |
+
},
|
| 12 |
+
"horizon_12": {
|
| 13 |
+
"MAE": 3.447462320327759,
|
| 14 |
+
"MAPE": 0.09719714522361755,
|
| 15 |
+
"RMSE": 6.9562201499938965
|
| 16 |
+
},
|
| 17 |
+
"overall": {
|
| 18 |
+
"MAE": 2.9620234966278076,
|
| 19 |
+
"MAPE": 0.07846397906541824,
|
| 20 |
+
"RMSE": 5.881142616271973
|
| 21 |
+
}
|
| 22 |
+
}
|
METR-LA_100_12_12/168d6584087dcd4c27bc3ca12614ba0c/test_results.npz
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:83562ad16073643415fca5603487a4d8ee2942f10adf0c05863655423961e823
|
| 3 |
+
size 203619210
|
METR-LA_100_12_12/168d6584087dcd4c27bc3ca12614ba0c/training_log_20250518161630.log
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2025-05-18 16:16:30,203 - easytorch-training - INFO - Initializing training.
|
| 2 |
+
2025-05-18 16:16:30,203 - easytorch-training - INFO - Set clip grad, param: {'max_norm': 5.0}
|
| 3 |
+
2025-05-18 16:16:30,203 - easytorch-training - INFO - Building training data loader.
|
| 4 |
+
2025-05-18 16:16:30,242 - easytorch-training - INFO - Train dataset length: 23968
|
| 5 |
+
2025-05-18 16:16:30,243 - easytorch-training - INFO - Set optim: Adam (
|
| 6 |
+
Parameter Group 0
|
| 7 |
+
amsgrad: False
|
| 8 |
+
betas: (0.9, 0.999)
|
| 9 |
+
capturable: False
|
| 10 |
+
differentiable: False
|
| 11 |
+
eps: 1e-08
|
| 12 |
+
foreach: None
|
| 13 |
+
fused: None
|
| 14 |
+
lr: 0.002
|
| 15 |
+
maximize: False
|
| 16 |
+
weight_decay: 1e-05
|
| 17 |
+
)
|
| 18 |
+
2025-05-18 16:16:30,244 - easytorch-training - INFO - Set lr_scheduler: <torch.optim.lr_scheduler.MultiStepLR object at 0x734bf49072e0>
|
| 19 |
+
2025-05-18 16:16:30,246 - easytorch-training - INFO - Initializing validation.
|
| 20 |
+
2025-05-18 16:16:30,246 - easytorch-training - INFO - Building val data loader.
|
| 21 |
+
2025-05-18 16:16:30,253 - easytorch-training - INFO - Validation dataset length: 3404
|
| 22 |
+
2025-05-18 16:16:30,265 - easytorch-training - INFO - Test dataset length: 6831
|
| 23 |
+
2025-05-18 16:16:30,266 - easytorch-training - INFO - Number of parameters: 578501
|
| 24 |
+
2025-05-18 16:16:30,266 - easytorch-training - INFO - Epoch 1 / 100
|
| 25 |
+
2025-05-18 16:16:31,692 - easytorch-training - ERROR - Traceback (most recent call last):
|
| 26 |
+
File "/home/UNT/cy0265/.local/lib/python3.10/site-packages/easytorch/launcher/launcher.py", line 31, in training_func
|
| 27 |
+
runner.train(cfg)
|
| 28 |
+
File "/mnt/RAID/BasicTS/basicts/runners/base_epoch_runner.py", line 432, in train
|
| 29 |
+
loss = self.train_iters(epoch, iter_index, data)
|
| 30 |
+
File "/mnt/RAID/BasicTS/basicts/runners/base_tsf_runner.py", line 340, in train_iters
|
| 31 |
+
forward_return = self.forward(data=data, epoch=epoch, iter_num=iter_num, train=True)
|
| 32 |
+
File "/mnt/RAID/BasicTS/basicts/runners/runner_zoo/simple_tsf_runner.py", line 101, in forward
|
| 33 |
+
model_return = self.model(history_data=history_data, future_data=future_data_4_dec,
|
| 34 |
+
File "/home/UNT/cy0265/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
|
| 35 |
+
return forward_call(*args, **kwargs)
|
| 36 |
+
File "/mnt/RAID/BasicTS/baselines/D2STGNN/arch/d2stgnn_arch.py", line 176, in forward
|
| 37 |
+
tem_backcast_seq_res, spa_forecast_hidden, tem_forecast_hidden = layer(
|
| 38 |
+
File "/home/UNT/cy0265/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
|
| 39 |
+
return forward_call(*args, **kwargs)
|
| 40 |
+
File "/mnt/RAID/BasicTS/baselines/D2STGNN/arch/d2stgnn_arch.py", line 40, in forward
|
| 41 |
+
dif_backcast_seq_res, dif_forecast_hidden = self.dif_layer(
|
| 42 |
+
File "/home/UNT/cy0265/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
|
| 43 |
+
return forward_call(*args, **kwargs)
|
| 44 |
+
File "/mnt/RAID/BasicTS/baselines/D2STGNN/arch/difusion_block/dif_block.py", line 25, in forward
|
| 45 |
+
forecast_hidden = self.forecast_branch(
|
| 46 |
+
File "/home/UNT/cy0265/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
|
| 47 |
+
return forward_call(*args, **kwargs)
|
| 48 |
+
File "/mnt/RAID/BasicTS/baselines/D2STGNN/arch/difusion_block/forecast.py", line 28, in forward
|
| 49 |
+
predict.append(st_l_conv(_1, dynamic_graph, static_graph))
|
| 50 |
+
File "/home/UNT/cy0265/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
|
| 51 |
+
return forward_call(*args, **kwargs)
|
| 52 |
+
File "/mnt/RAID/BasicTS/baselines/D2STGNN/arch/difusion_block/dif_model.py", line 89, in forward
|
| 53 |
+
support = support + self.get_graph(static_graph)
|
| 54 |
+
File "/mnt/RAID/BasicTS/baselines/D2STGNN/arch/difusion_block/dif_model.py", line 52, in get_graph
|
| 55 |
+
mask = 1 - torch.eye(support[0].shape[0]).to(support[0].device)
|
| 56 |
+
KeyboardInterrupt
|
| 57 |
+
|
METR-LA_100_12_12/168d6584087dcd4c27bc3ca12614ba0c/training_log_20250518162137.log
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2025-05-18 16:21:37,434 - easytorch-training - INFO - Initializing training.
|
| 2 |
+
2025-05-18 16:21:37,435 - easytorch-training - INFO - Set clip grad, param: {'max_norm': 5.0}
|
| 3 |
+
2025-05-18 16:21:37,435 - easytorch-training - INFO - Building training data loader.
|
| 4 |
+
2025-05-18 16:21:37,475 - easytorch-training - INFO - Train dataset length: 23968
|
| 5 |
+
2025-05-18 16:21:37,476 - easytorch-training - INFO - Set optim: Adam (
|
| 6 |
+
Parameter Group 0
|
| 7 |
+
amsgrad: False
|
| 8 |
+
betas: (0.9, 0.999)
|
| 9 |
+
capturable: False
|
| 10 |
+
differentiable: False
|
| 11 |
+
eps: 1e-08
|
| 12 |
+
foreach: None
|
| 13 |
+
fused: None
|
| 14 |
+
lr: 0.002
|
| 15 |
+
maximize: False
|
| 16 |
+
weight_decay: 1e-05
|
| 17 |
+
)
|
| 18 |
+
2025-05-18 16:21:37,477 - easytorch-training - INFO - Set lr_scheduler: <torch.optim.lr_scheduler.MultiStepLR object at 0x75c18030f280>
|
| 19 |
+
2025-05-18 16:21:37,479 - easytorch-training - INFO - Initializing validation.
|
| 20 |
+
2025-05-18 16:21:37,479 - easytorch-training - INFO - Building val data loader.
|
| 21 |
+
2025-05-18 16:21:37,486 - easytorch-training - INFO - Validation dataset length: 3404
|
| 22 |
+
2025-05-18 16:21:37,498 - easytorch-training - INFO - Test dataset length: 6831
|
| 23 |
+
2025-05-18 16:21:37,499 - easytorch-training - INFO - Number of parameters: 578501
|
| 24 |
+
2025-05-18 16:21:37,499 - easytorch-training - INFO - Epoch 1 / 100
|
| 25 |
+
2025-05-18 16:21:38,851 - easytorch-training - ERROR - Traceback (most recent call last):
|
| 26 |
+
File "/home/UNT/cy0265/.local/lib/python3.10/site-packages/easytorch/launcher/launcher.py", line 31, in training_func
|
| 27 |
+
runner.train(cfg)
|
| 28 |
+
File "/mnt/RAID/BasicTS/basicts/runners/base_epoch_runner.py", line 434, in train
|
| 29 |
+
self.backward(loss)
|
| 30 |
+
File "/mnt/RAID/BasicTS/basicts/runners/base_epoch_runner.py", line 768, in backward
|
| 31 |
+
loss.backward()
|
| 32 |
+
File "/home/UNT/cy0265/.local/lib/python3.10/site-packages/torch/_tensor.py", line 487, in backward
|
| 33 |
+
torch.autograd.backward(
|
| 34 |
+
File "/home/UNT/cy0265/.local/lib/python3.10/site-packages/torch/autograd/__init__.py", line 200, in backward
|
| 35 |
+
Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
|
| 36 |
+
KeyboardInterrupt
|
| 37 |
+
|
METR-LA_100_12_12/168d6584087dcd4c27bc3ca12614ba0c/training_log_20250518162545.log
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2025-05-18 16:25:45,072 - easytorch-training - INFO - Initializing training.
|
| 2 |
+
2025-05-18 16:25:45,073 - easytorch-training - INFO - Set clip grad, param: {'max_norm': 5.0}
|
| 3 |
+
2025-05-18 16:25:45,073 - easytorch-training - INFO - Building training data loader.
|
| 4 |
+
2025-05-18 16:25:45,108 - easytorch-training - INFO - Train dataset length: 23968
|
| 5 |
+
2025-05-18 16:25:45,109 - easytorch-training - INFO - Set optim: Adam (
|
| 6 |
+
Parameter Group 0
|
| 7 |
+
amsgrad: False
|
| 8 |
+
betas: (0.9, 0.999)
|
| 9 |
+
capturable: False
|
| 10 |
+
differentiable: False
|
| 11 |
+
eps: 1e-08
|
| 12 |
+
foreach: None
|
| 13 |
+
fused: None
|
| 14 |
+
lr: 0.002
|
| 15 |
+
maximize: False
|
| 16 |
+
weight_decay: 1e-05
|
| 17 |
+
)
|
| 18 |
+
2025-05-18 16:25:45,110 - easytorch-training - INFO - Set lr_scheduler: <torch.optim.lr_scheduler.MultiStepLR object at 0x74b7d83232b0>
|
| 19 |
+
2025-05-18 16:25:45,112 - easytorch-training - INFO - Initializing validation.
|
| 20 |
+
2025-05-18 16:25:45,112 - easytorch-training - INFO - Building val data loader.
|
| 21 |
+
2025-05-18 16:25:45,118 - easytorch-training - INFO - Validation dataset length: 3404
|
| 22 |
+
2025-05-18 16:25:45,128 - easytorch-training - INFO - Test dataset length: 6831
|
| 23 |
+
2025-05-18 16:25:45,129 - easytorch-training - INFO - Number of parameters: 578501
|
| 24 |
+
2025-05-18 16:25:45,129 - easytorch-training - INFO - Epoch 1 / 100
|
| 25 |
+
2025-05-18 16:25:46,966 - easytorch-training - ERROR - Traceback (most recent call last):
|
| 26 |
+
File "/home/UNT/cy0265/.local/lib/python3.10/site-packages/easytorch/launcher/launcher.py", line 31, in training_func
|
| 27 |
+
runner.train(cfg)
|
| 28 |
+
File "/mnt/RAID/BasicTS/basicts/runners/base_epoch_runner.py", line 434, in train
|
| 29 |
+
self.backward(loss)
|
| 30 |
+
File "/mnt/RAID/BasicTS/basicts/runners/base_epoch_runner.py", line 768, in backward
|
| 31 |
+
loss.backward()
|
| 32 |
+
File "/home/UNT/cy0265/.local/lib/python3.10/site-packages/torch/_tensor.py", line 487, in backward
|
| 33 |
+
torch.autograd.backward(
|
| 34 |
+
File "/home/UNT/cy0265/.local/lib/python3.10/site-packages/torch/autograd/__init__.py", line 200, in backward
|
| 35 |
+
Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
|
| 36 |
+
KeyboardInterrupt
|
| 37 |
+
|
METR-LA_100_12_12/168d6584087dcd4c27bc3ca12614ba0c/training_log_20250518162629.log
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2025-05-18 16:26:29,289 - easytorch-training - INFO - Initializing training.
|
| 2 |
+
2025-05-18 16:26:29,289 - easytorch-training - INFO - Set clip grad, param: {'max_norm': 5.0}
|
| 3 |
+
2025-05-18 16:26:29,289 - easytorch-training - INFO - Building training data loader.
|
| 4 |
+
2025-05-18 16:26:29,327 - easytorch-training - INFO - Train dataset length: 23968
|
| 5 |
+
2025-05-18 16:26:29,328 - easytorch-training - INFO - Set optim: Adam (
|
| 6 |
+
Parameter Group 0
|
| 7 |
+
amsgrad: False
|
| 8 |
+
betas: (0.9, 0.999)
|
| 9 |
+
capturable: False
|
| 10 |
+
differentiable: False
|
| 11 |
+
eps: 1e-08
|
| 12 |
+
foreach: None
|
| 13 |
+
fused: None
|
| 14 |
+
lr: 0.002
|
| 15 |
+
maximize: False
|
| 16 |
+
weight_decay: 1e-05
|
| 17 |
+
)
|
| 18 |
+
2025-05-18 16:26:29,328 - easytorch-training - INFO - Set lr_scheduler: <torch.optim.lr_scheduler.MultiStepLR object at 0x7c336dd0f2e0>
|
| 19 |
+
2025-05-18 16:26:29,331 - easytorch-training - INFO - Initializing validation.
|
| 20 |
+
2025-05-18 16:26:29,331 - easytorch-training - INFO - Building val data loader.
|
| 21 |
+
2025-05-18 16:26:29,338 - easytorch-training - INFO - Validation dataset length: 3404
|
| 22 |
+
2025-05-18 16:26:29,350 - easytorch-training - INFO - Test dataset length: 6831
|
| 23 |
+
2025-05-18 16:26:29,350 - easytorch-training - INFO - Number of parameters: 578501
|
| 24 |
+
2025-05-18 16:26:29,351 - easytorch-training - INFO - Epoch 1 / 100
|
| 25 |
+
2025-05-18 16:26:30,707 - easytorch-training - ERROR - Traceback (most recent call last):
|
| 26 |
+
File "/home/UNT/cy0265/.local/lib/python3.10/site-packages/easytorch/launcher/launcher.py", line 31, in training_func
|
| 27 |
+
runner.train(cfg)
|
| 28 |
+
File "/mnt/RAID/BasicTS/basicts/runners/base_epoch_runner.py", line 434, in train
|
| 29 |
+
self.backward(loss)
|
| 30 |
+
File "/mnt/RAID/BasicTS/basicts/runners/base_epoch_runner.py", line 768, in backward
|
| 31 |
+
loss.backward()
|
| 32 |
+
File "/home/UNT/cy0265/.local/lib/python3.10/site-packages/torch/_tensor.py", line 487, in backward
|
| 33 |
+
torch.autograd.backward(
|
| 34 |
+
File "/home/UNT/cy0265/.local/lib/python3.10/site-packages/torch/autograd/__init__.py", line 200, in backward
|
| 35 |
+
Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
|
| 36 |
+
KeyboardInterrupt
|
| 37 |
+
|
METR-LA_100_12_12/168d6584087dcd4c27bc3ca12614ba0c/training_log_20250518162915.log
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2025-05-18 16:29:15,659 - easytorch-training - INFO - Initializing training.
|
| 2 |
+
2025-05-18 16:29:15,659 - easytorch-training - INFO - Set clip grad, param: {'max_norm': 5.0}
|
| 3 |
+
2025-05-18 16:29:15,659 - easytorch-training - INFO - Building training data loader.
|
| 4 |
+
2025-05-18 16:29:15,681 - easytorch-training - INFO - Train dataset length: 23968
|
| 5 |
+
2025-05-18 16:29:15,682 - easytorch-training - INFO - Set optim: Adam (
|
| 6 |
+
Parameter Group 0
|
| 7 |
+
amsgrad: False
|
| 8 |
+
betas: (0.9, 0.999)
|
| 9 |
+
capturable: False
|
| 10 |
+
differentiable: False
|
| 11 |
+
eps: 1e-08
|
| 12 |
+
foreach: None
|
| 13 |
+
fused: None
|
| 14 |
+
lr: 0.002
|
| 15 |
+
maximize: False
|
| 16 |
+
weight_decay: 1e-05
|
| 17 |
+
)
|
| 18 |
+
2025-05-18 16:29:15,682 - easytorch-training - INFO - Set lr_scheduler: <torch.optim.lr_scheduler.MultiStepLR object at 0x7d470e51b310>
|
| 19 |
+
2025-05-18 16:29:15,684 - easytorch-training - INFO - Initializing validation.
|
| 20 |
+
2025-05-18 16:29:15,684 - easytorch-training - INFO - Building val data loader.
|
| 21 |
+
2025-05-18 16:29:15,688 - easytorch-training - INFO - Validation dataset length: 3404
|
| 22 |
+
2025-05-18 16:29:15,696 - easytorch-training - INFO - Test dataset length: 6831
|
| 23 |
+
2025-05-18 16:29:15,696 - easytorch-training - INFO - Number of parameters: 578501
|
| 24 |
+
2025-05-18 16:29:15,696 - easytorch-training - INFO - Epoch 1 / 100
|
| 25 |
+
2025-05-18 16:29:17,061 - easytorch-training - ERROR - Traceback (most recent call last):
|
| 26 |
+
File "/home/UNT/cy0265/.local/lib/python3.10/site-packages/easytorch/launcher/launcher.py", line 31, in training_func
|
| 27 |
+
runner.train(cfg)
|
| 28 |
+
File "/mnt/RAID/BasicTS/basicts/runners/base_epoch_runner.py", line 432, in train
|
| 29 |
+
loss = self.train_iters(epoch, iter_index, data)
|
| 30 |
+
File "/mnt/RAID/BasicTS/basicts/runners/base_tsf_runner.py", line 340, in train_iters
|
| 31 |
+
forward_return = self.forward(data=data, epoch=epoch, iter_num=iter_num, train=True)
|
| 32 |
+
File "/mnt/RAID/BasicTS/basicts/runners/runner_zoo/simple_tsf_runner.py", line 101, in forward
|
| 33 |
+
model_return = self.model(history_data=history_data, future_data=future_data_4_dec,
|
| 34 |
+
File "/home/UNT/cy0265/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
|
| 35 |
+
return forward_call(*args, **kwargs)
|
| 36 |
+
File "/mnt/RAID/BasicTS/baselines/D2STGNN/arch/d2stgnn_arch.py", line 177, in forward
|
| 37 |
+
tem_backcast_seq_res, spa_forecast_hidden, tem_forecast_hidden = layer(
|
| 38 |
+
File "/home/UNT/cy0265/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
|
| 39 |
+
return forward_call(*args, **kwargs)
|
| 40 |
+
File "/mnt/RAID/BasicTS/baselines/D2STGNN/arch/d2stgnn_arch.py", line 40, in forward
|
| 41 |
+
dif_backcast_seq_res, dif_forecast_hidden = self.dif_layer(
|
| 42 |
+
File "/home/UNT/cy0265/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
|
| 43 |
+
return forward_call(*args, **kwargs)
|
| 44 |
+
File "/mnt/RAID/BasicTS/baselines/D2STGNN/arch/difusion_block/dif_block.py", line 23, in forward
|
| 45 |
+
Z = self.localized_st_conv(X_spa, dynamic_graph, static_graph)
|
| 46 |
+
File "/home/UNT/cy0265/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
|
| 47 |
+
return forward_call(*args, **kwargs)
|
| 48 |
+
File "/mnt/RAID/BasicTS/baselines/D2STGNN/arch/difusion_block/dif_model.py", line 89, in forward
|
| 49 |
+
support = support + self.get_graph(static_graph)
|
| 50 |
+
File "/mnt/RAID/BasicTS/baselines/D2STGNN/arch/difusion_block/dif_model.py", line 52, in get_graph
|
| 51 |
+
mask = 1 - torch.eye(support[0].shape[0]).to(support[0].device)
|
| 52 |
+
KeyboardInterrupt
|
| 53 |
+
|
METR-LA_100_12_12/168d6584087dcd4c27bc3ca12614ba0c/training_log_20250518171246.log
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2025-05-18 17:12:46,283 - easytorch-training - INFO - Initializing training.
|
| 2 |
+
2025-05-18 17:12:46,283 - easytorch-training - INFO - Set clip grad, param: {'max_norm': 5.0}
|
| 3 |
+
2025-05-18 17:12:46,283 - easytorch-training - INFO - Building training data loader.
|
| 4 |
+
2025-05-18 17:12:46,318 - easytorch-training - INFO - Train dataset length: 23968
|
| 5 |
+
2025-05-18 17:12:46,320 - easytorch-training - INFO - Set optim: Adam (
|
| 6 |
+
Parameter Group 0
|
| 7 |
+
amsgrad: False
|
| 8 |
+
betas: (0.9, 0.999)
|
| 9 |
+
capturable: False
|
| 10 |
+
differentiable: False
|
| 11 |
+
eps: 1e-08
|
| 12 |
+
foreach: None
|
| 13 |
+
fused: None
|
| 14 |
+
lr: 0.002
|
| 15 |
+
maximize: False
|
| 16 |
+
weight_decay: 1e-05
|
| 17 |
+
)
|
| 18 |
+
2025-05-18 17:12:46,320 - easytorch-training - INFO - Set lr_scheduler: <torch.optim.lr_scheduler.MultiStepLR object at 0x79b47191f310>
|
| 19 |
+
2025-05-18 17:12:46,322 - easytorch-training - INFO - Initializing validation.
|
| 20 |
+
2025-05-18 17:12:46,322 - easytorch-training - INFO - Building val data loader.
|
| 21 |
+
2025-05-18 17:12:46,328 - easytorch-training - INFO - Validation dataset length: 3404
|
| 22 |
+
2025-05-18 17:12:46,339 - easytorch-training - INFO - Test dataset length: 6831
|
| 23 |
+
2025-05-18 17:12:46,340 - easytorch-training - INFO - Number of parameters: 578501
|
| 24 |
+
2025-05-18 17:12:46,340 - easytorch-training - INFO - Epoch 1 / 100
|
| 25 |
+
2025-05-18 17:12:46,676 - easytorch-training - ERROR - Traceback (most recent call last):
|
| 26 |
+
File "/home/UNT/cy0265/.local/lib/python3.10/site-packages/easytorch/launcher/launcher.py", line 31, in training_func
|
| 27 |
+
runner.train(cfg)
|
| 28 |
+
File "/mnt/RAID/BasicTS/basicts/runners/base_epoch_runner.py", line 432, in train
|
| 29 |
+
loss = self.train_iters(epoch, iter_index, data)
|
| 30 |
+
File "/mnt/RAID/BasicTS/basicts/runners/base_tsf_runner.py", line 340, in train_iters
|
| 31 |
+
forward_return = self.forward(data=data, epoch=epoch, iter_num=iter_num, train=True)
|
| 32 |
+
File "/mnt/RAID/BasicTS/basicts/runners/runner_zoo/simple_tsf_runner.py", line 101, in forward
|
| 33 |
+
model_return = self.model(history_data=history_data, future_data=future_data_4_dec,
|
| 34 |
+
File "/home/UNT/cy0265/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
|
| 35 |
+
return forward_call(*args, **kwargs)
|
| 36 |
+
File "/mnt/RAID/BasicTS/baselines/D2STGNN/arch/d2stgnn_arch.py", line 196, in forward
|
| 37 |
+
F.relu(self.out_fc_1(F.relu(forecast_hidden))))
|
| 38 |
+
File "/home/UNT/cy0265/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
|
| 39 |
+
return forward_call(*args, **kwargs)
|
| 40 |
+
File "/home/UNT/cy0265/.local/lib/python3.10/site-packages/torch/nn/modules/linear.py", line 114, in forward
|
| 41 |
+
return F.linear(input, self.weight, self.bias)
|
| 42 |
+
RuntimeError: mat1 and mat2 shapes cannot be multiplied (26496x1024 and 256x512)
|
| 43 |
+
|
METR-LA_100_12_12/168d6584087dcd4c27bc3ca12614ba0c/training_log_20250518171940.log
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2025-05-18 17:19:40,152 - easytorch-training - INFO - Initializing training.
|
| 2 |
+
2025-05-18 17:19:40,152 - easytorch-training - INFO - Set clip grad, param: {'max_norm': 5.0}
|
| 3 |
+
2025-05-18 17:19:40,152 - easytorch-training - INFO - Building training data loader.
|
| 4 |
+
2025-05-18 17:19:40,189 - easytorch-training - INFO - Train dataset length: 23968
|
| 5 |
+
2025-05-18 17:19:40,190 - easytorch-training - INFO - Set optim: Adam (
|
| 6 |
+
Parameter Group 0
|
| 7 |
+
amsgrad: False
|
| 8 |
+
betas: (0.9, 0.999)
|
| 9 |
+
capturable: False
|
| 10 |
+
differentiable: False
|
| 11 |
+
eps: 1e-08
|
| 12 |
+
foreach: None
|
| 13 |
+
fused: None
|
| 14 |
+
lr: 0.002
|
| 15 |
+
maximize: False
|
| 16 |
+
weight_decay: 1e-05
|
| 17 |
+
)
|
| 18 |
+
2025-05-18 17:19:40,191 - easytorch-training - INFO - Set lr_scheduler: <torch.optim.lr_scheduler.MultiStepLR object at 0x7e950ad1f2e0>
|
| 19 |
+
2025-05-18 17:19:40,193 - easytorch-training - INFO - Initializing validation.
|
| 20 |
+
2025-05-18 17:19:40,193 - easytorch-training - INFO - Building val data loader.
|
| 21 |
+
2025-05-18 17:19:40,198 - easytorch-training - INFO - Validation dataset length: 3404
|
| 22 |
+
2025-05-18 17:19:40,210 - easytorch-training - INFO - Test dataset length: 6831
|
| 23 |
+
2025-05-18 17:19:40,211 - easytorch-training - INFO - Number of parameters: 578501
|
| 24 |
+
2025-05-18 17:19:40,211 - easytorch-training - INFO - Epoch 1 / 100
|
| 25 |
+
2025-05-18 17:19:40,637 - easytorch-training - ERROR - Traceback (most recent call last):
|
| 26 |
+
File "/home/UNT/cy0265/.local/lib/python3.10/site-packages/easytorch/launcher/launcher.py", line 31, in training_func
|
| 27 |
+
runner.train(cfg)
|
| 28 |
+
File "/mnt/RAID/BasicTS/basicts/runners/base_epoch_runner.py", line 432, in train
|
| 29 |
+
loss = self.train_iters(epoch, iter_index, data)
|
| 30 |
+
File "/mnt/RAID/BasicTS/basicts/runners/base_tsf_runner.py", line 340, in train_iters
|
| 31 |
+
forward_return = self.forward(data=data, epoch=epoch, iter_num=iter_num, train=True)
|
| 32 |
+
File "/mnt/RAID/BasicTS/basicts/runners/runner_zoo/simple_tsf_runner.py", line 101, in forward
|
| 33 |
+
model_return = self.model(history_data=history_data, future_data=future_data_4_dec,
|
| 34 |
+
File "/home/UNT/cy0265/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
|
| 35 |
+
return forward_call(*args, **kwargs)
|
| 36 |
+
File "/mnt/RAID/BasicTS/baselines/D2STGNN/arch/d2stgnn_arch.py", line 191, in forward
|
| 37 |
+
cross_out = self.cross(forecast_hidden, embeddings, embeddings) # Q X KV [B, C, N]X[B, E, N] = [B, C, N]
|
| 38 |
+
File "/home/UNT/cy0265/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
|
| 39 |
+
return forward_call(*args, **kwargs)
|
| 40 |
+
File "/mnt/RAID/BasicTS/baselines/D2STGNN/arch/Cross_Modal_Align.py", line 31, in forward
|
| 41 |
+
for mod in self.layers: output, scores = mod(q,k,v, prev=scores, key_padding_mask=key_padding_mask, attn_mask=attn_mask)
|
| 42 |
+
File "/home/UNT/cy0265/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
|
| 43 |
+
return forward_call(*args, **kwargs)
|
| 44 |
+
File "/mnt/RAID/BasicTS/baselines/D2STGNN/arch/Cross_Modal_Align.py", line 79, in forward
|
| 45 |
+
k = self.norm_attn(k)
|
| 46 |
+
File "/home/UNT/cy0265/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
|
| 47 |
+
return forward_call(*args, **kwargs)
|
| 48 |
+
File "/home/UNT/cy0265/.local/lib/python3.10/site-packages/torch/nn/modules/normalization.py", line 190, in forward
|
| 49 |
+
return F.layer_norm(
|
| 50 |
+
File "/home/UNT/cy0265/.local/lib/python3.10/site-packages/torch/nn/functional.py", line 2515, in layer_norm
|
| 51 |
+
return torch.layer_norm(input, normalized_shape, weight, bias, eps, torch.backends.cudnn.enabled)
|
| 52 |
+
RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cpu and cuda:0! (when checking argument for argument weight in method wrapper_CUDA__native_layer_norm)
|
| 53 |
+
|
METR-LA_100_12_12/168d6584087dcd4c27bc3ca12614ba0c/training_log_20250518172318.log
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2025-05-18 17:23:18,804 - easytorch-training - INFO - Initializing training.
|
| 2 |
+
2025-05-18 17:23:18,804 - easytorch-training - INFO - Set clip grad, param: {'max_norm': 5.0}
|
| 3 |
+
2025-05-18 17:23:18,804 - easytorch-training - INFO - Building training data loader.
|
| 4 |
+
2025-05-18 17:23:18,842 - easytorch-training - INFO - Train dataset length: 23968
|
| 5 |
+
2025-05-18 17:23:18,843 - easytorch-training - INFO - Set optim: Adam (
|
| 6 |
+
Parameter Group 0
|
| 7 |
+
amsgrad: False
|
| 8 |
+
betas: (0.9, 0.999)
|
| 9 |
+
capturable: False
|
| 10 |
+
differentiable: False
|
| 11 |
+
eps: 1e-08
|
| 12 |
+
foreach: None
|
| 13 |
+
fused: None
|
| 14 |
+
lr: 0.002
|
| 15 |
+
maximize: False
|
| 16 |
+
weight_decay: 1e-05
|
| 17 |
+
)
|
| 18 |
+
2025-05-18 17:23:18,844 - easytorch-training - INFO - Set lr_scheduler: <torch.optim.lr_scheduler.MultiStepLR object at 0x7c1df1b172e0>
|
| 19 |
+
2025-05-18 17:23:18,846 - easytorch-training - INFO - Initializing validation.
|
| 20 |
+
2025-05-18 17:23:18,846 - easytorch-training - INFO - Building val data loader.
|
| 21 |
+
2025-05-18 17:23:18,852 - easytorch-training - INFO - Validation dataset length: 3404
|
| 22 |
+
2025-05-18 17:23:18,865 - easytorch-training - INFO - Test dataset length: 6831
|
| 23 |
+
2025-05-18 17:23:18,865 - easytorch-training - INFO - Number of parameters: 578501
|
| 24 |
+
2025-05-18 17:23:18,865 - easytorch-training - INFO - Epoch 1 / 100
|
| 25 |
+
2025-05-18 17:23:19,219 - easytorch-training - ERROR - Traceback (most recent call last):
|
| 26 |
+
File "/home/UNT/cy0265/.local/lib/python3.10/site-packages/easytorch/launcher/launcher.py", line 31, in training_func
|
| 27 |
+
runner.train(cfg)
|
| 28 |
+
File "/mnt/RAID/BasicTS/basicts/runners/base_epoch_runner.py", line 432, in train
|
| 29 |
+
loss = self.train_iters(epoch, iter_index, data)
|
| 30 |
+
File "/mnt/RAID/BasicTS/basicts/runners/base_tsf_runner.py", line 340, in train_iters
|
| 31 |
+
forward_return = self.forward(data=data, epoch=epoch, iter_num=iter_num, train=True)
|
| 32 |
+
File "/mnt/RAID/BasicTS/basicts/runners/runner_zoo/simple_tsf_runner.py", line 101, in forward
|
| 33 |
+
model_return = self.model(history_data=history_data, future_data=future_data_4_dec,
|
| 34 |
+
File "/home/UNT/cy0265/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
|
| 35 |
+
return forward_call(*args, **kwargs)
|
| 36 |
+
File "/mnt/RAID/BasicTS/baselines/D2STGNN/arch/d2stgnn_arch.py", line 189, in forward
|
| 37 |
+
embeddings = nn.init.xavier_uniform_(nn.Parameter(torch.empty(forecast_hidden.shape[0], 768,forecast_hidden.shape[2])).type(torch.LongTensor))
|
| 38 |
+
File "/home/UNT/cy0265/.local/lib/python3.10/site-packages/torch/nn/init.py", line 327, in xavier_uniform_
|
| 39 |
+
return _no_grad_uniform_(tensor, -a, a)
|
| 40 |
+
File "/home/UNT/cy0265/.local/lib/python3.10/site-packages/torch/nn/init.py", line 14, in _no_grad_uniform_
|
| 41 |
+
return tensor.uniform_(a, b)
|
| 42 |
+
RuntimeError: "check_uniform_bounds" not implemented for 'Long'
|
| 43 |
+
|
METR-LA_100_12_12/168d6584087dcd4c27bc3ca12614ba0c/training_log_20250518172424.log
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2025-05-18 17:24:24,636 - easytorch-training - INFO - Initializing training.
|
| 2 |
+
2025-05-18 17:24:24,636 - easytorch-training - INFO - Set clip grad, param: {'max_norm': 5.0}
|
| 3 |
+
2025-05-18 17:24:24,636 - easytorch-training - INFO - Building training data loader.
|
| 4 |
+
2025-05-18 17:24:24,674 - easytorch-training - INFO - Train dataset length: 23968
|
| 5 |
+
2025-05-18 17:24:24,676 - easytorch-training - INFO - Set optim: Adam (
|
| 6 |
+
Parameter Group 0
|
| 7 |
+
amsgrad: False
|
| 8 |
+
betas: (0.9, 0.999)
|
| 9 |
+
capturable: False
|
| 10 |
+
differentiable: False
|
| 11 |
+
eps: 1e-08
|
| 12 |
+
foreach: None
|
| 13 |
+
fused: None
|
| 14 |
+
lr: 0.002
|
| 15 |
+
maximize: False
|
| 16 |
+
weight_decay: 1e-05
|
| 17 |
+
)
|
| 18 |
+
2025-05-18 17:24:24,676 - easytorch-training - INFO - Set lr_scheduler: <torch.optim.lr_scheduler.MultiStepLR object at 0x73c18ff0f280>
|
| 19 |
+
2025-05-18 17:24:24,678 - easytorch-training - INFO - Initializing validation.
|
| 20 |
+
2025-05-18 17:24:24,678 - easytorch-training - INFO - Building val data loader.
|
| 21 |
+
2025-05-18 17:24:24,685 - easytorch-training - INFO - Validation dataset length: 3404
|
| 22 |
+
2025-05-18 17:24:24,697 - easytorch-training - INFO - Test dataset length: 6831
|
| 23 |
+
2025-05-18 17:24:24,698 - easytorch-training - INFO - Number of parameters: 578501
|
| 24 |
+
2025-05-18 17:24:24,698 - easytorch-training - INFO - Epoch 1 / 100
|
| 25 |
+
2025-05-18 17:24:25,132 - easytorch-training - ERROR - Traceback (most recent call last):
|
| 26 |
+
File "/home/UNT/cy0265/.local/lib/python3.10/site-packages/easytorch/launcher/launcher.py", line 31, in training_func
|
| 27 |
+
runner.train(cfg)
|
| 28 |
+
File "/mnt/RAID/BasicTS/basicts/runners/base_epoch_runner.py", line 432, in train
|
| 29 |
+
loss = self.train_iters(epoch, iter_index, data)
|
| 30 |
+
File "/mnt/RAID/BasicTS/basicts/runners/base_tsf_runner.py", line 340, in train_iters
|
| 31 |
+
forward_return = self.forward(data=data, epoch=epoch, iter_num=iter_num, train=True)
|
| 32 |
+
File "/mnt/RAID/BasicTS/basicts/runners/runner_zoo/simple_tsf_runner.py", line 101, in forward
|
| 33 |
+
model_return = self.model(history_data=history_data, future_data=future_data_4_dec,
|
| 34 |
+
File "/home/UNT/cy0265/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
|
| 35 |
+
return forward_call(*args, **kwargs)
|
| 36 |
+
File "/mnt/RAID/BasicTS/baselines/D2STGNN/arch/d2stgnn_arch.py", line 191, in forward
|
| 37 |
+
cross_out = self.cross(forecast_hidden, embeddings, embeddings) # Q X KV [B, C, N]X[B, E, N] = [B, C, N]
|
| 38 |
+
File "/home/UNT/cy0265/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
|
| 39 |
+
return forward_call(*args, **kwargs)
|
| 40 |
+
File "/mnt/RAID/BasicTS/baselines/D2STGNN/arch/Cross_Modal_Align.py", line 31, in forward
|
| 41 |
+
for mod in self.layers: output, scores = mod(q,k,v, prev=scores, key_padding_mask=key_padding_mask, attn_mask=attn_mask)
|
| 42 |
+
File "/home/UNT/cy0265/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
|
| 43 |
+
return forward_call(*args, **kwargs)
|
| 44 |
+
File "/mnt/RAID/BasicTS/baselines/D2STGNN/arch/Cross_Modal_Align.py", line 79, in forward
|
| 45 |
+
k = self.norm_attn(k)
|
| 46 |
+
File "/home/UNT/cy0265/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
|
| 47 |
+
return forward_call(*args, **kwargs)
|
| 48 |
+
File "/home/UNT/cy0265/.local/lib/python3.10/site-packages/torch/nn/modules/normalization.py", line 190, in forward
|
| 49 |
+
return F.layer_norm(
|
| 50 |
+
File "/home/UNT/cy0265/.local/lib/python3.10/site-packages/torch/nn/functional.py", line 2515, in layer_norm
|
| 51 |
+
return torch.layer_norm(input, normalized_shape, weight, bias, eps, torch.backends.cudnn.enabled)
|
| 52 |
+
RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cpu and cuda:0! (when checking argument for argument weight in method wrapper_CUDA__native_layer_norm)
|
| 53 |
+
|
METR-LA_100_12_12/168d6584087dcd4c27bc3ca12614ba0c/training_log_20250518172603.log
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2025-05-18 17:26:03,862 - easytorch-training - INFO - Initializing training.
|
| 2 |
+
2025-05-18 17:26:03,862 - easytorch-training - INFO - Set clip grad, param: {'max_norm': 5.0}
|
| 3 |
+
2025-05-18 17:26:03,862 - easytorch-training - INFO - Building training data loader.
|
| 4 |
+
2025-05-18 17:26:03,901 - easytorch-training - INFO - Train dataset length: 23968
|
| 5 |
+
2025-05-18 17:26:03,902 - easytorch-training - INFO - Set optim: Adam (
|
| 6 |
+
Parameter Group 0
|
| 7 |
+
amsgrad: False
|
| 8 |
+
betas: (0.9, 0.999)
|
| 9 |
+
capturable: False
|
| 10 |
+
differentiable: False
|
| 11 |
+
eps: 1e-08
|
| 12 |
+
foreach: None
|
| 13 |
+
fused: None
|
| 14 |
+
lr: 0.002
|
| 15 |
+
maximize: False
|
| 16 |
+
weight_decay: 1e-05
|
| 17 |
+
)
|
| 18 |
+
2025-05-18 17:26:03,902 - easytorch-training - INFO - Set lr_scheduler: <torch.optim.lr_scheduler.MultiStepLR object at 0x7d48f4b1f310>
|
| 19 |
+
2025-05-18 17:26:03,905 - easytorch-training - INFO - Initializing validation.
|
| 20 |
+
2025-05-18 17:26:03,905 - easytorch-training - INFO - Building val data loader.
|
| 21 |
+
2025-05-18 17:26:03,911 - easytorch-training - INFO - Validation dataset length: 3404
|
| 22 |
+
2025-05-18 17:26:03,924 - easytorch-training - INFO - Test dataset length: 6831
|
| 23 |
+
2025-05-18 17:26:03,925 - easytorch-training - INFO - Number of parameters: 578501
|
| 24 |
+
2025-05-18 17:26:03,925 - easytorch-training - INFO - Epoch 1 / 100
|
| 25 |
+
2025-05-18 17:26:04,372 - easytorch-training - ERROR - Traceback (most recent call last):
|
| 26 |
+
File "/home/UNT/cy0265/.local/lib/python3.10/site-packages/easytorch/launcher/launcher.py", line 31, in training_func
|
| 27 |
+
runner.train(cfg)
|
| 28 |
+
File "/mnt/RAID/BasicTS/basicts/runners/base_epoch_runner.py", line 432, in train
|
| 29 |
+
loss = self.train_iters(epoch, iter_index, data)
|
| 30 |
+
File "/mnt/RAID/BasicTS/basicts/runners/base_tsf_runner.py", line 340, in train_iters
|
| 31 |
+
forward_return = self.forward(data=data, epoch=epoch, iter_num=iter_num, train=True)
|
| 32 |
+
File "/mnt/RAID/BasicTS/basicts/runners/runner_zoo/simple_tsf_runner.py", line 101, in forward
|
| 33 |
+
model_return = self.model(history_data=history_data, future_data=future_data_4_dec,
|
| 34 |
+
File "/home/UNT/cy0265/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
|
| 35 |
+
return forward_call(*args, **kwargs)
|
| 36 |
+
File "/mnt/RAID/BasicTS/baselines/D2STGNN/arch/d2stgnn_arch.py", line 197, in forward
|
| 37 |
+
F.relu(self.out_fc_1(F.relu(forecast_hidden))))
|
| 38 |
+
File "/home/UNT/cy0265/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
|
| 39 |
+
return forward_call(*args, **kwargs)
|
| 40 |
+
File "/home/UNT/cy0265/.local/lib/python3.10/site-packages/torch/nn/modules/linear.py", line 114, in forward
|
| 41 |
+
return F.linear(input, self.weight, self.bias)
|
| 42 |
+
RuntimeError: Expected size for first two dimensions of batch2 tensor to be: [128, 207] but got: [128, 256].
|
| 43 |
+
|
METR-LA_100_12_12/168d6584087dcd4c27bc3ca12614ba0c/training_log_20250518173155.log
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2025-05-18 17:31:55,943 - easytorch-training - INFO - Initializing training.
|
| 2 |
+
2025-05-18 17:31:55,943 - easytorch-training - INFO - Set clip grad, param: {'max_norm': 5.0}
|
| 3 |
+
2025-05-18 17:31:55,943 - easytorch-training - INFO - Building training data loader.
|
| 4 |
+
2025-05-18 17:31:55,981 - easytorch-training - INFO - Train dataset length: 23968
|
| 5 |
+
2025-05-18 17:31:55,982 - easytorch-training - INFO - Set optim: Adam (
|
| 6 |
+
Parameter Group 0
|
| 7 |
+
amsgrad: False
|
| 8 |
+
betas: (0.9, 0.999)
|
| 9 |
+
capturable: False
|
| 10 |
+
differentiable: False
|
| 11 |
+
eps: 1e-08
|
| 12 |
+
foreach: None
|
| 13 |
+
fused: None
|
| 14 |
+
lr: 0.002
|
| 15 |
+
maximize: False
|
| 16 |
+
weight_decay: 1e-05
|
| 17 |
+
)
|
| 18 |
+
2025-05-18 17:31:55,982 - easytorch-training - INFO - Set lr_scheduler: <torch.optim.lr_scheduler.MultiStepLR object at 0x780db2d1f340>
|
| 19 |
+
2025-05-18 17:31:55,984 - easytorch-training - INFO - Initializing validation.
|
| 20 |
+
2025-05-18 17:31:55,985 - easytorch-training - INFO - Building val data loader.
|
| 21 |
+
2025-05-18 17:31:55,991 - easytorch-training - INFO - Validation dataset length: 3404
|
| 22 |
+
2025-05-18 17:31:56,004 - easytorch-training - INFO - Test dataset length: 6831
|
| 23 |
+
2025-05-18 17:31:56,004 - easytorch-training - INFO - Number of parameters: 2569166
|
| 24 |
+
2025-05-18 17:31:56,005 - easytorch-training - INFO - Epoch 1 / 100
|
| 25 |
+
2025-05-18 17:31:56,443 - easytorch-training - ERROR - Traceback (most recent call last):
|
| 26 |
+
File "/home/UNT/cy0265/.local/lib/python3.10/site-packages/easytorch/launcher/launcher.py", line 31, in training_func
|
| 27 |
+
runner.train(cfg)
|
| 28 |
+
File "/mnt/RAID/BasicTS/basicts/runners/base_epoch_runner.py", line 432, in train
|
| 29 |
+
loss = self.train_iters(epoch, iter_index, data)
|
| 30 |
+
File "/mnt/RAID/BasicTS/basicts/runners/base_tsf_runner.py", line 340, in train_iters
|
| 31 |
+
forward_return = self.forward(data=data, epoch=epoch, iter_num=iter_num, train=True)
|
| 32 |
+
File "/mnt/RAID/BasicTS/basicts/runners/runner_zoo/simple_tsf_runner.py", line 113, in forward
|
| 33 |
+
assert list(model_return['prediction'].shape)[:3] == [batch_size, length, num_nodes], \
|
| 34 |
+
AssertionError: The shape of the output is incorrect. Ensure it matches [B, L, N, C].
|
| 35 |
+
|
METR-LA_100_12_12/168d6584087dcd4c27bc3ca12614ba0c/training_log_20250518173210.log
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2025-05-18 17:32:10,753 - easytorch-training - INFO - Initializing training.
|
| 2 |
+
2025-05-18 17:32:10,753 - easytorch-training - INFO - Set clip grad, param: {'max_norm': 5.0}
|
| 3 |
+
2025-05-18 17:32:10,753 - easytorch-training - INFO - Building training data loader.
|
| 4 |
+
2025-05-18 17:32:10,793 - easytorch-training - INFO - Train dataset length: 23968
|
| 5 |
+
2025-05-18 17:32:10,795 - easytorch-training - INFO - Set optim: Adam (
|
| 6 |
+
Parameter Group 0
|
| 7 |
+
amsgrad: False
|
| 8 |
+
betas: (0.9, 0.999)
|
| 9 |
+
capturable: False
|
| 10 |
+
differentiable: False
|
| 11 |
+
eps: 1e-08
|
| 12 |
+
foreach: None
|
| 13 |
+
fused: None
|
| 14 |
+
lr: 0.002
|
| 15 |
+
maximize: False
|
| 16 |
+
weight_decay: 1e-05
|
| 17 |
+
)
|
| 18 |
+
2025-05-18 17:32:10,795 - easytorch-training - INFO - Set lr_scheduler: <torch.optim.lr_scheduler.MultiStepLR object at 0x7567c6f172e0>
|
| 19 |
+
2025-05-18 17:32:10,797 - easytorch-training - INFO - Initializing validation.
|
| 20 |
+
2025-05-18 17:32:10,797 - easytorch-training - INFO - Building val data loader.
|
| 21 |
+
2025-05-18 17:32:10,804 - easytorch-training - INFO - Validation dataset length: 3404
|
| 22 |
+
2025-05-18 17:32:10,817 - easytorch-training - INFO - Test dataset length: 6831
|
| 23 |
+
2025-05-18 17:32:10,818 - easytorch-training - INFO - Number of parameters: 2569166
|
| 24 |
+
2025-05-18 17:32:10,818 - easytorch-training - INFO - Epoch 1 / 100
|
| 25 |
+
2025-05-18 17:32:11,324 - easytorch-training - ERROR - Traceback (most recent call last):
|
| 26 |
+
File "/home/UNT/cy0265/.local/lib/python3.10/site-packages/easytorch/launcher/launcher.py", line 31, in training_func
|
| 27 |
+
runner.train(cfg)
|
| 28 |
+
File "/mnt/RAID/BasicTS/basicts/runners/base_epoch_runner.py", line 432, in train
|
| 29 |
+
loss = self.train_iters(epoch, iter_index, data)
|
| 30 |
+
File "/mnt/RAID/BasicTS/basicts/runners/base_tsf_runner.py", line 340, in train_iters
|
| 31 |
+
forward_return = self.forward(data=data, epoch=epoch, iter_num=iter_num, train=True)
|
| 32 |
+
File "/mnt/RAID/BasicTS/basicts/runners/runner_zoo/simple_tsf_runner.py", line 113, in forward
|
| 33 |
+
assert list(model_return['prediction'].shape)[:3] == [batch_size, length, num_nodes], \
|
| 34 |
+
AssertionError: The shape of the output is incorrect. Ensure it matches [B, L, N, C].
|
| 35 |
+
|
METR-LA_100_12_12/168d6584087dcd4c27bc3ca12614ba0c/training_log_20250518173221.log
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2025-05-18 17:32:21,488 - easytorch-training - INFO - Initializing training.
|
| 2 |
+
2025-05-18 17:32:21,488 - easytorch-training - INFO - Set clip grad, param: {'max_norm': 5.0}
|
| 3 |
+
2025-05-18 17:32:21,488 - easytorch-training - INFO - Building training data loader.
|
| 4 |
+
2025-05-18 17:32:21,525 - easytorch-training - INFO - Train dataset length: 23968
|
| 5 |
+
2025-05-18 17:32:21,526 - easytorch-training - INFO - Set optim: Adam (
|
| 6 |
+
Parameter Group 0
|
| 7 |
+
amsgrad: False
|
| 8 |
+
betas: (0.9, 0.999)
|
| 9 |
+
capturable: False
|
| 10 |
+
differentiable: False
|
| 11 |
+
eps: 1e-08
|
| 12 |
+
foreach: None
|
| 13 |
+
fused: None
|
| 14 |
+
lr: 0.002
|
| 15 |
+
maximize: False
|
| 16 |
+
weight_decay: 1e-05
|
| 17 |
+
)
|
| 18 |
+
2025-05-18 17:32:21,527 - easytorch-training - INFO - Set lr_scheduler: <torch.optim.lr_scheduler.MultiStepLR object at 0x75924e50f2b0>
|
| 19 |
+
2025-05-18 17:32:21,529 - easytorch-training - INFO - Initializing validation.
|
| 20 |
+
2025-05-18 17:32:21,529 - easytorch-training - INFO - Building val data loader.
|
| 21 |
+
2025-05-18 17:32:21,534 - easytorch-training - INFO - Validation dataset length: 3404
|
| 22 |
+
2025-05-18 17:32:21,546 - easytorch-training - INFO - Test dataset length: 6831
|
| 23 |
+
2025-05-18 17:32:21,547 - easytorch-training - INFO - Number of parameters: 2569166
|
| 24 |
+
2025-05-18 17:32:21,547 - easytorch-training - INFO - Epoch 1 / 100
|
| 25 |
+
2025-05-18 17:32:21,992 - easytorch-training - ERROR - Traceback (most recent call last):
|
| 26 |
+
File "/home/UNT/cy0265/.local/lib/python3.10/site-packages/easytorch/launcher/launcher.py", line 31, in training_func
|
| 27 |
+
runner.train(cfg)
|
| 28 |
+
File "/mnt/RAID/BasicTS/basicts/runners/base_epoch_runner.py", line 432, in train
|
| 29 |
+
loss = self.train_iters(epoch, iter_index, data)
|
| 30 |
+
File "/mnt/RAID/BasicTS/basicts/runners/base_tsf_runner.py", line 340, in train_iters
|
| 31 |
+
forward_return = self.forward(data=data, epoch=epoch, iter_num=iter_num, train=True)
|
| 32 |
+
File "/mnt/RAID/BasicTS/basicts/runners/runner_zoo/simple_tsf_runner.py", line 113, in forward
|
| 33 |
+
assert list(model_return['prediction'].shape)[:3] == [batch_size, length, num_nodes], \
|
| 34 |
+
AssertionError: The shape of the output is incorrect. Ensure it matches [B, L, N, C].
|
| 35 |
+
|
METR-LA_100_12_12/168d6584087dcd4c27bc3ca12614ba0c/training_log_20250518173246.log
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2025-05-18 17:32:46,429 - easytorch-training - INFO - Initializing training.
|
| 2 |
+
2025-05-18 17:32:46,430 - easytorch-training - INFO - Set clip grad, param: {'max_norm': 5.0}
|
| 3 |
+
2025-05-18 17:32:46,430 - easytorch-training - INFO - Building training data loader.
|
| 4 |
+
2025-05-18 17:32:46,467 - easytorch-training - INFO - Train dataset length: 23968
|
| 5 |
+
2025-05-18 17:32:46,469 - easytorch-training - INFO - Set optim: Adam (
|
| 6 |
+
Parameter Group 0
|
| 7 |
+
amsgrad: False
|
| 8 |
+
betas: (0.9, 0.999)
|
| 9 |
+
capturable: False
|
| 10 |
+
differentiable: False
|
| 11 |
+
eps: 1e-08
|
| 12 |
+
foreach: None
|
| 13 |
+
fused: None
|
| 14 |
+
lr: 0.002
|
| 15 |
+
maximize: False
|
| 16 |
+
weight_decay: 1e-05
|
| 17 |
+
)
|
| 18 |
+
2025-05-18 17:32:46,469 - easytorch-training - INFO - Set lr_scheduler: <torch.optim.lr_scheduler.MultiStepLR object at 0x7b5fab123310>
|
| 19 |
+
2025-05-18 17:32:46,472 - easytorch-training - INFO - Initializing validation.
|
| 20 |
+
2025-05-18 17:32:46,472 - easytorch-training - INFO - Building val data loader.
|
| 21 |
+
2025-05-18 17:32:46,480 - easytorch-training - INFO - Validation dataset length: 3404
|
| 22 |
+
2025-05-18 17:32:46,493 - easytorch-training - INFO - Test dataset length: 6831
|
| 23 |
+
2025-05-18 17:32:46,494 - easytorch-training - INFO - Number of parameters: 2569166
|
| 24 |
+
2025-05-18 17:32:46,494 - easytorch-training - INFO - Epoch 1 / 100
|
| 25 |
+
2025-05-18 17:32:59,807 - easytorch-training - ERROR - Traceback (most recent call last):
|
| 26 |
+
File "/home/UNT/cy0265/.local/lib/python3.10/site-packages/easytorch/launcher/launcher.py", line 31, in training_func
|
| 27 |
+
runner.train(cfg)
|
| 28 |
+
File "/mnt/RAID/BasicTS/basicts/runners/base_epoch_runner.py", line 434, in train
|
| 29 |
+
self.backward(loss)
|
| 30 |
+
File "/mnt/RAID/BasicTS/basicts/runners/base_epoch_runner.py", line 768, in backward
|
| 31 |
+
loss.backward()
|
| 32 |
+
File "/home/UNT/cy0265/.local/lib/python3.10/site-packages/torch/_tensor.py", line 487, in backward
|
| 33 |
+
torch.autograd.backward(
|
| 34 |
+
File "/home/UNT/cy0265/.local/lib/python3.10/site-packages/torch/autograd/__init__.py", line 200, in backward
|
| 35 |
+
Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
|
| 36 |
+
KeyboardInterrupt
|
| 37 |
+
|
METR-LA_100_12_12/168d6584087dcd4c27bc3ca12614ba0c/training_log_20250518173305.log
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
PEMS-BAY_100_12_12/123/test_metrics.json
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"horizon_3": {
|
| 3 |
+
"MAE": 1.3431265354156494,
|
| 4 |
+
"MAPE": 0.027954425662755966,
|
| 5 |
+
"RMSE": 2.8079445362091064
|
| 6 |
+
},
|
| 7 |
+
"horizon_6": {
|
| 8 |
+
"MAE": 1.690486192703247,
|
| 9 |
+
"MAPE": 0.03732890263199806,
|
| 10 |
+
"RMSE": 3.839404582977295
|
| 11 |
+
},
|
| 12 |
+
"horizon_12": {
|
| 13 |
+
"MAE": 1.9962530136108398,
|
| 14 |
+
"MAPE": 0.045766349881887436,
|
| 15 |
+
"RMSE": 4.5420355796813965
|
| 16 |
+
},
|
| 17 |
+
"overall": {
|
| 18 |
+
"MAE": 1.6249264478683472,
|
| 19 |
+
"MAPE": 0.03582515940070152,
|
| 20 |
+
"RMSE": 3.7189371585845947
|
| 21 |
+
}
|
| 22 |
+
}
|
PEMS-BAY_100_12_12/123/test_results.npz
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4e1fc39bf78e394faf2c288a3e9d8825bb6b1b96f6e8682dea332ac564c95001
|
| 3 |
+
size 486720762
|
PEMS-BAY_100_12_12/92df827e277793626a6d4d8a1179deec/D2STGNN_100.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:58ac4eb8578d252c950f5ffcba0411a7819b4dc9de3ef6f4892a121703461700
|
| 3 |
+
size 989058751
|
PEMS-BAY_100_12_12/92df827e277793626a6d4d8a1179deec/D2STGNN_best_val_MAE.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c8d71558160485c9d086b7ba23b25b103976341a0bfb7ee2cb8d0fdbd5dde66e
|
| 3 |
+
size 989068651
|
PEMS-BAY_100_12_12/92df827e277793626a6d4d8a1179deec/PEMS-BAY.py
ADDED
|
@@ -0,0 +1,157 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import sys
|
| 3 |
+
import torch
|
| 4 |
+
from easydict import EasyDict
|
| 5 |
+
sys.path.append(os.path.abspath(__file__ + '/../../..'))
|
| 6 |
+
|
| 7 |
+
from basicts.metrics import masked_mae, masked_mape, masked_rmse
|
| 8 |
+
from basicts.data import TimeSeriesForecastingDataset
|
| 9 |
+
from basicts.runners import SimpleTimeSeriesForecastingRunner
|
| 10 |
+
from basicts.scaler import ZScoreScaler
|
| 11 |
+
from basicts.utils import get_regular_settings, load_adj
|
| 12 |
+
|
| 13 |
+
from .arch import D2STGNN
|
| 14 |
+
|
| 15 |
+
############################## Hot Parameters ##############################
|
| 16 |
+
# Dataset & Metrics configuration
|
| 17 |
+
DATA_NAME = 'PEMS-BAY' # Dataset name
|
| 18 |
+
regular_settings = get_regular_settings(DATA_NAME)
|
| 19 |
+
INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
|
| 20 |
+
OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
|
| 21 |
+
TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
|
| 22 |
+
NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
|
| 23 |
+
RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
|
| 24 |
+
NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
|
| 25 |
+
# Model architecture and parameters
|
| 26 |
+
MODEL_ARCH = D2STGNN
|
| 27 |
+
adj_mx, _ = load_adj("datasets/" + DATA_NAME +
|
| 28 |
+
"/adj_mx.pkl", "doubletransition")
|
| 29 |
+
MODEL_PARAM = {
|
| 30 |
+
"num_feat": 1,
|
| 31 |
+
"num_hidden": 32,
|
| 32 |
+
"dropout": 0.1,
|
| 33 |
+
"seq_length": 12,
|
| 34 |
+
"k_t": 3,
|
| 35 |
+
"k_s": 2,
|
| 36 |
+
"gap": 3,
|
| 37 |
+
"num_nodes": 325,
|
| 38 |
+
"adjs": [torch.tensor(adj) for adj in adj_mx],
|
| 39 |
+
"num_layers": 5,
|
| 40 |
+
"num_modalities": 2,
|
| 41 |
+
"node_hidden": 12,
|
| 42 |
+
"time_emb_dim": 12,
|
| 43 |
+
"time_in_day_size": 288,
|
| 44 |
+
"day_in_week_size": 7,
|
| 45 |
+
}
|
| 46 |
+
NUM_EPOCHS = 100
|
| 47 |
+
|
| 48 |
+
############################## General Configuration ##############################
|
| 49 |
+
CFG = EasyDict()
|
| 50 |
+
# General settings
|
| 51 |
+
CFG.DESCRIPTION = 'An Example Config'
|
| 52 |
+
CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
|
| 53 |
+
# Runner
|
| 54 |
+
CFG.RUNNER = SimpleTimeSeriesForecastingRunner
|
| 55 |
+
|
| 56 |
+
############################## Dataset Configuration ##############################
|
| 57 |
+
CFG.DATASET = EasyDict()
|
| 58 |
+
# Dataset settings
|
| 59 |
+
CFG.DATASET.NAME = DATA_NAME
|
| 60 |
+
CFG.DATASET.TYPE = TimeSeriesForecastingDataset
|
| 61 |
+
CFG.DATASET.PARAM = EasyDict({
|
| 62 |
+
'dataset_name': DATA_NAME,
|
| 63 |
+
'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
|
| 64 |
+
'input_len': INPUT_LEN,
|
| 65 |
+
'output_len': OUTPUT_LEN,
|
| 66 |
+
# 'mode' is automatically set by the runner
|
| 67 |
+
})
|
| 68 |
+
|
| 69 |
+
############################## Scaler Configuration ##############################
|
| 70 |
+
CFG.SCALER = EasyDict()
|
| 71 |
+
# Scaler settings
|
| 72 |
+
CFG.SCALER.TYPE = ZScoreScaler # Scaler class
|
| 73 |
+
CFG.SCALER.PARAM = EasyDict({
|
| 74 |
+
'dataset_name': DATA_NAME,
|
| 75 |
+
'train_ratio': TRAIN_VAL_TEST_RATIO[0],
|
| 76 |
+
'norm_each_channel': NORM_EACH_CHANNEL,
|
| 77 |
+
'rescale': RESCALE,
|
| 78 |
+
})
|
| 79 |
+
|
| 80 |
+
############################## Model Configuration ##############################
|
| 81 |
+
CFG.MODEL = EasyDict()
|
| 82 |
+
# Model settings
|
| 83 |
+
CFG.MODEL.NAME = MODEL_ARCH.__name__
|
| 84 |
+
CFG.MODEL.ARCH = MODEL_ARCH
|
| 85 |
+
CFG.MODEL.PARAM = MODEL_PARAM
|
| 86 |
+
CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
|
| 87 |
+
CFG.MODEL.TARGET_FEATURES = [0]
|
| 88 |
+
|
| 89 |
+
############################## Metrics Configuration ##############################
|
| 90 |
+
|
| 91 |
+
CFG.METRICS = EasyDict()
|
| 92 |
+
# Metrics settings
|
| 93 |
+
CFG.METRICS.FUNCS = EasyDict({
|
| 94 |
+
'MAE': masked_mae,
|
| 95 |
+
'MAPE': masked_mape,
|
| 96 |
+
'RMSE': masked_rmse,
|
| 97 |
+
})
|
| 98 |
+
CFG.METRICS.TARGET = 'MAE'
|
| 99 |
+
CFG.METRICS.NULL_VAL = NULL_VAL
|
| 100 |
+
|
| 101 |
+
############################## Training Configuration ##############################
|
| 102 |
+
CFG.TRAIN = EasyDict()
|
| 103 |
+
CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
|
| 104 |
+
CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
|
| 105 |
+
'checkpoints',
|
| 106 |
+
MODEL_ARCH.__name__,
|
| 107 |
+
'_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
|
| 108 |
+
)
|
| 109 |
+
CFG.TRAIN.LOSS = masked_mae
|
| 110 |
+
# Optimizer settings
|
| 111 |
+
CFG.TRAIN.OPTIM = EasyDict()
|
| 112 |
+
CFG.TRAIN.OPTIM.TYPE = "Adam"
|
| 113 |
+
CFG.TRAIN.OPTIM.PARAM = {
|
| 114 |
+
"lr": 0.002,
|
| 115 |
+
"weight_decay": 1.0e-5,
|
| 116 |
+
"eps": 1.0e-8
|
| 117 |
+
}
|
| 118 |
+
# Learning rate scheduler settings
|
| 119 |
+
CFG.TRAIN.LR_SCHEDULER = EasyDict()
|
| 120 |
+
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
|
| 121 |
+
CFG.TRAIN.LR_SCHEDULER.PARAM = {
|
| 122 |
+
"milestones": [1, 30, 38, 46, 54, 62, 70, 80],
|
| 123 |
+
"gamma": 0.5
|
| 124 |
+
}
|
| 125 |
+
# Train data loader settings
|
| 126 |
+
CFG.TRAIN.DATA = EasyDict()
|
| 127 |
+
CFG.TRAIN.DATA.BATCH_SIZE = 64
|
| 128 |
+
CFG.TRAIN.DATA.SHUFFLE = True
|
| 129 |
+
# Gradient clipping settings
|
| 130 |
+
CFG.TRAIN.CLIP_GRAD_PARAM = {
|
| 131 |
+
"max_norm": 5.0
|
| 132 |
+
}
|
| 133 |
+
# Curriculum learning
|
| 134 |
+
CFG.TRAIN.CL = EasyDict()
|
| 135 |
+
CFG.TRAIN.CL.WARM_EPOCHS = 30
|
| 136 |
+
CFG.TRAIN.CL.CL_EPOCHS = 3
|
| 137 |
+
CFG.TRAIN.CL.PREDICTION_LENGTH = 12
|
| 138 |
+
|
| 139 |
+
############################## Validation Configuration ##############################
|
| 140 |
+
CFG.VAL = EasyDict()
|
| 141 |
+
CFG.VAL.INTERVAL = 1
|
| 142 |
+
CFG.VAL.DATA = EasyDict()
|
| 143 |
+
CFG.VAL.DATA.BATCH_SIZE = 128
|
| 144 |
+
|
| 145 |
+
############################## Test Configuration ##############################
|
| 146 |
+
CFG.TEST = EasyDict()
|
| 147 |
+
CFG.TEST.INTERVAL = 1
|
| 148 |
+
CFG.TEST.DATA = EasyDict()
|
| 149 |
+
CFG.TEST.DATA.BATCH_SIZE = 128
|
| 150 |
+
|
| 151 |
+
############################## Evaluation Configuration ##############################
|
| 152 |
+
|
| 153 |
+
CFG.EVAL = EasyDict()
|
| 154 |
+
|
| 155 |
+
# Evaluation parameters
|
| 156 |
+
CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
|
| 157 |
+
CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
|
PEMS-BAY_100_12_12/92df827e277793626a6d4d8a1179deec/cfg.txt
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
DESCRIPTION: An Example Config
|
| 2 |
+
GPU_NUM: 1
|
| 3 |
+
RUNNER: <class 'basicts.runners.runner_zoo.simple_tsf_runner.SimpleTimeSeriesForecastingRunner'>
|
| 4 |
+
DATASET:
|
| 5 |
+
NAME: PEMS-BAY
|
| 6 |
+
TYPE: <class 'basicts.data.simple_tsf_dataset.TimeSeriesForecastingDataset'>
|
| 7 |
+
PARAM:
|
| 8 |
+
dataset_name: PEMS-BAY
|
| 9 |
+
train_val_test_ratio: [0.7, 0.1, 0.2]
|
| 10 |
+
input_len: 12
|
| 11 |
+
output_len: 12
|
| 12 |
+
SCALER:
|
| 13 |
+
TYPE: <class 'basicts.scaler.z_score_scaler.ZScoreScaler'>
|
| 14 |
+
PARAM:
|
| 15 |
+
dataset_name: PEMS-BAY
|
| 16 |
+
train_ratio: 0.7
|
| 17 |
+
norm_each_channel: False
|
| 18 |
+
rescale: True
|
| 19 |
+
MODEL:
|
| 20 |
+
NAME: D2STGNN
|
| 21 |
+
ARCH: <class 'baselines.D2STGNN.arch.d2stgnn_arch.D2STGNN'>
|
| 22 |
+
PARAM:
|
| 23 |
+
num_feat: 1
|
| 24 |
+
num_hidden: 32
|
| 25 |
+
dropout: 0.1
|
| 26 |
+
seq_length: 12
|
| 27 |
+
k_t: 3
|
| 28 |
+
k_s: 2
|
| 29 |
+
gap: 3
|
| 30 |
+
num_nodes: 325
|
| 31 |
+
adjs: [tensor([[1.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000],
|
| 32 |
+
[0.0000, 1.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000],
|
| 33 |
+
[0.0000, 0.0000, 0.1507, ..., 0.0000, 0.0000, 0.0000],
|
| 34 |
+
...,
|
| 35 |
+
[0.0000, 0.0000, 0.0000, ..., 0.1861, 0.0000, 0.0000],
|
| 36 |
+
[0.0000, 0.0000, 0.0000, ..., 0.1787, 0.1867, 0.0000],
|
| 37 |
+
[0.0000, 0.0000, 0.0000, ..., 0.1128, 0.1443, 0.2185]]), tensor([[1.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000],
|
| 38 |
+
[0.0000, 1.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000],
|
| 39 |
+
[0.0000, 0.0000, 0.1120, ..., 0.0000, 0.0000, 0.0000],
|
| 40 |
+
...,
|
| 41 |
+
[0.0000, 0.0000, 0.0000, ..., 0.1512, 0.1142, 0.0691],
|
| 42 |
+
[0.0000, 0.0000, 0.0000, ..., 0.0000, 0.1189, 0.0882],
|
| 43 |
+
[0.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.1141]])]
|
| 44 |
+
num_layers: 5
|
| 45 |
+
num_modalities: 2
|
| 46 |
+
node_hidden: 12
|
| 47 |
+
time_emb_dim: 12
|
| 48 |
+
time_in_day_size: 288
|
| 49 |
+
day_in_week_size: 7
|
| 50 |
+
FORWARD_FEATURES: [0, 1, 2]
|
| 51 |
+
TARGET_FEATURES: [0]
|
| 52 |
+
METRICS:
|
| 53 |
+
FUNCS:
|
| 54 |
+
MAE: masked_mae
|
| 55 |
+
MAPE: masked_mape
|
| 56 |
+
RMSE: masked_rmse
|
| 57 |
+
TARGET: MAE
|
| 58 |
+
NULL_VAL: 0.0
|
| 59 |
+
TRAIN:
|
| 60 |
+
NUM_EPOCHS: 100
|
| 61 |
+
CKPT_SAVE_DIR: checkpoints/D2STGNN/PEMS-BAY_100_12_12
|
| 62 |
+
LOSS: masked_mae
|
| 63 |
+
OPTIM:
|
| 64 |
+
TYPE: Adam
|
| 65 |
+
PARAM:
|
| 66 |
+
lr: 0.002
|
| 67 |
+
weight_decay: 1e-05
|
| 68 |
+
eps: 1e-08
|
| 69 |
+
LR_SCHEDULER:
|
| 70 |
+
TYPE: MultiStepLR
|
| 71 |
+
PARAM:
|
| 72 |
+
milestones: [1, 30, 38, 46, 54, 62, 70, 80]
|
| 73 |
+
gamma: 0.5
|
| 74 |
+
DATA:
|
| 75 |
+
BATCH_SIZE: 64
|
| 76 |
+
SHUFFLE: True
|
| 77 |
+
CLIP_GRAD_PARAM:
|
| 78 |
+
max_norm: 5.0
|
| 79 |
+
CL:
|
| 80 |
+
WARM_EPOCHS: 30
|
| 81 |
+
CL_EPOCHS: 3
|
| 82 |
+
PREDICTION_LENGTH: 12
|
| 83 |
+
VAL:
|
| 84 |
+
INTERVAL: 1
|
| 85 |
+
DATA:
|
| 86 |
+
BATCH_SIZE: 128
|
| 87 |
+
TEST:
|
| 88 |
+
INTERVAL: 1
|
| 89 |
+
DATA:
|
| 90 |
+
BATCH_SIZE: 128
|
| 91 |
+
EVAL:
|
| 92 |
+
HORIZONS: [3, 6, 12]
|
| 93 |
+
USE_GPU: True
|
| 94 |
+
MD5: 92df827e277793626a6d4d8a1179deec
|
PEMS-BAY_100_12_12/92df827e277793626a6d4d8a1179deec/tensorboard/events.out.tfevents.1748213050.lxhdfrwx3-cse.793515.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2c545ab689dbfb89ced7b42046eb4596d9d3835e245e7a956ea8544ccb6eb72c
|
| 3 |
+
size 88
|
PEMS-BAY_100_12_12/92df827e277793626a6d4d8a1179deec/tensorboard/events.out.tfevents.1748213067.lxhdfrwx3-cse.793862.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a89086d72234d8625ce762257994ce5ada92d257815f342b2590aafe588902d0
|
| 3 |
+
size 88
|
PEMS-BAY_100_12_12/92df827e277793626a6d4d8a1179deec/tensorboard/events.out.tfevents.1748213087.lxhdfrwx3-cse.794206.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ef19c4f0882f7772efc35ea2ac74cd8300459473a29acf29195cd745573809c3
|
| 3 |
+
size 60788
|
PEMS-BAY_100_12_12/92df827e277793626a6d4d8a1179deec/tensorboard/events.out.tfevents.1748293487.lxhdfrwx3-cse.794206.1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a3fe8ebd17810685d9150d59db87147b14270a83cd292b4b275c399e059fd4b6
|
| 3 |
+
size 275
|
PEMS-BAY_100_12_12/92df827e277793626a6d4d8a1179deec/test_metrics.json
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"horizon_3": {
|
| 3 |
+
"MAE": 1.2894989252090454,
|
| 4 |
+
"MAPE": 0.027397574856877327,
|
| 5 |
+
"RMSE": 2.76194167137146
|
| 6 |
+
},
|
| 7 |
+
"horizon_6": {
|
| 8 |
+
"MAE": 1.6214598417282104,
|
| 9 |
+
"MAPE": 0.03698483482003212,
|
| 10 |
+
"RMSE": 3.7608611583709717
|
| 11 |
+
},
|
| 12 |
+
"horizon_12": {
|
| 13 |
+
"MAE": 1.8904039859771729,
|
| 14 |
+
"MAPE": 0.04472082480788231,
|
| 15 |
+
"RMSE": 4.397474765777588
|
| 16 |
+
},
|
| 17 |
+
"overall": {
|
| 18 |
+
"MAE": 1.5514485836029053,
|
| 19 |
+
"MAPE": 0.03522465378046036,
|
| 20 |
+
"RMSE": 3.624075412750244
|
| 21 |
+
}
|
| 22 |
+
}
|
PEMS-BAY_100_12_12/92df827e277793626a6d4d8a1179deec/test_results.npz
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5d6bfc1f352056ece5f81009b35062f59b31bf4e2f09a746b2b1f6136a928ce5
|
| 3 |
+
size 486720762
|
PEMS-BAY_100_12_12/92df827e277793626a6d4d8a1179deec/training_log_20250525174410.log
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2025-05-25 17:44:10,337 - easytorch-training - INFO - Initializing training.
|
| 2 |
+
2025-05-25 17:44:10,338 - easytorch-training - INFO - Set clip grad, param: {'max_norm': 5.0}
|
| 3 |
+
2025-05-25 17:44:10,338 - easytorch-training - INFO - Building training data loader.
|
| 4 |
+
2025-05-25 17:44:10,387 - easytorch-training - INFO - Train dataset length: 36459
|
| 5 |
+
2025-05-25 17:44:10,388 - easytorch-training - INFO - Set optim: Adam (
|
| 6 |
+
Parameter Group 0
|
| 7 |
+
amsgrad: False
|
| 8 |
+
betas: (0.9, 0.999)
|
| 9 |
+
capturable: False
|
| 10 |
+
differentiable: False
|
| 11 |
+
eps: 1e-08
|
| 12 |
+
foreach: None
|
| 13 |
+
fused: None
|
| 14 |
+
lr: 0.002
|
| 15 |
+
maximize: False
|
| 16 |
+
weight_decay: 1e-05
|
| 17 |
+
)
|
| 18 |
+
2025-05-25 17:44:10,388 - easytorch-training - INFO - Set lr_scheduler: <torch.optim.lr_scheduler.MultiStepLR object at 0x74819751f640>
|
| 19 |
+
2025-05-25 17:44:10,390 - easytorch-training - INFO - Initializing validation.
|
| 20 |
+
2025-05-25 17:44:10,390 - easytorch-training - INFO - Building val data loader.
|
| 21 |
+
2025-05-25 17:44:10,398 - easytorch-training - INFO - Validation dataset length: 5188
|
| 22 |
+
2025-05-25 17:44:10,413 - easytorch-training - INFO - Test dataset length: 10400
|
| 23 |
+
2025-05-25 17:44:10,414 - easytorch-training - INFO - Number of parameters: 91038382
|
| 24 |
+
2025-05-25 17:44:10,414 - easytorch-training - INFO - Epoch 1 / 100
|
| 25 |
+
2025-05-25 17:44:10,652 - easytorch-training - ERROR - Traceback (most recent call last):
|
| 26 |
+
File "/home/UNT/cy0265/.local/lib/python3.10/site-packages/easytorch/launcher/launcher.py", line 31, in training_func
|
| 27 |
+
runner.train(cfg)
|
| 28 |
+
File "/mnt/RAID/BasicTS/basicts/runners/base_epoch_runner.py", line 432, in train
|
| 29 |
+
loss = self.train_iters(epoch, iter_index, data)
|
| 30 |
+
File "/mnt/RAID/BasicTS/basicts/runners/base_tsf_runner.py", line 340, in train_iters
|
| 31 |
+
forward_return = self.forward(data=data, epoch=epoch, iter_num=iter_num, train=True)
|
| 32 |
+
File "/mnt/RAID/BasicTS/basicts/runners/runner_zoo/simple_tsf_runner.py", line 101, in forward
|
| 33 |
+
model_return = self.model(history_data=history_data, future_data=future_data_4_dec,
|
| 34 |
+
File "/home/UNT/cy0265/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
|
| 35 |
+
return forward_call(*args, **kwargs)
|
| 36 |
+
File "/mnt/RAID/BasicTS/baselines/D2STGNN/arch/d2stgnn_arch.py", line 203, in forward
|
| 37 |
+
cross_out = self.decoder(cross_out, cross_out) # [B, N, C]
|
| 38 |
+
File "/home/UNT/cy0265/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
|
| 39 |
+
return forward_call(*args, **kwargs)
|
| 40 |
+
File "/home/UNT/cy0265/.local/lib/python3.10/site-packages/torch/nn/modules/transformer.py", line 369, in forward
|
| 41 |
+
output = mod(output, memory, tgt_mask=tgt_mask,
|
| 42 |
+
File "/home/UNT/cy0265/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
|
| 43 |
+
return forward_call(*args, **kwargs)
|
| 44 |
+
File "/home/UNT/cy0265/.local/lib/python3.10/site-packages/torch/nn/modules/transformer.py", line 712, in forward
|
| 45 |
+
x = x + self._sa_block(self.norm1(x), tgt_mask, tgt_key_padding_mask, tgt_is_causal)
|
| 46 |
+
File "/home/UNT/cy0265/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
|
| 47 |
+
return forward_call(*args, **kwargs)
|
| 48 |
+
File "/home/UNT/cy0265/.local/lib/python3.10/site-packages/torch/nn/modules/normalization.py", line 190, in forward
|
| 49 |
+
return F.layer_norm(
|
| 50 |
+
File "/home/UNT/cy0265/.local/lib/python3.10/site-packages/torch/nn/functional.py", line 2515, in layer_norm
|
| 51 |
+
return torch.layer_norm(input, normalized_shape, weight, bias, eps, torch.backends.cudnn.enabled)
|
| 52 |
+
RuntimeError: Given normalized_shape=[1024], expected input with shape [*, 1024], but got input of size[64, 4, 325, 256]
|
| 53 |
+
|