# cs-net / tfm_alive_fine-tuning.yaml
# Source commit: 486b8e5 (verified) — "Update tfm_alive_fine-tuning.yaml"
---
# Model configuration
model:
  model_name: "TickTransformerModelROPE"
  vocab_size: 979   # Vocabulary size for token embeddings
  embed_dim: 640    # Embedding dimension
  seq_len: 512      # Sequence length per tick
  dropout: 0.1      # Dropout rate

  # Embedder (non-causal transformer encoder)
  embedder_heads: 10
  embedder_layers: 6

  # Processor (GPT-style causal transformer for next token prediction)
  processor_heads: 10
  processor_layers: 8

  # Decoder (non-causal transformer to decode embeddings to sequences)
  decoder_heads: 10
  decoder_layers: 6

  # Alive prediction head
  alive_hidden_dim: 512    # Hidden dimension for alive prediction head
  alive_hidden_layers: 1   # Number of hidden layers in alive prediction head

# Data configuration
data:
  # Data dimensions (must match model)
  ticks_per_sample: 64   # Number of ticks in each training sample
  seq_len: 512           # Must match model.seq_len
  pad_token: 978         # Token ID used for padding sequences

# Device configuration
device: "cuda:1"   # "cpu", "cuda", or an indexed device such as "cuda:1"

# wandb logging configuration
logging:
  project_name: "tick-transformer-alive-fine-tuning"
  test: 2048   # NOTE(review): meaning of this key is not evident here — confirm against the training script