# cs-net / tfm_duel_fine-tuning.yaml
# Last update by gary2oos, commit 69409ce (verified).
# NOTE(review): the two lines above are Hugging Face page metadata that was
# pasted into the file; commented out so the file parses as YAML.
# Model configuration
model:
model_name: "TickTransformerModelROPE"
vocab_size: 979 # Vocabulary size for token embeddings
embed_dim: 640 # Embedding dimension
seq_len: 512 # Sequence length per tick
dropout: 0.1 # Dropout rate
# Embedder (non-causal transformer encoder)
embedder_heads: 10
embedder_layers: 6
# Processor (GPT-style causal transformer for next token prediction)
processor_heads: 10
processor_layers: 8
# Decoder (non-causal transformer to decode embeddings to sequences)
decoder_heads: 10
decoder_layers: 6
# Duel prediction head
duel_hidden_dim: 1024 # Hidden dimension for duel prediction head
duel_hidden_layers: 2 # Number of hidden layers in duel prediction head
duel_player_embedding_dim: 64
# Data configuration
data:
# Data dimensions (must match model)
ticks_per_sample: 64 # Number of ticks in each training sample
seq_len: 512 # Must match model.seq_len
pad_token: 978 # Token ID used for padding sequences
# Device configuration
device: 'cuda:3' # 'cuda' or 'cpu'
# wandb logging configuration
logging:
project_name: 'tick-transformer-duel-fine-tuning'
test: 2048