---
# Model configuration
model:
  model_name: "TickTransformerModelROPE"
  vocab_size: 979  # Vocabulary size for token embeddings
  embed_dim: 640  # Embedding dimension
  seq_len: 512  # Sequence length per tick
  dropout: 0.1  # Dropout rate

  # Embedder (non-causal transformer encoder)
  embedder_heads: 10
  embedder_layers: 6

  # Processor (GPT-style causal transformer for next token prediction)
  processor_heads: 10
  processor_layers: 8

  # Decoder (non-causal transformer to decode embeddings to sequences)
  decoder_heads: 10
  decoder_layers: 6

  # Duel prediction head
  duel_hidden_dim: 1024  # Hidden dimension for duel prediction head
  duel_hidden_layers: 2  # Number of hidden layers in duel prediction head
  duel_player_embedding_dim: 64

# Data configuration
data:
  # Data dimensions (must match model)
  ticks_per_sample: 64  # Number of ticks in each training sample
  seq_len: 512  # Must match model.seq_len
  pad_token: 978  # Token ID used for padding sequences

# Device configuration
device: 'cuda:3'  # 'cuda' or 'cpu'

# wandb logging configuration
logging:
  project_name: 'tick-transformer-duel-fine-tuning'
  # NOTE(review): nesting of this key was ambiguous in the mangled source;
  # it followed project_name directly, so it is placed under `logging` —
  # confirm against the consuming code.
  test: 2048