# Model configuration
model:
  model_name: "TickTransformerModelROPE"
  vocab_size: 979           # Vocabulary size for token embeddings
  embed_dim: 640            # Embedding dimension
  seq_len: 512              # Sequence length per tick
  dropout: 0.1              # Dropout rate

  # Embedder (non-causal transformer encoder)
  embedder_heads: 10
  embedder_layers: 6

  # Processor (GPT-style causal transformer for next token prediction)
  processor_heads: 10
  processor_layers: 8

  # Decoder (non-causal transformer to decode embeddings to sequences)
  decoder_heads: 10
  decoder_layers: 6

  # Alive prediction head
  alive_hidden_dim: 512     # Hidden dimension for alive prediction head
  alive_hidden_layers: 1    # Number of hidden layers in alive prediction head

# Data configuration
data:
  # Data dimensions (must match model)
  ticks_per_sample: 64      # Number of ticks in each training sample
  seq_len: 512              # Must match model.seq_len
  pad_token: 978            # Token ID used for padding sequences

# Device configuration
device: 'cuda:1'            # 'cuda' or 'cpu'

# wandb logging configuration
logging:
  project_name: 'tick-transformer-alive-fine-tuning'
  test: 2048
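
# Consistency notes (assumptions, not enforced by this file):
# - embed_dim should divide evenly by each head count so per-head dimensions are
#   integral, e.g. embed_dim / embedder_heads = 640 / 10 = 64 dims per head
#   (likewise for processor_heads and decoder_heads).
# - pad_token appears to be the last vocabulary index: vocab_size - 1 = 979 - 1 = 978.
# - data.seq_len must equal model.seq_len (both 512), as noted above.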