# Model configuration
model:
  model_name: "TickTransformerModelROPE"
  vocab_size: 979   # Vocabulary size for token embeddings
  embed_dim: 640    # Embedding dimension
  seq_len: 512      # Sequence length per tick
  dropout: 0.1      # Dropout rate
  # Embedder (non-causal transformer encoder)
  embedder_heads: 10
  embedder_layers: 6
  # Processor (GPT-style causal transformer for next-token prediction)
  processor_heads: 10
  processor_layers: 8
  # Decoder (non-causal transformer to decode embeddings to sequences)
  decoder_heads: 10
  decoder_layers: 6
  # Alive prediction head
  alive_hidden_dim: 512    # Hidden dimension for alive prediction head
  alive_hidden_layers: 1   # Number of hidden layers in alive prediction head
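  # Note: embed_dim (640) divides evenly by every head count (10), giving
  # 64-dimensional heads — a requirement if the embedder, processor, and
  # decoder use standard multi-head attention (an assumption about the
  # implementation, not stated in this file).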
# Data configuration
data:
  # Data dimensions (must match model)
  ticks_per_sample: 64   # Number of ticks in each training sample
  seq_len: 512           # Must match model.seq_len
  pad_token: 978         # Token ID used for padding sequences
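  # Note: 978 is the final ID in the 979-token vocabulary (vocab_size - 1),
  # which suggests the last vocabulary slot is reserved for padding.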
# Device configuration
device: 'cuda:1'   # PyTorch device string, e.g. 'cuda', 'cuda:<index>', or 'cpu'; 'cuda:1' selects GPU index 1
# wandb logging configuration
logging:
  project_name: 'tick-transformer-alive-fine-tuning'
  test: 2048
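
# A minimal consistency-check sketch, kept in comments so this file remains
# valid YAML. It assumes the config is loaded with PyYAML by the training
# script; the filename "config.yaml" is illustrative, not confirmed:
#
#   import yaml
#
#   with open("config.yaml") as f:
#       cfg = yaml.safe_load(f)
#
#   # seq_len is duplicated across sections and must agree
#   assert cfg["model"]["seq_len"] == cfg["data"]["seq_len"]
#   # each head count must divide the embedding dimension evenly
#   for key in ("embedder_heads", "processor_heads", "decoder_heads"):
#       assert cfg["model"]["embed_dim"] % cfg["model"][key] == 0
#   # the padding token must fall inside the vocabulary
#   assert cfg["data"]["pad_token"] < cfg["model"]["vocab_size"]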