{
  "name": "megachat-v8-embd384-chatbot",
  "dataset": "megachat-v8",
  "curriculum": "firehose",
  "config": {
    "n_embd": 384,
    "n_head": 12,
    "n_layer": 6,
    "block_size": 1024,
    "vocab_type": "harris",
    "segments": 2048,
    "lr": 0.0004,
    "steps": 60000,
    "checkpoint_every": 100,
    "sample_every": 100,
    "sample_tokens": 420,
    "lr_schedule": "cosine",
    "batch_size": 8,
    "exclude_sources": [
      "real-tool-calls",
      "synthetic-bash",
      "tool-calls",
      "hermes3-code",
      "hermes3-math",
      "chat",
      "smoltalk",
      "oasst",
      "dolly",
      "unfirehose-chat",
      "synthetic-chat",
      "irc",
      "unweapon",
      "repo-docs",
      "repo-docstrings",
      "repo-commits"
    ],
    "source_floors": {
      "hermes3-general": 0.8,
      "hermes3-creative": 0.7,
      "hermes3-roleplay": 0.7,
      "dictionary": 0.7,
      "gutenberg": 0.7
    },
    "bandit_focus_count": 2,
    "bandit_dice_sides": 3
  },
  "_notes": {
    "goal": "ANDREA phase 2 — focused curriculum, 5 arms, 2-eye bandit",
    "params": "12.8M",
    "arms": "hermes3-general, hermes3-creative, hermes3-roleplay, dictionary, gutenberg",
    "bandit": "2 focus eyes, 1d3: 0=2random, 1=1random+1bandit, 2=2bandit",
    "estimated_time": "remaining ~18K steps"
  }
}