{
  "N": 20,
  "base_config_dict": {
    "_name_or_path": "TinyLlama/TinyLlama_v1.1",
    "architectures": [
      "LlamaForCausalLM"
    ],
    "attention_bias": false,
    "attention_dropout": 0.0,
    "bos_token_id": 1,
    "chunk_size_feed_forward": 0,
    "dtype": "float32",
    "eos_token_id": 2,
    "head_dim": 64,
    "hidden_act": "silu",
    "hidden_size": 2048,
    "id2label": {
      "0": "LABEL_0",
      "1": "LABEL_1"
    },
    "initializer_range": 0.02,
    "intermediate_size": 5632,
    "is_encoder_decoder": false,
    "label2id": {
      "LABEL_0": 0,
      "LABEL_1": 1
    },
    "max_position_embeddings": 2048,
    "mlp_bias": false,
    "model_type": "llama",
    "num_attention_heads": 32,
    "num_hidden_layers": 22,
    "num_key_value_heads": 4,
    "output_attentions": false,
    "output_hidden_states": false,
    "pad_token_id": null,
    "pretraining_tp": 1,
    "problem_type": null,
    "return_dict": true,
    "rms_norm_eps": 1e-05,
    "rope_parameters": {
      "rope_theta": 10000.0,
      "rope_type": "default"
    },
    "tie_word_embeddings": false,
    "transformers_version": "5.3.0",
    "use_cache": true,
    "vocab_size": 32000
  },
  "base_model_name_or_path": "TinyLlama/TinyLlama_v1.1",
  "convergence_epsilon": 0.01,
  "decoder_layer_indices": [
    21
  ],
  "encoder_layer_indices": [
    0
  ],
  "halting_strategy": "threshold",
  "hidden_size": 2048,
  "model_type": "lds",
  "q_eval_interval": 1,
  "q_threshold": 0.9,
  "reasoning_layer_indices": [
    1,
    2,
    3,
    4,
    5,
    6,
    7,
    8,
    9,
    10,
    11,
    12,
    13,
    14,
    15,
    16,
    17,
    18,
    19,
    20
  ],
  "transformers_version": "5.3.0",
  "vocab_size": 32000
}