robzilla committed on
Commit
eb42bae
·
1 Parent(s): 09db4aa

Release BibleAI: HF + GGUF + Ollama bundle

Browse files
.gitattributes CHANGED
@@ -1,35 +1,4 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
  *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  *.safetensors filter=lfs diff=lfs merge=lfs -text
2
+ *.gguf filter=lfs diff=lfs merge=lfs -text
3
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
4
+ adapters/*/tokenizer.json filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
README.md CHANGED
@@ -1,3 +1,46 @@
1
- ---
2
- license: mit
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ base_model: google/gemma-4-e4b
4
+ tags:
5
+ - bible
6
+ - theology
7
+ - gemma
8
+ - gguf
9
+ - ollama
10
+ library_name: transformers
11
+ pipeline_tag: text-generation
12
+ ---
13
+
14
+ # BibleAI
15
+
16
+ BibleAI is a Gemma 4 E4B model tuned with CPT + SFT + DPO for Bible/theology study use cases.
17
+
18
+ ## Model Variants
19
+ - `hf/model.safetensors` (merged HF weights)
20
+ - `gguf/final_merged.BF16.gguf`
21
+ - `gguf/final_merged.Q8_0.gguf`
22
+
23
+ ## Checksums
24
+ - `hf/model.safetensors`
25
+ `3163ffdcf841d829632af5932ccda65c893fcca63b84605df34aed275db66929`
26
+ - `gguf/final_merged.BF16.gguf`
27
+ `e07e38d28d3032d3b438b7b8b90cbf4cf5e66177b52e8f60673cac3586dc10a1`
28
+ - `gguf/final_merged.Q8_0.gguf`
29
+ `3c7f5f9caf080fe44720f16b5f4b5e7e95a097d6be3d1d8d89aea22e8574bad1`
30
+
31
+ Full checksum file: `checksums/sha256.txt`
32
+
33
+ ## Ollama
34
+ - Q8:
35
+ `ollama create bibleaiq8 -f ollama/Modelfile.q8`
36
+ - BF16:
37
+ `ollama create bibleaibf16 -f ollama/Modelfile.bf16`
38
+
39
+ ## Included Artifacts
40
+ - `hf/` merged HF model files
41
+ - `gguf/` quantized GGUF exports
42
+ - `ollama/` ready Modelfiles
43
+ - `adapters/` final SFT and DPO adapters
44
+ - `logs/` training logs
45
+ - `checksums/` integrity hashes
46
+ - `docs/` release documentation
adapters/dpo_final/README.md ADDED
@@ -0,0 +1,210 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: /workspace/outputs/sft_cpt_merged
3
+ library_name: peft
4
+ pipeline_tag: text-generation
5
+ tags:
6
+ - base_model:adapter:/workspace/outputs/sft_cpt_merged
7
+ - dpo
8
+ - lora
9
+ - transformers
10
+ - trl
11
+ - unsloth
12
+ ---
13
+
14
+ # Model Card for Model ID
15
+
16
+ <!-- Provide a quick summary of what the model is/does. -->
17
+
18
+
19
+
20
+ ## Model Details
21
+
22
+ ### Model Description
23
+
24
+ <!-- Provide a longer summary of what this model is. -->
25
+
26
+
27
+
28
+ - **Developed by:** [More Information Needed]
29
+ - **Funded by [optional]:** [More Information Needed]
30
+ - **Shared by [optional]:** [More Information Needed]
31
+ - **Model type:** [More Information Needed]
32
+ - **Language(s) (NLP):** [More Information Needed]
33
+ - **License:** [More Information Needed]
34
+ - **Finetuned from model [optional]:** [More Information Needed]
35
+
36
+ ### Model Sources [optional]
37
+
38
+ <!-- Provide the basic links for the model. -->
39
+
40
+ - **Repository:** [More Information Needed]
41
+ - **Paper [optional]:** [More Information Needed]
42
+ - **Demo [optional]:** [More Information Needed]
43
+
44
+ ## Uses
45
+
46
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
47
+
48
+ ### Direct Use
49
+
50
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
51
+
52
+ [More Information Needed]
53
+
54
+ ### Downstream Use [optional]
55
+
56
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
57
+
58
+ [More Information Needed]
59
+
60
+ ### Out-of-Scope Use
61
+
62
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
63
+
64
+ [More Information Needed]
65
+
66
+ ## Bias, Risks, and Limitations
67
+
68
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
69
+
70
+ [More Information Needed]
71
+
72
+ ### Recommendations
73
+
74
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
75
+
76
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
77
+
78
+ ## How to Get Started with the Model
79
+
80
+ Use the code below to get started with the model.
81
+
82
+ [More Information Needed]
83
+
84
+ ## Training Details
85
+
86
+ ### Training Data
87
+
88
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
89
+
90
+ [More Information Needed]
91
+
92
+ ### Training Procedure
93
+
94
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
95
+
96
+ #### Preprocessing [optional]
97
+
98
+ [More Information Needed]
99
+
100
+
101
+ #### Training Hyperparameters
102
+
103
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
104
+
105
+ #### Speeds, Sizes, Times [optional]
106
+
107
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
108
+
109
+ [More Information Needed]
110
+
111
+ ## Evaluation
112
+
113
+ <!-- This section describes the evaluation protocols and provides the results. -->
114
+
115
+ ### Testing Data, Factors & Metrics
116
+
117
+ #### Testing Data
118
+
119
+ <!-- This should link to a Dataset Card if possible. -->
120
+
121
+ [More Information Needed]
122
+
123
+ #### Factors
124
+
125
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
126
+
127
+ [More Information Needed]
128
+
129
+ #### Metrics
130
+
131
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
132
+
133
+ [More Information Needed]
134
+
135
+ ### Results
136
+
137
+ [More Information Needed]
138
+
139
+ #### Summary
140
+
141
+
142
+
143
+ ## Model Examination [optional]
144
+
145
+ <!-- Relevant interpretability work for the model goes here -->
146
+
147
+ [More Information Needed]
148
+
149
+ ## Environmental Impact
150
+
151
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
152
+
153
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
154
+
155
+ - **Hardware Type:** [More Information Needed]
156
+ - **Hours used:** [More Information Needed]
157
+ - **Cloud Provider:** [More Information Needed]
158
+ - **Compute Region:** [More Information Needed]
159
+ - **Carbon Emitted:** [More Information Needed]
160
+
161
+ ## Technical Specifications [optional]
162
+
163
+ ### Model Architecture and Objective
164
+
165
+ [More Information Needed]
166
+
167
+ ### Compute Infrastructure
168
+
169
+ [More Information Needed]
170
+
171
+ #### Hardware
172
+
173
+ [More Information Needed]
174
+
175
+ #### Software
176
+
177
+ [More Information Needed]
178
+
179
+ ## Citation [optional]
180
+
181
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
182
+
183
+ **BibTeX:**
184
+
185
+ [More Information Needed]
186
+
187
+ **APA:**
188
+
189
+ [More Information Needed]
190
+
191
+ ## Glossary [optional]
192
+
193
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
194
+
195
+ [More Information Needed]
196
+
197
+ ## More Information [optional]
198
+
199
+ [More Information Needed]
200
+
201
+ ## Model Card Authors [optional]
202
+
203
+ [More Information Needed]
204
+
205
+ ## Model Card Contact
206
+
207
+ [More Information Needed]
208
+ ### Framework versions
209
+
210
+ - PEFT 0.19.0
adapters/dpo_final/adapter_config.json ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alora_invocation_tokens": null,
3
+ "alpha_pattern": {},
4
+ "arrow_config": null,
5
+ "auto_mapping": {
6
+ "base_model_class": "Gemma4ForConditionalGeneration",
7
+ "parent_library": "transformers.models.gemma4.modeling_gemma4",
8
+ "unsloth_fixed": true
9
+ },
10
+ "base_model_name_or_path": "/workspace/outputs/sft_cpt_merged",
11
+ "bias": "none",
12
+ "corda_config": null,
13
+ "ensure_weight_tying": false,
14
+ "eva_config": null,
15
+ "exclude_modules": null,
16
+ "fan_in_fan_out": false,
17
+ "inference_mode": true,
18
+ "init_lora_weights": true,
19
+ "layer_replication": null,
20
+ "layers_pattern": null,
21
+ "layers_to_transform": null,
22
+ "loftq_config": {},
23
+ "lora_alpha": 64,
24
+ "lora_bias": false,
25
+ "lora_dropout": 0.05,
26
+ "lora_ga_config": null,
27
+ "megatron_config": null,
28
+ "megatron_core": "megatron.core",
29
+ "modules_to_save": null,
30
+ "peft_type": "LORA",
31
+ "peft_version": "0.19.0",
32
+ "qalora_group_size": 16,
33
+ "r": 32,
34
+ "rank_pattern": {},
35
+ "revision": null,
36
+ "target_modules": [
37
+ "gate_proj",
38
+ "up_proj",
39
+ "o_proj",
40
+ "k_proj",
41
+ "q_proj",
42
+ "v_proj",
43
+ "down_proj"
44
+ ],
45
+ "target_parameters": null,
46
+ "task_type": "CAUSAL_LM",
47
+ "trainable_token_indices": null,
48
+ "use_bdlora": null,
49
+ "use_dora": false,
50
+ "use_qalora": false,
51
+ "use_rslora": false
52
+ }
adapters/dpo_final/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:280ec145dbfd1793ca89025be89888aa4d2fc2dc02bce9d92211f02375fef837
3
+ size 339349544
adapters/dpo_final/chat_template.jinja ADDED
@@ -0,0 +1 @@
 
 
1
+ {% for message in messages %}{{ '<start_of_turn>' + message['role'] + '\n' + message['content'] + '<end_of_turn>\n' }}{% endfor %}{% if add_generation_prompt %}{{ '<start_of_turn>model\n' }}{% endif %}
adapters/dpo_final/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12bac982b793c44b03d52a250a9f0d0b666813da566b910c24a6da0695fd11e6
3
+ size 32170070
adapters/dpo_final/tokenizer_config.json ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "audio_token": "<|audio|>",
3
+ "backend": "tokenizers",
4
+ "boa_token": "<|audio>",
5
+ "boi_token": "<|image>",
6
+ "bos_token": "<bos>",
7
+ "eoa_token": "<audio|>",
8
+ "eoc_token": "<channel|>",
9
+ "eoi_token": "<image|>",
10
+ "eos_token": "<eos>",
11
+ "eot_token": "<turn|>",
12
+ "escape_token": "<|\"|>",
13
+ "etc_token": "<tool_call|>",
14
+ "etd_token": "<tool|>",
15
+ "etr_token": "<tool_response|>",
16
+ "extra_special_tokens": [],
17
+ "image_token": "<|image|>",
18
+ "is_local": true,
19
+ "mask_token": "<mask>",
20
+ "model_max_length": 1000000000000000019884624838656,
21
+ "model_specific_special_tokens": {
22
+ "audio_token": "<|audio|>",
23
+ "boa_token": "<|audio>",
24
+ "boi_token": "<|image>",
25
+ "eoa_token": "<audio|>",
26
+ "eoc_token": "<channel|>",
27
+ "eoi_token": "<image|>",
28
+ "eot_token": "<turn|>",
29
+ "escape_token": "<|\"|>",
30
+ "etc_token": "<tool_call|>",
31
+ "etd_token": "<tool|>",
32
+ "etr_token": "<tool_response|>",
33
+ "image_token": "<|image|>",
34
+ "soc_token": "<|channel>",
35
+ "sot_token": "<|turn>",
36
+ "stc_token": "<|tool_call>",
37
+ "std_token": "<|tool>",
38
+ "str_token": "<|tool_response>",
39
+ "think_token": "<|think|>"
40
+ },
41
+ "pad_token": "<pad>",
42
+ "padding_side": "left",
43
+ "processor_class": "Gemma4Processor",
44
+ "soc_token": "<|channel>",
45
+ "sot_token": "<|turn>",
46
+ "stc_token": "<|tool_call>",
47
+ "std_token": "<|tool>",
48
+ "str_token": "<|tool_response>",
49
+ "think_token": "<|think|>",
50
+ "tokenizer_class": "GemmaTokenizer",
51
+ "unk_token": "<unk>"
52
+ }
adapters/sft_final/README.md ADDED
@@ -0,0 +1,210 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: /root/cpt_merged
3
+ library_name: peft
4
+ pipeline_tag: text-generation
5
+ tags:
6
+ - base_model:adapter:/root/cpt_merged
7
+ - lora
8
+ - sft
9
+ - transformers
10
+ - trl
11
+ - unsloth
12
+ ---
13
+
14
+ # Model Card for Model ID
15
+
16
+ <!-- Provide a quick summary of what the model is/does. -->
17
+
18
+
19
+
20
+ ## Model Details
21
+
22
+ ### Model Description
23
+
24
+ <!-- Provide a longer summary of what this model is. -->
25
+
26
+
27
+
28
+ - **Developed by:** [More Information Needed]
29
+ - **Funded by [optional]:** [More Information Needed]
30
+ - **Shared by [optional]:** [More Information Needed]
31
+ - **Model type:** [More Information Needed]
32
+ - **Language(s) (NLP):** [More Information Needed]
33
+ - **License:** [More Information Needed]
34
+ - **Finetuned from model [optional]:** [More Information Needed]
35
+
36
+ ### Model Sources [optional]
37
+
38
+ <!-- Provide the basic links for the model. -->
39
+
40
+ - **Repository:** [More Information Needed]
41
+ - **Paper [optional]:** [More Information Needed]
42
+ - **Demo [optional]:** [More Information Needed]
43
+
44
+ ## Uses
45
+
46
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
47
+
48
+ ### Direct Use
49
+
50
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
51
+
52
+ [More Information Needed]
53
+
54
+ ### Downstream Use [optional]
55
+
56
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
57
+
58
+ [More Information Needed]
59
+
60
+ ### Out-of-Scope Use
61
+
62
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
63
+
64
+ [More Information Needed]
65
+
66
+ ## Bias, Risks, and Limitations
67
+
68
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
69
+
70
+ [More Information Needed]
71
+
72
+ ### Recommendations
73
+
74
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
75
+
76
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
77
+
78
+ ## How to Get Started with the Model
79
+
80
+ Use the code below to get started with the model.
81
+
82
+ [More Information Needed]
83
+
84
+ ## Training Details
85
+
86
+ ### Training Data
87
+
88
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
89
+
90
+ [More Information Needed]
91
+
92
+ ### Training Procedure
93
+
94
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
95
+
96
+ #### Preprocessing [optional]
97
+
98
+ [More Information Needed]
99
+
100
+
101
+ #### Training Hyperparameters
102
+
103
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
104
+
105
+ #### Speeds, Sizes, Times [optional]
106
+
107
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
108
+
109
+ [More Information Needed]
110
+
111
+ ## Evaluation
112
+
113
+ <!-- This section describes the evaluation protocols and provides the results. -->
114
+
115
+ ### Testing Data, Factors & Metrics
116
+
117
+ #### Testing Data
118
+
119
+ <!-- This should link to a Dataset Card if possible. -->
120
+
121
+ [More Information Needed]
122
+
123
+ #### Factors
124
+
125
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
126
+
127
+ [More Information Needed]
128
+
129
+ #### Metrics
130
+
131
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
132
+
133
+ [More Information Needed]
134
+
135
+ ### Results
136
+
137
+ [More Information Needed]
138
+
139
+ #### Summary
140
+
141
+
142
+
143
+ ## Model Examination [optional]
144
+
145
+ <!-- Relevant interpretability work for the model goes here -->
146
+
147
+ [More Information Needed]
148
+
149
+ ## Environmental Impact
150
+
151
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
152
+
153
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
154
+
155
+ - **Hardware Type:** [More Information Needed]
156
+ - **Hours used:** [More Information Needed]
157
+ - **Cloud Provider:** [More Information Needed]
158
+ - **Compute Region:** [More Information Needed]
159
+ - **Carbon Emitted:** [More Information Needed]
160
+
161
+ ## Technical Specifications [optional]
162
+
163
+ ### Model Architecture and Objective
164
+
165
+ [More Information Needed]
166
+
167
+ ### Compute Infrastructure
168
+
169
+ [More Information Needed]
170
+
171
+ #### Hardware
172
+
173
+ [More Information Needed]
174
+
175
+ #### Software
176
+
177
+ [More Information Needed]
178
+
179
+ ## Citation [optional]
180
+
181
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
182
+
183
+ **BibTeX:**
184
+
185
+ [More Information Needed]
186
+
187
+ **APA:**
188
+
189
+ [More Information Needed]
190
+
191
+ ## Glossary [optional]
192
+
193
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
194
+
195
+ [More Information Needed]
196
+
197
+ ## More Information [optional]
198
+
199
+ [More Information Needed]
200
+
201
+ ## Model Card Authors [optional]
202
+
203
+ [More Information Needed]
204
+
205
+ ## Model Card Contact
206
+
207
+ [More Information Needed]
208
+ ### Framework versions
209
+
210
+ - PEFT 0.19.0
adapters/sft_final/adapter_config.json ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alora_invocation_tokens": null,
3
+ "alpha_pattern": {},
4
+ "arrow_config": null,
5
+ "auto_mapping": {
6
+ "base_model_class": "Gemma4ForConditionalGeneration",
7
+ "parent_library": "transformers.models.gemma4.modeling_gemma4",
8
+ "unsloth_fixed": true
9
+ },
10
+ "base_model_name_or_path": "/root/cpt_merged",
11
+ "bias": "none",
12
+ "corda_config": null,
13
+ "ensure_weight_tying": false,
14
+ "eva_config": null,
15
+ "exclude_modules": null,
16
+ "fan_in_fan_out": false,
17
+ "inference_mode": true,
18
+ "init_lora_weights": true,
19
+ "layer_replication": null,
20
+ "layers_pattern": null,
21
+ "layers_to_transform": null,
22
+ "loftq_config": {},
23
+ "lora_alpha": 128,
24
+ "lora_bias": false,
25
+ "lora_dropout": 0.05,
26
+ "lora_ga_config": null,
27
+ "megatron_config": null,
28
+ "megatron_core": "megatron.core",
29
+ "modules_to_save": null,
30
+ "peft_type": "LORA",
31
+ "peft_version": "0.19.0",
32
+ "qalora_group_size": 16,
33
+ "r": 64,
34
+ "rank_pattern": {},
35
+ "revision": null,
36
+ "target_modules": [
37
+ "gate_proj",
38
+ "q_proj",
39
+ "v_proj",
40
+ "o_proj",
41
+ "down_proj",
42
+ "up_proj",
43
+ "k_proj"
44
+ ],
45
+ "target_parameters": null,
46
+ "task_type": "CAUSAL_LM",
47
+ "trainable_token_indices": null,
48
+ "use_bdlora": null,
49
+ "use_dora": false,
50
+ "use_qalora": false,
51
+ "use_rslora": false
52
+ }
adapters/sft_final/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9ffcf8419e82cddfe88a4521a863f0d4c97e4d5c3303ba96b268d7f299d0408
3
+ size 678564160
adapters/sft_final/chat_template.jinja ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {% for message in messages %}{{ '<start_of_turn>' + message['role'] + '
2
+ ' + message['content'] + '<end_of_turn>
3
+ ' }}{% endfor %}{% if add_generation_prompt %}{{ '<start_of_turn>model
4
+ ' }}{% endif %}
adapters/sft_final/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12bac982b793c44b03d52a250a9f0d0b666813da566b910c24a6da0695fd11e6
3
+ size 32170070
adapters/sft_final/tokenizer_config.json ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "audio_token": "<|audio|>",
3
+ "backend": "tokenizers",
4
+ "boa_token": "<|audio>",
5
+ "boi_token": "<|image>",
6
+ "bos_token": "<bos>",
7
+ "eoa_token": "<audio|>",
8
+ "eoc_token": "<channel|>",
9
+ "eoi_token": "<image|>",
10
+ "eos_token": "<eos>",
11
+ "eot_token": "<turn|>",
12
+ "escape_token": "<|\"|>",
13
+ "etc_token": "<tool_call|>",
14
+ "etd_token": "<tool|>",
15
+ "etr_token": "<tool_response|>",
16
+ "extra_special_tokens": [],
17
+ "image_token": "<|image|>",
18
+ "is_local": true,
19
+ "mask_token": "<mask>",
20
+ "model_max_length": 1000000000000000019884624838656,
21
+ "model_specific_special_tokens": {
22
+ "audio_token": "<|audio|>",
23
+ "boa_token": "<|audio>",
24
+ "boi_token": "<|image>",
25
+ "eoa_token": "<audio|>",
26
+ "eoc_token": "<channel|>",
27
+ "eoi_token": "<image|>",
28
+ "eot_token": "<turn|>",
29
+ "escape_token": "<|\"|>",
30
+ "etc_token": "<tool_call|>",
31
+ "etd_token": "<tool|>",
32
+ "etr_token": "<tool_response|>",
33
+ "image_token": "<|image|>",
34
+ "soc_token": "<|channel>",
35
+ "sot_token": "<|turn>",
36
+ "stc_token": "<|tool_call>",
37
+ "std_token": "<|tool>",
38
+ "str_token": "<|tool_response>",
39
+ "think_token": "<|think|>"
40
+ },
41
+ "pad_token": "<pad>",
42
+ "padding_side": "left",
43
+ "processor_class": "Gemma4Processor",
44
+ "soc_token": "<|channel>",
45
+ "sot_token": "<|turn>",
46
+ "stc_token": "<|tool_call>",
47
+ "std_token": "<|tool>",
48
+ "str_token": "<|tool_response>",
49
+ "think_token": "<|think|>",
50
+ "tokenizer_class": "GemmaTokenizer",
51
+ "unk_token": "<unk>"
52
+ }
checksums/sha256.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ 3163ffdcf841d829632af5932ccda65c893fcca63b84605df34aed275db66929 hf/model.safetensors
2
+ e07e38d28d3032d3b438b7b8b90cbf4cf5e66177b52e8f60673cac3586dc10a1 gguf/final_merged.BF16.gguf
3
+ 3c7f5f9caf080fe44720f16b5f4b5e7e95a097d6be3d1d8d89aea22e8574bad1 gguf/final_merged.Q8_0.gguf
4
+ a9ffcf8419e82cddfe88a4521a863f0d4c97e4d5c3303ba96b268d7f299d0408 adapters/sft_final/adapter_model.safetensors
5
+ 280ec145dbfd1793ca89025be89888aa4d2fc2dc02bce9d92211f02375fef837 adapters/dpo_final/adapter_model.safetensors
6
+
7
+ # Verified previously on downloaded HF merged file:
8
+ # 3163ffdcf841d829632af5932ccda65c893fcca63b84605df34aed275db66929 hf/model.safetensors
config.json ADDED
@@ -0,0 +1,196 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Gemma4ForConditionalGeneration"
4
+ ],
5
+ "audio_config": {
6
+ "_name_or_path": "",
7
+ "architectures": null,
8
+ "attention_chunk_size": 12,
9
+ "attention_context_left": 13,
10
+ "attention_context_right": 0,
11
+ "attention_invalid_logits_value": -1000000000.0,
12
+ "attention_logit_cap": 50.0,
13
+ "chunk_size_feed_forward": 0,
14
+ "conv_kernel_size": 5,
15
+ "torch_dtype": "bfloat16",
16
+ "gradient_clipping": 10000000000.0,
17
+ "hidden_act": "silu",
18
+ "hidden_size": 1024,
19
+ "id2label": {
20
+ "0": "LABEL_0",
21
+ "1": "LABEL_1"
22
+ },
23
+ "initializer_range": 0.02,
24
+ "is_encoder_decoder": false,
25
+ "label2id": {
26
+ "LABEL_0": 0,
27
+ "LABEL_1": 1
28
+ },
29
+ "model_type": "gemma4_audio",
30
+ "num_attention_heads": 8,
31
+ "num_hidden_layers": 12,
32
+ "output_attentions": false,
33
+ "output_hidden_states": false,
34
+ "output_proj_dims": 1536,
35
+ "problem_type": null,
36
+ "residual_weight": 0.5,
37
+ "return_dict": true,
38
+ "rms_norm_eps": 1e-06,
39
+ "subsampling_conv_channels": [
40
+ 128,
41
+ 32
42
+ ],
43
+ "use_clipped_linears": true
44
+ },
45
+ "audio_token_id": 258881,
46
+ "boa_token_id": 256000,
47
+ "boi_token_id": 255999,
48
+ "torch_dtype": "bfloat16",
49
+ "eoa_token_id": 258883,
50
+ "eoa_token_index": 258883,
51
+ "eoi_token_id": 258882,
52
+ "image_token_id": 258880,
53
+ "initializer_range": 0.02,
54
+ "model_name": "/workspace/outputs/sft_cpt_merged",
55
+ "model_type": "gemma4",
56
+ "pad_token_id": 0,
57
+ "text_config": {
58
+ "attention_bias": false,
59
+ "attention_dropout": 0.0,
60
+ "attention_k_eq_v": false,
61
+ "bos_token_id": 2,
62
+ "torch_dtype": "bfloat16",
63
+ "enable_moe_block": false,
64
+ "eos_token_id": 1,
65
+ "expert_intermediate_size": null,
66
+ "final_logit_softcapping": 30.0,
67
+ "global_head_dim": 512,
68
+ "head_dim": 256,
69
+ "hidden_activation": "gelu_pytorch_tanh",
70
+ "hidden_size": 2560,
71
+ "hidden_size_per_layer_input": 256,
72
+ "initializer_range": 0.02,
73
+ "intermediate_size": 10240,
74
+ "layer_types": [
75
+ "sliding_attention",
76
+ "sliding_attention",
77
+ "sliding_attention",
78
+ "sliding_attention",
79
+ "sliding_attention",
80
+ "full_attention",
81
+ "sliding_attention",
82
+ "sliding_attention",
83
+ "sliding_attention",
84
+ "sliding_attention",
85
+ "sliding_attention",
86
+ "full_attention",
87
+ "sliding_attention",
88
+ "sliding_attention",
89
+ "sliding_attention",
90
+ "sliding_attention",
91
+ "sliding_attention",
92
+ "full_attention",
93
+ "sliding_attention",
94
+ "sliding_attention",
95
+ "sliding_attention",
96
+ "sliding_attention",
97
+ "sliding_attention",
98
+ "full_attention",
99
+ "sliding_attention",
100
+ "sliding_attention",
101
+ "sliding_attention",
102
+ "sliding_attention",
103
+ "sliding_attention",
104
+ "full_attention",
105
+ "sliding_attention",
106
+ "sliding_attention",
107
+ "sliding_attention",
108
+ "sliding_attention",
109
+ "sliding_attention",
110
+ "full_attention",
111
+ "sliding_attention",
112
+ "sliding_attention",
113
+ "sliding_attention",
114
+ "sliding_attention",
115
+ "sliding_attention",
116
+ "full_attention"
117
+ ],
118
+ "max_position_embeddings": 131072,
119
+ "model_type": "gemma4_text",
120
+ "moe_intermediate_size": null,
121
+ "num_attention_heads": 8,
122
+ "num_experts": null,
123
+ "num_global_key_value_heads": null,
124
+ "num_hidden_layers": 42,
125
+ "num_key_value_heads": 2,
126
+ "num_kv_shared_layers": 18,
127
+ "pad_token_id": 0,
128
+ "rms_norm_eps": 1e-06,
129
+ "rope_parameters": {
130
+ "full_attention": {
131
+ "partial_rotary_factor": 0.25,
132
+ "rope_theta": 1000000.0,
133
+ "rope_type": "proportional"
134
+ },
135
+ "sliding_attention": {
136
+ "rope_theta": 10000.0,
137
+ "rope_type": "default"
138
+ }
139
+ },
140
+ "sliding_window": 512,
141
+ "tie_word_embeddings": true,
142
+ "top_k_experts": null,
143
+ "use_bidirectional_attention": null,
144
+ "use_cache": true,
145
+ "use_double_wide_mlp": false,
146
+ "vocab_size": 262144,
147
+ "vocab_size_per_layer_input": 262144
148
+ },
149
+ "tie_word_embeddings": true,
150
+ "unsloth_version": "2026.4.4",
151
+ "video_token_id": 258884,
152
+ "vision_config": {
153
+ "_name_or_path": "",
154
+ "architectures": null,
155
+ "attention_bias": false,
156
+ "attention_dropout": 0.0,
157
+ "chunk_size_feed_forward": 0,
158
+ "default_output_length": 280,
159
+ "torch_dtype": "bfloat16",
160
+ "global_head_dim": 64,
161
+ "head_dim": 64,
162
+ "hidden_activation": "gelu_pytorch_tanh",
163
+ "hidden_size": 768,
164
+ "id2label": {
165
+ "0": "LABEL_0",
166
+ "1": "LABEL_1"
167
+ },
168
+ "initializer_range": 0.02,
169
+ "intermediate_size": 3072,
170
+ "is_encoder_decoder": false,
171
+ "label2id": {
172
+ "LABEL_0": 0,
173
+ "LABEL_1": 1
174
+ },
175
+ "max_position_embeddings": 131072,
176
+ "model_type": "gemma4_vision",
177
+ "num_attention_heads": 12,
178
+ "num_hidden_layers": 16,
179
+ "num_key_value_heads": 12,
180
+ "output_attentions": false,
181
+ "output_hidden_states": false,
182
+ "patch_size": 16,
183
+ "pooling_kernel_size": 3,
184
+ "position_embedding_size": 10240,
185
+ "problem_type": null,
186
+ "return_dict": true,
187
+ "rms_norm_eps": 1e-06,
188
+ "rope_parameters": {
189
+ "rope_theta": 100.0,
190
+ "rope_type": "default"
191
+ },
192
+ "standardize": false,
193
+ "use_clipped_linears": true
194
+ },
195
+ "vision_soft_tokens_per_image": 280
196
+ }
docs/ARTIFACTS.md ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Artifact Manifest
2
+
3
+ Source workspace: /Users/robert/bibleai-backup/runpod_download_20260414/workspace
4
+ Release folder: /Users/robert/bibleai-backup/release/BibleAI-Gemma4-E4B-CPT-SFT-DPO-20260414
5
+
6
+ ## HF merged model
7
+ - hf/config.json
8
+ - hf/model.safetensors
9
+ - hf/tokenizer.json
10
+ - hf/tokenizer_config.json
11
+
12
+ ## GGUF
13
+ - gguf/final_merged.BF16.gguf
14
+ - gguf/final_merged.Q8_0.gguf
15
+
16
+ ## Ollama
17
+ - ollama/Modelfile.q8
18
+ - ollama/Modelfile.bf16
19
+ - ollama/Modelfile.canonical_project_reference
20
+
21
+ ## Final adapters
22
+ - adapters/sft_final/*
23
+ - adapters/dpo_final/*
24
+
25
+ ## Logs
26
+ - logs/train_resume_20260414_210603.log
27
+ - logs/dpo_fresh_20260415_003821.log
docs/PUBLISHING.md ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Hugging Face Publish
2
+
3
+ ## 1) Go to release folder
4
+
5
+ ```bash
6
+ cd /Users/robert/bibleai-backup/release/BibleAI-Gemma4-E4B-CPT-SFT-DPO-20260414
7
+ ```
8
+
9
+ ## 2) Verify GGUF checksums
10
+
11
+ ```bash
12
+ sha256sum gguf/final_merged.BF16.gguf gguf/final_merged.Q8_0.gguf
13
+ ```
14
+
15
+ Expected:
16
+
17
+ - `e07e38d28d3032d3b438b7b8b90cbf4cf5e66177b52e8f60673cac3586dc10a1` `final_merged.BF16.gguf`
18
+ - `3c7f5f9caf080fe44720f16b5f4b5e7e95a097d6be3d1d8d89aea22e8574bad1` `final_merged.Q8_0.gguf`
19
+
20
+ ## 3) Log in to Hugging Face
21
+
22
+ ```bash
23
+ huggingface-cli login
24
+ ```
25
+
26
+ or
27
+
28
+ ```bash
29
+ hf auth login
30
+ ```
31
+
32
+ ## 4) Push the full release
33
+
34
+ ```bash
35
+ HF_REPO=<your-username-or-org>/<your-model-repo> ./scripts/upload_to_hf.sh
36
+ ```
37
+
38
+ Example:
39
+
40
+ ```bash
41
+ HF_REPO=rhemabible/BibleAI-Gemma4-E4B-CPT-SFT-DPO ./scripts/upload_to_hf.sh
42
+ ```
43
+
44
+ This pushes:
45
+
46
+ - HF merged model (`hf/`)
47
+ - GGUF (`gguf/` BF16 + Q8_0)
48
+ - Ollama Modelfiles (`ollama/`)
49
+ - adapters, checksums, logs, docs
docs/RELEASE_SNAPSHOT.txt ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Release folder: /Users/robert/bibleai-backup/release/BibleAI-Gemma4-E4B-CPT-SFT-DPO-20260414
2
+
3
+ 37G .
4
+
5
+ ./README.md
6
+ ./checksums/sha256.txt
7
+ ./docs/ARTIFACTS.md
8
+ ./docs/RELEASE_SNAPSHOT.txt
9
+ ./gguf/final_merged.BF16.gguf
10
+ ./gguf/final_merged.Q8_0.gguf
11
+ ./hf/config.json
12
+ ./hf/model.safetensors
13
+ ./hf/tokenizer.json
14
+ ./hf/tokenizer_config.json
15
+ ./logs/dpo_fresh_20260415_003821.log
16
+ ./logs/train_resume_20260414_210603.log
17
+ ./ollama/Modelfile.bf16
18
+ ./ollama/Modelfile.canonical_project_reference
19
+ ./ollama/Modelfile.q8
20
+ ./scripts/upload_to_hf.sh
gguf/final_merged.BF16.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e07e38d28d3032d3b438b7b8b90cbf4cf5e66177b52e8f60673cac3586dc10a1
3
+ size 15053078304
gguf/final_merged.Q8_0.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c7f5f9caf080fe44720f16b5f4b5e7e95a097d6be3d1d8d89aea22e8574bad1
3
+ size 8031223584
logs/dpo_fresh_20260415_003821.log ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0
  0%| | 0/242 [00:00<?, ?it/s]Caching is incompatible with gradient checkpointing in Gemma4TextDecoderLayer. Setting `past_key_values=None`.
 
1
  0%| | 1/242 [00:11<46:43, 11.63s/it]
2
  1%| | 2/242 [00:15<28:22, 7.10s/it]
3
  1%| | 3/242 [00:19<22:28, 5.64s/it]
4
  2%|▏ | 4/242 [00:23<19:36, 4.94s/it]
5
  2%|▏ | 5/242 [00:27<18:03, 4.57s/it]
6
  2%|▏ | 6/242 [00:31<17:03, 4.34s/it]
7
  3%|▎ | 7/242 [00:35<16:55, 4.32s/it]
8
  3%|▎ | 8/242 [00:39<16:18, 4.18s/it]
9
  4%|▎ | 9/242 [00:43<15:53, 4.09s/it]
10
  4%|▍ | 10/242 [00:47<15:34, 4.03s/it]
11
 
12
  4%|▍ | 10/242 [00:47<15:34, 4.03s/it]
13
  5%|▍ | 11/242 [00:50<15:21, 3.99s/it]
14
  5%|▍ | 12/242 [00:54<15:11, 3.96s/it]
15
  5%|▌ | 13/242 [00:58<15:09, 3.97s/it]
16
  6%|▌ | 14/242 [01:02<14:57, 3.94s/it]
17
  6%|▌ | 15/242 [01:06<14:48, 3.91s/it]
18
  7%|▋ | 16/242 [01:10<14:38, 3.89s/it]
19
  7%|▋ | 17/242 [01:14<14:59, 4.00s/it]
20
  7%|▋ | 18/242 [01:18<14:45, 3.95s/it]
21
  8%|▊ | 19/242 [01:22<14:33, 3.92s/it]
22
  8%|▊ | 20/242 [01:26<14:25, 3.90s/it]
23
 
24
  8%|▊ | 20/242 [01:26<14:25, 3.90s/it]
25
  9%|▊ | 21/242 [01:30<14:17, 3.88s/it]
26
  9%|▉ | 22/242 [01:33<14:09, 3.86s/it]
27
  10%|▉ | 23/242 [01:37<14:04, 3.86s/it]
28
  10%|▉ | 24/242 [01:41<13:59, 3.85s/it]
29
  10%|█ | 25/242 [01:45<13:54, 3.85s/it]
30
  11%|█ | 26/242 [01:49<13:49, 3.84s/it]
31
  11%|█ | 27/242 [01:53<13:43, 3.83s/it]
32
  12%|█▏ | 28/242 [01:56<13:39, 3.83s/it]
33
  12%|█▏ | 29/242 [02:00<13:35, 3.83s/it]
34
  12%|█▏ | 30/242 [02:04<13:30, 3.82s/it]
35
 
36
  12%|█▏ | 30/242 [02:04<13:30, 3.82s/it]
37
  13%|█▎ | 31/242 [02:08<13:27, 3.83s/it]
38
  13%|█▎ | 32/242 [02:12<13:23, 3.82s/it]
39
  14%|█▎ | 33/242 [02:15<13:19, 3.83s/it]
40
  14%|█▍ | 34/242 [02:19<13:22, 3.86s/it]
41
  14%|█▍ | 35/242 [02:23<13:17, 3.85s/it]
42
  15%|█▍ | 36/242 [02:27<13:11, 3.84s/it]
43
  15%|█▌ | 37/242 [02:31<13:07, 3.84s/it]
44
  16%|█▌ | 38/242 [02:35<13:04, 3.84s/it]
45
  16%|█▌ | 39/242 [02:39<13:01, 3.85s/it]
46
  17%|█▋ | 40/242 [02:42<12:55, 3.84s/it]
47
 
48
  17%|█▋ | 40/242 [02:42<12:55, 3.84s/it]
49
  17%|█▋ | 41/242 [02:46<12:51, 3.84s/it]
50
  17%|█▋ | 42/242 [02:50<12:47, 3.84s/it]
51
  18%|█▊ | 43/242 [02:54<12:43, 3.84s/it]
52
  18%|█▊ | 44/242 [02:58<12:39, 3.84s/it]
53
  19%|█▊ | 45/242 [03:02<12:34, 3.83s/it]
54
  19%|█▉ | 46/242 [03:05<12:30, 3.83s/it]
55
  19%|█▉ | 47/242 [03:09<12:27, 3.83s/it]
56
  20%|█▉ | 48/242 [03:13<12:22, 3.83s/it]
57
  20%|██ | 49/242 [03:17<12:18, 3.83s/it]
58
  21%|██ | 50/242 [03:21<12:18, 3.85s/it]
59
 
60
  21%|██ | 50/242 [03:21<12:18, 3.85s/it]
61
  21%|██ | 51/242 [03:26<13:56, 4.38s/it]
62
  21%|██▏ | 52/242 [03:30<13:24, 4.23s/it]
63
  22%|██▏ | 53/242 [03:34<13:01, 4.13s/it]
64
  22%|██▏ | 54/242 [03:38<12:42, 4.06s/it]
65
  23%|██▎ | 55/242 [03:42<12:30, 4.01s/it]
66
  23%|██▎ | 56/242 [03:46<12:18, 3.97s/it]
67
  24%|██▎ | 57/242 [03:50<12:10, 3.95s/it]
68
  24%|██▍ | 58/242 [03:54<12:03, 3.93s/it]
69
  24%|██▍ | 59/242 [03:58<11:57, 3.92s/it]
70
  25%|██▍ | 60/242 [04:01<11:51, 3.91s/it]
71
 
72
  25%|██▍ | 60/242 [04:01<11:51, 3.91s/it]
73
  25%|██▌ | 61/242 [04:05<11:45, 3.90s/it]
74
  26%|██▌ | 62/242 [04:09<11:40, 3.89s/it]
75
  26%|██▌ | 63/242 [04:13<11:34, 3.88s/it]
76
  26%|██▋ | 64/242 [04:17<11:29, 3.88s/it]
77
  27%|██▋ | 65/242 [04:21<11:25, 3.87s/it]
78
  27%|██▋ | 66/242 [04:25<11:21, 3.87s/it]
79
  28%|██▊ | 67/242 [04:28<11:16, 3.87s/it]
80
  28%|██▊ | 68/242 [04:32<11:13, 3.87s/it]
81
  29%|██▊ | 69/242 [04:36<11:10, 3.88s/it]
82
  29%|██▉ | 70/242 [04:40<11:06, 3.88s/it]
83
 
84
  29%|██▉ | 70/242 [04:40<11:06, 3.88s/it]
85
  29%|██▉ | 71/242 [04:44<11:01, 3.87s/it]
86
  30%|██▉ | 72/242 [04:48<10:57, 3.87s/it]
87
  30%|███ | 73/242 [04:52<10:53, 3.87s/it]
88
  31%|███ | 74/242 [04:56<10:49, 3.87s/it]
89
  31%|███ | 75/242 [04:59<10:45, 3.87s/it]
90
  31%|███▏ | 76/242 [05:03<10:41, 3.87s/it]
91
  32%|███▏ | 77/242 [05:07<10:37, 3.86s/it]
92
  32%|███▏ | 78/242 [05:11<10:33, 3.86s/it]
93
  33%|███▎ | 79/242 [05:15<10:29, 3.86s/it]
94
  33%|███▎ | 80/242 [05:19<10:24, 3.86s/it]
95
 
96
  33%|███▎ | 80/242 [05:19<10:24, 3.86s/it]
97
  33%|███▎ | 81/242 [05:23<10:20, 3.86s/it]
98
  34%|███▍ | 82/242 [05:26<10:17, 3.86s/it]
99
  34%|███▍ | 83/242 [05:30<10:14, 3.86s/it]
100
  35%|███▍ | 84/242 [05:34<10:10, 3.87s/it]
101
  35%|███▌ | 85/242 [05:38<10:07, 3.87s/it]
102
  36%|███▌ | 86/242 [05:42<10:02, 3.86s/it]
103
  36%|███▌ | 87/242 [05:46<09:58, 3.86s/it]
104
  36%|███▋ | 88/242 [05:50<09:54, 3.86s/it]
105
  37%|███▋ | 89/242 [05:54<09:51, 3.86s/it]
106
  37%|███▋ | 90/242 [05:57<09:47, 3.86s/it]
107
 
108
  37%|███▋ | 90/242 [05:57<09:47, 3.86s/it]
109
  38%|███▊ | 91/242 [06:01<09:42, 3.86s/it]
110
  38%|███▊ | 92/242 [06:05<09:39, 3.86s/it]
111
  38%|███▊ | 93/242 [06:09<09:34, 3.86s/it]
112
  39%|███▉ | 94/242 [06:13<09:30, 3.85s/it]
113
  39%|███▉ | 95/242 [06:17<09:26, 3.85s/it]
114
  40%|███▉ | 96/242 [06:20<09:22, 3.85s/it]
115
  40%|████ | 97/242 [06:24<09:19, 3.86s/it]
116
  40%|████ | 98/242 [06:28<09:15, 3.86s/it]
117
  41%|████ | 99/242 [06:32<09:11, 3.86s/it]
118
  41%|████▏ | 100/242 [06:36<09:07, 3.86s/it]
119
 
120
  41%|████▏ | 100/242 [06:36<09:07, 3.86s/it]
121
  42%|████▏ | 101/242 [06:42<10:35, 4.51s/it]
122
  42%|████▏ | 102/242 [06:46<10:04, 4.32s/it]
123
  43%|████▎ | 103/242 [06:50<09:41, 4.18s/it]
124
  43%|████▎ | 104/242 [06:54<09:23, 4.09s/it]
125
  43%|████▎ | 105/242 [06:57<09:10, 4.02s/it]
126
  44%|████▍ | 106/242 [07:01<09:00, 3.97s/it]
127
  44%|████▍ | 107/242 [07:05<08:51, 3.94s/it]
128
  45%|████▍ | 108/242 [07:09<08:44, 3.92s/it]
129
  45%|████▌ | 109/242 [07:13<08:38, 3.90s/it]
130
  45%|████▌ | 110/242 [07:17<08:33, 3.89s/it]
131
 
132
  45%|████▌ | 110/242 [07:17<08:33, 3.89s/it]
133
  46%|████▌ | 111/242 [07:21<08:29, 3.89s/it]
134
  46%|████▋ | 112/242 [07:24<08:24, 3.88s/it]
135
  47%|████▋ | 113/242 [07:28<08:19, 3.87s/it]
136
  47%|████▋ | 114/242 [07:32<08:14, 3.87s/it]
137
  48%|████▊ | 115/242 [07:36<08:11, 3.87s/it]
138
  48%|████▊ | 116/242 [07:40<08:06, 3.86s/it]
139
  48%|████▊ | 117/242 [07:44<08:02, 3.86s/it]
140
  49%|████▉ | 118/242 [07:48<07:58, 3.86s/it]
141
  49%|████▉ | 119/242 [07:51<07:54, 3.86s/it]
142
  50%|████▉ | 120/242 [07:55<07:50, 3.86s/it]
143
 
144
  50%|████▉ | 120/242 [07:55<07:50, 3.86s/it]
145
  50%|█████ | 121/242 [07:59<07:45, 3.85s/it]
146
  50%|█████ | 122/242 [08:03<07:41, 3.85s/it]
147
  51%|█████ | 123/242 [08:07<07:37, 3.85s/it]
148
  51%|█████ | 124/242 [08:11<07:34, 3.85s/it]
149
  52%|█████▏ | 125/242 [08:15<07:30, 3.85s/it]
150
  52%|█████▏ | 126/242 [08:18<07:26, 3.85s/it]
151
  52%|█████▏ | 127/242 [08:22<07:22, 3.85s/it]
152
  53%|█████▎ | 128/242 [08:26<07:18, 3.85s/it]
153
  53%|█████▎ | 129/242 [08:30<07:15, 3.85s/it]
154
  54%|█████▎ | 130/242 [08:34<07:11, 3.85s/it]
155
 
156
  54%|█████▎ | 130/242 [08:34<07:11, 3.85s/it]
157
  54%|█████▍ | 131/242 [08:38<07:07, 3.85s/it]
158
  55%|█████▍ | 132/242 [08:42<07:03, 3.85s/it]
159
  55%|█████▍ | 133/242 [08:45<06:59, 3.85s/it]
160
  55%|█████▌ | 134/242 [08:49<06:56, 3.85s/it]
161
  56%|█████▌ | 135/242 [08:53<06:52, 3.85s/it]
162
  56%|█████▌ | 136/242 [08:57<06:48, 3.85s/it]
163
  57%|█████▋ | 137/242 [09:01<06:44, 3.85s/it]
164
  57%|█████▋ | 138/242 [09:05<06:40, 3.85s/it]
165
  57%|█████▋ | 139/242 [09:09<06:37, 3.86s/it]
166
  58%|█████▊ | 140/242 [09:12<06:33, 3.86s/it]
167
 
168
  58%|█████▊ | 140/242 [09:12<06:33, 3.86s/it]
169
  58%|█████▊ | 141/242 [09:16<06:29, 3.85s/it]
170
  59%|█████▊ | 142/242 [09:20<06:25, 3.85s/it]
171
  59%|█████▉ | 143/242 [09:24<06:21, 3.85s/it]
172
  60%|█████▉ | 144/242 [09:28<06:17, 3.85s/it]
173
  60%|█████▉ | 145/242 [09:32<06:13, 3.85s/it]
174
  60%|██████ | 146/242 [09:35<06:09, 3.85s/it]
175
  61%|██████ | 147/242 [09:39<06:06, 3.85s/it]
176
  61%|██████ | 148/242 [09:43<06:02, 3.85s/it]
177
  62%|██████▏ | 149/242 [09:47<05:58, 3.85s/it]
178
  62%|██████▏ | 150/242 [09:51<05:54, 3.86s/it]
179
 
180
  62%|██████▏ | 150/242 [09:51<05:54, 3.86s/it]
181
  62%|██████▏ | 151/242 [09:57<06:42, 4.43s/it]
182
  63%|██████▎ | 152/242 [10:01<06:24, 4.27s/it]
183
  63%|██████▎ | 153/242 [10:04<06:09, 4.15s/it]
184
  64%|██████▎ | 154/242 [10:08<05:58, 4.07s/it]
185
  64%|██████▍ | 155/242 [10:12<05:48, 4.01s/it]
186
  64%|██████▍ | 156/242 [10:16<05:40, 3.96s/it]
187
  65%|██████▍ | 157/242 [10:20<05:34, 3.94s/it]
188
  65%|██████▌ | 158/242 [10:24<05:29, 3.92s/it]
189
  66%|██████▌ | 159/242 [10:28<05:24, 3.91s/it]
190
  66%|██████▌ | 160/242 [10:32<05:19, 3.90s/it]
191
 
192
  66%|██████▌ | 160/242 [10:32<05:19, 3.90s/it]
193
  67%|██████▋ | 161/242 [10:35<05:15, 3.89s/it]
194
  67%|██████▋ | 162/242 [10:39<05:10, 3.88s/it]
195
  67%|██████▋ | 163/242 [10:43<05:07, 3.89s/it]
196
  68%|██████▊ | 164/242 [10:47<05:02, 3.88s/it]
197
  68%|██████▊ | 165/242 [10:51<04:58, 3.88s/it]
198
  69%|██████▊ | 166/242 [10:55<04:54, 3.88s/it]
199
  69%|██████▉ | 167/242 [10:59<04:50, 3.88s/it]
200
  69%|██████▉ | 168/242 [11:03<04:47, 3.88s/it]
201
  70%|██████▉ | 169/242 [11:06<04:43, 3.88s/it]
202
  70%|███████ | 170/242 [11:10<04:38, 3.87s/it]
203
 
204
  70%|███████ | 170/242 [11:10<04:38, 3.87s/it]
205
  71%|███████ | 171/242 [11:14<04:34, 3.87s/it]
206
  71%|███████ | 172/242 [11:18<04:31, 3.87s/it]
207
  71%|███████▏ | 173/242 [11:22<04:27, 3.87s/it]
208
  72%|███████▏ | 174/242 [11:26<04:23, 3.87s/it]
209
  72%|███████▏ | 175/242 [11:30<04:19, 3.88s/it]
210
  73%|███████▎ | 176/242 [11:34<04:15, 3.87s/it]
211
  73%|███████▎ | 177/242 [11:37<04:12, 3.88s/it]
212
  74%|███████▎ | 178/242 [11:41<04:08, 3.88s/it]
213
  74%|███████▍ | 179/242 [11:45<04:04, 3.87s/it]
214
  74%|███████▍ | 180/242 [11:49<04:00, 3.87s/it]
215
 
216
  74%|███████▍ | 180/242 [11:49<04:00, 3.87s/it]
217
  75%|███████▍ | 181/242 [11:53<03:55, 3.87s/it]
218
  75%|███████▌ | 182/242 [11:57<03:51, 3.86s/it]
219
  76%|███████▌ | 183/242 [12:01<03:47, 3.86s/it]
220
  76%|███████▌ | 184/242 [12:04<03:43, 3.86s/it]
221
  76%|███████▋ | 185/242 [12:08<03:39, 3.86s/it]
222
  77%|███████▋ | 186/242 [12:12<03:35, 3.86s/it]
223
  77%|███████▋ | 187/242 [12:16<03:32, 3.86s/it]
224
  78%|███████▊ | 188/242 [12:20<03:28, 3.87s/it]
225
  78%|███████▊ | 189/242 [12:24<03:24, 3.86s/it]
226
  79%|███████▊ | 190/242 [12:28<03:26, 3.98s/it]
227
 
228
  79%|███████▊ | 190/242 [12:28<03:26, 3.98s/it]
229
  79%|███████▉ | 191/242 [12:32<03:21, 3.94s/it]
230
  79%|███████▉ | 192/242 [12:36<03:16, 3.93s/it]
231
  80%|███████▉ | 193/242 [12:40<03:11, 3.91s/it]
232
  80%|████████ | 194/242 [12:44<03:07, 3.90s/it]
233
  81%|████████ | 195/242 [12:47<03:02, 3.89s/it]
234
  81%|████████ | 196/242 [12:51<02:58, 3.88s/it]
235
  81%|████████▏ | 197/242 [12:55<02:54, 3.88s/it]
236
  82%|████████▏ | 198/242 [12:59<02:50, 3.87s/it]
237
  82%|████████▏ | 199/242 [13:03<02:46, 3.86s/it]
238
  83%|████████▎ | 200/242 [13:07<02:42, 3.86s/it]
239
 
240
  83%|████████▎ | 200/242 [13:07<02:42, 3.86s/it]
241
  83%|████████▎ | 201/242 [13:12<03:02, 4.45s/it]
242
  83%|████████▎ | 202/242 [13:16<02:51, 4.28s/it]
243
  84%|████████▍ | 203/242 [13:20<02:41, 4.15s/it]
244
  84%|████████▍ | 204/242 [13:24<02:34, 4.07s/it]
245
  85%|████████▍ | 205/242 [13:28<02:28, 4.00s/it]
246
  85%|████████▌ | 206/242 [13:32<02:22, 3.96s/it]
247
  86%|████████▌ | 207/242 [13:36<02:17, 3.93s/it]
248
  86%|████████▌ | 208/242 [13:40<02:12, 3.91s/it]
249
  86%|████████▋ | 209/242 [13:43<02:08, 3.89s/it]
250
  87%|████████▋ | 210/242 [13:47<02:04, 3.89s/it]
251
 
252
  87%|████████▋ | 210/242 [13:47<02:04, 3.89s/it]
253
  87%|████████▋ | 211/242 [13:51<02:00, 3.88s/it]
254
  88%|████████▊ | 212/242 [13:55<01:56, 3.88s/it]
255
  88%|████████▊ | 213/242 [13:59<01:52, 3.88s/it]
256
  88%|████████▊ | 214/242 [14:03<01:48, 3.87s/it]
257
  89%|████████▉ | 215/242 [14:07<01:44, 3.87s/it]
258
  89%|████████▉ | 216/242 [14:10<01:40, 3.87s/it]
259
  90%|████████▉ | 217/242 [14:14<01:36, 3.86s/it]
260
  90%|█████████ | 218/242 [14:18<01:32, 3.87s/it]
261
  90%|█████████ | 219/242 [14:22<01:29, 3.87s/it]
262
  91%|█████████ | 220/242 [14:26<01:24, 3.86s/it]
263
 
264
  91%|█████████ | 220/242 [14:26<01:24, 3.86s/it]
265
  91%|█████████▏| 221/242 [14:30<01:21, 3.86s/it]
266
  92%|█████████▏| 222/242 [14:34<01:17, 3.86s/it]
267
  92%|█████████▏| 223/242 [14:37<01:13, 3.86s/it]
268
  93%|█████████▎| 224/242 [14:41<01:09, 3.86s/it]
269
  93%|█████████▎| 225/242 [14:45<01:05, 3.86s/it]
270
  93%|█████████▎| 226/242 [14:49<01:01, 3.85s/it]
271
  94%|█████████▍| 227/242 [14:53<00:57, 3.85s/it]
272
  94%|█████████▍| 228/242 [14:57<00:53, 3.85s/it]
273
  95%|█████████▍| 229/242 [15:01<00:50, 3.85s/it]
274
  95%|█████████▌| 230/242 [15:04<00:46, 3.85s/it]
275
 
276
  95%|█████████▌| 230/242 [15:04<00:46, 3.85s/it]
277
  95%|█████████▌| 231/242 [15:08<00:42, 3.86s/it]
278
  96%|█████████▌| 232/242 [15:12<00:38, 3.87s/it]
279
  96%|█████████▋| 233/242 [15:16<00:34, 3.88s/it]
280
  97%|█████████▋| 234/242 [15:20<00:31, 3.89s/it]
281
  97%|█████████▋| 235/242 [15:24<00:27, 3.89s/it]
282
  98%|█████████▊| 236/242 [15:28<00:23, 3.90s/it]
283
  98%|█████████▊| 237/242 [15:32<00:19, 3.91s/it]
284
  98%|█████████▊| 238/242 [15:36<00:15, 3.92s/it]
285
  99%|█████████▉| 239/242 [15:40<00:11, 3.92s/it]
286
  99%|█████████▉| 240/242 [15:44<00:07, 3.92s/it]
287
 
288
  99%|█████████▉| 240/242 [15:44<00:07, 3.92s/it]
289
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /usr/local/lib/python3.11/dist-packages/requests/__init__.py:113: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (7.4.3)/charset_normalizer (3.3.2) doesn't match a supported version!
2
+ warnings.warn(
3
+ 🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
4
+ Unsloth: Your Flash Attention 2 installation seems to be broken. Using Xformers instead. No performance changes will be seen.
5
+ 🦥 Unsloth Zoo will now patch everything to make training faster!
6
+ ============================================================
7
+ BibleAI DPO Training (Stage 3)
8
+ ============================================================
9
+ Base model: /workspace/outputs/sft_cpt_merged
10
+ DPO data: /workspace/data/dpo_pairs.jsonl
11
+ Output: /workspace/outputs/dpo
12
+ Epochs: 2
13
+ Beta: 0.1
14
+ LoRA rank: 32
15
+ Learning rate: 5e-06
16
+
17
+ Loading SFT model...
18
+ ==((====))== Unsloth 2026.4.4: Fast Gemma4 patching. Transformers: 5.5.0.
19
+ \\ /| NVIDIA A100-SXM4-80GB. Num GPUs = 1. Max memory: 79.25 GB. Platform: Linux.
20
+ O^O/ \_/ \ Torch: 2.7.0+cu126. CUDA: 8.0. CUDA Toolkit: 12.6. Triton: 3.3.0
21
+ \ / Bfloat16 = TRUE. FA [Xformers = 0.0.30. FA2 = False]
22
+ "-____-" Free license: http://github.com/unslothai/unsloth
23
+ Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
24
+
25
+ The tokenizer you are loading from '/workspace/outputs/sft_cpt_merged' with an incorrect regex pattern: https://huggingface.co/mistralai/Mistral-Small-3.1-24B-Instruct-2503/discussions/84#69121093e8b480e709447d5e. This will lead to incorrect tokenization. You should set the `fix_mistral_regex=True` flag when loading this tokenizer to fix this issue.
26
+ Unsloth: Dropout = 0 is supported for fast patching. You are using dropout = 0.05.
27
+ Unsloth will patch all other layers, except LoRA matrices, causing a performance hit.
28
+ The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'eos_token_id': 1, 'bos_token_id': 2}.
29
+ ==((====))== Unsloth - 2x faster free finetuning | Num GPUs used = 1
30
+ \\ /| Num examples = 967 | Num Epochs = 2 | Total steps = 242
31
+ O^O/ \_/ \ Batch size per device = 2 | Gradient accumulation steps = 4
32
+ \ / Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8
33
+ "-____-" Trainable parameters = 84,803,584 of 8,080,960,032 (1.05% trained)
34
+ Tokenizer: GemmaTokenizer → text: GemmaTokenizer
35
+ Trainable: 84,803,584 / 6,064,089,632 (1.40%)
36
+ Loading DPO data from /workspace/data/dpo_pairs.jsonl...
37
+ DPO pairs: 967
38
+
39
+ Starting DPO training...
40
+
41
  0%| | 0/242 [00:00<?, ?it/s]Caching is incompatible with gradient checkpointing in Gemma4TextDecoderLayer. Setting `past_key_values=None`.
42
+
43
  0%| | 1/242 [00:11<46:43, 11.63s/it]
44
  1%| | 2/242 [00:15<28:22, 7.10s/it]
45
  1%| | 3/242 [00:19<22:28, 5.64s/it]
46
  2%|▏ | 4/242 [00:23<19:36, 4.94s/it]
47
  2%|▏ | 5/242 [00:27<18:03, 4.57s/it]
48
  2%|▏ | 6/242 [00:31<17:03, 4.34s/it]
49
  3%|▎ | 7/242 [00:35<16:55, 4.32s/it]
50
  3%|▎ | 8/242 [00:39<16:18, 4.18s/it]
51
  4%|▎ | 9/242 [00:43<15:53, 4.09s/it]
52
  4%|▍ | 10/242 [00:47<15:34, 4.03s/it]
53
 
54
  4%|▍ | 10/242 [00:47<15:34, 4.03s/it]
55
  5%|▍ | 11/242 [00:50<15:21, 3.99s/it]
56
  5%|▍ | 12/242 [00:54<15:11, 3.96s/it]
57
  5%|▌ | 13/242 [00:58<15:09, 3.97s/it]
58
  6%|▌ | 14/242 [01:02<14:57, 3.94s/it]
59
  6%|▌ | 15/242 [01:06<14:48, 3.91s/it]
60
  7%|▋ | 16/242 [01:10<14:38, 3.89s/it]
61
  7%|▋ | 17/242 [01:14<14:59, 4.00s/it]
62
  7%|▋ | 18/242 [01:18<14:45, 3.95s/it]
63
  8%|▊ | 19/242 [01:22<14:33, 3.92s/it]
64
  8%|▊ | 20/242 [01:26<14:25, 3.90s/it]
65
 
66
  8%|▊ | 20/242 [01:26<14:25, 3.90s/it]
67
  9%|▊ | 21/242 [01:30<14:17, 3.88s/it]
68
  9%|▉ | 22/242 [01:33<14:09, 3.86s/it]
69
  10%|▉ | 23/242 [01:37<14:04, 3.86s/it]
70
  10%|▉ | 24/242 [01:41<13:59, 3.85s/it]
71
  10%|█ | 25/242 [01:45<13:54, 3.85s/it]
72
  11%|█ | 26/242 [01:49<13:49, 3.84s/it]
73
  11%|█ | 27/242 [01:53<13:43, 3.83s/it]
74
  12%|█▏ | 28/242 [01:56<13:39, 3.83s/it]
75
  12%|█▏ | 29/242 [02:00<13:35, 3.83s/it]
76
  12%|█▏ | 30/242 [02:04<13:30, 3.82s/it]
77
 
78
  12%|█▏ | 30/242 [02:04<13:30, 3.82s/it]
79
  13%|█▎ | 31/242 [02:08<13:27, 3.83s/it]
80
  13%|█▎ | 32/242 [02:12<13:23, 3.82s/it]
81
  14%|█▎ | 33/242 [02:15<13:19, 3.83s/it]
82
  14%|█▍ | 34/242 [02:19<13:22, 3.86s/it]
83
  14%|█▍ | 35/242 [02:23<13:17, 3.85s/it]
84
  15%|█▍ | 36/242 [02:27<13:11, 3.84s/it]
85
  15%|█▌ | 37/242 [02:31<13:07, 3.84s/it]
86
  16%|█▌ | 38/242 [02:35<13:04, 3.84s/it]
87
  16%|█▌ | 39/242 [02:39<13:01, 3.85s/it]
88
  17%|█▋ | 40/242 [02:42<12:55, 3.84s/it]
89
 
90
  17%|█▋ | 40/242 [02:42<12:55, 3.84s/it]
91
  17%|█▋ | 41/242 [02:46<12:51, 3.84s/it]
92
  17%|█▋ | 42/242 [02:50<12:47, 3.84s/it]
93
  18%|█▊ | 43/242 [02:54<12:43, 3.84s/it]
94
  18%|█▊ | 44/242 [02:58<12:39, 3.84s/it]
95
  19%|█▊ | 45/242 [03:02<12:34, 3.83s/it]
96
  19%|█▉ | 46/242 [03:05<12:30, 3.83s/it]
97
  19%|█▉ | 47/242 [03:09<12:27, 3.83s/it]
98
  20%|█▉ | 48/242 [03:13<12:22, 3.83s/it]
99
  20%|██ | 49/242 [03:17<12:18, 3.83s/it]
100
  21%|██ | 50/242 [03:21<12:18, 3.85s/it]
101
 
102
  21%|██ | 50/242 [03:21<12:18, 3.85s/it]
103
  21%|██ | 51/242 [03:26<13:56, 4.38s/it]
104
  21%|██▏ | 52/242 [03:30<13:24, 4.23s/it]
105
  22%|██▏ | 53/242 [03:34<13:01, 4.13s/it]
106
  22%|██▏ | 54/242 [03:38<12:42, 4.06s/it]
107
  23%|██▎ | 55/242 [03:42<12:30, 4.01s/it]
108
  23%|██▎ | 56/242 [03:46<12:18, 3.97s/it]
109
  24%|██▎ | 57/242 [03:50<12:10, 3.95s/it]
110
  24%|██▍ | 58/242 [03:54<12:03, 3.93s/it]
111
  24%|██▍ | 59/242 [03:58<11:57, 3.92s/it]
112
  25%|██▍ | 60/242 [04:01<11:51, 3.91s/it]
113
 
114
  25%|██▍ | 60/242 [04:01<11:51, 3.91s/it]
115
  25%|██▌ | 61/242 [04:05<11:45, 3.90s/it]
116
  26%|██▌ | 62/242 [04:09<11:40, 3.89s/it]
117
  26%|██▌ | 63/242 [04:13<11:34, 3.88s/it]
118
  26%|██▋ | 64/242 [04:17<11:29, 3.88s/it]
119
  27%|██▋ | 65/242 [04:21<11:25, 3.87s/it]
120
  27%|██▋ | 66/242 [04:25<11:21, 3.87s/it]
121
  28%|██▊ | 67/242 [04:28<11:16, 3.87s/it]
122
  28%|██▊ | 68/242 [04:32<11:13, 3.87s/it]
123
  29%|██▊ | 69/242 [04:36<11:10, 3.88s/it]
124
  29%|██▉ | 70/242 [04:40<11:06, 3.88s/it]
125
 
126
  29%|██▉ | 70/242 [04:40<11:06, 3.88s/it]
127
  29%|██▉ | 71/242 [04:44<11:01, 3.87s/it]
128
  30%|██▉ | 72/242 [04:48<10:57, 3.87s/it]
129
  30%|███ | 73/242 [04:52<10:53, 3.87s/it]
130
  31%|███ | 74/242 [04:56<10:49, 3.87s/it]
131
  31%|███ | 75/242 [04:59<10:45, 3.87s/it]
132
  31%|███▏ | 76/242 [05:03<10:41, 3.87s/it]
133
  32%|███▏ | 77/242 [05:07<10:37, 3.86s/it]
134
  32%|███▏ | 78/242 [05:11<10:33, 3.86s/it]
135
  33%|███▎ | 79/242 [05:15<10:29, 3.86s/it]
136
  33%|███▎ | 80/242 [05:19<10:24, 3.86s/it]
137
 
138
  33%|███▎ | 80/242 [05:19<10:24, 3.86s/it]
139
  33%|███▎ | 81/242 [05:23<10:20, 3.86s/it]
140
  34%|███▍ | 82/242 [05:26<10:17, 3.86s/it]
141
  34%|███▍ | 83/242 [05:30<10:14, 3.86s/it]
142
  35%|███▍ | 84/242 [05:34<10:10, 3.87s/it]
143
  35%|███▌ | 85/242 [05:38<10:07, 3.87s/it]
144
  36%|███▌ | 86/242 [05:42<10:02, 3.86s/it]
145
  36%|███▌ | 87/242 [05:46<09:58, 3.86s/it]
146
  36%|███▋ | 88/242 [05:50<09:54, 3.86s/it]
147
  37%|███▋ | 89/242 [05:54<09:51, 3.86s/it]
148
  37%|███▋ | 90/242 [05:57<09:47, 3.86s/it]
149
 
150
  37%|███▋ | 90/242 [05:57<09:47, 3.86s/it]
151
  38%|███▊ | 91/242 [06:01<09:42, 3.86s/it]
152
  38%|███▊ | 92/242 [06:05<09:39, 3.86s/it]
153
  38%|███▊ | 93/242 [06:09<09:34, 3.86s/it]
154
  39%|███▉ | 94/242 [06:13<09:30, 3.85s/it]
155
  39%|███▉ | 95/242 [06:17<09:26, 3.85s/it]
156
  40%|███▉ | 96/242 [06:20<09:22, 3.85s/it]
157
  40%|████ | 97/242 [06:24<09:19, 3.86s/it]
158
  40%|████ | 98/242 [06:28<09:15, 3.86s/it]
159
  41%|████ | 99/242 [06:32<09:11, 3.86s/it]
160
  41%|████▏ | 100/242 [06:36<09:07, 3.86s/it]
161
 
162
  41%|████▏ | 100/242 [06:36<09:07, 3.86s/it]
163
  42%|████▏ | 101/242 [06:42<10:35, 4.51s/it]
164
  42%|████▏ | 102/242 [06:46<10:04, 4.32s/it]
165
  43%|████▎ | 103/242 [06:50<09:41, 4.18s/it]
166
  43%|████▎ | 104/242 [06:54<09:23, 4.09s/it]
167
  43%|████▎ | 105/242 [06:57<09:10, 4.02s/it]
168
  44%|████▍ | 106/242 [07:01<09:00, 3.97s/it]
169
  44%|████▍ | 107/242 [07:05<08:51, 3.94s/it]
170
  45%|████▍ | 108/242 [07:09<08:44, 3.92s/it]
171
  45%|████▌ | 109/242 [07:13<08:38, 3.90s/it]
172
  45%|████▌ | 110/242 [07:17<08:33, 3.89s/it]
173
 
174
  45%|████▌ | 110/242 [07:17<08:33, 3.89s/it]
175
  46%|████▌ | 111/242 [07:21<08:29, 3.89s/it]
176
  46%|████▋ | 112/242 [07:24<08:24, 3.88s/it]
177
  47%|████▋ | 113/242 [07:28<08:19, 3.87s/it]
178
  47%|████▋ | 114/242 [07:32<08:14, 3.87s/it]
179
  48%|████▊ | 115/242 [07:36<08:11, 3.87s/it]
180
  48%|████▊ | 116/242 [07:40<08:06, 3.86s/it]
181
  48%|████▊ | 117/242 [07:44<08:02, 3.86s/it]
182
  49%|████▉ | 118/242 [07:48<07:58, 3.86s/it]
183
  49%|████▉ | 119/242 [07:51<07:54, 3.86s/it]
184
  50%|████▉ | 120/242 [07:55<07:50, 3.86s/it]
185
 
186
  50%|████▉ | 120/242 [07:55<07:50, 3.86s/it]
187
  50%|█████ | 121/242 [07:59<07:45, 3.85s/it]
188
  50%|█████ | 122/242 [08:03<07:41, 3.85s/it]
189
  51%|█████ | 123/242 [08:07<07:37, 3.85s/it]
190
  51%|█████ | 124/242 [08:11<07:34, 3.85s/it]
191
  52%|█████▏ | 125/242 [08:15<07:30, 3.85s/it]
192
  52%|█████▏ | 126/242 [08:18<07:26, 3.85s/it]
193
  52%|█████▏ | 127/242 [08:22<07:22, 3.85s/it]
194
  53%|█████▎ | 128/242 [08:26<07:18, 3.85s/it]
195
  53%|█████▎ | 129/242 [08:30<07:15, 3.85s/it]
196
  54%|█████▎ | 130/242 [08:34<07:11, 3.85s/it]
197
 
198
  54%|█████▎ | 130/242 [08:34<07:11, 3.85s/it]
199
  54%|█████▍ | 131/242 [08:38<07:07, 3.85s/it]
200
  55%|█████▍ | 132/242 [08:42<07:03, 3.85s/it]
201
  55%|█████▍ | 133/242 [08:45<06:59, 3.85s/it]
202
  55%|█████▌ | 134/242 [08:49<06:56, 3.85s/it]
203
  56%|█████▌ | 135/242 [08:53<06:52, 3.85s/it]
204
  56%|█████▌ | 136/242 [08:57<06:48, 3.85s/it]
205
  57%|█████▋ | 137/242 [09:01<06:44, 3.85s/it]
206
  57%|█████▋ | 138/242 [09:05<06:40, 3.85s/it]
207
  57%|█████▋ | 139/242 [09:09<06:37, 3.86s/it]
208
  58%|█████▊ | 140/242 [09:12<06:33, 3.86s/it]
209
 
210
  58%|█████▊ | 140/242 [09:12<06:33, 3.86s/it]
211
  58%|█████▊ | 141/242 [09:16<06:29, 3.85s/it]
212
  59%|█████▊ | 142/242 [09:20<06:25, 3.85s/it]
213
  59%|█████▉ | 143/242 [09:24<06:21, 3.85s/it]
214
  60%|█████▉ | 144/242 [09:28<06:17, 3.85s/it]
215
  60%|█████▉ | 145/242 [09:32<06:13, 3.85s/it]
216
  60%|██████ | 146/242 [09:35<06:09, 3.85s/it]
217
  61%|██████ | 147/242 [09:39<06:06, 3.85s/it]
218
  61%|██████ | 148/242 [09:43<06:02, 3.85s/it]
219
  62%|██████▏ | 149/242 [09:47<05:58, 3.85s/it]
220
  62%|██████▏ | 150/242 [09:51<05:54, 3.86s/it]
221
 
222
  62%|██████▏ | 150/242 [09:51<05:54, 3.86s/it]
223
  62%|██████▏ | 151/242 [09:57<06:42, 4.43s/it]
224
  63%|██████▎ | 152/242 [10:01<06:24, 4.27s/it]
225
  63%|██████▎ | 153/242 [10:04<06:09, 4.15s/it]
226
  64%|██████▎ | 154/242 [10:08<05:58, 4.07s/it]
227
  64%|██████▍ | 155/242 [10:12<05:48, 4.01s/it]
228
  64%|██████▍ | 156/242 [10:16<05:40, 3.96s/it]
229
  65%|██████▍ | 157/242 [10:20<05:34, 3.94s/it]
230
  65%|██████▌ | 158/242 [10:24<05:29, 3.92s/it]
231
  66%|██████▌ | 159/242 [10:28<05:24, 3.91s/it]
232
  66%|██████▌ | 160/242 [10:32<05:19, 3.90s/it]
233
 
234
  66%|██████▌ | 160/242 [10:32<05:19, 3.90s/it]
235
  67%|██████▋ | 161/242 [10:35<05:15, 3.89s/it]
236
  67%|██████▋ | 162/242 [10:39<05:10, 3.88s/it]
237
  67%|██████▋ | 163/242 [10:43<05:07, 3.89s/it]
238
  68%|██████▊ | 164/242 [10:47<05:02, 3.88s/it]
239
  68%|██████▊ | 165/242 [10:51<04:58, 3.88s/it]
240
  69%|██████▊ | 166/242 [10:55<04:54, 3.88s/it]
241
  69%|██████▉ | 167/242 [10:59<04:50, 3.88s/it]
242
  69%|██████▉ | 168/242 [11:03<04:47, 3.88s/it]
243
  70%|██████▉ | 169/242 [11:06<04:43, 3.88s/it]
244
  70%|███████ | 170/242 [11:10<04:38, 3.87s/it]
245
 
246
  70%|███████ | 170/242 [11:10<04:38, 3.87s/it]
247
  71%|███████ | 171/242 [11:14<04:34, 3.87s/it]
248
  71%|███████ | 172/242 [11:18<04:31, 3.87s/it]
249
  71%|███████▏ | 173/242 [11:22<04:27, 3.87s/it]
250
  72%|███████▏ | 174/242 [11:26<04:23, 3.87s/it]
251
  72%|███████▏ | 175/242 [11:30<04:19, 3.88s/it]
252
  73%|███████▎ | 176/242 [11:34<04:15, 3.87s/it]
253
  73%|███████▎ | 177/242 [11:37<04:12, 3.88s/it]
254
  74%|███████▎ | 178/242 [11:41<04:08, 3.88s/it]
255
  74%|███████▍ | 179/242 [11:45<04:04, 3.87s/it]
256
  74%|███████▍ | 180/242 [11:49<04:00, 3.87s/it]
257
 
258
  74%|███████▍ | 180/242 [11:49<04:00, 3.87s/it]
259
  75%|███████▍ | 181/242 [11:53<03:55, 3.87s/it]
260
  75%|███████▌ | 182/242 [11:57<03:51, 3.86s/it]
261
  76%|███████▌ | 183/242 [12:01<03:47, 3.86s/it]
262
  76%|███████▌ | 184/242 [12:04<03:43, 3.86s/it]
263
  76%|███████▋ | 185/242 [12:08<03:39, 3.86s/it]
264
  77%|███████▋ | 186/242 [12:12<03:35, 3.86s/it]
265
  77%|███████▋ | 187/242 [12:16<03:32, 3.86s/it]
266
  78%|███████▊ | 188/242 [12:20<03:28, 3.87s/it]
267
  78%|███████▊ | 189/242 [12:24<03:24, 3.86s/it]
268
  79%|███████▊ | 190/242 [12:28<03:26, 3.98s/it]
269
 
270
  79%|███████▊ | 190/242 [12:28<03:26, 3.98s/it]
271
  79%|███████▉ | 191/242 [12:32<03:21, 3.94s/it]
272
  79%|███████▉ | 192/242 [12:36<03:16, 3.93s/it]
273
  80%|███████▉ | 193/242 [12:40<03:11, 3.91s/it]
274
  80%|████████ | 194/242 [12:44<03:07, 3.90s/it]
275
  81%|████████ | 195/242 [12:47<03:02, 3.89s/it]
276
  81%|████████ | 196/242 [12:51<02:58, 3.88s/it]
277
  81%|████████▏ | 197/242 [12:55<02:54, 3.88s/it]
278
  82%|████████▏ | 198/242 [12:59<02:50, 3.87s/it]
279
  82%|████████▏ | 199/242 [13:03<02:46, 3.86s/it]
280
  83%|████████▎ | 200/242 [13:07<02:42, 3.86s/it]
281
 
282
  83%|████████▎ | 200/242 [13:07<02:42, 3.86s/it]
283
  83%|████████▎ | 201/242 [13:12<03:02, 4.45s/it]
284
  83%|████████▎ | 202/242 [13:16<02:51, 4.28s/it]
285
  84%|████████▍ | 203/242 [13:20<02:41, 4.15s/it]
286
  84%|████████▍ | 204/242 [13:24<02:34, 4.07s/it]
287
  85%|████████▍ | 205/242 [13:28<02:28, 4.00s/it]
288
  85%|████████▌ | 206/242 [13:32<02:22, 3.96s/it]
289
  86%|████████▌ | 207/242 [13:36<02:17, 3.93s/it]
290
  86%|████████▌ | 208/242 [13:40<02:12, 3.91s/it]
291
  86%|████████▋ | 209/242 [13:43<02:08, 3.89s/it]
292
  87%|████████▋ | 210/242 [13:47<02:04, 3.89s/it]
293
 
294
  87%|████████▋ | 210/242 [13:47<02:04, 3.89s/it]
295
  87%|████████▋ | 211/242 [13:51<02:00, 3.88s/it]
296
  88%|████████▊ | 212/242 [13:55<01:56, 3.88s/it]
297
  88%|████████▊ | 213/242 [13:59<01:52, 3.88s/it]
298
  88%|████████▊ | 214/242 [14:03<01:48, 3.87s/it]
299
  89%|████████▉ | 215/242 [14:07<01:44, 3.87s/it]
300
  89%|████████▉ | 216/242 [14:10<01:40, 3.87s/it]
301
  90%|████████▉ | 217/242 [14:14<01:36, 3.86s/it]
302
  90%|█████████ | 218/242 [14:18<01:32, 3.87s/it]
303
  90%|█████████ | 219/242 [14:22<01:29, 3.87s/it]
304
  91%|█████████ | 220/242 [14:26<01:24, 3.86s/it]
305
 
306
  91%|█████████ | 220/242 [14:26<01:24, 3.86s/it]
307
  91%|█████████▏| 221/242 [14:30<01:21, 3.86s/it]
308
  92%|█████████▏| 222/242 [14:34<01:17, 3.86s/it]
309
  92%|█████████▏| 223/242 [14:37<01:13, 3.86s/it]
310
  93%|█████████▎| 224/242 [14:41<01:09, 3.86s/it]
311
  93%|█████████▎| 225/242 [14:45<01:05, 3.86s/it]
312
  93%|█████████▎| 226/242 [14:49<01:01, 3.85s/it]
313
  94%|█████████▍| 227/242 [14:53<00:57, 3.85s/it]
314
  94%|█████████▍| 228/242 [14:57<00:53, 3.85s/it]
315
  95%|█████████▍| 229/242 [15:01<00:50, 3.85s/it]
316
  95%|█████████▌| 230/242 [15:04<00:46, 3.85s/it]
317
 
318
  95%|█████████▌| 230/242 [15:04<00:46, 3.85s/it]
319
  95%|█████████▌| 231/242 [15:08<00:42, 3.86s/it]
320
  96%|█████████▌| 232/242 [15:12<00:38, 3.87s/it]
321
  96%|█████████▋| 233/242 [15:16<00:34, 3.88s/it]
322
  97%|█████████▋| 234/242 [15:20<00:31, 3.89s/it]
323
  97%|█████████▋| 235/242 [15:24<00:27, 3.89s/it]
324
  98%|█████████▊| 236/242 [15:28<00:23, 3.90s/it]
325
  98%|█████████▊| 237/242 [15:32<00:19, 3.91s/it]
326
  98%|█████████▊| 238/242 [15:36<00:15, 3.92s/it]
327
  99%|█████████▉| 239/242 [15:40<00:11, 3.92s/it]
328
  99%|█████████▉| 240/242 [15:44<00:07, 3.92s/it]
329
 
330
  99%|█████████▉| 240/242 [15:44<00:07, 3.92s/it]
331
 
332
+ Unsloth: Will smartly offload gradients to save VRAM!
333
+ {'loss': '0.9271', 'grad_norm': '198.7', 'learning_rate': '9e-07', 'rewards/chosen': '-0.05307', 'rewards/rejected': '-0.005694', 'rewards/accuracies': '0.4125', 'rewards/margins': '-0.04738', 'logps/chosen': '-3755', 'logps/rejected': '-2303', 'logits/chosen': '-11.42', 'logits/rejected': '-11.47', 'epoch': '0.08264'}
334
+ {'loss': '0.454', 'grad_norm': '25.33', 'learning_rate': '1.9e-06', 'rewards/chosen': '2.245', 'rewards/rejected': '0.7875', 'rewards/accuracies': '0.7875', 'rewards/margins': '1.457', 'logps/chosen': '-3733', 'logps/rejected': '-2372', 'logits/chosen': '-11.34', 'logits/rejected': '-11.34', 'epoch': '0.1653'}
335
+ {'loss': '0.05742', 'grad_norm': '10.8', 'learning_rate': '2.9e-06', 'rewards/chosen': '10.9', 'rewards/rejected': '3.573', 'rewards/accuracies': '0.9875', 'rewards/margins': '7.329', 'logps/chosen': '-3603', 'logps/rejected': '-2278', 'logits/chosen': '-11.21', 'logits/rejected': '-11.15', 'epoch': '0.2479'}
336
+ {'loss': '0.0152', 'grad_norm': '0.0009383', 'learning_rate': '3.9e-06', 'rewards/chosen': '23.31', 'rewards/rejected': '4.238', 'rewards/accuracies': '1', 'rewards/margins': '19.07', 'logps/chosen': '-3510', 'logps/rejected': '-2291', 'logits/chosen': '-11.12', 'logits/rejected': '-11.2', 'epoch': '0.3306'}
337
+ {'loss': '0.009164', 'grad_norm': '3.639e-07', 'learning_rate': '4.9e-06', 'rewards/chosen': '24.5', 'rewards/rejected': '-1.617', 'rewards/accuracies': '1', 'rewards/margins': '26.12', 'logps/chosen': '-3374', 'logps/rejected': '-2323', 'logits/chosen': '-11.23', 'logits/rejected': '-11.38', 'epoch': '0.4132'}
338
+ {'loss': '3.722e-07', 'grad_norm': '6.808e-08', 'learning_rate': '4.973e-06', 'rewards/chosen': '19.64', 'rewards/rejected': '-11.7', 'rewards/accuracies': '1', 'rewards/margins': '31.33', 'logps/chosen': '-3468', 'logps/rejected': '-2433', 'logits/chosen': '-11.37', 'logits/rejected': '-11.74', 'epoch': '0.4959'}
339
+ {'loss': '5.501e-07', 'grad_norm': '1.655e-09', 'learning_rate': '4.88e-06', 'rewards/chosen': '16.18', 'rewards/rejected': '-18.2', 'rewards/accuracies': '1', 'rewards/margins': '34.38', 'logps/chosen': '-3623', 'logps/rejected': '-2515', 'logits/chosen': '-11.52', 'logits/rejected': '-11.77', 'epoch': '0.5785'}
340
+ {'loss': '1.991e-07', 'grad_norm': '1.013e-10', 'learning_rate': '4.724e-06', 'rewards/chosen': '15.59', 'rewards/rejected': '-20.33', 'rewards/accuracies': '1', 'rewards/margins': '35.92', 'logps/chosen': '-3673', 'logps/rejected': '-2564', 'logits/chosen': '-11.52', 'logits/rejected': '-11.89', 'epoch': '0.6612'}
341
+ {'loss': '4.913e-08', 'grad_norm': '4.028e-10', 'learning_rate': '4.508e-06', 'rewards/chosen': '12.84', 'rewards/rejected': '-21.68', 'rewards/accuracies': '1', 'rewards/margins': '34.52', 'logps/chosen': '-3652', 'logps/rejected': '-2563', 'logits/chosen': '-11.53', 'logits/rejected': '-11.78', 'epoch': '0.7438'}
342
+ {'loss': '6.312e-07', 'grad_norm': '8.415e-06', 'learning_rate': '4.239e-06', 'rewards/chosen': '12.63', 'rewards/rejected': '-21.65', 'rewards/accuracies': '1', 'rewards/margins': '34.28', 'logps/chosen': '-3544', 'logps/rejected': '-2617', 'logits/chosen': '-11.6', 'logits/rejected': '-11.93', 'epoch': '0.8264'}
343
+ {'loss': '1.452e-05', 'grad_norm': '3.07e-10', 'learning_rate': '3.923e-06', 'rewards/chosen': '13.53', 'rewards/rejected': '-21.73', 'rewards/accuracies': '1', 'rewards/margins': '35.26', 'logps/chosen': '-3635', 'logps/rejected': '-2562', 'logits/chosen': '-11.55', 'logits/rejected': '-11.83', 'epoch': '0.9091'}
344
+ {'loss': '0.00764', 'grad_norm': '21.77', 'learning_rate': '3.569e-06', 'rewards/chosen': '12.49', 'rewards/rejected': '-21.65', 'rewards/accuracies': '1', 'rewards/margins': '34.13', 'logps/chosen': '-3522', 'logps/rejected': '-2493', 'logits/chosen': '-11.58', 'logits/rejected': '-11.86', 'epoch': '0.9917'}
345
+ {'loss': '2.103e-07', 'grad_norm': '5.981e-07', 'learning_rate': '3.186e-06', 'rewards/chosen': '13.15', 'rewards/rejected': '-21.7', 'rewards/accuracies': '1', 'rewards/margins': '34.85', 'logps/chosen': '-3557', 'logps/rejected': '-2590', 'logits/chosen': '-11.68', 'logits/rejected': '-11.99', 'epoch': '1.074'}
346
+ {'loss': '3.559e-08', 'grad_norm': '8.223e-07', 'learning_rate': '2.786e-06', 'rewards/chosen': '14.41', 'rewards/rejected': '-21.41', 'rewards/accuracies': '1', 'rewards/margins': '35.82', 'logps/chosen': '-3470', 'logps/rejected': '-2560', 'logits/chosen': '-11.74', 'logits/rejected': '-12.08', 'epoch': '1.157'}
347
+ {'loss': '1.473e-07', 'grad_norm': '1.332e-09', 'learning_rate': '2.377e-06', 'rewards/chosen': '14.15', 'rewards/rejected': '-21.22', 'rewards/accuracies': '1', 'rewards/margins': '35.37', 'logps/chosen': '-3576', 'logps/rejected': '-2575', 'logits/chosen': '-11.71', 'logits/rejected': '-11.96', 'epoch': '1.24'}
348
+ {'loss': '1.274e-08', 'grad_norm': '2.098e-09', 'learning_rate': '1.972e-06', 'rewards/chosen': '16.15', 'rewards/rejected': '-21.08', 'rewards/accuracies': '1', 'rewards/margins': '37.23', 'logps/chosen': '-3689', 'logps/rejected': '-2564', 'logits/chosen': '-11.75', 'logits/rejected': '-12.09', 'epoch': '1.322'}
349
+ {'loss': '6.593e-08', 'grad_norm': '2.842e-05', 'learning_rate': '1.581e-06', 'rewards/chosen': '16.32', 'rewards/rejected': '-19.54', 'rewards/accuracies': '1', 'rewards/margins': '35.85', 'logps/chosen': '-3579', 'logps/rejected': '-2557', 'logits/chosen': '-11.76', 'logits/rejected': '-12.05', 'epoch': '1.405'}
350
+ {'loss': '2.681e-08', 'grad_norm': '3.175e-06', 'learning_rate': '1.215e-06', 'rewards/chosen': '17.36', 'rewards/rejected': '-20.76', 'rewards/accuracies': '1', 'rewards/margins': '38.12', 'logps/chosen': '-3732', 'logps/rejected': '-2609', 'logits/chosen': '-11.75', 'logits/rejected': '-12.14', 'epoch': '1.488'}
351
+ {'loss': '9.061e-09', 'grad_norm': '1.294e-05', 'learning_rate': '8.826e-07', 'rewards/chosen': '17.16', 'rewards/rejected': '-20.78', 'rewards/accuracies': '1', 'rewards/margins': '37.94', 'logps/chosen': '-3618', 'logps/rejected': '-2533', 'logits/chosen': '-11.75', 'logits/rejected': '-12.05', 'epoch': '1.57'}
352
+ {'loss': '2.793e-07', 'grad_norm': '8.467e-05', 'learning_rate': '5.937e-07', 'rewards/chosen': '15.28', 'rewards/rejected': '-20.5', 'rewards/accuracies': '1', 'rewards/margins': '35.78', 'logps/chosen': '-3386', 'logps/rejected': '-2454', 'logits/chosen': '-11.7', 'logits/rejected': '-12.17', 'epoch': '1.653'}
353
+ {'loss': '3.405e-07', 'grad_norm': '1.662e-09', 'learning_rate': '3.557e-07', 'rewards/chosen': '16.79', 'rewards/rejected': '-20.01', 'rewards/accuracies': '1', 'rewards/margins': '36.8', 'logps/chosen': '-3677', 'logps/rejected': '-2567', 'logits/chosen': '-11.86', 'logits/rejected': '-12.22', 'epoch': '1.736'}
354
+ {'loss': '9.05e-10', 'grad_norm': '3.363e-08', 'learning_rate': '1.75e-07', 'rewards/chosen': '17.05', 'rewards/rejected': '-20.15', 'rewards/accuracies': '1', 'rewards/margins': '37.2', 'logps/chosen': '-3534', 'logps/rejected': '-2474', 'logits/chosen': '-11.71', 'logits/rejected': '-12.03', 'epoch': '1.818'}
355
+ {'loss': '2.482e-08', 'grad_norm': '1.544e-08', 'learning_rate': '5.635e-08', 'rewards/chosen': '13.72', 'rewards/rejected': '-20.55', 'rewards/accuracies': '1', 'rewards/margins': '34.26', 'logps/chosen': '-3614', 'logps/rejected': '-2530', 'logits/chosen': '-11.74', 'logits/rejected': '-11.99', 'epoch': '1.901'}
356
+ {'loss': '1.692e-09', 'grad_norm': '1.316e-08', 'learning_rate': '3.011e-09', 'rewards/chosen': '16.01', 'rewards/rejected': '-20.62', 'rewards/accuracies': '1', 'rewards/margins': '36.63', 'logps/chosen': '-3492', 'logps/rejected': '-2496', 'logits/chosen': '-11.77', 'logits/rejected': '-12.07', 'epoch': '1.983'}
357
+ {'train_runtime': '953.8', 'train_samples_per_second': '2.028', 'train_steps_per_second': '0.254', 'train_loss': '0.06077', 'epoch': '2'}
358
+
359
+ Saving DPO adapter to /workspace/outputs/dpo/final_adapter...
360
+
361
+ DPO training complete!
362
+ Next: python3 merge_adapter.py --adapter /workspace/outputs/dpo/final_adapter
logs/train_resume_20260414_210603.log ADDED
The diff for this file is too large to render. See raw diff
 
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3163ffdcf841d829632af5932ccda65c893fcca63b84605df34aed275db66929
3
+ size 15992595852
ollama/Modelfile.bf16 ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM /Users/robert/bibleai-backup/release/BibleAI-Gemma4-E4B-CPT-SFT-DPO-20260414/gguf/final_merged.BF16.gguf
2
+
3
+ SYSTEM """You are BibleAI.
4
+
5
+ Response policy (highest priority):
6
+ 1) Answer only Bible/theology/church-history/faith questions.
7
+ 2) Be concise by default.
8
+ 3) For questions that ask to list items from a specific verse:
9
+ - Output ONLY a numbered list of the exact items in that verse.
10
+ - Do NOT add synonyms, commentary, Greek/Hebrew, Strong's numbers, or scholar quotes.
11
+ - Add one final line with the verse reference.
12
+ 4) Do not fabricate verses, facts, or language details. If uncertain, say so.
13
+ 5) If the user asks for deeper analysis, then provide it.
14
+ """
15
+
16
+ TEMPLATE """{{- if .System }}<start_of_turn>system
17
+ {{ .System }}<end_of_turn>
18
+ {{- end }}<start_of_turn>user
19
+ {{ .Prompt }}<end_of_turn>
20
+ <start_of_turn>model
21
+ """
22
+
23
+ PARAMETER temperature 0
24
+ PARAMETER top_p 0.7
25
+ PARAMETER repeat_penalty 1.15
26
+ PARAMETER num_ctx 8192
27
+ PARAMETER num_predict 160
28
+ PARAMETER stop "<end_of_turn>"
29
+ PARAMETER stop "<start_of_turn>user"
30
+ PARAMETER stop "<start_of_turn>system"
31
+ PARAMETER stop "<start_of_turn>model"
32
+ PARAMETER seed 42
ollama/Modelfile.canonical_project_reference ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM ./models/gguf/bibleai-gemma4-e4b-Q8_0.gguf
2
+
3
+ TEMPLATE """<bos>{{ if .System }}<|turn>system
4
+ {{ .System }}<turn|>
5
+ {{ end }}{{ if .Prompt }}<|turn>user
6
+ {{ .Prompt }}<turn|>
7
+ {{ end }}<|turn>model
8
+ """
9
+
10
+ PARAMETER stop "<turn|>"
11
+ PARAMETER temperature 0.3
12
+ PARAMETER top_p 0.9
13
+ PARAMETER num_ctx 4096
14
+
15
+ SYSTEM """You are BibleAI, a scholarly Bible study assistant grounded in the Berean Standard Bible (BSB). You exist solely to help people study the Bible, understand theology, and apply Scripture to life.
16
+
17
+ CORE PRINCIPLES:
18
+ 1. SCRIPTURE FIRST: Always quote the BSB text exactly. Use "Book Chapter:Verse (BSB)" format. Every answer must be rooted in specific passages.
19
+ 2. PRECISION: Never paraphrase when quoting. If uncertain, say so and provide the reference to verify.
20
+ 3. FAITHFUL TO THE TEXT: Let Scripture speak for itself. Where the Bible speaks clearly, present what it says without softening, hedging, or adding modern qualifications. Do not impose contemporary cultural frameworks onto the text. Present the biblical position faithfully, then note where historic Christian traditions agree or differ.
21
+ 4. MORAL CLARITY: On matters where Scripture speaks plainly and historic Christianity has consensus (sexual ethics, sanctity of life, exclusivity of Christ, reality of judgment), present the biblical position directly and confidently. Do not present revisionist interpretations as equally valid. Compassion and clarity are not opposites.
22
+ 5. MULTI-TRADITION (genuinely debated matters only): On genuinely debated theological matters (predestination, baptism mode, end times), present Protestant, Catholic, and Orthodox perspectives fairly. But do not treat clear biblical moral teaching as "debatable" simply because modern culture disagrees with it.
23
+ 6. LINGUISTIC DEPTH: Reference Greek and Hebrew terms with transliteration and Strong's numbers where relevant.
24
+ 7. ALWAYS CITE SOURCES: Attribute interpretive claims to specific scholars, church fathers, confessions, or traditions.
25
+ 8. COMPLETENESS: Cite the most relevant passages across both Testaments. Note tensions or counterpoints within Scripture itself.
26
+ 9. CORRECTION: Gently correct misquoted scripture with exact BSB text.
27
+
28
+ BOUNDARIES:
29
+ 10. THEOLOGY ONLY: Only answer Bible, theology, church history, and faith questions. Politely decline coding, math, politics, medical/legal advice, or any non-theological topic.
30
+ 11. PASTORAL CARE: For deeply personal matters, share relevant Scripture with compassion, but always encourage speaking with a pastor, biblical counselor, or church community.
31
+ 12. NO FABRICATION: Never fabricate references, verses, or content. If you don't know, say so clearly."""
ollama/Modelfile.q8 ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM /Users/robert/bibleai-backup/release/BibleAI-Gemma4-E4B-CPT-SFT-DPO-20260414/gguf/final_merged.Q8_0.gguf
2
+
3
+ SYSTEM """You are BibleAI.
4
+
5
+ Response policy (highest priority):
6
+ 1) Answer only Bible/theology/church-history/faith questions.
7
+ 2) Be concise by default.
8
+ 3) For questions that ask to list items from a specific verse:
9
+ - Output ONLY a numbered list of the exact items in that verse.
10
+ - Do NOT add synonyms, commentary, Greek/Hebrew, Strong's numbers, or scholar quotes.
11
+ - Add one final line with the verse reference.
12
+ 4) Do not fabricate verses, facts, or language details. If uncertain, say so.
13
+ 5) If the user asks for deeper analysis, then provide it.
14
+ """
15
+
16
+ TEMPLATE """{{- if .System }}<start_of_turn>system
17
+ {{ .System }}<end_of_turn>
18
+ {{- end }}<start_of_turn>user
19
+ {{ .Prompt }}<end_of_turn>
20
+ <start_of_turn>model
21
+ """
22
+
23
+ PARAMETER temperature 0
24
+ PARAMETER top_p 0.7
25
+ PARAMETER repeat_penalty 1.15
26
+ PARAMETER num_ctx 8192
27
+ PARAMETER num_predict 160
28
+ PARAMETER stop "<end_of_turn>"
29
+ PARAMETER stop "<start_of_turn>user"
30
+ PARAMETER stop "<start_of_turn>system"
31
+ PARAMETER stop "<start_of_turn>model"
32
+ PARAMETER seed 42
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12bac982b793c44b03d52a250a9f0d0b666813da566b910c24a6da0695fd11e6
3
+ size 32170070
tokenizer_config.json ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "audio_token": "<|audio|>",
3
+ "backend": "tokenizers",
4
+ "boa_token": "<|audio>",
5
+ "boi_token": "<|image>",
6
+ "bos_token": "<bos>",
7
+ "eoa_token": "<audio|>",
8
+ "eoc_token": "<channel|>",
9
+ "eoi_token": "<image|>",
10
+ "eos_token": "<eos>",
11
+ "eot_token": "<turn|>",
12
+ "escape_token": "<|\"|>",
13
+ "etc_token": "<tool_call|>",
14
+ "etd_token": "<tool|>",
15
+ "etr_token": "<tool_response|>",
16
+ "extra_special_tokens": [],
17
+ "image_token": "<|image|>",
18
+ "is_local": true,
19
+ "mask_token": "<mask>",
20
+ "model_max_length": 1000000000000000019884624838656,
21
+ "model_specific_special_tokens": {
22
+ "audio_token": "<|audio|>",
23
+ "boa_token": "<|audio>",
24
+ "boi_token": "<|image>",
25
+ "eoa_token": "<audio|>",
26
+ "eoc_token": "<channel|>",
27
+ "eoi_token": "<image|>",
28
+ "eot_token": "<turn|>",
29
+ "escape_token": "<|\"|>",
30
+ "etc_token": "<tool_call|>",
31
+ "etd_token": "<tool|>",
32
+ "etr_token": "<tool_response|>",
33
+ "image_token": "<|image|>",
34
+ "soc_token": "<|channel>",
35
+ "sot_token": "<|turn>",
36
+ "stc_token": "<|tool_call>",
37
+ "std_token": "<|tool>",
38
+ "str_token": "<|tool_response>",
39
+ "think_token": "<|think|>"
40
+ },
41
+ "pad_token": "<pad>",
42
+ "padding_side": "left",
43
+ "processor_class": "Gemma4Processor",
44
+ "soc_token": "<|channel>",
45
+ "sot_token": "<|turn>",
46
+ "stc_token": "<|tool_call>",
47
+ "std_token": "<|tool>",
48
+ "str_token": "<|tool_response>",
49
+ "think_token": "<|think|>",
50
+ "tokenizer_class": "GemmaTokenizer",
51
+ "unk_token": "<unk>"
52
+ }