{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.7986821744122199,
  "eval_steps": 500,
  "global_step": 1000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "entropy": 2.306920379027724,
      "epoch": 0.019967054360305495,
      "grad_norm": 0.03515625,
      "learning_rate": 0.0001263157894736842,
      "loss": 2.448968811035156,
      "mean_token_accuracy": 0.48531667422503233,
      "num_tokens": 122713.0,
      "step": 25
    },
    {
      "entropy": 2.053416675031185,
      "epoch": 0.03993410872061099,
      "grad_norm": 0.0302734375,
      "learning_rate": 0.0001999595542133758,
      "loss": 2.042926483154297,
      "mean_token_accuracy": 0.5536270077899098,
      "num_tokens": 245834.0,
      "step": 50
    },
    {
      "entropy": 2.019237674474716,
      "epoch": 0.05990116308091649,
      "grad_norm": 0.0245361328125,
      "learning_rate": 0.00019956707906498044,
      "loss": 2.0004498291015627,
      "mean_token_accuracy": 0.562225787602365,
      "num_tokens": 367174.0,
      "step": 75
    },
    {
      "entropy": 2.014568568766117,
      "epoch": 0.07986821744122198,
      "grad_norm": 0.029052734375,
      "learning_rate": 0.00019875870121543717,
      "loss": 1.9949961853027345,
      "mean_token_accuracy": 0.5627686691284179,
      "num_tokens": 487834.0,
      "step": 100
    },
    {
      "entropy": 1.9470337933301927,
      "epoch": 0.09983527180152749,
      "grad_norm": 0.0279541015625,
      "learning_rate": 0.00019753779734842827,
      "loss": 1.9355754089355468,
      "mean_token_accuracy": 0.5725972962379455,
      "num_tokens": 611352.0,
      "step": 125
    },
    {
      "entropy": 1.9996179219335317,
      "epoch": 0.11980232616183298,
      "grad_norm": 0.0286865234375,
      "learning_rate": 0.0001959094673144354,
      "loss": 1.9678744506835937,
      "mean_token_accuracy": 0.5630620580911636,
      "num_tokens": 735053.0,
      "step": 150
    },
    {
      "entropy": 2.0072034925222395,
      "epoch": 0.13976938052213847,
      "grad_norm": 0.0301513671875,
      "learning_rate": 0.00019388051282810022,
      "loss": 1.9737957763671874,
      "mean_token_accuracy": 0.5624778818339109,
      "num_tokens": 855860.0,
      "step": 175
    },
    {
      "entropy": 1.9725533105432986,
      "epoch": 0.15973643488244396,
      "grad_norm": 0.029296875,
      "learning_rate": 0.0001914594090567099,
      "loss": 1.9277491760253906,
      "mean_token_accuracy": 0.5647629579156637,
      "num_tokens": 977334.0,
      "step": 200
    },
    {
      "entropy": 1.9535374976694584,
      "epoch": 0.17970348924274945,
      "grad_norm": 0.033447265625,
      "learning_rate": 0.00018865626921848615,
      "loss": 1.9351695251464844,
      "mean_token_accuracy": 0.5722655826061964,
      "num_tokens": 1092583.0,
      "step": 225
    },
    {
      "entropy": 1.9489152195304633,
      "epoch": 0.19967054360305497,
      "grad_norm": 0.025634765625,
      "learning_rate": 0.0001854828023385541,
      "loss": 1.9120469665527344,
      "mean_token_accuracy": 0.5757150813564658,
      "num_tokens": 1212652.0,
      "step": 250
    },
    {
      "entropy": 1.997425957247615,
      "epoch": 0.21963759796336046,
      "grad_norm": 0.0235595703125,
      "learning_rate": 0.00018195226433904957,
      "loss": 1.9783148193359374,
      "mean_token_accuracy": 0.566706589795649,
      "num_tokens": 1332055.0,
      "step": 275
    },
    {
      "entropy": 1.9235536295175553,
      "epoch": 0.23960465232366596,
      "grad_norm": 0.032470703125,
      "learning_rate": 0.00017807940266766593,
      "loss": 1.9035797119140625,
      "mean_token_accuracy": 0.5777761967480183,
      "num_tokens": 1452841.0,
      "step": 300
    },
    {
      "entropy": 1.9555407621711494,
      "epoch": 0.25957170668397145,
      "grad_norm": 0.0247802734375,
      "learning_rate": 0.00017388039469593428,
      "loss": 1.922522735595703,
      "mean_token_accuracy": 0.573435662984848,
      "num_tokens": 1572803.0,
      "step": 325
    },
    {
      "entropy": 1.9522953194752335,
      "epoch": 0.27953876104427694,
      "grad_norm": 0.02978515625,
      "learning_rate": 0.00016937278014455336,
      "loss": 1.914853057861328,
      "mean_token_accuracy": 0.5744891692698002,
      "num_tokens": 1697419.0,
      "step": 350
    },
    {
      "entropy": 1.895245919264853,
      "epoch": 0.29950581540458243,
      "grad_norm": 0.026123046875,
      "learning_rate": 0.00016457538781803623,
      "loss": 1.8297265625,
      "mean_token_accuracy": 0.5865043254941702,
      "num_tokens": 1819231.0,
      "step": 375
    },
    {
      "entropy": 1.9276008826121689,
      "epoch": 0.3194728697648879,
      "grad_norm": 0.0302734375,
      "learning_rate": 0.00015950825695471146,
      "loss": 1.8969316101074218,
      "mean_token_accuracy": 0.579356978982687,
      "num_tokens": 1941170.0,
      "step": 400
    },
    {
      "entropy": 1.9401621558889746,
      "epoch": 0.3394399241251934,
      "grad_norm": 0.0291748046875,
      "learning_rate": 0.0001541925535206084,
      "loss": 1.9100968933105469,
      "mean_token_accuracy": 0.5764700850099325,
      "num_tokens": 2065882.0,
      "step": 425
    },
    {
      "entropy": 1.9133401766419411,
      "epoch": 0.3594069784854989,
      "grad_norm": 0.0303955078125,
      "learning_rate": 0.000148650481796876,
      "loss": 1.8422721862792968,
      "mean_token_accuracy": 0.5874501725286245,
      "num_tokens": 2185024.0,
      "step": 450
    },
    {
      "entropy": 1.9126300086826087,
      "epoch": 0.3793740328458044,
      "grad_norm": 0.03271484375,
      "learning_rate": 0.00014290519163004495,
      "loss": 1.8789381408691406,
      "mean_token_accuracy": 0.5848022982478142,
      "num_tokens": 2299587.0,
      "step": 475
    },
    {
      "entropy": 1.909918104019016,
      "epoch": 0.39934108720610995,
      "grad_norm": 0.032470703125,
      "learning_rate": 0.0001369806817325581,
      "loss": 1.8951301574707031,
      "mean_token_accuracy": 0.582027070298791,
      "num_tokens": 2421897.0,
      "step": 500
    },
    {
      "entropy": 1.9182585052400827,
      "epoch": 0.41930814156641544,
      "grad_norm": 0.031494140625,
      "learning_rate": 0.00013090169943749476,
      "loss": 1.8786566162109375,
      "mean_token_accuracy": 0.5797786585241557,
      "num_tokens": 2544369.0,
      "step": 525
    },
    {
      "entropy": 1.8837098168581725,
      "epoch": 0.43927519592672093,
      "grad_norm": 0.0301513671875,
      "learning_rate": 0.00012469363732622296,
      "loss": 1.8448243713378907,
      "mean_token_accuracy": 0.5849873025715351,
      "num_tokens": 2671654.0,
      "step": 550
    },
    {
      "entropy": 1.8790070757828652,
      "epoch": 0.4592422502870264,
      "grad_norm": 0.0289306640625,
      "learning_rate": 0.00011838242716077917,
      "loss": 1.8447459411621094,
      "mean_token_accuracy": 0.5876961750537157,
      "num_tokens": 2796033.0,
      "step": 575
    },
    {
      "entropy": 1.8867588526010513,
      "epoch": 0.4792093046473319,
      "grad_norm": 0.033447265625,
      "learning_rate": 0.00011199443156402998,
      "loss": 1.835140380859375,
      "mean_token_accuracy": 0.5846484461426735,
      "num_tokens": 2917243.0,
      "step": 600
    },
    {
      "entropy": 1.9055894463136793,
      "epoch": 0.4991763590076374,
      "grad_norm": 0.0283203125,
      "learning_rate": 0.00010555633390008086,
      "loss": 1.858441619873047,
      "mean_token_accuracy": 0.5845886848121882,
      "num_tokens": 3042703.0,
      "step": 625
    },
    {
      "entropy": 1.9573378081992268,
      "epoch": 0.5191434133679429,
      "grad_norm": 0.03173828125,
      "learning_rate": 9.909502681491316e-05,
      "loss": 1.8958790588378907,
      "mean_token_accuracy": 0.5772438555955887,
      "num_tokens": 3160162.0,
      "step": 650
    },
    {
      "entropy": 1.8727275183051824,
      "epoch": 0.5391104677282484,
      "grad_norm": 0.03173828125,
      "learning_rate": 9.263749990282754e-05,
      "loss": 1.8269801330566406,
      "mean_token_accuracy": 0.5881718883663416,
      "num_tokens": 3285704.0,
      "step": 675
    },
    {
      "entropy": 1.871655127387494,
      "epoch": 0.5590775220885539,
      "grad_norm": 0.0289306640625,
      "learning_rate": 8.621072696792363e-05,
      "loss": 1.8388119506835938,
      "mean_token_accuracy": 0.5867326802760363,
      "num_tokens": 3410119.0,
      "step": 700
    },
    {
      "entropy": 1.8627979960665106,
      "epoch": 0.5790445764488594,
      "grad_norm": 0.035888671875,
      "learning_rate": 7.984155335153711e-05,
      "loss": 1.799385986328125,
      "mean_token_accuracy": 0.5873606249690055,
      "num_tokens": 3533765.0,
      "step": 725
    },
    {
      "entropy": 1.8317018933594227,
      "epoch": 0.5990116308091649,
      "grad_norm": 0.0281982421875,
      "learning_rate": 7.35565837962798e-05,
      "loss": 1.7772984313964844,
      "mean_token_accuracy": 0.5965435421466827,
      "num_tokens": 3657321.0,
      "step": 750
    },
    {
      "entropy": 1.8882549648359417,
      "epoch": 0.6189786851694704,
      "grad_norm": 0.03076171875,
      "learning_rate": 6.738207131508735e-05,
      "loss": 1.8385765075683593,
      "mean_token_accuracy": 0.59088682141155,
      "num_tokens": 3779575.0,
      "step": 775
    },
    {
      "entropy": 1.8342267361842095,
      "epoch": 0.6389457395297758,
      "grad_norm": 0.032958984375,
      "learning_rate": 6.134380752948085e-05,
      "loss": 1.800379180908203,
      "mean_token_accuracy": 0.5952950984984636,
      "num_tokens": 3896543.0,
      "step": 800
    },
    {
      "entropy": 1.873757717087865,
      "epoch": 0.6589127938900814,
      "grad_norm": 0.03564453125,
      "learning_rate": 5.546701493511106e-05,
      "loss": 1.8183651733398438,
      "mean_token_accuracy": 0.5902918418496848,
      "num_tokens": 4018098.0,
      "step": 825
    },
    {
      "entropy": 1.83077443132177,
      "epoch": 0.6788798482503868,
      "grad_norm": 0.032470703125,
      "learning_rate": 4.977624154460464e-05,
      "loss": 1.7461175537109375,
      "mean_token_accuracy": 0.598720720410347,
      "num_tokens": 4141174.0,
      "step": 850
    },
    {
      "entropy": 1.918299620486796,
      "epoch": 0.6988469026106924,
      "grad_norm": 0.0301513671875,
      "learning_rate": 4.42952583478004e-05,
      "loss": 1.9020709228515624,
      "mean_token_accuracy": 0.5798033401742577,
      "num_tokens": 4259185.0,
      "step": 875
    },
    {
      "entropy": 1.8444363391213119,
      "epoch": 0.7188139569709978,
      "grad_norm": 0.0341796875,
      "learning_rate": 3.904696001769571e-05,
      "loss": 1.7869160461425782,
      "mean_token_accuracy": 0.5964432079344988,
      "num_tokens": 4377724.0,
      "step": 900
    },
    {
      "entropy": 1.8295260372944175,
      "epoch": 0.7387810113313034,
      "grad_norm": 0.031982421875,
      "learning_rate": 3.4053269276865285e-05,
      "loss": 1.778699951171875,
      "mean_token_accuracy": 0.6018728485703468,
      "num_tokens": 4497691.0,
      "step": 925
    },
    {
      "entropy": 1.8099911727011204,
      "epoch": 0.7587480656916088,
      "grad_norm": 0.03271484375,
      "learning_rate": 2.9335045323824496e-05,
      "loss": 1.7513389587402344,
      "mean_token_accuracy": 0.6039503507316113,
      "num_tokens": 4616131.0,
      "step": 950
    },
    {
      "entropy": 1.8742087873071431,
      "epoch": 0.7787151200519143,
      "grad_norm": 0.033203125,
      "learning_rate": 2.491199670185008e-05,
      "loss": 1.7982223510742188,
      "mean_token_accuracy": 0.5908738762140274,
      "num_tokens": 4737443.0,
      "step": 975
    },
    {
      "entropy": 1.869517589211464,
      "epoch": 0.7986821744122199,
      "grad_norm": 0.032958984375,
      "learning_rate": 2.0802598974215226e-05,
      "loss": 1.8264849853515626,
      "mean_token_accuracy": 0.5888447714596987,
      "num_tokens": 4858100.0,
      "step": 1000
    }
  ],
  "logging_steps": 25,
  "max_steps": 1253,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 2.10805585396224e+17,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}
|
|