Model Card for Mistral-7B-BTC-JEPA-LLM-Expert (LoRA fine-tune of Mistral-7B-v0.1 for 12-hour BTC direction prediction)
CODE: https://github.com/frank-morales2020/MLxDL/blob/main/FINAL_LLM_JEPA_MISTRAL_FT_BTC.ipynb
Training Data
# Validate all samples in the dataset: every sample must carry the full set
# of JEPA dual-encoder keys (combined, user-only, assistant-only) before
# column filtering and training.
print("\n--- Validating Dataset Keys ---")
required_keys = [
    "input_ids", "labels", "attention_mask",
    "input_ids_user", "labels_user", "attention_mask_user",
    "input_ids_assistant", "labels_assistant", "attention_mask_assistant",
]

for split in ["train", "test"]:
    print(f"\nChecking {split} split...")
    missing_samples = []
    for idx, sample in tqdm(
        enumerate(tokenized_dataset[split]),
        total=len(tokenized_dataset[split]),
        desc=f"Validating {split} split",
    ):
        missing_keys = [key for key in required_keys if key not in sample]
        if missing_keys:
            missing_samples.append((idx, missing_keys))

    if missing_samples:
        # Show at most the first five offenders, then abort — training with
        # incomplete samples would fail later with a much less clear error.
        print(f"Found {len(missing_samples)} problematic samples in {split} split:")
        for idx, missing_keys in missing_samples[:5]:
            print(f"Sample {idx} missing keys: {missing_keys}")
        raise ValueError(f"Dataset validation failed in {split} split.")
    print(f"{split} split: All {len(tokenized_dataset[split])} samples validated.")

# Drop every column the collator does not need.
COLUMNS_TO_KEEP = required_keys
try:
    tokenized_dataset["train"] = tokenized_dataset["train"].select_columns(COLUMNS_TO_KEEP)
    tokenized_dataset["test"] = tokenized_dataset["test"].select_columns(COLUMNS_TO_KEEP)
    print(f"Dataset columns filtered to: {COLUMNS_TO_KEEP}")
except Exception as e:
    # Surface the available columns to make schema mismatches easy to diagnose,
    # then re-raise — this is not a recoverable condition.
    print(f"Error during column selection: {e}")
    print(f"Available columns in train: {tokenized_dataset['train'].column_names}")
    print(f"Available columns in test: {tokenized_dataset['test'].column_names}")
    raise
Training Procedure
Preprocessing [optional]
[More Information Needed]
Training Hyperparameters
# PEFT (LoRA) Config
# Adapters are attached to every attention projection (q/k/v/o) and the full
# MLP path (gate/up/down) of each transformer block, so the adapter can steer
# both attention and feed-forward behaviour.
peft_config = LoraConfig(
lora_alpha=16,  # scaling numerator; effective LoRA scale = alpha / r = 16/64 = 0.25
lora_dropout=0.1,  # dropout on the adapter input during training
r=64,  # rank of the low-rank update (adapter capacity)
bias="none",  # bias terms stay frozen
task_type="CAUSAL_LM",  # decoder-only language modelling
target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
)
# Training Arguments - OPTIMIZED FOR JEPA MONITORING
# Short demo run: 500 steps, logging every 50 steps, no saving/evaluation.
training_arguments = TrainingArguments(
output_dir=OUTPUT_DIR,
num_train_epochs=1,
per_device_train_batch_size=2,
gradient_accumulation_steps=8,  # effective batch size = 2 * 8 = 16
optim="paged_adamw_8bit",  # memory-efficient paged 8-bit AdamW
save_steps=0, # Disable saving during demo
logging_steps=50, # See JEPA metrics every 50 steps (10 reports over max_steps=500)
max_steps=500, # Just enough to see JEPA loss decreasing
learning_rate=2e-4,
weight_decay=0.001,
fp16=True,  # fp16 mixed precision; bf16 explicitly off below
bf16=False,
max_grad_norm=0.3,  # tight gradient clipping, common for QLoRA
warmup_ratio=0.03,
lr_scheduler_type="cosine",
disable_tqdm=False,
report_to="none",
# βββ CRITICAL CHANGES FOR DEMO βββ
evaluation_strategy="no", # Disable evaluation during training
eval_steps=None, # No evaluation steps
metric_for_best_model=None, # Not needed for demo
# βββ CRITICAL CHANGES FOR DEMO βββ
dataloader_drop_last=True,  # keep every step at the full effective batch size
dataloader_num_workers=0,
gradient_checkpointing=True,  # trade recompute for activation memory
gradient_checkpointing_kwargs={"use_reentrant": False},
# NOTE(review): resume_from_checkpoint is normally passed to Trainer.train()
# and, as a TrainingArguments field, expects a checkpoint path — confirm
# `True` here has the intended effect in the installed transformers version.
resume_from_checkpoint=True
)
[More Information Needed]
Evaluation
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
)
import torch
from peft import PeftModel
import peft
import os

# --- FILE PATHS (Replicated from original code) ---
MODEL_NAME = "mistralai/Mistral-7B-v0.1"
HUB_MODEL_ID = "frankmorales2020/Mistral-7B-BTC-JEPA-LLM-Expert"

# --- 1. SETUP ---
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
MODEL_TYPE = "FINE_TUNED"

# --- 2. MODEL AND TOKENIZER LOADING (Single block for efficiency) ---
print("\n--- Model and Tokenizer Setup ---")
print(f"--- Loading Model onto {DEVICE} ---")
print(f'BASE MODEL: {MODEL_NAME}\nFINE TUNE MODEL: {HUB_MODEL_ID}')

try:
    # 4-bit Quantization Config: NF4 weights with fp16 compute
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16,
    )

    # Tokenizer first; pad with EOS since Mistral ships no pad token
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    tokenizer.pad_token = tokenizer.eos_token

    # Add special tokens and resize embeddings (essential for fine-tuned LoRA)
    SPECIAL_PREDICTOR_TOKENS = ["<pred>", "<targ>", "<jepa>"]
    tokenizer.add_special_tokens({"additional_special_tokens": SPECIAL_PREDICTOR_TOKENS})

    # Quantized base model, sharded automatically across available devices
    base_model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        quantization_config=bnb_config,
        device_map="auto",
        trust_remote_code=True,
    )
    # The embedding table must match the enlarged vocabulary BEFORE the
    # adapter (trained against the resized embeddings) is attached.
    base_model.resize_token_embeddings(len(tokenizer))

    # Attach the fine-tuned LoRA adapter and switch to eval mode
    model = PeftModel.from_pretrained(base_model, HUB_MODEL_ID).eval()
    print("π SUCCESS: Loaded fine-tuned JEPA model!")
except Exception as e:
    print(f"β Model loading failed: {e}")
    # Fallback to base model logic is removed for this test since the specific fine-tuned
    # model must be used, so we raise the error.
    raise
# --- 3. INFERENCE FUNCTION (Encapsulating fixed parameters) ---
def run_inference_test(btc_data_input, tokenizer, model, device):
    """Run one greedy-decode inference and extract the direction word.

    Args:
        btc_data_input: OHLC snippet string, e.g. "[O:30000, H:30500, C:30200]".
        tokenizer: tokenizer matching ``model``.
        model: causal LM (base or PEFT-wrapped) exposing ``.generate``.
        device: device string/object the tokenized inputs are moved to.

    Returns:
        Tuple ``(input_text, prediction_output_raw, prediction_output)`` where
        the last element is one of "UP"/"DOWN"/"FLAT" or the not-found sentinel.
    """
    # 1. Create Strict Prompt
    user_prompt_content = f"Current BTC data: {btc_data_input}. Give ONLY the 12-hour direction (UP, DOWN, or FLAT). The output MUST be a single word: UP, DOWN, or FLAT."
    input_text = f"<s>[INST] {user_prompt_content} [/INST]"

    # 2. Tokenize and Set Fixed Generation Params
    inputs = tokenizer(input_text, return_tensors="pt").to(device)
    # Fixed generation parameters proven to work with robust extraction
    # (greedy decode; temperature/top_p are inert with do_sample=False)
    generation_params = {
        "max_new_tokens": 15,
        "do_sample": False,
        "temperature": 0.1,
        "top_p": 1.0,
        "repetition_penalty": 1.0,
    }

    # 3. Generate Output
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            **generation_params,
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id,
        )
    response_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # 4. Robust Extraction Logic.
    # FIX: the original duplicated the target-word scan in both branches of the
    # "[/INST]" check; isolate the raw-text selection, then scan once.
    if "[/INST]" in response_text:
        prediction_output_raw = response_text.split("[/INST]")[-1].strip()
    else:
        prediction_output_raw = response_text.strip()

    prediction_output = "β PREDICTION NOT FOUND"
    for target in ["UP", "DOWN", "FLAT"]:  # same priority order as before
        if target in prediction_output_raw.upper():
            prediction_output = target
            break

    return input_text, prediction_output_raw, prediction_output
# --- 4. MULTI-INPUT EXECUTION ---
# Define the test cases to sanity-check the direction extraction.
# (FIX: the old comment claimed 3 cases while only one is defined.)
TEST_CASES = [
    {
        "name": "Test 1: Downward Reversal (Original Input)",
        "input": "[O:30000, H:30500, C:30200]",
        "expected_logic": "DOWN"
    },
]

for i, test in enumerate(TEST_CASES):
    print(f"\n[{test['name']} - Expecting {test['expected_logic']}]")

    # Run the test
    input_text, raw_output, cleaned_prediction = run_inference_test(
        test['input'], tokenizer, model, DEVICE
    )

    # Print results
    print("=" * 70)
    print(f"π€ INPUT: {test['input']}")
    print(f"π€ RAW OUTPUT: {raw_output}")
    # FIX: this f-string was split across two source lines (a syntax error
    # introduced when the status emoji was mangled); emit one line.
    # NOTE(review): "β" is a mojibake'd status emoji from the notebook export.
    print(f"β CLEANED PREDICTION: **{cleaned_prediction}**")
    print("=" * 70)
[Test 1: Downward Reversal (Original Input) - Expecting DOWN]
======================================================================
π€ INPUT: [O:30000, H:30500, C:30200]
π€ RAW OUTPUT: The 12-hour prediction is **DOWN**. The final prediction is
β
CLEANED PREDICTION: **DOWN**
======================================================================
import os
import torch
import torch.nn.functional as F
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel

os.environ["CUDA_HOME"] = "/usr/local/cuda"

# --- 2. CONFIGURATION ---
MODEL_NAME = "mistralai/Mistral-7B-v0.1"
HUB_MODEL_ID = "frankmorales2020/Mistral-7B-BTC-JEPA-LLM-Expert"
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
SPECIAL_TOKENS = ["<|predictor_1|>", "<|predictor_2|>", "<|predictor_3|>"]

# --- 3. THE FIXED LOADING SEQUENCE ---
# Tokenizer first: pad with EOS and register the predictor tokens so the
# base model's embedding table can be resized before the adapter attaches.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.add_special_tokens({"additional_special_tokens": SPECIAL_TOKENS})

# 4-bit NF4 quantization with fp16 compute
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

# Quantized base model, then the LoRA expert adapter in eval mode
base_model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, quantization_config=bnb_config, device_map="auto")
base_model.resize_token_embeddings(len(tokenizer))
model = PeftModel.from_pretrained(base_model, HUB_MODEL_ID).eval()

print("π SUCCESS: Model and JEPA Expert loaded!")
# --- 4. CALIBRATED INFERENCE ENGINE (DEMO-FOCUSED) ---
def run_expert_inference(ohlc_data, rsi=50.0, sma=60000.0):
    """Score UP/DOWN/FLAT for one market snapshot via null-prompt calibration.

    The last-token logits of a fixed, perfectly neutral "null" prompt are
    subtracted from the real prompt's logits so the model's baseline bias
    toward any direction word cancels out before softmax.

    Returns:
        ``(prediction, confidences)`` — the argmax direction word and a dict
        mapping each word to its probability rounded to 3 decimals.
    """
    target_words = ["UP", "DOWN", "FLAT"]
    # Last sub-token id of each direction word (the token actually scored)
    target_ids = [tokenizer.encode(word, add_special_tokens=False)[-1] for word in target_words]

    instruction = (
        f"Analyze BTC Market Data: {ohlc_data}. "
        f"Technicals: RSI(14) is {rsi:.2f}, SMA(20) is {sma:.2f}. "
        f"Direction (UP/DOWN/FLAT):"
    )
    prompt = f"<s>[INST] {instruction} [/INST]"

    with torch.no_grad():
        # Baseline pass: neutral OHLC, RSI 50, price on its SMA
        null_prompt = f"<s>[INST] Analyze BTC Market Data: [O:60000, H:60100, C:60050]. Technicals: RSI(14) is 50.00, SMA(20) is 60000.00. Direction (UP/DOWN/FLAT): [/INST]"
        n_inputs = tokenizer(null_prompt, return_tensors="pt").to(DEVICE)
        null_logits = model(**n_inputs).logits[0, -1, target_ids]

        # Real pass over the caller's snapshot
        r_inputs = tokenizer(prompt, return_tensors="pt").to(DEVICE)
        real_logits = model(**r_inputs).logits[0, -1, target_ids]

    # Temperature 1.0 for natural balance in demo cases
    calibrated_logits = (real_logits - null_logits) / 1.0
    probs = F.softmax(calibrated_logits, dim=-1)

    confidences = {word: round(prob.item(), 3) for word, prob in zip(target_words, probs)}
    prediction = max(confidences, key=confidences.get)
    return prediction, confidences
# FIX: removed a duplicated "π SUCCESS: Model and JEPA Expert loaded!" print
# that appeared here — loading already happened (and was reported) above;
# repeating the message after a mere function definition was a copy-paste
# artifact that misleads the log reader.

#--- 5. FINAL DEMO CASES TO SHOWCASE UP, DOWN, and FLAT ---
demo_cases = [
    # Clear bearish crash (strong DOWN signal)
    {"label": "BEARISH CRASH (DOWN Demo)", "data": "[O:100000, H:101000, C:70000]", "rsi": 20.0, "sma": 95000.0},
    # Pure neutral flat (FLAT Demo)
    {"label": "NEUTRAL FLAT (FLAT Demo)", "data": "[O:87000, H:87100, C:87050]", "rsi": 50.0, "sma": 87000.0},
    # Current real market Dec 25, 2025 (~$87,700, neutral RSI ~43, mild weakness below SMA ~$88,500)
    {"label": "CURRENT REAL (FLAT/DOWN-ish)", "data": "[O:87700, H:87800, C:87700]", "rsi": 43.0, "sma": 88500.0},
    # Mild bullish (to trigger UP - the adapter sometimes favors UP in very neutral cases)
    {"label": "MILD BULLISH (UP Demo)", "data": "[O:87000, H:88000, C:87500]", "rsi": 55.0, "sma": 86800.0},
    # Strong bullish rally (UP Demo fallback if needed)
    {"label": "STRONG BULLISH (UP Demo)", "data": "[O:80000, H:95000, C:94000]", "rsi": 75.0, "sma": 82000.0},
]

# Render the comparison table
print("\n" + "="*95)
print(f"{'Demo Scenario':<30} | {'Prediction':<10} | {'Confidence (UP / DOWN / FLAT)'}")
print("-" * 95)
for test in demo_cases:
    pred, scores = run_expert_inference(test['data'], rsi=test['rsi'], sma=test['sma'])
    score_str = f"U:{scores['UP']:.3f} D:{scores['DOWN']:.3f} F:{scores['FLAT']:.3f}"
    print(f"{test['label']:<30} | {pred:<10} | {score_str}")
print("="*95)
π SUCCESS: Model and JEPA Expert loaded!
===============================================================================================
Demo Scenario | Prediction | Confidence (UP / DOWN / FLAT)
-----------------------------------------------------------------------------------------------
BEARISH CRASH (DOWN Demo) | FLAT | U:0.328 D:0.332 F:0.340
NEUTRAL FLAT (FLAT Demo) | UP | U:0.337 D:0.335 F:0.329
CURRENT REAL (FLAT/DOWN-ish) | DOWN | U:0.332 D:0.338 F:0.330
MILD BULLISH (UP Demo) | DOWN | U:0.331 D:0.337 F:0.331
STRONG BULLISH (UP Demo) | DOWN | U:0.331 D:0.336 F:0.332
===============================================================================================
Results
β
JEPA Monitoring Enabled - will show metrics every 50/500 steps
Step Training Loss
50 0.876400
100 0.235400
150 0.224500
200 0.229200
250 0.231300
300 0.221500
350 0.225300
400 0.229300
450 0.220400
500 0.224200
π― JEPA Metrics [Step 50]:
JEPA Loss: 0.0001
LM Loss: 0.2534
Cosine Sim: 0.9999
Total Loss: 0.2535
π― JEPA Metrics [Step 100]:
JEPA Loss: 0.0000
LM Loss: 0.2319
Cosine Sim: 1.0000
Total Loss: 0.2320
π― JEPA Metrics [Step 150]:
JEPA Loss: 0.0000
LM Loss: 0.2438
Cosine Sim: 1.0000
Total Loss: 0.2438
π― JEPA Metrics [Step 200]:
JEPA Loss: 0.0000
LM Loss: 0.2384
Cosine Sim: 1.0000
Total Loss: 0.2384
π― JEPA Metrics [Step 250]:
JEPA Loss: 0.0000
LM Loss: 0.2216
Cosine Sim: 1.0000
Total Loss: 0.2217
π― JEPA Metrics [Step 300]:
JEPA Loss: 0.0000
LM Loss: 0.1579
Cosine Sim: 1.0000
Total Loss: 0.1580
π― JEPA Metrics [Step 350]:
JEPA Loss: 0.0000
LM Loss: 0.2287
Cosine Sim: 1.0000
Total Loss: 0.2287
π― JEPA Metrics [Step 400]:
JEPA Loss: 0.0000
LM Loss: 0.2151
Cosine Sim: 1.0000
Total Loss: 0.2152
π― JEPA Metrics [Step 450]:
JEPA Loss: 0.0000
LM Loss: 0.1994
Cosine Sim: 1.0000
Total Loss: 0.1994
π― JEPA Metrics [Step 500]:
JEPA Loss: 0.0000
LM Loss: 0.2188
Cosine Sim: 1.0000
Total Loss: 0.2189
Compute Infrastructure
Wed Oct 8 05:48:17 2025
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15 Driver Version: 550.54.15 CUDA Version: 12.4 |
|-----------------------------------------+------------------------+----------------------+
| GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC |
| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. |
| | | MIG M. |
|=========================================+========================+======================|
| 0 NVIDIA L4 Off | 00000000:00:03.0 Off | 0 |
| N/A 37C P8 11W / 72W | 0MiB / 23034MiB | 0% Default |
| | | N/A |
+-----------------------------------------+------------------------+----------------------+
+-----------------------------------------------------------------------------------------+
| Processes: |
| GPU GI CI PID Type Process name GPU Memory |
| ID ID Usage |
|=========================================================================================|
| No running processes found |
+-----------------------------------------------------------------------------------------+
Inference Providers NEW
This model isn't deployed by any Inference Provider. π Ask for provider support