Jayant-Kernel committed on
fix: free GPU memory between model evaluations
Browse files - evaluate.py +14 -1
evaluate.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
import os, json, re, torch, pathlib
|
| 2 |
from unsloth import FastLanguageModel
|
| 3 |
from deceit_env.server.environment import DeceitEnvironment
|
| 4 |
from deceit_env.server.grader import Grader
|
|
@@ -100,6 +100,11 @@ def evaluate_model(model_name, label, n_episodes=30):
|
|
| 100 |
if (i+1) % 10 == 0:
|
| 101 |
print(f" {i+1}/{n_episodes} done, mean reward so far: {sum(rewards)/len(rewards):.3f}")
|
| 102 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
return {
|
| 104 |
"label": label,
|
| 105 |
"mean_reward": sum(rewards)/len(rewards),
|
|
@@ -111,6 +116,14 @@ def evaluate_model(model_name, label, n_episodes=30):
|
|
| 111 |
|
| 112 |
# Evaluate both models
|
| 113 |
base_results = evaluate_model("unsloth/Qwen2.5-0.5B-Instruct", "Base Model (untrained)", n_episodes=30)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 114 |
trained_results = evaluate_model("Ajsaxena/deceit-qwen-0.5b-full", "DECEIT Trained", n_episodes=30)
|
| 115 |
|
| 116 |
# Print comparison
|
|
|
|
| 1 |
+
import os, json, re, torch, pathlib, gc
|
| 2 |
from unsloth import FastLanguageModel
|
| 3 |
from deceit_env.server.environment import DeceitEnvironment
|
| 4 |
from deceit_env.server.grader import Grader
|
|
|
|
| 100 |
if (i+1) % 10 == 0:
|
| 101 |
print(f" {i+1}/{n_episodes} done, mean reward so far: {sum(rewards)/len(rewards):.3f}")
|
| 102 |
|
| 103 |
+
# Free model from GPU
|
| 104 |
+
del model
|
| 105 |
+
torch.cuda.empty_cache()
|
| 106 |
+
gc.collect()
|
| 107 |
+
|
| 108 |
return {
|
| 109 |
"label": label,
|
| 110 |
"mean_reward": sum(rewards)/len(rewards),
|
|
|
|
| 116 |
|
| 117 |
# Evaluate both models
|
| 118 |
base_results = evaluate_model("unsloth/Qwen2.5-0.5B-Instruct", "Base Model (untrained)", n_episodes=30)
|
| 119 |
+
|
| 120 |
+
# Free GPU memory before loading second model
|
| 121 |
+
import gc
|
| 122 |
+
del model
|
| 123 |
+
torch.cuda.empty_cache()
|
| 124 |
+
gc.collect()
|
| 125 |
+
print("GPU memory freed")
|
| 126 |
+
|
| 127 |
trained_results = evaluate_model("Ajsaxena/deceit-qwen-0.5b-full", "DECEIT Trained", n_episodes=30)
|
| 128 |
|
| 129 |
# Print comparison
|