Spaces:
Running
Running
Update neura_self_hosted.py
Browse files
- neura_self_hosted.py +6 -6
neura_self_hosted.py
CHANGED
|
@@ -23,19 +23,17 @@ WEATHER_API_KEY = os.getenv("WEATHER_API_KEY", "") # For the weather tool
|
|
| 23 |
|
| 24 |
# This is a great starting model. It will be downloaded automatically the first time.
|
| 25 |
# It's specifically ... [the rest of this sentence is missing in the source]
# logging.info("Loading self-hosted generative model. This may take a moment...")
|
| 26 |
-
# In neura_self_hosted.py
|
| 27 |
|
| 28 |
-
# ... (you don't need the BitsAndBytesConfig import for this option)
|
| 29 |
-
|
| 30 |
-
# --- 🧠 LOAD THE LOCAL GENERATIVE AI MODEL (CPU Version) ---
|
| 31 |
logging.info("Loading self-hosted generative model for CPU...")
|
| 32 |
try:
|
| 33 |
MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"
|
| 34 |
|
| 35 |
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
|
|
|
|
|
|
|
| 36 |
model = AutoModelForCausalLM.from_pretrained(
|
| 37 |
MODEL_NAME,
|
| 38 |
-
device_map="cpu",
|
| 39 |
trust_remote_code=True,
|
| 40 |
)
|
| 41 |
|
|
@@ -44,11 +42,13 @@ try:
|
|
| 44 |
model=model,
|
| 45 |
tokenizer=tokenizer,
|
| 46 |
)
|
| 47 |
-
logging.info(f"✅ Model '{MODEL_NAME}' loaded
|
| 48 |
except Exception as e:
|
| 49 |
logging.error(f"❌ Failed to load local AI model: {e}")
|
| 50 |
local_ai_pipeline = None
|
| 51 |
|
|
|
|
|
|
|
| 52 |
# --- AI Personas & System Prompt ---
|
| 53 |
ANIME_PERSONAS = {
|
| 54 |
"default": "You are a versatile and intelligent AI assistant.",
|
|
|
|
| 23 |
|
| 24 |
# This is a great starting model. It will be downloaded automatically the first time.
|
| 25 |
# It's specifically ... [the rest of this sentence is missing in the source]
# logging.info("Loading self-hosted generative model. This may take a moment...")
|
|
|
|
| 26 |
|
|
|
|
|
|
|
|
|
|
| 27 |
logging.info("Loading self-hosted generative model for CPU...")
|
| 28 |
try:
|
| 29 |
MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"
|
| 30 |
|
| 31 |
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
|
| 32 |
+
|
| 33 |
+
# Force the model to load on the CPU and remove all GPU/quantization code
|
| 34 |
model = AutoModelForCausalLM.from_pretrained(
|
| 35 |
MODEL_NAME,
|
| 36 |
+
device_map="cpu",
|
| 37 |
trust_remote_code=True,
|
| 38 |
)
|
| 39 |
|
|
|
|
| 42 |
model=model,
|
| 43 |
tokenizer=tokenizer,
|
| 44 |
)
|
| 45 |
+
logging.info(f"✅ Model '{MODEL_NAME}' loaded on CPU. Expect very slow performance.")
|
| 46 |
except Exception as e:
|
| 47 |
logging.error(f"❌ Failed to load local AI model: {e}")
|
| 48 |
local_ai_pipeline = None
|
| 49 |
|
| 50 |
+
# ... (The rest of your Python code remains the same) ...
|
| 51 |
+
|
| 52 |
# --- AI Personas & System Prompt ---
|
| 53 |
ANIME_PERSONAS = {
|
| 54 |
"default": "You are a versatile and intelligent AI assistant.",
|