# CGJI01_v0.2 / core / llm_client.py
# prashantmatlani's picture
# shared psychological states
# b1a4ad3
# CORE LLM CLIENT — core/llm_client.py
#from core.rag import retrieve_relevant_chunks
import os
from dotenv import load_dotenv
from groq import Groq
#from huggingface_hub import InferenceClient
# 🔥 Load .env file
load_dotenv()

# The Groq API key is read from the process environment once load_dotenv()
# has run. Importing this module fails immediately when the key is missing
# or empty, so a misconfigured deployment is caught before any request.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
if GROQ_API_KEY is None or GROQ_API_KEY == "":
    raise ValueError("❌ GROQ_API_KEY not set in environment variables")

# Disabled alternative: Hugging Face token check (kept for reference).
# HF_TOKEN = os.getenv("HF_TOKEN")
# if not HF_TOKEN:
#     raise ValueError("❌ HF_TOKEN not set in environment variables")

# Module-wide Groq client shared by every ask_llm() call.
client = Groq(api_key=GROQ_API_KEY)

# Disabled alternative clients (Hugging Face InferenceClient):
# client = InferenceClient(api_key=os.environ["HF_TOKEN"])
# client = InferenceClient(api_key=HF_TOKEN)
def ask_llm(
    prompt: str,
    model: str = "llama-3.1-8b-instant",
    temperature: float = 0.2,
) -> str:
    """Send *prompt* as a single user message and return the model's reply.

    The request goes through the module-level ``client``. Progress and
    failures are reported on stdout via ``print``.

    Args:
        prompt: Text sent as the only ``user`` message of the conversation.
        model: Model identifier passed to the chat-completions endpoint.
            Defaults to the model this module has always used.
        temperature: Sampling temperature passed to the endpoint. The
            default of 0.2 was lowered from an earlier 0.7.

    Returns:
        The reply text with surrounding whitespace stripped. Any exception
        raised while calling the API or reading the reply is caught and
        reported as the string ``"LLM Error: <message>"`` instead of being
        propagated, so callers must inspect the returned text for failures.
    """
    print("\nπŸš€ LLM CALL INITIATED")
    print("🧾 PROMPT LENGTH:", len(prompt))

    try:
        completion = client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": prompt}],
            temperature=temperature,
        )
        response = completion.choices[0].message.content

        # The message content is optional in the chat-completions schema.
        # Without this check a None content surfaced as the confusing
        # "object of type 'NoneType' has no len()" error from the len() call.
        if response is None:
            raise ValueError("model returned no text content")

        print("βœ… LLM RESPONSE RECEIVED")
        print("πŸ“ RESPONSE LENGTH:", len(response))
        return response.strip()

    except Exception as e:
        # Deliberate best-effort boundary: failures are returned as text
        # rather than raised, which is the contract existing callers rely on.
        print("❌ LLM ERROR:", str(e))
        return f"LLM Error: {str(e)}"
"""
def ask_llm(prompt: str) -> str:
print("\nπŸš€ Sending request to Groq...")
try:
completion = client.chat.completions.create(
model="llama-3.1-8b-instant",
messages=[
{"role": "user", "content": prompt}
],
temperature=0.7,
max_tokens=1024,
)
response = completion.choices[0].message.content
print("\nβœ… FINAL RESPONSE:", response)
return response.strip()
except Exception as e:
print("❌ LLM ERROR:", str(e))
return "Error: LLM request failed."
"""
"""
client = ollama.Client(host='http://127.0.0.1:11434')
MAX_CHARS = 3000 # safe starting point
def ask_llm(prompt):
if len(prompt) > MAX_CHARS:
prompt = prompt[:MAX_CHARS]
print("⚠ Prompt truncated for performance.")
print("\n>>> Sending prompt to Ollama\n")
stream = client.chat(
model="llama3.2:latest",
messages=[{"role": "user", "content": prompt}],
stream=True
)
full_response = ""
for chunk in stream:
content = chunk["message"]["content"]
print(content, end="", flush=True)
full_response += content
print(f"\n")
return full_response
"""
"""
MAX_CHARS = 3000 # safe starting point
def ask_llm(prompt):
if len(prompt) > MAX_CHARS:
prompt = prompt[:MAX_CHARS]
print("⚠ Prompt truncated for performance.")
print(">>> Sending prompt to Ollama")
response = ollama.chat(
#model="llama3",
model="llama3.2:latest",
messages=[{"role": "user", "content": prompt}],
stream=False
)
return response["message"]["content"]
"""