Spaces:

prashantmatlani
/

CGJI01_v0.2

Running

App Files Files Community

CGJI01_v0.2 / core /llm_client.py

prashantmatlani

shared psychological states

b1a4ad3 3 days ago

raw

history blame contribute delete

3.71 kB


	#CORE LLM CLIENT — core/llm_client.py


	#from core.rag import retrieve_relevant_chunks

	import os
	from dotenv import load_dotenv
	from groq import Groq

	#from huggingface_hub import InferenceClient

	# 🔥 Load .env file
	load_dotenv()

	# 🔐 Load API Key from environment
	#"""
	# Read the Groq credential from the process environment. A missing or
	# empty value aborts the import immediately, before any client is built.
	GROQ_API_KEY = os.environ.get("GROQ_API_KEY")

	if GROQ_API_KEY is None or GROQ_API_KEY == "":
	    raise ValueError("❌ GROQ_API_KEY not set in environment variables")
	#"""

	"""
	HF_TOKEN = os.getenv("HF_TOKEN")

	if not HF_TOKEN:
	raise ValueError("❌ HF_TOKEN not set in environment variables")
	"""

	# Module-level Groq client, built once at import time from GROQ_API_KEY
	# and used by ask_llm() for every chat-completion request.
	client = Groq(api_key=GROQ_API_KEY)
	# Disabled alternative: Hugging Face InferenceClient authenticated with
	# HF_TOKEN (kept for reference; requires the huggingface_hub import).
	#client = InferenceClient(api_key=os.environ["HF_TOKEN"])
	#client = InferenceClient(api_key=HF_TOKEN)

	def ask_llm(
	    prompt: str,
	    *,
	    model: str = "llama-3.1-8b-instant",
	    temperature: float = 0.2,
	) -> str:
	    """Send ``prompt`` as a single user message and return the reply text.

	    The request goes through the module-level ``client`` chat-completions
	    API. Progress and failures are reported on stdout via ``print``.

	    Args:
	        prompt: Text sent as the only ``user`` message of the conversation.
	        model: Model identifier passed to the API. The default is the
	            value this function previously hard-coded.
	        temperature: Sampling temperature passed to the API. The default
	            is the value this function previously hard-coded.

	    Returns:
	        The reply text with surrounding whitespace stripped. This function
	        does not raise for request failures: if the API call raises, the
	        response has no choices, or the first choice carries no text
	        content, a string of the form ``"LLM Error: <reason>"`` is
	        returned instead.
	    """
	    print("\n🚀 LLM CALL INITIATED")
	    print("🧾 PROMPT LENGTH:", len(prompt))

	    try:
	        # Alternatives previously listed here as commented-out options:
	        # "meta-llama/Llama-3.1-8B-Instruct", "Qwen/Qwen2.5-72B-Instruct",
	        # "ecnu-icalk/PsychAgent-Qwen3-32B" -- pass them via ``model=``.
	        completion = client.chat.completions.create(
	            model=model,
	            messages=[{"role": "user", "content": prompt}],
	            temperature=temperature,
	        )

	        # An empty ``choices`` list or a ``None`` content would otherwise
	        # surface as an opaque IndexError / TypeError message; detect both
	        # here and report them clearly through the same error path.
	        choices = completion.choices
	        response = choices[0].message.content if choices else None
	        if response is None:
	            raise ValueError("model returned no text content")

	        print("✅ LLM RESPONSE RECEIVED")
	        print("📏 RESPONSE LENGTH:", len(response))

	        return response.strip()

	    except Exception as e:
	        # Deliberate catch-all boundary: callers receive an error string
	        # rather than an exception, matching the original contract.
	        print("❌ LLM ERROR:", str(e))
	        return f"LLM Error: {str(e)}"


	"""
	def ask_llm(prompt: str) -> str:
	print("\n🚀 Sending request to Groq...")

	try:
	completion = client.chat.completions.create(
	model="llama-3.1-8b-instant",
	messages=[
	{"role": "user", "content": prompt}
	],
	temperature=0.7,
	max_tokens=1024,
	)

	response = completion.choices[0].message.content

	print("\n✅ FINAL RESPONSE:", response)

	return response.strip()

	except Exception as e:
	print("❌ LLM ERROR:", str(e))
	return "Error: LLM request failed."

	"""

	"""
	client = ollama.Client(host='http://127.0.0.1:11434')
	MAX_CHARS = 3000 # safe starting point

	def ask_llm(prompt):

	if len(prompt) > MAX_CHARS:
	prompt = prompt[:MAX_CHARS]
	print("⚠ Prompt truncated for performance.")

	print("\n>>> Sending prompt to Ollama\n")

	stream = client.chat(
	model="llama3.2:latest",
	messages=[{"role": "user", "content": prompt}],
	stream=True
	)

	full_response = ""

	for chunk in stream:
	content = chunk["message"]["content"]
	print(content, end="", flush=True)
	full_response += content

	print(f"\n")
	return full_response
	"""


	"""
	MAX_CHARS = 3000 # safe starting point

	def ask_llm(prompt):

	if len(prompt) > MAX_CHARS:
	prompt = prompt[:MAX_CHARS]
	print("⚠ Prompt truncated for performance.")

	print(">>> Sending prompt to Ollama")

	response = ollama.chat(
	#model="llama3",
	model="llama3.2:latest",
	messages=[{"role": "user", "content": prompt}],
	stream=False
	)

	return response["message"]["content"]
	"""