Spaces:
Running
Running
| # CORE LLM CLIENT — core/llm_client.py | |
| #from core.rag import retrieve_relevant_chunks | |
| import os | |
| from dotenv import load_dotenv | |
| from groq import Groq | |
| #from huggingface_hub import InferenceClient | |
# Read key/value pairs from a local .env file so they are visible through
# the process environment below.
load_dotenv()

# The Groq credential is mandatory: abort at import time when it is missing
# or empty, so a misconfiguration surfaces before any request is attempted.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
if not GROQ_API_KEY:
    raise ValueError("β GROQ_API_KEY not set in environment variables")

# Disabled alternative (Hugging Face Inference API), kept for reference:
#   HF_TOKEN = os.getenv("HF_TOKEN")
#   if not HF_TOKEN:
#       raise ValueError("β HF_TOKEN not set in environment variables")
#   client = InferenceClient(api_key=HF_TOKEN)

# Module-level Groq client used by ask_llm().
client = Groq(api_key=GROQ_API_KEY)
def ask_llm(prompt: str) -> str:
    """Send *prompt* to the chat model and return its reply as plain text.

    The prompt is submitted as a single ``user`` message to the
    ``llama-3.1-8b-instant`` model through the module-level ``client``,
    using a temperature of 0.2. Progress information is printed to stdout.

    Args:
        prompt: The full text to send to the model.

    Returns:
        The model's reply with surrounding whitespace stripped. This function
        does not raise on request failures: any exception is caught and
        reported as a string of the form ``"LLM Error: <message>"``, which
        callers must be prepared to receive in place of a real answer.
    """
    print("\nπ LLM CALL INITIATED")
    print("π§Ύ PROMPT LENGTH:", len(prompt))
    try:
        completion = client.chat.completions.create(
            model="llama-3.1-8b-instant",
            messages=[{"role": "user", "content": prompt}],
            temperature=0.2,  # lowered from 0.7
        )
        response = completion.choices[0].message.content
        # The message content is optional in the response schema. Without
        # this guard a None value would fail inside len() below and surface
        # as a confusing "object of type 'NoneType' has no len()" error.
        if response is None:
            raise ValueError("model returned no text content")
        print("β LLM RESPONSE RECEIVED")
        print("π RESPONSE LENGTH:", len(response))
        return response.strip()
    except Exception as e:
        # Deliberate best-effort boundary: failures are reported to the
        # caller as text instead of propagating.
        print("β LLM ERROR:", str(e))
        return f"LLM Error: {str(e)}"
| """ | |
| def ask_llm(prompt: str) -> str: | |
| print("\nπ Sending request to Groq...") | |
| try: | |
| completion = client.chat.completions.create( | |
| model="llama-3.1-8b-instant", | |
| messages=[ | |
| {"role": "user", "content": prompt} | |
| ], | |
| temperature=0.7, | |
| max_tokens=1024, | |
| ) | |
| response = completion.choices[0].message.content | |
| print("\nβ FINAL RESPONSE:", response) | |
| return response.strip() | |
| except Exception as e: | |
| print("β LLM ERROR:", str(e)) | |
| return "Error: LLM request failed." | |
| """ | |
| """ | |
| client = ollama.Client(host='http://127.0.0.1:11434') | |
| MAX_CHARS = 3000 # safe starting point | |
| def ask_llm(prompt): | |
| if len(prompt) > MAX_CHARS: | |
| prompt = prompt[:MAX_CHARS] | |
| print("β Prompt truncated for performance.") | |
| print("\n>>> Sending prompt to Ollama\n") | |
| stream = client.chat( | |
| model="llama3.2:latest", | |
| messages=[{"role": "user", "content": prompt}], | |
| stream=True | |
| ) | |
| full_response = "" | |
| for chunk in stream: | |
| content = chunk["message"]["content"] | |
| print(content, end="", flush=True) | |
| full_response += content | |
| print(f"\n") | |
| return full_response | |
| """ | |
| """ | |
| MAX_CHARS = 3000 # safe starting point | |
| def ask_llm(prompt): | |
| if len(prompt) > MAX_CHARS: | |
| prompt = prompt[:MAX_CHARS] | |
| print("β Prompt truncated for performance.") | |
| print(">>> Sending prompt to Ollama") | |
| response = ollama.chat( | |
| #model="llama3", | |
| model="llama3.2:latest", | |
| messages=[{"role": "user", "content": prompt}], | |
| stream=False | |
| ) | |
| return response["message"]["content"] | |
| """ | |