# app/ollama_client.py
"""
Use Ollama if available, otherwise fall back.
"""
import json
from typing import Generator

import requests

OLLAMA_URL = "http://localhost:11434"


def is_ollama_available() -> bool:
    """Return True if an Ollama server is reachable at OLLAMA_URL."""
    try:
        r = requests.get(f"{OLLAMA_URL}/api/tags", timeout=2)
        return r.status_code == 200
    except requests.RequestException:
        return False


def generate_with_ollama(
    prompt: str,
    model: str = "nanbeige",
    temperature: float = 0.7,
    max_tokens: int = 100,
) -> Generator[str, None, None]:
    """
    Stream generated text from the Ollama API (if running).

    Yields response chunks as they arrive.
    """
    if not is_ollama_available():
        raise ConnectionError("Ollama not available")

    response = requests.post(
        f"{OLLAMA_URL}/api/generate",
        json={
            "model": model,
            "prompt": prompt,
            "stream": True,
            "options": {
                "temperature": temperature,
                "num_predict": max_tokens,  # Ollama's name for the max-token limit
                "top_p": 0.95,
            },
        },
        stream=True,
        timeout=60,
    )
    response.raise_for_status()

    # With stream=True, Ollama emits one JSON object per line.
    for line in response.iter_lines():
        if line:
            data = json.loads(line)
            if "response" in data:
                yield data["response"]
            if data.get("done"):
                break
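
# Minimal usage sketch of the "Ollama if available, otherwise fall back"
# flow described in the module docstring. The `generate_fallback` function
# below is a hypothetical placeholder, not part of any real API; swap in
# whatever local generation path this app actually uses.
def generate_fallback(prompt: str) -> Generator[str, None, None]:
    """Placeholder fallback generator; replace with a real local model."""
    yield f"[fallback] echo: {prompt}"


if __name__ == "__main__":
    demo_prompt = "Write a haiku about local inference."
    # Prefer Ollama when the server responds; otherwise use the fallback.
    gen = (
        generate_with_ollama(demo_prompt)
        if is_ollama_available()
        else generate_fallback(demo_prompt)
    )
    for chunk in gen:
        print(chunk, end="", flush=True)
    print()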