from fastapi import FastAPI, HTTPException from fastapi.responses import StreamingResponse, HTMLResponse from fastapi.staticfiles import StaticFiles from fastapi.middleware.cors import CORSMiddleware import httpx import json import subprocess import time import os app = FastAPI(title="AI Coding Server", version="1.0") # Enable CORS for all origins app.add_middleware( CORSMiddleware, allow_origins=["*"], allow_credentials=True, allow_methods=["*"], allow_headers=["*"], ) OLLAMA_API = "http://localhost:11434" CODING_MODEL = "mistral" def start_ollama(): """Start Ollama service in background""" try: httpx.get(f"{OLLAMA_API}/api/tags", timeout=2) print("✓ Ollama already running") return except: print("Starting Ollama...") subprocess.Popen(["ollama", "serve"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) time.sleep(5) try: print(f"Pulling {CODING_MODEL} model...") subprocess.run(["ollama", "pull", CODING_MODEL], timeout=300, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) print(f"✓ {CODING_MODEL} ready") except Exception as e: print(f"⚠ Model pull timeout (will download on first request): {e}") @app.on_event("startup") async def startup_event(): """Initialize on startup""" start_ollama() # ========== API Endpoints ========== @app.get("/api/health") async def health_check(): """Health check endpoint""" try: async with httpx.AsyncClient() as client: await client.get(f"{OLLAMA_API}/api/tags", timeout=2) return { "status": "healthy", "service": "AI Coding Server", "model": CODING_MODEL, "api": OLLAMA_API } except Exception as e: raise HTTPException(status_code=503, detail=f"Ollama not responding: {str(e)}") @app.get("/api/models") async def list_models(): """List available models""" try: async with httpx.AsyncClient() as client: response = await client.get(f"{OLLAMA_API}/api/tags", timeout=5) return response.json() except Exception as e: raise HTTPException(status_code=500, detail=str(e)) @app.post("/api/generate") async def generate_code(request_data: dict): """Generate code using the AI model""" prompt = request_data.get("prompt", "") model = request_data.get("model", CODING_MODEL) max_tokens = request_data.get("max_tokens", 2000) if not prompt: raise HTTPException(status_code=400, detail="Prompt cannot be empty") try: payload = { "model": model, "prompt": prompt, "stream": False, "temperature": 0.3, "num_predict": max_tokens, } # 300 second timeout for free tier CPU inference async with httpx.AsyncClient(timeout=300) as client: response = await client.post( f"{OLLAMA_API}/api/generate", json=payload ) response.raise_for_status() result = response.json() return { "prompt": prompt, "model": model, "response": result.get("response", ""), "done": result.get("done", True), "total_duration": result.get("total_duration", 0), } except httpx.TimeoutException: raise HTTPException(status_code=504, detail="⏳ Still generating... Free tier is slow. Try again in 30 seconds!") except Exception as e: raise HTTPException(status_code=500, detail=f"Generation failed: {str(e)}") @app.get("/api/status") async def status(): """Get current system status""" try: async with httpx.AsyncClient() as client: models_response = await client.get(f"{OLLAMA_API}/api/tags", timeout=2) models = models_response.json().get("models", []) return { "status": "running", "models_available": len(models), "models": [m.get("name", "unknown") for m in models], "default_model": CODING_MODEL, } except Exception as e: return { "status": "initializing", "error": str(e), "message": "Server is starting up, please wait..." } # ========== Web UI ========== @app.get("/", response_class=HTMLResponse) async def serve_ui(): """Serve the web UI""" return """
Powered by Mistral AI • Generate code instantly • 100% Local & Private