"""Small chat server: streams model replies and serves a static index page."""

import os

from openai import OpenAI
from gradio import Server
from fastapi.responses import HTMLResponse

app = Server()

# OpenAI-compatible client pointed at the Hugging Face router.
# Reading HF_TOKEN with [] makes a missing token fail at startup (KeyError)
# rather than on the first request.
client = OpenAI(
    base_url="https://router.huggingface.co/v1",
    api_key=os.environ["HF_TOKEN"],
    default_headers={
        "X-HF-Bill-To": "huggingface"
    },
)


# NOTE(review): this function is a generator, so strictly it returns an
# iterator of str; the `-> str` annotation is kept as-is because the
# decorator may use it to describe the endpoint's output — confirm.
@app.api()
def chat(message: str, history_json: list) -> str:
    """Stream the assistant's reply to ``message``.

    Args:
        message: The new user message.
        history_json: Prior conversation as a list of
            ``{"role": ..., "content": ...}`` dicts; a falsy value
            (``None`` or ``[]``) is treated as an empty history.

    Yields:
        The reply accumulated so far, once per streamed chunk that carries
        new text (each yield is the full text to date, not just the delta).
    """
    # Build a new list so the caller's history is never mutated.
    messages = list(history_json or []) + [{"role": "user", "content": message}]

    stream = client.chat.completions.create(
        model="deepseek-ai/DeepSeek-V4-Pro:fireworks-ai",
        messages=messages,
        stream=True,
    )

    full_response = ""
    for chunk in stream:
        # Streamed chunks may carry an empty `choices` list (e.g. a trailing
        # usage-only chunk); indexing [0] on those would raise IndexError.
        if not chunk.choices:
            continue
        delta_text = chunk.choices[0].delta.content
        if delta_text is not None:
            full_response += delta_text
            yield full_response


@app.get("/")
async def homepage():
    """Return ``index.html`` (located next to this file) as an HTML response."""
    html_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "index.html")
    with open(html_path, "r", encoding="utf-8") as f:
        return HTMLResponse(content=f.read())


if __name__ == "__main__":
    app.launch(show_error=True)