Spaces:

overwrite69
/

lmarena-bridge

Running

App Files Files Community

overwrite69 committed 12 days ago

Commit

57f5cba

verified ·

1 Parent(s): 891fdd7

Update Dockerfile

Browse files

Files changed (1) hide show

Dockerfile +54 -51

Dockerfile CHANGED Viewed

@@ -14,16 +14,16 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
 # Set working directory
 WORKDIR /app
-# Install Python dependencies
-# Install Python dependencies
 RUN pip install --no-cache-dir \
     fastapi==0.109.0 \
     uvicorn==0.27.0 \
-    httpx==0.26.0 \
     cloudscraper==1.2.71 \
-    requests==2.31.0 \
     gradio==4.44.0 \
-    "huggingface_hub<0.27.0"
 # Create the application file
 RUN cat > /app/app.py << 'PYTHON_EOF'
@@ -867,17 +867,14 @@ def get_proxied_http_client() -> httpx.AsyncClient:
     use_proxy = config.get("use_proxy", True)
     proxy_url = get_proxy_url()
-    proxies = None
     if use_proxy and proxy_url:
-        proxies = {
-            "http://": proxy_url,
-            "https://": proxy_url,
-        }
         debug_print(f"Using proxy: {proxy_url}")
     return httpx.AsyncClient(
         timeout=DEFAULT_REQUEST_TIMEOUT,
-        proxies=proxies
     )
@@ -1023,9 +1020,12 @@ async def stream_chat_completion(
                         if line.startswith("data: "):
                             yield line + "\n"
                         elif line.startswith("0:"):
-                            # LMArena format: 0:"text"\n
-                            content = line[2:].strip('"')
-                            yield f"data: {json.dumps({'choices': [{'delta': {'content': content}}]})}\n\n"
                         else:
                             yield line + "\n"
@@ -1052,6 +1052,8 @@ async def chat_completion_non_stream(
     """Non-streaming chat completion"""
     full_content = ""
     async for chunk in stream_chat_completion(
         model_id, model_public_name, conversation_id, prompt,
         auth_token, experimental_attachments, recaptcha_token, modality
@@ -1059,6 +1061,9 @@ async def chat_completion_non_stream(
         if chunk.startswith("data: ") and not chunk.startswith("data: [DONE]"):
             try:
                 data = json.loads(chunk[6:].strip())
                 if "choices" in data:
                     for choice in data["choices"]:
                         delta = choice.get("delta", {})
@@ -1068,6 +1073,9 @@ async def chat_completion_non_stream(
             except json.JSONDecodeError:
                 pass
     return {
         "id": f"chatcmpl-{uuid.uuid4()}",
         "object": "chat.completion",
@@ -1355,6 +1363,8 @@ async def api_chat_completions(request: Request, api_key: dict = Depends(rate_li
             model_id, model_public_name, conversation_id, prompt,
             auth_token, [], "", modality
         )
         return result
@@ -1423,14 +1433,10 @@ def get_proxy_status():
     return "🔴 Inactive"
-def refresh_models():
     """Refresh models from LMArena"""
     try:
-        loop = asyncio.new_event_loop()
-        asyncio.set_event_loop(loop)
-        models = loop.run_until_complete(fetch_models_from_lmarena())
-        loop.close()
         if models:
             save_models(models)
             return f"Successfully loaded {len(models)} models!"
@@ -1440,7 +1446,7 @@ def refresh_models():
         return f"Error refreshing models: {str(e)}"
-def chat_with_model(model_name, message, history):
     """Chat with a model"""
     if not model_name:
         return history, "Please select a model first."
@@ -1488,16 +1494,14 @@ def chat_with_model(model_name, message, history):
         auth_token = get_next_auth_token()
         # Run chat completion
-        loop = asyncio.new_event_loop()
-        asyncio.set_event_loop(loop)
-        result = loop.run_until_complete(
-            chat_completion_non_stream(
-                model_id, model_name, str(uuid.uuid4()), message,
-                auth_token, [], "", modality
-            )
         )
-        loop.close()
         # Extract response
         response = ""
         if "choices" in result and result["choices"]:
@@ -1559,8 +1563,12 @@ def create_gradio_interface():
                         refresh_models_btn = gr.Button("🔄 Refresh Models", variant="secondary")
                 refresh_models_btn.click(
-                    fn=lambda: (refresh_models(), gr.Dropdown(choices=get_model_list())),
                     outputs=[status_output, model_dropdown]
                 )
@@ -1723,19 +1731,21 @@ def create_gradio_interface():
 # MAIN ENTRY POINT
 # ============================================================
-def run_gradio():
-    """Run Gradio interface"""
     interface = create_gradio_interface()
-    interface.launch(
-        server_name=HOST,
-        server_port=PORT,
-        share=False,
-        show_error=True
     )
 def run_fastapi():
-    """Run FastAPI server"""
     uvicorn.run(
         app,
         host=HOST,
@@ -1743,13 +1753,12 @@ def run_fastapi():
         log_level="info"
     )
 if __name__ == "__main__":
     import argparse
     parser = argparse.ArgumentParser(description="LMArena Bridge with VPN Proxy")
-    parser.add_argument("--mode", choices=["gradio", "fastapi", "both"], default="gradio",
-                        help="Run mode: gradio (UI), fastapi (API only), or both")
     parser.add_argument("--port", type=int, default=PORT, help="Port to run on")
     parser.add_argument("--host", default=HOST, help="Host to bind to")
     parser.add_argument("--debug", action="store_true", help="Enable debug mode")
@@ -1768,16 +1777,10 @@ if __name__ == "__main__":
         config["use_proxy"] = False
         save_config(config)
-    if args.mode == "gradio":
-        run_gradio()
-    elif args.mode == "fastapi":
         run_fastapi()
-    elif args.mode == "both":
-        import multiprocessing
-        api_process = multiprocessing.Process(target=run_fastapi)
-        api_process.start()
-        run_gradio()
-        api_process.join()
 PYTHON_EOF
 # Expose port

 # Set working directory
 WORKDIR /app
+# Install Python dependencies (with fixes for Pydantic schema bugs and SOCKS proxy support)
 RUN pip install --no-cache-dir \
     fastapi==0.109.0 \
     uvicorn==0.27.0 \
+    "httpx[socks]==0.26.0" \
     cloudscraper==1.2.71 \
+    "requests[socks]==2.31.0" \
     gradio==4.44.0 \
+    "huggingface_hub<0.27.0" \
+    "pydantic<2.10"
 # Create the application file
 RUN cat > /app/app.py << 'PYTHON_EOF'
     use_proxy = config.get("use_proxy", True)
     proxy_url = get_proxy_url()
+    proxy = None
     if use_proxy and proxy_url:
+        proxy = proxy_url
         debug_print(f"Using proxy: {proxy_url}")
     return httpx.AsyncClient(
         timeout=DEFAULT_REQUEST_TIMEOUT,
+        proxy=proxy
     )
                         if line.startswith("data: "):
                             yield line + "\n"
                         elif line.startswith("0:"):
+                            try:
+                                content = json.loads(line[2:])
+                                yield f"data: {json.dumps({'choices': [{'delta': {'content': content}}]})}\n\n"
+                            except Exception:
+                                content = line[2:].strip('"')
+                                yield f"data: {json.dumps({'choices': [{'delta': {'content': content}}]})}\n\n"
                         else:
                             yield line + "\n"
     """Non-streaming chat completion"""
     full_content = ""
+    error_msg = None
     async for chunk in stream_chat_completion(
         model_id, model_public_name, conversation_id, prompt,
         auth_token, experimental_attachments, recaptcha_token, modality
         if chunk.startswith("data: ") and not chunk.startswith("data: [DONE]"):
             try:
                 data = json.loads(chunk[6:].strip())
+                if "error" in data:
+                    error_msg = data["error"]
+                    break
                 if "choices" in data:
                     for choice in data["choices"]:
                         delta = choice.get("delta", {})
             except json.JSONDecodeError:
                 pass
+    if error_msg:
+        return {"error": error_msg}
     return {
         "id": f"chatcmpl-{uuid.uuid4()}",
         "object": "chat.completion",
             model_id, model_public_name, conversation_id, prompt,
             auth_token, [], "", modality
         )
+        if "error" in result:
+            raise HTTPException(status_code=500, detail=result["error"])
         return result
     return "🔴 Inactive"
+async def refresh_models():
     """Refresh models from LMArena"""
     try:
+        models = await fetch_models_from_lmarena()
         if models:
             save_models(models)
             return f"Successfully loaded {len(models)} models!"
         return f"Error refreshing models: {str(e)}"
+async def chat_with_model(model_name, message, history):
     """Chat with a model"""
     if not model_name:
         return history, "Please select a model first."
         auth_token = get_next_auth_token()
         # Run chat completion
+        result = await chat_completion_non_stream(
+            model_id, model_name, str(uuid.uuid4()), message,
+            auth_token, [], "", modality
         )
+        if "error" in result:
+            return history, f"Error: {result['error']}"
         # Extract response
         response = ""
         if "choices" in result and result["choices"]:
                         refresh_models_btn = gr.Button("🔄 Refresh Models", variant="secondary")
+                async def on_refresh():
+                    status = await refresh_models()
+                    return status, gr.Dropdown(choices=get_model_list())
                 refresh_models_btn.click(
+                    fn=on_refresh,
                     outputs=[status_output, model_dropdown]
                 )
 # MAIN ENTRY POINT
 # ============================================================
+def run_both():
+    """Run both FastAPI and Gradio UI cooperatively on the same port"""
     interface = create_gradio_interface()
+    # Safely mounts Gradio to run simultaneously alongside the API endpoints.
+    app_with_ui = gr.mount_gradio_app(app, interface, path="/")
+    uvicorn.run(
+        app_with_ui,
+        host=HOST,
+        port=PORT,
+        log_level="info"
     )
 def run_fastapi():
+    """Run pure FastAPI API without the UI overlay"""
     uvicorn.run(
         app,
         host=HOST,
         log_level="info"
     )
 if __name__ == "__main__":
     import argparse
     parser = argparse.ArgumentParser(description="LMArena Bridge with VPN Proxy")
+    parser.add_argument("--mode", choices=["fastapi", "both"], default="both",
+                        help="Run mode: fastapi (API only) or both (API + Gradio UI). Default is both.")
     parser.add_argument("--port", type=int, default=PORT, help="Port to run on")
     parser.add_argument("--host", default=HOST, help="Host to bind to")
     parser.add_argument("--debug", action="store_true", help="Enable debug mode")
         config["use_proxy"] = False
         save_config(config)
+    if args.mode == "fastapi":
         run_fastapi()
+    else:
+        run_both()
 PYTHON_EOF
 # Expose port