Spaces:
Running
Running
Update Dockerfile
Browse files- Dockerfile +54 -51
Dockerfile
CHANGED
|
@@ -14,16 +14,16 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
|
|
| 14 |
# Set working directory
|
| 15 |
WORKDIR /app
|
| 16 |
|
| 17 |
-
# Install Python dependencies
|
| 18 |
-
# Install Python dependencies
|
| 19 |
RUN pip install --no-cache-dir \
|
| 20 |
fastapi==0.109.0 \
|
| 21 |
uvicorn==0.27.0 \
|
| 22 |
-
httpx==0.26.0 \
|
| 23 |
cloudscraper==1.2.71 \
|
| 24 |
-
requests==2.31.0 \
|
| 25 |
gradio==4.44.0 \
|
| 26 |
-
"huggingface_hub<0.27.0"
|
|
|
|
| 27 |
|
| 28 |
# Create the application file
|
| 29 |
RUN cat > /app/app.py << 'PYTHON_EOF'
|
|
@@ -867,17 +867,14 @@ def get_proxied_http_client() -> httpx.AsyncClient:
|
|
| 867 |
use_proxy = config.get("use_proxy", True)
|
| 868 |
proxy_url = get_proxy_url()
|
| 869 |
|
| 870 |
-
|
| 871 |
if use_proxy and proxy_url:
|
| 872 |
-
|
| 873 |
-
"http://": proxy_url,
|
| 874 |
-
"https://": proxy_url,
|
| 875 |
-
}
|
| 876 |
debug_print(f"Using proxy: {proxy_url}")
|
| 877 |
|
| 878 |
return httpx.AsyncClient(
|
| 879 |
timeout=DEFAULT_REQUEST_TIMEOUT,
|
| 880 |
-
|
| 881 |
)
|
| 882 |
|
| 883 |
|
|
@@ -1023,9 +1020,12 @@ async def stream_chat_completion(
|
|
| 1023 |
if line.startswith("data: "):
|
| 1024 |
yield line + "\n"
|
| 1025 |
elif line.startswith("0:"):
|
| 1026 |
-
|
| 1027 |
-
|
| 1028 |
-
|
|
|
|
|
|
|
|
|
|
| 1029 |
else:
|
| 1030 |
yield line + "\n"
|
| 1031 |
|
|
@@ -1052,6 +1052,8 @@ async def chat_completion_non_stream(
|
|
| 1052 |
"""Non-streaming chat completion"""
|
| 1053 |
|
| 1054 |
full_content = ""
|
|
|
|
|
|
|
| 1055 |
async for chunk in stream_chat_completion(
|
| 1056 |
model_id, model_public_name, conversation_id, prompt,
|
| 1057 |
auth_token, experimental_attachments, recaptcha_token, modality
|
|
@@ -1059,6 +1061,9 @@ async def chat_completion_non_stream(
|
|
| 1059 |
if chunk.startswith("data: ") and not chunk.startswith("data: [DONE]"):
|
| 1060 |
try:
|
| 1061 |
data = json.loads(chunk[6:].strip())
|
|
|
|
|
|
|
|
|
|
| 1062 |
if "choices" in data:
|
| 1063 |
for choice in data["choices"]:
|
| 1064 |
delta = choice.get("delta", {})
|
|
@@ -1068,6 +1073,9 @@ async def chat_completion_non_stream(
|
|
| 1068 |
except json.JSONDecodeError:
|
| 1069 |
pass
|
| 1070 |
|
|
|
|
|
|
|
|
|
|
| 1071 |
return {
|
| 1072 |
"id": f"chatcmpl-{uuid.uuid4()}",
|
| 1073 |
"object": "chat.completion",
|
|
@@ -1355,6 +1363,8 @@ async def api_chat_completions(request: Request, api_key: dict = Depends(rate_li
|
|
| 1355 |
model_id, model_public_name, conversation_id, prompt,
|
| 1356 |
auth_token, [], "", modality
|
| 1357 |
)
|
|
|
|
|
|
|
| 1358 |
return result
|
| 1359 |
|
| 1360 |
|
|
@@ -1423,14 +1433,10 @@ def get_proxy_status():
|
|
| 1423 |
return "🔴 Inactive"
|
| 1424 |
|
| 1425 |
|
| 1426 |
-
def refresh_models():
|
| 1427 |
"""Refresh models from LMArena"""
|
| 1428 |
try:
|
| 1429 |
-
|
| 1430 |
-
asyncio.set_event_loop(loop)
|
| 1431 |
-
models = loop.run_until_complete(fetch_models_from_lmarena())
|
| 1432 |
-
loop.close()
|
| 1433 |
-
|
| 1434 |
if models:
|
| 1435 |
save_models(models)
|
| 1436 |
return f"Successfully loaded {len(models)} models!"
|
|
@@ -1440,7 +1446,7 @@ def refresh_models():
|
|
| 1440 |
return f"Error refreshing models: {str(e)}"
|
| 1441 |
|
| 1442 |
|
| 1443 |
-
def chat_with_model(model_name, message, history):
|
| 1444 |
"""Chat with a model"""
|
| 1445 |
if not model_name:
|
| 1446 |
return history, "Please select a model first."
|
|
@@ -1488,16 +1494,14 @@ def chat_with_model(model_name, message, history):
|
|
| 1488 |
auth_token = get_next_auth_token()
|
| 1489 |
|
| 1490 |
# Run chat completion
|
| 1491 |
-
|
| 1492 |
-
|
| 1493 |
-
|
| 1494 |
-
chat_completion_non_stream(
|
| 1495 |
-
model_id, model_name, str(uuid.uuid4()), message,
|
| 1496 |
-
auth_token, [], "", modality
|
| 1497 |
-
)
|
| 1498 |
)
|
| 1499 |
-
loop.close()
|
| 1500 |
|
|
|
|
|
|
|
|
|
|
| 1501 |
# Extract response
|
| 1502 |
response = ""
|
| 1503 |
if "choices" in result and result["choices"]:
|
|
@@ -1559,8 +1563,12 @@ def create_gradio_interface():
|
|
| 1559 |
|
| 1560 |
refresh_models_btn = gr.Button("🔄 Refresh Models", variant="secondary")
|
| 1561 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1562 |
refresh_models_btn.click(
|
| 1563 |
-
fn=
|
| 1564 |
outputs=[status_output, model_dropdown]
|
| 1565 |
)
|
| 1566 |
|
|
@@ -1723,19 +1731,21 @@ def create_gradio_interface():
|
|
| 1723 |
# MAIN ENTRY POINT
|
| 1724 |
# ============================================================
|
| 1725 |
|
| 1726 |
-
def
|
| 1727 |
-
"""Run Gradio
|
| 1728 |
interface = create_gradio_interface()
|
| 1729 |
-
|
| 1730 |
-
|
| 1731 |
-
|
| 1732 |
-
|
| 1733 |
-
|
|
|
|
|
|
|
|
|
|
| 1734 |
)
|
| 1735 |
|
| 1736 |
-
|
| 1737 |
def run_fastapi():
|
| 1738 |
-
"""Run FastAPI
|
| 1739 |
uvicorn.run(
|
| 1740 |
app,
|
| 1741 |
host=HOST,
|
|
@@ -1743,13 +1753,12 @@ def run_fastapi():
|
|
| 1743 |
log_level="info"
|
| 1744 |
)
|
| 1745 |
|
| 1746 |
-
|
| 1747 |
if __name__ == "__main__":
|
| 1748 |
import argparse
|
| 1749 |
|
| 1750 |
parser = argparse.ArgumentParser(description="LMArena Bridge with VPN Proxy")
|
| 1751 |
-
parser.add_argument("--mode", choices=["
|
| 1752 |
-
help="Run mode:
|
| 1753 |
parser.add_argument("--port", type=int, default=PORT, help="Port to run on")
|
| 1754 |
parser.add_argument("--host", default=HOST, help="Host to bind to")
|
| 1755 |
parser.add_argument("--debug", action="store_true", help="Enable debug mode")
|
|
@@ -1768,16 +1777,10 @@ if __name__ == "__main__":
|
|
| 1768 |
config["use_proxy"] = False
|
| 1769 |
save_config(config)
|
| 1770 |
|
| 1771 |
-
if args.mode == "
|
| 1772 |
-
run_gradio()
|
| 1773 |
-
elif args.mode == "fastapi":
|
| 1774 |
run_fastapi()
|
| 1775 |
-
|
| 1776 |
-
|
| 1777 |
-
api_process = multiprocessing.Process(target=run_fastapi)
|
| 1778 |
-
api_process.start()
|
| 1779 |
-
run_gradio()
|
| 1780 |
-
api_process.join()
|
| 1781 |
PYTHON_EOF
|
| 1782 |
|
| 1783 |
# Expose port
|
|
|
|
| 14 |
# Set working directory
|
| 15 |
WORKDIR /app
|
| 16 |
|
| 17 |
+
# Install Python dependencies. The [socks] extras give httpx and requests SOCKS proxy support; pydantic is pinned below 2.10 to work around Pydantic schema bugs.
|
|
|
|
| 18 |
RUN pip install --no-cache-dir \
|
| 19 |
fastapi==0.109.0 \
|
| 20 |
uvicorn==0.27.0 \
|
| 21 |
+
"httpx[socks]==0.26.0" \
|
| 22 |
cloudscraper==1.2.71 \
|
| 23 |
+
"requests[socks]==2.31.0" \
|
| 24 |
gradio==4.44.0 \
|
| 25 |
+
"huggingface_hub<0.27.0" \
|
| 26 |
+
"pydantic<2.10"
|
| 27 |
|
| 28 |
# Create the application file
|
| 29 |
RUN cat > /app/app.py << 'PYTHON_EOF'
|
|
|
|
| 867 |
use_proxy = config.get("use_proxy", True)
|
| 868 |
proxy_url = get_proxy_url()
|
| 869 |
|
| 870 |
+
proxy = None
|
| 871 |
if use_proxy and proxy_url:
|
| 872 |
+
proxy = proxy_url
|
|
|
|
|
|
|
|
|
|
| 873 |
debug_print(f"Using proxy: {proxy_url}")
|
| 874 |
|
| 875 |
return httpx.AsyncClient(
|
| 876 |
timeout=DEFAULT_REQUEST_TIMEOUT,
|
| 877 |
+
proxy=proxy
|
| 878 |
)
|
| 879 |
|
| 880 |
|
|
|
|
| 1020 |
if line.startswith("data: "):
|
| 1021 |
yield line + "\n"
|
| 1022 |
elif line.startswith("0:"):
|
| 1023 |
+
try:
|
| 1024 |
+
content = json.loads(line[2:])
|
| 1025 |
+
yield f"data: {json.dumps({'choices': [{'delta': {'content': content}}]})}\n\n"
|
| 1026 |
+
except Exception:
|
| 1027 |
+
content = line[2:].strip('"')
|
| 1028 |
+
yield f"data: {json.dumps({'choices': [{'delta': {'content': content}}]})}\n\n"
|
| 1029 |
else:
|
| 1030 |
yield line + "\n"
|
| 1031 |
|
|
|
|
| 1052 |
"""Non-streaming chat completion"""
|
| 1053 |
|
| 1054 |
full_content = ""
|
| 1055 |
+
error_msg = None
|
| 1056 |
+
|
| 1057 |
async for chunk in stream_chat_completion(
|
| 1058 |
model_id, model_public_name, conversation_id, prompt,
|
| 1059 |
auth_token, experimental_attachments, recaptcha_token, modality
|
|
|
|
| 1061 |
if chunk.startswith("data: ") and not chunk.startswith("data: [DONE]"):
|
| 1062 |
try:
|
| 1063 |
data = json.loads(chunk[6:].strip())
|
| 1064 |
+
if "error" in data:
|
| 1065 |
+
error_msg = data["error"]
|
| 1066 |
+
break
|
| 1067 |
if "choices" in data:
|
| 1068 |
for choice in data["choices"]:
|
| 1069 |
delta = choice.get("delta", {})
|
|
|
|
| 1073 |
except json.JSONDecodeError:
|
| 1074 |
pass
|
| 1075 |
|
| 1076 |
+
if error_msg:
|
| 1077 |
+
return {"error": error_msg}
|
| 1078 |
+
|
| 1079 |
return {
|
| 1080 |
"id": f"chatcmpl-{uuid.uuid4()}",
|
| 1081 |
"object": "chat.completion",
|
|
|
|
| 1363 |
model_id, model_public_name, conversation_id, prompt,
|
| 1364 |
auth_token, [], "", modality
|
| 1365 |
)
|
| 1366 |
+
if "error" in result:
|
| 1367 |
+
raise HTTPException(status_code=500, detail=result["error"])
|
| 1368 |
return result
|
| 1369 |
|
| 1370 |
|
|
|
|
| 1433 |
return "🔴 Inactive"
|
| 1434 |
|
| 1435 |
|
| 1436 |
+
async def refresh_models():
|
| 1437 |
"""Refresh models from LMArena"""
|
| 1438 |
try:
|
| 1439 |
+
models = await fetch_models_from_lmarena()
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1440 |
if models:
|
| 1441 |
save_models(models)
|
| 1442 |
return f"Successfully loaded {len(models)} models!"
|
|
|
|
| 1446 |
return f"Error refreshing models: {str(e)}"
|
| 1447 |
|
| 1448 |
|
| 1449 |
+
async def chat_with_model(model_name, message, history):
|
| 1450 |
"""Chat with a model"""
|
| 1451 |
if not model_name:
|
| 1452 |
return history, "Please select a model first."
|
|
|
|
| 1494 |
auth_token = get_next_auth_token()
|
| 1495 |
|
| 1496 |
# Run chat completion
|
| 1497 |
+
result = await chat_completion_non_stream(
|
| 1498 |
+
model_id, model_name, str(uuid.uuid4()), message,
|
| 1499 |
+
auth_token, [], "", modality
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1500 |
)
|
|
|
|
| 1501 |
|
| 1502 |
+
if "error" in result:
|
| 1503 |
+
return history, f"Error: {result['error']}"
|
| 1504 |
+
|
| 1505 |
# Extract response
|
| 1506 |
response = ""
|
| 1507 |
if "choices" in result and result["choices"]:
|
|
|
|
| 1563 |
|
| 1564 |
refresh_models_btn = gr.Button("🔄 Refresh Models", variant="secondary")
|
| 1565 |
|
| 1566 |
+
async def on_refresh():
|
| 1567 |
+
status = await refresh_models()
|
| 1568 |
+
return status, gr.Dropdown(choices=get_model_list())
|
| 1569 |
+
|
| 1570 |
refresh_models_btn.click(
|
| 1571 |
+
fn=on_refresh,
|
| 1572 |
outputs=[status_output, model_dropdown]
|
| 1573 |
)
|
| 1574 |
|
|
|
|
| 1731 |
# MAIN ENTRY POINT
|
| 1732 |
# ============================================================
|
| 1733 |
|
| 1734 |
+
def run_both():
|
| 1735 |
+
"""Run both FastAPI and Gradio UI cooperatively on the same port"""
|
| 1736 |
interface = create_gradio_interface()
|
| 1737 |
+
# Mount the Gradio UI on the FastAPI app at "/" so a single uvicorn server serves both the UI and the API endpoints.
|
| 1738 |
+
app_with_ui = gr.mount_gradio_app(app, interface, path="/")
|
| 1739 |
+
|
| 1740 |
+
uvicorn.run(
|
| 1741 |
+
app_with_ui,
|
| 1742 |
+
host=HOST,
|
| 1743 |
+
port=PORT,
|
| 1744 |
+
log_level="info"
|
| 1745 |
)
|
| 1746 |
|
|
|
|
| 1747 |
def run_fastapi():
|
| 1748 |
+
"""Run pure FastAPI API without the UI overlay"""
|
| 1749 |
uvicorn.run(
|
| 1750 |
app,
|
| 1751 |
host=HOST,
|
|
|
|
| 1753 |
log_level="info"
|
| 1754 |
)
|
| 1755 |
|
|
|
|
| 1756 |
if __name__ == "__main__":
|
| 1757 |
import argparse
|
| 1758 |
|
| 1759 |
parser = argparse.ArgumentParser(description="LMArena Bridge with VPN Proxy")
|
| 1760 |
+
parser.add_argument("--mode", choices=["fastapi", "both"], default="both",
|
| 1761 |
+
help="Run mode: fastapi (API only) or both (API + Gradio UI). Default is both.")
|
| 1762 |
parser.add_argument("--port", type=int, default=PORT, help="Port to run on")
|
| 1763 |
parser.add_argument("--host", default=HOST, help="Host to bind to")
|
| 1764 |
parser.add_argument("--debug", action="store_true", help="Enable debug mode")
|
|
|
|
| 1777 |
config["use_proxy"] = False
|
| 1778 |
save_config(config)
|
| 1779 |
|
| 1780 |
+
if args.mode == "fastapi":
|
|
|
|
|
|
|
| 1781 |
run_fastapi()
|
| 1782 |
+
else:
|
| 1783 |
+
run_both()
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1784 |
PYTHON_EOF
|
| 1785 |
|
| 1786 |
# Expose port
|