overwrite69 committed on
Commit
57f5cba
·
verified ·
1 Parent(s): 891fdd7

Update Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +54 -51
Dockerfile CHANGED
@@ -14,16 +14,16 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
14
  # Set working directory
15
  WORKDIR /app
16
 
17
- # Install Python dependencies
18
- # Install Python dependencies
19
  RUN pip install --no-cache-dir \
20
  fastapi==0.109.0 \
21
  uvicorn==0.27.0 \
22
- httpx==0.26.0 \
23
  cloudscraper==1.2.71 \
24
- requests==2.31.0 \
25
  gradio==4.44.0 \
26
- "huggingface_hub<0.27.0"
 
27
 
28
  # Create the application file
29
  RUN cat > /app/app.py << 'PYTHON_EOF'
@@ -867,17 +867,14 @@ def get_proxied_http_client() -> httpx.AsyncClient:
867
  use_proxy = config.get("use_proxy", True)
868
  proxy_url = get_proxy_url()
869
 
870
- proxies = None
871
  if use_proxy and proxy_url:
872
- proxies = {
873
- "http://": proxy_url,
874
- "https://": proxy_url,
875
- }
876
  debug_print(f"Using proxy: {proxy_url}")
877
 
878
  return httpx.AsyncClient(
879
  timeout=DEFAULT_REQUEST_TIMEOUT,
880
- proxies=proxies
881
  )
882
 
883
 
@@ -1023,9 +1020,12 @@ async def stream_chat_completion(
1023
  if line.startswith("data: "):
1024
  yield line + "\n"
1025
  elif line.startswith("0:"):
1026
- # LMArena format: 0:"text"\n
1027
- content = line[2:].strip('"')
1028
- yield f"data: {json.dumps({'choices': [{'delta': {'content': content}}]})}\n\n"
 
 
 
1029
  else:
1030
  yield line + "\n"
1031
 
@@ -1052,6 +1052,8 @@ async def chat_completion_non_stream(
1052
  """Non-streaming chat completion"""
1053
 
1054
  full_content = ""
 
 
1055
  async for chunk in stream_chat_completion(
1056
  model_id, model_public_name, conversation_id, prompt,
1057
  auth_token, experimental_attachments, recaptcha_token, modality
@@ -1059,6 +1061,9 @@ async def chat_completion_non_stream(
1059
  if chunk.startswith("data: ") and not chunk.startswith("data: [DONE]"):
1060
  try:
1061
  data = json.loads(chunk[6:].strip())
 
 
 
1062
  if "choices" in data:
1063
  for choice in data["choices"]:
1064
  delta = choice.get("delta", {})
@@ -1068,6 +1073,9 @@ async def chat_completion_non_stream(
1068
  except json.JSONDecodeError:
1069
  pass
1070
 
 
 
 
1071
  return {
1072
  "id": f"chatcmpl-{uuid.uuid4()}",
1073
  "object": "chat.completion",
@@ -1355,6 +1363,8 @@ async def api_chat_completions(request: Request, api_key: dict = Depends(rate_li
1355
  model_id, model_public_name, conversation_id, prompt,
1356
  auth_token, [], "", modality
1357
  )
 
 
1358
  return result
1359
 
1360
 
@@ -1423,14 +1433,10 @@ def get_proxy_status():
1423
  return "🔴 Inactive"
1424
 
1425
 
1426
- def refresh_models():
1427
  """Refresh models from LMArena"""
1428
  try:
1429
- loop = asyncio.new_event_loop()
1430
- asyncio.set_event_loop(loop)
1431
- models = loop.run_until_complete(fetch_models_from_lmarena())
1432
- loop.close()
1433
-
1434
  if models:
1435
  save_models(models)
1436
  return f"Successfully loaded {len(models)} models!"
@@ -1440,7 +1446,7 @@ def refresh_models():
1440
  return f"Error refreshing models: {str(e)}"
1441
 
1442
 
1443
- def chat_with_model(model_name, message, history):
1444
  """Chat with a model"""
1445
  if not model_name:
1446
  return history, "Please select a model first."
@@ -1488,16 +1494,14 @@ def chat_with_model(model_name, message, history):
1488
  auth_token = get_next_auth_token()
1489
 
1490
  # Run chat completion
1491
- loop = asyncio.new_event_loop()
1492
- asyncio.set_event_loop(loop)
1493
- result = loop.run_until_complete(
1494
- chat_completion_non_stream(
1495
- model_id, model_name, str(uuid.uuid4()), message,
1496
- auth_token, [], "", modality
1497
- )
1498
  )
1499
- loop.close()
1500
 
 
 
 
1501
  # Extract response
1502
  response = ""
1503
  if "choices" in result and result["choices"]:
@@ -1559,8 +1563,12 @@ def create_gradio_interface():
1559
 
1560
  refresh_models_btn = gr.Button("🔄 Refresh Models", variant="secondary")
1561
 
 
 
 
 
1562
  refresh_models_btn.click(
1563
- fn=lambda: (refresh_models(), gr.Dropdown(choices=get_model_list())),
1564
  outputs=[status_output, model_dropdown]
1565
  )
1566
 
@@ -1723,19 +1731,21 @@ def create_gradio_interface():
1723
  # MAIN ENTRY POINT
1724
  # ============================================================
1725
 
1726
- def run_gradio():
1727
- """Run Gradio interface"""
1728
  interface = create_gradio_interface()
1729
- interface.launch(
1730
- server_name=HOST,
1731
- server_port=PORT,
1732
- share=False,
1733
- show_error=True
 
 
 
1734
  )
1735
 
1736
-
1737
  def run_fastapi():
1738
- """Run FastAPI server"""
1739
  uvicorn.run(
1740
  app,
1741
  host=HOST,
@@ -1743,13 +1753,12 @@ def run_fastapi():
1743
  log_level="info"
1744
  )
1745
 
1746
-
1747
  if __name__ == "__main__":
1748
  import argparse
1749
 
1750
  parser = argparse.ArgumentParser(description="LMArena Bridge with VPN Proxy")
1751
- parser.add_argument("--mode", choices=["gradio", "fastapi", "both"], default="gradio",
1752
- help="Run mode: gradio (UI), fastapi (API only), or both")
1753
  parser.add_argument("--port", type=int, default=PORT, help="Port to run on")
1754
  parser.add_argument("--host", default=HOST, help="Host to bind to")
1755
  parser.add_argument("--debug", action="store_true", help="Enable debug mode")
@@ -1768,16 +1777,10 @@ if __name__ == "__main__":
1768
  config["use_proxy"] = False
1769
  save_config(config)
1770
 
1771
- if args.mode == "gradio":
1772
- run_gradio()
1773
- elif args.mode == "fastapi":
1774
  run_fastapi()
1775
- elif args.mode == "both":
1776
- import multiprocessing
1777
- api_process = multiprocessing.Process(target=run_fastapi)
1778
- api_process.start()
1779
- run_gradio()
1780
- api_process.join()
1781
  PYTHON_EOF
1782
 
1783
  # Expose port
 
14
  # Set working directory
15
  WORKDIR /app
16
 
17
+ # Install Python dependencies (with fixes for Pydantic schema bugs and SOCKS proxy support)
 
18
  RUN pip install --no-cache-dir \
19
  fastapi==0.109.0 \
20
  uvicorn==0.27.0 \
21
+ "httpx[socks]==0.26.0" \
22
  cloudscraper==1.2.71 \
23
+ "requests[socks]==2.31.0" \
24
  gradio==4.44.0 \
25
+ "huggingface_hub<0.27.0" \
26
+ "pydantic<2.10"
27
 
28
  # Create the application file
29
  RUN cat > /app/app.py << 'PYTHON_EOF'
 
867
  use_proxy = config.get("use_proxy", True)
868
  proxy_url = get_proxy_url()
869
 
870
+ proxy = None
871
  if use_proxy and proxy_url:
872
+ proxy = proxy_url
 
 
 
873
  debug_print(f"Using proxy: {proxy_url}")
874
 
875
  return httpx.AsyncClient(
876
  timeout=DEFAULT_REQUEST_TIMEOUT,
877
+ proxy=proxy
878
  )
879
 
880
 
 
1020
  if line.startswith("data: "):
1021
  yield line + "\n"
1022
  elif line.startswith("0:"):
1023
+ try:
1024
+ content = json.loads(line[2:])
1025
+ yield f"data: {json.dumps({'choices': [{'delta': {'content': content}}]})}\n\n"
1026
+ except Exception:
1027
+ content = line[2:].strip('"')
1028
+ yield f"data: {json.dumps({'choices': [{'delta': {'content': content}}]})}\n\n"
1029
  else:
1030
  yield line + "\n"
1031
 
 
1052
  """Non-streaming chat completion"""
1053
 
1054
  full_content = ""
1055
+ error_msg = None
1056
+
1057
  async for chunk in stream_chat_completion(
1058
  model_id, model_public_name, conversation_id, prompt,
1059
  auth_token, experimental_attachments, recaptcha_token, modality
 
1061
  if chunk.startswith("data: ") and not chunk.startswith("data: [DONE]"):
1062
  try:
1063
  data = json.loads(chunk[6:].strip())
1064
+ if "error" in data:
1065
+ error_msg = data["error"]
1066
+ break
1067
  if "choices" in data:
1068
  for choice in data["choices"]:
1069
  delta = choice.get("delta", {})
 
1073
  except json.JSONDecodeError:
1074
  pass
1075
 
1076
+ if error_msg:
1077
+ return {"error": error_msg}
1078
+
1079
  return {
1080
  "id": f"chatcmpl-{uuid.uuid4()}",
1081
  "object": "chat.completion",
 
1363
  model_id, model_public_name, conversation_id, prompt,
1364
  auth_token, [], "", modality
1365
  )
1366
+ if "error" in result:
1367
+ raise HTTPException(status_code=500, detail=result["error"])
1368
  return result
1369
 
1370
 
 
1433
  return "🔴 Inactive"
1434
 
1435
 
1436
+ async def refresh_models():
1437
  """Refresh models from LMArena"""
1438
  try:
1439
+ models = await fetch_models_from_lmarena()
 
 
 
 
1440
  if models:
1441
  save_models(models)
1442
  return f"Successfully loaded {len(models)} models!"
 
1446
  return f"Error refreshing models: {str(e)}"
1447
 
1448
 
1449
+ async def chat_with_model(model_name, message, history):
1450
  """Chat with a model"""
1451
  if not model_name:
1452
  return history, "Please select a model first."
 
1494
  auth_token = get_next_auth_token()
1495
 
1496
  # Run chat completion
1497
+ result = await chat_completion_non_stream(
1498
+ model_id, model_name, str(uuid.uuid4()), message,
1499
+ auth_token, [], "", modality
 
 
 
 
1500
  )
 
1501
 
1502
+ if "error" in result:
1503
+ return history, f"Error: {result['error']}"
1504
+
1505
  # Extract response
1506
  response = ""
1507
  if "choices" in result and result["choices"]:
 
1563
 
1564
  refresh_models_btn = gr.Button("🔄 Refresh Models", variant="secondary")
1565
 
async def on_refresh():
    """Refresh the model list and return (status message, updated dropdown choices)."""
    message = await refresh_models()
    dropdown = gr.Dropdown(choices=get_model_list())
    return message, dropdown
1569
+
1570
  refresh_models_btn.click(
1571
+ fn=on_refresh,
1572
  outputs=[status_output, model_dropdown]
1573
  )
1574
 
 
1731
  # MAIN ENTRY POINT
1732
  # ============================================================
1733
 
def run_both():
    """Serve the FastAPI API and the Gradio UI together on one port.

    Builds the Gradio interface, mounts it onto the existing FastAPI
    app at the root path, and runs the combined ASGI app with uvicorn.
    """
    ui = create_gradio_interface()
    # Mounting (instead of launching Gradio in its own server) lets the
    # API routes and the UI share a single process and a single port.
    combined_app = gr.mount_gradio_app(app, ui, path="/")
    uvicorn.run(combined_app, host=HOST, port=PORT, log_level="info")
1746
 
 
1747
  def run_fastapi():
1748
+ """Run pure FastAPI API without the UI overlay"""
1749
  uvicorn.run(
1750
  app,
1751
  host=HOST,
 
1753
  log_level="info"
1754
  )
1755
 
 
1756
  if __name__ == "__main__":
1757
  import argparse
1758
 
1759
  parser = argparse.ArgumentParser(description="LMArena Bridge with VPN Proxy")
1760
+ parser.add_argument("--mode", choices=["fastapi", "both"], default="both",
1761
+ help="Run mode: fastapi (API only) or both (API + Gradio UI). Default is both.")
1762
  parser.add_argument("--port", type=int, default=PORT, help="Port to run on")
1763
  parser.add_argument("--host", default=HOST, help="Host to bind to")
1764
  parser.add_argument("--debug", action="store_true", help="Enable debug mode")
 
1777
  config["use_proxy"] = False
1778
  save_config(config)
1779
 
1780
+ if args.mode == "fastapi":
 
 
1781
  run_fastapi()
1782
+ else:
1783
+ run_both()
 
 
 
 
1784
  PYTHON_EOF
1785
 
1786
  # Expose port