Spaces:

kushalExplores
/

metric_tracker_rl

Sleeping

App Files Community

kushalExplores committed 30 days ago

Commit

821b7b8

verified ·

1 Parent(s): 80405b3

Upload folder using huggingface_hub

Browse files

Files changed (4) hide show

Dockerfile +1 -1
client.py +38 -0
inference.py +60 -12
server/app.py +12 -2

Dockerfile CHANGED Viewed

@@ -28,4 +28,4 @@ EXPOSE 8000
 HEALTHCHECK --interval=30s --timeout=5s --start-period=20s --retries=3 \
     CMD curl -fsS "http://127.0.0.1:${PORT}/health" || exit 1
-CMD ["python", "-m", "uvicorn", "server.app:app", "--host", "0.0.0.0", "--port", "8000"]

 HEALTHCHECK --interval=30s --timeout=5s --start-period=20s --retries=3 \
     CMD curl -fsS "http://127.0.0.1:${PORT}/health" || exit 1
+CMD ["python", "-m", "uvicorn", "server.app:app", "--host", "0.0.0.0", "--port", "8000", "--ws-ping-interval", "600", "--ws-ping-timeout", "600"]

client.py CHANGED Viewed

@@ -1,10 +1,12 @@
 """Client for the metric tracker RL environment."""
 from typing import Dict
 from openenv.core import EnvClient
 from openenv.core.client_types import StepResult
 from openenv.core.env_server.types import State
 from .models import MetricTrackerRlAction, MetricTrackerRlObservation
@@ -14,6 +16,42 @@ class MetricTrackerRlEnv(
 ):
     """Typed client for the metric tracking environment."""
     def _step_payload(self, action: MetricTrackerRlAction) -> Dict:
         """Serialize the action as JSON for the environment server."""
         return action.model_dump()

 """Client for the metric tracker RL environment."""
+import os
 from typing import Dict
 from openenv.core import EnvClient
 from openenv.core.client_types import StepResult
 from openenv.core.env_server.types import State
+from websockets.asyncio.client import connect as ws_connect
 from .models import MetricTrackerRlAction, MetricTrackerRlObservation
 ):
     """Typed client for the metric tracking environment."""
+    async def connect(self) -> "MetricTrackerRlEnv":
+        """Connect with websocket keepalive disabled for long-running step calls."""
+        if self._ws is not None:
+            return self
+        ws_url_lower = self._ws_url.lower()
+        is_localhost = "localhost" in ws_url_lower or "127.0.0.1" in ws_url_lower
+        old_no_proxy = os.environ.get("NO_PROXY")
+        if is_localhost:
+            current_no_proxy = old_no_proxy or ""
+            if "localhost" not in current_no_proxy.lower():
+                os.environ["NO_PROXY"] = (
+                    f"{current_no_proxy},localhost,127.0.0.1"
+                    if current_no_proxy
+                    else "localhost,127.0.0.1"
+                )
+        try:
+            self._ws = await ws_connect(
+                self._ws_url,
+                open_timeout=self._connect_timeout,
+                max_size=self._max_message_size,
+                ping_interval=None,
+                ping_timeout=None,
+            )
+        except Exception as exc:
+            raise ConnectionError(f"Failed to connect to {self._ws_url}: {exc}") from exc
+        finally:
+            if is_localhost:
+                if old_no_proxy is None:
+                    os.environ.pop("NO_PROXY", None)
+                else:
+                    os.environ["NO_PROXY"] = old_no_proxy
+        return self
     def _step_payload(self, action: MetricTrackerRlAction) -> Dict:
         """Serialize the action as JSON for the environment server."""
         return action.model_dump()

inference.py CHANGED Viewed

@@ -9,7 +9,9 @@ import textwrap
 from dataclasses import dataclass, field
 from typing import Any
 from openai import APIStatusError, OpenAI
 from metric_tracker_rl import DEFAULT_TASK_ORDER, MetricTrackerRlAction, MetricTrackerRlEnv, get_task_spec
 from metric_tracker_rl.analysis_tools import available_analysis_methods
@@ -20,7 +22,7 @@ from metric_tracker_rl.models import (
 )
-IMAGE_NAME = os.getenv("IMAGE_NAME") or "metric_tracker:latest"
 API_KEY = os.getenv("HF_TOKEN") or os.getenv("OPENAI_API_KEY") or os.getenv("API_KEY")
 API_BASE_URL = (
     os.getenv("API_BASE_URL")
@@ -34,6 +36,10 @@ BENCHMARK = os.getenv("MetricTrackerRl_BENCHMARK", "metric_tracker_rl")
 TEMPERATURE = float(os.getenv("TEMPERATURE", "0"))
 MAX_TOKENS = min(int(os.getenv("MAX_TOKENS", "1000")), 4096)
 MAX_TOOL_ROUNDS = int(os.getenv("MAX_TOOL_ROUNDS", "16"))
 SYSTEM_PROMPT = textwrap.dedent(
     """
@@ -276,8 +282,22 @@ def preview_text(text: str, limit: int = 220) -> str:
 async def connect_env() -> MetricTrackerRlEnv:
     if BASE_URL:
-        return MetricTrackerRlEnv(base_url=BASE_URL)
-    return await MetricTrackerRlEnv.from_docker_image(IMAGE_NAME)
 async def execute_tool_call(
@@ -541,24 +561,52 @@ async def run_single_task(
     }
 async def main() -> None:
     if not API_KEY:
         raise RuntimeError("Set OPENAI_API_KEY, HF_TOKEN, or API_KEY.")
     client = OpenAI(base_url=API_BASE_URL, api_key=API_KEY)
-    env = await connect_env()
     task_summaries: list[dict[str, Any]] = []
     log_start(task=TASK_NAME, env=BENCHMARK, model=MODEL_NAME)
-    try:
-        for task_id in DEFAULT_TASK_ORDER:
-            task_summaries.append(await run_single_task(client, env, task_id))
-    finally:
-        try:
-            await env.close()
-        except Exception:
-            pass
     average_score = (
         round(sum(item["normalized_score"] for item in task_summaries) / len(task_summaries), 6)

 from dataclasses import dataclass, field
 from typing import Any
+from openenv.core.containers.runtime.providers import LocalDockerProvider
 from openai import APIStatusError, OpenAI
+from websockets.exceptions import ConnectionClosedError
 from metric_tracker_rl import DEFAULT_TASK_ORDER, MetricTrackerRlAction, MetricTrackerRlEnv, get_task_spec
 from metric_tracker_rl.analysis_tools import available_analysis_methods
 )
+IMAGE_NAME = (os.getenv("IMAGE_NAME") or "metric_tracker_rl:latest").strip()
 API_KEY = os.getenv("HF_TOKEN") or os.getenv("OPENAI_API_KEY") or os.getenv("API_KEY")
 API_BASE_URL = (
     os.getenv("API_BASE_URL")
 TEMPERATURE = float(os.getenv("TEMPERATURE", "0"))
 MAX_TOKENS = min(int(os.getenv("MAX_TOKENS", "1000")), 4096)
 MAX_TOOL_ROUNDS = int(os.getenv("MAX_TOOL_ROUNDS", "16"))
+CONNECT_TIMEOUT_S = float(os.getenv("OPENENV_CONNECT_TIMEOUT_S", "30"))
+MESSAGE_TIMEOUT_S = float(os.getenv("OPENENV_MESSAGE_TIMEOUT_S", "180"))
+DOCKER_WAIT_TIMEOUT_S = float(os.getenv("OPENENV_DOCKER_WAIT_TIMEOUT_S", "120"))
+TASK_RETRY_COUNT = int(os.getenv("OPENENV_TASK_RETRY_COUNT", "1"))
 SYSTEM_PROMPT = textwrap.dedent(
     """
 async def connect_env() -> MetricTrackerRlEnv:
     if BASE_URL:
+        client = MetricTrackerRlEnv(
+            base_url=BASE_URL,
+            connect_timeout_s=CONNECT_TIMEOUT_S,
+            message_timeout_s=MESSAGE_TIMEOUT_S,
+        )
+        return await client.connect()
+    provider = LocalDockerProvider()
+    base_url = provider.start_container(IMAGE_NAME)
+    provider.wait_for_ready(base_url, timeout_s=DOCKER_WAIT_TIMEOUT_S)
+    client = MetricTrackerRlEnv(
+        base_url=base_url,
+        connect_timeout_s=CONNECT_TIMEOUT_S,
+        message_timeout_s=MESSAGE_TIMEOUT_S,
+        provider=provider,
+    )
+    return await client.connect()
 async def execute_tool_call(
     }
+async def run_single_task_with_retries(
+    client: OpenAI,
+    task_id: str,
+) -> dict[str, Any]:
+    """Run one task with a fresh env connection and bounded reconnect retries."""
+    attempts = TASK_RETRY_COUNT + 1
+    last_error: Exception | None = None
+    for attempt in range(1, attempts + 1):
+        env = None
+        try:
+            env = await connect_env()
+            return await run_single_task(client, env, task_id)
+        except (ConnectionClosedError, ConnectionError, TimeoutError, OSError) as exc:
+            last_error = exc
+            print(
+                (
+                    f"[WARN] task_id={task_id} attempt={attempt}/{attempts} "
+                    f"env_connection_error={type(exc).__name__}: {exc}"
+                ),
+                flush=True,
+            )
+            if attempt >= attempts:
+                raise
+        finally:
+            try:
+                if env is not None:
+                    await env.close()
+            except Exception:
+                pass
+    assert last_error is not None
+    raise last_error
 async def main() -> None:
     if not API_KEY:
         raise RuntimeError("Set OPENAI_API_KEY, HF_TOKEN, or API_KEY.")
     client = OpenAI(base_url=API_BASE_URL, api_key=API_KEY)
     task_summaries: list[dict[str, Any]] = []
     log_start(task=TASK_NAME, env=BENCHMARK, model=MODEL_NAME)
+    for task_id in DEFAULT_TASK_ORDER:
+        task_summaries.append(await run_single_task_with_retries(client, task_id))
     average_score = (
         round(sum(item["normalized_score"] for item in task_summaries) / len(task_summaries), 6)

server/app.py CHANGED Viewed

@@ -28,6 +28,8 @@ Usage:
     python -m server.app
 """
 try:
     from openenv.core.env_server.http_server import create_app
 except Exception as e:  # pragma: no cover
@@ -74,8 +76,16 @@ def main(host: str = "0.0.0.0", port: int = 8000):
         uvicorn metric_tracker_rl.server.app:app --workers 4
     """
     import uvicorn
-    uvicorn.run(app, host=host, port=port)
 if __name__ == "__main__":

     python -m server.app
 """
+import os
 try:
     from openenv.core.env_server.http_server import create_app
 except Exception as e:  # pragma: no cover
         uvicorn metric_tracker_rl.server.app:app --workers 4
     """
     import uvicorn
+    ws_ping_interval = float(os.getenv("UVICORN_WS_PING_INTERVAL", "600"))
+    ws_ping_timeout = float(os.getenv("UVICORN_WS_PING_TIMEOUT", "600"))
+    uvicorn.run(
+        app,
+        host=host,
+        port=port,
+        ws_ping_interval=ws_ping_interval,
+        ws_ping_timeout=ws_ping_timeout,
+    )
 if __name__ == "__main__":