Spaces:

Prasham1710
/

Enterprise_Finance_env

Sleeping

App Files Files Community

Prasham1710 committed on Mar 31

Commit

d9ced2a

1 Parent(s): 14a2eb9

Add Groq-safe inference fallback

Browse files

Files changed (6) hide show

.gitignore +1 -0
__pycache__/inference.cpython-312.pyc +0 -0
inference.py +98 -26
tests/__pycache__/test_end_to_end.cpython-312-pytest-9.0.2.pyc +0 -0
tests/test_end_to_end.py +1 -0
uv.lock +2 -0

.gitignore ADDED Viewed

	@@ -0,0 +1 @@


1	+ .env

__pycache__/inference.cpython-312.pyc CHANGED Viewed

Binary files a/__pycache__/inference.cpython-312.pyc and b/__pycache__/inference.cpython-312.pyc differ

inference.py CHANGED Viewed

@@ -4,6 +4,7 @@ import argparse
 import asyncio
 import json
 import os
 from dataclasses import dataclass
 from typing import Any, Protocol
@@ -54,6 +55,18 @@ Use exactly one tool call on each turn. Prefer safe, incremental actions:
 Never invent transaction ids, FX rates, dates, or accounts.
 """
 class EpisodeClient(Protocol):
     async def reset(self, **kwargs: Any) -> StepResult[EnterpriseFinanceObservation]:
@@ -191,6 +204,14 @@ def _build_user_prompt(
     return json.dumps(prompt_payload, indent=2)
 def _build_tools() -> list[dict[str, Any]]:
     return [
         {
@@ -297,6 +318,10 @@ def _tool_call_to_action(name: str, arguments: dict[str, Any]) -> ActionLike:
     raise ValueError(f"Unsupported tool call: {name}")
 def _fallback_action(observation: EnterpriseFinanceObservation) -> ActionLike:
     if observation.structured_ledgers:
         start_date, end_date = _date_bounds(observation.structured_ledgers)
@@ -323,6 +348,32 @@ def _format_action(action: ActionLike) -> str:
     return json.dumps(payload, separators=(",", ":"))
 def _print_step_trace(
     step_index: int,
     action: ActionLike,
@@ -399,6 +450,7 @@ async def run_openai_episode(
     client: EpisodeClient,
     *,
     llm_client: OpenAI,
     difficulty: str,
     model: str,
     max_steps: int,
@@ -411,33 +463,52 @@ async def run_openai_episode(
     current_state = await client.state()
     for step_index in range(1, max_steps + 1):
-        completion = llm_client.chat.completions.create(
-            model=model,
-            temperature=temperature,
-            max_tokens=max_tokens,
-            tool_choice="required",
-            tools=tools,
-            messages=[
-                {"role": "system", "content": SYSTEM_PROMPT},
-                {
-                    "role": "user",
-                    "content": _build_user_prompt(
-                        step_index,
-                        result.observation,
-                        current_state,
-                        history,
-                    ),
-                },
-            ],
         )
-        message = completion.choices[0].message
-        tool_call = message.tool_calls[0] if getattr(message, "tool_calls", None) else None
-        if tool_call is None:
-            action = _fallback_action(result.observation)
-        else:
-            arguments = json.loads(tool_call.function.arguments or "{}")
-            action = _tool_call_to_action(tool_call.function.name, arguments)
         result = await client.step(action)
         current_state = await client.state()
@@ -494,6 +565,7 @@ async def _main_async(args: argparse.Namespace) -> None:
                 summary = await run_openai_episode(
                     client,
                     llm_client=llm_client,
                     difficulty=args.difficulty,
                     model=args.model_name,
                     max_steps=args.max_steps,

 import asyncio
 import json
 import os
+import re
 from dataclasses import dataclass
 from typing import Any, Protocol
 Never invent transaction ids, FX rates, dates, or accounts.
 """
+# System prompt for the plain-JSON path: _fallback_json_completion sends it as
+# the system message and asks the model for exactly one bare JSON action
+# (one of the four shapes listed below) instead of a tool call.
+JSON_FALLBACK_SYSTEM_PROMPT = """You are the Consolidation Controller for a GAAP-compliant enterprise finance simulation.
+Reply with exactly one JSON object and nothing else.
+The JSON object must match one of these shapes:
+{"type":"query_subledger","entity":"PARENT_US","account_code":"IC_AR","date_range":["2026-01-01","2026-01-31"]}
+{"type":"link_transactions","debit_txn_id":"TXN1","credit_txn_id":"TXN2","rationale":"Explain the match."}
+{"type":"apply_forex_adjustment","txn_id":"TXN1","exchange_rate":1.3025,"date":"2026-02-05"}
+{"type":"post_elimination_entry","entity_id":"GROUP","amount":12.34,"account":"IC_FX_ELIM_CLEARING"}
+Choose exactly one action for this turn. Do not emit multiple actions. Do not use markdown fences.
+"""
 class EpisodeClient(Protocol):
     async def reset(self, **kwargs: Any) -> StepResult[EnterpriseFinanceObservation]:
     return json.dumps(prompt_payload, indent=2)
+def _extract_json_block(content: str) -> str:
+    """Return the JSON object text embedded in a model reply.
+
+    Models asked for "JSON only" still frequently wrap the object in a
+    markdown fence, add a language tag other than ``json``, or surround it
+    with a sentence of prose. This helper strips those wrappers so the
+    result can be handed to ``json.loads``.
+
+    Args:
+        content: Raw text of the assistant message.
+
+    Returns:
+        The stripped text. When the text is not already a bare ``{...}``
+        object, the span from the first ``{`` to the last ``}`` is returned;
+        if no such span exists the stripped text is returned unchanged.
+    """
+    stripped = content.strip()
+    if stripped.startswith("```"):
+        stripped = re.sub(r"^```(?:json)?", "", stripped).strip()
+        stripped = re.sub(r"```$", "", stripped).strip()
+    if not (stripped.startswith("{") and stripped.endswith("}")):
+        # Leftover fence tags, leading prose, or trailing chatter: keep only
+        # the outermost brace-delimited span, since callers expect one object.
+        start = stripped.find("{")
+        end = stripped.rfind("}")
+        if start != -1 and end > start:
+            stripped = stripped[start : end + 1]
+    return stripped
 def _build_tools() -> list[dict[str, Any]]:
     return [
         {
     raise ValueError(f"Unsupported tool call: {name}")
+def _json_dict_to_action(payload: dict[str, Any]) -> ActionLike:
+    """Validate ``payload`` and return the action it describes.
+
+    The dict is run through ``EnterpriseFinanceActionPayload.model_validate``
+    and the validated model's ``root`` attribute is returned.
+    """
+    validated = EnterpriseFinanceActionPayload.model_validate(payload)
+    return validated.root
 def _fallback_action(observation: EnterpriseFinanceObservation) -> ActionLike:
     if observation.structured_ledgers:
         start_date, end_date = _date_bounds(observation.structured_ledgers)
     return json.dumps(payload, separators=(",", ":"))
+def _provider_prefers_json_fallback(api_base_url: str) -> bool:
+    """Tell whether the API base URL points at a Groq-hosted endpoint.
+
+    The decision is made on the parsed hostname rather than a substring of
+    the whole URL, so look-alike hosts (``notgroq.com``) or a ``groq.com``
+    path segment on another host do not switch the run to JSON mode.
+
+    Args:
+        api_base_url: Base URL of the OpenAI-compatible API. A missing
+            scheme is tolerated; empty or ``None`` yields ``False``.
+
+    Returns:
+        ``True`` when the host is ``groq.com`` or one of its subdomains.
+    """
+    from urllib.parse import urlsplit
+
+    if not api_base_url:
+        return False
+    candidate = api_base_url.strip().lower()
+    if "//" not in candidate:
+        # urlsplit only recognises a network location after "//".
+        candidate = f"//{candidate}"
+    try:
+        host = urlsplit(candidate).hostname or ""
+    except ValueError:
+        # Malformed authority (e.g. a broken IPv6 literal): not Groq.
+        return False
+    host = host.rstrip(".")
+    return host == "groq.com" or host.endswith(".groq.com")
+def _fallback_json_completion(
+    *,
+    llm_client: OpenAI,
+    model: str,
+    user_prompt: str,
+    temperature: float,
+    max_tokens: int,
+) -> ActionLike:
+    """Ask the model for one action as plain JSON, without tool calling.
+
+    Sends ``JSON_FALLBACK_SYSTEM_PROMPT`` plus ``user_prompt`` as a single
+    chat completion, unwraps the reply with ``_extract_json_block``, parses
+    it with ``json.loads`` and converts it via ``_json_dict_to_action``.
+    Parsing and validation errors are not caught here and propagate to the
+    caller.
+    """
+    conversation = [
+        {"role": "system", "content": JSON_FALLBACK_SYSTEM_PROMPT},
+        {"role": "user", "content": user_prompt},
+    ]
+    response = llm_client.chat.completions.create(
+        model=model,
+        temperature=temperature,
+        max_tokens=max_tokens,
+        messages=conversation,
+    )
+    reply_text = response.choices[0].message.content
+    if not reply_text:
+        # A missing or empty message body is treated as an empty string.
+        reply_text = ""
+    action_fields = json.loads(_extract_json_block(reply_text))
+    return _json_dict_to_action(action_fields)
 def _print_step_trace(
     step_index: int,
     action: ActionLike,
     client: EpisodeClient,
     *,
     llm_client: OpenAI,
+    api_base_url: str,
     difficulty: str,
     model: str,
     max_steps: int,
     current_state = await client.state()
     for step_index in range(1, max_steps + 1):
+        user_prompt = _build_user_prompt(
+            step_index,
+            result.observation,
+            current_state,
+            history,
         )
+        action: ActionLike
+        try:
+            if _provider_prefers_json_fallback(api_base_url):
+                action = _fallback_json_completion(
+                    llm_client=llm_client,
+                    model=model,
+                    user_prompt=user_prompt,
+                    temperature=temperature,
+                    max_tokens=max_tokens,
+                )
+            else:
+                completion = llm_client.chat.completions.create(
+                    model=model,
+                    temperature=temperature,
+                    max_tokens=max_tokens,
+                    tool_choice="required",
+                    parallel_tool_calls=False,
+                    tools=tools,
+                    messages=[
+                        {"role": "system", "content": SYSTEM_PROMPT},
+                        {"role": "user", "content": user_prompt},
+                    ],
+                )
+                message = completion.choices[0].message
+                tool_call = message.tool_calls[0] if getattr(message, "tool_calls", None) else None
+                if tool_call is None:
+                    action = _fallback_action(result.observation)
+                else:
+                    arguments = json.loads(tool_call.function.arguments or "{}")
+                    action = _tool_call_to_action(tool_call.function.name, arguments)
+        except Exception as exc:  # noqa: BLE001
+            if "tool_use_failed" not in str(exc) and "Failed to call a function" not in str(exc):
+                raise
+            action = _fallback_json_completion(
+                llm_client=llm_client,
+                model=model,
+                user_prompt=user_prompt,
+                temperature=temperature,
+                max_tokens=max_tokens,
+            )
         result = await client.step(action)
         current_state = await client.state()
                 summary = await run_openai_episode(
                     client,
                     llm_client=llm_client,
+                    api_base_url=args.api_base_url,
                     difficulty=args.difficulty,
                     model=args.model_name,
                     max_steps=args.max_steps,

tests/__pycache__/test_end_to_end.cpython-312-pytest-9.0.2.pyc CHANGED Viewed

Binary files a/tests/__pycache__/test_end_to_end.cpython-312-pytest-9.0.2.pyc and b/tests/__pycache__/test_end_to_end.cpython-312-pytest-9.0.2.pyc differ

tests/test_end_to_end.py CHANGED Viewed

@@ -132,6 +132,7 @@ async def test_openai_policy_path_can_solve_easy_with_fake_client() -> None:
     summary = await run_openai_episode(
         LocalAsyncAdapter("easy"),
         llm_client=FakeOpenAIClient(),
         difficulty="easy",
         model="fake-model",
         max_steps=200,

     summary = await run_openai_episode(
         LocalAsyncAdapter("easy"),
         llm_client=FakeOpenAIClient(),
+        api_base_url="https://router.huggingface.co/v1",
         difficulty="easy",
         model="fake-model",
         max_steps=200,

uv.lock CHANGED Viewed

@@ -620,6 +620,7 @@ dependencies = [
     { name = "openai" },
     { name = "openenv-core" },
     { name = "pydantic" },
     { name = "uvicorn" },
 ]
@@ -638,6 +639,7 @@ requires-dist = [
     { name = "pydantic", specifier = ">=2.8.0" },
     { name = "pytest", marker = "extra == 'dev'", specifier = ">=8.0.0" },
     { name = "pytest-asyncio", marker = "extra == 'dev'", specifier = ">=0.23.0" },
     { name = "uvicorn", specifier = ">=0.30.0" },
 ]
 provides-extras = ["dev"]

     { name = "openai" },
     { name = "openenv-core" },
     { name = "pydantic" },
+    { name = "python-dotenv" },
     { name = "uvicorn" },
 ]
     { name = "pydantic", specifier = ">=2.8.0" },
     { name = "pytest", marker = "extra == 'dev'", specifier = ">=8.0.0" },
     { name = "pytest-asyncio", marker = "extra == 'dev'", specifier = ">=0.23.0" },
+    { name = "python-dotenv", specifier = ">=1.0.0" },
     { name = "uvicorn", specifier = ">=0.30.0" },
 ]
 provides-extras = ["dev"]