Spaces:
Sleeping
Sleeping
Commit ·
12d1907
1
Parent(s): 12a8a0f
modified Dockerfile, inference.py, app/env.py; included openenv.yaml, server7860.py
Browse files- Dockerfile +1 -1
- app/env.py +4 -4
- inference.py +29 -6
- openenv.yaml +116 -0
- server7860.py +62 -0
Dockerfile
CHANGED
|
@@ -10,4 +10,4 @@ COPY . .
|
|
| 10 |
|
| 11 |
RUN pip install --no-cache-dir -r requirements.txt
|
| 12 |
|
| 13 |
-
CMD ["
|
|
|
|
| 10 |
|
| 11 |
RUN pip install --no-cache-dir -r requirements.txt
|
| 12 |
|
| 13 |
+
CMD ["sh", "-c", "if [ \"$RUN_MODE\" = \"server\" ]; then uvicorn server:app --host 0.0.0.0 --port 7860; else python inference.py; fi"]
|
app/env.py
CHANGED
|
@@ -79,10 +79,10 @@ class CustomerSupportEnv:
|
|
| 79 |
|
| 80 |
reward = 0.0
|
| 81 |
done = False
|
| 82 |
-
|
| 83 |
-
info = {
|
| 84 |
-
"final_score": self._compute_final_score() if done else None
|
| 85 |
-
}
|
| 86 |
|
| 87 |
collected = self.state_data["collected_info"]
|
| 88 |
required = self.state_data["required_info"]
|
|
|
|
| 79 |
|
| 80 |
reward = 0.0
|
| 81 |
done = False
|
| 82 |
+
info = {}
|
| 83 |
+
#info = {
|
| 84 |
+
#"final_score": self._compute_final_score() if done else None
|
| 85 |
+
#}
|
| 86 |
|
| 87 |
collected = self.state_data["collected_info"]
|
| 88 |
required = self.state_data["required_info"]
|
inference.py
CHANGED
|
@@ -6,12 +6,28 @@ from agent_llm import get_action
|
|
| 6 |
from app.env import CustomerSupportEnv
|
| 7 |
|
| 8 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
def format_action(action: dict) -> str:
|
| 10 |
-
"""Convert action dict → string"""
|
| 11 |
if not action:
|
| 12 |
return "null"
|
| 13 |
-
return str(action).replace("\n", "").replace(" ", " ")
|
| 14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
|
| 16 |
def main():
|
| 17 |
|
|
@@ -83,12 +99,19 @@ def main():
|
|
| 83 |
# =========================
|
| 84 |
rewards_str = ",".join(f"{r:.2f}" for r in rewards)
|
| 85 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
print(
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
|
|
|
| 90 |
)
|
| 91 |
|
| 92 |
-
|
| 93 |
if __name__ == "__main__":
|
| 94 |
main()
|
|
|
|
| 6 |
from app.env import CustomerSupportEnv
|
| 7 |
|
| 8 |
|
| 9 |
+
#"""
|
| 10 |
+
#def format_action(action: dict) -> str:
|
| 11 |
+
#"""Convert action dict → string"""
|
| 12 |
+
# if not action:
|
| 13 |
+
# return "null"
|
| 14 |
+
# return str(action).replace("\n", "").replace(" ", " ")
|
| 15 |
+
#"""
|
| 16 |
+
|
| 17 |
def format_action(action: dict) -> str:
    """Render an action dict as a compact, single-line call-style string.

    Args:
        action: Action mapping with a ``"type"`` key and, for ``ask_info``,
            a ``"field"`` key. A falsy value (``None``, ``{}``) is allowed.

    Returns:
        ``"null"`` for a falsy action, ``"ask_info('<field>')"``,
        ``"resolve()"`` or ``"classify()"`` for the known action types, and
        ``str(action)`` for anything else.
    """
    # Guard clause: nothing to format.
    if not action:
        return "null"

    kind = action.get("type")

    # The only action type that carries an argument.
    if kind == "ask_info":
        field = action.get("field")
        return f"ask_info('{field}')"

    # Argument-less actions map to fixed strings.
    fixed_forms = (
        ("resolve", "resolve()"),
        ("classify", "classify()"),
    )
    for name, rendered in fixed_forms:
        if kind == name:
            return rendered

    # Unknown type: fall back to the dict's own string form.
    return str(action)
|
| 31 |
|
| 32 |
def main():
|
| 33 |
|
|
|
|
| 99 |
# =========================
|
| 100 |
rewards_str = ",".join(f"{r:.2f}" for r in rewards)
|
| 101 |
|
| 102 |
+
score = 1.0 if success else 0.0
|
| 103 |
+
|
| 104 |
+
#print(
|
| 105 |
+
# f"[END] success={'true' if success else 'false'} "
|
| 106 |
+
# f"steps={step_count} "
|
| 107 |
+
# f"rewards={rewards_str}"
|
| 108 |
+
#)
|
| 109 |
print(
|
| 110 |
+
f"[END] success={'true' if success else 'false'} "
|
| 111 |
+
f"steps={step_count} "
|
| 112 |
+
f"score={score:.2f} "
|
| 113 |
+
f"rewards={rewards_str}"
|
| 114 |
)
|
| 115 |
|
|
|
|
| 116 |
if __name__ == "__main__":
|
| 117 |
main()
|
openenv.yaml
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: customer-support-agent
|
| 2 |
+
description: >
|
| 3 |
+
A goal-oriented customer support environment where an agent must gather
|
| 4 |
+
required information from the user and resolve the ticket efficiently.
|
| 5 |
+
|
| 6 |
+
version: 1.0
|
| 7 |
+
|
| 8 |
+
# =========================
|
| 9 |
+
# ENTRYPOINT
|
| 10 |
+
# =========================
|
| 11 |
+
entry_point: server7860:app
|
| 12 |
+
|
| 13 |
+
# =========================
|
| 14 |
+
# INTERFACE
|
| 15 |
+
# =========================
|
| 16 |
+
interfaces:
|
| 17 |
+
- type: http
|
| 18 |
+
port: 7860
|
| 19 |
+
routes:
|
| 20 |
+
reset: /reset
|
| 21 |
+
step: /step
|
| 22 |
+
|
| 23 |
+
# =========================
|
| 24 |
+
# ENVIRONMENT LIMITS
|
| 25 |
+
# =========================
|
| 26 |
+
max_steps: 10
|
| 27 |
+
|
| 28 |
+
reward_range:
|
| 29 |
+
min: -1.0
|
| 30 |
+
max: 2.0
|
| 31 |
+
|
| 32 |
+
# =========================
|
| 33 |
+
# ACTION SPACE
|
| 34 |
+
# =========================
|
| 35 |
+
actions:
|
| 36 |
+
- name: ask_info
|
| 37 |
+
description: Request missing information from the user
|
| 38 |
+
parameters:
|
| 39 |
+
field:
|
| 40 |
+
type: string
|
| 41 |
+
enum:
|
| 42 |
+
- order_id
|
| 43 |
+
- account_email
|
| 44 |
+
- device_type
|
| 45 |
+
- browser
|
| 46 |
+
|
| 47 |
+
- name: classify
|
| 48 |
+
description: Classify the user issue into a category
|
| 49 |
+
parameters: {}
|
| 50 |
+
|
| 51 |
+
- name: resolve
|
| 52 |
+
description: Resolve the ticket if sufficient information is available
|
| 53 |
+
parameters: {}
|
| 54 |
+
|
| 55 |
+
# =========================
|
| 56 |
+
# OBSERVATION SPACE
|
| 57 |
+
# =========================
|
| 58 |
+
observation_space:
|
| 59 |
+
type: object
|
| 60 |
+
properties:
|
| 61 |
+
ticket_id:
|
| 62 |
+
type: string
|
| 63 |
+
|
| 64 |
+
customer_message:
|
| 65 |
+
type: string
|
| 66 |
+
|
| 67 |
+
history:
|
| 68 |
+
type: array
|
| 69 |
+
items:
|
| 70 |
+
type: object
|
| 71 |
+
|
| 72 |
+
known_info:
|
| 73 |
+
type: object
|
| 74 |
+
additionalProperties: true
|
| 75 |
+
|
| 76 |
+
required:
|
| 77 |
+
type: array
|
| 78 |
+
items:
|
| 79 |
+
type: string
|
| 80 |
+
|
| 81 |
+
missing_required:
|
| 82 |
+
type: array
|
| 83 |
+
items:
|
| 84 |
+
type: string
|
| 85 |
+
|
| 86 |
+
info_progress:
|
| 87 |
+
type: number
|
| 88 |
+
|
| 89 |
+
status:
|
| 90 |
+
type: string
|
| 91 |
+
enum: [open, resolved]
|
| 92 |
+
|
| 93 |
+
step_count:
|
| 94 |
+
type: integer
|
| 95 |
+
|
| 96 |
+
remaining_steps:
|
| 97 |
+
type: integer
|
| 98 |
+
|
| 99 |
+
# =========================
|
| 100 |
+
# TERMINATION CONDITIONS
|
| 101 |
+
# =========================
|
| 102 |
+
termination:
|
| 103 |
+
success:
|
| 104 |
+
description: Ticket resolved with all required information collected
|
| 105 |
+
failure:
|
| 106 |
+
description: Max steps reached without resolution
|
| 107 |
+
|
| 108 |
+
# =========================
|
| 109 |
+
# METADATA
|
| 110 |
+
# =========================
|
| 111 |
+
tags:
|
| 112 |
+
- customer-support
|
| 113 |
+
- goal-oriented
|
| 114 |
+
- information-gathering
|
| 115 |
+
|
| 116 |
+
difficulty: medium
|
server7860.py
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
# server7860.py
|
| 3 |
+
|
| 4 |
+
from fastapi import FastAPI
|
| 5 |
+
from pydantic import BaseModel
|
| 6 |
+
from app.env import CustomerSupportEnv
|
| 7 |
+
|
| 8 |
+
import json
|
| 9 |
+
|
| 10 |
+
app = FastAPI()
|
| 11 |
+
|
| 12 |
+
env = CustomerSupportEnv()
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
# Request body accepted by the POST /step endpoint.
class StepRequest(BaseModel):
    # The action arrives as a single string (JSON text or a simple command);
    # parse_action() turns it into the dict passed to env.step().
    action: str   # OpenEnv sends STRING
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def parse_action(action_str: str):
    """
    Convert string action → dict

    Supports:
    - JSON string encoding an object, e.g. '{"type": "resolve"}'
    - simple commands: "classify", "resolve", "ask_<field>"
    - call-style commands, the format inference.py's format_action emits:
      "classify()", "resolve()", "ask_info('<field>')"

    Args:
        action_str: Raw action text received from the client.

    Returns:
        The action as a dict. Input that matches none of the supported
        forms yields {"type": "invalid"}.
    """
    try:
        parsed = json.loads(action_str)
    except (TypeError, ValueError):
        # Not JSON (json.JSONDecodeError is a ValueError); try the textual
        # forms below. A bare ``except:`` would also hide KeyboardInterrupt.
        parsed = None

    # Only a JSON object is a usable action; JSON scalars and lists
    # (e.g. "123") must not be handed on as if they were actions.
    if isinstance(parsed, dict):
        return parsed
    if not isinstance(action_str, str):
        return {"type": "invalid"}

    command = action_str.strip()

    # Call-style form: name(arg)
    if command.endswith(")") and "(" in command:
        name, _, raw_arg = command[:-1].partition("(")
        name = name.strip()
        arg = raw_arg.strip().strip("'\"").strip()
        if name == "ask_info":
            if arg:
                return {"type": "ask_info", "field": arg}
            return {"type": "invalid"}
        if arg:
            # No other action takes an argument.
            return {"type": "invalid"}
        # "resolve()" / "classify()" / "ask_<field>()": treat as the bare name.
        command = name

    if command in ("classify", "resolve"):
        return {"type": command}
    if command.startswith("ask_"):
        # Strip only the leading prefix; str.replace would also remove any
        # later "ask_" occurring inside the field name.
        return {"type": "ask_info", "field": command[len("ask_"):]}
    return {"type": "invalid"}
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
@app.post("/reset")
def reset():
    """Reset the shared environment and return its observation.

    The value produced by ``env.reset()`` is returned as-is, without any
    wrapping envelope.
    """
    return env.reset()  # ✅ MUST return raw observation
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
@app.post("/step")
def step(req: StepRequest):
    """Apply one action to the shared environment.

    The request's action string is converted to a dict with
    ``parse_action`` and passed to ``env.step``. The four values it returns
    are sent back under the keys ``observation``, ``reward``, ``done`` and
    ``info``; ``reward`` is coerced to ``float`` and ``done`` to ``bool``.
    """
    observation, reward, done, info = env.step(parse_action(req.action))

    return dict(
        observation=observation,
        reward=float(reward),
        done=bool(done),
        info=info,
    )
|