# csa01/inference.py
# prashantmatlani: implemented agents' self-learning, self-correcting
# without explicit training (commit 0894e25)
# inference.py
# Episode runner: executes each customer-support task with the LLM agent
# and prints grader-readable [STEP]/[END]/JSON lines.
import os
import json
from agent_llm import get_action
from app.env import CustomerSupportEnv
from graders import grade_easy, grade_medium, grade_hard
#from tasks import TASKS
from app.env import get_tasks
import sys  # NOTE(review): appears unused in this file — confirm before removing
# Task list is produced by the environment module at import time.
TASKS = get_tasks()
def compute_score(task_type, env, success, steps, rewards):
    """Dispatch to the difficulty-specific grader and return its score.

    Falls back to 0.5 for an unrecognized task_type (should never hit).
    """
    graders = {
        "easy": grade_easy,
        "medium": grade_medium,
        "hard": grade_hard,
    }
    grader = graders.get(task_type)
    if grader is None:
        return 0.5  # fallback (should never hit)
    return grader(env, success, steps, rewards)
# =========================
# ACTION FORMATTER
# =========================
def format_action(action: dict) -> str:
    """Render an agent action dict as a short human-readable call string.

    A falsy action (None / empty dict) renders as "null"; unrecognized
    action types fall back to the dict's str() representation.
    """
    if not action:
        return "null"
    kind = action.get("type")
    if kind == "ask_info":
        return f"ask_info('{action.get('field')}')"
    if kind in ("resolve", "classify"):
        return f"{kind}()"
    return str(action)
# =========================
# RUN SINGLE TASK
# =========================
def run_single_task(task):
    """Run one support-task episode end-to-end and print its results.

    Emits one [STEP] line per environment step, an [END] summary line,
    and finally a single-line JSON record {"task_id", "score"} that the
    external grader parses (flushed so it is never lost in buffering).

    task: dict with "id", "difficulty", and optionally "grader" — a
    callable (env, success, steps, rewards) -> float score.
    """
    task_name = task["id"]
    env = CustomerSupportEnv(difficulty=task["difficulty"])
    obs = env.reset()
    step_count = 0
    rewards = []
    success = False

    # The action space is fixed for every step; build it once instead of
    # rebuilding the list on each loop iteration.
    valid_actions = [
        {"type": "ask_info", "field": "order_id"},
        {"type": "ask_info", "field": "account_email"},
        {"type": "ask_info", "field": "device_type"},
        {"type": "ask_info", "field": "browser"},
        {"type": "resolve"},
        {"type": "classify"},
    ]

    try:
        done = False
        while not done:
            action = get_action(obs, valid_actions)
            next_obs, reward, done, info = env.step(action)
            step_count += 1
            rewards.append(reward)
            print(
                f"[STEP] task={task_name} step={step_count} "
                f"action={format_action(action)} "
                f"reward={reward:.2f} "
                f"done={'true' if done else 'false'} "
                f"error=null"
            )
            obs = next_obs
            success = info.get("task_success", False)
    except Exception as e:
        # Best-effort: a failed step ends the episode, but we still fall
        # through to emit [END] and the JSON record for this task.
        print(
            f"[STEP] task={task_name} step={step_count+1} "
            f"action=null reward=0.00 done=true error={str(e)}"
        )

    # Score with the task-specific grader when one is attached; neutral
    # 0.5 otherwise.
    grader = task.get("grader")
    score = grader(env, success, step_count, rewards) if grader else 0.5

    rewards_str = ",".join(f"{r:.2f}" for r in rewards)
    print(
        f"[END] task={task_name} "
        f"success={'true' if success else 'false'} "
        f"steps={step_count} "
        f"score={score:.2f} "
        f"rewards={rewards_str}"
    )

    # CRITICAL: machine-readable grader signal — exactly one JSON object
    # per task, flushed immediately.
    print(json.dumps({
        "task_id": task_name,
        "score": float(round(score, 4))
    }), flush=True)
# =========================
# MAIN
# =========================
# (A superseded main() that emitted a single [START] line for the whole
# run was removed; the active main() below emits one [START] per task.)
def main():
    """Entry point: print run config, then execute every task in TASKS.

    The validator requires one [START] line per task, printed before the
    task's episode runs.
    """
    model_name = os.getenv("MODEL_NAME", "unknown-model")
    api_base_url = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1")
    print(f"[CONFIG] api_base_url={api_base_url}")
    benchmark = "openenv"
    for task in TASKS:
        # One START per task (validator reads this).
        print(f"[START] task={task['id']} env={benchmark} model={model_name}")
        run_single_task(task)


if __name__ == "__main__":
    main()