Spaces:

Salil-IND
/

MetaLearning

Sleeping

App Files Files Community

MetaLearning / env /graders.py

Salil-IND

Second Commit

78940a4 verified 11 days ago

raw

history blame contribute delete

4.67 kB

	from .models import Action, Email, State
	from .reward import compute_dense_reward

	def grade_easy(action: Action, email: Email) -> float:
	    """Score an action taken on one of the easy-tier emails.

	    Rubric:
	      * email "e1" is answered correctly by a "reply" action;
	      * email "e2" is answered correctly by a "mark_spam" action;
	      * any other email id, or any other action, counts as incorrect.

	    Args:
	        action: The agent's action; only ``action_type`` is inspected.
	        email: The email being handled; only ``id`` is inspected.

	    Returns:
	        The value of ``compute_dense_reward`` for the correct/incorrect
	        outcome.
	    """
	    # Map each known easy email to the single action that solves it.
	    wanted_action = {"e1": "reply", "e2": "mark_spam"}.get(email.id)

	    # Unknown ids short-circuit to False without touching the action.
	    solved = wanted_action is not None and action.action_type == wanted_action
	    return compute_dense_reward(solved)

	def grade_medium(action: Action, email: Email) -> float:
	    """Score an action taken on one of the medium-tier emails.

	    Rubric (fractions are multiples of ``compute_dense_reward(True)``):
	      * "m1" (order id is missing, so the sender should be asked for it):
	          - "request_info"                                    -> full
	          - "reply" whose text mentions "order" and "id"      -> full
	          - "reply" with any other non-empty text             -> 0.5
	          - "forward" to an address containing "support"      -> 0.3
	      * "m2": "mark_spam" or "archive"                        -> full
	      * "m3": "reply" with more than 5 characters of text     -> full
	              any other "reply"                               -> 0.5
	    Everything else is scored with ``compute_dense_reward(False)``.

	    Args:
	        action: The agent's action; reads ``action_type``,
	            ``response_text`` and ``forward_to``.
	        email: The email being handled; only ``id`` is inspected.

	    Returns:
	        The (possibly scaled) dense reward for this action.
	    """
	    email_id = email.id

	    if email_id == "m1":
	        if action.action_type == "request_info":
	            return compute_dense_reward(True)

	        if action.action_type == "reply" and action.response_text:
	            body = action.response_text.lower()
	            credit = compute_dense_reward(True)
	            # Plain substring checks: a reply that asks for the order id is
	            # as good as an explicit request_info; other replies get half.
	            asks_for_order_id = "order" in body and "id" in body
	            return credit if asks_for_order_id else credit * 0.5

	        if (
	            action.action_type == "forward"
	            and action.forward_to
	            and "support" in action.forward_to.lower()
	        ):
	            # Escalating an incomplete ticket instead of asking for the
	            # missing info only earns partial credit.
	            return compute_dense_reward(True) * 0.3

	        return compute_dense_reward(False)

	    if email_id == "m2":
	        dismissed = action.action_type in ("mark_spam", "archive")
	        return compute_dense_reward(dismissed)

	    if email_id == "m3":
	        if action.action_type != "reply":
	            return compute_dense_reward(False)
	        text = action.response_text
	        credit = compute_dense_reward(True)
	        # A reply needs more than 5 characters to count as a real answer.
	        if text and len(text) > 5:
	            return credit
	        return credit * 0.5

	    # Email id does not belong to the medium tier.
	    return compute_dense_reward(False)

	def grade_hard(action: Action, email: Email) -> float:
	    """Score an action taken on one of the hard-tier emails.

	    Rubric (fractions are multiples of ``compute_dense_reward(True)``):
	      * "h1" (VIP issue):
	          - "reply" mentioning "investigat..." or "downtime"  -> full
	          - any other "reply"                                 -> 0.5
	          - "escalate" with priority "urgent"                 -> full
	          - any other "escalate"                              -> 0.7
	      * "h2" (alert):
	          - "escalate" with priority "urgent"                 -> full
	          - any other "escalate"                              -> 0.7
	          - "forward" to an address containing "eng":
	              priority "urgent" -> full, otherwise            -> 0.5
	      * "h3" (compliance):
	          - "reply" mentioning "acknowledge"                  -> full
	          - any other "reply"                                 -> 0.4
	          - "forward" to an address containing "legal"        -> full
	      * "h4" (CEO phishing attempt):
	          - "mark_spam"                                       -> full
	          - "archive" or "escalate"                           -> 0.5
	      * "h5" (intern project): "reply"                        -> full
	    Everything else is scored with ``compute_dense_reward(False)``.

	    Args:
	        action: The agent's action; reads ``action_type``,
	            ``response_text``, ``forward_to`` and ``priority_level``.
	        email: The email being handled; only ``id`` is inspected.

	    Returns:
	        The (possibly scaled) dense reward for this action.
	    """
	    email_id = email.id
	    kind = action.action_type

	    if email_id == "h1":  # VIP issue
	        if kind == "reply":
	            text = (action.response_text or "").lower()
	            # The documented intent is that either an "investigating" or a
	            # "downtime" acknowledgement earns full credit; previously only
	            # the "investigat" stem was checked.
	            if "investigat" in text or "downtime" in text:
	                return compute_dense_reward(True)
	            return compute_dense_reward(True) * 0.5
	        if kind == "escalate":
	            if action.priority_level == "urgent":
	                return compute_dense_reward(True)
	            return compute_dense_reward(True) * 0.7
	        # A wrong action goes through the same failure path as every other
	        # branch, rather than compute_dense_reward(True) * 0.0.
	        return compute_dense_reward(False)

	    if email_id == "h2":  # Alert
	        if kind == "escalate":
	            if action.priority_level == "urgent":
	                return compute_dense_reward(True)
	            return compute_dense_reward(True) * 0.7
	        if kind == "forward" and action.forward_to and "eng" in action.forward_to.lower():
	            if action.priority_level == "urgent":
	                return compute_dense_reward(True)
	            return compute_dense_reward(True) * 0.5
	        return compute_dense_reward(False)

	    if email_id == "h3":  # Compliance
	        if kind == "reply":
	            if action.response_text and "acknowledge" in action.response_text.lower():
	                return compute_dense_reward(True)
	            return compute_dense_reward(True) * 0.4
	        if kind == "forward" and action.forward_to and "legal" in action.forward_to.lower():
	            return compute_dense_reward(True)
	        return compute_dense_reward(False)

	    if email_id == "h4":  # CEO phishing attempt
	        if kind == "mark_spam":
	            return compute_dense_reward(True)
	        if kind in ("archive", "escalate"):
	            # Getting the mail out of the inbox is acceptable, but not as
	            # good as flagging it as spam.
	            return compute_dense_reward(True) * 0.5
	        return compute_dense_reward(False)

	    if email_id == "h5":  # Intern project
	        return compute_dense_reward(kind == "reply")

	    # Email id does not belong to the hard tier.
	    return compute_dense_reward(False)

	def grade_action(task_level: str, action: Action, email: Email, state: State) -> float:
	tasks_sizes = {"easy": 2, "medium": 3, "hard": 5}
	size = tasks_sizes.get(task_level, 1)

	if task_level == "easy":
	r = grade_easy(action, email)
	elif task_level == "medium":
	r = grade_medium(action, email)
	elif task_level == "hard":
	r = grade_hard(action, email)
	else:
	r = 0.0

	# Strictly bound reward between 0.0 and 1.0 for this step
	# Max episode score stays bounded since pop removes 1 email, at max 1 for each email / sizes = 1.0 total max
	scaled_reward = max(0.0, min(1.0, r / size))
	return scaled_reward