Upload 2846 files

5374a2d verified 3 months ago

4.24 kB

	from dotenv import load_dotenv

	from evoagentx.agents.agent_manager import AgentManager
	from evoagentx.benchmark import HotPotQA
	from evoagentx.core.callbacks import suppress_logger_info
	from evoagentx.core.logging import logger
	from evoagentx.evaluators import Evaluator
	from evoagentx.models import OpenAILLM, OpenAILLMConfig
	from evoagentx.optimizers import TextGradOptimizer
	from evoagentx.prompts import StringTemplate
	from evoagentx.workflow import SequentialWorkFlowGraph

	load_dotenv()

	class HotPotQASplits(HotPotQA):
	    """HotPotQA variant that carves small fixed train/dev/test subsets.

	    After the parent loader has run, the examples held in ``self._dev_data``
	    are shuffled with a fixed seed and sliced into 10 training, 40
	    development and 100 test examples.
	    """

	    def _load_data(self):
	        """Load the parent benchmark data, then overwrite the three splits.

	        The shuffle uses a private ``RandomState(42)`` instead of calling
	        ``np.random.seed(42)``: the resulting permutation is the same, but
	        NumPy's process-wide generator is no longer reseeded as a side
	        effect of loading data.

	        If fewer than 150 examples are available, the slices below are
	        simply shorter (or empty); no error is raised.
	        """
	        # Let the parent populate self._dev_data first.
	        super()._load_data()

	        # Function-scope import, as in the original: numpy is only needed here.
	        import numpy as np

	        pool = self._dev_data
	        order = np.random.RandomState(42).permutation(len(pool))

	        # Disjoint slices of one permutation: 10 train, 40 dev, 100 test.
	        self._train_data = [pool[idx] for idx in order[:10]]
	        self._dev_data = [pool[idx] for idx in order[10:50]]
	        self._test_data = [pool[idx] for idx in order[50:150]]


	def collate_func(example: dict) -> dict:
	    """Flatten one example into the single ``problem`` prompt string.

	    Args:
	        example: Mapping with a ``"context"`` entry and a ``"question"``
	            entry. Each context item is indexed as ``item[0]`` (title) and
	            ``item[1]`` (an iterable of sentence strings).

	    Returns:
	        A dict with one key, ``"problem"``, containing every passage rendered
	        as a title line plus its whitespace-stripped sentences joined by
	        spaces, followed by the question and a trailing answer cue.
	    """
	    passages = [
	        "Title: {}\nText: {}".format(
	            entry[0], " ".join(sentence.strip() for sentence in entry[1])
	        )
	        for entry in example["context"]
	    ]
	    return {
	        "problem": "Context: {}\n\nQuestion: {}\n\nAnswer:".format(
	            "\n\n".join(passages), example["question"]
	        )
	    }


	# Declarative description of the workflow; main() passes it to
	# SequentialWorkFlowGraph.from_dict. It contains a single task,
	# "answer_generate", which takes a "problem" string and yields an "answer".
	hotpotqa_graph_data = {
	    "goal": "Answer the question based on the context. The answer should be a direct response to the question, without including explanations or reasoning.",
	    "tasks": [
	        {
	            "name": "answer_generate",
	            "description": "Answer the question based on the context.",
	            # "problem" matches the key produced by collate_func.
	            "inputs": [
	                {"name": "problem", "type": "str", "required": True, "description": "The problem to solve."},
	            ],
	            "outputs": [
	                {"name": "answer", "type": "str", "required": True, "description": "The answer to the problem."},
	            ],
	            # The instruction asks for <thought>/<answer> XML tags, paired
	            # with the "xml" parse mode below.
	            "prompt_template": StringTemplate(instruction="Think step by step to answer the question. You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\nFormat your output in xml format, such as <thought>xxx</thought> and <answer>xxx</answer>."),
	            "parse_mode": "xml",
	        },
	    ],
	}

	def main():
	    """Evaluate, optimize with TextGrad, then re-evaluate the HotPotQA workflow.

	    Steps, in order:
	      1. Build a ``gpt-4o-mini`` executor LLM and a ``gpt-4o`` optimizer LLM.
	      2. Create the benchmark splits, the workflow graph and its agents.
	      3. Log test-split metrics before optimization.
	      4. Run ``TextGradOptimizer.optimize`` and call ``restore_best_graph``.
	      5. Log test-split metrics after optimization.
	    """
	    # The executor model runs the workflow; the optimizer model is handed to
	    # TextGradOptimizer separately.
	    executor_llm = OpenAILLM(config=OpenAILLMConfig(model="gpt-4o-mini"))
	    optimizer_llm = OpenAILLM(config=OpenAILLMConfig(model="gpt-4o"))

	    splits = HotPotQASplits()
	    graph = SequentialWorkFlowGraph.from_dict(hotpotqa_graph_data)

	    agents = AgentManager()
	    agents.add_agents_from_workflow(graph, executor_llm.config)

	    scorer = Evaluator(
	        llm=executor_llm,
	        agent_manager=agents,
	        collate_func=collate_func,
	        num_workers=20,
	        verbose=True,
	    )

	    tuner = TextGradOptimizer(
	        graph=graph,
	        optimize_mode="all",
	        executor_llm=executor_llm,
	        optimizer_llm=optimizer_llm,
	        batch_size=3,
	        max_steps=20,
	        evaluator=scorer,
	        eval_every_n_steps=1,
	        eval_rounds=1,
	        save_interval=None,
	        save_path="./",
	        rollback=True,
	        constraints=[],
	    )

	    def evaluate_on_test_split():
	        """Announce and run one test-split evaluation with info logs suppressed."""
	        logger.info("Evaluating workflow on test set...")
	        with suppress_logger_info():
	            return tuner.evaluate(dataset=splits, eval_mode="test")

	    # Baseline score of the unoptimized workflow.
	    results = evaluate_on_test_split()
	    logger.info(f"Evaluation metrics (before optimization): {results}")

	    logger.info("Optimizing workflow...")
	    tuner.optimize(splits, seed=8)
	    # NOTE(review): presumably reinstates the best-scoring graph seen during
	    # optimization - confirm against TextGradOptimizer.
	    tuner.restore_best_graph()

	    # Score again after optimization.
	    results = evaluate_on_test_split()
	    logger.info(f"Evaluation metrics (after optimization): {results}")


	# Run the example only when this file is executed as a script, not on import.
	if __name__ == "__main__":
	    main()