| from dotenv import load_dotenv |
|
|
| from evoagentx.agents.agent_manager import AgentManager |
| from evoagentx.benchmark import HotPotQA |
| from evoagentx.core.callbacks import suppress_logger_info |
| from evoagentx.core.logging import logger |
| from evoagentx.evaluators import Evaluator |
| from evoagentx.models import OpenAILLM, OpenAILLMConfig |
| from evoagentx.optimizers import TextGradOptimizer |
| from evoagentx.prompts import StringTemplate |
| from evoagentx.workflow import SequentialWorkFlowGraph |
|
|
# Runs at import time, before main() constructs any LLM client.
# NOTE(review): presumably this pulls API credentials (e.g. the OpenAI key)
# from a local .env file into the environment — confirm which variables the
# OpenAI client expects.
load_dotenv()
|
|
class HotPotQASplits(HotPotQA):
    """HotPotQA benchmark re-split into small train/dev/test subsets.

    All three subsets are drawn from the dev data loaded by the parent class,
    using a fixed-seed shuffle so the split is the same on every run.
    """

    def _load_data(self):
        """Load the parent data, then overwrite the train/dev/test splits.

        The loaded dev data is shuffled with seed 42; the first 10 shuffled
        examples become the train split, the next 40 the dev split and the
        following 100 the test split. If fewer than 150 examples were loaded,
        the later slices are simply shorter (or empty).
        """
        # Let the parent class populate the raw data first.
        super()._load_data()

        import numpy as np

        # A private RandomState seeded with 42 produces the same permutation
        # as np.random.seed(42) followed by np.random.permutation(...), but it
        # does not reseed NumPy's process-wide RNG as a side effect of
        # loading the benchmark.
        rng = np.random.RandomState(42)
        permutation = rng.permutation(len(self._dev_data))

        # Keep a reference to the original dev data: self._dev_data is
        # reassigned below while the test split still has to read from it.
        full_test_data = self._dev_data

        self._train_data = [full_test_data[idx] for idx in permutation[:10]]
        self._dev_data = [full_test_data[idx] for idx in permutation[10:50]]
        self._test_data = [full_test_data[idx] for idx in permutation[50:150]]
|
|
|
|
def collate_func(example: dict) -> dict:
    """Turn one benchmark example into the workflow's ``problem`` input.

    Args:
        example: Mapping with a ``"context"`` entry (a sequence whose items
            are indexed as ``item[0]`` for the title and ``item[1]`` for an
            iterable of sentence strings) and a ``"question"`` entry.

    Returns:
        A dict with the single key ``"problem"`` holding the prompt text:
        every context item rendered as a title/text passage, passages
        separated by blank lines, followed by the question and a trailing
        ``Answer:`` cue.
    """

    def render(entry):
        # Sentences are stripped individually, then joined by single spaces.
        title = entry[0]
        body = " ".join(sentence.strip() for sentence in entry[1])
        return f"Title: {title}\nText: {body}"

    passages = "\n\n".join(render(entry) for entry in example["context"])
    question = example["question"]
    return {"problem": f"Context: {passages}\n\nQuestion: {question}\n\nAnswer:"}
|
|
|
|
# The only task in the workflow: it receives the collated "problem" string
# and must produce an "answer" string. The prompt asks for XML-tagged output,
# matching the "xml" parse mode.
_ANSWER_GENERATE_TASK = {
    "name": "answer_generate",
    "description": "Answer the question based on the context.",
    "inputs": [
        {
            "name": "problem",
            "type": "str",
            "required": True,
            "description": "The problem to solve.",
        },
    ],
    "outputs": [
        {
            "name": "answer",
            "type": "str",
            "required": True,
            "description": "The answer to the problem.",
        },
    ],
    "prompt_template": StringTemplate(
        instruction="Think step by step to answer the question. You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\nFormat your output in xml format, such as <thought>xxx</thought> and <answer>xxx</answer>."
    ),
    "parse_mode": "xml",
}

# Workflow description handed to SequentialWorkFlowGraph.from_dict in main().
hotpotqa_graph_data = {
    "goal": "Answer the question based on the context. The answer should be a direct response to the question, without including explanations or reasoning.",
    "tasks": [_ANSWER_GENERATE_TASK],
}
|
|
def main():
    """Run the TextGrad optimization experiment on the HotPotQA splits.

    Steps, in order: build the executor and optimizer LLMs, the benchmark,
    the workflow graph and its agents; evaluate the workflow on the test
    split; call ``optimize`` and ``restore_best_graph`` on the optimizer;
    evaluate on the test split again. Both sets of metrics are logged.
    """
    # One model executes the workflow, a second one drives the optimizer.
    executor_llm = OpenAILLM(config=OpenAILLMConfig(model="gpt-4o-mini"))
    optimizer_llm = OpenAILLM(config=OpenAILLMConfig(model="gpt-4o"))

    # Benchmark data, the workflow under optimization, and its agents.
    benchmark = HotPotQASplits()
    workflow_graph = SequentialWorkFlowGraph.from_dict(hotpotqa_graph_data)
    agent_manager = AgentManager()
    agent_manager.add_agents_from_workflow(workflow_graph, executor_llm.config)

    evaluator = Evaluator(
        llm=executor_llm,
        agent_manager=agent_manager,
        collate_func=collate_func,
        num_workers=20,
        verbose=True,
    )

    optimizer_settings = {
        "graph": workflow_graph,
        "optimize_mode": "all",
        "executor_llm": executor_llm,
        "optimizer_llm": optimizer_llm,
        "batch_size": 3,
        "max_steps": 20,
        "evaluator": evaluator,
        "eval_every_n_steps": 1,
        "eval_rounds": 1,
        "save_interval": None,
        "save_path": "./",
        "rollback": True,
        "constraints": [],
    }
    textgrad_optimizer = TextGradOptimizer(**optimizer_settings)

    def evaluate_on_test():
        # Info-level logging is suppressed only for the duration of the
        # evaluation call itself.
        with suppress_logger_info():
            return textgrad_optimizer.evaluate(dataset=benchmark, eval_mode="test")

    # Baseline metrics for the unoptimized workflow.
    logger.info("Evaluating workflow on test set...")
    baseline_metrics = evaluate_on_test()
    logger.info(f"Evaluation metrics (before optimization): {baseline_metrics}")

    logger.info("Optimizing workflow...")
    textgrad_optimizer.optimize(benchmark, seed=8)
    textgrad_optimizer.restore_best_graph()

    # Same evaluation again, now on the graph left in place by the optimizer.
    logger.info("Evaluating workflow on test set...")
    optimized_metrics = evaluate_on_test()
    logger.info(f"Evaluation metrics (after optimization): {optimized_metrics}")


if __name__ == "__main__":
    main()