{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "e2d3caf8", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "import pickle\n", "import glob\n", "import pandas as pd\n", "import glob\n", "from tqdm import tqdm\n", "import base64\n", "import requests\n", "# OpenAI API Key\n", "api_key = \"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\"\n", "# Function to encode the image\n", "import os\n", "os.environ[\"OPENAI_API_KEY\"] = api_key\n" ] }, { "cell_type": "code", "execution_count": 2, "id": "f870b639", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/gpfs/radev/home/tl688/.conda/envs/evoagentx/lib/python3.11/site-packages/PyPDF2/__init__.py:21: DeprecationWarning: PyPDF2 is deprecated. Please move to the pypdf library instead.\n", " warnings.warn(\n" ] } ], "source": [ "import os\n", "from dotenv import load_dotenv\n", "from evoagentx.optimizers import AFlowOptimizer\n", "from evoagentx.models import LiteLLMConfig, LiteLLM, OpenAILLMConfig, OpenAILLM\n", "from evoagentx.benchmark import AFlowHumanEval\n", "\n", "# Load environment variables\n", "load_dotenv()\n", "OPENAI_API_KEY = os.getenv(\"OPENAI_API_KEY\")\n", "# ANTHROPIC_API_KEY = os.getenv(\"ANTHROPIC_API_KEY\")" ] }, { "cell_type": "code", "execution_count": 3, "id": "1f3dd892", "metadata": {}, "outputs": [], "source": [ "# # Configure the optimizer LLM (Claude 3.5 Sonnet)\n", "# claude_config = LiteLLMConfig(\n", "# model=\"anthropic/claude-3-5-sonnet-20240620\", \n", "# anthropic_key=ANTHROPIC_API_KEY\n", "# )\n", "# optimizer_llm = LiteLLM(config=claude_config)\n", "\n", "# Configure the executor LLM (GPT-4o-mini)\n", "openai_config = OpenAILLMConfig(\n", " model=\"gpt-4o-mini\", \n", " openai_key=OPENAI_API_KEY\n", ")\n", "\n", "claude_config = LiteLLMConfig(\n", " 
model=\"gpt-4o-mini\", \n",
    "    openai_key=OPENAI_API_KEY\n",
    ")\n",
    "# NOTE: despite its legacy name, claude_config targets gpt-4o-mini through LiteLLM.\n",
    "executor_llm = OpenAILLM(config=openai_config)\n",
    "optimizer_llm = LiteLLM(config=claude_config)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a87feb08",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Per-benchmark optimizer settings: the question type and the operators the optimizer may use.\n",
    "EXPERIMENTAL_CONFIG = {\n",
    "    \"humaneval\": {\n",
    "        \"question_type\": \"code\",\n",
    "        \"operators\": [\"Custom\", \"CustomCodeGenerate\", \"Test\", \"ScEnsemble\"]\n",
    "    },\n",
    "    \"mbpp\": {\n",
    "        \"question_type\": \"code\",\n",
    "        \"operators\": [\"Custom\", \"CustomCodeGenerate\", \"Test\", \"ScEnsemble\"]\n",
    "    },\n",
    "    \"hotpotqa\": {\n",
    "        \"question_type\": \"qa\",\n",
    "        \"operators\": [\"Custom\", \"AnswerGenerate\", \"QAScEnsemble\"]\n",
    "    },\n",
    "    \"gsm8k\": {\n",
    "        \"question_type\": \"math\",\n",
    "        \"operators\": [\"Custom\", \"ScEnsemble\", \"Programmer\"]\n",
    "    },\n",
    "    \"math\": {\n",
    "        \"question_type\": \"math\",\n",
    "        \"operators\": [\"Custom\", \"ScEnsemble\", \"Programmer\"]\n",
    "    }\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b6054068",
   "metadata": {},
   "outputs": [],
   "source": [
    "import evoagentx.workflow.operators as operator\n",
    "import examples.aflow.code_generation.prompt as prompt_custom  # noqa: F401\n",
    "from evoagentx.models.model_configs import LLMConfig\n",
    "from evoagentx.benchmark.benchmark import Benchmark\n",
    "from evoagentx.models.model_utils import create_llm_instance\n",
    "\n",
    "\n",
    "class Workflow:\n",
    "    \"\"\"Single-step code-generation workflow built on ``CustomCodeGenerate``.\n",
    "\n",
    "    NOTE(review): nothing else in this notebook references this class; it looks like a\n",
    "    copy of the seed ``graph.py`` that AFlowOptimizer reads from ``graph_path`` - confirm.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(\n",
    "        self,\n",
    "        name: str,\n",
    "        llm_config: LLMConfig,\n",
    "        benchmark: Benchmark\n",
    "    ):\n",
    "        \"\"\"Create the LLM from ``llm_config`` and the operators that use it.\"\"\"\n",
    "        self.name = name\n",
    "        self.llm = create_llm_instance(llm_config)\n",
    "        self.benchmark = benchmark\n",
    "        # Both operators share the single LLM instance created above.\n",
    "        self.custom = operator.Custom(self.llm)\n",
    "        self.custom_code_generate = operator.CustomCodeGenerate(self.llm)\n",
    "\n",
    "    async def __call__(self, problem: str, entry_point: str):\n",
    "        \"\"\"Generate code for ``problem`` and return the operator's ``'response'`` entry.\n",
    "\n",
    "        ``self.custom`` can generate anything you want; ``self.custom_code_generate`` is\n",
    "        the operator to use when standard code output is wanted.\n",
    "        \"\"\"\n",
    "        # await self.custom(input=, instruction=\"\")\n",
    "        solution = await self.custom_code_generate(\n",
    "            problem=problem,\n",
    "            entry_point=entry_point,\n",
    "            instruction=prompt_custom.GENERATE_PYTHON_CODE_PROMPT,\n",
    "        )\n",
    "        return solution['response']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "27e574ad",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Initialize the benchmark\n",
    "humaneval = AFlowHumanEval()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2f8da181",
   "metadata": {},
   "outputs": [],
   "source": [
    "from pathlib import Path\n",
    "\n",
    "# The optimizer paths are relative, so they only resolve when the kernel's working directory\n",
    "# is the directory that contains the `examples` package. The previous run of this cell raised\n",
    "# FileNotFoundError for ./examples/aflow/code_generation/graph.py, presumably because the\n",
    "# kernel was started somewhere else. Anchor the working directory on the prompt module that\n",
    "# was imported above (<root>/examples/aflow/code_generation/prompt.py -> parents[3] is <root>).\n",
    "REPO_ROOT = Path(prompt_custom.__file__).resolve().parents[3]\n",
    "os.chdir(REPO_ROOT)\n",
    "\n",
    "GRAPH_PATH = \"./examples/aflow/code_generation\"\n",
    "OPTIMIZED_PATH = \"./examples/aflow/humaneval/optimized\"\n",
    "\n",
    "seed_graph = Path(GRAPH_PATH, \"graph.py\")\n",
    "if not seed_graph.is_file():\n",
    "    raise FileNotFoundError(\n",
    "        f\"{seed_graph.resolve()} does not exist; \"\n",
    "        \"AFlowOptimizer copies graph.py and prompt.py from graph_path when it starts.\"\n",
    "    )\n",
    "\n",
    "optimizer = AFlowOptimizer(\n",
    "    graph_path=GRAPH_PATH,  # Path to the initial workflow graph\n",
    "    optimized_path=OPTIMIZED_PATH,  # Path to save optimized workflows\n",
    "    optimizer_llm=optimizer_llm,  # LLM for optimization\n",
    "    executor_llm=executor_llm,  # LLM for execution\n",
    "    validation_rounds=3,  # Number of times to run validation on the development set during optimization\n",
    "    eval_rounds=3,  # Number of times to run evaluation on the test set during testing\n",
    "    max_rounds=20,  # Maximum optimization rounds\n",
    "    **EXPERIMENTAL_CONFIG[\"humaneval\"]  # Task-specific configuration, used to specify the task type and available operators\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "74937699",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Jupyter already runs an asyncio event loop; nest_asyncio patches asyncio so nested loops are\n",
    "# allowed (presumably required by optimizer.optimize / optimizer.test below - confirm).\n",
    "import nest_asyncio\n",
    "nest_asyncio.apply()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "98ac4a63",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Optimize the workflow\n",
    "optimizer.optimize(humaneval)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1010d583",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Evaluate the optimized workflow on the benchmark's test split\n",
    "optimizer.test(humaneval)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "becb5a82",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
"cell_type": "code", "execution_count": 16, "id": "5c076d29", "metadata": {}, "outputs": [], "source": [ "df = pd.read_json(\"/home/tl688/pitl688/selfevolve/AFlow/data/datasets/scicode_dev.jsonl\", lines=True)" ] }, { "cell_type": "code", "execution_count": 23, "id": "481602a9", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'def get_alpha(recvec, alpha_scaling=5):\\n \"\"\"\\n Calculate the alpha value for the Ewald summation, scaled by a specified factor.\\n Parameters:\\n recvec (np.ndarray): A 3x3 array representing the reciprocal lattice vectors.\\n alpha_scaling (float): A scaling factor applied to the alpha value. Default is 5.\\n Returns:\\n float: The calculated alpha value.\\n \"\"\"\\n alpha = alpha_scaling * np.max(np.linalg.norm(recvec, axis=1))\\n return alpha'" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['ground_truth_code'].values[0]" ] }, { "cell_type": "code", "execution_count": 21, "id": "ffb0be7e", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "\"def get_alpha(recvec, alpha_scaling=5):\\n '''Calculate the alpha value for the Ewald summation, scaled by a specified factor.\\n Parameters:\\n recvec (np.ndarray): A 3x3 array representing the reciprocal lattice vectors.\\n alpha_scaling (float): A scaling factor applied to the alpha value. 
Default is 5.\\n Returns:\\n float: The calculated alpha value.\\n '''\"" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['function_header'].values[0]" ] }, { "cell_type": "code", "execution_count": 24, "id": "69acf613", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'import numpy as np\\nfrom scipy.special import erfc'" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['required_dependencies'].values[0]" ] }, { "cell_type": "code", "execution_count": 25, "id": "b5696e0e", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['step_number', 'step_description_prompt', 'step_background',\n", " 'ground_truth_code', 'function_header', 'test_cases', 'return_line',\n", " 'required_dependencies'],\n", " dtype='object')" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.columns" ] }, { "cell_type": "code", "execution_count": 27, "id": "0a3085a9", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "\"def get_alpha(recvec, alpha_scaling=5):\\n '''Calculate the alpha value for the Ewald summation, scaled by a specified factor.\\n Parameters:\\n recvec (np.ndarray): A 3x3 array representing the reciprocal lattice vectors.\\n alpha_scaling (float): A scaling factor applied to the alpha value. 
Default is 5.\\n Returns:\\n float: The calculated alpha value.\\n '''\"" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['function_header'].values[0]" ] }, { "cell_type": "code", "execution_count": 28, "id": "e6a76c86", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "\"ref1 = -1.74756\\nEX1 = {\\n 'latvec': np.array([\\n [0.0, 1.0, 1.0],\\n [1.0, 0.0, 1.0],\\n [1.0, 1.0, 0.0]\\n ]),\\n 'atom_charges': np.array([1]),\\n 'atom_coords': np.array([\\n [0.0, 0.0, 0.0]\\n ]),\\n 'configs': np.array([\\n [1.0, 1.0, 1.0]\\n ]),\\n}\\nassert np.allclose(get_alpha(np.linalg.inv(EX1['latvec']).T), target)\"" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['test_cases'].values[0]" ] }, { "cell_type": "code", "execution_count": null, "id": "153a9929", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "e9168d74", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "import pickle\n", "import glob\n", "import pandas as pd\n", "import glob\n", "from tqdm import tqdm\n", "import base64\n", "import requests\n", "# OpenAI API Key\n", "api_key = \"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\"\n", "# Function to encode the image\n", "import os\n", "os.environ[\"OPENAI_API_KEY\"] = api_key\n", "\n", "\n", "import os\n", "from dotenv import load_dotenv\n", "from evoagentx.optimizers import AFlowOptimizer\n", "from evoagentx.models import LiteLLMConfig, LiteLLM, OpenAILLMConfig, OpenAILLM\n", "from evoagentx.benchmark import AFlowHumanEval\n", "\n", "# Load environment variables\n", "load_dotenv()\n", "OPENAI_API_KEY = os.getenv(\"OPENAI_API_KEY\")\n", "# ANTHROPIC_API_KEY = os.getenv(\"ANTHROPIC_API_KEY\")\n", "\n", "# # Configure the optimizer LLM (Claude 3.5 
Sonnet)\n", "# claude_config = LiteLLMConfig(\n", "# model=\"anthropic/claude-3-5-sonnet-20240620\", \n", "# anthropic_key=ANTHROPIC_API_KEY\n", "# )\n", "# optimizer_llm = LiteLLM(config=claude_config)\n", "\n", "# Configure the executor LLM (GPT-4o-mini)\n", "openai_config = OpenAILLMConfig(\n", " model=\"gpt-4o-mini\", \n", " openai_key=OPENAI_API_KEY\n", ")\n", "\n", "claude_config = LiteLLMConfig(\n", " model=\"gpt-4o-mini\", \n", " openai_key=OPENAI_API_KEY\n", ")\n", "executor_llm = OpenAILLM(config=openai_config)\n", "optimizer_llm = LiteLLM(config=claude_config)\n", "\n", "EXPERIMENTAL_CONFIG = {\n", " \"humaneval\": {\n", " \"question_type\": \"code\", \n", " \"operators\": [\"Custom\", \"CustomCodeGenerate\", \"Test\", \"ScEnsemble\"] \n", " }, \n", " \"mbpp\": {\n", " \"question_type\": \"code\", \n", " \"operators\": [\"Custom\", \"CustomCodeGenerate\", \"Test\", \"ScEnsemble\"] \n", " },\n", " \"hotpotqa\": {\n", " \"question_type\": \"qa\", \n", " \"operators\": [\"Custom\", \"AnswerGenerate\", \"QAScEnsemble\"]\n", " },\n", " \"gsm8k\": {\n", " \"question_type\": \"math\", \n", " \"operators\": [\"Custom\", \"ScEnsemble\", \"Programmer\"]\n", " },\n", " \"math\": {\n", " \"question_type\": \"math\", \n", " \"operators\": [\"Custom\", \"ScEnsemble\", \"Programmer\"]\n", " }\n", "}\n", "\n", "import evoagentx.workflow.operators as operator\n", "import examples.aflow.code_generation.prompt as prompt_custom # noqa: F401\n", "from evoagentx.models.model_configs import LLMConfig\n", "from evoagentx.benchmark.benchmark import Benchmark\n", "from evoagentx.models.model_utils import create_llm_instance\n", "\n", "class Workflow:\n", " \n", " def __init__(\n", " self,\n", " name: str,\n", " llm_config: LLMConfig,\n", " benchmark: Benchmark\n", " ):\n", " self.name = name\n", " self.llm = create_llm_instance(llm_config)\n", " self.benchmark = benchmark \n", " self.custom = operator.Custom(self.llm)\n", " self.custom_code_generate = 
operator.CustomCodeGenerate(self.llm)\n", "\n", " async def __call__(self, problem: str, entry_point: str):\n", " \"\"\"\n", " Implementation of the workflow\n", " Custom operator to generate anything you want.\n", " But when you want to get standard code, you should use custom_code_generate operator.\n", " \"\"\"\n", " # await self.custom(input=, instruction=\"\")\n", " solution = await self.custom_code_generate(problem=problem, entry_point=entry_point, instruction=prompt_custom.GENERATE_PYTHON_CODE_PROMPT) # But When you want to get standard code ,you should use customcodegenerator.\n", " return solution['response']\n", "\n", "# Initialize the benchmark\n", "humaneval = AFlowHumanEval()\n", "\n", "optimizer = AFlowOptimizer(\n", " graph_path=\"./examples/aflow/code_generation\", # Path to the initial workflow graph\n", " optimized_path=\"./examples/aflow/humaneval/optimized\", # Path to save optimized workflows\n", " optimizer_llm=optimizer_llm, # LLM for optimization\n", " executor_llm=executor_llm, # LLM for execution\n", " validation_rounds=3, # Number of times to run validation on the development set during optimization\n", " eval_rounds=3, # Number of times to run evaluation on the test set during testing\n", " max_rounds=20, # Maximum optimization rounds\n", " **EXPERIMENTAL_CONFIG[\"humaneval\"] # Task-specific configuration, used to specify the task type and available operators\n", ")\n", "\n", "import nest_asyncio\n", "nest_asyncio.apply()\n", "\n", "# Optimize the workflow\n", "optimizer.optimize(humaneval)\n", "\n", "optimizer.test(humaneval)\n", "\n", "import pandas as pd\n", "\n", "df = pd.read_json(\"/home/tl688/pitl688/selfevolve/AFlow/data/datasets/scicode_dev.jsonl\", lines=True)\n", "\n", "df['ground_truth_code'].values[0]\n", "\n", "df['function_header'].values[0]\n", "\n", "df['required_dependencies'].values[0]\n", "\n", "df.columns\n", "\n", "df['function_header'].values[0]\n", "\n", "df['test_cases'].values[0]\n", "\n" ] } ], "metadata": { 
"kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.13" } }, "nbformat": 4, "nbformat_minor": 5 }