"""Plug-and-play MIPRO example: optimize a custom program's prompt on the MATH benchmark."""

import os
import json
import numpy as np
from dotenv import load_dotenv
from typing import Any, Tuple

from evoagentx.benchmark import MATH
from evoagentx.core.logging import logger
from evoagentx.models import OpenAILLM, OpenAILLMConfig
from evoagentx.optimizers import MiproOptimizer
from evoagentx.core.callbacks import suppress_logger_info
from evoagentx.utils.mipro_utils.register_utils import MiproRegistry

load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")


class MathSplits(MATH):
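    """MATH benchmark with fixed 100/100 train/test splits sampled from the original test set (seed 42)."""
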
    def _load_data(self):
        # Load the full MATH test set, then carve fixed splits out of it.
        super()._load_data()
        np.random.seed(42)
        permutation = np.random.permutation(len(self._test_data))
        full_test_data = self._test_data
        # 100 shuffled examples for prompt optimization, the next 100 for testing.
        self._train_data = [full_test_data[idx] for idx in permutation[:100]]
        self._test_data = [full_test_data[idx] for idx in permutation[100:200]]

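    # Field name(s) used to feed each benchmark example into the program.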
    def get_input_keys(self):
        return ["problem"]

    def evaluate(self, prediction: Any, label: Any) -> dict:
        return super().evaluate(prediction, label)


class CustomProgram:
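    """Minimal single-prompt program; MIPRO tunes `self.prompt` via the registry."""
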
    def __init__(self, model: OpenAILLM):
        self.model = model
        self.prompt = "Let's think step by step to answer the math question: {problem}"

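    # save/load persist the prompt as JSON so the optimizer can checkpoint
    # candidate programs and restore the best one afterwards.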
    def save(self, path: str):
        params = {"prompt": self.prompt}
        with open(path, "w") as f:
            json.dump(params, f)

    def load(self, path: str):
        with open(path, "r") as f:
            params = json.load(f)
        self.prompt = params["prompt"]

    def __call__(self, problem: str) -> Tuple[str, dict]:
        # Fill the template, query the executor LLM, and return the answer plus
        # a trace dict keyed by the registered input/output names.
        prompt = self.prompt.format(problem=problem)
        response = self.model.generate(prompt=prompt)
        solution = response.content
        return solution, {"problem": problem, "solution": solution}


def main():
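    # Two models: a cheaper executor runs the program on each example, while a
    # stronger model proposes candidate instructions during the MIPRO search.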
    openai_config = OpenAILLMConfig(model="gpt-4o-mini", openai_key=OPENAI_API_KEY, stream=True, output_response=False)
    executor_llm = OpenAILLM(config=openai_config)
    optimizer_config = OpenAILLMConfig(model="gpt-4o", openai_key=OPENAI_API_KEY, stream=True, output_response=False)
    optimizer_llm = OpenAILLM(config=optimizer_config)

    benchmark = MathSplits()
    program = CustomProgram(model=executor_llm)

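    # Register the program's `prompt` attribute as the tunable parameter and
    # declare which trace keys hold its inputs and outputs.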
    registry = MiproRegistry()
    registry.track(program, "prompt", input_names=["problem"], output_names=["solution"])

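    # auto="medium" picks a mid-sized search budget; the demo caps bound how many
    # bootstrapped and labeled few-shot examples each candidate prompt may carry.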
    optimizer = MiproOptimizer(
        registry=registry,
        program=program,
        optimizer_llm=optimizer_llm,
        max_bootstrapped_demos=4,
        max_labeled_demos=4,
        num_threads=20,
        eval_rounds=1,
        auto="medium",
        save_path="examples/output/mipro/math_plug_and_play"
    )

| logger.info("Optimizing program...") |
| optimizer.optimize(dataset=benchmark) |
| optimizer.restore_best_program() |
|
|
| logger.info("Evaluating program on test set...") |
| with suppress_logger_info(): |
| results = optimizer.evaluate(dataset=benchmark, eval_mode="test") |
| logger.info(f"Evaluation metrics (after optimization): {results}") |
|
|
|
|
if __name__ == "__main__":
    main()