|
|
|
|
| import logging |
| import os |
|
|
def judge_answer_with_api(question, target, answer) -> int:
    """Ask an OpenAI chat model whether a generated answer is correct.

    A single-turn prompt containing the question, the target answer and the
    generated answer is sent to the ``gpt-4o-mini-2024-07-18`` model, which is
    instructed to reply with only ``True`` or ``False``.

    The client is configured from the ``OPENAI_BASE_URL`` and
    ``OPENAI_API_KEY`` environment variables.

    Args:
        question: The question that was asked; interpolated into the prompt.
        target: The reference answer (the prompt tells the model it may be a
            list of all acceptable answers); interpolated into the prompt.
        answer: The generated answer to be judged; interpolated into the
            prompt.

    Returns:
        1 if the model's verdict is "True", otherwise 0. Note that 0 is also
        returned when the API call fails, when the response cannot be parsed,
        or when the reply is neither "True" nor "False"; those cases are
        logged but are not distinguishable from an incorrect answer by the
        return value alone.
    """
    # Imported lazily so that merely importing this module does not require
    # the openai package to be installed.
    from openai import OpenAI

    client = OpenAI(
        base_url=os.environ.get("OPENAI_BASE_URL"),
        api_key=os.environ.get("OPENAI_API_KEY"),
    )

    prompt = (
        "You will be given a question, a target answer (maybe a list of all possible answers), "
        "and a generated answer. Please judge whether the generated answer is correct. "
        "If it is correct, return 'True'. If it is incorrect, return 'False'.\n"
        f"Question: {question}\n"
        f"Target Answer: {target}\n"
        f"Generated Answer: {answer}\n"
        "Please return only 'True' or 'False', without any other text."
    )

    # Deliberately broad: any client/transport failure is treated as
    # "not judged correct" rather than aborting the caller.
    try:
        response = client.chat.completions.create(
            model="gpt-4o-mini-2024-07-18",
            messages=[{"role": "user", "content": prompt}],
            temperature=0,
            max_tokens=1,
        )
    except Exception as e:
        logging.error("API call failed: %s", e)
        return 0

    # AttributeError covers a missing message or a None content, IndexError an
    # empty choices list, and TypeError a choices value that is None (not
    # subscriptable).
    try:
        result = response.choices[0].message.content.strip()
    except (AttributeError, IndexError, TypeError) as e:
        logging.error("Error parsing API response: %s", e)
        return 0

    # Tolerate harmless formatting drift in the reply (letter case, wrapping
    # quotes, trailing period) instead of scoring it as incorrect.
    verdict = result.strip("'\"`.").casefold()
    if verdict == "true":
        return 1
    if verdict == "false":
        return 0

    logging.warning("Abnormal response format: %s", result)
    return 0
|
|
|
|
|
|