# Odin / promptfooconfig.yaml
# ODIN
# Initial commit: ODIN multi-agent drilling intelligence system
# 67e93c9
# promptfooconfig.yaml
# --------------------
# Configuration for evaluating the AI Drilling Copilot Agents
#
# NOTE: This rubric is fully customizable.
# You can adjust the test cases, prompts, and evaluation rules
# to match the SPE GCS 2026 ML Challenge evaluation criteria.
# Human-readable title for this evaluation suite.
# Quoted because the text contains ": ", which would otherwise start a mapping.
description: 'SPE GCS 2026: Agent Rubric Evaluation'
# Model(s) under evaluation. Each list entry is one provider.
providers:
  # Using Promptfoo's native Google provider. 3.1 is not fully supported by the npm plugin yet.
  - id: google:gemini-2.5-flash-preview
    # Display name for this provider; must be nested under the same list entry
    # as `id`, otherwise it is parsed as an unrelated top-level key.
    label: "baseline-agent-model"
# Prompt templates loaded from disk, one file per agent role
# (analyst, historian, auditor, lead — per the file names).
# NOTE(review): the test cases define `question` and `context` vars; presumably
# each prompt file references them — confirm against the files.
prompts:
  - file://tests/prompts/analyst_prompt.txt
  - file://tests/prompts/historian_prompt.txt
  - file://tests/prompts/auditor_prompt.txt
  - file://tests/prompts/lead_prompt.txt
# Test cases. Each entry supplies template variables (`vars`) and the
# assertions (`assert`) applied to the model's response.
# NOTE(review): `llm-rubric` assertions are scored by a grader model; no grader
# is configured in the visible part of this file, so promptfoo's default is
# presumably used — confirm the required credentials are available.
tests:
  # Case 1: identify the most challenging hole section from DDR/WITSML context.
  - vars:
      question: "Which hole section in well 15/9-19 B was the most challenging to drill?"
      context: "DDR data shows NPT of 45 hours in the 12.25 inch section due to severe losses. WITSML confirms high torque fluctuations."
    assert:
      # Case-insensitive substring check; the value is quoted so it stays the
      # string "12.25" rather than being typed as a float.
      - type: "icontains"
        value: "12.25"
      - type: "llm-rubric"
        value: "The response MUST explicitly state a 'Confidence Level' or 'Uncertainty'."
      - type: "llm-rubric"
        value: "The response must clearly state the evidence (either data or reports) used to make the conclusion."

  # Case 2: derive lessons learned about weather-induced NPT.
  - vars:
      question: "What were the lessons learned regarding weather-induced NPT?"
      context: "Historical Volve data indicates waiting on weather (WOW) caused 15% of all delays, particularly stalling riser pulling operations."
    assert:
      - type: "llm-rubric"
        value: "The response must synthesize the context to identify actionable lessons learned, not just repeat the data."
      # Fails if the response contains this boilerplate phrase (case-insensitive).
      - type: "not-icontains"
        value: "As an AI language model"