Spaces:

KoopaK
/

Odin

Sleeping

Odin / promptfooconfig.yaml

ODIN

Initial commit: ODIN multi-agent drilling intelligence system

67e93c9 29 days ago

1.76 kB

	# promptfooconfig.yaml
	# --------------------
	# Configuration for evaluating the AI Drilling Copilot Agents
	#
	# NOTE: This rubric is fully customizable.
	# You can adjust the test cases, prompts, and evaluation rules
	# to match the SPE GCS 2026 ML Challenge evaluation criteria.

	# Free-text description of this evaluation config. Kept quoted because the
	# value contains ": ", which a plain scalar would parse as a nested mapping.
	description: 'SPE GCS 2026: Agent Rubric Evaluation'

	# Model providers the prompts are evaluated against.
	providers:
	# Using Promptfoo's native Google provider. 3.1 is not fully supported by the npm plugin yet.
	- id: google:gemini-2.5-flash-preview
	  # `label` must be indented under the list item: at the same column as
	  # `- id` it would parse as a top-level key instead of this provider's label.
	  label: "baseline-agent-model"

	# Prompt templates, loaded from files under tests/prompts/.
	# NOTE(review): file names suggest one prompt per agent role
	# (analyst, historian, auditor, lead) - confirm against the agent code.
	prompts:
	- 'file://tests/prompts/analyst_prompt.txt'
	- 'file://tests/prompts/historian_prompt.txt'
	- 'file://tests/prompts/auditor_prompt.txt'
	- 'file://tests/prompts/lead_prompt.txt'

	# Test cases. Each list item pairs template variables (`vars`) with the
	# assertions (`assert`) applied to the model's response.
	tests:
	# Case 1: identify the most challenging hole section from the given context.
	- vars:
	    # `question` and `context` must be nested under `vars`; at the same
	    # column as `- vars:` they would leave `vars` empty.
	    question: "Which hole section in well 15/9-19 B was the most challenging to drill?"
	    context: "DDR data shows NPT of 45 hours in the 12.25 inch section due to severe losses. WITSML confirms high torque fluctuations."
	  assert:
	  # Case-insensitive substring check; the value is quoted so it stays a
	  # string rather than being read as the float 12.25.
	  - type: "icontains"
	    value: "12.25"
	  # Model-graded checks: each `value` is the rubric for its own `type`.
	  - type: "llm-rubric"
	    value: "The response MUST explicitly state a 'Confidence Level' or 'Uncertainty'."
	  - type: "llm-rubric"
	    value: "The response must clearly state the evidence (either data or reports) used to make the conclusion."

	# Case 2: lessons learned about weather-induced NPT.
	- vars:
	    # Variables are nested under `vars` so they belong to this test case.
	    question: "What were the lessons learned regarding weather-induced NPT?"
	    context: "Historical Volve data indicates waiting on weather (WOW) caused 15% of all delays, particularly stalling riser pulling operations."
	  assert:
	  # Model-graded check that the answer synthesizes rather than echoes the context.
	  - type: "llm-rubric"
	    value: "The response must synthesize the context to identify actionable lessons learned, not just repeat the data."
	  # Case-insensitive guard against boilerplate disclaimer text in the output.
	  - type: "not-icontains"
	    value: "As an AI language model"