# benchmark_config.yaml · Minibase/DeId-Small at main
#
# DeId-Small / benchmark_config.yaml
#
# Upload benchmark_config.yaml with huggingface_hub
#
# d16cb83 verified 7 months ago
#
# 1.77 kB

	model:
	base_url: "http://127.0.0.1:8000"
	max_tokens: 256
	temperature: 0.1
	timeout: 30

	datasets:
	benchmark_dataset:
	file_path: "Personal_De-identifier_Benchmark_SFT.jsonl"
	sample_size: 100 # Use first 100 examples for quick benchmarking
	instruction_field: "instruction"
	input_field: "input"
	expected_output_field: "response"

	metrics:
	# Primary metrics for HuggingFace
	pii_detection:
	name: "PII Detection Rate"
	description: "Percentage of personal identifiers correctly identified and masked"
	type: "accuracy"

	completeness:
	name: "Completeness Score"
	description: "Percentage of texts where all PII was successfully removed"
	type: "binary_accuracy"

	semantic_preservation:
	name: "Semantic Preservation"
	description: "How well the original meaning is preserved (placeholder-based similarity)"
	type: "similarity"

	latency:
	name: "Average Latency"
	description: "Average response time in milliseconds"
	type: "latency"

	# Domain-specific performance
	domain_performance:
	medical:
	name: "Medical Records"
	keywords: ["patient", "doctor", "hospital", "medical", "diagnosis"]
	legal:
	name: "Legal Documents"
	keywords: ["deponent", "attorney", "case", "court", "legal"]
	hr:
	name: "HR Records"
	keywords: ["employee", "salary", "hr", "personnel", "recruitment"]
	customer_service:
	name: "Customer Service"
	keywords: ["customer", "complaint", "service", "support", "inquiry"]
	research:
	name: "Research Data"
	keywords: ["participant", "study", "research", "consent", "ethics"]

	output:
	results_file: "benchmarks.txt"
	detailed_results_file: "benchmark_results.json"
	include_examples: true
	max_examples: 10