File size: 2,258 Bytes
{
"model_id": "mindbomber/aana",
"architecture": "Alignment-Aware Neural Architecture",
"system_model": "S = (f_theta, E_phi, R, Pi_psi, G)",
"results": [
{
"benchmark": "PIIMB",
"submission": "https://huggingface.co/datasets/piimb/pii-masking-benchmark-results/discussions/3",
"model_card": "https://huggingface.co/mindbomber/aana-presidio-piimb-policy-v1",
"dataset": "piimb/pii-masking-benchmark",
"dataset_revision": "df8299e90ff053fa6fd1d3678f6693a454f4ecc0",
"subset": "sentences",
"base_detector": "microsoft/presidio-analyzer",
"base_average_masking_f2": 0.4492985573,
"aana_average_masking_f2": 0.5629171363,
"delta_average_masking_f2": 0.113618579,
"base_average_recall": 0.4008557794,
"aana_average_recall": 0.5159532273,
"delta_average_recall": 0.1150974479,
"scope": "official PIIMB submission showing AANA verifier/correction gain over the same specialist detector"
},
{
"benchmark": "PIIMB",
"submission": "https://huggingface.co/datasets/piimb/pii-masking-benchmark-results/discussions/2",
"model_card": "https://huggingface.co/mindbomber/aana-piimb-policy-baseline",
"dataset": "piimb/pii-masking-benchmark",
"dataset_revision": "df8299e90ff053fa6fd1d3678f6693a454f4ecc0",
"subset": "sentences",
"aana_average_masking_f2": 0.5195345497,
"scope": "official PIIMB submission for a zero-parameter deterministic policy baseline"
},
{
"benchmark": "TruthfulQA",
"dataset": "truthfulqa/truthful_qa",
"configuration": "multiple_choice",
"split": "validation",
"sample_size": 100,
"base_generator": "openai/gpt-4o-mini",
"mc1_accuracy": 0.85,
"scope": "local AANA-gated run and public artifact publication, not an official leaderboard submission"
}
],
"claim_limits": [
"AANA is a runtime architecture, not a standalone neural-weight checkpoint.",
"Current public results do not claim state-of-the-art performance.",
"Current public results do not guarantee hallucination removal, PII removal, or production safety.",
"Production readiness requires external deployment evidence beyond local benchmark results."
]
}