---
# Benchmark configuration for a named-entity-recognition (NER) evaluation.
#
# NOTE(review): the nesting in this file was reconstructed from a copy whose
# indentation had been lost (every line was wrapped in table pipes). Keys and
# values are unchanged; confirm the hierarchy against the code that loads it.

# Model endpoint and generation settings.
model:
  base_url: "http://127.0.0.1:8000"
  max_tokens: 512
  temperature: 0.1
  # NOTE(review): unit is not stated in this file — presumably seconds; confirm.
  timeout: 30

# Input datasets, keyed by dataset name.
datasets:
  benchmark_dataset:
    file_path: "ner_benchmark_dataset.jsonl"
    sample_size: 100
    # Names of the per-record fields holding the prompt parts and the
    # reference answer.
    instruction_field: "instruction"
    input_field: "input"
    expected_output_field: "response"

# Metrics to report. Each entry carries a display name, a description and a
# metric type.
metrics:
  entity_recognition:
    name: "Entity Recognition F1 Score"
    description: "F1 score for named entity recognition accuracy"
    type: "f1"

  precision:
    name: "Precision Score"
    description: "Precision for entity recognition"
    type: "precision"

  recall:
    name: "Recall Score"
    description: "Recall for entity recognition"
    type: "recall"

  latency:
    name: "Average Latency"
    description: "Average response time in milliseconds"
    type: "latency"

  # Per-entity-type breakdown: a display name plus the label spellings listed
  # for that type.
  # NOTE(review): placed under `metrics` based on the layout of the recovered
  # copy; if the loader expects `entity_types` at the top level, dedent this
  # stanza by one level.
  entity_types:
    person:
      name: "Person Entity Recognition"
      keywords: ["PERSON", "person", "Person"]
    organization:
      name: "Organization Entity Recognition"
      keywords: ["ORG", "organization", "Organization"]
    location:
      name: "Location Entity Recognition"
      keywords: ["LOC", "location", "Location"]
    miscellaneous:
      name: "Miscellaneous Entity Recognition"
      keywords: ["MISC", "miscellaneous", "Miscellaneous"]

# Where results are written and how many examples to include.
output:
  results_file: "benchmarks.txt"
  detailed_results_file: "benchmark_results.json"
  include_examples: true
  max_examples: 10