# hoho / find_best_results.py
# jskvrna's picture
# Final submission code
# 9518589
#!/usr/bin/env python3
# filepath: /home/skvrnjan/hoho/find_best_results.py
# This script scans a directory for result files (text files typically starting
# with "results_vt" within subdirectories matching a given prefix).
# It parses these files to extract metrics like Mean HSS, Mean F1, Mean IoU,
# Vertex Threshold, Edge Threshold, and Only Predicted Connections.
# The script then identifies and prints the top N results (default N=10)
# for Mean HSS, Mean F1, and Mean IoU, along with their associated configuration
# parameters.
import os
import re
# Maximum number of entries kept in each "top results" list; read by
# add_to_top_n() and shown in the headings printed by print_top_n_results().
N_TOP = 10
def parse_result_file(filepath):
    """
    Parse a results text file and return its metrics and configuration.

    Lines are recognised by their prefix:

    * ``Mean HSS:``, ``Mean F1:``, ``Mean IoU:`` -- the last
      whitespace-separated token of the line is converted to ``float``.
    * ``Configuration:`` -- the remainder of the line is searched for
      ``vertex_threshold': <number>``, ``edge_threshold': <number>`` and
      ``only_predicted_connections': True|False``.

    If a prefix occurs on several lines, the last occurrence wins.

    Args:
        filepath: Path of the result file to read (decoded as UTF-8).

    Returns:
        A dictionary with the metrics:
        {
            'Mean HSS': float (0.0 if the line is absent),
            'Mean F1': float (0.0 if the line is absent),
            'Mean IoU': float (0.0 if the line is absent),
            'Vertex Threshold': float or None if not found,
            'Edge Threshold': float or None if not found,
            'Only Predicted Connections': bool or None if not found
        }

    Raises:
        OSError: If the file cannot be opened or read.
        ValueError: If a metric line does not end in a number, or the file
            is not valid UTF-8 (UnicodeDecodeError is a ValueError).
    """
    metrics = {
        'Mean HSS': 0.0,
        'Mean F1': 0.0,
        'Mean IoU': 0.0,
        'Vertex Threshold': None,
        'Edge Threshold': None,
        'Only Predicted Connections': None,
    }
    metric_keys = ('Mean HSS', 'Mean F1', 'Mean IoU')
    threshold_keys = (
        ('Vertex Threshold', "vertex_threshold"),
        ('Edge Threshold', "edge_threshold"),
    )
    # Accept everything float() prints, including scientific notation such
    # as 1e-05; a plain "[0-9.]+" would capture only the leading "1" there
    # and silently report the threshold as 1.0.
    number = r"([-+]?(?:[0-9]+\.?[0-9]*|\.[0-9]+)(?:[eE][-+]?[0-9]+)?)"

    with open(filepath, 'r', encoding='utf-8') as f:
        for line in f:
            for key in metric_keys:
                if line.startswith(key + ":"):
                    # The value is the last token, e.g. "Mean HSS: 0.6123".
                    metrics[key] = float(line.split()[-1])
                    break
            else:
                if not line.startswith("Configuration:"):
                    continue
                config_line = line.partition("Configuration:")[2].strip()
                for key, config_name in threshold_keys:
                    found = re.search(config_name + "': " + number, config_line)
                    if found:
                        metrics[key] = float(found.group(1))
                opc_match = re.search(
                    r"only_predicted_connections': (True|False)", config_line
                )
                if opc_match:
                    metrics['Only Predicted Connections'] = (
                        opc_match.group(1) == "True"
                    )
    return metrics
def add_to_top_n(top_n_list, item_details, n=None):
    """
    Insert an item into a bounded leaderboard sorted by 'value', best first.

    The list is modified in place. While it holds fewer than the limit, the
    item is always added. Once full, the item replaces the current worst
    entry only if its 'value' is strictly greater; ties keep the entry that
    is already in the list.

    Args:
        top_n_list: list of dicts with a 'value' key, already sorted by
            'value' in descending order (the last element is the worst).
        item_details: dict with 'value', 'file', 'metrics'.
        n: Maximum number of entries to keep. Defaults to the module-level
            N_TOP when omitted. A limit of 0 or less keeps nothing.

    Returns:
        None.
    """
    limit = N_TOP if n is None else n
    if limit <= 0:
        # Nothing may be kept; also avoids indexing [-1] on an empty list.
        return

    if len(top_n_list) >= limit:
        # Relies on the list being sorted: the last entry is the worst one.
        if not item_details['value'] > top_n_list[-1]['value']:
            return
        top_n_list.pop()

    top_n_list.append(item_details)
    # list.sort is stable, so among equal values earlier entries stay first.
    top_n_list.sort(key=lambda entry: entry['value'], reverse=True)
def print_top_n_results(category_name, top_n_list):
    """
    Print a ranked report for one metric category to standard output.

    For every entry the primary metric is printed first, followed by the
    other two mean metrics and the configuration values. A configuration
    value that is missing or None is shown as 'N/A'.

    Args:
        category_name: Heading and primary metric name, e.g. 'Mean HSS'.
        top_n_list: list of dicts with 'value', 'file' and 'metrics' keys,
            printed in the order given (rank 1 first).

    Returns:
        None. If top_n_list is empty, a single "no valid results" line is
        printed instead of the report.
    """
    if not top_n_list:
        print(f"No valid results found for {category_name}.")
        return

    print(f"\n--- Top {len(top_n_list)} by {category_name} (up to {N_TOP}) ---")

    # Secondary metrics shown for context; the primary one is not repeated.
    context_keys = [
        key for key in ('Mean HSS', 'Mean F1', 'Mean IoU') if key != category_name
    ]
    config_keys = ('Vertex Threshold', 'Edge Threshold', 'Only Predicted Connections')

    for rank, item in enumerate(top_n_list, start=1):
        metrics = item['metrics']
        print(f" Rank {rank}:")
        print(f" File: {item['file']}")
        print(f" {category_name}: {item['value']:.4f}")
        for key in context_keys:
            # .get keeps the report going if a metric key is absent.
            print(f" {key}: {metrics.get(key, 0.0):.4f}")
        for key in config_keys:
            value = metrics.get(key)
            # The parser stores None for settings it could not find, so a
            # .get() default alone would never produce 'N/A'. Compare with
            # None explicitly so that a legitimate False is still printed.
            shown = 'N/A' if value is None else value
            print(f" {key}: {shown}")
def main(results_dir, folder_prefix="v4"):
    """
    Scan results_dir for result files and print the best ones per metric.

    Only top-level folders of results_dir whose name starts with
    folder_prefix are entered; everything below such a folder is scanned,
    whatever the nested folder names are. Files located directly in
    results_dir are scanned as well. A file counts as a result file when
    its name starts with "results_vt" and ends with ".txt".

    Args:
        results_dir: Root directory to walk.
        folder_prefix: Required name prefix of the top-level folders. An
            empty string matches every folder.

    Returns:
        None. The report is written to standard output. A file that cannot
        be processed is reported and skipped; it still counts as scanned.
    """
    # One leaderboard per metric; each entry is
    # {'value': float, 'file': str, 'metrics': dict}.
    top_hss_results = []
    top_f1_results = []
    top_iou_results = []
    leaderboards = (
        ("Mean HSS", top_hss_results),
        ("Mean F1", top_f1_results),
        ("Mean IoU", top_iou_results),
    )
    files_scanned = 0

    for root, dirs, files in os.walk(results_dir):
        # Prune by prefix at the top level only. Pruning at every depth
        # would also drop non-prefixed subdirectories of a matching folder,
        # which are meant to be scanned. After this pruning, every other
        # directory that os.walk yields lies below a matching folder.
        if os.path.relpath(root, results_dir) == os.curdir:
            dirs[:] = [d for d in dirs if d.startswith(folder_prefix)]

        for filename in files:
            if not (filename.endswith(".txt") and filename.startswith("results_vt")):
                continue
            files_scanned += 1
            filepath = os.path.join(root, filename)
            try:
                parsed_metrics = parse_result_file(filepath)
                relative_filepath = os.path.relpath(filepath, results_dir)
                for category, board in leaderboards:
                    add_to_top_n(board, {
                        'value': parsed_metrics[category],
                        'file': relative_filepath,
                        'metrics': parsed_metrics,
                    })
            except Exception as e:
                # Best effort: one unreadable or malformed file must not
                # abort the whole scan.
                print(f"Error processing {filepath}: {e}")
                continue

    print(f"Files scanned: {files_scanned}")
    if files_scanned == 0:
        print(f"No result files found in {results_dir} (folders starting with '{folder_prefix}')")
        return

    for category, board in leaderboards:
        print_top_n_results(category, board)
if __name__ == "__main__":
    import sys

    # Positional command-line arguments, without the script name.
    cli_args = sys.argv[1:]

    # First argument: directory to scan. Without it, a placeholder path is used.
    results_dir = cli_args[0] if cli_args else "/path/to/your/results/directory"

    # Optional second argument: folder prefix; an empty prefix matches all folders.
    folder_prefix = cli_args[1] if len(cli_args) > 1 else ""

    main(results_dir, folder_prefix)