hoho / find_best_results.py

Final submission code

9518589 11 months ago

7.11 kB

	#!/usr/bin/env python3
	# filepath: /home/skvrnjan/hoho/find_best_results.py
	# This script scans a directory for result files (text files typically starting
	# with "results_vt" within subdirectories matching a given prefix).
	# It parses these files to extract metrics like Mean HSS, Mean F1, Mean IoU,
	# Vertex Threshold, Edge Threshold, and Only Predicted Connections.
	# The script then identifies and prints the top N results (default N=10)
	# for Mean HSS, Mean F1, and Mean IoU, along with their associated configuration
	# parameters.
	import os
	import re

	N_TOP = 10 # Maximum number of entries kept in each top-results list (read by add_to_top_n and print_top_n_results)

	def parse_result_file(filepath):
	    """
	    Parse a result file and return its metrics as a dictionary.

	    Lines are recognised by their prefix:
	      * "Mean HSS:", "Mean F1:", "Mean IoU:" -- the last whitespace-separated
	        token on the line is converted to float.
	      * "Configuration:" -- the remainder of the line is searched for
	        vertex_threshold, edge_threshold and only_predicted_connections
	        entries.

	    If a prefix occurs more than once, the last occurrence wins.

	    Args:
	        filepath: Path of the text file to read.

	    Returns:
	        dict with the keys
	        {
	            'Mean HSS': float (0.0 if not found),
	            'Mean F1': float (0.0 if not found),
	            'Mean IoU': float (0.0 if not found),
	            'Vertex Threshold': float or None,
	            'Edge Threshold': float or None,
	            'Only Predicted Connections': bool or None
	        }

	    Raises:
	        OSError: if the file cannot be opened.
	        ValueError: if a matched numeric token cannot be converted to float.
	    """
	    metrics = {
	        'Mean HSS': 0.0,
	        'Mean F1': 0.0,
	        'Mean IoU': 0.0,
	        'Vertex Threshold': None,
	        'Edge Threshold': None,
	        'Only Predicted Connections': None
	    }
	    with open(filepath, 'r') as f:
	        # Iterate the handle directly; there is no need to hold all lines in memory.
	        for line in f:
	            if line.startswith("Mean HSS:"):
	                metrics['Mean HSS'] = float(line.split()[-1])
	            elif line.startswith("Mean F1:"):
	                metrics['Mean F1'] = float(line.split()[-1])
	            elif line.startswith("Mean IoU:"):
	                metrics['Mean IoU'] = float(line.split()[-1])
	            elif line.startswith("Configuration:"):
	                config_line = line.partition("Configuration:")[2].strip()
	                vt_match = re.search(r"vertex_threshold': ([0-9.]+)", config_line)
	                et_match = re.search(r"edge_threshold': ([0-9.]+)", config_line)
	                # A plain `|` is regex alternation. An escaped pipe would only
	                # match a literal '|' character and never a real True/False value.
	                opc_match = re.search(r"only_predicted_connections': (True|False)", config_line)
	                if vt_match:
	                    metrics['Vertex Threshold'] = float(vt_match.group(1))
	                if et_match:
	                    metrics['Edge Threshold'] = float(et_match.group(1))
	                if opc_match:
	                    metrics['Only Predicted Connections'] = opc_match.group(1) == "True"
	    return metrics

	def add_to_top_n(top_n_list, item_details):
	    """
	    Offer a candidate to a bounded leaderboard, mutating the list in place.

	    top_n_list: list of dicts with 'value', 'file', 'metrics', ordered by
	        'value' from highest to lowest.
	    item_details: candidate dict with the same keys.

	    While the list holds fewer than N_TOP (module global) entries the
	    candidate is always accepted. Once full, it is accepted only when its
	    'value' is strictly greater than the current last entry, which is then
	    dropped. Returns None.
	    """
	    def by_value(entry):
	        return entry['value']

	    is_full = not (len(top_n_list) < N_TOP)
	    if is_full:
	        # Relies on the list being sorted, so the last entry is the weakest.
	        weakest = top_n_list[-1]
	        if not (item_details['value'] > weakest['value']):
	            return
	        top_n_list.pop()

	    top_n_list.append(item_details)
	    top_n_list.sort(key=by_value, reverse=True)

	def print_top_n_results(category_name, top_n_list):
	    """
	    Print the top results for a given category to stdout.

	    Args:
	        category_name: Metric the list is ranked by ('Mean HSS', 'Mean F1'
	            or 'Mean IoU'); used in the header and as the primary label.
	        top_n_list: list of dicts with 'value', 'file' and 'metrics' keys,
	            printed in the order given.

	    For each entry the rank, file, primary metric, the remaining mean
	    metrics and the configuration values are printed. A configuration value
	    that is missing or None is shown as "N/A". If the list is empty, a
	    single "No valid results" line is printed instead.
	    """
	    if not top_n_list:
	        print(f"No valid results found for {category_name}.")
	        return

	    print(f"\n--- Top {len(top_n_list)} by {category_name} (up to {N_TOP}) ---")
	    for rank, item in enumerate(top_n_list, start=1):
	        metrics = item['metrics']
	        print(f" Rank {rank}:")
	        print(f" File: {item['file']}")
	        print(f" {category_name}: {item['value']:.4f}") # Primary metric for this category

	        # Display the other mean metrics for context, skipping the primary one.
	        for key in ('Mean HSS', 'Mean F1', 'Mean IoU'):
	            if key != category_name:
	                print(f" {key}: {metrics.get(key, 0.0):.4f}")

	        # The key may be present with a None value (nothing was parsed), so a
	        # .get() default alone would never fire; check for None explicitly.
	        for key in ('Vertex Threshold', 'Edge Threshold', 'Only Predicted Connections'):
	            value = metrics.get(key)
	            print(f" {key}: {'N/A' if value is None else value}")

	def main(results_dir, folder_prefix="v4"):
	    """
	    Walk results_dir, rank every result file found, and print the rankings.

	    Only sub-folders whose name starts with folder_prefix are descended
	    into; files located directly in results_dir are considered as well.
	    A result file is any "results_vt*.txt" file. Each one is parsed with
	    parse_result_file and offered to the Mean HSS, Mean F1 and Mean IoU
	    leaderboards (in that order). A file that raises during processing is
	    reported on stdout and skipped, but still counted as scanned.
	    """
	    # One leaderboard per metric; entries are {'value', 'file', 'metrics'} dicts.
	    leaderboards = {"Mean HSS": [], "Mean F1": [], "Mean IoU": []}
	    files_scanned = 0

	    for root, dirs, files in os.walk(results_dir):
	        # Prune in place so os.walk only descends into matching folders.
	        dirs[:] = [name for name in dirs if name.startswith(folder_prefix)]

	        # Skip any non-root folder that has no path component with the prefix.
	        rel_root = os.path.relpath(root, results_dir)
	        if rel_root != '.':
	            path_parts = rel_root.split(os.sep)
	            if not any(part.startswith(folder_prefix) for part in path_parts):
	                continue

	        for file in files:
	            if not (file.endswith(".txt") and file.startswith("results_vt")):
	                continue

	            files_scanned += 1
	            filepath = os.path.join(root, file)
	            try:
	                parsed_metrics = parse_result_file(filepath)
	                relative_filepath = os.path.relpath(filepath, results_dir)

	                # The same metrics dict is shared by all three entries.
	                for metric_name, board in leaderboards.items():
	                    add_to_top_n(board, {'value': parsed_metrics[metric_name],
	                                         'file': relative_filepath,
	                                         'metrics': parsed_metrics})
	            except Exception as e:
	                print(f"Error processing {filepath}: {e}")
	                continue

	    print(f"Files scanned: {files_scanned}")

	    if files_scanned == 0:
	        print(f"No result files found in {results_dir} (folders starting with '{folder_prefix}')")
	        return

	    for metric_name, board in leaderboards.items():
	        print_top_n_results(metric_name, board)

	if __name__ == "__main__":
	    import sys

	    # First CLI argument: directory to scan; a placeholder path is used when omitted.
	    results_dir = sys.argv[1] if len(sys.argv) > 1 else "/path/to/your/results/directory" # MODIFIED: Placeholder for results directory

	    # Optional second CLI argument: folder prefix. The empty-string default
	    # matches every folder name.
	    folder_prefix = "" if len(sys.argv) <= 2 else sys.argv[2]

	    main(results_dir, folder_prefix)