| """MLPerf Hardware Configuration Finder application.""" |
|
|
| import logging |
| import os |
|
|
| import gradio as gr |
| import pandas as pd |
| import plotly.graph_objects as go |
| import polars as pl |
| from cost_calculator import ( |
| calculate_costs, |
| get_device_costs, |
| initialize_device_costs, |
| update_device_costs, |
| ) |
| from plotly.subplots import make_subplots |
| from predictor import PerformancePredictor |
| from recommender import ConfigurationFinder |
|
|
| from utils import get_feature_type, load_data |
|
|
| logging.basicConfig(level=logging.INFO) |
| logger = logging.getLogger(__name__) |
|
|
| logger.info("Loading benchmark data...") |
| df = load_data() |
| pd_df = df.to_pandas() if not df.is_empty() else pd.DataFrame() |
| logger.info(f"Loaded {len(pd_df)} benchmark records total") |
|
|
| initialize_device_costs(pd_df) |
|
|
| predictor = PerformancePredictor(pd_df) if not pd_df.empty else None |
| config_finder = ConfigurationFinder(pd_df) if not pd_df.empty else None |
|
|
|
|
| def extract_metadata(df: pl.DataFrame) -> dict: |
| """Extract metadata for UI filters from dataset.""" |
| metadata = {} |
| if df.is_empty(): |
| return metadata |
|
|
| metadata["architectures"] = sorted( |
| df.filter(pl.col("model.architecture").is_not_null()) |
| .get_column("model.architecture") |
| .unique() |
| .to_list() |
| ) |
|
|
| model_sizes = sorted( |
| df.filter(pl.col("model.number_of_parameters").is_not_null()) |
| .get_column("model.number_of_parameters") |
| .unique() |
| .to_list() |
| ) |
| if model_sizes: |
| metadata["model_sizes"] = model_sizes |
| metadata["model_size_min"] = min(model_sizes) |
| metadata["model_size_max"] = max(model_sizes) |
| metadata["model_size_values"] = sorted(model_sizes) |
|
|
| metadata["weight_data_types"] = sorted( |
| df.filter(pl.col("model.weight_data_types").is_not_null()) |
| .get_column("model.weight_data_types") |
| .unique() |
| .to_list() |
| ) |
|
|
| metadata["accelerator_vendors"] = sorted( |
| df.filter(pl.col("system.accelerator.vendor").is_not_null()) |
| .get_column("system.accelerator.vendor") |
| .unique() |
| .to_list() |
| ) |
|
|
| metadata["cpu_vendors"] = sorted( |
| df.filter(pl.col("system.cpu.vendor").is_not_null()) |
| .get_column("system.cpu.vendor") |
| .unique() |
| .to_list() |
| ) |
|
|
| metadata["accelerator_models"] = sorted( |
| df.filter(pl.col("system.accelerator.name").is_not_null()) |
| .get_column("system.accelerator.name") |
| .unique() |
| .to_list() |
| ) |
|
|
| metadata["cpu_models"] = sorted( |
| df.filter(pl.col("system.cpu.model").is_not_null()) |
| .get_column("system.cpu.model") |
| .unique() |
| .to_list() |
| ) |
|
|
| memory_values = df.filter( |
| pl.col("system.accelerator.memory_capacity").is_not_null() |
| ) |
| metadata["gpu_memory_min"] = max( |
| 1, |
| round( |
| float(memory_values.get_column("system.accelerator.memory_capacity").min()) |
| ), |
| ) |
| metadata["gpu_memory_max"] = min( |
| 1024, |
| round( |
| float(memory_values.get_column("system.accelerator.memory_capacity").max()) |
| ), |
| ) |
|
|
| memory_values = df.filter(pl.col("system.memory.capacity").is_not_null()) |
| metadata["cpu_memory_min"] = max( |
| 1, round(float(memory_values.get_column("system.memory.capacity").min())) |
| ) |
| metadata["cpu_memory_max"] = min( |
| 16384, round(float(memory_values.get_column("system.memory.capacity").max())) |
| ) |
|
|
| metadata["interconnect_types"] = sorted( |
| df.filter(pl.col("system.interconnect.accelerator").is_not_null()) |
| .get_column("system.interconnect.accelerator") |
| .unique() |
| .to_list() |
| ) |
|
|
| acc_counts = sorted( |
| df.filter(pl.col("system.accelerator.total_count").is_not_null()) |
| .get_column("system.accelerator.total_count") |
| .unique() |
| .cast(pl.Int64) |
| .to_list() |
| ) |
| metadata["accelerator_counts"] = acc_counts |
| metadata["min_accelerators"] = min(acc_counts) |
| metadata["max_accelerators"] = max(acc_counts) |
|
|
| metadata["node_counts"] = sorted( |
| df.filter(pl.col("system.number_of_nodes").is_not_null()) |
| .get_column("system.number_of_nodes") |
| .unique() |
| .cast(pl.Int64) |
| .to_list() |
| ) |
|
|
| frameworks = [] |
| for col in df.columns: |
| if col.startswith("software.framework.") and col != "software.framework": |
| framework_name = col.replace("software.framework.", "") |
| frameworks.append(framework_name) |
| versions = ( |
| df.filter(pl.col(col).is_not_null()).get_column(col).unique().to_list() |
| ) |
| if versions: |
| metadata[f"{framework_name}_versions"] = sorted(versions) |
|
|
| metadata["frameworks"] = sorted(frameworks) |
|
|
| metadata["operating_systems"] = sorted( |
| df.filter(pl.col("software.operating_system").is_not_null()) |
| .get_column("software.operating_system") |
| .unique() |
| .to_list() |
| ) |
|
|
| result_per_acc = df.filter(pl.col("metrics.result_per_accelerator").is_not_null()) |
| metadata["result_per_accelerator_ranges"] = { |
| "min": float(result_per_acc.get_column("metrics.result_per_accelerator").min()), |
| "max": float(result_per_acc.get_column("metrics.result_per_accelerator").max()), |
| "median": float( |
| result_per_acc.get_column("metrics.result_per_accelerator").median() |
| ), |
| } |
|
|
| return metadata |
|
|
|
|
| metadata = extract_metadata(df) |
|
|
|
|
| def apply_continuous_feature_tolerance( |
| df: pd.DataFrame, feature: str, value: float, tolerance: float = 0.1 |
| ) -> pd.DataFrame: |
| """Apply tolerance for continuous feature searches.""" |
| lower_bound = value * (1 - tolerance) |
| upper_bound = value * (1 + tolerance) |
| return df[(df[feature] >= lower_bound) & (df[feature] <= upper_bound)] |
|
|
|
|
| def find_best_configs( |
| workload_specs: dict, |
| constraints: dict, |
| include_predictions: bool = True, |
| optimization_metric: str = "performance", |
| ) -> pd.DataFrame: |
| """Find best hardware configurations for workload.""" |
| if pd_df.empty: |
| return pd.DataFrame() |
|
|
| filtered_df = pd_df.copy() |
|
|
| if workload_specs.get("model_size") is not None: |
| filtered_df = apply_continuous_feature_tolerance( |
| filtered_df, |
| "model.number_of_parameters", |
| float(workload_specs["model_size"]), |
| ) |
|
|
| if ( |
| workload_specs.get("weight_data_type") |
| and workload_specs["weight_data_type"] != "Any" |
| ): |
| filtered_df = filtered_df[ |
| filtered_df["model.weight_data_types"] == workload_specs["weight_data_type"] |
| ] |
|
|
| if workload_specs.get("architecture") and workload_specs["architecture"] != "Any": |
| filtered_df = filtered_df[ |
| filtered_df["model.architecture"] == workload_specs["architecture"] |
| ] |
|
|
| clean_constraints = {k: v for k, v in constraints.items() if v and v != "Any"} |
|
|
| for feature, value in clean_constraints.items(): |
| if feature in filtered_df.columns: |
| if get_feature_type(feature) == "continuous": |
| filtered_df = apply_continuous_feature_tolerance( |
| filtered_df, feature, float(value) |
| ) |
| else: |
| filtered_df = filtered_df[filtered_df[feature] == value] |
|
|
| if constraints.get("min_gpu_memory") is not None: |
| filtered_df = filtered_df[ |
| filtered_df["system.accelerator.memory_capacity"] |
| >= constraints["min_gpu_memory"] |
| ] |
|
|
| if constraints.get("max_gpu_memory") is not None: |
| filtered_df = filtered_df[ |
| filtered_df["system.accelerator.memory_capacity"] |
| <= constraints["max_gpu_memory"] |
| ] |
|
|
| if constraints.get("min_cpu_memory") is not None: |
| filtered_df = filtered_df[ |
| filtered_df["system.memory.capacity"] >= constraints["min_cpu_memory"] |
| ] |
|
|
| if constraints.get("max_cpu_memory") is not None: |
| filtered_df = filtered_df[ |
| filtered_df["system.memory.capacity"] <= constraints["max_cpu_memory"] |
| ] |
|
|
| if constraints.get("min_accelerators") is not None: |
| filtered_df = filtered_df[ |
| filtered_df["system.accelerator.total_count"] |
| >= constraints["min_accelerators"] |
| ] |
|
|
| if constraints.get("max_accelerators") is not None: |
| filtered_df = filtered_df[ |
| filtered_df["system.accelerator.total_count"] |
| <= constraints["max_accelerators"] |
| ] |
|
|
| if ( |
| include_predictions |
| and predictor |
| and workload_specs.get("model_size") |
| and workload_specs.get("architecture") |
| ): |
| predicted_df = predictor.generate_predictions( |
| architecture=workload_specs["architecture"], |
| parameters=float(workload_specs["model_size"]), |
| constraints=clean_constraints, |
| num_configs=20, |
| ) |
|
|
| if not predicted_df.empty: |
| predicted_df = calculate_costs(predicted_df) |
|
|
| if not filtered_df.empty: |
| filtered_df = calculate_costs(filtered_df) |
| filtered_df["predicted"] = False |
| combined_df = pd.concat([filtered_df, predicted_df], ignore_index=True) |
| else: |
| combined_df = predicted_df |
|
|
| sort_col = ( |
| "cost_per_million_tokens" |
| if optimization_metric == "cost" |
| else "metrics.result_per_accelerator" |
| ) |
| asc = optimization_metric == "cost" |
| return combined_df.sort_values(by=sort_col, ascending=asc) |
|
|
| if not filtered_df.empty: |
| filtered_df = calculate_costs(filtered_df) |
| filtered_df["predicted"] = False |
|
|
| sort_col = ( |
| "cost_per_million_tokens" |
| if optimization_metric == "cost" |
| else "metrics.result_per_accelerator" |
| ) |
| asc = optimization_metric == "cost" |
| return filtered_df.sort_values(by=sort_col, ascending=asc) |
|
|
| return pd.DataFrame() |
|
|
|
|
| def format_recommendations(configs_df: pd.DataFrame) -> pd.DataFrame: |
| """Format recommendations for display.""" |
| if configs_df.empty: |
| return pd.DataFrame( |
| columns=[ |
| "System", |
| "Accelerator", |
| "Count", |
| "Nodes", |
| "GPU Memory (GB)", |
| "Model", |
| "Architecture", |
| "Parameters (B)", |
| "Weight Data Type", |
| "Total Performance (Tokens/s)", |
| "Per-GPU Performance (Tokens/s)", |
| "Hourly Cost ($)", |
| "Cost/Million Tokens", |
| "Predicted", |
| ] |
| ) |
|
|
| display_columns = { |
| "system.name": "System", |
| "system.accelerator.name": "Accelerator", |
| "system.accelerator.total_count": "Count", |
| "system.number_of_nodes": "Nodes", |
| "system.accelerator.memory_capacity": "GPU Memory (GB)", |
| "model.name": "Model", |
| "model.architecture": "Architecture", |
| "model.number_of_parameters": "Parameters (B)", |
| "model.weight_data_types": "Weight Data Type", |
| "metrics.result": "Total Performance (Tokens/s)", |
| "metrics.result_per_accelerator": "Per-GPU Performance (Tokens/s)", |
| "hourly_cost": "Hourly Cost ($)", |
| "cost_per_million_tokens": "Cost/Million Tokens", |
| "predicted": "Predicted", |
| } |
|
|
| result_df = pd.DataFrame() |
| for col_name, display_name in display_columns.items(): |
| if col_name in configs_df.columns: |
| result_df[display_name] = configs_df[col_name] |
| else: |
| result_df[display_name] = "N/A" if col_name != "predicted" else "No" |
|
|
| numeric_columns = [ |
| "Count", |
| "Nodes", |
| "GPU Memory (GB)", |
| "Parameters (B)", |
| "Total Performance (Tokens/s)", |
| "Per-GPU Performance (Tokens/s)", |
| "Hourly Cost ($)", |
| "Cost/Million Tokens", |
| ] |
|
|
| for col in numeric_columns: |
| if col in result_df.columns: |
| result_df[col] = pd.to_numeric(result_df[col], errors="coerce") |
|
|
| result_df["Total Performance (Tokens/s)"] = result_df[ |
| "Total Performance (Tokens/s)" |
| ].round(4) |
| result_df["Per-GPU Performance (Tokens/s)"] = result_df[ |
| "Per-GPU Performance (Tokens/s)" |
| ].round(4) |
| result_df["GPU Memory (GB)"] = result_df["GPU Memory (GB)"].round(2) |
| result_df["Cost/Million Tokens"] = result_df["Cost/Million Tokens"].round(4) |
| result_df["Hourly Cost ($)"] = result_df["Hourly Cost ($)"].round(4) |
|
|
| if "Parameters (B)" in result_df.columns: |
| result_df["Parameters (B)"] = result_df["Parameters (B)"].round(2) |
|
|
| if "Predicted" in result_df.columns: |
| result_df["Predicted"] = result_df["Predicted"].map( |
| lambda x: "Yes" if x is True else "No" |
| ) |
|
|
| result_df = result_df.drop_duplicates() |
|
|
| return result_df |
|
|
|
|
| def get_top_config_details(configs_df: pd.DataFrame) -> pd.DataFrame: |
| """Extract details for the top recommendation.""" |
| if configs_df.empty: |
| return pd.DataFrame(columns=["Feature", "Value"]) |
|
|
| top_config = configs_df.iloc[0] |
| is_predicted = "predicted" in top_config and top_config["predicted"] |
|
|
| details = { |
| "Feature": [ |
| "System", |
| "Accelerator", |
| "Accelerator Count", |
| "Accelerator Vendor", |
| "Memory Capacity", |
| "CPU", |
| "CPU Vendor", |
| "Nodes", |
| "Devices per Node", |
| "Interconnect", |
| "Total Performance (Tokens/s)", |
| "Per-Accelerator Performance (Tokens/s)", |
| "Hourly Cost (estimated)", |
| "Cost per Million Tokens", |
| "Prediction Status", |
| ], |
| "Value": [ |
| top_config.get("system.name", "N/A"), |
| top_config.get("system.accelerator.name", "N/A"), |
| top_config.get("system.accelerator.total_count", "N/A"), |
| top_config.get("system.accelerator.vendor", "N/A"), |
| ( |
| f"{float(top_config.get('system.accelerator.memory_capacity', 0)):.1f}GB" |
| if top_config.get("system.accelerator.memory_capacity") is not None |
| else "N/A" |
| ), |
| top_config.get("system.cpu.model", "N/A"), |
| top_config.get("system.cpu.vendor", "N/A"), |
| top_config.get("system.number_of_nodes", "N/A"), |
| top_config.get("system.accelerator.count_per_node", "N/A"), |
| top_config.get("system.interconnect.accelerator", "N/A"), |
| ( |
| f"{float(top_config.get('metrics.result', 0)):.4f}" |
| if top_config.get("metrics.result") is not None |
| else "N/A" |
| ), |
| ( |
| f"{float(top_config.get('metrics.result_per_accelerator', 0)):.4f}" |
| if top_config.get("metrics.result_per_accelerator") is not None |
| else "N/A" |
| ), |
| ( |
| f"${float(top_config.get('hourly_cost', 0)):.4f}" |
| if top_config.get("hourly_cost") is not None |
| else "N/A" |
| ), |
| ( |
| f"${float(top_config.get('cost_per_million_tokens', 0)):.4f}" |
| if top_config.get("cost_per_million_tokens") is not None |
| else "N/A" |
| ), |
| "Predicted" if is_predicted else "Actual data", |
| ], |
| } |
|
|
| return pd.DataFrame(details) |
|
|
|
|
| def create_top_configs_plot( |
| configs_df: pd.DataFrame, optimization_metric: str = "performance", top_n: int = 10 |
| ) -> go.Figure: |
| """Create a bar plot of top configurations based on the optimization metric.""" |
| if configs_df.empty: |
| fig = go.Figure() |
| fig.update_layout( |
| title="No configurations found", |
| xaxis_title="Value", |
| yaxis_title="Rank", |
| template="plotly_white", |
| height=600, |
| ) |
| return fig |
|
|
| if optimization_metric == "cost": |
| sort_col = "cost_per_million_tokens" |
| display_col = "Cost/Million Tokens ($)" |
| configs_df = configs_df.sort_values(by=sort_col, ascending=True) |
| else: |
| sort_col = "metrics.result_per_accelerator" |
| display_col = "Performance (Tokens/s per device)" |
| configs_df = configs_df.sort_values(by=sort_col, ascending=False) |
|
|
| top_configs = configs_df.head(top_n) |
|
|
| ranks = [f"#{i + 1}" for i in range(len(top_configs))] |
|
|
| if optimization_metric == "cost": |
| x_values = top_configs["cost_per_million_tokens"] |
| color = "crimson" |
| else: |
| x_values = top_configs["metrics.result_per_accelerator"] |
| color = "royalblue" |
|
|
| hover_text = [] |
| for _, row in top_configs.iterrows(): |
| system = row.get("system.name", "Unknown") |
| acc_name = row.get("system.accelerator.name", "Unknown") |
| acc_count = row.get("system.accelerator.total_count", "?") |
| total_perf = row.get("metrics.result", 0) |
| per_acc_perf = row.get("metrics.result_per_accelerator", 0) |
| cost = row.get("hourly_cost", 0) |
| cost_per_million = row.get("cost_per_million_tokens", 0) or 0 |
| predicted = "Yes" if row.get("predicted", False) else "No" |
|
|
| info = f"System: {system}<br>" |
| info += f"Config: {acc_count}× {acc_name}<br>" |
| info += f"Tokens/s (total): {total_perf:.4f}<br>" |
| info += f"Tokens/s (per device): {per_acc_perf:.4f}<br>" |
| info += f"Hourly cost: ${cost:.4f}<br>" |
| info += f"Cost per million tokens: ${cost_per_million:.4f}<br>" |
| info += f"Predicted: {predicted}" |
| hover_text.append(info) |
|
|
| fig = go.Figure() |
| fig.add_trace( |
| go.Bar( |
| y=ranks, |
| x=x_values, |
| text=x_values.apply(lambda x: f"{x:.4f}"), |
| textposition="auto", |
| marker=dict(color=color), |
| hovertext=hover_text, |
| hoverinfo="text", |
| orientation="h", |
| ) |
| ) |
|
|
| title = f"Top {len(ranks)} Configurations by {'Cost' if optimization_metric == 'cost' else 'Performance'}" |
| fig.update_layout( |
| title=title, |
| xaxis_title=display_col, |
| yaxis_title="Rank", |
| template="plotly_white", |
| height=max(400, min(20 * len(ranks), 800)), |
| margin=dict(l=50), |
| ) |
|
|
| return fig |
|
|
|
|
| def recommend_hardware( |
| model_size: float, |
| weight_data_type: str, |
| architecture: str, |
| accelerator_vendor: str, |
| accelerator_model: str, |
| min_gpu_memory: float | None, |
| max_gpu_memory: float | None, |
| interconnect: str, |
| min_accelerators: int | None, |
| max_accelerators: int | None, |
| cpu_vendor: str, |
| cpu_model: str, |
| nodes: str, |
| min_cpu_memory: float | None, |
| max_cpu_memory: float | None, |
| os: str, |
| include_predictions: bool = True, |
| optimization_metric: str = "performance", |
| top_n_configs: int = 10, |
| **framework_versions, |
| ) -> tuple[pd.DataFrame, pd.DataFrame, str, go.Figure]: |
| """Find hardware configurations matching requirements.""" |
| workload_specs = { |
| "model_size": model_size, |
| "weight_data_type": weight_data_type, |
| "architecture": architecture, |
| } |
|
|
| constraints = { |
| "system.accelerator.vendor": accelerator_vendor, |
| "system.accelerator.name": accelerator_model, |
| "system.interconnect.accelerator": interconnect, |
| "system.cpu.vendor": cpu_vendor, |
| "system.cpu.model": cpu_model, |
| "system.number_of_nodes": nodes if nodes != "Any" else None, |
| "software.operating_system": os, |
| "min_gpu_memory": min_gpu_memory, |
| "max_gpu_memory": max_gpu_memory, |
| "min_cpu_memory": min_cpu_memory, |
| "max_cpu_memory": max_cpu_memory, |
| "min_accelerators": min_accelerators, |
| "max_accelerators": max_accelerators, |
| } |
|
|
| for fw_name, version in framework_versions.items(): |
| if version != "Any": |
| constraints[f"software.framework.{fw_name}"] = version |
|
|
| best_configs = find_best_configs( |
| workload_specs, constraints, include_predictions, optimization_metric |
| ) |
| recommendations_df = format_recommendations(best_configs) |
| details_df = get_top_config_details(best_configs) |
|
|
| top_configs_chart = create_top_configs_plot( |
| best_configs, optimization_metric, top_n_configs |
| ) |
|
|
| if best_configs.empty: |
| summary = "No matching configurations found. Try relaxing some constraints or changing the model parameters." |
| else: |
| actual_count = ( |
| sum(~best_configs["predicted"]) |
| if "predicted" in best_configs.columns |
| else len(best_configs) |
| ) |
| predicted_count = ( |
| sum(best_configs["predicted"]) if "predicted" in best_configs.columns else 0 |
| ) |
|
|
| top_config = best_configs.iloc[0] |
| is_predicted = "predicted" in top_config and top_config["predicted"] |
|
|
| if optimization_metric == "cost": |
| metric_value = f"${float(top_config.get('cost_per_million_tokens', 0)):.4f} per million tokens" |
| metric_name = "cost" |
| else: |
| metric_value = f"{float(top_config.get('metrics.result_per_accelerator', 0)):.4f} tokens/s per device" |
| metric_name = "performance" |
|
|
| acc = top_config.get("system.accelerator.name", "Unknown") |
| count = top_config.get("system.accelerator.total_count", "Unknown") |
|
|
| summary = f"Found {actual_count} actual and {predicted_count} predicted configurations. " |
| summary += f"\nTop recommendation optimized for {metric_name}: {count}× {acc} with {metric_value}" |
| if is_predicted: |
| summary += " (Predicted)" |
|
|
| return recommendations_df, details_df, summary, top_configs_chart |
|
|
|
|
| def create_model_performance_plot( |
| predictor: PerformancePredictor, |
| ) -> tuple[go.Figure, dict, pd.DataFrame]: |
| """Create performance visualization for the ML model using Plotly.""" |
| logger.info("Starting to create model performance plot") |
|
|
| empty_metrics = {"rmse": 0, "mae": 0, "r2": 0, "mape": 0} |
| empty_df = pd.DataFrame(columns=["Feature", "Importance"]) |
|
|
| empty_fig = make_subplots( |
| rows=2, |
| cols=2, |
| subplot_titles=( |
| "Predicted vs Actual Performance", |
| "Residual Plot (% Error)", |
| "Distribution of Prediction Errors", |
| "Top 10 Feature Importance", |
| ), |
| ) |
| empty_fig.update_layout( |
| height=800, |
| width=1200, |
| showlegend=False, |
| title_text="No Model Evaluation Data Available", |
| annotations=[ |
| dict( |
| text="Train the model with test data to see evaluation metrics", |
| showarrow=False, |
| xref="paper", |
| yref="paper", |
| x=0.5, |
| y=0.5, |
| ) |
| ], |
| ) |
|
|
| if predictor is None: |
| logger.warning("No predictor available for performance plot") |
| return empty_fig, empty_metrics, empty_df |
|
|
| if ( |
| not hasattr(predictor, "evaluation_data") |
| or predictor.evaluation_data is None |
| or predictor.evaluation_data.empty |
| ): |
| logger.warning("Evaluation data not found, attempting to re-train model") |
| try: |
| predictor._train_model() |
| except Exception as e: |
| logger.error(f"Error re-training model: {e}") |
|
|
| eval_data = predictor.get_evaluation_data() |
| metrics = predictor.get_evaluation_metrics() |
| feature_importance = predictor.get_feature_importance() |
|
|
| logger.info(f"Retrieved evaluation data: {type(eval_data)}") |
| if eval_data is not None: |
| logger.info( |
| f"Evaluation data shape: {eval_data.shape if not eval_data.empty else 'empty'}" |
| ) |
|
|
| if eval_data is None or eval_data.empty: |
| logger.warning("Evaluation data is not available") |
| return ( |
| empty_fig, |
| empty_metrics, |
| feature_importance if feature_importance is not None else empty_df, |
| ) |
|
|
| logger.info(f"First few rows of evaluation data: {eval_data.head(3).to_dict()}") |
|
|
| fig = make_subplots( |
| rows=2, |
| cols=2, |
| subplot_titles=( |
| "Predicted vs Actual Performance", |
| "Residual Plot (% Error)", |
| "Distribution of Prediction Errors", |
| "Top 10 Feature Importance", |
| ), |
| ) |
|
|
| hover_text = [ |
| f"Accelerator: {acc}<br>" |
| f"Vendor: {vendor}<br>" |
| f"Count: {count}<br>" |
| f"Actual: {actual:.4f}<br>" |
| f"Predicted: {pred:.4f}<br>" |
| f"Error: {error:.2f} ({err_pct:.2f}%)" |
| for acc, vendor, count, actual, pred, error, err_pct in zip( |
| eval_data["system.accelerator.name"], |
| eval_data["system.accelerator.vendor"], |
| eval_data["system.accelerator.total_count"], |
| eval_data["actual"], |
| eval_data["predicted"], |
| eval_data["error"], |
| eval_data["error_percent"], |
| ) |
| ] |
|
|
| fig.add_trace( |
| go.Scatter( |
| x=eval_data["actual"], |
| y=eval_data["predicted"], |
| mode="markers", |
| marker=dict( |
| opacity=0.6, |
| color=eval_data["error_percent"], |
| colorscale="RdBu_r", |
| colorbar=dict(title="Error %"), |
| cmin=-30, |
| cmax=30, |
| ), |
| text=hover_text, |
| hoverinfo="text", |
| name="Predictions", |
| ), |
| row=1, |
| col=1, |
| ) |
|
|
| max_val = max(eval_data["actual"].max(), eval_data["predicted"].max()) |
| min_val = min(eval_data["actual"].min(), eval_data["predicted"].min()) |
|
|
| fig.add_trace( |
| go.Scatter( |
| x=[min_val, max_val], |
| y=[min_val, max_val], |
| mode="lines", |
| line=dict(color="red", dash="dash"), |
| name="Perfect Prediction", |
| hoverinfo="none", |
| ), |
| row=1, |
| col=1, |
| ) |
|
|
| fig.add_trace( |
| go.Scatter( |
| x=eval_data["predicted"], |
| y=eval_data["error_percent"], |
| mode="markers", |
| marker=dict( |
| opacity=0.6, |
| color=eval_data["error_percent"], |
| colorscale="RdBu_r", |
| colorbar=dict(title="Error %"), |
| showscale=False, |
| cmin=-30, |
| cmax=30, |
| ), |
| text=hover_text, |
| hoverinfo="text", |
| name="Errors", |
| ), |
| row=1, |
| col=2, |
| ) |
|
|
| fig.add_trace( |
| go.Histogram( |
| x=eval_data["error_percent"], |
| nbinsx=20, |
| marker=dict(color="blue", opacity=0.7, line=dict(color="black", width=1)), |
| name="Error Distribution", |
| ), |
| row=2, |
| col=1, |
| ) |
|
|
| fig.add_vline(x=0, line_dash="dash", line_color="red", row=2, col=1) |
|
|
| top_features = feature_importance.head(10).sort_values("Importance") |
|
|
| fig.add_trace( |
| go.Bar( |
| y=top_features["Feature"], |
| x=top_features["Importance"], |
| orientation="h", |
| marker=dict(color="blue"), |
| name="Feature Importance", |
| ), |
| row=2, |
| col=2, |
| ) |
|
|
| fig.update_xaxes(title_text="Actual Performance (tokens/s)", row=1, col=1) |
| fig.update_yaxes(title_text="Predicted Performance (tokens/s)", row=1, col=1) |
|
|
| fig.update_xaxes(title_text="Predicted Value", row=1, col=2) |
| fig.update_yaxes(title_text="Error (%)", row=1, col=2) |
|
|
| fig.update_xaxes(title_text="Prediction Error (%)", row=2, col=1) |
| fig.update_yaxes(title_text="Frequency", row=2, col=1) |
|
|
| fig.update_xaxes(title_text="Importance", row=2, col=2) |
|
|
| fig.update_layout( |
| height=800, |
| width=1200, |
| autosize=True, |
| showlegend=False, |
| title_text="Model Performance Analysis", |
| ) |
|
|
| logger.info("Successfully created model performance plot") |
| return fig, metrics, feature_importance.head(10) |
|
|
|
|
| with gr.Blocks(title="MLPerf Configuration Finder") as interface: |
| gr.Markdown( |
| """ |
| # 🔍 MLPerf Configuration Finder (ongoing preliminary work) |
| |
| Find the optimal configurations for your AI workloads by specifying your model and constraints. |
| Results are ranked by performance and include both real benchmark data and AI-generated predictions. |
| |
| *All configurations include a ±10% tolerance for continuous features like model size, memory capacity, etc.* |
| """ |
| ) |
|
|
| with gr.Row(): |
| status_msg = gr.Markdown( |
| "*Ready to search. Enter your criteria and click 'Search Configurations'.*" |
| ) |
|
|
| with gr.Tabs(): |
| with gr.TabItem("Workload Specifications"): |
| with gr.Accordion("Model Specifications", open=True): |
| with gr.Row(): |
| architecture = gr.Dropdown( |
| choices=["Any"] + metadata.get("architectures", []), |
| label="Architecture", |
| value="LLM", |
| info="Model architecture type", |
| ) |
| weight_data_type = gr.Dropdown( |
| choices=["Any"] + metadata.get("weight_data_types", []), |
| label="Weight Data Type", |
| value="Any", |
| info="Precision format for model weights", |
| ) |
|
|
| model_size = gr.Slider( |
| minimum=metadata.get("model_size_min"), |
| maximum=metadata.get("model_size_max"), |
| value=70, |
| step=1, |
| label="Model Size (billions of parameters)", |
| info="Number of parameters in billions", |
| ) |
|
|
| with gr.Accordion("Accelerator (GPU/TPU) Specifications", open=False): |
| with gr.Row(): |
| accelerator_vendor = gr.Dropdown( |
| choices=["Any"] + metadata.get("accelerator_vendors", []), |
| label="Vendor", |
| value="Any", |
| info="Hardware manufacturer", |
| ) |
| accelerator_model = gr.Dropdown( |
| choices=["Any"] + metadata.get("accelerator_models", []), |
| label="Model", |
| value="Any", |
| info="Specific accelerator model", |
| ) |
|
|
| with gr.Row(): |
| min_gpu_memory = gr.Slider( |
| minimum=metadata.get("gpu_memory_min"), |
| maximum=metadata.get("gpu_memory_max"), |
| value=metadata.get("gpu_memory_min"), |
| step=1, |
| label="Min GPU Memory (GB)", |
| info="Minimum GPU memory capacity needed", |
| ) |
| max_gpu_memory = gr.Slider( |
| minimum=metadata.get("gpu_memory_min"), |
| maximum=metadata.get("gpu_memory_max"), |
| value=metadata.get("gpu_memory_max"), |
| step=1, |
| label="Max GPU Memory (GB)", |
| info="Maximum GPU memory capacity to consider", |
| ) |
|
|
| with gr.Row(): |
| interconnect = gr.Dropdown( |
| choices=["Any"] + metadata.get("interconnect_types", []), |
| label="Interconnect", |
| value="Any", |
| info="GPU-to-GPU connection type", |
| ) |
|
|
| with gr.Row(): |
| min_accelerators = gr.Slider( |
| minimum=metadata.get("min_accelerators"), |
| maximum=metadata.get("max_accelerators"), |
| value=metadata.get("min_accelerators"), |
| step=1, |
| label="Minimum Accelerators", |
| info="Minimum number of accelerators needed", |
| ) |
| max_accelerators = gr.Slider( |
| minimum=metadata.get("min_accelerators"), |
| maximum=metadata.get("max_accelerators"), |
| value=metadata.get("max_accelerators"), |
| step=1, |
| label="Maximum Accelerators", |
| info="Maximum number of accelerators to consider", |
| ) |
|
|
| with gr.Accordion("CPU & System Specifications", open=False): |
| with gr.Row(): |
| cpu_vendor = gr.Dropdown( |
| choices=["Any"] + metadata.get("cpu_vendors", []), |
| label="CPU Vendor", |
| value="Any", |
| info="CPU manufacturer", |
| ) |
| cpu_model = gr.Dropdown( |
| choices=["Any"] + metadata.get("cpu_models", []), |
| label="CPU Model", |
| value="Any", |
| info="Specific CPU model", |
| ) |
|
|
| nodes = gr.Dropdown( |
| choices=["Any"] + [str(n) for n in metadata.get("node_counts", [])], |
| label="Number of Nodes", |
| value="Any", |
| info="Number of physical servers in the system", |
| ) |
|
|
| with gr.Row(): |
| min_cpu_memory = gr.Slider( |
| minimum=metadata.get("cpu_memory_min"), |
| maximum=metadata.get("cpu_memory_max"), |
| value=metadata.get("cpu_memory_min"), |
| step=1, |
| label="Min System Memory (GB)", |
| info="Minimum system RAM needed", |
| ) |
| max_cpu_memory = gr.Slider( |
| minimum=metadata.get("cpu_memory_min"), |
| maximum=metadata.get("cpu_memory_max"), |
| value=metadata.get("cpu_memory_max"), |
| step=1, |
| label="Max System Memory (GB)", |
| info="Maximum system RAM to consider", |
| ) |
|
|
| with gr.Accordion("Software Environment", open=False): |
| os = gr.Dropdown( |
| choices=["Any"] + metadata.get("operating_systems", []), |
| label="Operating System", |
| value="Any", |
| info="Host operating system", |
| ) |
|
|
| frameworks = [ |
| fw |
| for fw in metadata.get("frameworks", []) |
| if f"{fw}_versions" in metadata |
| ] |
| n_frameworks = len(frameworks) |
| column_size = (n_frameworks + 1) // 2 |
|
|
| framework_dropdowns = [] |
| with gr.Row(): |
| for i in range(0, 2): |
| with gr.Column(): |
| start_idx = i * column_size |
| end_idx = min((i + 1) * column_size, n_frameworks) |
|
|
| if start_idx < n_frameworks: |
| column_frameworks = frameworks[start_idx:end_idx] |
| for fw in column_frameworks: |
| version_key = f"{fw}_versions" |
| dropdown = gr.Dropdown( |
| choices=["Any"] + metadata.get(version_key), |
| label=fw, |
| value="Any", |
| info=f"Select {fw} framework version", |
| ) |
| framework_dropdowns.append((fw, dropdown)) |
|
|
| with gr.TabItem("Device Cost Settings 💰"): |
| gr.Markdown( |
| """ |
| ## Configure Device Hourly Costs |
| |
| Customize the hourly cost (in USD) for each accelerator type. These values will be used to |
| calculate the cost metrics for hardware configurations. |
| |
| Default values may not reflect actual current market prices. Please adjust them according to your needs. |
| """ |
| ) |
|
|
| with gr.Column(): |
| with gr.Row(): |
| save_costs_button = gr.Button( |
| "💾 Save Cost Settings", variant="primary" |
| ) |
| reset_costs_button = gr.Button("↻ Reset to Defaults") |
|
|
| current_costs = get_device_costs() |
| cost_data = pd.DataFrame( |
| { |
| "Device": list(current_costs.keys()), |
| "Hourly Cost ($)": list(current_costs.values()), |
| } |
| ).sort_values("Device") |
|
|
| device_costs_df = gr.DataFrame( |
| value=cost_data, |
| datatype=["str", "number"], |
| col_count=(2, "fixed"), |
| interactive=True, |
| wrap=True, |
| show_copy_button=True, |
| show_search="filter", |
| ) |
|
|
| costs_status = gr.Markdown("*Device costs ready for customization*") |
|
|
| def update_costs_callback(df): |
| """Update device costs with values from dataframe.""" |
| if isinstance(df, list): |
| new_costs = { |
| row[0]: float(row[1]) for row in df if len(row) >= 2 |
| } |
| else: |
| new_costs = { |
| df.loc[i, "Device"]: float(df.loc[i, "Hourly Cost ($)"]) |
| for i in range(len(df)) |
| } |
|
|
| update_device_costs(new_costs) |
| return "*Device costs successfully updated!*" |
|
|
| def reset_costs_callback(): |
| """Reset all costs to defaults.""" |
| initialize_device_costs(pd_df) |
| current_costs = get_device_costs() |
| cost_data = pd.DataFrame( |
| { |
| "Device": list(current_costs.keys()), |
| "Hourly Cost ($)": list(current_costs.values()), |
| } |
| ).sort_values("Device") |
| return cost_data, "*Device costs reset to defaults*" |
|
|
| save_costs_button.click( |
| fn=update_costs_callback, |
| inputs=device_costs_df, |
| outputs=costs_status, |
| ) |
|
|
| reset_costs_button.click( |
| fn=reset_costs_callback, |
| inputs=[], |
| outputs=[device_costs_df, costs_status], |
| ) |
|
|
| with gr.Row(): |
| with gr.Accordion("Options", open=True): |
| with gr.Row(): |
| include_predictions = gr.Checkbox( |
| label="Include AI-generated predictions", |
| value=True, |
| info="When enabled, AI will predict performance for configurations not in the benchmark database", |
| ) |
| optimization_metric = gr.Radio( |
| choices=["performance", "cost"], |
| label="Optimization Target", |
| value="performance", |
| info="Choose whether to optimize for highest performance or lowest cost per token", |
| ) |
|
|
| with gr.Row(): |
| search_button = gr.Button( |
| "🔍 Search Configurations", variant="primary", scale=3 |
| ) |
|
|
| with gr.Group(): |
| summary = gr.Markdown( |
| "Enter your requirements and click 'Search Configurations' to find suitable hardware.", |
| label="Summary", |
| ) |
|
|
| with gr.Tabs(): |
| with gr.TabItem("Top Configuration Details 🏆"): |
| details = gr.DataFrame( |
| headers=["Feature", "Value"], |
| datatype=["str", "str"], |
| label="Configuration Details", |
| ) |
|
|
| with gr.TabItem("All Matching Configurations 📊"): |
| recommendations = gr.DataFrame( |
| headers=[ |
| "System", |
| "Accelerator", |
| "Count", |
| "Nodes", |
| "GPU Memory (GB)", |
| "Model", |
| "Architecture", |
| "Parameters (B)", |
| "Weight Data Type", |
| "Total Performance (Tokens/s)", |
| "Per-GPU Performance (Tokens/s)", |
| "Hourly Cost ($)", |
| "Cost/Million Tokens", |
| "Predicted", |
| ], |
| datatype=[ |
| "str", |
| "str", |
| "number", |
| "number", |
| "number", |
| "str", |
| "str", |
| "number", |
| "str", |
| "number", |
| "number", |
| "number", |
| "number", |
| "str", |
| ], |
| label="Hardware Configurations", |
| ) |
|
|
| with gr.TabItem("ML Model Performance 📈"): |
| gr.Markdown( |
| """ |
| ## Model Performance Analysis |
| This tab shows how well our machine learning model can predict performance for unseen hardware configurations. |
| The evaluation is based on a test set that was not used to train the model. |
| |
| **Hover over data points in the plots to see detailed information about each prediction.** |
| """ |
| ) |
|
|
| model_metrics = gr.Dataframe( |
| headers=["Metric", "Value"], |
| value=[ |
| ["Root Mean Squared Error (RMSE)", 0], |
| ["Mean Absolute Error (MAE)", 0], |
| ["R² Score", 0], |
| ["Mean Absolute Percentage Error (MAPE)", 0], |
| ], |
| label="Model Performance Metrics", |
| ) |
|
|
| feature_importance_df = gr.Dataframe( |
| headers=["Feature", "Importance"], label="Feature Importance" |
| ) |
|
|
| performance_plot = gr.Plot( |
| label="Performance Visualization", elem_id="performance_plot" |
| ) |
|
|
| with gr.Row(): |
| gr.Markdown("## Top Configurations Comparison") |
|
|
| with gr.Row(): |
| top_n_configs = gr.Slider( |
| minimum=1, |
| maximum=100, |
| value=10, |
| step=1, |
| label="Number of configurations to show", |
| info="Adjust to see more or fewer configurations in the chart", |
| ) |
|
|
| with gr.Row(): |
| top_configs_chart = gr.Plot(label="") |
|
|
| current_configs_state = gr.State(pd.DataFrame()) |
|
|
| all_inputs = [ |
| model_size, |
| weight_data_type, |
| architecture, |
| accelerator_vendor, |
| accelerator_model, |
| min_gpu_memory, |
| max_gpu_memory, |
| interconnect, |
| min_accelerators, |
| max_accelerators, |
| cpu_vendor, |
| cpu_model, |
| nodes, |
| min_cpu_memory, |
| max_cpu_memory, |
| os, |
| include_predictions, |
| optimization_metric, |
| top_n_configs, |
| ] |
|
|
| framework_input_components = [dropdown for _, dropdown in framework_dropdowns] |
|
|
| def process_framework_inputs(*args): |
| base_args = args[: -len(framework_dropdowns)] |
| framework_args = args[-len(framework_dropdowns) :] |
|
|
| framework_versions = {} |
| for (framework_name, _), version in zip(framework_dropdowns, framework_args): |
| if version != "Any": |
| framework_versions[framework_name] = version |
|
|
| opt_metric = base_args[16] |
|
|
| results = recommend_hardware(*base_args, **framework_versions) |
| recommendations_df, details_df, summary, top_chart = results |
|
|
| best_configs = find_best_configs( |
| { |
| "model_size": base_args[0], |
| "weight_data_type": base_args[1], |
| "architecture": base_args[2], |
| }, |
| constraints=get_constraints_from_args(*base_args), |
| include_predictions=base_args[15], |
| optimization_metric=opt_metric, |
| ) |
|
|
| return ( |
| recommendations_df, |
| details_df, |
| summary, |
| top_chart, |
| best_configs, |
| ) |
|
|
| def get_constraints_from_args(*args): |
| """Helper function to convert args to constraints dict.""" |
| return { |
| "system.accelerator.vendor": args[3], |
| "system.accelerator.name": args[4], |
| "system.interconnect.accelerator": args[7], |
| "system.cpu.vendor": args[10], |
| "system.cpu.model": args[11], |
| "system.number_of_nodes": args[12] if args[12] != "Any" else None, |
| "software.operating_system": args[15], |
| "min_gpu_memory": args[5], |
| "max_gpu_memory": args[6], |
| "min_cpu_memory": args[13], |
| "max_cpu_memory": args[14], |
| "min_accelerators": args[8], |
| "max_accelerators": args[9], |
| } |
|
|
| def update_chart(n: int, configs_df: pd.DataFrame, metric: str) -> go.Figure: |
| """Update the configurations chart based on the slider value.""" |
| return create_top_configs_plot(configs_df, metric, n) |
|
|
| search_button.click( |
| fn=process_framework_inputs, |
| inputs=all_inputs + framework_input_components, |
| outputs=[ |
| recommendations, |
| details, |
| summary, |
| top_configs_chart, |
| current_configs_state, |
| ], |
| show_progress="full", |
| ) |
|
|
| top_n_configs.change( |
| fn=update_chart, |
| inputs=[top_n_configs, current_configs_state, optimization_metric], |
| outputs=top_configs_chart, |
| ) |
|
|
| def initial_load(): |
| logger.info("Starting initial load of app") |
| default_values = [] |
| for input_component in all_inputs: |
| default_values.append(input_component.value) |
|
|
| for _, dropdown in framework_dropdowns: |
| default_values.append(dropdown.value) |
|
|
| ( |
| recommendations_df, |
| details_df, |
| summary_text, |
| top_chart, |
| best_configs, |
| ) = process_framework_inputs(*default_values) |
|
|
| if not recommendations_df.empty: |
| top_n_configs.maximum = min(100, len(recommendations_df)) |
|
|
| if predictor: |
| logger.info("Predictor available, generating performance visualization") |
| try: |
| plot_fig, metrics, feature_importance = create_model_performance_plot( |
| predictor |
| ) |
|
|
| metrics_df = pd.DataFrame( |
| { |
| "Metric": [ |
| "Root Mean Squared Error (RMSE)", |
| "Mean Absolute Error (MAE)", |
| "R² Score", |
| "Mean Absolute Percentage Error (MAPE)", |
| ], |
| "Value": [ |
| f"{metrics.get('rmse', 0):.4f}", |
| f"{metrics.get('mae', 0):.4f}", |
| f"{metrics.get('r2', 0):.4f}", |
| f"{metrics.get('mape', 0):.2f}%", |
| ], |
| } |
| ) |
| logger.info(f"Created metrics_df with values: {metrics_df.to_dict()}") |
| except Exception as e: |
| logger.error(f"Error creating performance plot: {e}", exc_info=True) |
| plot_fig = go.Figure() |
| metrics_df = pd.DataFrame( |
| { |
| "Metric": [ |
| "Root Mean Squared Error (RMSE)", |
| "Mean Absolute Error (MAE)", |
| "R² Score", |
| "Mean Absolute Percentage Error (MAPE)", |
| ], |
| "Value": ["N/A", "N/A", "N/A", "N/A"], |
| } |
| ) |
| feature_importance = pd.DataFrame(columns=["Feature", "Importance"]) |
| else: |
| logger.warning("No predictor available for initial load") |
| plot_fig = go.Figure() |
| plot_fig.update_layout( |
| title="No model available", |
| annotations=[ |
| dict( |
| text="No prediction model available", |
| showarrow=False, |
| xref="paper", |
| yref="paper", |
| x=0.5, |
| y=0.5, |
| ) |
| ], |
| ) |
|
|
| metrics_df = pd.DataFrame( |
| { |
| "Metric": [ |
| "Root Mean Squared Error (RMSE)", |
| "Mean Absolute Error (MAE)", |
| "R² Score", |
| "Mean Absolute Percentage Error (MAPE)", |
| ], |
| "Value": ["N/A", "N/A", "N/A", "N/A"], |
| } |
| ) |
| feature_importance = pd.DataFrame(columns=["Feature", "Importance"]) |
|
|
| logger.info("Completed initial load") |
| return ( |
| recommendations_df, |
| details_df, |
| summary_text, |
| plot_fig, |
| metrics_df, |
| feature_importance, |
| top_chart, |
| best_configs, |
| ) |
|
|
| interface.load( |
| fn=initial_load, |
| outputs=[ |
| recommendations, |
| details, |
| summary, |
| performance_plot, |
| model_metrics, |
| feature_importance_df, |
| top_configs_chart, |
| current_configs_state, |
| ], |
| api_name=False, |
| ) |
|
|
| gr.Markdown("---") |
| gr.HTML(""" |
| <div style="text-align: center;"> |
| Authors: <a href="https://www.linkedin.com/in/daltunay">Daniel Altunay</a> and |
| <a href="https://cKnowledge.org/gfursin">Grigori Fursin</a> (FCS Labs) |
| </div> |
| """) |
|
|
| if __name__ == "__main__": |
| interface.launch(share=False) |
|
|