| import os |
| import sys |
| import numpy as np |
| import matplotlib.pyplot as plt |
|
|
| |
# Try to apply the ATLAS plotting style; fall back to matplotlib defaults
# when the optional atlas_mpl_style package is not installed.
try:
    import atlas_mpl_style as ampl
    ampl.use_atlas_style()
    # Widely available font; avoids missing-font warnings on batch nodes.
    plt.rcParams['font.family'] = 'DejaVu Sans'
except ImportError:
    print("Warning: ATLAS style not available, using default matplotlib style")
    plt.style.use('default')


# Command-line interface:
#   --out_dir  directory containing the run to validate (expects arrays/ and logs/)
#   --step     optional single pipeline step (1-5) to validate; when omitted,
#              all steps are validated.
import argparse
parser = argparse.ArgumentParser()
add_arg = parser.add_argument
add_arg('--out_dir', help='output directory')
add_arg('--step', type=int, choices=[1, 2, 3, 4, 5],
        help='Validate only specific step (1-5)')
args = parser.parse_args()
out_dir = args.out_dir
# None when --step is not given -> validate every step.
specific_step = args.step
|
|
|
|
def arrays_match(generated, reference, name: str, atol: float = 1e-10) -> bool:
    """
    Strictly compare two numpy arrays element-wise.

    Only the absolute tolerance matters (rtol is fixed to 0.0); NaNs are
    treated as equal when they occur at identical positions. A concise
    status line is printed and True/False is returned. On mismatch, a
    detailed breakdown is delegated to analyze_array_differences().
    """
    print(f"Validating {name}...")
    if generated.shape != reference.shape:
        print(f" ❌ Shape mismatch: {generated.shape} vs {reference.shape}")
        return False
    if np.allclose(generated, reference, rtol=0.0, atol=atol, equal_nan=True):
        print(f" ✅ {name} matches (atol={atol})")
        return True

    # Not close: summarize how the arrays disagree before the deep dive.
    same_nan_layout = np.array_equal(np.isnan(generated), np.isnan(reference))
    both_finite = (~np.isnan(generated)) & (~np.isnan(reference))
    exact_mismatches = int(np.sum(generated[both_finite] != reference[both_finite]))
    print(f" ❌ {name} differs: NaN mask equal={same_nan_layout}, finite mismatches={exact_mismatches}/{int(both_finite.sum())}")
    if both_finite.any():
        abs_delta = np.abs(generated[both_finite] - reference[both_finite])
        print(f" diff stats: max={abs_delta.max():.6g}, mean={abs_delta.mean():.6g}")

    print("🔍 Running detailed mismatch analysis...")
    analyze_array_differences(generated, reference, name)
    return False
|
|
def calculate_adaptive_tolerance(values, significant_digits=4):
    """
    Compute a per-element absolute tolerance that preserves roughly the
    requested number of significant digits.

    Each non-zero value v gets tolerance |v| / 10**significant_digits;
    zeros get a small fixed default (1e-10) so a zero tolerance is never
    produced.

    Examples (significant_digits=4):
    - Value 123000: tolerance = 123000 / 1e4 = 12.3
    - Value 0.00014: tolerance = 0.00014 / 1e4 = 1.4e-8
    - Value 0: tolerance = 1e-10 (small default)

    Parameters
    ----------
    values : np.ndarray
        Values to derive tolerances for.
    significant_digits : int, optional
        Number of significant digits to preserve (default 4).

    Returns
    -------
    np.ndarray
        Float array with the same shape as ``values`` holding the tolerances.
    """
    # Start with the small default everywhere; only the non-zero entries
    # are rescaled by their own magnitude below.
    non_zero_mask = values != 0
    tolerances = np.full_like(values, 1e-10, dtype=float)

    if np.any(non_zero_mask):
        abs_values = np.abs(values[non_zero_mask])
        tolerances[non_zero_mask] = abs_values / (10 ** significant_digits)

    return tolerances
|
|
def analyze_array_differences(generated, reference, array_name, significant_digits=4):
    """
    Print a detailed mismatch report for two numpy arrays.

    Uses an adaptive, per-element absolute tolerance (see
    calculate_adaptive_tolerance) instead of one fixed tolerance. Emits:
    sample mismatching indices, a per-column breakdown for 2-D arrays,
    and a NaN-count consistency summary. Returns None (report only).
    """
    print(f"\n🔍 Detailed analysis for {array_name} (using {significant_digits} significant digit tolerance):")
    print(f" Generated shape: {generated.shape}, Reference shape: {reference.shape}")
    print(f" Tolerance: Adaptive based on {significant_digits} significant digits per value")

    # Element-wise comparison is only meaningful for identical shapes.
    if generated.shape != reference.shape:
        print(f" ❌ Shape mismatch: {generated.shape} vs {reference.shape}")
        return

    # Tolerances are computed from the concatenation of both arrays...
    combined_values = np.abs(np.concatenate([generated.flatten(), reference.flatten()]))
    adaptive_tolerances = calculate_adaptive_tolerance(combined_values, significant_digits)

    # ...but only the first generated.size entries are kept, so the
    # effective tolerance is based on |generated| alone.
    # NOTE(review): if the intent was a symmetric tolerance, this slice
    # should combine both halves — confirm against the original analysis.
    atol_array = adaptive_tolerances[:generated.size].reshape(generated.shape)

    # Element-wise mismatch mask under the adaptive absolute tolerance.
    diff = generated - reference
    abs_diff = np.abs(diff)
    not_close = abs_diff > atol_array

    # Positions where either array is NaN are excluded from the mismatch
    # count (NaN consistency is reported separately below).
    invalid = np.isnan(generated) | np.isnan(reference)
    not_close = not_close & ~invalid

    total_different = np.sum(not_close)

    if total_different == 0:
        print(" ✅ All elements match within tolerance")
        return

    print(f" ❌ {total_different} elements differ (out of {generated.size} total)")

    # Flatten once so mismatching positions can be reported by linear index.
    flat_gen = generated.flatten()
    flat_ref = reference.flatten()
    flat_not_close = not_close.flatten()

    numeric_mask = (~np.isnan(flat_gen)) & (~np.isnan(flat_ref))
    mismatch_mask = flat_not_close & numeric_mask
    if np.any(mismatch_mask):
        # Show at most the first 10 offending linear indices.
        diff_indices = np.where(mismatch_mask)[0][:10]
        print(" 📊 Sample numeric mismatches (first 10 indices):")
        for idx in diff_indices:
            gen_val = flat_gen[idx]
            ref_val = flat_ref[idx]
            diff_val = gen_val - ref_val
            print(f" Index {idx}: gen={gen_val}, ref={ref_val}, diff={diff_val}")
    else:
        print(" ✅ No numeric mismatches (all differences involve NaNs)")

    # For 2-D arrays, also report which columns disagree and show a small
    # row/column sample table for the worst offenders.
    if generated.ndim == 2:
        col_diffs = np.sum(not_close, axis=0)
        cols_with_diffs = np.where(col_diffs > 0)[0]

        if len(cols_with_diffs) > 0:
            print(f" 📊 Columns with differences: {cols_with_diffs[:10]} (showing first 10)")

            num_cols_to_show = min(10, len(cols_with_diffs))
            num_rows_to_show = min(5, generated.shape[0])

            print(f" 📋 Sample entries (first {num_rows_to_show} rows, first {num_cols_to_show} differing columns):")
            print(" Row | Column | Generated Value | Reference Value | Difference")
            print(" ----|--------|----------------|-----------------|------------")

            for col_idx in cols_with_diffs[:num_cols_to_show]:
                for row_idx in range(num_rows_to_show):
                    gen_val = generated[row_idx, col_idx]
                    ref_val = reference[row_idx, col_idx]
                    diff = gen_val - ref_val

                    # Render NaNs explicitly so the table stays aligned.
                    gen_str = f"{gen_val:.6g}" if not np.isnan(gen_val) else "NaN"
                    ref_str = f"{ref_val:.6g}" if not np.isnan(ref_val) else "NaN"
                    diff_str = f"{diff:.6g}" if not np.isnan(diff) else "NaN"

                    print(f" {row_idx:3d} | {col_idx:3d} | {gen_str:>14} | {ref_str:>15} | {diff_str:>10}")
        else:
            print(" ✅ All columns match within tolerance")
    else:
        print(" 📊 1D array - no column-by-column analysis needed")

    # NaN-count consistency summary; only reported for "large" NaN counts
    # (>1000), which is treated as structural (e.g. padded ragged arrays).
    nan_gen = np.sum(np.isnan(generated))
    nan_ref = np.sum(np.isnan(reference))

    if nan_gen > 1000 or nan_ref > 1000:
        # Compare NaN totals; a ratio > 0.99 is reported as consistent.
        if nan_gen > 0 and nan_ref > 0:
            nan_ratio = min(nan_gen, nan_ref) / max(nan_gen, nan_ref)
            if nan_ratio > 0.99:
                print(" ✅ Data structure consistency: Identical NaN patterns in generated and reference files")
                print(f" - Both files have {nan_gen:,} NaN values (excellent consistency)")
            else:
                print(" ⚠️ Special values detected:")
                if nan_gen > 1000:
                    print(f" - NaN in generated: {nan_gen:,}")
                if nan_ref > 1000:
                    print(f" - NaN in reference: {nan_ref:,}")
        else:
            # One side has zero NaNs while the other has many.
            print(" ⚠️ Special values detected:")
            if nan_gen > 1000:
                print(f" - NaN in generated: {nan_gen:,}")
            if nan_ref > 1000:
                print(f" - NaN in reference: {nan_ref:,}")
def validate_root_summary(llm_content, ref_content):
    """
    Validate root_summary.txt content by checking that all required branch
    names are present as word tokens.

    Focuses on content (branch names) rather than exact format structure.
    Returns True when every required branch name appears in llm_content.

    NOTE(review): ref_content is accepted but never used — the required
    branch set is hard-coded below; confirm whether the reference file was
    meant to drive this list.
    """
    try:
        # Tokenize the LLM output into words (extract_branch_names already
        # returns a set; the set() wrap is redundant but harmless).
        llm_branches = set(extract_branch_names(llm_content))

        # Hard-coded ground truth: every branch expected in the ROOT tree.
        required_branches = {
            'SumWeights', 'XSection', 'channelNumber', 'ditau_m', 'eventNumber',
            'jet_E', 'jet_MV2c10', 'jet_eta', 'jet_jvt', 'jet_n', 'jet_phi', 'jet_pt',
            'jet_pt_syst', 'jet_trueflav', 'jet_truthMatched', 'largeRjet_D2', 'largeRjet_E',
            'largeRjet_eta', 'largeRjet_m', 'largeRjet_n', 'largeRjet_phi', 'largeRjet_pt',
            'largeRjet_pt_syst', 'largeRjet_tau32', 'largeRjet_truthMatched', 'lep_E',
            'lep_charge', 'lep_eta', 'lep_etcone20', 'lep_isTightID', 'lep_n', 'lep_phi',
            'lep_pt', 'lep_pt_syst', 'lep_ptcone30', 'lep_trackd0pvunbiased',
            'lep_tracksigd0pvunbiased', 'lep_trigMatched', 'lep_truthMatched', 'lep_type',
            'lep_z0', 'mcWeight', 'met_et', 'met_et_syst', 'met_phi', 'photon_E',
            'photon_convType', 'photon_eta', 'photon_etcone20', 'photon_isTightID', 'photon_n',
            'photon_phi', 'photon_pt', 'photon_pt_syst', 'photon_ptcone30', 'photon_trigMatched',
            'photon_truthMatched', 'runNumber', 'scaleFactor_BTAG', 'scaleFactor_ELE',
            'scaleFactor_LepTRIGGER', 'scaleFactor_MUON', 'scaleFactor_PHOTON', 'scaleFactor_PILEUP',
            'scaleFactor_PhotonTRIGGER', 'scaleFactor_TAU', 'tau_BDTid', 'tau_E', 'tau_charge',
            'tau_eta', 'tau_isTightID', 'tau_n', 'tau_nTracks', 'tau_phi', 'tau_pt',
            'tau_pt_syst', 'tau_trigMatched', 'tau_truthMatched', 'trigE', 'trigM', 'trigP'
        }

        print(f" 📊 LLM output has {len(llm_branches)} unique words, Required: {len(required_branches)} branches")

        # Report which required branches were actually found (informational).
        found_required_branches = required_branches & llm_branches
        if found_required_branches:
            sorted_found = sorted(found_required_branches)
            print(f" 🔍 Required branch names found in txt file: {', '.join(sorted_found)}")

        # An empty token set means the summary file was empty or unparseable.
        if len(llm_branches) == 0:
            print(" ❌ No branches found in LLM output")
            return False

        # Any missing required branch fails the validation.
        missing_branches = required_branches - llm_branches

        if missing_branches:
            print(f" ❌ Missing {len(missing_branches)} required branches:")
            for branch in sorted(missing_branches):
                print(f" - {branch}")
            return False
        else:
            print(" ✅ All required branches present in LLM output")
            return True

    except Exception as e:
        # Defensive: treat any unexpected parse error as a validation failure.
        print(f" ❌ Error parsing root_summary: {e}")
        return False
|
|
def extract_branch_names(content):
    """
    Extract every word token from root_summary.txt content.

    Splits the text on word boundaries (regex ``\\b\\w+\\b``) so branch
    names can later be matched as whole tokens, and returns the unique
    tokens as a set.
    """
    import re

    return set(re.findall(r'\b\w+\b', content))
|
|
def parse_root_summary(content):
    """
    Parse root_summary.txt content into structured per-file data.

    Supports two layouts: the reference format ("File 1: <name>", with
    "Total objects:", "Trees found:", "Entries:" and "Common branches (N)"
    sections) and the LLM format ("Root file: <path>" followed by
    "TTree: mini" and a branch list).

    Returns a dict mapping filename -> {'total_objects', 'trees',
    'entries', 'total_branches', 'branches'} where 'branches' maps a
    category name to a list of branch names.
    """
    files = {}
    current_file = None
    lines = content.split('\n')
    i = 0

    # Manual index loop: some branches below consume multiple lines and
    # advance i past them.
    while i < len(lines):
        line = lines[i].strip()

        # Reference format header: "File N: <filename>"
        if line.startswith('File ') and ':' in line:
            parts = line.split(': ')
            if len(parts) >= 2:
                filename = parts[1].strip()
                current_file = filename
                files[current_file] = {
                    'total_objects': 0,
                    'trees': 0,
                    'entries': 0,
                    'total_branches': 0,
                    'branches': {}
                }

        # LLM format header: "Root file: <full path>" (basename is the key).
        elif line.startswith('Root file: ') and ':' in line:
            parts = line.split(': ')
            if len(parts) >= 2:
                full_path = parts[1].strip()
                filename = os.path.basename(full_path)
                current_file = filename
                files[current_file] = {
                    'total_objects': 1,
                    'trees': 1,
                    'entries': 0,
                    'total_branches': 0,
                    'branches': {}
                }

        # Metadata lines attached to the most recent file header.
        elif current_file and current_file in files:
            if 'Total objects:' in line:
                try:
                    files[current_file]['total_objects'] = int(line.split(':')[1].strip())
                except Exception:
                    pass
            elif 'Trees found:' in line:
                try:
                    files[current_file]['trees'] = int(line.split(':')[1].strip())
                except Exception:
                    pass
            elif 'Entries:' in line:
                try:
                    files[current_file]['entries'] = int(line.split(':')[1].strip())
                except Exception:
                    pass
            elif 'Common branches (' in line and ')' in line:
                # "Common branches (N):" — N applies to every file seen so far.
                try:
                    count_part = line.split('(')[1].split(')')[0]

                    common_branch_count = int(count_part)

                    for filename in files:
                        files[filename]['total_branches'] = common_branch_count
                except Exception:
                    pass

                # Consume the following "category: b1, b2, ..." lines until a
                # separator line starting with '='.
                branches = {}
                j = i + 1
                while j < len(lines) and not lines[j].strip().startswith('='):
                    branch_line = lines[j].strip()
                    if ': ' in branch_line:
                        category, branch_list = branch_line.split(': ', 1)
                        category = category.strip().lower()
                        branch_names = [b.strip() for b in branch_list.split(',')]
                        branches[category] = branch_names
                    j += 1

                files[current_file]['branches'] = branches
                # j - 1 because the trailing i += 1 below re-advances.
                i = j - 1

            # LLM format tree section: branch names listed one per line.
            elif line == 'TTree: mini':
                branches = {}
                branch_lines = []
                j = i + 1
                while j < len(lines) and lines[j].strip() and not lines[j].strip().startswith('='):
                    branch_line = lines[j].strip()
                    # NOTE(review): branch_line is already stripped, so these
                    # leading-space prefixes may never match — verify the
                    # expected LLM output format actually reaches this parser.
                    if branch_line.startswith(' Branches:'):
                        j += 1
                        continue
                    elif branch_line.startswith(' - '):
                        branch_name = branch_line.replace(' - ', '').strip()
                        branch_lines.append(branch_name)
                    j += 1

                # Bucket each branch into a category by its name prefix.
                photon_branches = []
                jet_branches = []
                met_branches = []
                lep_branches = []
                tau_branches = []
                event_branches = []
                weights_branches = []

                for branch in branch_lines:
                    if branch.startswith('photon_'):
                        photon_branches.append(branch)
                    elif branch.startswith('jet_'):
                        jet_branches.append(branch)
                    elif branch.startswith('met_'):
                        met_branches.append(branch)
                    elif branch.startswith('lep_'):
                        lep_branches.append(branch)
                    elif branch.startswith('tau_'):
                        tau_branches.append(branch)
                    elif branch in ['runNumber', 'eventNumber', 'channelNumber', 'mcWeight', 'trigE', 'trigM', 'trigP', 'ditau_m']:
                        event_branches.append(branch)
                    elif branch in ['SumWeights', 'XSection'] or branch.startswith('scaleFactor_') or branch.startswith('largeRjet_'):
                        weights_branches.append(branch)

                # Only record non-empty categories.
                if photon_branches:
                    branches['photon'] = photon_branches
                if jet_branches:
                    branches['jet'] = jet_branches
                if met_branches:
                    branches['met'] = met_branches
                if lep_branches:
                    branches['lep'] = lep_branches
                if tau_branches:
                    branches['tau'] = tau_branches
                if event_branches:
                    branches['event'] = event_branches
                if weights_branches:
                    branches['weights'] = weights_branches

                files[current_file]['branches'] = branches
                files[current_file]['total_branches'] = len(branch_lines)
                i = j - 1

        i += 1

    return files
|
|
| |
| |
|
|
| |
# --- Reference (solution) arrays for steps 3-5 -------------------------------
# Hard-coded paths on the shared project area; loaded unconditionally, so the
# script fails fast here if the solution arrays are unreachable.
signal_soln = np.load('/global/cfs/projectdirs/atlas/dwkim/llm4hep/solution/arrays/signal.npy')
bkgd_soln = np.load('/global/cfs/projectdirs/atlas/dwkim/llm4hep/solution/arrays/bkgd.npy')
signal_scores_soln = np.load('/global/cfs/projectdirs/atlas/dwkim/llm4hep/solution/arrays/signal_scores.npy')
bkgd_scores_soln = np.load('/global/cfs/projectdirs/atlas/dwkim/llm4hep/solution/arrays/bkgd_scores.npy')
boundaries_soln = np.load('/global/cfs/projectdirs/atlas/dwkim/llm4hep/solution/arrays/boundaries.npy')
significances_soln = np.load('/global/cfs/projectdirs/atlas/dwkim/llm4hep/solution/arrays/significances.npy')


# Directory where the LLM-generated arrays are expected.
base_dir = os.path.join(out_dir, 'arrays')


# Per-step flags: set to True when a step's expected output files are absent.
missing_file_1 = False
missing_file_2 = False
missing_file_3 = False
missing_file_4 = False
missing_file_5 = False
|
|
| |
# --- Step-gated existence checks for the LLM outputs -------------------------
# Each block runs when all steps are validated (specific_step is None) or when
# the matching --step was requested. The inner "if not specific_step or ..."
# guards are redundant with the outer condition but kept as-is.

# Step 1: text outputs under <out_dir>/logs.
if not specific_step or specific_step == 1:
    file_list_llm_path = os.path.join(out_dir, 'logs', 'file_list.txt')
    root_summary_llm_path = os.path.join(out_dir, 'logs', 'root_summary.txt')

    if not (os.path.exists(file_list_llm_path) and os.path.exists(root_summary_llm_path)):
        if not specific_step or specific_step == 1:
            print("Step 1 (summarize_root) outputs missing")
        missing_file_1 = True


# Step 2: raw numpy arrays converted from the ROOT files.
if not specific_step or specific_step == 2:
    data_A_raw_llm_path = os.path.join(base_dir, 'data_A_raw.npy')
    signal_WH_raw_llm_path = os.path.join(base_dir, 'signal_WH_raw.npy')

    if os.path.exists(data_A_raw_llm_path) and os.path.exists(signal_WH_raw_llm_path):
        data_raw_llm = np.load(data_A_raw_llm_path)
        signal_raw_llm = np.load(signal_WH_raw_llm_path)
        if not specific_step or specific_step == 2:
            print("Found required files: data_A_raw.npy and signal_WH_raw.npy")
    else:
        if not specific_step or specific_step == 2:
            print("Step 2 (create_numpy) outputs missing - data_A_raw.npy and/or signal_WH_raw.npy not found")
        missing_file_2 = True


# Step 3: preprocessed signal/background arrays.
if not specific_step or specific_step == 3:
    signal_llm_path = os.path.join(base_dir, 'signal.npy')
    if os.path.exists(signal_llm_path):
        signal_llm = np.load(signal_llm_path)
    else:
        if not specific_step or specific_step == 3:
            print("LLM generated signal sample does not exist (Step 3)")
        missing_file_3 = True

    bkgd_llm_path = os.path.join(base_dir, 'bkgd.npy')
    if os.path.exists(bkgd_llm_path):
        bkgd_llm = np.load(bkgd_llm_path)
    else:
        if not specific_step or specific_step == 3:
            print("LLM generated background sample does not exist (Step 3)")
        missing_file_3 = True


# Step 4: classifier score arrays.
if not specific_step or specific_step == 4:
    signal_scores_llm_path = os.path.join(base_dir, 'signal_scores.npy')
    if os.path.exists(signal_scores_llm_path):
        signal_scores_llm = np.load(signal_scores_llm_path)
    else:
        if not specific_step or specific_step == 4:
            print("LLM generated signal scores do not exist (Step 4)")
        missing_file_4 = True

    bkgd_scores_llm_path = os.path.join(base_dir, 'bkgd_scores.npy')
    if os.path.exists(bkgd_scores_llm_path):
        bkgd_scores_llm = np.load(bkgd_scores_llm_path)
    else:
        if not specific_step or specific_step == 4:
            print("LLM generated background scores do not exist (Step 4)")
        missing_file_4 = True


# Step 5: categorization outputs.
if not specific_step or specific_step == 5:
    boundaries_llm_path = os.path.join(base_dir, 'boundaries.npy')
    if os.path.exists(boundaries_llm_path):
        boundaries_llm = np.load(boundaries_llm_path)
    else:
        if not specific_step or specific_step == 5:
            print("LLM generated boundaries do not exist (Step 5)")
        missing_file_5 = True

    significances_llm_path = os.path.join(base_dir, 'significances.npy')
    if os.path.exists(significances_llm_path):
        significances_llm = np.load(significances_llm_path)
    else:
        if not specific_step or specific_step == 5:
            print("LLM generated significances do not exist (Step 5)")
        missing_file_5 = True
|
|
| |
| signal_raw_llm_path = os.path.join(base_dir, 'signal_raw.npy') |
| data_raw_llm_path = os.path.join(base_dir, 'data_raw.npy') |
|
|
| |
| data_A_raw_llm_path = os.path.join(base_dir, 'data_A_raw.npy') |
| signal_WH_raw_llm_path = os.path.join(base_dir, 'signal_WH_raw.npy') |
|
|
| if os.path.exists(data_A_raw_llm_path) and os.path.exists(signal_WH_raw_llm_path): |
| data_raw_llm = np.load(data_A_raw_llm_path) |
| signal_raw_llm = np.load(signal_WH_raw_llm_path) |
| else: |
| missing_file_2 = True |
|
|
| |
| selective_refs_loaded = False |
| standard_refs_loaded = False |
|
|
| data_A_raw_soln_path = '/global/cfs/projectdirs/atlas/dwkim/llm4hep/solution/arrays/data_A_raw.npy' |
| signal_WH_raw_soln_path = '/global/cfs/projectdirs/atlas/dwkim/llm4hep/solution/arrays/signal_WH_raw.npy' |
| signal_raw_soln_path = '/global/cfs/projectdirs/atlas/dwkim/llm4hep/solution/arrays/signal_raw.npy' |
| data_raw_soln_path = '/global/cfs/projectdirs/atlas/dwkim/llm4hep/solution/arrays/data_raw.npy' |
|
|
| |
| if os.path.exists(data_A_raw_soln_path): |
| data_A_raw_soln = np.load(data_A_raw_soln_path) |
| selective_refs_loaded = True |
| if os.path.exists(signal_WH_raw_soln_path): |
| signal_WH_raw_soln = np.load(signal_WH_raw_soln_path) |
| selective_refs_loaded = True |
| |
| |
| if os.path.exists(signal_raw_soln_path): |
| signal_raw_soln = np.load(signal_raw_soln_path) |
| standard_refs_loaded = True |
| if os.path.exists(data_raw_soln_path): |
| data_raw_soln = np.load(data_raw_soln_path) |
| standard_refs_loaded = True |
|
|
| |
| signal_llm_path = os.path.join(base_dir, 'signal.npy') |
| if os.path.exists(signal_llm_path): |
| signal_llm = np.load(signal_llm_path) |
| else: |
| missing_file_3 = True |
|
|
| bkgd_llm_path = os.path.join(base_dir, 'bkgd.npy') |
| if os.path.exists(bkgd_llm_path): |
| bkgd_llm = np.load(bkgd_llm_path) |
| else: |
| missing_file_3 = True |
|
|
| |
| signal_scores_llm_path = os.path.join(base_dir, 'signal_scores.npy') |
| if os.path.exists(signal_scores_llm_path): |
| signal_scores_llm = np.load(signal_scores_llm_path) |
| else: |
| missing_file_4 = True |
|
|
| bkgd_scores_llm_path = os.path.join(base_dir, 'bkgd_scores.npy') |
| if os.path.exists(bkgd_scores_llm_path): |
| bkgd_scores_llm = np.load(bkgd_scores_llm_path) |
| else: |
| missing_file_4 = True |
|
|
| |
| boundaries_llm_path = os.path.join(base_dir, 'boundaries.npy') |
| if os.path.exists(boundaries_llm_path): |
| boundaries_llm = np.load(boundaries_llm_path) |
| else: |
| missing_file_5 = True |
|
|
| significances_llm_path = os.path.join(base_dir, 'significances.npy') |
| if os.path.exists(significances_llm_path): |
| significances_llm = np.load(significances_llm_path) |
| else: |
| missing_file_5 = True |
|
|
| """ |
| Plotting and derived checks removed per request: validation for steps 2–5 now does |
| direct array comparisons only (generated vs reference). |
| """ |
|
|
| step1_success = False |
| step2_success = False |
| step3_success = False |
| step4_success = False |
| step5_success = False |
|
|
| |
# --- Step 1 validation: compare file_list.txt and root_summary.txt -----------
if (not specific_step or specific_step == 1) and not missing_file_1:
    try:
        print("=== Step 1 Validation (summarize_root) ===")

        ref_file_list_path = '/global/cfs/projectdirs/atlas/dwkim/llm4hep/solution/arrays/file_list.txt'

        # Read both LLM outputs up front.
        with open(file_list_llm_path, 'r') as f:
            file_list_llm = f.read()
        with open(root_summary_llm_path, 'r') as f:
            root_summary_llm = f.read()

        if os.path.exists(ref_file_list_path):
            with open(ref_file_list_path, 'r') as f:
                ref_file_list = f.read()

            # Compare by basename, sorted, so path prefixes and ordering
            # differences between the two lists do not matter.
            def extract_filenames(content):
                lines = [line.strip() for line in content.strip().split('\n') if line.strip()]
                filenames = []
                for line in lines:
                    filename = os.path.basename(line) if '/' in line else line
                    filenames.append(filename)
                return sorted(filenames)

            llm_filenames = extract_filenames(file_list_llm)
            ref_filenames = extract_filenames(ref_file_list)
            file_list_match = llm_filenames == ref_filenames

            if not file_list_match:
                print(f" 📊 LLM files: {len(llm_filenames)} | Reference files: {len(ref_filenames)}")
                if len(llm_filenames) != len(ref_filenames):
                    print(f" ❌ File count mismatch: {len(llm_filenames)} vs {len(ref_filenames)}")
                else:
                    # Same count: report the first differing name. The for/else
                    # re-marks the lists as matching if no difference is found.
                    for i, (llm_file, ref_file) in enumerate(zip(llm_filenames, ref_filenames)):
                        if llm_file != ref_file:
                            print(f" ❌ File {i+1} mismatch: '{llm_file}' vs '{ref_file}'")
                            break
                    else:
                        file_list_match = True

        # NOTE(review): the second argument (reference content) is passed as
        # an empty string and is unused by validate_root_summary — confirm.
        # NOTE(review): if the reference file list is missing, file_list_match
        # is undefined here and the except below turns that into a failure.
        root_summary_match = validate_root_summary(root_summary_llm, "")

        step1_success = file_list_match and root_summary_match

    except Exception as e:
        print(f"Error in Step 1 validation: {e}")
        step1_success = False
|
|
| |
# --- Step 2 validation: raw arrays vs reference ------------------------------
if (not specific_step or specific_step == 2) and not missing_file_2:
    print("=== Step 2 Validation (create_numpy) ===")
    # Pick whichever reference naming scheme was loaded earlier: the
    # "selective" names (data_A_raw / signal_WH_raw) win over the
    # "standard" fallbacks (data_raw / signal_raw).
    data_ref = None
    signal_ref = None
    if 'data_A_raw_soln' in globals():
        data_ref = data_A_raw_soln
    elif 'data_raw_soln' in globals():
        data_ref = data_raw_soln
    if 'signal_WH_raw_soln' in globals():
        signal_ref = signal_WH_raw_soln
    elif 'signal_raw_soln' in globals():
        signal_ref = signal_raw_soln


    ok_data = False
    ok_signal = False
    if data_ref is not None:
        ok_data = arrays_match(data_raw_llm, data_ref, "data_A_raw.npy (or data_raw.npy)")
    else:
        print(" ❌ Missing data reference array (data_A_raw.npy or data_raw.npy)")
    if signal_ref is not None:
        ok_signal = arrays_match(signal_raw_llm, signal_ref, "signal_WH_raw.npy (or signal_raw.npy)")
    else:
        print(" ❌ Missing signal reference array (signal_WH_raw.npy or signal_raw.npy)")
    step2_success = ok_data and ok_signal
    print(f"Step 2 validation: {'PASS' if step2_success else 'FAIL'}")
|
|
| |
# --- Step 3 validation: preprocessed arrays vs reference ---------------------
if (not specific_step or specific_step == 3) and not missing_file_3:
    print("=== Step 3 Validation (preprocess) ===")
    ok_signal = arrays_match(signal_llm, signal_soln, "signal.npy")
    ok_bkgd = arrays_match(bkgd_llm, bkgd_soln, "bkgd.npy")
    step3_success = ok_signal and ok_bkgd

# --- Step 4 validation: classifier scores vs reference -----------------------
if (not specific_step or specific_step == 4) and not missing_file_4:
    print("=== Step 4 Validation (scores) ===")
    ok_sig_scores = arrays_match(signal_scores_llm, signal_scores_soln, "signal_scores.npy")
    ok_bkg_scores = arrays_match(bkgd_scores_llm, bkgd_scores_soln, "bkgd_scores.npy")
    step4_success = ok_sig_scores and ok_bkg_scores


# --- Step 5 validation: categorization outputs vs reference ------------------
if (not specific_step or specific_step == 5) and not missing_file_5:
    print("=== Step 5 Validation (categorization) ===")
    ok_boundaries = arrays_match(boundaries_llm, boundaries_soln, "boundaries.npy")
    ok_significances = arrays_match(significances_llm, significances_soln, "significances.npy")
    step5_success = ok_boundaries and ok_significances
|
|
| |
# Per-step pass/fail as 0/1 ints, indexed by step number - 1.
success_results = [int(step1_success), int(step2_success), int(step3_success), int(step4_success), int(step5_success)]


# --- Human-readable summary: which files were validated ----------------------
print("\n=== VALIDATION SUMMARY ===")
if specific_step:
    step_names = ["summarize_root", "create_numpy", "preprocess", "scores", "categorization"]
    step_name = step_names[specific_step - 1]
    print(f"Step: {specific_step} ({step_name})")
    if specific_step == 1:
        print("Files validated:")
        print(" • file_list.txt - List of processed ROOT files")
        print(" • root_summary.txt - Branch structure and file metadata")
    elif specific_step == 2:
        print("Files validated:")
        print(" • data_A_raw.npy - Raw data array (must have 46 columns)")
        print(" • signal_WH_raw.npy - Raw signal array (must have 46 columns)")
    elif specific_step == 3:
        print("Files validated:")
        print(" • signal.npy - Preprocessed signal events")
        print(" • bkgd.npy - Preprocessed background events")
    elif specific_step == 4:
        print("Files validated:")
        print(" • signal_scores.npy - Signal event classification scores")
        print(" • bkgd_scores.npy - Background event classification scores")
    elif specific_step == 5:
        print("Files validated:")
        print(" • boundaries.npy - Category boundary thresholds")
        print(" • significances.npy - Statistical significance values")
else:
    print("All steps validated")
|
| |
|
|
| |
# --- Final status report and exit --------------------------------------------
if specific_step:
    step_names = ["summarize_root", "create_numpy", "preprocess", "scores", "categorization"]
    step_name = step_names[specific_step - 1]

    # Status for the one requested step: PASS/FAIL when its outputs exist,
    # MISSING otherwise.
    if specific_step == 1 and not missing_file_1:
        status = "PASS" if step1_success else "FAIL"
    elif specific_step == 2 and not missing_file_2:
        status = "PASS" if step2_success else "FAIL"
    elif specific_step == 3 and not missing_file_3:
        status = "PASS" if step3_success else "FAIL"
    elif specific_step == 4 and not missing_file_4:
        status = "PASS" if step4_success else "FAIL"
    elif specific_step == 5 and not missing_file_5:
        status = "PASS" if step5_success else "FAIL"
    else:
        status = "MISSING"

    print(f"\nStep {specific_step} ({step_name}): {status}")

    if status == "PASS":
        print("✅ Validation successful")
    elif status == "FAIL":
        print("❌ Validation failed")
    else:
        print("⚠️ Step outputs missing")
else:
    # All-steps mode: build a status string per step.
    step_status = []
    for i, (success, missing) in enumerate([(step1_success, missing_file_1),
                                            (step2_success, missing_file_2),
                                            (step3_success, missing_file_3),
                                            (step4_success, missing_file_4),
                                            (step5_success, missing_file_5)], 1):
        if missing:
            step_status.append("MISSING")
        elif success:
            step_status.append("PASS")
        else:
            step_status.append("FAIL")


    print(f"Step 1 (summarize_root): {step_status[0]}")
    print(f"Step 2 (create_numpy): {step_status[1]}")
    print(f"Step 3 (preprocess): {step_status[2]}")
    print(f"Step 4 (scores): {step_status[3]}")
    print(f"Step 5 (categorization): {step_status[4]}")


# Overall tallies.
if specific_step:
    validated_steps = 1
    passed_steps = 1 if success_results[specific_step-1] and not [missing_file_1, missing_file_2, missing_file_3, missing_file_4, missing_file_5][specific_step-1] else 0
    print(f"\nResult: {passed_steps}/{validated_steps} step passed")
else:
    validated_steps = sum(1 for missing in [missing_file_1, missing_file_2, missing_file_3, missing_file_4, missing_file_5] if not missing)
    passed_steps = sum(success_results)
    print(f"Overall success: {passed_steps}/{validated_steps} validated steps passed")
    print(f"Success array: {success_results}")


# NOTE(review): the exit code is always 0, even when steps FAIL or are
# MISSING, so CI callers cannot detect failure from the return code —
# confirm whether a non-zero exit on failure is wanted.
sys.exit(0)