Spaces:
Running
Running
# ui/tab_tables.py
"""
Tab6: Tables — Paper-ready table generation for Wang's Five Laws.
Data: language modality, standard layers only, from SQLite DB.
Output: Gradio DataFrames + LaTeX + Markdown + CSV downloads.
"""
| import io | |
| import os | |
| import zipfile | |
| import tempfile | |
| import gradio as gr | |
| import pandas as pd | |
| from db.schema import init_db | |
| from db.reader import get_layer_metrics, get_analyzed_models | |
| from core.table_gen import ( | |
| generate_all_tables, | |
| format_all_latex, | |
| format_all_markdown, | |
| TABLE_META, | |
| ) | |
# Directory that receives every generated artifact (per-table CSVs, the
# .tex/.md files and the ZIP bundle). It is created at import time so the
# save helpers below can write into it without checking for its existence.
_OUT_DIR = "/tmp/wang_tables"
os.makedirs(_OUT_DIR, exist_ok=True)
# ─────────────────────────────────────────────────────────────────────────────
# DB helpers
# ─────────────────────────────────────────────────────────────────────────────
def _get_model_choices() -> list[str]:
    """
    Return the IDs of all analyzed models, for use as UI choices.

    Best-effort: any failure while opening the DB or reading the model
    table yields an empty list instead of propagating, so the UI can
    still be built when the database is missing or empty.
    """
    try:
        models = get_analyzed_models(init_db())
        if models.empty:
            return []
        return models["model_id"].tolist()
    except Exception:
        # Deliberately broad: the selector simply starts empty on failure.
        return []
def _load_all_models(choices: list[str]) -> dict[str, pd.DataFrame]:
    """
    Fetch language-modality layer metrics for each requested model.

    Args:
        choices: Model IDs to load.

    Returns:
        Mapping of model ID to its metrics DataFrame. Models whose query
        returns an empty DataFrame are left out of the mapping.
    """
    conn = init_db()
    # Shared query filters. layer_type=None keeps both layer types here;
    # table_gen filters internally.
    filters = {
        "modality": "language",
        "layer_type": None,
        "start_layer": 0,
        "end_layer": 9999,
    }
    loaded = {}
    for model_id in choices:
        metrics = get_layer_metrics(conn, model_id=model_id, **filters)
        if metrics.empty:
            continue
        loaded[model_id] = metrics
    return loaded
def _parse_groups(text: str) -> list[tuple[int, int]]:
    """
    Parse user-defined layer groups.

    Format: "0-11, 12-23, 24-35, 36-47"

    Each comma-separated part is split on its first "-" into a low and a
    high bound. Parts without a "-" or with a non-integer bound are
    skipped. Bounds are returned as entered; no ordering check is made.

    Args:
        text: Raw textbox content. ``None`` is treated like an empty string.

    Returns:
        List of (lo, hi) tuples, or the default four 12-layer groups when
        no valid part is found.
    """
    groups = []
    # Guard against None so a missing value falls back to the defaults
    # instead of raising AttributeError on .split().
    for part in (text or "").split(","):
        lo_s, dash, hi_s = part.partition("-")
        if not dash:
            continue
        try:
            # int() ignores surrounding whitespace, so no explicit strip.
            groups.append((int(lo_s), int(hi_s)))
        except ValueError:
            continue
    return groups or [(0, 11), (12, 23), (24, 35), (36, 47)]
def _save_csv(df: pd.DataFrame, name: str) -> str:
    """Write *df* (without its index) to ``<_OUT_DIR>/<name>.csv`` and return that path."""
    target = os.path.join(_OUT_DIR, name + ".csv")
    df.to_csv(target, index=False)
    return target
def _make_zip(paths: list[str]) -> str:
    """
    Bundle the given files into ``wang_laws_tables.zip`` inside _OUT_DIR.

    Empty/None entries and paths that do not exist on disk are skipped.
    Files are stored flat under their base name. Returns the archive path.
    """
    archive_path = os.path.join(_OUT_DIR, "wang_laws_tables.zip")
    with zipfile.ZipFile(archive_path, mode="w", compression=zipfile.ZIP_DEFLATED) as archive:
        for file_path in paths:
            if not file_path or not os.path.exists(file_path):
                continue
            archive.write(file_path, arcname=os.path.basename(file_path))
    return archive_path
def _save_text(content: str, name: str) -> str:
    """Write *content* as UTF-8 text to ``<_OUT_DIR>/<name>`` and return that path."""
    destination = os.path.join(_OUT_DIR, name)
    with open(destination, mode="w", encoding="utf-8") as handle:
        handle.write(content)
    return destination
# ─────────────────────────────────────────────────────────────────────────────
# Main generation function
# ─────────────────────────────────────────────────────────────────────────────
def generate_tables(
    selected_models: list[str],
    table2_model_a: str,
    table2_model_b: str,
    group_text: str,
    progress=gr.Progress(),
):
    """
    Generate all six tables plus their LaTeX/Markdown/CSV/ZIP artifacts.

    Args:
        selected_models: Model IDs to load from the database.
        table2_model_a: Model used as side A of the Table 2 comparison;
            ignored (treated as None) if no data was loaded for it.
        table2_model_b: Model used as side B of the Table 2 comparison;
            ignored if no data was loaded for it or if it equals side A.
        group_text: Layer-group specification, e.g. "0-11, 12-23".
        progress: Gradio progress tracker used for status updates.

    Returns:
        An 18-tuple, one value per wired output component:
            status, t1_df, t2_df, t3_df, t4_df, t5_df, t6_df,
            latex_text, md_text,
            csv_t1, csv_t2, csv_t3, csv_t4, csv_t5, csv_t6,
            latex_file, md_file, zip_file
        On an early exit (no models selected / no data found) the status
        message is followed by 17 ``None`` values.
    """
    # Every return path must have the same arity as the success path
    # (status + 17 payload slots); a shorter tuple would not match the
    # 18 output components this function is wired to.
    blanks = (None,) * 17

    if not selected_models:
        return ("β Please select at least one model.",) + blanks

    progress(0.05, desc="Loading data from DB...")
    model_dfs = _load_all_models(selected_models)
    if not model_dfs:
        return ("β No language-modality data found. Run Tab 2 analysis first.",) + blanks

    progress(0.25, desc="Parsing layer groups...")
    group_bounds = _parse_groups(group_text)

    # Validate Table 2 model selection: only models that actually loaded
    # are usable, and comparing a model with itself collapses to A only.
    name_a = table2_model_a if table2_model_a in model_dfs else None
    name_b = table2_model_b if table2_model_b in model_dfs else None
    if name_b == name_a:
        name_b = None

    progress(0.40, desc="Computing tables...")
    tables = generate_all_tables(
        model_dfs=model_dfs,
        group_bounds=group_bounds,
        name_a=name_a,
        name_b=name_b,
    )

    progress(0.65, desc="Formatting LaTeX & Markdown...")
    latex_str = format_all_latex(tables)
    md_str = format_all_markdown(tables)

    progress(0.80, desc="Saving files...")
    csv_paths = {
        key: _save_csv(df, f"wang_laws_{key}") for key, df in tables.items()
    }
    latex_file = _save_text(latex_str, "wang_laws_tables.tex")
    md_file = _save_text(md_str, "wang_laws_tables.md")
    zip_file = _make_zip(list(csv_paths.values()) + [latex_file, md_file])

    loaded = list(model_dfs.keys())
    status = (
        f"β Generated 6 tables | {len(loaded)} models loaded\n"
        f" Models: {', '.join(loaded)}\n"
        f" Layer groups (Table 2): {group_bounds}\n"
        f" Table 2 comparison: {name_a or 'β'} vs {name_b or 'β'}\n"
        f" Note: language modality, standard layers only (global layers excluded)"
    )
    progress(1.0)
    return (
        status,
        tables["t1"], tables["t2"], tables["t3"],
        tables["t4"], tables["t5"], tables["t6"],
        latex_str, md_str,
        csv_paths["t1"], csv_paths["t2"], csv_paths["t3"],
        csv_paths["t4"], csv_paths["t5"], csv_paths["t6"],
        latex_file, md_file, zip_file,
    )
# ─────────────────────────────────────────────────────────────────────────────
# Tab6 UI
# ─────────────────────────────────────────────────────────────────────────────
def build_tab_tables():
    """
    Build the "Tables" tab of the UI.

    Creates, in order: an explanatory Markdown header, the model selector
    with a refresh button, the Table 2 comparison controls, the generate
    button and status box, six accordions (one DataFrame + CSV download
    each), LaTeX and Markdown code views, and three bulk-download slots.
    Finally wires the generate and refresh buttons to their callbacks.

    NOTE(review): presumably called inside an active ``gr.Blocks()``
    context — confirm against the app entry point.
    """
    with gr.Tab("π Tables"):
        gr.Markdown("""
## Wang's Five Laws β Paper Tables
One-click generation of all 6 tables. Data: **language modality, standard layers only**
(global/K=V-shared layers excluded from all metrics).
| Table | Content | Law |
|-------|---------|-----|
| 1 | Cross-model summary: Pearson r, SSR (Wang Score in Table 6) | 1 & 2 |
| 2 | SSR layer-group trend (RL effect, user-defined groups) | 2 |
| 3 | Output subspace cosU: QβK, QβV, KβV + random baseline | 4 |
| 4 | Input subspace cosV: QβK, QβV, KβV + random baseline | 5 |
| 5 | Condition number ΞΊ: median all layers + Layer 0 + deep layers | 3 |
| 6 | Wang Score leaderboard (ranked) | 1 & 2 |
> Run **Tab 2 (Analyze)** first to populate the database.
""")
        # ── Model selector with Refresh ──────────────────────────────────────
        def _refresh_choices():
            # Re-query the model list and return replacement components for
            # the three outputs wired to the refresh button. The checkbox
            # group re-selects every model; the dropdowns only receive the
            # new choices (no value is passed for them).
            new_choices = _get_model_choices()
            return (
                gr.CheckboxGroup(choices=new_choices, value=new_choices),
                gr.Dropdown(choices=new_choices),
                gr.Dropdown(choices=new_choices),
            )
        # Model list as of UI build time; the refresh button updates it later.
        init_choices = _get_model_choices()
        # ── Controls ─────────────────────────────────────────────────────────
        with gr.Row():
            with gr.Column(scale=3):
                with gr.Row():
                    gr.Markdown("**Models to include** (all tables use selected models)")
                    refresh_btn = gr.Button("π Refresh", scale=0, min_width=100)
                # All known models are pre-selected.
                model_selector = gr.CheckboxGroup(
                    choices = init_choices,
                    value = init_choices,
                    label = "",
                    show_label=False,
                )
            with gr.Column(scale=2):
                gr.Markdown("**Table 2 β SSR Layer-Group Comparison**")
                # Defaults: first model as A, second (if any) as B.
                t2_model_a = gr.Dropdown(
                    choices = init_choices,
                    value = init_choices[0] if init_choices else None,
                    allow_custom_value=True,
                    label = "Model A (base)",
                )
                t2_model_b = gr.Dropdown(
                    choices = init_choices,
                    value = init_choices[1] if len(init_choices) > 1 else None,
                    allow_custom_value=True,
                    label = "Model B (RL-tuned / comparison)",
                    info = "Leave same as A for single-model view",
                )
                # NOTE(review): placement inside the right-hand column is
                # reconstructed from context — confirm against the original layout.
                group_input = gr.Textbox(
                    label = "Layer groups (comma-separated lo-hi pairs)",
                    value = "0-11, 12-23, 24-35, 36-47",
                    placeholder = "0-11, 12-23, 24-35, 36-47",
                    info = "Adjust for model depth: 32-layerβ0-7,8-15,16-23,24-31 60-layerβ0-14,15-29,30-44,45-59",
                )
        generate_btn = gr.Button("π Generate All Tables", variant="primary")
        status_box = gr.Textbox(lines=4, interactive=False, label="Status")
        gr.Markdown("---")
        # ── Table displays ───────────────────────────────────────────────────
        # One accordion per table: a read-only DataFrame plus its CSV download.
        with gr.Accordion("π Table 1 β Cross-Model Summary (Law 1 & 2)", open=True):
            t1_df = gr.Dataframe(interactive=False, wrap=True)
            dl_t1 = gr.File(label="β¬ CSV")
        with gr.Accordion("π Table 2 β SSR Layer-Group Trend (Law 2)", open=True):
            t2_df = gr.Dataframe(interactive=False, wrap=True)
            dl_t2 = gr.File(label="β¬ CSV")
        with gr.Accordion("π Table 3 β Output Subspace cosU (Law 4)", open=True):
            t3_df = gr.Dataframe(interactive=False, wrap=True)
            dl_t3 = gr.File(label="β¬ CSV")
        with gr.Accordion("π Table 4 β Input Subspace cosV (Law 5)", open=True):
            t4_df = gr.Dataframe(interactive=False, wrap=True)
            dl_t4 = gr.File(label="β¬ CSV")
        with gr.Accordion("π Table 5 β Condition Number ΞΊ (Law 3)", open=True):
            t5_df = gr.Dataframe(interactive=False, wrap=True)
            dl_t5 = gr.File(label="β¬ CSV")
        with gr.Accordion("π Table 6 β Wang Score Leaderboard", open=True):
            t6_df = gr.Dataframe(interactive=False, wrap=True)
            dl_t6 = gr.File(label="β¬ CSV")
        gr.Markdown("---")
        # ── Text outputs ─────────────────────────────────────────────────────
        with gr.Accordion("π LaTeX Output (paste into .tex)", open=False):
            latex_box = gr.Code(
                language = "latex",
                label = "LaTeX tables (booktabs style)",
                interactive= False,
                lines = 30,
            )
        with gr.Accordion("π Markdown Output (paste into README)", open=False):
            md_box = gr.Code(
                language = "markdown",
                label = "Markdown tables (GitHub-flavored)",
                interactive= False,
                lines = 30,
            )
        gr.Markdown("---")
        # ── Bulk downloads ───────────────────────────────────────────────────
        gr.Markdown("### β¬ Bulk Downloads")
        with gr.Row():
            dl_latex = gr.File(label="β¬ wang_laws_tables.tex")
            dl_md = gr.File(label="β¬ wang_laws_tables.md")
            dl_zip = gr.File(label="β¬ ZIP (all CSVs + LaTeX + Markdown)")
        gr.Markdown("""
---
**Notes**
- All tables use **language modality** and **standard layers only**.
Global (K=V-shared) layers are excluded from metrics but their count is shown in Table 1.
- Table 2 layer groups are user-defined. Suggested defaults:
48-layer models β `0-11, 12-23, 24-35, 36-47`
32-layer models β `0-7, 8-15, 16-23, 24-31`
60-layer models β `0-14, 15-29, 30-44, 45-59`
- LaTeX output uses `booktabs` style (`\\toprule`, `\\midrule`, `\\bottomrule`).
Add `\\usepackage{booktabs}` to your preamble.
- Wang Score = 1 β median(SSR_QK). Theoretical maximum = 1.
""")
        # ── Wiring ───────────────────────────────────────────────────────────
        # The outputs list has 18 entries, in the same order as the tuple
        # returned by generate_tables.
        generate_btn.click(
            fn = generate_tables,
            inputs = [model_selector, t2_model_a, t2_model_b, group_input],
            outputs = [
                status_box,
                t1_df, t2_df, t3_df, t4_df, t5_df, t6_df,
                latex_box, md_box,
                dl_t1, dl_t2, dl_t3, dl_t4, dl_t5, dl_t6,
                dl_latex, dl_md, dl_zip,
            ],
        )
        # Refresh replaces the choices of the selector and both dropdowns.
        refresh_btn.click(
            fn = _refresh_choices,
            inputs = [],
            outputs = [model_selector, t2_model_a, t2_model_b],
        )