Spaces:
Running
Running
| # db/schema.py | |
| """ | |
| ๆฐๆฎๅบ่กจ็ปๆๅฎไนไธๅๅงๅ | |
| SQLite ๅญๅจๅจ /data/wang_laws.db๏ผHF Space bucket ๆไน ๅ๏ผ | |
| """ | |
| import sqlite3 | |
| import os | |
| from datetime import datetime | |
| def get_db_path() -> str: | |
| if os.path.exists("/data"): | |
| return "/data/wang_laws.db" | |
| return "wang_laws.db" | |
| def get_connection() -> sqlite3.Connection: | |
| conn = sqlite3.connect(get_db_path(), check_same_thread=False) | |
| conn.row_factory = sqlite3.Row | |
| conn.execute("PRAGMA journal_mode=WAL") | |
| conn.execute("PRAGMA foreign_keys=ON") | |
| return conn | |
| # โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ | |
| # ๅปบ่กจ SQL | |
| # โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ | |
| SQL_CREATE_MODELS = """ | |
| CREATE TABLE IF NOT EXISTS models ( | |
| model_id TEXT PRIMARY KEY, | |
| model_type TEXT, | |
| analyzed_at TIMESTAMP, | |
| analyze_sec REAL, | |
| notes TEXT | |
| ); | |
| """ | |
| SQL_CREATE_COMPONENTS = """ | |
| CREATE TABLE IF NOT EXISTS components ( | |
| id INTEGER PRIMARY KEY AUTOINCREMENT, | |
| model_id TEXT NOT NULL, | |
| prefix TEXT NOT NULL, | |
| modality TEXT DEFAULT 'language', -- language/vision/audio | |
| n_layers INTEGER, | |
| head_dim_min INTEGER, | |
| head_dim_max INTEGER, | |
| has_kv_shared INTEGER DEFAULT 0, | |
| has_global INTEGER DEFAULT 0, | |
| d_model INTEGER, | |
| UNIQUE(model_id, prefix), | |
| FOREIGN KEY(model_id) REFERENCES models(model_id) | |
| ); | |
| """ | |
| SQL_CREATE_LAYER_HEAD_METRICS = """ | |
| CREATE TABLE IF NOT EXISTS layer_head_metrics ( | |
| id INTEGER PRIMARY KEY AUTOINCREMENT, | |
| model_id TEXT NOT NULL, | |
| prefix TEXT NOT NULL, | |
| layer INTEGER NOT NULL, | |
| layer_type TEXT DEFAULT 'standard', -- standard/global | |
| modality TEXT DEFAULT 'language', -- language/vision/audio | |
| kv_head INTEGER NOT NULL, | |
| q_head INTEGER NOT NULL, | |
| kv_shared INTEGER DEFAULT 0, | |
| head_dim INTEGER, | |
| d_model INTEGER, | |
| n_q_heads INTEGER, | |
| n_kv_heads INTEGER, | |
| -- ็ฌฌไธๅฎๅพ | |
| pearson_QK REAL, spearman_QK REAL, | |
| pearson_QV REAL, pearson_KV REAL, | |
| -- ็ฌฌไบๅฎๅพ | |
| ssr_QK REAL, ssr_QV REAL, ssr_KV REAL, | |
| -- ็ฌฌไธๅฎๅพ | |
| sigma_max_Q REAL, sigma_min_Q REAL, cond_Q REAL, | |
| sigma_max_K REAL, sigma_min_K REAL, cond_K REAL, | |
| sigma_max_V REAL, sigma_min_V REAL, cond_V REAL, | |
| -- ็ฌฌๅๅฎๅพ | |
| cosU_QK REAL, cosU_QV REAL, cosU_KV REAL, | |
| -- ็ฌฌไบๅฎๅพ | |
| cosV_QK REAL, cosV_QV REAL, cosV_KV REAL, | |
| -- ๅฐบๅบฆๅ ๅญ | |
| alpha_QK REAL, alpha_res_QK REAL, | |
| alpha_QV REAL, alpha_res_QV REAL, | |
| alpha_KV REAL, alpha_res_KV REAL, | |
| UNIQUE(model_id, prefix, layer, kv_head, q_head), | |
| FOREIGN KEY(model_id) REFERENCES models(model_id) | |
| ); | |
| """ | |
| SQL_CREATE_MODEL_SUMMARY = """ | |
| CREATE TABLE IF NOT EXISTS model_summary ( | |
| model_id TEXT NOT NULL, | |
| prefix TEXT NOT NULL, | |
| layer_type TEXT NOT NULL DEFAULT 'all', | |
| -- ็ฌฌไธๅฎๅพ | |
| median_pearson_QK REAL, mean_pearson_QK REAL, | |
| -- ็ฌฌไบๅฎๅพ | |
| median_ssr_QK REAL, mean_ssr_QK REAL, | |
| median_ssr_QV REAL, mean_ssr_QV REAL, | |
| -- ็ฌฌไธๅฎๅพ | |
| median_cond_Q REAL, mean_cond_Q REAL, | |
| -- ็ฌฌๅๅฎๅพ | |
| median_cosU_QK REAL, median_cosU_QV REAL, | |
| -- ็ฌฌไบๅฎๅพ | |
| median_cosV_QK REAL, median_cosV_QV REAL, | |
| -- ็ๆฐ่ฏๅ | |
| wang_score REAL, | |
| -- ็ป่ฎก่ๅด | |
| n_layers INTEGER, | |
| n_records INTEGER, | |
| updated_at TIMESTAMP, | |
| PRIMARY KEY(model_id, prefix, layer_type), | |
| FOREIGN KEY(model_id) REFERENCES models(model_id) | |
| ); | |
| """ | |
| SQL_CREATE_INDEXES = [ | |
| """CREATE INDEX IF NOT EXISTS idx_metrics_model_prefix | |
| ON layer_head_metrics(model_id, prefix)""", | |
| """CREATE INDEX IF NOT EXISTS idx_metrics_layer | |
| ON layer_head_metrics(model_id, prefix, layer)""", | |
| """CREATE INDEX IF NOT EXISTS idx_metrics_modality | |
| ON layer_head_metrics(model_id, modality)""", | |
| """CREATE INDEX IF NOT EXISTS idx_summary_wang_score | |
| ON model_summary(wang_score DESC)""", | |
| """CREATE INDEX IF NOT EXISTS idx_metrics_resume | |
| ON layer_head_metrics(model_id, prefix, layer, kv_head, q_head)""", | |
| """CREATE INDEX IF NOT EXISTS idx_components_modality | |
| ON components(model_id, modality)""", | |
| ] | |
| # โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ | |
| # ่ฟ็งป๏ผไธบๆงๆฐๆฎๅบๅ modality ๅ | |
| # โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ | |
| def _migrate_add_modality(conn: sqlite3.Connection): | |
| """ | |
| ๅน็ญ่ฟ็งป๏ผ็ปๆง่กจๅ modality ๅๅนถๅๅกซๆฐๆฎใ | |
| ๆฐๅปบๆฐๆฎๅบๆถ่ฟไบๅๅทฒๅจๅปบ่กจSQLไธญ๏ผPRAGMAไผๆฃๆตๅฐ็ดๆฅ่ทณ่ฟใ | |
| """ | |
| cur = conn.cursor() | |
| # โโ layer_head_metrics โโโโโโโโโโโโโโโโโโโโ | |
| cur.execute("PRAGMA table_info(layer_head_metrics)") | |
| lhm_cols = [row[1] for row in cur.fetchall()] | |
| if "modality" not in lhm_cols: | |
| cur.execute( | |
| "ALTER TABLE layer_head_metrics " | |
| "ADD COLUMN modality TEXT DEFAULT 'language'" | |
| ) | |
| # ๅๅกซ vision | |
| cur.execute( | |
| """UPDATE layer_head_metrics SET modality = 'vision' | |
| WHERE prefix LIKE '%vision%' | |
| OR prefix LIKE '%visual%' | |
| OR prefix LIKE '%image%'""" | |
| ) | |
| # ๅๅกซ audio | |
| cur.execute( | |
| """UPDATE layer_head_metrics SET modality = 'audio' | |
| WHERE prefix LIKE '%audio%' | |
| OR prefix LIKE '%speech%' | |
| OR prefix LIKE '%acoustic%'""" | |
| ) | |
| # language ๅทฒ็ฑ DEFAULT 'language' ่ฆ็๏ผๆ ้้ขๅคๆดๆฐ | |
| # โโ components โโโโโโโโโโโโโโโโโโโโโโโโโโโโ | |
| cur.execute("PRAGMA table_info(components)") | |
| comp_cols = [row[1] for row in cur.fetchall()] | |
| if "modality" not in comp_cols: | |
| cur.execute( | |
| "ALTER TABLE components " | |
| "ADD COLUMN modality TEXT DEFAULT 'language'" | |
| ) | |
| cur.execute( | |
| """UPDATE components SET modality = 'vision' | |
| WHERE prefix LIKE '%vision%' | |
| OR prefix LIKE '%visual%' | |
| OR prefix LIKE '%image%'""" | |
| ) | |
| cur.execute( | |
| """UPDATE components SET modality = 'audio' | |
| WHERE prefix LIKE '%audio%' | |
| OR prefix LIKE '%speech%' | |
| OR prefix LIKE '%acoustic%'""" | |
| ) | |
| conn.commit() | |
| # โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ | |
| # ๅๅงๅ | |
| # โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ | |
| def init_db() -> sqlite3.Connection: | |
| conn = get_connection() | |
| cur = conn.cursor() | |
| # ็ฌฌไธๆญฅ๏ผๅปบ่กจ | |
| cur.execute(SQL_CREATE_MODELS) | |
| cur.execute(SQL_CREATE_COMPONENTS) | |
| cur.execute(SQL_CREATE_LAYER_HEAD_METRICS) | |
| cur.execute(SQL_CREATE_MODEL_SUMMARY) | |
| conn.commit() | |
| # ็ฌฌไบๆญฅ๏ผ่ฟ็งปๆงๆฐๆฎ๏ผๅ modality ๅ๏ผโ ๅฟ ้กปๅจๅปบ็ดขๅผไนๅ | |
| _migrate_add_modality(conn) | |
| # ็ฌฌไธๆญฅ๏ผๅปบ็ดขๅผ๏ผๆญคๆถ modality ๅๅทฒ็กฎไฟๅญๅจ๏ผ | |
| cur = conn.cursor() | |
| for sql in SQL_CREATE_INDEXES: | |
| cur.execute(sql) | |
| conn.commit() | |
| return conn | |
| def get_db_stats(conn: sqlite3.Connection) -> dict: | |
| cur = conn.cursor() | |
| stats = {} | |
| for table in ["models", "components", "layer_head_metrics", "model_summary"]: | |
| cur.execute(f"SELECT COUNT(*) FROM {table}") | |
| stats[table] = cur.fetchone()[0] | |
| db_path = get_db_path() | |
| if os.path.exists(db_path): | |
| stats["db_size_mb"] = round(os.path.getsize(db_path) / 1024 / 1024, 2) | |
| else: | |
| stats["db_size_mb"] = 0 | |
| return stats |