# TRIADS / app.py
# (HuggingFace Spaces page header removed — author Rtx09,
#  commit b90bf21 "fix: clean up phonons tab info")
"""
TRIADS — Multi-Benchmark Materials Property Prediction
HuggingFace Gradio App (Production Redux)
Covers all 6 Matbench benchmarks:
1. matbench_steels — Yield Strength (MPa)
2. matbench_expt_gap — Band Gap (eV)
3. matbench_ismetal — Metallicity (ROC-AUC)
4. matbench_glass — Glass Forming Ability
5. matbench_jdft2d — Exfoliation Energy (meV/atom)
6. matbench_phonons — Peak Phonon Frequency (cm⁻¹)
"""
import os
import warnings
import urllib.request
import json
import traceback
warnings.filterwarnings("ignore")
import numpy as np
import torch
import torch.nn as nn
import gradio as gr
from huggingface_hub import hf_hub_download
# ─────────────────────────────────────────────────────────────────
# CONFIG
# ─────────────────────────────────────────────────────────────────
REPO_ID = "Rtx09/TRIADS" # Used only if local weights are missing
BENCHMARK_INFO = {
"steels": {
"title": "🔩 Steel Yield Strength",
"description": "Predict yield strength (MPa) of steel alloys from composition.",
"unit": "MPa",
"example": "Fe0.7Cr0.15Ni0.15",
"examples": ["Fe0.7Cr0.15Ni0.15", "Fe0.8C0.02Mn0.1Si0.05Cr0.03", "Fe0.6Ni0.25Mo0.1Cr0.05"],
"task": "regression",
"result": "91.20 ± 12.23 MPa MAE (5-fold, 5-seed ensemble)",
},
"expt_gap": {
"title": "⚡ Experimental Band Gap",
"description": "Predict experimental electronic band gap (eV) from composition.",
"unit": "eV",
"example": "TiO2",
"examples": ["TiO2", "GaN", "ZnO", "Si", "CdS"],
"task": "regression",
"result": "0.3068 ± 0.0082 eV MAE (5-fold, composition-only)",
},
"ismetal": {
"title": "🔮 Metallicity Classifier",
"description": "Predict whether a material is metallic or non-metallic from composition.",
"unit": "probability (1 = metal)",
"example": "Cu",
"examples": ["Cu", "SiO2", "Fe3O4", "BaTiO3", "Al"],
"task": "classification",
"result": "0.9655 ± 0.0029 ROC-AUC (5-fold, composition-only)",
},
"glass": {
"title": "🪟 Glass Forming Ability",
"description": "Predict metallic glass forming ability from alloy composition.",
"unit": "probability (1 = glass former)",
"example": "Cu46Zr54",
"examples": ["Cu46Zr54", "Fe80B20", "Al86Ni7La6Y1", "Pd40Cu30Ni10P20"],
"task": "classification",
"result": "0.9285 ± 0.0063 ROC-AUC (5-fold, 5-seed ensemble)",
},
"jdft2d": {
"title": "📐 Exfoliation Energy",
"description": "Predict exfoliation energy (meV/atom) of 2D materials from structure+composition.",
"unit": "meV/atom",
"example": "MoS2",
"examples": ["MoS2", "WSe2", "BN", "graphene (C)", "MoTe2"],
"task": "regression",
"result": "35.89 ± 12.40 meV/atom MAE (5-fold, 5-seed ensemble)",
},
"phonons": {
"title": "🎵 Phonon Peak Frequency",
"description": "Predict peak phonon frequency (cm⁻¹) from crystal structure.",
"unit": "cm⁻¹",
"example": "Si (diamond cubic)",
"examples": ["Si", "GaAs", "MgO", "BN (wurtzite)", "TiO2 (rutile)"],
"task": "regression",
"result": "41.91 ± 4.04 cm⁻¹ MAE (5-fold, gate-halt GraphTRIADS)",
},
}
# ─────────────────────────────────────────────────────────────────
# MODEL DEFINITIONS (inlined for self-contained app)
# ─────────────────────────────────────────────────────────────────
class DeepHybridTRM(nn.Module):
"""
HybridTRIADS — composition-only tasks.
Shared across: steels, expt_gap, ismetal, glass, jdft2d.
"""
def __init__(self, n_props=22, stat_dim=6, n_extra=0, mat2vec_dim=200,
d_attn=64, nhead=4, d_hidden=96, ff_dim=150,
dropout=0.2, max_steps=20, **kw):
super().__init__()
self.max_steps, self.D = max_steps, d_hidden
self.n_props, self.stat_dim, self.n_extra = n_props, stat_dim, n_extra
self.tok_proj = nn.Sequential(
nn.Linear(stat_dim, d_attn), nn.LayerNorm(d_attn), nn.GELU())
self.m2v_proj = nn.Sequential(
nn.Linear(mat2vec_dim, d_attn), nn.LayerNorm(d_attn), nn.GELU())
self.sa1 = nn.MultiheadAttention(d_attn, nhead, dropout=dropout, batch_first=True)
self.sa1_n = nn.LayerNorm(d_attn)
self.sa1_ff = nn.Sequential(
nn.Linear(d_attn, d_attn*2), nn.GELU(), nn.Dropout(dropout),
nn.Linear(d_attn*2, d_attn))
self.sa1_fn = nn.LayerNorm(d_attn)
self.sa2 = nn.MultiheadAttention(d_attn, nhead, dropout=dropout, batch_first=True)
self.sa2_n = nn.LayerNorm(d_attn)
self.sa2_ff = nn.Sequential(
nn.Linear(d_attn, d_attn*2), nn.GELU(), nn.Dropout(dropout),
nn.Linear(d_attn*2, d_attn))
self.sa2_fn = nn.LayerNorm(d_attn)
self.ca = nn.MultiheadAttention(d_attn, nhead, dropout=dropout, batch_first=True)
self.ca_n = nn.LayerNorm(d_attn)
pool_in = d_attn + (n_extra if n_extra > 0 else 0)
self.pool = nn.Sequential(
nn.Linear(pool_in, d_hidden), nn.LayerNorm(d_hidden), nn.GELU())
self.z_up = nn.Sequential(
nn.Linear(d_hidden*3, ff_dim), nn.GELU(), nn.Dropout(dropout),
nn.Linear(ff_dim, d_hidden), nn.LayerNorm(d_hidden))
self.y_up = nn.Sequential(
nn.Linear(d_hidden*2, ff_dim), nn.GELU(), nn.Dropout(dropout),
nn.Linear(ff_dim, d_hidden), nn.LayerNorm(d_hidden))
self.head = nn.Linear(d_hidden, 1)
self._init()
def _init(self):
for m in self.modules():
if isinstance(m, nn.Linear):
nn.init.xavier_uniform_(m.weight)
if m.bias is not None: nn.init.zeros_(m.bias)
def _attention(self, x):
B = x.size(0)
mg_dim = self.n_props * self.stat_dim
if self.n_extra > 0:
extra = x[:, mg_dim:mg_dim + self.n_extra]
m2v = x[:, mg_dim + self.n_extra:]
else:
extra, m2v = None, x[:, mg_dim:]
tok = self.tok_proj(x[:, :mg_dim].view(B, self.n_props, self.stat_dim))
ctx = self.m2v_proj(m2v).unsqueeze(1)
tok = self.sa1_n(tok + self.sa1(tok, tok, tok)[0])
tok = self.sa1_fn(tok + self.sa1_ff(tok))
tok = self.sa2_n(tok + self.sa2(tok, tok, tok)[0])
tok = self.sa2_fn(tok + self.sa2_ff(tok))
tok = self.ca_n(tok + self.ca(tok, ctx, ctx)[0])
pooled = tok.mean(dim=1)
if extra is not None:
pooled = torch.cat([pooled, extra], dim=-1)
return self.pool(pooled)
def forward(self, x, deep_supervision=False):
B = x.size(0)
xp = self._attention(x)
z = torch.zeros(B, self.D, device=x.device)
y = torch.zeros(B, self.D, device=x.device)
step_preds = []
for _ in range(self.max_steps):
z = z + self.z_up(torch.cat([xp, y, z], -1))
y = y + self.y_up(torch.cat([y, z], -1))
step_preds.append(self.head(y).squeeze(1))
return step_preds if deep_supervision else step_preds[-1]
# ─────────────────────────────────────────────────────────────────
# FEATURIZER (composition-only, shared across HybridTRIADS tasks)
# ─────────────────────────────────────────────────────────────────
_featurizer_cache = {}
_mat2vec_cache = {}
_featurizer_err = None
def _get_featurizer():
"""Lazy-load the ExpandedFeaturizer (downloads Mat2Vec once)."""
global _featurizer_err
if "main" in _featurizer_cache:
return _featurizer_cache["main"]
try:
from matminer.featurizers.composition import (
ElementProperty, ElementFraction, Stoichiometry,
ValenceOrbital, IonProperty, BandCenter
)
from matminer.featurizers.base import MultipleFeaturizer
from gensim.models import Word2Vec
from sklearn.preprocessing import StandardScaler
GCS = "https://storage.googleapis.com/mat2vec/"
M2V_FILES = [
"pretrained_embeddings",
"pretrained_embeddings.wv.vectors.npy",
"pretrained_embeddings.trainables.syn1neg.npy",
]
# Use /tmp for writable cache if current dir is read-only
cache_dir = os.path.join(os.getcwd(), "mat2vec_cache")
try:
os.makedirs(cache_dir, exist_ok=True)
# Test write access
test_file = os.path.join(cache_dir, ".test")
with open(test_file, 'w') as f: f.write('1')
os.remove(test_file)
except Exception:
cache_dir = "/tmp/mat2vec_cache"
os.makedirs(cache_dir, exist_ok=True)
for f in M2V_FILES:
p = os.path.join(cache_dir, f)
if not os.path.exists(p):
print(f"Downloading {f}...")
urllib.request.urlretrieve(GCS + f, p)
# Magpie preset can fail if Figshare is down
try:
ep = ElementProperty.from_preset("magpie")
except Exception as e:
print(f"Magpie download failed, retrying once: {e}")
import time
time.sleep(2)
ep = ElementProperty.from_preset("magpie")
m2v = Word2Vec.load(os.path.join(cache_dir, "pretrained_embeddings"))
emb = {w: m2v.wv[w] for w in m2v.wv.index_to_key}
extra = MultipleFeaturizer([ElementFraction(), Stoichiometry(),
ValenceOrbital(), IonProperty(), BandCenter()])
_featurizer_cache["main"] = (ep, m2v, emb, extra)
return _featurizer_cache["main"]
except Exception as e:
_featurizer_err = f"{type(e).__name__}: {str(e)}\n{traceback.format_exc()}"
print(f"CRITICAL Featurizer Error: {_featurizer_err}")
return None
def featurize_composition(formula: str):
"""Featurize a chemical formula into the TRIADS feature vector."""
from pymatgen.core import Composition
result = _get_featurizer()
if result is None:
return None, f"Featurizer initialization failed.\nError: {_featurizer_err}"
ep, m2v, emb, extra = result
try:
comp = Composition(formula)
except Exception as e:
return None, f"Invalid formula: '{formula}' | {str(e)}"
try:
mg = np.array(ep.featurize(comp), np.float32)
except Exception as e:
mg = np.zeros(len(ep.feature_labels()), np.float32)
try:
ex = np.array(extra.featurize(comp), np.float32)
ex = np.nan_to_num(ex, nan=0.0)
except Exception as e:
ex = np.zeros(50, np.float32)
# Mat2Vec pooled
v, t = np.zeros(200, np.float32), 0.0
for s, f in comp.get_el_amt_dict().items():
if s in emb:
v += f * emb[s]
t += f
m2v_vec = v / max(t, 1e-8)
mg = np.nan_to_num(mg, nan=0.0)
feat = np.concatenate([mg, ex, m2v_vec])
return feat.astype(np.float32), None
# ─────────────────────────────────────────────────────────────────
# WEIGHT LOADING (lazy, cached)
# ─────────────────────────────────────────────────────────────────
_fold_models = {} # benchmark -> list[nn.Module]
_MODEL_CONFIGS = {
"steels": dict(d_attn=64, d_hidden=96, ff_dim=150, dropout=0.20, max_steps=20),
"expt_gap": dict(d_attn=64, d_hidden=96, ff_dim=150, dropout=0.20, max_steps=20),
"ismetal": dict(d_attn=24, d_hidden=48, ff_dim=72, dropout=0.20, max_steps=16),
"glass": dict(d_attn=24, d_hidden=48, ff_dim=72, dropout=0.20, max_steps=16),
"jdft2d": dict(d_attn=32, d_hidden=64, ff_dim=96, dropout=0.20, max_steps=16),
}
_HF_PATHS = {
"steels": "weights/steels/weights.pt",
"expt_gap": "weights/expt_gap/weights.pt",
"ismetal": "weights/is_metal/weights.pt",
"glass": "weights/glass/weights.pt",
"jdft2d": "weights/jdft2d/weights.pt",
"phonons": "weights/phonons/weights.pt",
}
def _load_benchmark_models(benchmark: str):
if benchmark in _fold_models:
return _fold_models[benchmark]
if benchmark == "phonons":
return None
try:
# 1. Try local path first (relative to app.py)
local_path = _HF_PATHS[benchmark]
if os.path.exists(local_path):
path = local_path
else:
# 2. Try hfHub if local missing
print(f"Local weight {local_path} missing. Attempting hf_hub_download...")
path = hf_hub_download(repo_id=REPO_ID, filename=_HF_PATHS[benchmark])
ckpt = torch.load(path, map_location="cpu", weights_only=False)
fold_entries = ckpt.get("folds", [ckpt])
n_extra = ckpt.get("n_extra", 0)
cfg = {**_MODEL_CONFIGS[benchmark], "n_extra": n_extra}
models = []
for entry in fold_entries:
m = DeepHybridTRM(**cfg)
sd = entry.get("model_state", entry) if isinstance(entry, dict) else entry
m.load_state_dict(sd)
m.eval()
models.append(m)
_fold_models[benchmark] = models
return models
except Exception as e:
err_msg = f"Error loading {benchmark} weights: {e}\n{traceback.format_exc()}"
print(err_msg)
return None
def _ensemble_predict(benchmark: str, x: np.ndarray, is_classification: bool = False):
models = _load_benchmark_models(benchmark)
if not models:
return None, "Weights could not be loaded. See logs."
xt = torch.tensor(x[None], dtype=torch.float32)
preds = []
for m in models:
with torch.no_grad():
out = m(xt).item()
if is_classification:
out = torch.sigmoid(torch.tensor(out)).item()
preds.append(out)
return float(np.mean(preds)), None
# ─────────────────────────────────────────────────────────────────
# PREDICTION FUNCTIONS
# ─────────────────────────────────────────────────────────────────
def predict_steels(formula: str):
feat, err = featurize_composition(formula)
if err: return f"❌ Error: {err}", ""
pred, err = _ensemble_predict("steels", feat)
if err: return f"❌ {err}", ""
return f"### {pred:.1f} MPa", f"**{pred:.1f} MPa** yield strength"
def predict_expt_gap(formula: str):
feat, err = featurize_composition(formula)
if err: return f"❌ Error: {err}", ""
pred, err = _ensemble_predict("expt_gap", feat)
if err: return f"❌ {err}", ""
return f"### {pred:.3f} eV", f"**{pred:.3f} eV** band gap"
def predict_ismetal(formula: str):
feat, err = featurize_composition(formula)
if err: return f"❌ Error: {err}", ""
pred, err = _ensemble_predict("ismetal", feat, True)
if err: return f"❌ {err}", ""
label = "🔩 METALLIC" if pred > 0.5 else "💎 NON-METALLIC"
return f"### {pred:.3f} (Metal)", f"{label} (p={pred:.3f})"
def predict_glass(formula: str):
feat, err = featurize_composition(formula)
if err: return f"❌ Error: {err}", ""
pred, err = _ensemble_predict("glass", feat, True)
if err: return f"❌ {err}", ""
label = "🪟 GLASS-FORMER" if pred > 0.5 else "❌ CRYSTALLINE"
return f"### {pred:.3f} (Glass)", f"{label} (p={pred:.3f})"
def predict_jdft2d(formula: str):
feat, err = featurize_composition(formula)
if err: return f"❌ Error: {err}", ""
pred, err = _ensemble_predict("jdft2d", feat)
if err: return f"❌ {err}", ""
return f"### {pred:.1f} meV/atom", f"**{pred:.1f} meV/atom** exfoliation"
PHONONS_INFO = """
## 🎵 Phonon Peak Frequency
The **TRIADS V6 Graph-TRM** achieves **41.91 ± 4.04 cm⁻¹ MAE** on Matbench phonons, using a gate-based halting Graph Neural Network that adaptively runs 4–16 message-passing cycles.
### Architecture
- **Gate-based halting**: 4–16 adaptive GNN cycles (halts when gate activations drop below threshold)
- **Graph Attention TRM**: line-graph bond updates + joint self-attention + cross-attention
- **Input**: Full crystal structure — atom positions, bond distances, angles (requires CIF/POSCAR)
### Why no live demo?
The phonons model requires a **pre-computed crystal graph** (atom positions, bond lengths, bond angles).
Composition-only featurization is insufficient for phonon prediction — structural details like bond stiffness
and crystal symmetry are essential.
### Benchmark Results
| Model | MAE (cm⁻¹) |
|---|---|
| **TRIADS V6 (ours)** | **41.91 ± 4.04** |
| MEGNet | 28.76 |
| ALIGNN | 29.34 |
| MODNet | 45.39 |
| CrabNet | 47.09 |
| TRIADS V4 | 56.33 |
> **Note**: MEGNet and ALIGNN use full DFT structural relaxation data.
> TRIADS V6 achieves competitive performance with a simpler, more parameter-efficient Graph-TRM architecture (< 50K parameters).
"""
# ─────────────────────────────────────────────────────────────────
# INTERFACE
# ─────────────────────────────────────────────────────────────────
CSS = """
#result_text { font-size: 1.5rem; font-weight: 700; color: #6366f1; }
.benchmark-badge { background: #1e293b; color: #94a3b8; border-radius: 8px; padding: 8px; }
"""
def build():
with gr.Blocks(css=CSS, title="TRIADS") as demo:
gr.Markdown("# ⚡ TRIADS — Materials Property Prediction")
gr.Markdown("Recursive Information-Attention with Deep Supervision for all Matbench benchmarks.")
with gr.Tabs():
with gr.Tab("🔩 Steel Yield"):
f_s = gr.Textbox(label="Formula", value="Fe0.7Cr0.15Ni0.15")
btn_s = gr.Button("Predict", variant="primary")
out_s = gr.Markdown(elem_id="result_text")
ctx_s = gr.Markdown()
btn_s.click(predict_steels, f_s, [out_s, ctx_s])
with gr.Tab("⚡ Band Gap"):
f_g = gr.Textbox(label="Formula", value="TiO2")
btn_g = gr.Button("Predict", variant="primary")
out_g = gr.Markdown(elem_id="result_text")
ctx_g = gr.Markdown()
btn_g.click(predict_expt_gap, f_g, [out_g, ctx_g])
with gr.Tab("🔮 Metallicity"):
f_m = gr.Textbox(label="Formula", value="Cu")
btn_m = gr.Button("Predict", variant="primary")
out_m = gr.Markdown(elem_id="result_text")
ctx_m = gr.Markdown()
btn_m.click(predict_ismetal, f_m, [out_m, ctx_m])
with gr.Tab("🪟 Glass Forming"):
f_gf = gr.Textbox(label="Formula", value="Cu46Zr54")
btn_gf = gr.Button("Predict", variant="primary")
out_gf = gr.Markdown(elem_id="result_text")
ctx_gf = gr.Markdown()
btn_gf.click(predict_glass, f_gf, [out_gf, ctx_gf])
with gr.Tab("📐 JDFT2D"):
f_j = gr.Textbox(label="Formula", value="MoS2")
btn_j = gr.Button("Predict", variant="primary")
out_j = gr.Markdown(elem_id="result_text")
ctx_j = gr.Markdown()
btn_j.click(predict_jdft2d, f_j, [out_j, ctx_j])
with gr.Tab("🎵 Phonons"):
gr.Markdown(PHONONS_INFO)
return demo
if __name__ == "__main__":
build().launch()