# molforge / mlx_lora_inference.py
# MolForge OpenEnv Docker Space submission (commit bf9e424, verified; author: Adhitya122)
"""MLX-backed local LoRA inference runner for MolForge on Apple Silicon."""
from __future__ import annotations
import json
import os
import time
from pathlib import Path
from typing import Any, Dict, Optional, Tuple
from mlx_lm import generate, load
from mlx_lm.sample_utils import make_sampler
from inference_common import (
COMPACT_SYSTEM_PROMPT,
SYSTEM_PROMPT,
attach_team_messages,
build_model_payload,
extract_json,
)
try:
from molforge.models import MolForgeAction, MolForgeObservation
from molforge.server.molforge_environment import MolForgeEnvironment
except ImportError:
from models import MolForgeAction, MolForgeObservation
from server.molforge_environment import MolForgeEnvironment
# Local filesystem path to the trained LoRA adapter weights.
ADAPTER_PATH = Path(os.getenv("LORA_ADAPTER_PATH", "qwen3_5_2b_lora_adapters_strict"))
# Identifier of the MLX base model the adapter is applied to.
BASE_MODEL_NAME = os.getenv("BASE_MODEL_NAME", "unsloth/Qwen3.5-2B")
# Number of local evaluation episodes to play.
LOCAL_NUM_EPISODES = int(os.getenv("LOCAL_NUM_EPISODES", "3"))
# Hard cap on environment steps taken per episode.
LOCAL_MAX_TURNS = int(os.getenv("LOCAL_MAX_TURNS", "10"))
# Token budget for the first (full-prompt) generation attempt.
MLX_MAX_TOKENS = int(os.getenv("MLX_MAX_TOKENS", "768"))
# Smaller token budget used by the forced/compact retry attempts.
MLX_RETRY_MAX_TOKENS = int(os.getenv("MLX_RETRY_MAX_TOKENS", "512"))
# When true, the prompt is seeded with a JSON prefix (see json_prefill())
# and the same prefix is prepended back onto the completion before parsing.
MLX_JSON_PREFILL = os.getenv("MLX_JSON_PREFILL", "true").lower() == "true"
# When true, use the compact action schema (COMPACT_ACTION_SYSTEM_PROMPT and
# compact_action_payload) instead of the full payload from inference_common.
MLX_COMPACT_ACTION = os.getenv("MLX_COMPACT_ACTION", "false").lower() == "true"
# When true (and compact mode is on), run repair_compact_action() on parsed
# model output to normalize schema near-misses.
MLX_COMPACT_REPAIR = os.getenv("MLX_COMPACT_REPAIR", "false").lower() == "true"
# Optional comma-separated override for the forced-retry action order;
# blank entries are discarded.
MLX_FORCED_ACTION_TYPES = [
    item.strip()
    for item in os.getenv("MLX_FORCED_ACTION_TYPES", "").split(",")
    if item.strip()
]
# Generic JSON prefix used when prefill is enabled and no action is forced.
JSON_PREFILL = '{"action_type":"'
# System prompt for compact-action mode: restricts output to the five
# executable action types and forbids governance-message output.
COMPACT_ACTION_SYSTEM_PROMPT = """
You control the MolForge action policy.
Return exactly one JSON object with only these top-level keys:
action_type, acting_role, edit_type, slot, fragment, tool_name, rationale,
evidence, expected_effects.
Valid action_type values are exactly:
edit, run_assay, submit, restart, defer.
Do not output team messages. Do not output proposal, approval, objection,
risk_flag, assay_request, rejection, or submission_recommendation as action_type.
The environment will attach governance messages automatically.
Role rules:
- run_assay uses acting_role "assay_planner" and a valid tool_name.
- edit, submit, restart, and defer use acting_role "lead_chemist".
- unused optional fields must be JSON null.
""".strip()
def main() -> None:
    """Run local MolForge episodes with the MLX LoRA policy and print a summary.

    Loads the base model plus LoRA adapter once, plays up to
    LOCAL_NUM_EPISODES episodes of at most LOCAL_MAX_TURNS steps each,
    printing a per-step trace and per-episode grader scores, then emits a
    JSON summary of all episode scores.
    """
    adapter_path = ADAPTER_PATH.expanduser().resolve()
    print(f"Using MLX base model: {BASE_MODEL_NAME}", flush=True)
    print(f"Using LoRA adapter: {adapter_path}", flush=True)
    model, tokenizer = load(BASE_MODEL_NAME, adapter_path=str(adapter_path))
    # temp=0.0 -> greedy decoding, so runs are reproducible.
    sampler = make_sampler(temp=0.0)
    env = MolForgeEnvironment()
    scores: list[float] = []
    submission_scores: list[float] = []
    progress_scores: list[float] = []
    for episode_index in range(LOCAL_NUM_EPISODES):
        observation = env.reset()
        print(f"\n=== Episode {episode_index + 1}: {observation.scenario_id} ===", flush=True)
        for _ in range(LOCAL_MAX_TURNS):
            if observation.done:
                break
            action, source, elapsed = choose_mlx_action(model, tokenizer, sampler, observation)
            # In compact mode the model emits only the bare action; the
            # governance/team messages are attached here before stepping.
            if MLX_COMPACT_ACTION:
                action = attach_team_messages(observation, action)
            observation = env.step(action)
            print(
                f"step={observation.step_index:02d} action={action.action_type} actor={action.acting_role} "
                f"source={source} gen_s={elapsed:.2f} reward={observation.reward:+.3f} "
                f"budget={observation.remaining_budget} governance={observation.governance.status}",
                flush=True,
            )
            print(f" {observation.last_transition_summary}", flush=True)
            if observation.done:
                break
        # Grader scores live in the terminal observation's metadata; fall
        # back to submission_score when final_score is absent.
        grader_scores = observation.metadata.get("terminal_grader_scores", {})
        final_score = float(grader_scores.get("final_score", grader_scores.get("submission_score", 0.0)))
        submission_score = float(grader_scores.get("submission_score", 0.0))
        progress_score = float(grader_scores.get("progress_score", 0.0))
        scores.append(final_score)
        submission_scores.append(submission_score)
        progress_scores.append(progress_score)
        print(f"final_score={final_score:.3f}", flush=True)
        print(f"submission_score={submission_score:.3f}", flush=True)
        print(f"progress_score={progress_score:.3f}", flush=True)
        if observation.report_card:
            print(observation.report_card, flush=True)
    if not scores:
        # Guard: LOCAL_NUM_EPISODES <= 0 would otherwise cause a
        # ZeroDivisionError when averaging below.
        print("No episodes were run; set LOCAL_NUM_EPISODES >= 1.", flush=True)
        return
    average = sum(scores) / len(scores)
    average_progress = sum(progress_scores) / len(progress_scores)
    print("\n=== MLX LoRA Local Summary ===", flush=True)
    print(
        json.dumps(
            {
                "adapter": str(adapter_path),
                "base_model": BASE_MODEL_NAME,
                "scores": scores,
                "average_final_score": round(average, 4),
                "submission_scores": submission_scores,
                "average_submission_score": round(sum(submission_scores) / len(submission_scores), 4),
                "progress_scores": progress_scores,
                "average_progress_score": round(average_progress, 4),
            },
            indent=2,
        ),
        flush=True,
    )
def choose_mlx_action(
    model,
    tokenizer,
    sampler,
    observation: MolForgeObservation,
) -> Tuple[MolForgeAction, str, float]:
    """Pick the next action via a cascade of generation attempts.

    Order: full prompt, then one forced attempt per candidate action type,
    then a compact-prompt retry. Returns (action, source_label, seconds
    elapsed since the first attempt started); raises RuntimeError with all
    collected errors when every attempt fails.
    """
    start = time.perf_counter()

    def elapsed() -> float:
        return time.perf_counter() - start

    # Attempt 1: full prompt with the large token budget.
    primary_action, primary_error = ask_mlx_model(
        model,
        tokenizer,
        sampler,
        observation,
        compact=False,
        max_tokens=MLX_MAX_TOKENS,
        forced_action_type=None,
    )
    if primary_action is not None:
        return primary_action, "mlx_lora_model", elapsed()

    # Attempt 2: force each candidate action type in turn (compact prompt).
    forced_failures: list[str] = []
    for candidate in forced_action_types(observation):
        attempt, attempt_error = ask_mlx_model(
            model,
            tokenizer,
            sampler,
            observation,
            compact=True,
            max_tokens=MLX_RETRY_MAX_TOKENS,
            forced_action_type=candidate,
        )
        if attempt is not None:
            return attempt, f"mlx_lora_forced_{candidate}", elapsed()
        forced_failures.append(f"{candidate}:{attempt_error}")

    # Attempt 3: compact prompt without forcing an action type.
    compact_action, compact_error = ask_mlx_model(
        model,
        tokenizer,
        sampler,
        observation,
        compact=True,
        max_tokens=MLX_RETRY_MAX_TOKENS,
        forced_action_type=None,
    )
    if compact_action is not None:
        return compact_action, "mlx_lora_compact_retry", elapsed()

    raise RuntimeError(
        "MLX LoRA action failed: "
        f"full_prompt:{primary_error} | forced:{' || '.join(forced_failures)} | compact_prompt:{compact_error}"
    )
def ask_mlx_model(
    model,
    tokenizer,
    sampler,
    observation: MolForgeObservation,
    *,
    compact: bool,
    max_tokens: int,
    forced_action_type: Optional[str],
) -> Tuple[Optional[MolForgeAction], str]:
    """Run one generation attempt and parse it into a MolForgeAction.

    Returns (action, "") on success or (None, error_description) on any
    failure; never raises. The error description embeds up to 1200 chars
    of the raw model output for debugging.
    """
    raw_output = ""
    try:
        # Compact-action mode has its own payload and system prompt; the
        # default path uses the shared inference_common builders.
        if MLX_COMPACT_ACTION:
            payload = compact_action_payload(observation)
            system_prompt = COMPACT_ACTION_SYSTEM_PROMPT
        else:
            payload = build_model_payload(observation, compact=compact)
            system_prompt = COMPACT_SYSTEM_PROMPT if compact else SYSTEM_PROMPT
        raw_output = generate_response(
            model,
            tokenizer,
            sampler,
            system_prompt=system_prompt,
            user_payload=payload,
            max_tokens=max_tokens,
            use_json_prefill=MLX_JSON_PREFILL,
            forced_action_type=forced_action_type,
        )
        # The prefill prefix was part of the prompt, not the completion, so
        # it must be stitched back on before JSON extraction.
        if MLX_JSON_PREFILL:
            raw_output = json_prefill(forced_action_type) + raw_output
        parsed = extract_json(raw_output)
        repair_notes: list[str] = []
        if MLX_COMPACT_ACTION and MLX_COMPACT_REPAIR:
            parsed, repair_notes = repair_compact_action(parsed)
        if MLX_COMPACT_ACTION and "messages" in parsed:
            raise ValueError("compact action output must not include messages")
        action = MolForgeAction(**parsed)
        if repair_notes:
            action.metadata["compact_repair_notes"] = repair_notes
        return action, ""
    except Exception as exc:
        # Broad catch is deliberate: every failure mode (generation, JSON
        # parse, validation) is reported to the caller as a retryable error.
        snippet = raw_output[:1200].replace("\n", "\\n")
        return None, f"{exc.__class__.__name__}:{exc}; raw={snippet}"
def generate_response(
    model,
    tokenizer,
    sampler,
    *,
    system_prompt: str,
    user_payload: Dict[str, Any],
    max_tokens: int,
    use_json_prefill: bool,
    forced_action_type: Optional[str],
) -> str:
    """Render the chat prompt and run a single MLX generation pass.

    The user payload is serialized as minified JSON; when prefill is on,
    the JSON prefix is appended to the rendered prompt so the model
    continues an already-open JSON object. Returns the stripped completion.
    """
    chat = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": json.dumps(user_payload, separators=(",", ":"))},
    ]
    prompt_text = tokenizer.apply_chat_template(
        chat,
        tokenize=False,
        add_generation_prompt=True,
        enable_thinking=False,
    )
    if use_json_prefill:
        prompt_text = prompt_text + json_prefill(forced_action_type)
    completion = generate(
        model,
        tokenizer,
        prompt_text,
        verbose=False,
        max_tokens=max_tokens,
        sampler=sampler,
    )
    return completion.strip()
def json_prefill(forced_action_type: Optional[str]) -> str:
    """Return the JSON prefix used to seed generation.

    With a forced action type, the prefix pins "action_type" to that value;
    otherwise the generic JSON_PREFILL prefix is returned.
    """
    if not forced_action_type:
        return JSON_PREFILL
    return '{"action_type":"' + forced_action_type + '",'
def forced_action_types(observation: MolForgeObservation) -> list[str]:
    """Return the ordered action types to try in forced-retry mode.

    The MLX_FORCED_ACTION_TYPES env override wins when non-empty. Otherwise
    the opening turn tries edit/run_assay/defer (prefixed with restart on
    the level_2_hard scenario) and every later turn tries all five types.
    """
    if MLX_FORCED_ACTION_TYPES:
        return MLX_FORCED_ACTION_TYPES
    if observation.step_index != 0:
        return ["run_assay", "edit", "submit", "restart", "defer"]
    opening = ["edit", "run_assay", "defer"]
    if observation.scenario_id == "level_2_hard":
        return ["restart", *opening]
    return opening
def compact_action_payload(observation: MolForgeObservation) -> dict[str, Any]:
    """Build the trimmed per-turn payload for compact-action mode.

    Merges the shared observation fields with the lead-chemist and
    assay-planner role views (first match per role; an empty dict when a
    role view is absent), truncating candidate edits to 12 entries and
    assay readings to the 8 most recent.
    """

    def role_view(role_name: str) -> dict[str, Any]:
        # First matching role observation, or {} when the role is absent.
        for entry in observation.role_observations:
            if entry.role == role_name:
                return entry.observation
        return {}

    lead_view = role_view("lead_chemist")
    assay_view = role_view("assay_planner")

    recent_readings = []
    for reading in observation.known_assays[-8:]:
        recent_readings.append(
            {
                "tool_name": reading.tool_name,
                "property_name": reading.property_name,
                "estimate": reading.estimate,
                "confidence_low": reading.confidence_low,
                "confidence_high": reading.confidence_high,
                "molecule_signature": reading.molecule_signature,
            }
        )

    return {
        "valid_action_types": ["edit", "run_assay", "submit", "restart", "defer"],
        "scenario_id": observation.scenario_id,
        "difficulty": observation.difficulty,
        "task_brief": observation.task_brief,
        "current_molecule": observation.current_molecule,
        "current_smiles": observation.metadata.get("current_smiles", ""),
        "visible_metrics": observation.visible_metrics,
        "constraint_status": [constraint.model_dump() for constraint in observation.constraint_status],
        "remaining_budget": observation.remaining_budget,
        "max_budget": observation.max_budget,
        "step_index": observation.step_index,
        "max_steps": observation.max_steps,
        "molecule_slots": lead_view.get("molecule_slots", {}),
        "candidate_edits": lead_view.get("candidate_edits", [])[:12],
        "open_questions": lead_view.get("open_questions", []),
        "known_assays": recent_readings,
        "tool_costs": assay_view.get("tool_costs", {}),
        "evidence_gaps": assay_view.get("evidence_gaps", []),
        "estimated_information_value": assay_view.get("estimated_information_value", {}),
    }
def repair_compact_action(data: Dict[str, Any]) -> tuple[Dict[str, Any], list[str]]:
    """Bounded normalization for compact-action models.

    This repairs only schema-near-misses. It does not invent an action from a
    non-action wrapper and it still rejects invalid top-level action types.

    Args:
        data: Parsed JSON object emitted by the model.

    Returns:
        A ``(repaired, notes)`` pair, where ``notes`` holds one short tag per
        applied repair (empty when nothing changed).
    """
    repaired = dict(data)  # shallow copy: the caller's dict is not mutated
    notes: list[str] = []
    # Near-miss: model emitted "role" instead of "acting_role".
    if "role" in repaired and "acting_role" not in repaired:
        repaired["acting_role"] = repaired.pop("role")
        notes.append("role->acting_role")
    action_type = repaired.get("action_type")
    # Unknown action types are returned unrepaired so that downstream
    # MolForgeAction validation can reject them.
    if action_type not in {"edit", "run_assay", "submit", "restart", "defer"}:
        return repaired, notes
    # "replace" is a common synonym for the schema's "substitute".
    if repaired.get("edit_type") == "replace":
        repaired["edit_type"] = "substitute"
        notes.append("edit_type:replace->substitute")
    # Evidence must be a list; wrap a bare string.
    if isinstance(repaired.get("evidence"), str):
        repaired["evidence"] = [repaired["evidence"]]
        notes.append("evidence:string->list")
    # Normalize expected_effects; repair_effects always returns a full dict.
    repaired["expected_effects"] = repair_effects(repaired.get("expected_effects"), notes)
    if action_type == "run_assay":
        # Assays are always taken by the assay planner; edit-only fields
        # are cleared.
        repaired["acting_role"] = "assay_planner"
        repaired["edit_type"] = None
        repaired["slot"] = None
        repaired["fragment"] = None
        # Unknown tool names fall back to evaluate_properties.
        if repaired.get("tool_name") not in {
            "evaluate_properties",
            "dock_target",
            "assay_toxicity",
            "estimate_synthesizability",
            "evaluate_novelty",
            "search_literature",
            "run_md_simulation",
        }:
            repaired["tool_name"] = "evaluate_properties"
            notes.append("tool_name:invalid->evaluate_properties")
    else:
        # All non-assay actions are taken by the lead chemist.
        repaired["acting_role"] = "lead_chemist"
        if action_type == "edit":
            if repaired.get("edit_type") not in {"add_fragment", "substitute", "remove", "undo_last_edit"}:
                repaired["edit_type"] = "substitute"
                notes.append("edit_type:invalid->substitute")
            if repaired.get("tool_name") is not None:
                repaired["tool_name"] = None
                notes.append("tool_name:edit->null")
        else:
            # submit / restart / defer use none of the optional fields.
            for key in ("edit_type", "slot", "fragment", "tool_name"):
                if repaired.get(key) is not None:
                    repaired[key] = None
                    notes.append(f"{key}:{action_type}->null")
    # Drop anything outside the compact-action schema.
    allowed_keys = {
        "action_type",
        "acting_role",
        "edit_type",
        "slot",
        "fragment",
        "tool_name",
        "rationale",
        "evidence",
        "expected_effects",
    }
    for key in list(repaired):
        if key not in allowed_keys:
            repaired.pop(key)
            notes.append(f"drop_extra:{key}")
    # Backfill fields the schema expects to be present.
    repaired.setdefault("rationale", "Choose the next compact MolForge action.")
    repaired.setdefault("evidence", [])
    for key in ("edit_type", "slot", "fragment", "tool_name"):
        repaired.setdefault(key, None)
    return repaired, notes
def repair_effects(value: Any, notes: list[str]) -> dict[str, str]:
    """Normalize a model-emitted expected_effects object.

    Always returns a dict with exactly the five known effect keys. Unknown
    keys are dropped (with a note), synthesizability aliases are folded
    into "synth", and each kept value is normalized via
    normalize_effect_value(). Repairs are appended to *notes* in place.
    """
    effects = {
        "potency": "unknown",
        "toxicity": "unknown",
        "synth": "unknown",
        "novelty": "unknown",
        "budget": "neutral",
    }
    # Anything that is not a JSON object collapses to the defaults.
    if not isinstance(value, dict):
        notes.append("expected_effects:non_dict->defaults")
        return effects
    alias_map = {
        "synthesizability": "synth",
        "synthesis": "synth",
    }
    for raw_key, raw_value in value.items():
        canonical_key = alias_map.get(raw_key, raw_key)
        if canonical_key not in effects:
            notes.append(f"expected_effects:drop_extra:{raw_key}")
            continue
        effects[canonical_key] = normalize_effect_value(raw_value, notes, canonical_key)
    return effects
def normalize_effect_value(value: Any, notes: list[str], key: str) -> str:
    """Coerce one expected-effects value into the closed effect vocabulary.

    Args:
        value: Raw model-emitted value (usually, but not always, a string).
        notes: Repair log; a tag is appended whenever *value* is rewritten.
        key: Effect name, used only in the note tags.

    Returns:
        One of "up", "down", "neutral", "unknown", "not_applicable".
    """
    valid = {"up", "down", "neutral", "unknown", "not_applicable"}
    # isinstance guard: unhashable values (lists/dicts) would otherwise
    # raise TypeError on the set membership test instead of degrading
    # gracefully to "unknown" below.
    if isinstance(value, str) and value in valid:
        return value
    lowered = str(value).lower().strip()
    text = lowered.replace("-", "_").replace(" ", "_")
    if text in valid:
        # Case/spacing variant of a valid value, e.g. "Down", "not applicable".
        notes.append(f"expected_effects:{key}:{value}->{text}")
        return text
    if any(token in text for token in ("increase", "improve", "higher", "upward", "+")):
        notes.append(f"expected_effects:{key}:{value}->up")
        return "up"
    # "-" must be matched against the pre-replacement text: the "-" -> "_"
    # normalization above would otherwise make the token unmatchable
    # (e.g. "-10%" would fall through to "unknown").
    if "-" in lowered or any(token in text for token in ("decrease", "lower", "reduce", "downward")):
        notes.append(f"expected_effects:{key}:{value}->down")
        return "down"
    if any(token in text for token in ("maintain", "stable", "unchanged", "same")):
        notes.append(f"expected_effects:{key}:{value}->neutral")
        return "neutral"
    if "not_applicable" in text or text == "na":
        notes.append(f"expected_effects:{key}:{value}->not_applicable")
        return "not_applicable"
    notes.append(f"expected_effects:{key}:{value}->unknown")
    return "unknown"
# Script entry point: run the local MLX LoRA evaluation loop.
if __name__ == "__main__":
    main()