# flas-demo / app.py — Hugging Face Space source (initial demo by Lunamos, commit 76e22a8).
"""FLAS interactive demo (Gemma-2-2B-IT).
Pulls the bf16 checkpoint from the Hugging Face Hub on first launch, then
serves a Gradio UI that runs steered vs baseline generation side-by-side.
Runs locally with `python app.py` (any CUDA GPU >= 6 GB) and on Hugging Face
Spaces with the same code (the @gpu decorator becomes a ZeroGPU slice).
"""
import gradio as gr
import torch
from huggingface_hub import hf_hub_download
# ZeroGPU decorator on Spaces; no-op locally so the same code runs unchanged.
try:
    import spaces  # only available inside a Hugging Face Space runtime
except ImportError:
    spaces = None

if spaces is not None:
    # On Spaces: request a 120-second ZeroGPU slice per call.
    gpu = spaces.GPU(duration=120)
else:
    def gpu(fn):
        """Identity decorator used when running outside HF Spaces."""
        return fn
HF_REPO = "flas-ai/flas-gemma-2-2b-it"
_gen = None
_ckpt_path = None
def _ensure_loaded():
    """Lazy init: download weights and build FlasGenerator on first call.

    Caches both the checkpoint path and the generator in module globals so
    repeated UI calls reuse the same loaded model.
    """
    global _gen, _ckpt_path
    if _gen is not None:
        return _gen
    # Import here so the module can be imported without the flas package
    # being resolvable at collection time.
    from flas.generate import load_generator
    if _ckpt_path is None:
        _ckpt_path = hf_hub_download(HF_REPO, "flas-gemma-2-2b-it.safetensors")
        # Download the config too; the hub cache stores it alongside the weights.
        hf_hub_download(HF_REPO, "config.json")
    _gen = load_generator(_ckpt_path)
    return _gen
@gpu
def steer(concept, prompt, flowtime, n_steps, max_tokens, temperature):
    """Generate steered and baseline completions for one prompt.

    Returns a ``(steered_text, baseline_text)`` tuple matching the UI's
    ``outputs=[steered_out, baseline_out]`` wiring.
    """
    gen = _ensure_loaded()
    # A cleared Gradio Textbox can deliver None; normalize to "" so the
    # .strip() checks below cannot raise AttributeError.
    concept = concept or ""
    prompt = prompt or ""
    if not prompt.strip():
        return "(prompt is empty)", "(prompt is empty)"
    # Generate baseline (no steering) and steered side-by-side. We pass T=0
    # for baseline; the velocity field still runs but contributes nothing.
    # A single space stands in for an empty concept string.
    baseline = gen.generate_batch(
        [prompt], concept or " ",
        flowtimes=[0.0], n_steps=int(n_steps),
        max_tokens=int(max_tokens), temperature=float(temperature),
        max_batch=1,
    )[0]["generation"]
    if not concept.strip():
        # Fix: the hint goes in the *steered* box and the baseline text in the
        # *baseline* box (the original returned them swapped).
        return "(set a concept to see the steered output)", baseline
    steered = gen.generate_batch(
        [prompt], concept,
        flowtimes=[float(flowtime)], n_steps=int(n_steps),
        max_tokens=int(max_tokens), temperature=float(temperature),
        max_batch=1,
    )[0]["generation"]
    return steered, baseline
EXAMPLES = [
["Talk like a pirate", "Tell me about your day."],
["Respond as a noir detective", "How do I make a good cup of coffee?"],
["Always reference places in Minnesota", "Plan me a perfect Sunday."],
["Frame everything as a musical performance", "Explain quantum mechanics like I'm new to it."],
["French words and phrases related to months and days", "Describe the weather in autumn."],
["Speak in programming terms", "What does it feel like to be tired?"],
]
INTRO = """
# FLAS β€” Flow-based Activation Steering
Steer **Gemma-2-2B-IT** toward any concept you can describe in words. Drop in a
phrase like *"talk like a pirate"* or *"always reference places in Minnesota"*,
adjust the strength, and the model rewrites itself accordingly. No fine-tuning,
no per-concept training.
[πŸ“„ Paper](https://arxiv.org/abs/2605.05892) Β· [πŸ’» Code](https://github.com/flas-ai/FLAS) Β· [🀝 Model card](https://huggingface.co/flas-ai/flas-gemma-2-2b-it)
"""
with gr.Blocks(title="FLAS β€” Flow-based Activation Steering") as demo:
gr.Markdown(INTRO)
with gr.Row():
with gr.Column(scale=1):
concept = gr.Textbox(
label="Steering concept",
placeholder="e.g. talk like a pirate",
value="Talk like a pirate", lines=2,
)
prompt = gr.Textbox(
label="Your prompt",
value="Tell me about your day.", lines=3,
)
with gr.Row():
flowtime = gr.Slider(0.0, 4.0, value=2.0, step=0.1,
label="Flow time T (steering strength)")
n_steps = gr.Slider(1, 10, value=3, step=1, label="Euler steps N")
with gr.Row():
max_tokens = gr.Slider(32, 256, value=128, step=32, label="Max tokens")
temperature = gr.Slider(0.0, 1.5, value=0.7, step=0.1, label="Temperature")
run_btn = gr.Button("Generate", variant="primary")
with gr.Column(scale=1):
steered_out = gr.Textbox(
label="Steered (FLAS @ chosen T)", lines=10,
)
baseline_out = gr.Textbox(
label="Baseline (no steering)", lines=10,
)
gr.Examples(EXAMPLES, inputs=[concept, prompt],
label="Try one of these:")
run_btn.click(
steer,
inputs=[concept, prompt, flowtime, n_steps, max_tokens, temperature],
outputs=[steered_out, baseline_out],
)
if __name__ == "__main__":
demo.launch(theme=gr.themes.Soft())