Spaces:
Running on Zero
Running on Zero
| """Gradio demo for Microsoft Lens (RL) and Lens-Turbo (4-step distilled). | |
| Both pipelines are preloaded at import time and share a single GPT-OSS text | |
| encoder to fit ZeroGPU memory. ZeroGPU hijacks CUDA on `import spaces`, so we | |
| do the heavy load at module scope, not inside a `@spaces.GPU` function. | |
| """ | |
| from __future__ import annotations | |
| import os | |
| import random | |
| import spaces | |
| import torch | |
| import gradio as gr | |
| from lens import LensGptOssEncoder, LensPipeline | |
| from lens.resolution import SUPPORTED_ASPECT_RATIOS, SUPPORTED_BASE_RESOLUTIONS | |
# Precision and checkpoint locations shared by everything loaded below.
DTYPE = torch.bfloat16
TURBO_REPO = "microsoft/Lens-Turbo"
LENS_REPO = "microsoft/Lens"

# ---------------------------------------------------------------------------
# Module-scope preload: one text encoder first, then the two DiT pipelines
# that both reuse it.
# ---------------------------------------------------------------------------
text_encoder_kwargs = dict(subfolder="text_encoder", dtype=DTYPE)
try:
    from transformers import Mxfp4Config

    # Keep GPT-OSS in MXFP4 — ZeroGPU runs H200 (Hopper), which supports the
    # native kernels and saves ~25 GB vs. dequantized bf16.
    text_encoder_kwargs["quantization_config"] = Mxfp4Config(dequantize=False)
except ImportError:
    # No Mxfp4Config available: load the encoder without an explicit
    # quantization config instead of failing at import time.
    pass

# The encoder weights come from the Turbo repo and are handed to both
# pipelines, so only one copy is ever resident.
text_encoder = LensGptOssEncoder.from_pretrained(TURBO_REPO, **text_encoder_kwargs)

turbo_pipe = LensPipeline.from_pretrained(
    TURBO_REPO,
    text_encoder=text_encoder,
    torch_dtype=DTYPE,
)
turbo_pipe = turbo_pipe.to("cuda")

lens_pipe = LensPipeline.from_pretrained(
    LENS_REPO,
    text_encoder=text_encoder,
    torch_dtype=DTYPE,
)
lens_pipe = lens_pipe.to("cuda")
# UI label -> preloaded pipeline. Insertion order is the order in which the
# choices are offered, and the first entry is the default model.
PIPES = {
    "Lens-Turbo (4 steps)": turbo_pipe,
    "Lens (20 steps, RL)": lens_pipe,
}
MODEL_CHOICES = list(PIPES)

# Upper bound for seeds (largest signed 32-bit integer).
MAX_SEED = (1 << 31) - 1
def model_defaults(model_name: str):
    """Return the default ``(steps, guidance_scale)`` pair for *model_name*.

    Names containing ``"Turbo"`` map to 4 steps at guidance 1.0; every other
    name maps to 20 steps at guidance 5.0.
    """
    is_turbo = "Turbo" in model_name
    return (4, 1.0) if is_turbo else (20, 5.0)
@spaces.GPU
def generate(
    prompt: str,
    model_name: str = MODEL_CHOICES[0],
    base_resolution: int = 1024,
    aspect_ratio: str = "1:1",
    steps: int | None = None,
    cfg: float | None = None,
    seed: int = 0,
    randomize_seed: bool = True,
    progress=gr.Progress(track_tqdm=True),
):
    """Generate one image for ``prompt`` with the selected preloaded pipeline.

    The function is wrapped in ``spaces.GPU`` so that ZeroGPU attaches a GPU
    for the duration of the call; the pipelines themselves are loaded once at
    module scope and only looked up here.

    Args:
        prompt: Text prompt. Leading/trailing whitespace is stripped before
            it is passed to the pipeline.
        model_name: Key into ``PIPES`` selecting which pipeline to run.
        base_resolution: Forwarded as ``base_resolution`` after ``int()``.
        aspect_ratio: Forwarded unchanged as ``aspect_ratio``.
        steps: Number of inference steps. ``None`` selects the per-model
            default from ``model_defaults``.
        cfg: Guidance scale. ``None`` selects the per-model default from
            ``model_defaults``.
        seed: Seed to use when ``randomize_seed`` is false.
        randomize_seed: When true, ``seed`` is ignored and a fresh value in
            ``[0, MAX_SEED]`` is drawn.
        progress: Not read in the body; declaring a ``gr.Progress`` default
            with ``track_tqdm=True`` lets Gradio surface tqdm progress bars.

    Returns:
        A ``(image, seed)`` tuple: the first generated image and the seed
        that was actually used.

    Raises:
        gr.Error: If ``prompt`` is empty or whitespace-only.
        KeyError: If ``model_name`` is not a key of ``PIPES``.
    """
    if not prompt or not prompt.strip():
        raise gr.Error("Please enter a prompt.")

    pipe = PIPES[model_name]

    # Callers that only pass prompt and model (e.g. the examples gallery)
    # leave steps/cfg as None, so fall back to the model's own defaults.
    default_steps, default_cfg = model_defaults(model_name)
    steps = default_steps if steps is None else int(steps)
    cfg = default_cfg if cfg is None else float(cfg)

    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    # UI sliders may deliver the seed as a float; manual_seed needs an int.
    seed = int(seed)

    # Seed a generator on the pipeline's own device so runs are reproducible
    # for a given seed.
    generator = torch.Generator(device=pipe._execution_device).manual_seed(seed)

    out = pipe(
        prompt=prompt.strip(),
        base_resolution=int(base_resolution),
        aspect_ratio=aspect_ratio,
        num_inference_steps=steps,
        guidance_scale=cfg,
        num_images_per_prompt=1,
        generator=generator,
    )
    return out.images[0], seed
CSS = """
#col-container { max-width: 1100px; margin: 0 auto; }
"""

# Header shown at the top of the page.
HEADER_MD = """
# Microsoft Lens
3.8B foundational text-to-image model. Switch between **Lens-Turbo**
(4-step distilled, fast) and **Lens** (20-step RL-tuned, higher
quality).
[Paper](https://arxiv.org/abs/2605.21573) · [Code](https://github.com/microsoft/Lens) · [Lens](https://huggingface.co/microsoft/Lens) · [Lens-Turbo](https://huggingface.co/microsoft/Lens-Turbo)
"""

# (prompt, model) rows for the examples gallery: two per checkpoint.
EXAMPLES = [
    ["A generous portion of classic British fish and chips on white paper, golden crispy beer-battered cod, thick-cut chips, lemon wedge, mushy peas, wooden pub table, overhead shot", MODEL_CHOICES[0]],
    ["A crystal dragon soaring through an aurora borealis sky, transparent faceted body refracting green and purple light, ice trail from its wings, high fantasy digital art", MODEL_CHOICES[0]],
    ["Aerial view of Yuanyang rice terraces at sunrise, cascading water-filled paddies reflecting pink sky, morning mist between layers, drone photography", MODEL_CHOICES[1]],
    ["A green iguana basking on a moss-covered log in a tropical rainforest, every scale rendered sharply, dewdrops on its skin, National Geographic style", MODEL_CHOICES[1]],
]

with gr.Blocks(theme=gr.themes.Citrus(), css=CSS, title="Lens / Lens-Turbo") as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown(HEADER_MD)

        with gr.Row():
            # Left column: prompt, model picker and advanced settings.
            with gr.Column(scale=3):
                prompt = gr.Textbox(
                    label="Prompt",
                    placeholder="A cinematic mountain lake at sunrise, soft golden light, mist rising off the water",
                    lines=3,
                )
                with gr.Row():
                    model = gr.Radio(
                        choices=MODEL_CHOICES,
                        value=MODEL_CHOICES[0],
                        label="Model",
                    )
                    run_btn = gr.Button("Generate", variant="primary")

                with gr.Accordion("Advanced", open=False):
                    with gr.Row():
                        base_res = gr.Radio(
                            choices=list(SUPPORTED_BASE_RESOLUTIONS),
                            value=1024,
                            label="Base resolution",
                        )
                        aspect = gr.Dropdown(
                            choices=list(SUPPORTED_ASPECT_RATIOS),
                            value="1:1",
                            label="Aspect ratio (W:H)",
                        )
                    with gr.Row():
                        # Initial values match the defaults of the initially
                        # selected (Turbo) model.
                        steps = gr.Slider(1, 50, value=4, step=1, label="Steps")
                        cfg = gr.Slider(1.0, 10.0, value=1.0, step=0.1, label="Guidance scale")
                    with gr.Row():
                        seed = gr.Slider(0, MAX_SEED, value=0, step=1, label="Seed")
                        randomize = gr.Checkbox(value=True, label="Randomize seed")

            # Right column: the result and the seed that produced it.
            with gr.Column(scale=4):
                image = gr.Image(label="Output", type="pil", height=640)
                used_seed = gr.Number(label="Seed used", interactive=False)

        # Examples only supply prompt and model; generate() fills in the
        # remaining arguments from its own defaults.
        gr.Examples(
            examples=EXAMPLES,
            inputs=[prompt, model],
            outputs=[image, used_seed],
            fn=generate,
            cache_examples=True,
            cache_mode="lazy",
        )

    def _sync_defaults(model_name):
        """Reset the steps and guidance sliders to the chosen model's defaults."""
        default_steps, default_cfg = model_defaults(model_name)
        return gr.update(value=default_steps), gr.update(value=default_cfg)

    model.change(_sync_defaults, inputs=model, outputs=[steps, cfg])

    # The button and the prompt's submit event run the same generation call;
    # register them in this order (click first, then submit).
    generate_inputs = [prompt, model, base_res, aspect, steps, cfg, seed, randomize]
    generate_outputs = [image, used_seed]
    for trigger in (run_btn.click, prompt.submit):
        trigger(generate, inputs=generate_inputs, outputs=generate_outputs)
# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()