ysharma HF Staff committed on
Commit
cc5df64
·
verified ·
1 Parent(s): ca7940d

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -0
app.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import spaces
3
+ import torch
4
+ from threading import Thread
5
+ from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
6
+ from fastapi.responses import HTMLResponse
7
+ from pathlib import Path
8
+
9
# Server object that the API, MCP-tool and HTTP-route decorators in this file attach to.
app = gr.Server()

# Directory containing this file; static assets are resolved relative to it.
HOME = Path(__file__).parent

# Checkpoint identifier shared by the tokenizer and the model.
MODEL_ID = "HuggingFaceTB/SmolLM2-1.7B-Instruct"
tok = AutoTokenizer.from_pretrained(MODEL_ID)

# Load the weights in bfloat16 first, then move the model onto the GPU.
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype=torch.bfloat16)
model = model.to("cuda")
15
+
16
@spaces.GPU
def _generate(text: str):
    """Stream a three-bullet summary of ``text`` from the chat model.

    This is a generator on purpose: the background ``model.generate`` call
    runs for as long as the caller is iterating, so the streaming has to
    happen *inside* the ``@spaces.GPU``-decorated call rather than after it
    has already returned a streamer object.
    NOTE(review): assumes ZeroGPU scopes the GPU to the decorated call and
    supports generator functions -- confirm against the ``spaces`` docs.

    Args:
        text: Raw text to summarize; it is interpolated into the user prompt.

    Yields:
        Decoded text fragments in generation order, with the prompt and
        special tokens skipped.
    """
    # return_dict=True returns the attention mask alongside input_ids, so
    # generate() receives an explicit mask instead of having to infer one.
    inputs = tok.apply_chat_template(
        [{"role": "user", "content": f"Summarize in 3 bullets:\n\n{text}"}],
        return_tensors="pt",
        add_generation_prompt=True,
        return_dict=True,
    ).to("cuda")
    streamer = TextIteratorStreamer(tok, skip_prompt=True, skip_special_tokens=True)

    # generate() blocks until it is done, so it runs on a worker thread while
    # this generator drains the streamer.
    worker = Thread(
        target=model.generate,
        kwargs=dict(
            **inputs,
            streamer=streamer,
            max_new_tokens=300,
            do_sample=False,
        ),
    )
    worker.start()
    try:
        yield from streamer
    finally:
        # Do not leave the decorated call while the worker may still be
        # running on the GPU (also covers a consumer that stops early).
        worker.join()
27
+
28
@app.mcp.tool(name="summarize")
@app.api(name="summarize", concurrency_limit=1, stream_every=0.2)
def summarize(text: str):
    """Summarize the input text into 3 bullet points."""
    # NOTE(review): the docstring above is kept verbatim because MCP/API
    # tooling may surface it as the tool description -- confirm before editing.
    #
    # Every yielded value is the full summary produced so far (not a delta),
    # so a consumer can simply display the latest value.
    pieces = []
    for piece in _generate(text):
        pieces.append(piece)
        yield "".join(pieces)
36
+
37
@app.get("/", response_class=HTMLResponse)
async def index():
    """Return the contents of ``index.html`` located next to this module.

    The file is read from disk on every request and decoded as UTF-8.
    """
    page = HOME / "index.html"
    return page.read_text(encoding="utf-8")
40
+
41
if __name__ == "__main__":
    # Only start the server when executed as a script; MCP serving is enabled.
    app.launch(mcp_server=True)