from fastapi import FastAPI
from pydantic import BaseModel
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

app = FastAPI()

MODEL_NAME = "mjpsm/progress-generation-model"
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the tokenizer and model once at startup and move the model
# to the GPU when one is available.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME).to(device)

# The model has no dedicated pad token, so reuse the end-of-sequence token.
tokenizer.pad_token = tokenizer.eos_token


class Request(BaseModel):
    text: str


def generate_response(user_input: str) -> str:
    # Wrap the user input in the chat-style template the model expects.
    prompt = f"""<|system|>
You describe what progress was achieved in one sentence.
<|user|>
{user_input}
<|assistant|>
"""
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=50,
            temperature=0.6,
            top_p=0.9,
            repetition_penalty=1.2,
            pad_token_id=tokenizer.eos_token_id,
        )
    decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # Keep only the text generated after the assistant marker.
    return decoded.split("<|assistant|>")[-1].strip()


@app.get("/")
def root():
    return {"message": "Progress Model API running"}


@app.post("/predict")
def predict(req: Request):
    result = generate_response(req.text)
    return {"output": result}
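
# --- Usage sketch (not part of the server) ---------------------------------
# A minimal example of calling the /predict endpoint, assuming the file is
# saved as main.py and the server is started with the default uvicorn port:
#
#     uvicorn main:app --host 127.0.0.1 --port 8000
#
# The input text below is a made-up example, and the `requests` library is
# assumed to be installed.
#
#     import requests
#
#     resp = requests.post(
#         "http://127.0.0.1:8000/predict",
#         json={"text": "Refactored the login flow and fixed two session bugs."},
#     )
#     resp.raise_for_status()
#     print(resp.json()["output"])  # one-sentence progress summary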