| from flask import Flask, request, jsonify |
| import torch |
| from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline |
|
|
# Flask application serving the chat UI and an OpenAI-style completions API.
app = Flask(__name__)


# Fixed seed for reproducibility (only relevant if sampling were enabled).
torch.random.manual_seed(0)


# Load Phi-3-mini on CPU at import time; torch_dtype="auto" uses the dtype
# stored in the checkpoint. trust_remote_code=True executes custom model code
# downloaded from the Hub — acceptable only because the source is Microsoft's
# official repository.
model = AutoModelForCausalLM.from_pretrained(
    "microsoft/Phi-3-mini-4k-instruct",
    device_map="cpu",
    torch_dtype="auto",
    trust_remote_code=True
)


tokenizer = AutoTokenizer.from_pretrained(
    "microsoft/Phi-3-mini-4k-instruct"
)


# Text-generation pipeline; given a chat-format message list it applies the
# model's chat template automatically.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer
)


# Shared decoding settings applied to every request.
generation_args = {
    "max_new_tokens": 500,
    "return_full_text": False,  # return only newly generated text, not the prompt
    "temperature": 0.0,  # NOTE(review): ignored when do_sample=False — transformers may warn; confirm intent
    "do_sample": False,  # greedy decoding -> deterministic output
}
|
|
| |
| |
| |
@app.route("/")
def index():
    """Serve the single-page chat UI.

    Returns a self-contained HTML page (inline CSS and JavaScript) that
    keeps the conversation history client-side and POSTs the full message
    list to /v1/chat/completions on each turn, then appends the reply.
    """
    # The entire page is one inline string literal; no static assets needed.
    # The UI strings inside it are intentionally Japanese (input placeholder
    # and send button) and must not be altered here.
    return """
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>Local LLM Chat</title>

<style>
body{
    font-family: Arial;
    background:#111;
    color:white;
    margin:0;
}

#chat{
    height:80vh;
    overflow-y:auto;
    padding:20px;
}

.message{
    margin-bottom:12px;
}

.user{
    color:#6cf;
}

.assistant{
    color:#9f9;
}

#inputArea{
    position:fixed;
    bottom:0;
    width:100%;
    background:#222;
    padding:10px;
}

#input{
    width:80%;
    padding:10px;
    font-size:16px;
}

button{
    padding:10px;
    font-size:16px;
}
</style>
</head>

<body>

<h2 style="padding:10px;">Local Phi-3 Chat</h2>

<div id="chat"></div>

<div id="inputArea">
    <input id="input" placeholder="メッセージを入力..." />
    <button onclick="send()">送信</button>
</div>

<script>

let messages = [
    {role:"system",content:"You are a helpful assistant."}
]

function add(role,text){

    const chat=document.getElementById("chat")

    const div=document.createElement("div")
    div.className="message "+role

    div.innerText=role+": "+text

    chat.appendChild(div)
    chat.scrollTop=chat.scrollHeight
}

async function send(){

    const input=document.getElementById("input")
    const text=input.value

    if(!text) return

    input.value=""

    add("user",text)

    messages.push({
        role:"user",
        content:text
    })

    const res=await fetch("/v1/chat/completions",{
        method:"POST",
        headers:{
            "Content-Type":"application/json"
        },
        body:JSON.stringify({
            messages:messages
        })
    })

    const data=await res.json()

    const reply=data.choices[0].message.content

    add("assistant",reply)

    messages.push({
        role:"assistant",
        content:reply
    })
}

document.getElementById("input").addEventListener("keypress",function(e){
    if(e.key==="Enter"){
        send()
    }
})

</script>

</body>
</html>
"""
|
|
| |
| |
| |
@app.route("/v1/chat/completions", methods=["POST"])
def chat_completions():
    """Minimal OpenAI-style chat-completions endpoint (non-streaming).

    Expects a JSON body with a "messages" list of {role, content} dicts
    and returns one completion generated by the local Phi-3 pipeline,
    shaped like an OpenAI chat.completion response.

    Returns:
        200 with the completion JSON on success,
        400 with an error JSON when the body is missing/not JSON or
        "messages" is absent, not a list, or empty.
    """
    # get_json(silent=True) yields None instead of raising on a malformed
    # or non-JSON body, so we can answer with a clean 400 rather than an
    # opaque 500 from request.json / a later AttributeError on None.
    data = request.get_json(silent=True)
    if data is None or not isinstance(data.get("messages"), list) or not data["messages"]:
        return jsonify(
            {"error": "request body must be JSON with a non-empty 'messages' list"}
        ), 400

    messages = data["messages"]

    # The pipeline applies the model's chat template to the message list;
    # with return_full_text=False only the new assistant text comes back.
    result = pipe(messages, **generation_args)
    text = result[0]["generated_text"]

    # Mirror the OpenAI response shape so standard clients can parse it.
    response = {
        "id": "chatcmpl-local",
        "object": "chat.completion",
        "choices": [
            {
                "index": 0,
                "message": {
                    "role": "assistant",
                    "content": text
                },
                "finish_reason": "stop"
            }
        ]
    }

    return jsonify(response)
|
|
|
|
if __name__ == "__main__":
    # Binding 0.0.0.0 exposes the server on every network interface, not just
    # localhost. This is Flask's development server — not for production use.
    app.run(host="0.0.0.0", port=7860)