| from flask import Flask, request, jsonify |
| import torch |
| from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline |
|
|
# Flask application serving the chat UI and an OpenAI-style completions API.
app = Flask(__name__)


# Fixed seed for reproducibility (only relevant if sampling were enabled).
torch.random.manual_seed(0)


# Load Phi-3-mini on CPU at import time; torch_dtype="auto" uses the dtype
# stored in the checkpoint. trust_remote_code=True executes custom model code
# downloaded from the Hub — acceptable only because the source is Microsoft's
# official repository.
model = AutoModelForCausalLM.from_pretrained(
    "microsoft/Phi-3-mini-4k-instruct",
    device_map="cpu",
    torch_dtype="auto",
    trust_remote_code=True
)


tokenizer = AutoTokenizer.from_pretrained(
    "microsoft/Phi-3-mini-4k-instruct"
)


# Text-generation pipeline; given a chat-format message list it applies the
# model's chat template automatically.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer
)


# Shared decoding settings applied to every request.
generation_args = {
    "max_new_tokens": 500,
    "return_full_text": False,  # return only newly generated text, not the prompt
    "temperature": 0.0,  # NOTE(review): ignored when do_sample=False — transformers may warn; confirm intent
    "do_sample": False,  # greedy decoding -> deterministic output
}
|
|
| |
| |
| |
@app.route("/")
def index():
    """Serve the single-page chat UI.

    Returns a self-contained HTML page (inline CSS and JavaScript) that
    keeps the conversation history client-side and POSTs the full message
    list to /v1/chat/completions on each turn, then appends the reply.
    """
    # The entire page is one inline string literal; no static assets needed.
    # The UI strings inside it are intentionally Japanese (input placeholder
    # and send button) and must not be altered here.
    return """
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>Local LLM Chat</title>

<style>
body{
    font-family: Arial;
    background:#111;
    color:white;
    margin:0;
}

#chat{
    height:80vh;
    overflow-y:auto;
    padding:20px;
}

.message{
    margin-bottom:12px;
}

.user{
    color:#6cf;
}

.assistant{
    color:#9f9;
}

#inputArea{
    position:fixed;
    bottom:0;
    width:100%;
    background:#222;
    padding:10px;
}

#input{
    width:80%;
    padding:10px;
    font-size:16px;
}

button{
    padding:10px;
    font-size:16px;
}
</style>
</head>

<body>

<h2 style="padding:10px;">Local Phi-3 Chat</h2>

<div id="chat"></div>

<div id="inputArea">
    <input id="input" placeholder="メッセージを入力..." />
    <button onclick="send()">送信</button>
</div>

<script>

let messages = [
    {role:"system",content:"You are a helpful assistant."}
]

function add(role,text){

    const chat=document.getElementById("chat")

    const div=document.createElement("div")
    div.className="message "+role

    div.innerText=role+": "+text

    chat.appendChild(div)
    chat.scrollTop=chat.scrollHeight
}

async function send(){

    const input=document.getElementById("input")
    const text=input.value

    if(!text) return

    input.value=""

    add("user",text)

    messages.push({
        role:"user",
        content:text
    })

    const res=await fetch("/v1/chat/completions",{
        method:"POST",
        headers:{
            "Content-Type":"application/json"
        },
        body:JSON.stringify({
            messages:messages
        })
    })

    const data=await res.json()

    const reply=data.choices[0].message.content

    add("assistant",reply)

    messages.push({
        role:"assistant",
        content:reply
    })
}

document.getElementById("input").addEventListener("keypress",function(e){
    if(e.key==="Enter"){
        send()
    }
})

</script>

</body>
</html>
"""
|
|
| |
| |
| |
@app.route("/v1/chat/completions", methods=["POST"])
def chat_completions():
    """Minimal OpenAI-style chat-completions endpoint (non-streaming).

    Expects a JSON body with a "messages" list of {role, content} dicts
    and returns one completion generated by the local Phi-3 pipeline,
    shaped like an OpenAI chat.completion response.

    Returns:
        200 with the completion JSON on success,
        400 with an error JSON when the body is missing/not JSON or
        "messages" is absent, not a list, or empty.
    """
    # get_json(silent=True) yields None instead of raising on a malformed
    # or non-JSON body, so we can answer with a clean 400 rather than an
    # opaque 500 from request.json / a later AttributeError on None.
    data = request.get_json(silent=True)
    if data is None or not isinstance(data.get("messages"), list) or not data["messages"]:
        return jsonify(
            {"error": "request body must be JSON with a non-empty 'messages' list"}
        ), 400

    messages = data["messages"]

    # The pipeline applies the model's chat template to the message list;
    # with return_full_text=False only the new assistant text comes back.
    result = pipe(messages, **generation_args)
    text = result[0]["generated_text"]

    # Mirror the OpenAI response shape so standard clients can parse it.
    response = {
        "id": "chatcmpl-local",
        "object": "chat.completion",
        "choices": [
            {
                "index": 0,
                "message": {
                    "role": "assistant",
                    "content": text
                },
                "finish_reason": "stop"
            }
        ]
    }

    return jsonify(response)
|
|
|
|
if __name__ == "__main__":
    # Binding 0.0.0.0 exposes the server on every network interface, not just
    # localhost. This is Flask's development server — not for production use.
    app.run(host="0.0.0.0", port=7860)