#app.py.chatbot
#app.py Modif04
#https:
import gradio as gr
from llama_cpp import Llama


# Load the local GGUF model with llama-cpp-python.
llm = Llama(
    model_path="/home/user/app/h2o-danube3-500m-chat-Q4_K_M.gguf",
    verbose=True
)
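
# Commented-out sketch: the Llama constructor also accepts context-size and CPU
# threading options. The values below are illustrative assumptions, not tuned settings.
#llm = Llama(
#    model_path="/home/user/app/h2o-danube3-500m-chat-Q4_K_M.gguf",
#    n_ctx=2048,      # context window in tokens (assumed value)
#    n_threads=4,     # CPU threads used for inference (assumed value)
#    verbose=True
#)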


def predict(message, history):
    # Alternative first messages from earlier revisions, kept for reference:
    # messages = [{"role": "system", "content": "You are a helpful assistant."}]
    # messages = [{"role": "assistant", "content": "You are a helpful assistant."}]
    # messages = [{"role": "assistant", "content": "Hello, how can I help you?"}]
    messages = []
    # Rebuild the conversation from Gradio's (user, assistant) history pairs.
    for user_message, bot_message in history:
        if user_message:
            messages.append({"role": "user", "content": user_message})
        if bot_message:
            messages.append({"role": "assistant", "content": bot_message})
    messages.append({"role": "user", "content": message})

    # Stream the completion, yielding the accumulated text so the UI
    # updates incrementally as tokens arrive.
    response = ""
    for chunk in llm.create_chat_completion(
        stream=True,
        messages=messages,
    ):
        part = chunk["choices"][0]["delta"].get("content", None)
        if part:
            response += part
        yield response
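
# Commented-out sketch: create_chat_completion also accepts sampling parameters
# (max_tokens, temperature, top_p). This hypothetical variant shows the call shape;
# the numeric values are illustrative assumptions, and history handling is omitted.
#def predict_tuned(message, history):
#    messages = [{"role": "user", "content": message}]
#    response = ""
#    for chunk in llm.create_chat_completion(
#        messages=messages,
#        stream=True,
#        max_tokens=512,      # cap on generated tokens (assumed value)
#        temperature=0.7,     # sampling temperature (assumed value)
#        top_p=0.95,          # nucleus sampling threshold (assumed value)
#    ):
#        part = chunk["choices"][0]["delta"].get("content", None)
#        if part:
#            response += part
#        yield response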


# Minimal chat UI wired to the streaming generator above.
demo = gr.ChatInterface(predict)


demo.launch()
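
# Commented-out sketch: launch() can bind an explicit host and port (useful in a
# container). server_name/server_port are standard Gradio parameters; the values
# below are illustrative assumptions.
#demo.launch(server_name="0.0.0.0", server_port=7860)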





##app.py Modif03
#import gradio as gr
#from huggingface_hub import create_inference_endpoint, InferenceClient
#from transformers import AutoModelForCausalLM, AutoTokenizer
#
##model_name = "MisterAI/H20GPT_h2o-danube3-500m-chat-Q4_K_M_gguf"
##model = AutoModelForCausalLM.from_pretrained(model_name)
##tokenizer = AutoTokenizer.from_pretrained(model_name)
#
##client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
##client = InferenceClient("MisterAI/H20GPT_h2o-danube3-500m-chat-Q4_K_M_gguf")
##client = InferenceClient("/home/user/app/H20GPT_h2o-danube3-500m-chat-Q4_K_M.gguf")
#
## Create a local Inference endpoint instance
#endpoint = create_inference_endpoint(
#    "Local-Endpoint-MisterAI-H2O",
#    repository="MisterAI/H20GPT_h2o-danube3-500m-chat-Q4_K_M_gguf",
##    model_path="/home/user/app/H20GPT_h2o-danube3-500m-chat-Q4_K_M.gguf",
#    framework="pytorch",
#    task="text-generation",
#    accelerator="cpu",
#    vendor="local",
#    region="local",
#    type="unprotected",
#    instance_size="small",
#    instance_type="local",
#    URL="http://0.0.0.0:6789"
#)
#
#print(f"Endpoint created at URL: {endpoint.url}")
#
#client = endpoint.client
#
#
#
#def respond(
#    message,
#    history: list[tuple[str, str]],
#    system_message,
#    max_tokens,
#    temperature,
#    top_p,
#):
#    messages = [{"role": "system", "content": system_message}]
#
#    for val in history:
#        if val[0]:
#            messages.append({"role": "user", "content": val[0]})
#        if val[1]:
#            messages.append({"role": "assistant", "content": val[1]})
#
#    messages.append({"role": "user", "content": message})
#
#    response = ""
#
#    for message in client.chat_completion(
#        messages,
#        max_tokens=max_tokens,
#        stream=True,
#        temperature=temperature,
#        top_p=top_p,
#    ):
#        token = message.choices[0].delta.content
#
#        response += token
#        yield response
#
#demo = gr.ChatInterface(
#    respond,
#    additional_inputs=[
#        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
#        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
#        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
#        gr.Slider(
#            minimum=0.1,
#            maximum=1.0,
#            value=0.95,
#            step=0.05,
#            label="Top-p (nucleus sampling)",
#        ),
#    ],
#)
#
#
#if __name__ == "__main__":
#    demo.launch()
#
#
#
#
##app.py Modif01
#import gradio as gr
#from huggingface_hub import Inference, InferenceClient
#from transformers import AutoModelForCausalLM, AutoTokenizer
#
##model_name = "MisterAI/H20GPT_h2o-danube3-500m-chat-Q4_K_M_gguf"
##model = AutoModelForCausalLM.from_pretrained(model_name)
##tokenizer = AutoTokenizer.from_pretrained(model_name)
#
##client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
##client = InferenceClient("MisterAI/H20GPT_h2o-danube3-500m-chat-Q4_K_M_gguf")
##client = InferenceClient("/home/user/app/H20GPT_h2o-danube3-500m-chat-Q4_K_M.gguf")
#
## Create a local Inference instance
#inference = Inference(
#    model_path="/home/user/app/H20GPT_h2o-danube3-500m-chat-Q4_K_M.gguf",
#    device="cpu",  # Use the CPU for inference
#    token=None,    # No token needed for this instance
#)
#
#client = inference
#
#
#
#def respond(
#    message,
#    history: list[tuple[str, str]],
#    system_message,
#    max_tokens,
#    temperature,
#    top_p,
#):
#    messages = [{"role": "system", "content": system_message}]
#
#    for val in history:
#        if val[0]:
#            messages.append({"role": "user", "content": val[0]})
#        if val[1]:
#            messages.append({"role": "assistant", "content": val[1]})
#
#    messages.append({"role": "user", "content": message})
#
#    response = ""
#
#    for message in client.chat_completion(
#        messages,
#        max_tokens=max_tokens,
#        stream=True,
#        temperature=temperature,
#        top_p=top_p,
#    ):
#        token = message.choices[0].delta.content
#
#        response += token
#        yield response
#
#demo = gr.ChatInterface(
#    respond,
#    additional_inputs=[
#        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
#        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
#        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
#        gr.Slider(
#            minimum=0.1,
#            maximum=1.0,
#            value=0.95,
#            step=0.05,
#            label="Top-p (nucleus sampling)",
#        ),
#    ],
#)
#
#
#if __name__ == "__main__":
#    demo.launch()
#
#
#
#
#
##app.py ORIGINAL
#import gradio as gr
#from huggingface_hub import InferenceClient
#
#"""
#For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
#"""
#client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
#
#
#def respond(
#    message,
#    history: list[tuple[str, str]],
#    system_message,
#    max_tokens,
#    temperature,
#    top_p,
#):
#    messages = [{"role": "system", "content": system_message}]
#
#    for val in history:
#        if val[0]:
#            messages.append({"role": "user", "content": val[0]})
#        if val[1]:
#            messages.append({"role": "assistant", "content": val[1]})
#
#    messages.append({"role": "user", "content": message})
#
#    response = ""
#
#    for message in client.chat_completion(
#        messages,
#        max_tokens=max_tokens,
#        stream=True,
#        temperature=temperature,
#        top_p=top_p,
#    ):
#        token = message.choices[0].delta.content
#
#        response += token
#        yield response
#
#"""
#For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
#"""
#demo = gr.ChatInterface(
#    respond,
#    additional_inputs=[
#        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
#        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
#        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
#        gr.Slider(
#            minimum=0.1,
#            maximum=1.0,
#            value=0.95,
#            step=0.05,
#            label="Top-p (nucleus sampling)",
#        ),
#    ],
#)
#
#
#if __name__ == "__main__":
#    demo.launch()