import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
import threading
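# Gradio chat app serving a quantized Llama 2 GGUF model locally via
# llama-cpp-python: the weights are fetched from the Hugging Face Hub and
# loaded in a background thread so the interface can start right away.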

TITLE = "AI Copilot for Patients"
DESCRIPTION = "I answer health-related questions and concerns."

# Globals shared between the loader thread and the chat handler.
llm_llama_cpp = None
model_ready = False


def load_model():
    """Download the GGUF weights and initialize the llama.cpp model."""
    global llm_llama_cpp, model_ready
    try:
        print("Downloading model...")
        model_file_path = hf_hub_download(
            repo_id="TheBloke/Llama-2-7B-GGUF",
            filename="llama-2-7b.Q4_0.gguf",
        )

        print("Initializing model...")
        llm_llama_cpp = Llama(
            model_path=model_file_path,
            verbose=False,
            n_ctx=4096,  # context window size in tokens
        )
        model_ready = True
        print("Model is ready.")
    except Exception as e:
        print(f"Failed to load model: {e}")
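
# hf_hub_download caches the weights locally (under ~/.cache/huggingface by
# default), so subsequent launches skip the download step.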

# Load in the background so the UI is not blocked; daemon=True lets the
# process exit even if the model is still loading.
threading.Thread(target=load_model, daemon=True).start()


def talk(prompt, history):
    # Generator handler: Gradio streams each yielded string to the chatbot.
    if not model_ready:
        yield "⏳ Please wait, the model is still loading..."
        return

    try:
        response = ""
        response_stream = llm_llama_cpp.create_completion(
            prompt=prompt,
            max_tokens=200,
            stream=True,
        )

        # Accumulate streamed tokens and yield the partial text so the
        # chat window updates while the model generates.
        for chunk in response_stream:
            response += chunk["choices"][0].get("text", "")
            yield response

    except Exception as e:
        print(f"Error in generating response: {e}")
        yield f"Error during response generation: {e}"
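
# Note: TheBloke/Llama-2-7B-GGUF is the base Llama 2 model, so the raw user
# message works as a plain completion prompt. A chat-tuned GGUF (e.g.
# Llama-2-7B-Chat) would expect its own instruction template instead.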


demo = gr.ChatInterface(
    fn=talk,
    chatbot=gr.Chatbot(
        show_label=True,
        show_share_button=True,
        show_copy_button=True,
        layout="bubble",
        type="messages",
    ),
    theme=gr.themes.Soft(),  # built-in theme object; the string form is lowercase "soft"
    examples=["What is diabetes?"],
    title=TITLE,
    description=DESCRIPTION,
)
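
# With type="messages", `history` arrives as a list of {"role": ..., "content": ...}
# dicts; this simple handler ignores it and answers each prompt independently.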

# share=True additionally exposes the app via a temporary public Gradio link.
demo.launch(share=True)