| import gradio as gr |
| import requests |
| import re |
| import html |
| import traceback |
| import logging |
|
|
| |
# Timestamped INFO-level logging for request/response tracing.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')


# OpenAI-compatible completions endpoint of a locally running vLLM server,
# and the model name to request ("lora" — presumably a served LoRA adapter;
# confirm against the vLLM server's --lora-modules configuration).
VLLM_URL = "http://localhost:6002/v1/completions"
MODEL = "lora"
|
|
def flexible_parse(text):
    """Extract (tag, content) pairs from loosely XML-like text.

    Recognizes both paired tags (``<tag ...>content</tag>``) and standalone
    opening tags (``<tag ...>`` with no matching close); standalone tags
    yield an empty content string.  Tag attributes are accepted but
    discarded.  Returns a list of ``(tag_name, stripped_content)`` tuples
    in document order.
    """
    # First alternative: a paired tag whose content must not contain another
    # opening of the same tag (negative lookahead on <\1).  Second
    # alternative: a lone opening tag.
    pattern = r'<(\w+)(?:\s+[^>]*)?>((?:(?!<\1).)*?)</\1>|<(\w+)(?:\s+[^>]*)?>'
    result = []

    # re.DOTALL lets '.' cross newlines so multi-line content (e.g. the
    # <content>\n...\n</content> blocks this app exchanges) is captured.
    # Without it, any tag pair spanning a newline was mis-parsed as an
    # empty standalone tag.
    for match in re.finditer(pattern, text, re.DOTALL):
        tag, content, single_tag = match.groups()
        if single_tag:
            result.append((single_tag, ''))
        elif tag:
            result.append((tag, content.strip() if content else ''))

    return result
|
|
def format_as_collapsible_markdown(parsed_content):
    """Render (tag, content) pairs as HTML-escaped Markdown.

    Tags with content become collapsible ``<details>`` sections; tags
    without content are emitted as a bold heading only.  Both tag names
    and content are HTML-escaped before insertion.
    """
    pieces = []
    for name, body in parsed_content:
        safe_name = html.escape(name)
        if body:
            pieces.append(
                f'<details>\n<summary><strong>{safe_name}</strong></summary>'
                f'\n\n{html.escape(body)}\n\n</details>\n\n'
            )
        else:
            pieces.append(f'<strong>{safe_name}</strong>\n\n')
    return "".join(pieces)
|
|
def get_completion(title: str, prompt: str) -> str:
    """Request a completion from the local vLLM server.

    Wraps *title* and *prompt* in the <title>/<content> tag format the
    model was trained on, POSTs to VLLM_URL, and returns the generated
    text.  On any request or parsing failure, returns a human-readable
    error string instead of raising (callers render it directly).
    """
    full_prompt = f"<title>{title}</title>\n<content>\n{prompt}\n</content>"
    payload = {
        "prompt": full_prompt,
        "max_tokens": 6000,
        "temperature": 1,
        "model": MODEL,
    }
    try:
        logging.info(f"Sending request to VLLM server: {VLLM_URL}")
        # NOTE(review): requests timeouts are in seconds, so 30000 is
        # ~8.3 hours — confirm this was not intended as milliseconds.
        resp = requests.post(VLLM_URL, json=payload, timeout=30000)
        resp.raise_for_status()
        logging.info("Successfully received response from VLLM server")
        # OpenAI-style completions shape: choices[0].text holds the output.
        return resp.json()["choices"][0]["text"]
    except requests.exceptions.RequestException as e:
        logging.error(f"Error connecting to VLLM server: {str(e)}")
        return f"Error connecting to VLLM server: {str(e)}"
    except Exception as e:
        # Covers unexpected JSON shapes / decode failures as well.
        logging.error(f"Unexpected error in get_completion: {str(e)}")
        return f"Unexpected error: {str(e)}\n{traceback.format_exc()}"
|
|
def gradio_interface(title, prompt):
    """Gradio callback: fetch a completion and render it as Markdown.

    Produces a single Markdown document containing the raw model output
    in a fenced code block followed by a collapsible view of its parsed
    tag structure.  Any failure is returned as an error string so the
    UI always shows something.
    """
    try:
        logging.info(f"Received request - Title: {title}, Prompt: {prompt}")
        raw_response = get_completion(title, prompt)
        # Parse the tagged output and render it in one pass.
        collapsible_view = format_as_collapsible_markdown(flexible_parse(raw_response))

        combined_output = f"""
## Raw Response:

```
{raw_response}
```

## Parsed Structure:

{collapsible_view}
"""
        logging.info("Successfully processed request")
        return combined_output
    except Exception as e:
        logging.error(f"Error in gradio_interface: {str(e)}")
        return f"Error in gradio_interface: {str(e)}\n{traceback.format_exc()}"
|
|
# UI wiring: two text inputs (single-line title, 5-line prompt) feeding
# gradio_interface, rendered into one Markdown output pane.
iface = gr.Interface(
    fn=gradio_interface,
    inputs=[
        gr.Textbox(label="Title"),
        gr.Textbox(label="Prompt", lines=5)
    ],
    outputs=gr.Markdown(label="Response and Parsed Structure"),
    title="VLLM Completion Client with Raw Response and Collapsible View",
    description=f"Enter a title and prompt to generate a completion using the {MODEL} model. The raw response and a collapsible view of the parsed structure will be displayed."
)
|
|
if __name__ == "__main__":
    logging.info("Starting Gradio interface")
    # Launches with Gradio's defaults (local server; no share link).
    iface.launch()