| import gradio as gr |
| import requests |
| import re |
| import html |
| import traceback |
| import logging |
|
|
| |
# Timestamped INFO-level logging for request/response tracing.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')


# OpenAI-compatible completions endpoint of a locally running vLLM server,
# and the model name to request ("lora" — presumably a served LoRA adapter;
# confirm against the vLLM server's --lora-modules configuration).
VLLM_URL = "http://localhost:6002/v1/completions"
MODEL = "lora"
|
|
def flexible_parse(text):
    """Extract (tag, content) pairs from loosely XML-like text.

    Recognizes both paired tags (``<tag ...>content</tag>``) and standalone
    opening tags (``<tag ...>`` with no matching close); standalone tags
    yield an empty content string.  Tag attributes are accepted but
    discarded.  Returns a list of ``(tag_name, stripped_content)`` tuples
    in document order.
    """
    # First alternative: a paired tag whose content must not contain another
    # opening of the same tag (negative lookahead on <\1).  Second
    # alternative: a lone opening tag.
    pattern = r'<(\w+)(?:\s+[^>]*)?>((?:(?!<\1).)*?)</\1>|<(\w+)(?:\s+[^>]*)?>'
    result = []

    # re.DOTALL lets '.' cross newlines so multi-line content (e.g. the
    # <content>\n...\n</content> blocks this app exchanges) is captured.
    # Without it, any tag pair spanning a newline was mis-parsed as an
    # empty standalone tag.
    for match in re.finditer(pattern, text, re.DOTALL):
        tag, content, single_tag = match.groups()
        if single_tag:
            result.append((single_tag, ''))
        elif tag:
            result.append((tag, content.strip() if content else ''))

    return result
|
|
def format_as_collapsible_markdown(parsed_content):
    """Render (tag, content) pairs as HTML-escaped Markdown.

    Tags with content become collapsible ``<details>`` sections; tags
    without content are emitted as a bold heading only.  Both tag names
    and content are HTML-escaped before insertion.
    """
    pieces = []
    for name, body in parsed_content:
        safe_name = html.escape(name)
        if body:
            pieces.append(
                f'<details>\n<summary><strong>{safe_name}</strong></summary>'
                f'\n\n{html.escape(body)}\n\n</details>\n\n'
            )
        else:
            pieces.append(f'<strong>{safe_name}</strong>\n\n')
    return "".join(pieces)
|
|
def get_completion(title: str, prompt: str) -> str:
    """Request a completion from the local vLLM server.

    Wraps *title* and *prompt* in the <title>/<content> tag format the
    model was trained on, POSTs to VLLM_URL, and returns the generated
    text.  On any request or parsing failure, returns a human-readable
    error string instead of raising (callers render it directly).
    """
    full_prompt = f"<title>{title}</title>\n<content>\n{prompt}\n</content>"
    payload = {
        "prompt": full_prompt,
        "max_tokens": 6000,
        "temperature": 1,
        "model": MODEL,
    }
    try:
        logging.info(f"Sending request to VLLM server: {VLLM_URL}")
        # NOTE(review): requests timeouts are in seconds, so 30000 is
        # ~8.3 hours — confirm this was not intended as milliseconds.
        resp = requests.post(VLLM_URL, json=payload, timeout=30000)
        resp.raise_for_status()
        logging.info("Successfully received response from VLLM server")
        # OpenAI-style completions shape: choices[0].text holds the output.
        return resp.json()["choices"][0]["text"]
    except requests.exceptions.RequestException as e:
        logging.error(f"Error connecting to VLLM server: {str(e)}")
        return f"Error connecting to VLLM server: {str(e)}"
    except Exception as e:
        # Covers unexpected JSON shapes / decode failures as well.
        logging.error(f"Unexpected error in get_completion: {str(e)}")
        return f"Unexpected error: {str(e)}\n{traceback.format_exc()}"
|
|
def gradio_interface(title, prompt):
    """Gradio callback: fetch a completion and render it as Markdown.

    Produces a single Markdown document containing the raw model output
    in a fenced code block followed by a collapsible view of its parsed
    tag structure.  Any failure is returned as an error string so the
    UI always shows something.
    """
    try:
        logging.info(f"Received request - Title: {title}, Prompt: {prompt}")
        raw_response = get_completion(title, prompt)
        # Parse the tagged output and render it in one pass.
        collapsible_view = format_as_collapsible_markdown(flexible_parse(raw_response))

        combined_output = f"""
## Raw Response:

```
{raw_response}
```

## Parsed Structure:

{collapsible_view}
"""
        logging.info("Successfully processed request")
        return combined_output
    except Exception as e:
        logging.error(f"Error in gradio_interface: {str(e)}")
        return f"Error in gradio_interface: {str(e)}\n{traceback.format_exc()}"
|
|
# UI wiring: two text inputs (single-line title, 5-line prompt) feeding
# gradio_interface, rendered into one Markdown output pane.
iface = gr.Interface(
    fn=gradio_interface,
    inputs=[
        gr.Textbox(label="Title"),
        gr.Textbox(label="Prompt", lines=5)
    ],
    outputs=gr.Markdown(label="Response and Parsed Structure"),
    title="VLLM Completion Client with Raw Response and Collapsible View",
    description=f"Enter a title and prompt to generate a completion using the {MODEL} model. The raw response and a collapsible view of the parsed structure will be displayed."
)
|
|
if __name__ == "__main__":
    logging.info("Starting Gradio interface")
    # Launches with Gradio's defaults (local server; no share link).
    iface.launch()