import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama


MODEL_REPO = "HauhauCS/Qwen3.5-9B-Uncensored-HauhauCS-Aggressive"
MODEL_FILE = "Qwen3.5-9B-Uncensored-HauhauCS-Aggressive-Q4_K_M.gguf"


# Fetch the GGUF weights from the Hugging Face Hub.
print("Downloading model...")
model_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE)
print(f"Model downloaded to: {model_path}")


print("Loading model...")
llm = Llama(
    model_path=model_path,
    n_ctx=8192,  # context window used by this deployment
    n_gpu_layers=-1,  # offload every layer to the GPU when one is available
    verbose=False,
)
print("Model loaded!")


def format_messages(message: str, history: list, system_prompt: str = "") -> str:
    """Build a ChatML prompt from the chat history, the new message, and an optional system prompt."""
    formatted = ""

    if system_prompt.strip():
        formatted += f"<|im_start|>system\n{system_prompt}<|im_end|>\n"

    # History arrives as [user, assistant] pairs (Gradio's tuple-style Chatbot).
    for user_msg, assistant_msg in history:
        if user_msg:
            formatted += f"<|im_start|>user\n{user_msg}<|im_end|>\n"
        if assistant_msg:
            formatted += f"<|im_start|>assistant\n{assistant_msg}<|im_end|>\n"

    # Leave the final assistant tag open so the model writes the reply.
    formatted += f"<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
    return formatted
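
# For illustration, format_messages("Hi", [], system_prompt="Be brief") yields:
#   <|im_start|>system\nBe brief<|im_end|>\n<|im_start|>user\nHi<|im_end|>\n<|im_start|>assistant\n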


def generate_response(
    message: str,
    history: list,
    system_prompt: str = "",
    temperature: float = 0.7,
    top_p: float = 0.8,
    top_k: int = 20,
    max_tokens: int = 2048,
) -> str:
    """Run one completion over the formatted ChatML prompt and return the text."""
    prompt = format_messages(message, history, system_prompt)

    output = llm(
        prompt,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        stop=["<|im_end|>", "<|im_start|>"],  # cut generation at the next ChatML tag
    )

    return output["choices"][0]["text"].strip()
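
# llm(...) returns an OpenAI-style completion dict, roughly
#   {"choices": [{"text": "...", "finish_reason": "stop"}], "usage": {...}};
# only choices[0]["text"] is used above.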


def api_generate(
    prompt: str,
    system_prompt: str = "",
    temperature: float = 0.7,
    top_p: float = 0.8,
    max_tokens: int = 2048,
) -> dict:
    """
    API endpoint for text generation.

    Args:
        prompt: The user prompt/question
        system_prompt: Optional system instruction
        temperature: Sampling temperature (0.0-2.0)
        top_p: Nucleus sampling parameter (0.0-1.0)
        max_tokens: Maximum tokens to generate

    Returns:
        Dictionary with a 'response' key containing the generated text
        and a 'status' key ('success' or 'error').
    """
    try:
        response = generate_response(
            message=prompt,
            history=[],
            system_prompt=system_prompt,
            temperature=temperature,
            top_p=top_p,
            max_tokens=max_tokens,
        )
        return {"response": response, "status": "success"}
    except Exception as e:
        return {"response": None, "status": "error", "error": str(e)}


with gr.Blocks(title="Qwen3.5-9B Uncensored API", theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        """
        # 🤖 Qwen3.5-9B Uncensored API Interface

        Powered by [HauhauCS/Qwen3.5-9B-Uncensored-HauhauCS-Aggressive](https://huggingface.co/HauhauCS/Qwen3.5-9B-Uncensored-HauhauCS-Aggressive)

        **Features:**
        - 9B parameters with a 262K-token context window (served here with an 8,192-token window)
        - Fully uncensored (0/465 refusals)
        - Multimodal-capable base model (text, image, video); this GGUF deployment is text-only
        - Supports 201 languages
        - Running with Q4_K_M quantization via llama.cpp

        Use the chat interface below or access it via the API.
        """
    )

    with gr.Tab("💬 Chat"):
        chatbot = gr.Chatbot(height=500, label="Conversation")

        with gr.Row():
            msg = gr.Textbox(
                label="Message",
                placeholder="Type your message here...",
                scale=4,
                lines=2,
            )
            submit_btn = gr.Button("Send", variant="primary", scale=1)

        with gr.Accordion("⚙️ Settings", open=False):
            system_prompt = gr.Textbox(
                label="System Prompt",
                placeholder="Optional: Set behavior/personality for the model",
                lines=3,
            )
            with gr.Row():
                temperature = gr.Slider(
                    minimum=0.0,
                    maximum=2.0,
                    value=0.7,
                    step=0.1,
                    label="Temperature",
                )
                top_p = gr.Slider(
                    minimum=0.0,
                    maximum=1.0,
                    value=0.8,
                    step=0.05,
                    label="Top P",
                )
            with gr.Row():
                top_k = gr.Slider(
                    minimum=1,
                    maximum=100,
                    value=20,
                    step=1,
                    label="Top K",
                )
                max_tokens = gr.Slider(
                    minimum=64,
                    maximum=4096,
                    value=1024,
                    step=64,
                    label="Max Tokens",
                )

        clear_btn = gr.Button("🗑️ Clear Chat")

        def user_submit(message, history):
            # Clear the textbox and append the user turn with a pending reply.
            return "", history + [[message, None]]

        def bot_response(history, system_prompt, temperature, top_p, top_k, max_tokens):
            if not history:
                return history

            # The last turn holds the new user message; earlier turns are context.
            message = history[-1][0]
            history_without_last = history[:-1]

            response = generate_response(
                message,
                history_without_last,
                system_prompt,
                temperature,
                top_p,
                top_k,
                max_tokens,
            )
            history[-1][1] = response
            return history

        msg.submit(
            user_submit,
            [msg, chatbot],
            [msg, chatbot],
        ).then(
            bot_response,
            [chatbot, system_prompt, temperature, top_p, top_k, max_tokens],
            chatbot,
        )

        submit_btn.click(
            user_submit,
            [msg, chatbot],
            [msg, chatbot],
        ).then(
            bot_response,
            [chatbot, system_prompt, temperature, top_p, top_k, max_tokens],
            chatbot,
        )

        clear_btn.click(lambda: [], None, chatbot)

    with gr.Tab("🔌 API"):
        gr.Markdown(
            """
            ## API Usage

            This Space provides a REST API for programmatic access.

            ### Python Example

            ```python
            from gradio_client import Client

            client = Client("Ngixdev/qwen-api")

            result = client.predict(
                prompt="Explain quantum computing in simple terms",
                system_prompt="You are a helpful assistant",
                temperature=0.7,
                top_p=0.8,
                max_tokens=1024,
                api_name="/api_generate"
            )
            print(result)
            ```

            ### cURL Example

            ```bash
            curl -X POST https://ngixdev-qwen-api.hf.space/api/api_generate \\
              -H "Content-Type: application/json" \\
              -d '{
                "data": [
                  "Explain quantum computing",
                  "You are a helpful assistant",
                  0.7,
                  0.8,
                  1024
                ]
              }'
            ```
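
            ### Python (requests) Example

            The `/api/...` route above matches older Gradio versions. If this Space runs Gradio 4.x, the REST flow is a two-step `/call/...` exchange instead; a minimal sketch, assuming Gradio 4.x paths:

            ```python
            import json
            import requests

            BASE = "https://ngixdev-qwen-api.hf.space"

            # Step 1: submit the job; Gradio 4.x responds with an event id.
            resp = requests.post(
                f"{BASE}/call/api_generate",
                json={"data": ["Explain quantum computing", "You are a helpful assistant", 0.7, 0.8, 1024]},
            )
            event_id = resp.json()["event_id"]

            # Step 2: read the SSE stream; the final "data:" line carries the result.
            stream = requests.get(f"{BASE}/call/api_generate/{event_id}", stream=True)
            for line in stream.iter_lines(decode_unicode=True):
                if line and line.startswith("data:"):
                    print(json.loads(line[len("data:"):]))
            ```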
| """ |
| ) |

        with gr.Row():
            with gr.Column():
                api_prompt = gr.Textbox(
                    label="Prompt",
                    placeholder="Enter your prompt here...",
                    lines=4,
                )
                api_system = gr.Textbox(
                    label="System Prompt (Optional)",
                    placeholder="Set behavior/personality...",
                    lines=2,
                )
                with gr.Row():
                    api_temp = gr.Slider(0.0, 2.0, 0.7, step=0.1, label="Temperature")
                    api_top_p = gr.Slider(0.0, 1.0, 0.8, step=0.05, label="Top P")
                api_max_tokens = gr.Slider(64, 4096, 1024, step=64, label="Max Tokens")
                api_submit = gr.Button("Generate", variant="primary")

            with gr.Column():
                api_output = gr.JSON(label="API Response")

        # Registering with api_name exposes this handler as the /api_generate endpoint.
        api_submit.click(
            api_generate,
            [api_prompt, api_system, api_temp, api_top_p, api_max_tokens],
            api_output,
            api_name="api_generate",
        )
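

# Queue requests so the single shared Llama instance is never driven concurrently;
# queue() is standard Blocks API, and the max_size cap of 32 is an arbitrary choice.
demo.queue(max_size=32)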
demo.launch(server_name="0.0.0.0", server_port=7860)