Upload 2 files

- app.py +178 -0
- requirements.txt +3 -0

app.py ADDED
@@ -0,0 +1,178 @@
import gradio as gr
import subprocess
import time
import requests
from openai import OpenAI
from huggingface_hub import login, snapshot_download
import os
import stat
import tarfile
import io

TITLE = "Zero-shot Anime Knowledge Optimizer"
DESCRIPTION = """
"""

# Authenticate with the Hugging Face Hub; the token must be available in the
# HF_TOKEN environment variable (e.g. set as a Space secret).
hf_token = os.getenv("HF_TOKEN")
if hf_token:
    login(token=hf_token)
else:
    raise ValueError("environment variable HF_TOKEN not found.")

repo_id = "Johnny-Z/ZAKO-0.6B"
repo_dir = snapshot_download(repo_id, repo_type='dataset')

tar_path = os.path.join(repo_dir, "llama-b7972-bin-ubuntu-x64.tar.gz")

current_dir = os.path.dirname(os.path.abspath(__file__))

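# Unpack the prebuilt llama.cpp binaries next to this script. The `filter`
# argument of TarFile.extractall (which rejects absolute paths and path
# traversal) only exists on newer Python releases; older interpreters raise
# TypeError when it is passed, hence the fallback below.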
with tarfile.open(tar_path, mode="r:gz") as tar:
    try:
        tar.extractall(path=current_dir, filter="data")
    except TypeError:
        tar.extractall(path=current_dir)

def _find_llama_server(base_dir: str) -> str:
    for root, _, files in os.walk(base_dir):
        if "llama-server" in files:
            return os.path.join(root, "llama-server")
    raise FileNotFoundError(f"llama-server executable not found in {base_dir} or its subdirectories")

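# llama-server is started with --metrics below, which exposes a
# Prometheus-style text endpoint at /metrics. Each metric line has the form
# "<name> <value>", so the generation-speed gauge can be read by taking the
# last whitespace-separated field of the matching line.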
def get_predicted_tokens_seconds() -> str:
    try:
        resp = requests.get("http://localhost:8188/metrics", timeout=2)
        resp.raise_for_status()
        for line in resp.text.splitlines():
            if line.startswith("llamacpp:predicted_tokens_seconds"):
                parts = line.split()
                if len(parts) >= 2:
                    return parts[-1]
        return "N/A"
    except requests.RequestException:
        return "N/A"

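# Restore the execute bits on the extracted binary; depending on how the
# archive was unpacked (in particular under the "data" extraction filter),
# the original file mode may not survive extraction.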
PATH_TO_SERVER_BINARY = _find_llama_server(current_dir)
PATH_TO_MODEL = os.path.join(repo_dir, "ZAKO-0.6B-Q4KM.gguf")

st = os.stat(PATH_TO_SERVER_BINARY)
os.chmod(PATH_TO_SERVER_BINARY, st.st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)

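# Poll the server's health endpoint until it answers HTTP 200, failing fast
# if the child process has already exited (its stderr is surfaced in the
# error message).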
def wait_for_server(url: str, timeout_s: int = 180, interval_s: float = 0.5, process: subprocess.Popen | None = None) -> None:
    start = time.time()
    while time.time() - start < timeout_s:
        if process and process.poll() is not None:
            stderr = process.stderr.read().decode("utf-8", errors="ignore") if process.stderr else ""
            raise RuntimeError(f"Local inference engine failed to start, exit code={process.returncode}\n{stderr}")
        try:
            resp = requests.get(url, timeout=2)
            if resp.status_code == 200:
                return
        except requests.RequestException:
            pass
        time.sleep(interval_s)
    raise TimeoutError("Timed out waiting for the local inference engine to start")

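# Launch llama-server as a child process: -m selects the GGUF model,
# --ctx-size caps the context window at 1280 tokens, and --metrics enables
# the /metrics endpoint scraped by get_predicted_tokens_seconds.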
server_process = subprocess.Popen(
    [
        PATH_TO_SERVER_BINARY,
        "-m", PATH_TO_MODEL,
        "--ctx-size", "1280",
        "--port", "8188",
        "--metrics"
    ],
    stdout=subprocess.DEVNULL,
    stderr=subprocess.PIPE
)

print("Starting local inference engine...")

wait_for_server("http://localhost:8188/health", process=server_process)

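# llama-server speaks the OpenAI chat-completions protocol under /v1, so the
# stock openai client can talk to it; the client requires a non-empty api_key
# string, but the local server ignores its value.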
client = OpenAI(
    base_url="http://localhost:8188/v1",
    api_key="sk-no-key-required"
)

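# Gradio treats a generator function as a streaming handler: every `yield`
# below pushes the partial completion and the current tokens/s reading to
# the two output components.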
def chat(question, tags, preference_level):

    prompt = f"""
# Role
Act as an image prompt writer. Your goal is to transform inputs into **objective, physical descriptions**. You must convert abstract concepts into concrete scenes, specifying composition, lighting, and textures. Any text to be rendered must be enclosed in double quotes `""` with its typography described. **Strictly avoid** subjective adjectives or quality tags (e.g. "8K", "Masterpiece", "Best Quality"). Output **only** the final visual description.

# User Input

Prompt Quality: {preference_level}
"""
    if len(tags.strip()) > 0:
        prompt += f"\nTags: {tags}"

    if len(question.strip()) > 0:
        prompt += f"\nQuestion: {question}"

    messages = [
        {"role": "user", "content": prompt}
    ]

    response = client.chat.completions.create(
        model="ZAKO",
        messages=messages,
        top_p=0.8,
        temperature=0.8,
        stream=True
    )

    output = ""
    for chunk in response:
        if chunk.choices[0].delta.content:
            output += chunk.choices[0].delta.content
            predicted_tokens_seconds = get_predicted_tokens_seconds()
            yield output, predicted_tokens_seconds

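# UI: a two-panel layout with the inputs (description, tags, quality preset)
# on the left and the streamed prompt plus the throughput metric on the right.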
def main():
    with gr.Blocks(title=TITLE) as demo:
        with gr.Column():
            gr.Markdown(
                value=f"<h1 style='text-align: center; margin-bottom: 1rem'>{TITLE}</h1>"
            )
            with gr.Row():
                with gr.Column(variant="panel"):
                    submit = gr.Button(value="Submit", variant="primary", size="lg")
                    stop = gr.Button(value="Stop", variant="secondary", size="lg")
                    with gr.Row():
                        text = gr.Textbox(
                            label="Simple Description",
                            value="",
                            lines=4,
                        )
                    with gr.Row():
                        tags = gr.Textbox(
                            label="Tags",
                            value="",
                            lines=2,
                        )
                    with gr.Row():
                        preference_level = gr.Dropdown(choices=["very high", "high", "normal"], value="very high", label="Prompt Quality")
                    with gr.Row():
                        clear = gr.ClearButton(
                            components=[],
                            variant="secondary",
                            size="lg",
                        )
                    gr.Markdown(value=DESCRIPTION)
                with gr.Column(variant="panel"):
                    generated_text = gr.Textbox(label="Output", lines=20)
                    metrics_text = gr.Textbox(label="predicted_tokens_seconds", lines=1, interactive=False)
        clear.add([text, tags, generated_text, metrics_text])
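        # submit.click returns an event reference; handing it to `cancels=`
        # on the Stop button aborts the in-flight streaming generator.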
        stream_evt = submit.click(
            chat,
            inputs=(text, tags, preference_level),
            outputs=(generated_text, metrics_text),
            queue=True
        )
        stop.click(fn=None, inputs=None, outputs=None, cancels=[stream_evt])

    demo.queue(max_size=10)
    demo.launch()

if __name__ == "__main__":
    main()
requirements.txt ADDED

@@ -0,0 +1,3 @@
gradio
openai
huggingface_hub