Johnny-Z committed on
Commit
c95e677
·
verified ·
1 Parent(s): 011cc31

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +178 -0
  2. requirements.txt +3 -0
app.py ADDED
@@ -0,0 +1,178 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import subprocess
3
+ import time
4
+ import requests
5
+ from openai import OpenAI
6
+ from huggingface_hub import login, snapshot_download
7
+ import os
8
+ import stat
9
+ import tarfile
10
+ import io
11
+
12
+ TITLE = "Zero-shot Anime Knowledge Optimizer"
13
+ DESCRIPTION = """
14
+ """
15
+
16
+ hf_token = os.getenv("HF_TOKEN")
17
+ if hf_token:
18
+ login(token=hf_token)
19
+ else:
20
+ raise ValueError("environment variable HF_TOKEN not found.")
21
+
22
+ repo_id = "Johnny-Z/ZAKO-0.6B"
23
+ repo_dir = snapshot_download(repo_id, repo_type='dataset')
24
+
25
+ tar_path = os.path.join(repo_dir, "llama-b7972-bin-ubuntu-x64.tar.gz")
26
+
27
+ current_dir = os.path.dirname(os.path.abspath(__file__))
28
+
29
+ with tarfile.open(tar_path, mode="r:gz") as tar:
30
+ try:
31
+ tar.extractall(path=current_dir, filter="data")
32
+ except TypeError:
33
+ tar.extractall(path=current_dir)
34
+
35
def _find_llama_server(base_dir: str) -> str:
    """Recursively search *base_dir* for the ``llama-server`` executable.

    Returns the full path of the first match in ``os.walk`` order.
    Raises ``FileNotFoundError`` when no match exists anywhere under
    *base_dir*.
    """
    for dirpath, _dirnames, filenames in os.walk(base_dir):
        if "llama-server" in filenames:
            return os.path.join(dirpath, "llama-server")
    raise FileNotFoundError(f"未找到 llama-server,可执行文件不在 {base_dir} 及其子目录中")
40
+
41
def get_predicted_tokens_seconds() -> str:
    """Read the generation speed from the llama.cpp Prometheus endpoint.

    Scrapes ``/metrics`` on the local server and returns the value of the
    ``llamacpp:predicted_tokens_seconds`` gauge as a string, or ``"N/A"``
    when the server is unreachable or the metric is absent.
    """
    try:
        metrics = requests.get("http://localhost:8188/metrics", timeout=2)
        metrics.raise_for_status()
        for metric_line in metrics.text.splitlines():
            if not metric_line.startswith("llamacpp:predicted_tokens_seconds"):
                continue
            fields = metric_line.split()
            if len(fields) >= 2:
                # Prometheus text format: "<name> <value>"; take the value.
                return fields[-1]
        return "N/A"
    except requests.RequestException:
        return "N/A"
53
+
54
# Locate the extracted server binary and the quantized model weights.
PATH_TO_SERVER_BINARY = _find_llama_server(current_dir)
PATH_TO_MODEL = os.path.join(repo_dir, "ZAKO-0.6B-Q4KM.gguf")

# The tarball may not carry the execute bit; grant it to user/group/other.
_mode = os.stat(PATH_TO_SERVER_BINARY).st_mode
os.chmod(PATH_TO_SERVER_BINARY, _mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
59
+
60
def wait_for_server(url: str, timeout_s: int = 180, interval_s: float = 0.5, process: subprocess.Popen | None = None) -> None:
    """Poll *url* until it answers HTTP 200 or *timeout_s* seconds elapse.

    When *process* is supplied and exits before the server becomes healthy,
    its stderr is surfaced in the raised ``RuntimeError``.  Raises
    ``TimeoutError`` when the deadline passes with no successful response.
    """
    deadline = time.time() + timeout_s
    while time.time() < deadline:
        # Fail fast if the child process has already died.
        if process is not None and process.poll() is not None:
            stderr = ""
            if process.stderr:
                stderr = process.stderr.read().decode("utf-8", errors="ignore")
            raise RuntimeError(f"本地推理引擎启动失败,退出码={process.returncode}\n{stderr}")
        try:
            if requests.get(url, timeout=2).status_code == 200:
                return
        except requests.RequestException:
            # Server not accepting connections yet; keep polling.
            pass
        time.sleep(interval_s)
    raise TimeoutError("本地推理引擎启动超时")
74
+
75
# Launch llama.cpp's OpenAI-compatible HTTP server on port 8188 with the
# Prometheus /metrics endpoint enabled.  stderr is piped so wait_for_server
# can surface startup failures.
server_process = subprocess.Popen(
    [
        PATH_TO_SERVER_BINARY,
        "-m", PATH_TO_MODEL,
        "--ctx-size", "1280",
        "--port", "8188",
        "--metrics",
    ],
    stdout=subprocess.DEVNULL,
    stderr=subprocess.PIPE,
)

print("正在启动本地推理引擎...")

# Block until the /health endpoint reports ready (or the process dies).
wait_for_server("http://localhost:8188/health", process=server_process)

# The local server does not validate API keys; any non-empty string works.
client = OpenAI(
    base_url="http://localhost:8188/v1",
    api_key="sk-no-key-required",
)
95
+
96
def chat(question, tags, preference_level):
    """Stream a rewritten image prompt from the local ZAKO model.

    Parameters
    ----------
    question : str
        Free-form scene description from the user; skipped when blank.
    tags : str
        Optional tag list to incorporate; skipped when blank.
    preference_level : str
        Requested prompt quality (one of "very high" / "high" / "normal"
        per the UI dropdown).

    Yields
    ------
    tuple[str, str]
        The accumulated generation so far and the current
        ``predicted_tokens_seconds`` metric (or ``"N/A"``).
    """
    prompt = f"""
# Role
Act as an image prompt writer. Your goal is to transform inputs into **objective, physical descriptions**. You must convert abstract concepts into concrete scenes, specifying composition, lighting, and textures. Any text to be rendered must be enclosed in double quotes `""` with its typography described. **Strictly avoid** subjective adjectives or quality tags (e.g. "8K", "Masterpiece", "Best Quality"). Output **only** the final visual description.

# User Input

Prompt Quality: {preference_level}
"""
    # Only append the optional sections the user actually filled in.
    if tags.strip():
        prompt += f"\nTags: {tags}"

    if question.strip():
        prompt += f"\nQuestion: {question}"

    messages = [
        {"role": "user", "content": prompt}
    ]

    response = client.chat.completions.create(
        model="ZAKO",
        messages=messages,
        top_p=0.8,
        temperature=0.8,
        stream=True
    )

    output = ""
    for chunk in response:
        # Guard against keep-alive/terminal chunks that carry no choices or
        # empty delta content, which would otherwise raise IndexError.
        if not chunk.choices or not chunk.choices[0].delta.content:
            continue
        output += chunk.choices[0].delta.content
        # NOTE(review): this issues one /metrics HTTP request per streamed
        # chunk — acceptable for a demo, but worth throttling at scale.
        yield output, get_predicted_tokens_seconds()
130
+
131
def main():
    """Build and launch the Gradio UI in front of the local ZAKO server."""
    with gr.Blocks(title=TITLE) as app:
        with gr.Column():
            gr.Markdown(
                value=f"<h1 style='text-align: center; margin-bottom: 1rem'>{TITLE}</h1>"
            )
        with gr.Row():
            # Left panel: inputs and controls.
            with gr.Column(variant="panel"):
                submit_btn = gr.Button(value="Submit", variant="primary", size="lg")
                stop_btn = gr.Button(value="Stop", variant="secondary", size="lg")
                with gr.Row():
                    description_box = gr.Textbox(
                        label="Simple Description",
                        value="",
                        lines=4,
                    )
                with gr.Row():
                    tags_box = gr.Textbox(
                        label="Tags",
                        value="",
                        lines=2,
                    )
                with gr.Row():
                    quality_dropdown = gr.Dropdown(choices=["very high", "high", "normal"], value="very high", label="Prompt Quality")
                with gr.Row():
                    clear_btn = gr.ClearButton(
                        components=[],
                        variant="secondary",
                        size="lg",
                    )
                gr.Markdown(value=DESCRIPTION)
            # Right panel: streamed output plus the live speed metric.
            with gr.Column(variant="panel"):
                output_box = gr.Textbox(label="Output", lines=20)
                speed_box = gr.Textbox(label="predicted_tokens_seconds", lines=1, interactive=False)
        clear_btn.add([description_box, tags_box, output_box, speed_box])
        generation_event = submit_btn.click(
            chat,
            inputs=(description_box, tags_box, quality_dropdown),
            outputs=(output_box, speed_box),
            queue=True
        )
        # Stop cancels the in-flight streaming generator.
        stop_btn.click(fn=None, inputs=None, outputs=None, cancels=[generation_event])

    app.queue(max_size=10)
    app.launch()

if __name__ == "__main__":
    main()
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ gradio
+ openai
+ huggingface_hub
+ requests