# NOTE: page-extraction residue removed from the top of this file. The
# original view reported "File size: 6,625 Bytes" and revisions
# 2866e02 / 4d9d503, followed by a line-number gutter (1-188). None of that
# text is part of the program.
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Hugging Face Hub repository id of the checkpoint this app loads.
MODEL_ID = "nickoo004/queryshield-1.5b"

# System-prompt template. It is filled with ``str.format`` using the keys
# ``in_lang``, ``out_lang`` and ``role``. The fragments are joined with single
# spaces, so the result is one line of text.
SYSTEM = " ".join(
    (
        "You are QueryShield, a multilingual prompt optimizer.",
        "Given a raw user question, rewrite it into a detailed instruction",
        "prompt for a downstream LLM expert.",
        "User language: {in_lang}.",
        "Response language: {out_lang}.",
        "Expert role: {role}.",
    )
)

# Languages offered by both the input- and output-language dropdowns.
LANGUAGES = ["English", "Uzbek", "Russian", "Kazakh", "Karakalpak"]

# Expert personas selectable in the UI. The list order is the dropdown order.
ROLES = [
    "Senior Software Engineer", "Medical Expert", "Financial Analyst",
    "Legal Advisor", "Data Scientist", "Cybersecurity Specialist",
    "Aerospace Engineer", "Agricultural Scientist", "Experienced Educator",
    "Automotive Engineer", "Pharmaceutical Researcher", "Manufacturing Expert",
    "Business Strategist", "Professional Writer", "Project Manager",
    "Support Specialist", "HR Consultant", "Environmental Scientist",
    "Mathematician", "UX Designer", "Research Professor",
    "Nutritionist", "Real Estate Consultant", "Supply Chain Manager",
    "Mechanical Engineer", "Electrical Engineer", "Civil Engineer",
    "Physics Researcher", "Chemistry Expert", "Biology Researcher",
]

# Click-to-load sample rows for the UI. Each row is
# [raw question, input language, output language, expert role].
# The last two rows are cross-lingual (input language != output language).
EXAMPLES = [
    [
        "hey how do i fix memory leak in my python app? its getting slower over time",
        "English",
        "English",
        "Senior Software Engineer",
    ],
    [
        "menga diabetni boshqarish uchun eng yaxshi ovqatlanish rejimini ayting, qon qandim yuqori",
        "Uzbek",
        "Uzbek",
        "Medical Expert",
    ],
    [
        "как мне улучшить производительность SQL запросов? таблица очень большая",
        "Russian",
        "Russian",
        "Data Scientist",
    ],
    [
        "бизнесімді қалай бастауға болады? капиталым аз, бірақ идеям бар",
        "Kazakh",
        "Kazakh",
        "Business Strategist",
    ],
    [
        "balalarımda matematika sabaqları qıyın bolıp atır, qanday úyretiw kerek?",
        "Karakalpak",
        "Karakalpak",
        "Experienced Educator",
    ],
    [
        "uyimda elektr toki kesib qoldi, qanday muammoni o'zim hal qila olaman?",
        "Uzbek",
        "Russian",
        "Electrical Engineer",
    ],
    [
        "менің фермамда топырақ сапасы нашар, не істеуім керек?",
        "Kazakh",
        "Uzbek",
        "Agricultural Scientist",
    ],
]

# Load the tokenizer and model once, at import time, so that every call to
# optimize() reuses the same module-level objects.
print("Loading model (CPU — this may take a minute)...")
# trust_remote_code=True permits Python code shipped in the Hub repository to
# run during loading. NOTE(review): confirm the repo actually needs it.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float32,   # full precision; NOTE(review): the conservative CPU choice, not a hard requirement
    device_map="cpu",            # keep all weights on the CPU
    trust_remote_code=True,
)
# Switch to evaluation mode (turns off training-only layers such as dropout).
model.eval()
print("✅ Model loaded")


def optimize(user_question, input_language, output_language, role, max_new_tokens=400):
    """Rewrite a raw user question into an instruction prompt for a downstream LLM.

    The question is wrapped in a chat conversation whose system message is
    ``SYSTEM`` filled with the chosen languages and expert role, passed
    through the tokenizer's chat template, and completed by the model with
    sampling enabled.

    Args:
        user_question: Raw text typed by the user. ``None``, empty or
            whitespace-only input short-circuits with a warning message.
        input_language: Language the question is written in.
        output_language: Language requested for the downstream response.
        role: Expert persona inserted into the system prompt.
        max_new_tokens: Upper bound on generated tokens. Coerced to ``int``
            because UI sliders may deliver the value as a float.

    Returns:
        The generated text with the prompt tokens and special tokens removed
        and surrounding whitespace stripped, or a warning string when no
        question was supplied.
    """
    # Guard against None as well as blank text: calling .strip() on None
    # would raise AttributeError instead of showing the friendly warning.
    if not user_question or not user_question.strip():
        return "⚠️ Please enter a question."

    system_prompt = SYSTEM.format(
        in_lang=input_language,
        out_lang=output_language,
        role=role,
    )
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_question},
    ]
    text = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    # NOTE(review): with the tokenizer's default (right-side) truncation, a
    # prompt longer than 512 tokens loses its tail, which includes the
    # generation prompt appended by the chat template. Consider limiting the
    # question length upstream if long inputs are expected.
    inputs = tokenizer(
        text, return_tensors="pt", truncation=True, max_length=512
    )
    with torch.no_grad():
        output = model.generate(
            **inputs,
            # generate() needs an integer token budget.
            max_new_tokens=int(max_new_tokens),
            temperature=0.7,
            do_sample=True,
            repetition_penalty=1.1,
            pad_token_id=tokenizer.eos_token_id,
        )
    # The returned sequence starts with the prompt; keep only what follows it.
    prompt_length = inputs["input_ids"].shape[1]
    new_tokens = output[0][prompt_length:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()


# ── UI ─────────────────────────────────────────────────────────────────
# Builds the Gradio page: an intro banner, a two-column row (inputs on the
# left, generated prompt on the right), clickable examples, and a footer.
with gr.Blocks(title="QueryShield") as demo:

    # Intro banner with a CPU latency warning and links to dataset/model/demo.
    gr.Markdown("""
# 🛡️ QueryShield — Multilingual Prompt Optimizer

Fine-tuned **Qwen2.5-1.5B** that rewrites raw user queries into expert-level instruction prompts for downstream LLMs.

Supports **5 languages**: English · Uzbek · Russian · Kazakh · Karakalpak  
Supports **cross-lingual routing**: write in one language, get instructions for another.

> ⚠️ Running on **CPU** — generation takes ~30–60 seconds. Please be patient.

📦 [Dataset](https://huggingface.co/datasets/nickoo004/queryshield-multilingual) · 
🤖 [Model](https://huggingface.co/nickoo004/queryshield-1.5b) · 
📓 [Kaggle Demo](https://www.kaggle.com/code/nursultankoshekbaev/queryshield-1-5b)
""")

    with gr.Row():
        # Left column: everything the user fills in before pressing the button.
        with gr.Column(scale=1):
            question = gr.Textbox(
                label="Raw User Question",
                placeholder="Type your messy, natural question here...",
                lines=4,
            )
            # Input and output language pickers share one row and one choice list.
            with gr.Row():
                input_lang = gr.Dropdown(
                    choices=LANGUAGES,
                    value="English",
                    label="Input Language",
                )
                output_lang = gr.Dropdown(
                    choices=LANGUAGES,
                    value="English",
                    label="Output Language",
                )
            role = gr.Dropdown(
                choices=ROLES,
                value="Senior Software Engineer",
                label="Expert Role",
            )
            # Token budget forwarded to optimize() as max_new_tokens.
            max_tokens = gr.Slider(
                minimum=100,
                maximum=600,
                value=400,
                step=50,
                label="Max output tokens",
            )
            btn = gr.Button("✨ Optimize Prompt", variant="primary")

        # Right column: the generated instruction prompt.
        with gr.Column(scale=1):
            output = gr.Textbox(
                label="Optimized Prompt (instruction for downstream LLM)",
                lines=18,
                show_copy_button=True,
            )

    # Each example row fills the four input components listed below; no
    # function is attached here and example caching is disabled.
    gr.Examples(
        examples=EXAMPLES,
        inputs=[question, input_lang, output_lang, role],
        label="📌 Example queries (click to load)",
        cache_examples=False,
    )

    # The input list order matches optimize()'s positional parameters:
    # (user_question, input_language, output_language, role, max_new_tokens).
    btn.click(
        fn=optimize,
        inputs=[question, input_lang, output_lang, role, max_tokens],
        outputs=output,
    )

    # Footer: short explanation and attribution.
    gr.Markdown("""
---
**How it works:**  
QueryShield sits between the user and the main LLM. It takes a raw query and outputs a structured instruction prompt — including role, tone, format, edge cases, and language routing instructions.

Built with ❤️ by [nickoo004](https://huggingface.co/nickoo004)
""")

# Starts the app whenever this module is executed (there is no __main__ guard).
demo.launch()