File size: 13,873 Bytes
f66e2c9
 
 
 
 
 
 
 
 
 
bb53c75
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f66e2c9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bb53c75
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1a03515
 
bb53c75
 
 
 
 
1a03515
bb53c75
1a03515
bb53c75
 
 
1a03515
 
bb53c75
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f66e2c9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3f6a9c6
1a03515
3f6a9c6
bb53c75
f66e2c9
 
 
 
 
 
1a03515
 
 
f66e2c9
 
1a03515
f66e2c9
 
 
 
 
1a03515
f66e2c9
 
 
 
173765d
f66e2c9
 
 
 
 
 
1a03515
 
 
bb53c75
 
f66e2c9
 
 
 
1a03515
 
 
f66e2c9
1a03515
 
f66e2c9
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
import gradio as gr
import os
import json
import re
from groq import Groq
from typing import List, Dict, Tuple

# Module-wide Groq client reused by every completion request in this file.
# The API key is taken from the GROQ_API_KEY environment variable.
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

# Built-in style guide plus four example abstracts.
# simple_rewrite embeds this text verbatim at the top of its prompt, and
# analyze_style_only falls back to it as the corpus when no abstracts are given.
# The text is sent to the model as-is, so any edit here changes model behavior.
DEFAULT_STYLE_GUIDE = """
You are a scientific writing assistant that mimics the writing style of Dr. Babajan Banaganapalli.

STYLE RULES

- Formal, confident scientific tone.
- Medium–long sentences (≈15–25 words).
- Clear flow: ① brief context → ② method/approach → ③ results → ④ implication.
- Use precise verbs (identify, demonstrate, predict, suggest, support).
- Use technical terms naturally; avoid unnecessary jargon.
- Interpret cautiously (e.g., “suggest,” “support,” “warrant further study”).
- Prefer active voice; use passive only for emphasis.
- Keep paragraphs compact (1–3 sentences each).

TASK RULES

- When asked to “Analyze style,” first output a 10-bullet style profile.
- When asked to “Rewrite,” keep all facts and numbers; only change wording and rhythm to match the style.
- If citations are present, preserve them. If acronyms appear, define once, then reuse.

EXAMPLES—LEARN ONLY
Example 1
“Resveratrol (RESL), a natural polyphenol, has been studied for cancer chemoprevention.
In this study, a diacetate derivative (RESL43) was synthesized, showing potent cytotoxicity and apoptosis induction in U937 cells.
Molecular docking indicated that RESL43 may inhibit NFkB by disrupting DNA–protein interactions.
These findings support stronger activity than RESL and warrant further investigation.”

Example 2
“Tuberculosis remains a leading infectious disease worldwide.
Here, we developed the first 3D structural model of Mtb-MurA using homology modeling and molecular dynamics.
Docking studies revealed that 5-sulfonoxyanthranilic acid derivatives showed the best interaction with Mtb-MurA.
This suggests potential for designing specific inhibitors targeting the bacterial enzyme.”

Example 3
“The MED12 gene encodes a transcriptional regulator implicated in developmental disorders and cancers.
Through homology modeling and in silico mutational analysis, we predicted deleterious missense variants impacting stability and function.
Findings indicate that MED12 mutations may disrupt Mediator complex function, demonstrating the utility of computational phenotype prediction.”

Example 4
“Celiac disease is a complex genetic disorder.
Using a multidimensional computational approach integrating eQTL, histone marks, and TF binding, we prioritized CCR2 as a candidate gene.
Functional annotation suggested that risk SNPs modulate CCR2 expression in immune tissues, supporting its role in CD biology.
These results highlight CCR2 as a candidate biomarker and therapeutic target.”
"""

def analyze_writing_style(text: str) -> Dict[str, float]:
    """Score *text* with simple heuristic style metrics.

    The text is split into sentences on runs of ``.``, ``!`` or ``?``. Four
    sub-scores, each worth up to 5 points, are summed into a total out of 20:

    * sentence length -- average words per sentence, saturating at 20 words;
    * active voice -- share of sentences WITHOUT a "to be" form;
    * vocabulary -- how many of the listed scientific verb stems occur;
    * flow -- how many of the listed structure words occur.

    Args:
        text: The prose to analyse.

    Returns:
        A dict with ``avg_sentence_length`` (words), ``passive_voice_ratio``,
        ``vocabulary_match`` and ``flow_markers`` (percentages, 0-100) and
        ``total_score`` (0-20). Every value is 0 when no sentence is found.
    """
    sentences = [part.strip() for part in re.split(r'[.!?]+', text) if part.strip()]

    if not sentences:
        return {
            "avg_sentence_length": 0,
            "passive_voice_ratio": 0,
            "vocabulary_match": 0,
            "flow_markers": 0,
            "total_score": 0
        }

    sentence_count = len(sentences)

    # Average sentence length in whitespace-separated words.
    avg_length = sum(len(s.split()) for s in sentences) / sentence_count

    # Passive voice detection (simplified): a sentence is flagged when it
    # contains any "to be" form. Each sentence is counted at most once, so a
    # sentence with both "was" and "is" no longer inflates the ratio, and the
    # words are extracted alphabetically so "was," still matches "was".
    passive_indicators = {'was', 'were', 'been', 'being', 'is', 'are', 'am'}
    passive_count = sum(
        1 for s in sentences
        if passive_indicators.intersection(re.findall(r'[a-z]+', s.lower()))
    )
    passive_ratio = passive_count / sentence_count

    # Vocabulary match: substring search on purpose, so inflected forms such
    # as "suggests" or "demonstrated" also count. Each stem counts once.
    scientific_verbs = ['identify', 'demonstrate', 'predict', 'suggest', 'support',
                        'indicate', 'reveal', 'establish', 'confirm', 'validate',
                        'analyze', 'examine', 'investigate', 'evaluate', 'assess']
    text_lower = text.lower()
    verb_count = sum(1 for verb in scientific_verbs if verb in text_lower)
    vocab_match = min(verb_count / sentence_count * 5, 1.0)  # Normalize to 0-1

    # Flow markers (context -> method -> result -> implication), same
    # once-per-word substring counting as the verbs above.
    flow_words = ['context', 'background', 'method', 'approach', 'result', 'finding',
                  'implication', 'significance', 'conclusion', 'study', 'research',
                  'analysis', 'investigation', 'examination']
    flow_count = sum(1 for word in flow_words if word in text_lower)
    flow_markers = min(flow_count / sentence_count * 3, 1.0)  # Normalize to 0-1

    # Scoring (out of 20): four components worth 5 points each.
    length_score = min(avg_length / 20, 1.0) * 5
    passive_score = (1 - passive_ratio) * 5
    vocab_score = vocab_match * 5
    flow_score = flow_markers * 5

    total_score = length_score + passive_score + vocab_score + flow_score

    return {
        "avg_sentence_length": round(avg_length, 1),
        "passive_voice_ratio": round(passive_ratio * 100, 1),
        "vocabulary_match": round(vocab_match * 100, 1),
        "flow_markers": round(flow_markers * 100, 1),
        "total_score": round(total_score, 1)
    }

def build_style_profile_and_rewrite(abstracts: str, original_text: str) -> Tuple[str, str]:
    """Rewrite *original_text* in the style shown by *abstracts* and score it.

    A single prompt containing the style rules, the reference abstracts and
    the text to rewrite is sent to the Groq chat-completions endpoint; the
    returned rewrite is then scored with ``analyze_writing_style``.

    Args:
        abstracts: Reference abstracts that demonstrate the target style.
        original_text: The text to rewrite.

    Returns:
        ``(rewritten_text, metrics_json)``. If either input is blank the first
        element is a prompt to supply both and the second is ``"{}"``. If the
        request or scoring raises, the first element is ``"Error: <message>"``
        and the second is ``"{}"``; no exception propagates to the caller.
    """
    if not abstracts.strip() or not original_text.strip():
        return "Please provide both abstracts and original text.", "{}"

    try:
        # Create style analysis prompt
        style_prompt = f"""
        You are a scientific writing assistant designed to emulate the style of Dr. Babajan Banaganapalli.

        STYLE RULES:
        - Employ a formal, confident scientific tone throughout
        - Construct sentences of medium to extended length (approximately 15–25 words)
        - Ensure logical progression: context → methodology → results → implications
        - Favor precise verbs such as identify, demonstrate, predict, suggest, and support
        - Integrate technical terms naturally, minimizing unnecessary jargon
        - Interpret outcomes cautiously, using qualifiers such as "suggest," "support," and "warrant further study"
        - Use active voice predominantly, reserving passive constructions for emphasis
        - Maintain compact paragraphs, each comprising 1–3 sentences

        Analyze the following abstracts to understand the writing style:

        {abstracts}

        Now rewrite the following text in this exact style, preserving all factual information and numerical data:

        {original_text}

        Rewrite:
        """

        # Get completion from Groq
        completion = client.chat.completions.create(
            model="openai/gpt-oss-120b",
            messages=[{"role": "user", "content": style_prompt}],
            temperature=0.7,
            max_completion_tokens=2048,
            top_p=1,
            reasoning_effort="medium"
        )

        # Fall back to an empty string when the message carries no content,
        # matching the guard used by the other rewrite helpers in this file;
        # otherwise a None would reach the regex-based scorer and surface as
        # an unrelated "Error: ..." message.
        rewritten_text = completion.choices[0].message.content or ""

        # Analyze the rewritten text for scoring
        metrics = analyze_writing_style(rewritten_text)

        return rewritten_text, json.dumps(metrics, indent=2)

    except Exception as e:
        # UI boundary: report the failure in the output box instead of raising.
        return f"Error: {str(e)}", "{}"

def analyze_style_only(abstracts: str) -> Tuple[str, str, str]:
    """Ask the model for a style profile of *abstracts* and score the corpus.

    When *abstracts* is blank, the built-in ``DEFAULT_STYLE_GUIDE`` is used as
    the corpus instead.

    Returns:
        ``(profile_text, metrics_json, profile_text)`` on success; the profile
        is returned twice so one copy can be stored as state. On any exception
        the result is ``("Error: <message>", "{}", "")``.
    """
    trimmed = abstracts.strip()
    source_corpus = trimmed or DEFAULT_STYLE_GUIDE

    # Prompt requesting a short checklist followed by a 10-point style profile.
    prompt = f"""
    Developer: You are a scientific writing assistant designed to emulate the style of Dr. Babajan Banaganapalli.

    TASK RULES
    - Begin with a concise checklist (3–7 bullets) of what you will do; keep items conceptual.
    - Upon receiving a request to “Analyze style,” first provide a 10-point style profile in bullet format.

    First Prompt: Analyze the embedded examples and output a 10-bullet style profile.
    Then say: READY FOR REWRITES.

    Training corpus (analyze this):
    {source_corpus}

    Output strictly:
    1) A 3–7 bullet checklist of the conceptual steps you will take.
    2) A 10-point style profile in bullets.
    3) A final line: READY FOR REWRITES.
    """

    try:
        response = client.chat.completions.create(
            model="openai/gpt-oss-120b",
            messages=[{"role": "user", "content": prompt}],
            temperature=0.5,
            max_completion_tokens=1200,
            top_p=1,
            reasoning_effort="medium"
        )
        profile_text = response.choices[0].message.content
        if not profile_text:
            profile_text = ""
        # The metrics describe the input corpus, not the generated profile.
        corpus_metrics = analyze_writing_style(source_corpus)
        metrics_json = json.dumps(corpus_metrics, indent=2)
        return profile_text, metrics_json, profile_text
    except Exception as exc:
        return f"Error: {str(exc)}", "{}", ""

def simple_rewrite(original_text: str) -> Tuple[str, str]:
    """Rewrite *original_text* with the built-in style guide and score the result.

    Returns:
        ``(rewritten_text, metrics_json)``. Blank input yields a request for
        text and ``"{}"``; any exception raised while calling the model or
        scoring yields ``("Error: <message>", "{}")``.
    """
    has_text = bool(original_text.strip())
    if not has_text:
        return "Please provide text to rewrite.", "{}"

    try:
        style_prompt = f"""
        {DEFAULT_STYLE_GUIDE}

        Rewrite the following text to match the embedded style exactly, preserving all facts, numbers, and citations. Define acronyms on first mention.

        Text:
        {original_text}

        Rewrite:
        """

        request_messages = [{"role": "user", "content": style_prompt}]
        response = client.chat.completions.create(
            model="openai/gpt-oss-120b",
            messages=request_messages,
            temperature=0.7,
            max_completion_tokens=2048,
            top_p=1,
            reasoning_effort="medium"
        )

        # Normalise a missing/empty completion to an empty string.
        rewritten_text = response.choices[0].message.content
        if not rewritten_text:
            rewritten_text = ""

        metrics_json = json.dumps(analyze_writing_style(rewritten_text), indent=2)
        return rewritten_text, metrics_json
    except Exception as exc:
        return f"Error: {str(exc)}", "{}"

def create_deployment_instructions():
    """Return the Markdown shown on the "How to Deploy" tab.

    The text is a fixed guide for deploying the app on Hugging Face Spaces.
    """
    instructions_markdown = """
# How to Deploy StyleForge Lite on Hugging Face Spaces

## Quick Deploy Steps

1. **Fork this repository** on Hugging Face Spaces
2. **Set environment variables**:
   - Go to Settings → Secrets
   - Add `GROQ_API_KEY` with your Groq API key
3. **Deploy**: The app will automatically build and deploy

## Manual Setup

1. **Create new Space**:
   - Go to [Hugging Face Spaces](https://huggingface.co/spaces)
   - Click "Create new Space"
   - Choose "Gradio" as SDK
   - Set visibility (Public/Private)

2. **Upload files**:
   - Upload `app.py`
   - Upload `requirements.txt`
   - Upload `README.md`

3. **Configure environment**:
   - Add `GROQ_API_KEY` in Space settings
   - Set Space hardware (CPU is sufficient)

4. **Deploy**:
   - The Space will automatically build
   - Wait for deployment to complete
   - Your app will be live at `https://huggingface.co/spaces/your-username/your-space-name`

## Requirements

- Groq API key (get from [Groq Console](https://console.groq.com))
- Hugging Face account
- Basic understanding of environment variables

## Troubleshooting

- **Build fails**: Check `requirements.txt` syntax
- **API errors**: Verify `GROQ_API_KEY` is set correctly
- **Import errors**: Ensure all dependencies are in `requirements.txt`

## Cost Considerations

- Groq API has generous free tier
- Monitor usage in Groq Console
- Consider rate limiting for production use
"""
    return instructions_markdown

# Create Gradio interface
# Two tabs: the rewrite tool (input on the left, rewritten text and metrics on
# the right) and a static deployment guide. Components are created inside
# nested `with` blocks, so keep the statement order as-is when editing.
with gr.Blocks(title="StyleForge Lite", theme=gr.themes.Soft()) as app:
    gr.Markdown("# 🔬 StyleForge Lite")
    gr.Markdown(
        "Enter any text and get it rewritten in Dr. Banaganapalli's scientific writing style with scoring metrics."
    )
    # NOTE(review): this state is not referenced by any handler in this
    # section — presumably reserved for analyze_style_only; confirm or remove.
    style_profile_state = gr.State("")

    with gr.Tab("Style Analysis & Rewrite"):
        with gr.Row():
            # Left column: text to rewrite and the trigger button.
            with gr.Column(scale=1):
                original_text_input = gr.Textbox(
                    label="📄 Original Text to Rewrite",
                    placeholder="Enter the text you want to rewrite in Dr. Banaganapalli's scientific style...",
                    lines=12,
                    max_lines=20
                )

                rewrite_btn = gr.Button("🔬 Analyze & Rewrite", variant="primary", size="lg")

            # Right column: read-only rewrite plus the metrics produced for it.
            with gr.Column(scale=1):
                rewritten_output = gr.Textbox(
                    label="✨ Rewritten Text",
                    lines=12,
                    max_lines=20,
                    interactive=False
                )

                # Receives the JSON string returned as simple_rewrite's second value.
                metrics_output = gr.JSON(
                    label="📊 Style Metrics (Score out of 20)"
                )

    with gr.Tab("📋 How to Deploy"):
        deployment_instructions = gr.Markdown(create_deployment_instructions())

    # Connect the button to the function
    # simple_rewrite returns (rewritten_text, metrics_json), matching the two outputs.
    rewrite_btn.click(
        fn=simple_rewrite,
        inputs=[original_text_input],
        outputs=[rewritten_output, metrics_output]
    )

    # Add examples
    # Each example is a one-element list because there is a single input component.
    gr.Examples(
        examples=[
            ["Our study shows that the new drug works well. We tested it on cells and found good results. The drug might help treat cancer."],
            ["The research found that exercise is good for health. People who exercised more had better outcomes."],
            ["We analyzed the data and discovered important patterns. This could lead to new treatments."]
        ],
        inputs=[original_text_input],
        label="💡 Example Texts"
    )

# Start the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    # NOTE(review): share=True asks Gradio for a public share link; confirm
    # this is intended, since the deployment guide above targets Hugging Face
    # Spaces, which presumably hosts the app itself.
    app.launch(share=True)