# Hugging Face Space: Babajaan/Writing-Style (status: Sleeping)
# File size: 13,873 bytes

import gradio as gr
import os
import json
import re
from groq import Groq
from typing import List, Dict, Tuple

# Initialize the shared Groq client used by the completion calls in this module.
# os.getenv returns None when GROQ_API_KEY is unset, so in that case the client
# is constructed with api_key=None.
# NOTE(review): how Groq() reacts to api_key=None (fallback lookup or an error
# raised at import time) is not visible here -- confirm against the SDK.
client = Groq(api_key=os.getenv("GROQ_API_KEY"))

# Built-in style guide and few-shot examples.
# simple_rewrite() embeds this text at the top of its prompt, and
# analyze_style_only() falls back to it when the caller supplies no abstracts.
# The text is interpolated into prompts verbatim, so any edit here changes what
# the model is asked to do.
DEFAULT_STYLE_GUIDE = """
You are a scientific writing assistant that mimics the writing style of Dr. Babajan Banaganapalli.

STYLE RULES

- Formal, confident scientific tone.
- Medium–long sentences (≈15–25 words).
- Clear flow: ① brief context → ② method/approach → ③ results → ④ implication.
- Use precise verbs (identify, demonstrate, predict, suggest, support).
- Use technical terms naturally; avoid unnecessary jargon.
- Interpret cautiously (e.g., “suggest,” “support,” “warrant further study”).
- Prefer active voice; use passive only for emphasis.
- Keep paragraphs compact (1–3 sentences each).

TASK RULES

- When asked to “Analyze style,” first output a 10-bullet style profile.
- When asked to “Rewrite,” keep all facts and numbers; only change wording and rhythm to match the style.
- If citations are present, preserve them. If acronyms appear, define once, then reuse.

EXAMPLES—LEARN ONLY
Example 1
“Resveratrol (RESL), a natural polyphenol, has been studied for cancer chemoprevention.
In this study, a diacetate derivative (RESL43) was synthesized, showing potent cytotoxicity and apoptosis induction in U937 cells.
Molecular docking indicated that RESL43 may inhibit NFkB by disrupting DNA–protein interactions.
These findings support stronger activity than RESL and warrant further investigation.”

Example 2
“Tuberculosis remains a leading infectious disease worldwide.
Here, we developed the first 3D structural model of Mtb-MurA using homology modeling and molecular dynamics.
Docking studies revealed that 5-sulfonoxyanthranilic acid derivatives showed the best interaction with Mtb-MurA.
This suggests potential for designing specific inhibitors targeting the bacterial enzyme.”

Example 3
“The MED12 gene encodes a transcriptional regulator implicated in developmental disorders and cancers.
Through homology modeling and in silico mutational analysis, we predicted deleterious missense variants impacting stability and function.
Findings indicate that MED12 mutations may disrupt Mediator complex function, demonstrating the utility of computational phenotype prediction.”

Example 4
“Celiac disease is a complex genetic disorder.
Using a multidimensional computational approach integrating eQTL, histone marks, and TF binding, we prioritized CCR2 as a candidate gene.
Functional annotation suggested that risk SNPs modulate CCR2 expression in immune tissues, supporting its role in CD biology.
These results highlight CCR2 as a candidate biomarker and therapeutic target.”
"""

def analyze_writing_style(text: str) -> Dict[str, float]:
    """Score ``text`` against simple heuristics for the target writing style.

    The text is split into sentences on runs of ``.``, ``!`` or ``?`` and four
    sub-scores worth up to 5 points each are combined into a total out of 20:

    * sentence length -- average words per sentence, saturating at 20 words;
    * active voice -- share of sentences containing no form of "to be";
    * vocabulary -- number of distinct scientific verbs found (substring
      match on the lower-cased text), scaled by sentence count;
    * flow -- number of distinct flow/structure words found (substring
      match), scaled by sentence count.

    Args:
        text: The text to score.

    Returns:
        A dict with ``avg_sentence_length`` (words), ``passive_voice_ratio``,
        ``vocabulary_match`` and ``flow_markers`` (each a percentage 0-100),
        and ``total_score`` (0-20). All values are 0 when ``text`` contains
        no sentences.
    """
    sentences = [s.strip() for s in re.split(r'[.!?]+', text) if s.strip()]

    if not sentences:
        return {
            "avg_sentence_length": 0,
            "passive_voice_ratio": 0,
            "vocabulary_match": 0,
            "flow_markers": 0,
            "total_score": 0
        }

    sentence_count = len(sentences)

    # Average sentence length
    avg_length = sum(len(s.split()) for s in sentences) / sentence_count

    # Passive voice detection (simplified): a sentence is flagged when any
    # form of "to be" appears as a whitespace-delimited word. Each sentence is
    # counted at most once; counting every matching indicator separately
    # would let one sentence with both "was" and "is" count twice and inflate
    # the ratio.
    passive_indicators = {'was', 'were', 'been', 'being', 'is', 'are', 'am'}
    passive_count = sum(
        1 for sentence in sentences
        if not passive_indicators.isdisjoint(sentence.lower().split())
    )
    passive_ratio = passive_count / sentence_count

    # Vocabulary match (scientific verbs)
    scientific_verbs = ['identify', 'demonstrate', 'predict', 'suggest', 'support',
                        'indicate', 'reveal', 'establish', 'confirm', 'validate',
                        'analyze', 'examine', 'investigate', 'evaluate', 'assess']
    text_lower = text.lower()
    verb_count = sum(1 for verb in scientific_verbs if verb in text_lower)
    vocab_match = min(verb_count / sentence_count * 5, 1.0)  # Normalize to 0-1

    # Flow markers (context→method→result→implication)
    flow_words = ['context', 'background', 'method', 'approach', 'result', 'finding',
                  'implication', 'significance', 'conclusion', 'study', 'research',
                  'analysis', 'investigation', 'examination']
    flow_count = sum(1 for word in flow_words if word in text_lower)
    flow_markers = min(flow_count / sentence_count * 3, 1.0)  # Normalize to 0-1

    # Scoring (out of 20)
    length_score = min(avg_length / 20, 1.0) * 5  # 5 points for ideal length
    passive_score = (1 - passive_ratio) * 5  # 5 points for active voice
    vocab_score = vocab_match * 5  # 5 points for scientific vocabulary
    flow_score = flow_markers * 5  # 5 points for good flow

    total_score = length_score + passive_score + vocab_score + flow_score

    return {
        "avg_sentence_length": round(avg_length, 1),
        "passive_voice_ratio": round(passive_ratio * 100, 1),
        "vocabulary_match": round(vocab_match * 100, 1),
        "flow_markers": round(flow_markers * 100, 1),
        "total_score": round(total_score, 1)
    }

def build_style_profile_and_rewrite(abstracts: str, original_text: str) -> Tuple[str, str]:
    """Rewrite ``original_text`` in the style exhibited by ``abstracts``.

    Builds one prompt containing the fixed style rules, the supplied abstracts
    and the text to rewrite, sends it to the Groq chat-completions endpoint,
    and scores the reply with ``analyze_writing_style``.

    Args:
        abstracts: Sample abstracts whose style should be imitated.
        original_text: The text to rewrite.

    Returns:
        ``(rewritten_text, metrics_json)``. If either input is missing or
        blank, a message asking for both inputs is returned with ``"{}"``.
        If the request or scoring raises, ``("Error: <message>", "{}")`` is
        returned instead of propagating the exception.
    """
    # Treat None like an empty string so a null payload gets the same message
    # as blank input instead of raising AttributeError on .strip().
    if not (abstracts or "").strip() or not (original_text or "").strip():
        return "Please provide both abstracts and original text.", "{}"

    try:
        # Create style analysis prompt
        style_prompt = f"""
        You are a scientific writing assistant designed to emulate the style of Dr. Babajan Banaganapalli.

        STYLE RULES:
        - Employ a formal, confident scientific tone throughout
        - Construct sentences of medium to extended length (approximately 15–25 words)
        - Ensure logical progression: context → methodology → results → implications
        - Favor precise verbs such as identify, demonstrate, predict, suggest, and support
        - Integrate technical terms naturally, minimizing unnecessary jargon
        - Interpret outcomes cautiously, using qualifiers such as "suggest," "support," and "warrant further study"
        - Use active voice predominantly, reserving passive constructions for emphasis
        - Maintain compact paragraphs, each comprising 1–3 sentences

        Analyze the following abstracts to understand the writing style:

        {abstracts}

        Now rewrite the following text in this exact style, preserving all factual information and numerical data:

        {original_text}

        Rewrite:
        """

        # Get completion from Groq
        completion = client.chat.completions.create(
            model="openai/gpt-oss-120b",
            messages=[{"role": "user", "content": style_prompt}],
            temperature=0.7,
            max_completion_tokens=2048,
            top_p=1,
            reasoning_effort="medium"
        )

        # Fall back to "" when the reply carries no content, matching the
        # other completion helpers in this file, so the scorer always
        # receives a string rather than failing on None.
        rewritten_text = completion.choices[0].message.content or ""

        # Analyze the rewritten text for scoring
        metrics = analyze_writing_style(rewritten_text)

        return rewritten_text, json.dumps(metrics, indent=2)

    except Exception as e:
        # UI boundary: report the failure as output text rather than raising.
        return f"Error: {str(e)}", "{}"

def analyze_style_only(abstracts: str) -> Tuple[str, str, str]:
    """Ask the model for a style profile of ``abstracts`` and score the corpus.

    When ``abstracts`` is blank, the built-in ``DEFAULT_STYLE_GUIDE`` is used
    as the corpus instead.

    Returns:
        ``(profile_text, metrics_json, profile_text)`` -- the profile appears
        twice so one copy can be displayed and the other kept as state. The
        metrics are computed on the corpus itself, not on the model's reply.
        If the request or scoring raises, ``("Error: <message>", "{}", "")``
        is returned.
    """
    trimmed = abstracts.strip()
    source_corpus = trimmed or DEFAULT_STYLE_GUIDE

    # Prompt requesting a concise 10-point style profile.
    prompt = f"""
    Developer: You are a scientific writing assistant designed to emulate the style of Dr. Babajan Banaganapalli.

    TASK RULES
    - Begin with a concise checklist (3–7 bullets) of what you will do; keep items conceptual.
    - Upon receiving a request to “Analyze style,” first provide a 10-point style profile in bullet format.

    First Prompt: Analyze the embedded examples and output a 10-bullet style profile.
    Then say: READY FOR REWRITES.

    Training corpus (analyze this):
    {source_corpus}

    Output strictly:
    1) A 3–7 bullet checklist of the conceptual steps you will take.
    2) A 10-point style profile in bullets.
    3) A final line: READY FOR REWRITES.
    """

    request_options = {
        "model": "openai/gpt-oss-120b",
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.5,
        "max_completion_tokens": 1200,
        "top_p": 1,
        "reasoning_effort": "medium",
    }

    try:
        response = client.chat.completions.create(**request_options)
        reply = response.choices[0].message.content
        profile_text = reply if reply else ""
        # The corpus, not the reply, is scored as a proxy for the style.
        metrics_json = json.dumps(analyze_writing_style(source_corpus), indent=2)
    except Exception as e:
        return f"Error: {str(e)}", "{}", ""

    return profile_text, metrics_json, profile_text

def simple_rewrite(original_text: str) -> Tuple[str, str]:
    """Rewrite ``original_text`` using the built-in Dr. Banaganapalli style.

    The prompt is ``DEFAULT_STYLE_GUIDE`` followed by a rewrite instruction
    and the text; the model's reply is scored with ``analyze_writing_style``.

    Args:
        original_text: The text to rewrite.

    Returns:
        ``(rewritten_text, metrics_json)``. If ``original_text`` is missing or
        blank, ``("Please provide text to rewrite.", "{}")`` is returned. If
        the request or scoring raises, ``("Error: <message>", "{}")`` is
        returned instead of propagating the exception.
    """
    # Treat None like an empty string so a null payload gets the same message
    # as blank input instead of raising AttributeError on .strip().
    if not (original_text or "").strip():
        return "Please provide text to rewrite.", "{}"

    try:
        style_prompt = f"""
        {DEFAULT_STYLE_GUIDE}

        Rewrite the following text to match the embedded style exactly, preserving all facts, numbers, and citations. Define acronyms on first mention.

        Text:
        {original_text}

        Rewrite:
        """

        completion = client.chat.completions.create(
            model="openai/gpt-oss-120b",
            messages=[{"role": "user", "content": style_prompt}],
            temperature=0.7,
            max_completion_tokens=2048,
            top_p=1,
            reasoning_effort="medium"
        )
        # An absent reply is scored as empty text rather than failing on None.
        rewritten_text = completion.choices[0].message.content or ""
        metrics = analyze_writing_style(rewritten_text)
        return rewritten_text, json.dumps(metrics, indent=2)
    except Exception as e:
        # UI boundary: report the failure as output text rather than raising.
        return f"Error: {str(e)}", "{}"

def create_deployment_instructions():
    """Return the Markdown guide for deploying the app on Hugging Face Spaces.

    The text is static: quick-deploy steps, manual setup, requirements,
    troubleshooting and cost notes.
    """
    deployment_guide = """
# How to Deploy StyleForge Lite on Hugging Face Spaces

## Quick Deploy Steps

1. **Fork this repository** on Hugging Face Spaces
2. **Set environment variables**:
   - Go to Settings → Secrets
   - Add `GROQ_API_KEY` with your Groq API key
3. **Deploy**: The app will automatically build and deploy

## Manual Setup

1. **Create new Space**:
   - Go to [Hugging Face Spaces](https://huggingface.co/spaces)
   - Click "Create new Space"
   - Choose "Gradio" as SDK
   - Set visibility (Public/Private)

2. **Upload files**:
   - Upload `app.py`
   - Upload `requirements.txt`
   - Upload `README.md`

3. **Configure environment**:
   - Add `GROQ_API_KEY` in Space settings
   - Set Space hardware (CPU is sufficient)

4. **Deploy**:
   - The Space will automatically build
   - Wait for deployment to complete
   - Your app will be live at `https://huggingface.co/spaces/your-username/your-space-name`

## Requirements

- Groq API key (get from [Groq Console](https://console.groq.com))
- Hugging Face account
- Basic understanding of environment variables

## Troubleshooting

- **Build fails**: Check `requirements.txt` syntax
- **API errors**: Verify `GROQ_API_KEY` is set correctly
- **Import errors**: Ensure all dependencies are in `requirements.txt`

## Cost Considerations

- Groq API has generous free tier
- Monitor usage in Groq Console
- Consider rate limiting for production use
"""
    return deployment_guide

# Create Gradio interface.
# Components are declared inside nested context managers, so the order and
# nesting of the statements below define the layout; keep them in place.
with gr.Blocks(title="StyleForge Lite", theme=gr.themes.Soft()) as app:
    gr.Markdown("# 🔬 StyleForge Lite")
    gr.Markdown(
        "Enter any text and get it rewritten in Dr. Banaganapalli's scientific writing style with scoring metrics."
    )
    # NOTE(review): this state is created but not passed to any event handler
    # in the visible code -- presumably reserved for analyze_style_only().
    style_profile_state = gr.State("")
    
    # Main tab: input text on the left, rewritten text and metrics on the right.
    with gr.Tab("Style Analysis & Rewrite"):
        with gr.Row():
            with gr.Column(scale=1):
                original_text_input = gr.Textbox(
                    label="📄 Original Text to Rewrite",
                    placeholder="Enter the text you want to rewrite in Dr. Banaganapalli's scientific style...",
                    lines=12,
                    max_lines=20
                )
                
                rewrite_btn = gr.Button("🔬 Analyze & Rewrite", variant="primary", size="lg")
            
            with gr.Column(scale=1):
                rewritten_output = gr.Textbox(
                    label="✨ Rewritten Text",
                    lines=12,
                    max_lines=20,
                    interactive=False
                )
                
                # Receives the JSON string returned as the second element of
                # simple_rewrite()'s result.
                metrics_output = gr.JSON(
                    label="📊 Style Metrics (Score out of 20)"
                )
    
    # Static tab rendering the Markdown from create_deployment_instructions().
    with gr.Tab("📋 How to Deploy"):
        deployment_instructions = gr.Markdown(create_deployment_instructions())
    
    # Connect the button to the function: the textbox value goes to
    # simple_rewrite(), and its two return values fill the two outputs in order.
    rewrite_btn.click(
        fn=simple_rewrite,
        inputs=[original_text_input],
        outputs=[rewritten_output, metrics_output]
    )
    
    # Add examples: sample inputs bound to the original-text box.
    gr.Examples(
        examples=[
            ["Our study shows that the new drug works well. We tested it on cells and found good results. The drug might help treat cancer."],
            ["The research found that exercise is good for health. People who exercised more had better outcomes."],
            ["We analyzed the data and discovered important patterns. This could lead to new treatments."]
        ],
        inputs=[original_text_input],
        label="💡 Example Texts"
    )

# Launch the app only when this file is executed as a script.
# NOTE(review): share=True presumably requests a public share link; it may be
# unnecessary (or unsupported) when hosted on Hugging Face Spaces -- confirm
# against the Gradio launch() documentation.
if __name__ == "__main__":
    app.launch(share=True)