import os
import gradio as gr
import json
import logging
import time
from typing import List, Dict, Generator
from dotenv import load_dotenv

# Try to use truststore for corporate network compatibility (local only)
if not os.getenv("SPACE_ID"):
    try:
        import truststore
        truststore.inject_into_ssl()
        print("πŸ’‘ Truststore injected (Corporate SSL mode)")
    except ImportError:
        pass

# Import our agents from the src directory
from src.clarifier import Clarifier
from src.planner import Planner
from src.splitter import Splitter
from src.coordinator import Coordinator
from src.reviewer import Reviewer

# Configuration and Secrets
load_dotenv()

def get_secret(key):
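    """Read an environment variable and return it stripped, or '' if unset."""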
    val = os.getenv(key)
    return val.strip() if val is not None else ""

HF_KEY = get_secret("HF_KEY")
TAVILY_API_KEY = get_secret("TAVILY_API_KEY")

# Logging setup
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# --- MODELS ---
# Using the most stable and powerful mid-sized model for all tasks
STABLE_MODEL = 'Qwen/Qwen2.5-7B-Instruct'

CLARIFIER_MODEL = STABLE_MODEL
PLANNER_MODEL = STABLE_MODEL
SPLITTER_MODEL = STABLE_MODEL
COORDINATOR_MODEL = STABLE_MODEL
SUBAGENT_MODEL = STABLE_MODEL
REVIEWER_MODEL = STABLE_MODEL

# --- GRADIO THEME ---
theme = gr.themes.Soft(
    primary_hue="blue",
    secondary_hue="slate",
    neutral_hue="slate",
    font=[gr.themes.GoogleFont('Inter'), 'sans-serif'],
)

def start_clarification(topic, hf_key, state):
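    """Run the Clarifier agent on the raw topic and propose refined directions.

    Output order matches the click wiring: (step2_col, suggestion_display,
    state, step1_col). On success step 2 is revealed and step 1 is hidden.
    """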
    if not topic:
        return gr.update(), "### ⚠️ Warning\nPlease enter a topic.", state, gr.update()
    
    clean_key = hf_key.strip() if hf_key else ""
    if not clean_key:
        return gr.update(), "### ⚠️ Warning\nPlease provide a valid Hugging Face Token.", state, gr.update()
    
    state["initial_topic"] = topic
    clarifier = Clarifier(model_name=CLARIFIER_MODEL, hf_key=clean_key)
    try:
        suggestions = clarifier.get_suggestions(topic)
        state["suggestions"] = suggestions
        
        if not suggestions:
            return gr.update(), "### ❌ Error\nNo suggestions received. This could be due to model traffic. Please try again in 5 seconds.", state, gr.update()

        suggestion_md = "### πŸ’‘ Refine Your Topic\n\nChoose one of the suggested directions or enter a custom one below:\n\n"
        for i, s in enumerate(suggestions):
            suggestion_md += f"**Option {i+1}: {s['title']}**\n{s['description']}\n\n"
        
        return gr.update(visible=True), suggestion_md, state, gr.update(visible=False)
    except Exception as e:
        error_msg = str(e)
        if "401" in error_msg:
            error_msg = "401 Unauthorized: Your Hugging Face Token is invalid for the Inference API. Ensure it is a 'Write' token or has 'Inference' scope enabled."
        return gr.update(), f"### ❌ Error\n{error_msg}", state, gr.update()

def select_suggestion(index, custom_topic, state):
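    """Set state["final_topic"] from a custom entry or the chosen suggestion.

    Outputs: (step3_col, target_display, state, step2_col).
    """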
    if custom_topic and custom_topic.strip():
        state["final_topic"] = custom_topic
    elif index is not None and 0 <= int(index)-1 < len(state.get("suggestions", [])):
        sug = state["suggestions"][int(index)-1]
        state["final_topic"] = f"{sug['title']}: {sug['description']}"
    else:
        return gr.update(), "### ⚠️ Warning\nPlease select an option or enter a custom topic.", state, gr.update()
    
    return gr.update(visible=True), f"### 🎯 Target Topic\n**{state['final_topic']}**", state, gr.update(visible=False)

def generate_strategy(hf_key, state):
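    """Run the Planner agent on the final topic and store the research plan.

    Outputs: (strategy_display, state, split_btn visibility).
    """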
    clean_key = hf_key.strip() if hf_key else ""
    if not clean_key:
        return "### ⚠️ Warning\nHF Key missing.", state, gr.update()
    
    planner = Planner(model_name=PLANNER_MODEL, hf_key=clean_key)
    try:
        plan = planner.plan(state["final_topic"])
        state["research_plan"] = plan
        return plan, state, gr.update(visible=True)
    except Exception as e:
        return f"### ❌ Error\n{str(e)}", state, gr.update()

def decompose_tasks(hf_key, state):
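    """Run the Splitter agent on the research plan and list the subtasks.

    Outputs: (tasks_display, state, execute_btn visibility).
    """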
    clean_key = hf_key.strip() if hf_key else ""
    if not clean_key:
        return "### ⚠️ Warning\nHF Key missing.", state, gr.update()

    splitter = Splitter(model_name=SPLITTER_MODEL, hf_key=clean_key)
    try:
        subtasks = splitter.split(state["research_plan"])
        state["subtasks"] = subtasks
        
        tasks_md = "### πŸ“‹ Generated Subtasks\n\n"
        for task in subtasks:
            tasks_md += f"- **{task['title']}** (ID: `{task['id']}`)\n"
            
        return tasks_md, state, gr.update(visible=True)
    except Exception as e:
        return f"### ❌ Error\n{str(e)}", state, gr.update()

def run_research(hf_key, tavily_key, state):
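    """Execute the full research pipeline as a streaming generator.

    Spawns one web-searching CodeAgent per subtask, has the coordinator
    model synthesize the findings, polishes the result with the Reviewer,
    and writes Markdown/PDF copies to temp_outputs/. Yields
    (log_markdown, state, step5_col update, report_markdown) as it runs.
    """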
    from smolagents import CodeAgent, tool, InferenceClientModel
    from src.prompts import SUBAGENT_DIRECTION, COORDINATOR_DIRECTION
    from tavily import TavilyClient
    
    clean_hf = hf_key.strip() if hf_key else ""
    clean_tavily = tavily_key.strip() if tavily_key else ""

    if not clean_hf:
        yield "### ❌ Error\nHF Token missing.", state, gr.update(), ""
        return
    if not clean_tavily:
        yield "### ❌ Error\nTavily API Key missing.", state, gr.update(), ""
        return

    tavily_client = TavilyClient(api_key=clean_tavily)

    @tool
    def web_search(query: str) -> str:
        """
        Search the web for real-time information using Tavily.
        Args:
            query: The search query to look up.
        """
        try:
            response = tavily_client.search(query=query, search_depth="advanced", max_results=5)
            results = response.get("results", [])
            formatted = [f"Title: {r['title']}\nURL: {r['url']}\nContent: {r['content']}\n" for r in results]
            return "\n---\n".join(formatted) if formatted else "No results."
        except Exception as e:
            return f"Search failed: {e}"

    coordinator_model = InferenceClientModel(model_id=COORDINATOR_MODEL, api_key=clean_hf)
    subagent_model = InferenceClientModel(model_id=SUBAGENT_MODEL, api_key=clean_hf)
    
    current_findings = []
    log_content = "### πŸ” Agentic Research Progress\n\n"
    
    subtasks = state.get("subtasks", [])
    if not subtasks:
        yield "### ❌ Error\nNo subtasks found.", state, gr.update(), ""
        return

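    # Fan out: one CodeAgent per subtask, each capped at two steps and
    # equipped only with the Tavily-backed web_search tool.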
    for i, task in enumerate(subtasks):
        t_id = task['id']
        t_title = task['title']
        t_desc = task['description']
        
        log_content += f"**Agent {i+1} working on:** {t_title}...\n"
        yield log_content, state, gr.update(), ""
        
        subagent = CodeAgent(
            tools=[web_search],
            model=subagent_model,
            add_base_tools=False,
            max_steps=2
        )
        prompt = SUBAGENT_DIRECTION.format(
            user_query=state.get("final_topic", ""),
            research_plan=state.get("research_plan", ""),
            subtask_id=t_id,
            subtask_title=t_title,
            subtask_description=t_desc
        )
        
        try:
            finding = subagent.run(prompt)
            current_findings.append(f"FINDINGS FOR TASK {t_id}: {t_title}\n\n{finding}")
            log_content += f"βœ… {t_title} complete!\n\n"
            yield log_content, state, gr.update(), ""
        except Exception as e:
            log_content += f"❌ {t_title} failed: {e}\n\n"
            yield log_content, state, gr.update(), ""

    log_content += "### ✨ Synthesis: Generating Final Report...\n"
    yield log_content, state, gr.update(), ""
    
    sys_prompt = COORDINATOR_DIRECTION.format(
        user_query=state.get("final_topic", ""),
        research_plan=state.get("research_plan", ""),
        subtasks_json=json.dumps(subtasks, indent=2)
    )
    user_prompt = "Synthesize these findings:\n\n" + "\n\n".join(current_findings)
    
    try:
        response = coordinator_model(messages=[
            {"role": "system", "content": sys_prompt},
            {"role": "user", "content": user_prompt}
        ])
        final_report = response.content
        if "<think>" in final_report and "</think>" in final_report:
            final_report = final_report.split("</think>")[-1].strip()
        
        log_content += "### πŸ–‹οΈ Review: Polishing and Finalizing...\n"
        yield log_content, state, gr.update(), ""
        
        reviewer = Reviewer(model_name=REVIEWER_MODEL, hf_key=clean_hf)
        polished_report = reviewer.review(final_report)
        state["final_report"] = polished_report
        
        os.makedirs("temp_outputs", exist_ok=True)
        ts = int(time.time())
        pdf_name = f"research_{ts}.pdf"
        pdf_path = os.path.join("temp_outputs", pdf_name)
        if reviewer.generate_pdf(polished_report, pdf_path):
            state["pdf_path"] = pdf_path
            
        md_name = f"research_{ts}.md"
        md_path = os.path.join("temp_outputs", md_name)
        with open(md_path, "w", encoding="utf-8") as f:
            f.write(polished_report)
        state["md_path"] = md_path
            
        yield log_content + "βœ… Research mission accomplished!", state, gr.update(visible=True), polished_report
    except Exception as e:
        yield log_content + f"❌ Synthesis failed: {e}", state, gr.update(), ""

def reset_all():
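    """Reset the UI back to step 1 and clear state, status, and report output."""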
    return (
        gr.update(visible=True),            # step1_col: re-show the intro step (hidden after clarification)
        gr.update(value="", visible=True),  # topic_input
        gr.update(visible=False),           # step2_col
        gr.update(visible=False),           # step3_col
        gr.update(visible=False),           # step4_col
        gr.update(visible=False),           # step5_col
        {},                                 # state
        "### 🧬 Status\nReady to research.",  # log_output
        ""                                  # final_md_display
    )

with gr.Blocks(theme=theme, title="Deep Research Agent") as demo:
    state = gr.State({})
    
    gr.Markdown("# 🧬 Deep Research Agent")
    gr.Markdown("### The ultimate AI research pipeline that browses the web for you.")
    
    with gr.Row():
        with gr.Column(scale=1, variant="panel"):
            gr.Markdown("## βš™οΈ Configuration")
            hf_key_input = gr.Textbox(label="Hugging Face Token", type="password", value=HF_KEY)
            tavily_key_input = gr.Textbox(label="Tavily API Key", type="password", value=TAVILY_API_KEY)
            reset_btn = gr.Button("πŸ”„ New Research", variant="secondary")
            gr.Markdown("---")
            log_output = gr.Markdown("### 🧬 Status\nReady to research.")

        with gr.Column(scale=4):
            # STEP 1: Introduction
            with gr.Column(visible=True) as step1_col:
                gr.Markdown("## 1️⃣ What are you researching?")
                topic_input = gr.Textbox(label="Enter a broad topic or research question:", placeholder="e.g., Target market for sustainable polymers in Europe")
                start_btn = gr.Button("Clarify Topic ➑️", variant="primary")

            # STEP 2: Refinement
            with gr.Column(visible=False) as step2_col:
                gr.Markdown("## 2️⃣ Refine Your Topic")
                suggestion_display = gr.Markdown()
                with gr.Row():
                    opt_index = gr.Dropdown(choices=["1", "2", "3"], label="Select Option (Optional)")
                    custom_topic_input = gr.Textbox(label="Or type a custom refined topic:")
                refine_btn = gr.Button("Set Strategic Topic 🎯", variant="primary")

            # STEP 3: Planning & Splitting
            with gr.Column(visible=False) as step3_col:
                gr.Markdown("## 3️⃣ Strategy & Task Splitting")
                target_display = gr.Markdown()
                strat_btn = gr.Button("Generate Strategy πŸ“‹", variant="primary")
                strategy_display = gr.Markdown()
                
                split_btn = gr.Button("Decompose into Subtasks 🧩", variant="primary", visible=False)
                tasks_display = gr.Markdown()
                
                execute_btn = gr.Button("πŸš€ Launch Research Agents", variant="primary", visible=False)

            # STEP 4: Execution
            with gr.Column(visible=False) as step4_col:
                gr.Markdown("## 4️⃣ Agentic Research in Progress")
                gr.Markdown("Monitoring agent fleet... check the status panel on the left.")

            # STEP 5: Results
            with gr.Column(visible=False) as step5_col:
                gr.Markdown("## 🏁 Final Research Report")
                final_md_display = gr.Markdown()
                with gr.Row():
                    download_md = gr.File(label="Download Markdown")
                    download_pdf = gr.File(label="Download PDF")
                new_research_btn = gr.Button("Start Over", variant="primary")

    # --- Callbacks ---
    
    start_btn.click(
        start_clarification, 
        inputs=[topic_input, hf_key_input, state], 
        outputs=[step2_col, suggestion_display, state, step1_col]
    )
    
    refine_btn.click(
        select_suggestion,
        inputs=[opt_index, custom_topic_input, state],
        outputs=[step3_col, target_display, state, step2_col]
    )
    
    strat_btn.click(
        generate_strategy,
        inputs=[hf_key_input, state],
        outputs=[strategy_display, state, split_btn]
    )
    
    split_btn.click(
        decompose_tasks,
        inputs=[hf_key_input, state],
        outputs=[tasks_display, state, execute_btn]
    )
    
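    # Chained callbacks: reveal the progress column, stream run_research
    # updates into the status panel, then hand the generated file paths
    # to the download widgets.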
    execute_btn.click(
        lambda: gr.update(visible=True), outputs=step4_col
    ).then(
        run_research,
        inputs=[hf_key_input, tavily_key_input, state],
        outputs=[log_output, state, step5_col, final_md_display]
    ).then(
        lambda s: (s.get("md_path"), s.get("pdf_path")),
        inputs=[state],
        outputs=[download_md, download_pdf]
    )

    reset_btn.click(
        reset_all,
        outputs=[step1_col, topic_input, step2_col, step3_col, step4_col, step5_col, state, log_output, final_md_display]
    )
    
    new_research_btn.click(
        reset_all,
        outputs=[step1_col, topic_input, step2_col, step3_col, step4_col, step5_col, state, log_output, final_md_display]
    )

if __name__ == "__main__":
    demo.queue()
    demo.launch()