File size: 7,675 Bytes
c58bc67
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13b4ba4
c58bc67
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
"""VC Deal Flow Scout — DeepSeek Edition.

Hugging Face Space that wraps DeepSeek-V3.1 (via HF Inference Providers) with
the public GitDealFlow MCP server. The agent can call any of six read-only
tools to answer venture-capital research questions about startup engineering
acceleration on GitHub.

The MCP server is reached via JSON-RPC over HTTPS at
https://signals.gitdealflow.com/api/mcp/rpc. No authentication is required
for read access.
"""

from __future__ import annotations

import json
import os
import urllib.request
import urllib.error
from typing import Any, Iterator

import gradio as gr
from huggingface_hub import InferenceClient

# JSON-RPC endpoint of the GitDealFlow MCP server; mcp_call() POSTs every request here.
MCP_RPC_URL = "https://signals.gitdealflow.com/api/mcp/rpc"
# Sent as the User-Agent header on each MCP request.
USER_AGENT = "hf-space-vc-deal-flow-deepseek/1.0"
# Model identifier handed to InferenceClient in chat().
MODEL_ID = "deepseek-ai/DeepSeek-V3.1"
# Attribution line that the system prompt instructs the model to cite.
# NOTE(review): the quarter is hard-coded — presumably needs bumping when the data refreshes; confirm.
CITATION = "Source: VC Deal Flow Signal (signals.gitdealflow.com), Q2 2026 data."

# System message placed first in every conversation sent to the model. It is an
# f-string, so MCP_RPC_URL and CITATION are interpolated once, at import time.
SYSTEM_PROMPT = f"""You are the VC Deal Flow Scout. You help investors and operators find venture-backed startups whose engineering output is accelerating on GitHub — a leading indicator that has historically preceded fundraise announcements by three to six weeks.

You have access to the GitDealFlow MCP server at {MCP_RPC_URL}. Use the tools to answer questions. Always cite the data source as: "{CITATION}".

When the user asks "what's trending", call get_trending_startups and show the top 5 across all sectors ranked by commit velocity change. When they ask about a sector, call search_startups_by_sector. When they ask about a specific startup, call get_startup_signal. When they ask how the data is computed, call get_methodology.

Never invent metrics. If a tool returns an error or empty result, say so plainly."""


def mcp_call(method: str, params: Any = None, request_id: int = 1) -> dict:
    """Send one JSON-RPC 2.0 request to the MCP server and return the decoded reply.

    Args:
        method: JSON-RPC method name, e.g. ``"tools/list"`` or ``"tools/call"``.
        params: Optional ``params`` member; left out of the payload when ``None``.
        request_id: Value used for the JSON-RPC ``id`` member.

    Returns:
        The decoded response object. This function does not raise for transport
        or decoding failures: a network/HTTP problem, an unparseable body, or a
        reply that is not a JSON object is reported as
        ``{"error": {"code": -32603, "message": ...}}`` so callers can handle it
        the same way as an error reported by the server.
    """
    # Imported locally: the module only imports urllib, which does not bind `http`.
    from http.client import HTTPException

    payload = {"jsonrpc": "2.0", "id": request_id, "method": method}
    if params is not None:
        payload["params"] = params
    req = urllib.request.Request(
        MCP_RPC_URL,
        data=json.dumps(payload).encode("utf-8"),
        method="POST",
        headers={
            "Content-Type": "application/json",
            "User-Agent": USER_AGENT,
        },
    )

    try:
        with urllib.request.urlopen(req, timeout=15) as resp:
            raw = resp.read()
    except (OSError, HTTPException) as exc:
        # URLError and HTTPError are OSError subclasses. Catching OSError also
        # covers the TimeoutError of a stalled read, which is not a URLError;
        # HTTPException covers protocol-level failures such as a truncated body.
        return {"error": {"code": -32603, "message": f"network error: {exc}"}}

    try:
        decoded = json.loads(raw.decode("utf-8"))
    except ValueError as exc:
        # JSONDecodeError and UnicodeDecodeError both derive from ValueError.
        return {"error": {"code": -32603, "message": f"invalid JSON response: {exc}"}}

    if not isinstance(decoded, dict):
        # Callers index the reply with .get(); anything but an object is unusable.
        kind = type(decoded).__name__
        return {"error": {"code": -32603, "message": f"unexpected response type: {kind}"}}
    return decoded


def list_tools() -> list[dict]:
    """Fetch the tool definitions the MCP server advertises via ``tools/list``.

    Returns:
        The ``result.tools`` list from the response, or an empty list when the
        call failed or the response carries no usable tool list.
    """
    response = mcp_call("tools/list")
    # `or` instead of a .get() default: a member that is present but null would
    # otherwise flow through as None and break the next lookup or the caller's loop.
    result = response.get("result") or {}
    if not isinstance(result, dict):
        return []
    return result.get("tools") or []


def call_tool(name: str, arguments: dict | None = None) -> str:
    """Invoke one MCP tool via ``tools/call`` and return its output as text.

    Args:
        name: Name of the tool to invoke.
        arguments: Tool arguments; ``None`` (or an empty dict) is sent as ``{}``.

    Returns:
        Every ``text`` content block of the result, joined by blank lines. When
        the result has no text blocks, the raw ``result`` is returned as
        indented JSON instead. Failures come back as a string prefixed with
        ``[tool error]``: both JSON-RPC level errors and tool results that the
        server flagged with ``isError``.
    """
    response = mcp_call("tools/call", {"name": name, "arguments": arguments or {}}, request_id=42)
    if "error" in response:
        error = response["error"]
        message = error.get("message", "unknown") if isinstance(error, dict) else error
        return f"[tool error] {message}"

    result = response.get("result") or {}
    if not isinstance(result, dict):
        # Nothing to pick content blocks from; hand back whatever was returned.
        return json.dumps(result, indent=2)

    blocks = result.get("content") or []
    text_blocks = [
        block.get("text") or ""
        for block in blocks
        if isinstance(block, dict) and block.get("type") == "text"
    ]
    text = "\n\n".join(text_blocks) if text_blocks else json.dumps(result, indent=2)
    # A tool that ran but failed reports it inside the result rather than as a
    # JSON-RPC error; mark it so the failure is visible in the returned text.
    return f"[tool error] {text}" if result.get("isError") else text


def tools_for_chat_completion(tools: list[dict]) -> list[dict]:
    """Convert MCP tool descriptors into function-calling specs for chat completion.

    Each descriptor's ``name``, ``description`` and ``inputSchema`` become the
    ``name``, ``description`` and ``parameters`` of a ``{"type": "function"}``
    entry, in the same order as the input. A missing description becomes
    ``""``; a missing or falsy schema becomes an empty object schema.
    """

    def as_function_spec(descriptor: dict) -> dict:
        schema = descriptor.get("inputSchema")
        if not schema:
            schema = {"type": "object", "properties": {}}
        function = {
            "name": descriptor.get("name"),
            "description": descriptor.get("description", ""),
            "parameters": schema,
        }
        return {"type": "function", "function": function}

    return [as_function_spec(descriptor) for descriptor in tools]


def _tool_call_field(obj: Any, key: str, default: Any = None) -> Any:
    """Read *key* from a tool-call object that may be attribute-style or a mapping."""
    value = getattr(obj, key, None)
    if value is None and isinstance(obj, dict):
        value = obj.get(key)
    return default if value is None else value


def chat(message: str, history: list[dict]) -> Iterator[str]:
    """Answer one user message, letting the model call MCP tools along the way.

    Args:
        message: The new user message.
        history: Earlier turns; entries that are dicts with a ``role`` key are
            replayed to the model, anything else is ignored.

    Yields:
        The cumulative text to display: one ``_calling `tool(args)`_`` line per
        tool invocation (emitted before the tool runs), followed by the model's
        final answer, or by a stop notice once the round limit is reached.

    The Hugging Face token is read from the ``HF_TOKEN`` environment variable;
    when it is unset, ``None`` is passed to the client.
    """
    max_rounds = 4  # upper bound on model round-trips per user message

    token = os.environ.get("HF_TOKEN")
    client = InferenceClient(model=MODEL_ID, token=token)

    tool_specs = tools_for_chat_completion(list_tools())

    messages: list[dict] = [{"role": "system", "content": SYSTEM_PROMPT}]
    messages.extend(
        {"role": turn["role"], "content": turn.get("content", "")}
        for turn in history or []
        if isinstance(turn, dict) and "role" in turn
    )
    messages.append({"role": "user", "content": message})

    rendered = ""
    for _ in range(max_rounds):
        response = client.chat_completion(
            messages=messages,
            # Tool discovery can come back empty (list_tools returns [] on
            # failure); don't request tool calling with nothing to call.
            tools=tool_specs or None,
            tool_choice="auto" if tool_specs else None,
            max_tokens=1024,
            temperature=0.2,
        )
        msg = response.choices[0].message
        tool_calls = getattr(msg, "tool_calls", None)

        if not tool_calls:
            # No tool requested: this is the final answer, delivered in one piece.
            rendered += "\n\n" + (msg.content or "")
            yield rendered.strip()
            return

        # Echo the assistant's tool-call turn back so the follow-up "tool"
        # messages below have a matching request in the transcript.
        messages.append(
            {
                "role": "assistant",
                "content": msg.content or "",
                "tool_calls": [tc.dict() if hasattr(tc, "dict") else dict(tc) for tc in tool_calls],
            }
        )
        for tc in tool_calls:
            fn = _tool_call_field(tc, "function", {})
            name = _tool_call_field(fn, "name", "")
            raw_args = _tool_call_field(fn, "arguments", "{}")
            try:
                args = json.loads(raw_args) if isinstance(raw_args, str) else raw_args
            except json.JSONDecodeError:
                args = {}
            if not isinstance(args, dict):
                # Tool arguments must be an object; fall back to no arguments
                # for anything else (null, a list, a bare scalar).
                args = {}

            # Show the status line before the (blocking) tool call so the user
            # sees it while the call is in flight rather than after it returns.
            rendered += f"\n\n_calling `{name}({json.dumps(args)})`_\n"
            yield rendered

            messages.append(
                {
                    "role": "tool",
                    "tool_call_id": _tool_call_field(tc, "id", ""),
                    "name": name,
                    "content": call_tool(name, args),
                }
            )

    rendered += f"\n\n[stopped after {max_rounds} tool-call rounds]"
    yield rendered.strip()


# Page layout, top to bottom: intro banner, chat widget, footer listing the tools.
# `demo` is bound at module level and launched by the __main__ guard below.
with gr.Blocks(title="VC Deal Flow Scout — DeepSeek Edition", theme=gr.themes.Soft()) as demo:
    # Intro banner (Markdown).
    gr.Markdown(
        """
# VC Deal Flow Scout — DeepSeek Edition

Find venture-backed startups whose engineering is accelerating on GitHub — before they raise.
Powered by **DeepSeek-V3** + the open public **[GitDealFlow](https://signals.gitdealflow.com) MCP server**.

> All data sourced live from GitHub commit activity across ~100 venture-backed startups in 20 sectors.
> Methodology cited at [SSRN preprint 6606558](https://ssrn.com/abstract=6606558).
"""
    )
    # Chat widget driven by chat(), which reads each history turn as a dict
    # with "role"/"content" keys.
    # NOTE(review): type="messages" presumably makes Gradio pass history in
    # that shape — confirm against the Gradio ChatInterface docs.
    gr.ChatInterface(
        fn=chat,
        type="messages",
        # Example prompts offered in the UI; example caching is switched off.
        examples=[
            "What AI/ML startups are accelerating fastest this month?",
            "Compare engineering momentum at three competing infra startups",
            "Find dark horses in fintech with breakout signals",
            "Explain the GitDealFlow methodology",
            "Show me my Scout Score for github user kindrat86",
        ],
        cache_examples=False,
    )
    # Footer: static list of tool names. This text is not derived from the
    # server's tools/list reply, so it can drift from what list_tools() returns.
    gr.Markdown(
        """
---
**Tools available to the agent:** `get_trending_startups`, `search_startups_by_sector`, `get_startup_signal`, `get_signals_summary`, `get_scout_receipts`, `get_methodology` — all read-only, idempotent, public. MCP server: [signals.gitdealflow.com/api/mcp/rpc](https://signals.gitdealflow.com/api/mcp/rpc).
"""
    )

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()