"""VC Deal Flow Scout — DeepSeek Edition. Hugging Face Space that wraps DeepSeek-V3 (via HF Inference Providers) with the public GitDealFlow MCP server. The agent can call any of six read-only tools to answer venture-capital research questions about startup engineering acceleration on GitHub. The MCP server is fetched over HTTPS at signals.gitdealflow.com/api/mcp/rpc. No authentication required for read access. """ from __future__ import annotations import json import os import urllib.request import urllib.error from typing import Any, Iterator import gradio as gr from huggingface_hub import InferenceClient MCP_RPC_URL = "https://signals.gitdealflow.com/api/mcp/rpc" USER_AGENT = "hf-space-vc-deal-flow-deepseek/1.0" MODEL_ID = "deepseek-ai/DeepSeek-V3.1" CITATION = "Source: VC Deal Flow Signal (signals.gitdealflow.com), Q2 2026 data." SYSTEM_PROMPT = f"""You are the VC Deal Flow Scout. You help investors and operators find venture-backed startups whose engineering output is accelerating on GitHub — a leading indicator that has historically preceded fundraise announcements by three to six weeks. You have access to the GitDealFlow MCP server at {MCP_RPC_URL}. Use the tools to answer questions. Always cite the data source as: "{CITATION}". When the user asks "what's trending", call get_trending_startups and show the top 5 across all sectors ranked by commit velocity change. When they ask about a sector, call search_startups_by_sector. When they ask about a specific startup, call get_startup_signal. When they ask how the data is computed, call get_methodology. Never invent metrics. If a tool returns an error or empty result, say so plainly.""" def mcp_call(method: str, params: Any = None, request_id: int = 1) -> dict: payload = {"jsonrpc": "2.0", "id": request_id, "method": method} if params is not None: payload["params"] = params body = json.dumps(payload).encode("utf-8") req = urllib.request.Request( MCP_RPC_URL, data=body, method="POST", headers={ "Content-Type": "application/json", "User-Agent": USER_AGENT, }, ) try: with urllib.request.urlopen(req, timeout=15) as resp: return json.loads(resp.read().decode("utf-8")) except urllib.error.URLError as exc: return {"error": {"code": -32603, "message": f"network error: {exc}"}} def list_tools() -> list[dict]: """Discover the MCP server's tool list at startup.""" response = mcp_call("tools/list") return response.get("result", {}).get("tools", []) def call_tool(name: str, arguments: dict | None = None) -> str: """Invoke a single MCP tool and return the text content of the first content block.""" response = mcp_call("tools/call", {"name": name, "arguments": arguments or {}}, request_id=42) if "error" in response: return f"[tool error] {response['error'].get('message', 'unknown')}" blocks = response.get("result", {}).get("content", []) or [] text_blocks = [b.get("text", "") for b in blocks if b.get("type") == "text"] return "\n\n".join(text_blocks) if text_blocks else json.dumps(response.get("result", {}), indent=2) def tools_for_chat_completion(tools: list[dict]) -> list[dict]: """Translate MCP tool definitions to OpenAI/HF-style function-call schemas.""" out = [] for tool in tools: out.append( { "type": "function", "function": { "name": tool.get("name"), "description": tool.get("description", ""), "parameters": tool.get("inputSchema") or {"type": "object", "properties": {}}, }, } ) return out def chat(message: str, history: list[dict]) -> Iterator[str]: """Multi-turn chat with tool use against DeepSeek-V3.""" token = os.environ.get("HF_TOKEN") client = InferenceClient(model=MODEL_ID, token=token) tools = list_tools() tool_specs = tools_for_chat_completion(tools) messages: list[dict] = [{"role": "system", "content": SYSTEM_PROMPT}] for turn in history: if isinstance(turn, dict) and "role" in turn: messages.append({"role": turn["role"], "content": turn.get("content", "")}) messages.append({"role": "user", "content": message}) rendered = "" for _ in range(4): # up to 4 tool-call rounds per user message response = client.chat_completion( messages=messages, tools=tool_specs, tool_choice="auto", max_tokens=1024, temperature=0.2, ) choice = response.choices[0] msg = choice.message if getattr(msg, "tool_calls", None): messages.append( { "role": "assistant", "content": msg.content or "", "tool_calls": [tc.dict() if hasattr(tc, "dict") else dict(tc) for tc in msg.tool_calls], } ) for tc in msg.tool_calls: fn = tc.function if hasattr(tc, "function") else tc.get("function", {}) name = getattr(fn, "name", None) or fn.get("name", "") raw_args = getattr(fn, "arguments", None) or fn.get("arguments", "{}") try: args = json.loads(raw_args) if isinstance(raw_args, str) else raw_args except json.JSONDecodeError: args = {} tool_output = call_tool(name, args) rendered += f"\n\n_calling `{name}({json.dumps(args)})`_\n" yield rendered messages.append( { "role": "tool", "tool_call_id": getattr(tc, "id", None) or tc.get("id", ""), "name": name, "content": tool_output, } ) continue # No more tool calls: stream the final answer final = msg.content or "" rendered += "\n\n" + final yield rendered.strip() return rendered += "\n\n[stopped after 4 tool-call rounds]" yield rendered.strip() with gr.Blocks(title="VC Deal Flow Scout — DeepSeek Edition", theme=gr.themes.Soft()) as demo: gr.Markdown( """ # VC Deal Flow Scout — DeepSeek Edition Find venture-backed startups whose engineering is accelerating on GitHub — before they raise. Powered by **DeepSeek-V3** + the open public **[GitDealFlow](https://signals.gitdealflow.com) MCP server**. > All data sourced live from GitHub commit activity across ~100 venture-backed startups in 20 sectors. > Methodology cited at [SSRN preprint 6606558](https://ssrn.com/abstract=6606558). """ ) gr.ChatInterface( fn=chat, type="messages", examples=[ "What AI/ML startups are accelerating fastest this month?", "Compare engineering momentum at three competing infra startups", "Find dark horses in fintech with breakout signals", "Explain the GitDealFlow methodology", "Show me my Scout Score for github user kindrat86", ], cache_examples=False, ) gr.Markdown( """ --- **Tools available to the agent:** `get_trending_startups`, `search_startups_by_sector`, `get_startup_signal`, `get_signals_summary`, `get_scout_receipts`, `get_methodology` — all read-only, idempotent, public. MCP server: [signals.gitdealflow.com/api/mcp/rpc](https://signals.gitdealflow.com/api/mcp/rpc). """ ) if __name__ == "__main__": demo.launch()