File size: 6,960 Bytes
3295172
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
897d4ed
 
 
3295172
 
 
897d4ed
3295172
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
from __future__ import annotations
import json
import re
from langchain_core.messages import HumanMessage, AIMessage, SystemMessage
from agent.state import AgentState
from agent.llm import get_llm
from agent.tools import mock_lead_capture
from rag.retriever import get_retriever

# --------------------------------------------------------------------------- #
# System prompts
# --------------------------------------------------------------------------- #

# System prompt for the intent classifier. classify_intent() sends it as the
# SystemMessage and accepts only "greeting", "inquiry" or "high_intent" from
# the model's reply; any other reply is mapped to "unknown".
# The backslash before the closing quotes keeps the prompt from ending in a
# newline.
_INTENT_SYSTEM = """\
You are an intent classifier for AutoStream, an automated video editing SaaS.

Classify the user's latest message into EXACTLY one of these labels:
- greeting    : Hello, hi, how are you, casual small talk
- inquiry     : Questions about features, pricing, plans, policies, or how AutoStream works
- high_intent : User clearly wants to sign up, purchase a plan, start a trial, or get started
- unknown     : Does not fit any category above

Reply with ONLY the label. No punctuation, no explanation.\
"""

# Template for the system message built in generate_response(), filled with
# str.format(kb_context=..., lead_section=...).
# The backslashes after the placeholders suppress the newlines between them,
# so each inserted block must carry its own leading/trailing newlines.
_AGENT_SYSTEM = """\
You are AutoStream's friendly and knowledgeable AI sales assistant.
AutoStream provides automated video editing tools for content creators.

Guidelines:
- Answer product questions using ONLY the knowledge base context provided below.
- Be concise, warm, and helpful.
- Never make up features or prices not in the context.

{kb_context}\
{lead_section}\
"""

# Template used by extract_lead_fields(); {conversation} is replaced with the
# transcript produced by _conversation_text() and the result is sent as a
# single HumanMessage. The only braces in the template are the {conversation}
# placeholder, so str.format needs no brace escaping here.
_EXTRACT_PROMPT = """\
Extract lead qualification data from the conversation below.
Return a JSON object with exactly these keys: name, email, platform.
- name     : user's full name (string or null)
- email    : user's email address (string or null)
- platform : creator platform, e.g. YouTube, Instagram, TikTok (string or null)

Rules:
- Only extract values the USER explicitly stated — never guess.
- Use null for any field not yet provided.
- Return ONLY valid JSON, nothing else.

Conversation:
{conversation}
"""


# --------------------------------------------------------------------------- #
# Helpers
# --------------------------------------------------------------------------- #

def _last_human(state: AgentState) -> str:
    return state["messages"][-1].content


def _conversation_text(state: AgentState, window: int = 12) -> str:
    lines = []
    for m in state["messages"][-window:]:
        role = "User" if isinstance(m, HumanMessage) else "Assistant"
        lines.append(f"{role}: {m.content}")
    return "\n".join(lines)


# --------------------------------------------------------------------------- #
# Nodes
# --------------------------------------------------------------------------- #

def classify_intent(state: AgentState) -> dict:
    """Label the newest message with an intent.

    While lead collection is under way (``collecting_lead`` is truthy) the
    LLM is not consulted and the intent is fixed to ``"collecting"`` so the
    flow stays in the extraction branch.

    Otherwise the model's reply is trimmed, lower-cased and stripped of
    surrounding quotes; any result outside the recognised labels becomes
    ``"unknown"``.

    Returns:
        A state update of the form ``{"intent": <label>}``.
    """
    if state.get("collecting_lead"):
        return {"intent": "collecting"}

    recognised = ("greeting", "inquiry", "high_intent")

    model = get_llm()
    reply = model.invoke(
        [
            SystemMessage(content=_INTENT_SYSTEM),
            HumanMessage(content=_last_human(state)),
        ]
    )

    label = reply.content.strip().lower().strip("\"'")
    if label not in recognised:
        label = "unknown"
    return {"intent": label}


def retrieve_rag(state: AgentState) -> dict:
    """Fetch knowledge-base context for the latest message.

    The retriever comes from ``get_retriever()`` (described in this file as
    BM25 retrieval; its implementation lives in ``rag.retriever``). Its
    ``search`` result is stored unchanged.

    Returns:
        A state update of the form ``{"rag_context": <search result>}``.
    """
    retriever = get_retriever()
    query = _last_human(state)
    hits = retriever.search(query)
    return {"rag_context": hits}


def extract_lead_fields(state: AgentState) -> dict:
    """Use the LLM to extract name / email / platform from conversation history.

    The recent transcript is inserted into ``_EXTRACT_PROMPT`` and sent to the
    model; the first flat ``{...}`` object in the reply is parsed as JSON.

    A field is taken from the reply only when it is a non-blank string other
    than a "null"/"none" placeholder. Otherwise the value already held in the
    state (if any) is kept, so previously collected data is never erased by a
    malformed or partial reply.

    Returns:
        A state update with the keys ``lead_name``, ``lead_email`` and
        ``lead_platform`` (each a string or ``None``).
    """
    llm = get_llm()
    prompt = _EXTRACT_PROMPT.format(conversation=_conversation_text(state))
    response = llm.invoke([HumanMessage(content=prompt)])

    # Message content is usually a str but may be a list of parts (plain
    # strings or dicts with a "text" entry). Flatten it so the regex below
    # cannot raise TypeError on non-string content.
    content = response.content
    if isinstance(content, str):
        text = content
    elif isinstance(content, list):
        text = "".join(
            part if isinstance(part, str) else str(part.get("text", ""))
            for part in content
            if isinstance(part, (str, dict))
        )
    else:
        text = ""

    data: dict = {}
    # ``[^}]`` already matches newlines, so no regex flags are needed. The
    # pattern only supports a flat (non-nested) JSON object, which is what the
    # prompt asks for.
    match = re.search(r"\{[^}]+\}", text)
    if match:
        try:
            parsed = json.loads(match.group())
        except json.JSONDecodeError:
            parsed = None  # unparseable reply: fall back to existing state
        if isinstance(parsed, dict):
            data = parsed

    def _pick(key: str, state_key: str) -> str | None:
        """Return a cleaned value from the reply, else the existing state value."""
        val = data.get(key)
        if isinstance(val, str):
            cleaned = val.strip()
            # Models sometimes emit the placeholder as a string, not JSON null.
            if cleaned and cleaned.lower() not in ("null", "none"):
                return cleaned
        return state.get(state_key)

    return {
        "lead_name": _pick("name", "lead_name"),
        "lead_email": _pick("email", "lead_email"),
        "lead_platform": _pick("platform", "lead_platform"),
    }


def capture_lead(state: AgentState) -> dict:
    """Pass the collected lead to ``mock_lead_capture`` and mark it captured.

    Reads ``lead_name``, ``lead_email`` and ``lead_platform`` from the state
    by direct indexing. Per the original note, this node is only meant to be
    reached once all three fields are present.

    Returns:
        The state update ``{"lead_captured": True}``.
    """
    lead_args = (
        state["lead_name"],
        state["lead_email"],
        state["lead_platform"],
    )
    mock_lead_capture(*lead_args)
    return {"lead_captured": True}


def generate_response(state: AgentState) -> dict:
    """Produce the assistant's next reply plus any resulting state changes.

    The system prompt is ``_AGENT_SYSTEM`` filled with two blocks:

    * a knowledge-base block, populated from ``rag_context`` only when the
      current intent is ``"inquiry"`` (otherwise a "no context" marker);
    * a lead block chosen by priority: lead already captured, collection in
      progress, fresh ``"high_intent"``, or nothing.

    Returns:
        ``{"messages": [<LLM reply>]}``, with ``"collecting_lead": True``
        added when the intent is ``"high_intent"`` and collection was not
        already in progress.
    """
    llm = get_llm()
    intent = state.get("intent", "unknown")
    collecting = state.get("collecting_lead", False)

    # Knowledge-base block: context from earlier turns must not leak into
    # greeting / collecting turns, so it is used for inquiries only.
    kb_text = ""
    if intent == "inquiry":
        kb_text = state.get("rag_context", "")
    if kb_text:
        kb_context = f"\nKnowledge Base Context:\n{kb_text}\n"
    else:
        kb_context = "\n[No KB context for this turn.]\n"

    # Lead block: empty unless one of the branches below applies.
    lead_section = ""
    if state.get("lead_captured"):
        lead_name = state.get("lead_name", "")
        lead_email = state.get("lead_email", "")
        lead_platform = state.get("lead_platform", "")
        lead_section = "".join(
            [
                f"\n\nLEAD JUST CAPTURED: {lead_name} | {lead_email} | {lead_platform}\n",
                "Write a warm confirmation message. Thank the user by name, confirm their details, ",
                "and tell them the AutoStream team will be in touch soon. Mention they can start ",
                "their free trial at autostream.io/signup",
            ]
        )
    elif collecting:
        # (label shown to the model, state key holding the value)
        field_labels = (
            ("name", "lead_name"),
            ("email address", "lead_email"),
            ("creator platform", "lead_platform"),
        )
        missing = [label for label, key in field_labels if not state.get(key)]
        collected = {
            short: state.get(f"lead_{short}")
            for short in ("name", "email", "platform")
        }
        lead_section = "".join(
            [
                "\n\nLEAD COLLECTION IN PROGRESS.\n",
                f"Collected so far: {collected}\n",
                f"Still needed: {missing}\n",
                "Ask ONLY for the first missing field in a friendly, natural way. ",
                "Do not ask for multiple fields at once.",
            ]
        )
    elif intent == "high_intent":
        lead_section = "".join(
            [
                "\n\nThe user wants to sign up — great!\n",
                "Express enthusiasm, then ask for their full name to get started.",
            ]
        )

    system_prompt = _AGENT_SYSTEM.format(
        kb_context=kb_context,
        lead_section=lead_section,
    )
    reply = llm.invoke([SystemMessage(content=system_prompt), *state["messages"]])

    updates: dict = {"messages": [reply]}
    # A fresh high-intent turn switches the conversation into lead collection.
    if intent == "high_intent" and not collecting:
        updates["collecting_lead"] = True
    return updates