File size: 2,600 Bytes
2ade2c6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
from __future__ import annotations

from tasks.base import SupportTaskSpec, KnowledgeSnippet, SupportTicket

def _build_task() -> SupportTaskSpec:
    """Assemble the ``api_incident_hard`` support-task specification.

    The pieces (ticket, knowledge-base snippets, marker tuples) are built as
    named locals first and then handed to ``SupportTaskSpec`` in one call.
    All values are literals; nothing is read from the environment.

    Returns:
        The fully populated ``SupportTaskSpec`` for this scenario.
    """
    # Inbound ticket: an enterprise customer reporting createOrder failures.
    incident_ticket = SupportTicket(
        customer_name="Marco Alvarez",
        customer_tier="enterprise",
        company="Northwind Labs",
        subject="API timeouts for createOrder",
        body=(
            "Since 3 hours ago, createOrder calls are timing out or "
            "returning 500s across regions. "
            "We rolled back our last deploy and still see issues. "
            "Need RCA and mitigation ASAP."
        ),
        region="us-west-2",
        affected_users=4200,
        sla_minutes_remaining=90,
    )

    # Knowledge-base articles shipped with the task.
    runbook_article = KnowledgeSnippet(
        article_id="kb-api-runbook",
        title="API latency/5xx runbook",
        content=(
            "Capture request IDs, time window, regions, and payload samples. "
            "Check current status page and incident channel. "
            "If multiple regions impacted, escalate to platform_engineering "
            "and set customer expectations."
        ),
    )
    status_page_article = KnowledgeSnippet(
        article_id="kb-status-page",
        title="Status page policy",
        content=(
            "If 2+ enterprise customers report API errors, "
            "post a preliminary status within 15 minutes."
        ),
    )

    # Fields the task lists as required to be requested from the customer.
    fields_to_request = (
        "request_ids",
        "time_window",
        "regions",
        "payload_sample",
    )

    # Marker groups for the customer reply and the internal note.
    # NOTE(review): matching semantics (all-of vs. any-of within a group)
    # are defined in tasks.base, not here -- confirm before editing.
    reply_markers = (
        ("acknowledge", "incident"),
        ("collect", "request ids"),
        ("status", "page"),
        ("escalate", "platform"),
    )
    note_markers = (
        ("status page",),
        ("platform escalation",),
        ("request ids",),
    )

    return SupportTaskSpec(
        task_id="api_incident_hard",
        difficulty="hard",
        title="Production API incident triage",
        objective=(
            "Triage a production API latency/5xx incident "
            "affecting multiple customers; "
            "collect diagnostics, apply runbook mitigations, "
            "and escalate to platform engineering appropriately."
        ),
        ticket=incident_ticket,
        knowledge_base=(runbook_article, status_page_article),
        # gold_* values: presumably the reference outcome the task is
        # scored against -- verify against SupportTaskSpec.
        gold_queue="platform_engineering",
        gold_priority="urgent",
        gold_issue_type="production_incident",
        gold_status="escalated",
        gold_resolution_code="runbook_investigation",
        required_requested_fields=fields_to_request,
        required_reply_markers=reply_markers,
        required_note_markers=note_markers,
        risk_flags=("sla_breach", "p1_incident"),
        # Follow-up from the customer supplies only part of what was asked.
        follow_up_outcome="partial",
        follow_up_message=(
            "Platform team investigating elevated DB latency; ETA 20 minutes."
        ),
        follow_up_provided_fields=("request_ids", "time_window"),
        follow_up_wrong_fields=("payload_sample",),
        # Step/SLA budget knobs; units and enforcement live in tasks.base.
        sla_step_cost=20,
        over_escalation_queues=(),
        under_escalation_deadline_step=3,
        max_steps=8,
    )


# Module-level task definition exposed under the name ``TASK``.
TASK = _build_task()