"""Task definition for the hard-difficulty production API incident triage scenario.

Exposes a single module-level ``TASK`` object built from the spec types in
``tasks.base``.
"""

from __future__ import annotations

from tasks.base import (
    SupportTaskSpec,
    KnowledgeSnippet,
    SupportTicket,
)

TASK = SupportTaskSpec(
    # --- Identity and goal -------------------------------------------------
    task_id="api_incident_hard",
    difficulty="hard",
    title="Production API incident triage",
    objective=(
        "Triage a production API latency/5xx incident affecting multiple customers; "
        "collect diagnostics, apply runbook mitigations, and escalate to platform engineering appropriately."
    ),
    # --- Inbound customer ticket -------------------------------------------
    ticket=SupportTicket(
        customer_name="Marco Alvarez",
        customer_tier="enterprise",
        company="Northwind Labs",
        subject="API timeouts for createOrder",
        body=(
            "Since 3 hours ago, createOrder calls are timing out or returning 500s across regions. "
            "We rolled back our last deploy and still see issues. Need RCA and mitigation ASAP."
        ),
        region="us-west-2",
        affected_users=4200,
        sla_minutes_remaining=90,
    ),
    # --- Knowledge-base articles supplied with the task --------------------
    knowledge_base=(
        KnowledgeSnippet(
            article_id="kb-api-runbook",
            title="API latency/5xx runbook",
            content=(
                "Capture request IDs, time window, regions, and payload samples. "
                "Check current status page and incident channel. "
                "If multiple regions impacted, escalate to platform_engineering and set customer expectations."
            ),
        ),
        KnowledgeSnippet(
            article_id="kb-status-page",
            title="Status page policy",
            content="If 2+ enterprise customers report API errors, post a preliminary status within 15 minutes.",
        ),
    ),
    # --- Reference ("gold") routing values ---------------------------------
    # NOTE(review): presumably the expected answers a grader compares
    # against; confirm against tasks.base.
    gold_queue="platform_engineering",
    gold_priority="urgent",
    gold_issue_type="production_incident",
    gold_status="escalated",
    gold_resolution_code="runbook_investigation",
    # --- Required content ---------------------------------------------------
    required_requested_fields=(
        "request_ids",
        "time_window",
        "regions",
        "payload_sample",
    ),
    # Each entry is a tuple of marker strings.
    # NOTE(review): how markers within a tuple are combined when matching is
    # defined in tasks.base, not here; verify before editing.
    required_reply_markers=(
        ("acknowledge", "incident"),
        ("collect", "request ids"),
        ("status", "page"),
        ("escalate", "platform"),
    ),
    required_note_markers=(
        ("status page",),
        ("platform escalation",),
        ("request ids",),
    ),
    risk_flags=("sla_breach", "p1_incident"),
    # --- Follow-up from the customer/platform side -------------------------
    follow_up_outcome="partial",
    follow_up_message="Platform team investigating elevated DB latency; ETA 20 minutes.",
    follow_up_provided_fields=("request_ids", "time_window"),
    follow_up_wrong_fields=("payload_sample",),
    # --- Step / SLA budget ---------------------------------------------------
    # NOTE(review): units and exact semantics of these counters come from
    # tasks.base; values are reproduced unchanged.
    sla_step_cost=20,
    over_escalation_queues=(),
    under_escalation_deadline_step=3,
    max_steps=8,
)