# Ad_Audit / server / response_generator.py
# mnawfal29's picture
# Upload folder using huggingface_hub
# 4bdb808 verified
"""Investigation tool response generator.
Returns structured numerical metrics deterministically via SHA256 seeding.
"""
from __future__ import annotations
import hashlib
from typing import Any, Dict
# ── Legitimate metric ranges ────────────────────────────────────────────────
# Baseline bounds, nested as tool name -> metric name -> (lo, hi).
# generate_investigation_metrics() treats the keys of this table as the set of
# known tools (any other tool name yields an error dict) and iterates the
# metric names of the selected tool to decide which metrics are reported.
# Each (lo, hi) pair is the interpolation interval handed to _seeded_value()
# whenever no fraud override exists for that metric.
# NOTE(review): the pct_* metrics appear to mix scales (e.g. 0.02-0.08 for
# pct_clicks_2am_5am vs 2.0-8.0 for pct_datacenter_ips) — confirm the intended
# units with the consumers of these metrics.
LEGIT_RANGES: Dict[str, Dict[str, tuple]] = {
# Click timing metrics.
"click_timestamps": {
"avg_interval_seconds": (30.0, 90.0),
"interval_std_dev": (15.0, 45.0),
"pct_clicks_2am_5am": (0.02, 0.08),
"weekday_weekend_ratio": (1.2, 2.5),
"pct_sub_second_pairs": (0.0, 0.02),
"hourly_entropy": (3.0, 3.8),
},
# Source IP metrics.
"ip_distribution": {
"unique_ips_per_1000_clicks": (600.0, 950.0),
"top_ip_pct": (0.5, 3.0),
"pct_datacenter_ips": (2.0, 8.0),
"pct_residential_ips": (85.0, 95.0),
"country_entropy": (1.5, 3.0),
"pct_ip_subnet_collision": (1.0, 5.0),
},
# Device / browser fingerprint metrics.
"device_fingerprints": {
"unique_fps_per_1000_clicks": (700.0, 950.0),
"top_fp_pct": (0.3, 2.0),
"pct_headless_browser": (0.0, 0.5),
"avg_screen_resolutions": (8.0, 25.0),
"pct_mismatched_timezone_ip": (1.0, 5.0),
"os_entropy": (1.5, 2.5),
},
# Referral metrics (this tool defines five metrics, the others six).
"referral_urls": {
"pct_direct_navigation": (15.0, 40.0),
"pct_referral_domain_mismatch": (1.0, 5.0),
"unique_referral_domains": (50.0, 200.0),
"pct_referral_chain_length_gt_2": (1.0, 5.0),
"referral_domain_entropy": (3.0, 4.5),
},
# Ad viewability metrics.
"viewability_scores": {
"pct_in_viewport_gt_1s": (60.0, 85.0),
"avg_viewport_dwell_seconds": (3.0, 12.0),
"pct_zero_pixel_ads": (0.0, 0.5),
"pct_stacked_ads": (0.0, 1.0),
"avg_focus_time_seconds": (5.0, 20.0),
"pct_mouse_nearby": (30.0, 60.0),
},
# Post-click conversion metrics.
"conversion_quality": {
"click_to_conversion_seconds_mean": (120.0, 1800.0),
"conversion_rate": (1.0, 8.0),
"pct_bounce_after_click": (30.0, 55.0),
"avg_pages_per_session": (2.5, 6.0),
"pct_prior_engagement": (20.0, 50.0),
"pct_last_click_attributed": (40.0, 70.0),
},
}
# ── Fraud metric ranges ─────────────────────────────────────────────────────
# fraud_type -> adaptation_stage -> tool -> metric -> (lo, hi)
# Only distinctive signals are defined; unlisted combos fall through to LEGIT.
# Override bounds for fraudulent publishers, nested as
# fraud_type -> adaptation_stage -> tool -> metric -> (lo, hi).
# generate_investigation_metrics() walks each level with .get(..., {}), so a
# missing fraud type, stage, tool or metric leaves the LEGIT_RANGES bounds in
# effect for that metric. Output is driven by the metric names in LEGIT_RANGES,
# so a metric listed only here would never be emitted.
# As listed below, the "cautious" and "covering_tracks" stages use ranges that
# sit progressively closer to the legitimate ones than "normal" does.
FRAUD_RANGES: Dict[str, Dict[str, Dict[str, Dict[str, tuple]]]] = {
"bot_traffic": {
# Overrides for all six tools.
"normal": {
"click_timestamps": {
"avg_interval_seconds": (2.0, 5.0),
"interval_std_dev": (0.5, 2.0),
"pct_clicks_2am_5am": (0.20, 0.45),
"pct_sub_second_pairs": (0.10, 0.35),
"hourly_entropy": (1.0, 2.0),
},
"ip_distribution": {
"unique_ips_per_1000_clicks": (50.0, 200.0),
"top_ip_pct": (10.0, 35.0),
"pct_datacenter_ips": (60.0, 90.0),
"pct_residential_ips": (10.0, 35.0),
"pct_ip_subnet_collision": (20.0, 50.0),
},
"device_fingerprints": {
"unique_fps_per_1000_clicks": (20.0, 100.0),
"top_fp_pct": (15.0, 45.0),
"pct_headless_browser": (30.0, 80.0),
"pct_mismatched_timezone_ip": (15.0, 40.0),
"os_entropy": (0.3, 0.8),
},
"referral_urls": {
"pct_direct_navigation": (60.0, 90.0),
"unique_referral_domains": (3.0, 15.0),
"referral_domain_entropy": (0.5, 1.5),
},
"viewability_scores": {
"pct_in_viewport_gt_1s": (10.0, 30.0),
"avg_viewport_dwell_seconds": (0.2, 1.5),
"avg_focus_time_seconds": (0.1, 1.0),
"pct_mouse_nearby": (0.0, 5.0),
},
"conversion_quality": {
"conversion_rate": (0.0, 0.1),
"pct_bounce_after_click": (85.0, 99.0),
"avg_pages_per_session": (1.0, 1.2),
"pct_prior_engagement": (0.0, 2.0),
},
},
"cautious": {
"click_timestamps": {
"avg_interval_seconds": (8.0, 20.0),
"interval_std_dev": (3.0, 10.0),
"pct_clicks_2am_5am": (0.10, 0.25),
"pct_sub_second_pairs": (0.05, 0.15),
"hourly_entropy": (2.0, 2.8),
},
"ip_distribution": {
"unique_ips_per_1000_clicks": (200.0, 400.0),
"top_ip_pct": (5.0, 15.0),
"pct_datacenter_ips": (30.0, 55.0),
"pct_residential_ips": (40.0, 65.0),
"pct_ip_subnet_collision": (10.0, 25.0),
},
"device_fingerprints": {
"unique_fps_per_1000_clicks": (200.0, 450.0),
"top_fp_pct": (5.0, 15.0),
"pct_headless_browser": (10.0, 30.0),
"pct_mismatched_timezone_ip": (8.0, 20.0),
"os_entropy": (0.8, 1.3),
},
"referral_urls": {
"pct_direct_navigation": (40.0, 65.0),
"unique_referral_domains": (15.0, 40.0),
"referral_domain_entropy": (1.5, 2.5),
},
"viewability_scores": {
"pct_in_viewport_gt_1s": (25.0, 45.0),
"avg_viewport_dwell_seconds": (1.0, 3.0),
"avg_focus_time_seconds": (1.0, 3.0),
"pct_mouse_nearby": (5.0, 15.0),
},
"conversion_quality": {
"conversion_rate": (0.1, 0.5),
"pct_bounce_after_click": (70.0, 85.0),
"avg_pages_per_session": (1.2, 1.8),
"pct_prior_engagement": (2.0, 8.0),
},
},
# No referral_urls or viewability_scores overrides at this stage, so those
# tools report legitimate-range values.
"covering_tracks": {
"click_timestamps": {
"avg_interval_seconds": (18.0, 35.0),
"interval_std_dev": (8.0, 18.0),
"pct_clicks_2am_5am": (0.06, 0.12),
"pct_sub_second_pairs": (0.02, 0.06),
"hourly_entropy": (2.5, 3.2),
},
"ip_distribution": {
"unique_ips_per_1000_clicks": (400.0, 600.0),
"top_ip_pct": (3.0, 8.0),
"pct_datacenter_ips": (12.0, 25.0),
"pct_residential_ips": (65.0, 82.0),
"pct_ip_subnet_collision": (5.0, 12.0),
},
"device_fingerprints": {
"unique_fps_per_1000_clicks": (450.0, 650.0),
"top_fp_pct": (2.0, 6.0),
"pct_headless_browser": (2.0, 8.0),
"pct_mismatched_timezone_ip": (4.0, 10.0),
"os_entropy": (1.2, 1.8),
},
"conversion_quality": {
"conversion_rate": (0.5, 1.5),
"pct_bounce_after_click": (55.0, 70.0),
"avg_pages_per_session": (1.5, 2.5),
},
},
# generate_investigation_metrics() also skips the fraud lookup entirely when
# the stage is "dark", so this empty entry is never consulted.
"dark": {}, # no fraud signals — looks legit
},
"domain_spoofing": {
"normal": {
"referral_urls": {
"pct_referral_domain_mismatch": (40.0, 75.0),
"pct_referral_chain_length_gt_2": (15.0, 35.0),
"referral_domain_entropy": (0.8, 1.8),
},
"viewability_scores": {
"pct_zero_pixel_ads": (15.0, 45.0),
"pct_stacked_ads": (10.0, 30.0),
"pct_in_viewport_gt_1s": (15.0, 35.0),
"avg_viewport_dwell_seconds": (0.5, 2.0),
},
"ip_distribution": {
"pct_datacenter_ips": (25.0, 50.0),
"pct_residential_ips": (45.0, 70.0),
},
"device_fingerprints": {
"pct_headless_browser": (5.0, 20.0),
},
"click_timestamps": {
"avg_interval_seconds": (15.0, 35.0),
"pct_clicks_2am_5am": (0.10, 0.20),
},
"conversion_quality": {
"pct_bounce_after_click": (65.0, 85.0),
"avg_pages_per_session": (1.2, 2.0),
},
},
# From this stage on only referral_urls and viewability_scores are overridden.
"cautious": {
"referral_urls": {
"pct_referral_domain_mismatch": (20.0, 40.0),
"pct_referral_chain_length_gt_2": (8.0, 18.0),
"referral_domain_entropy": (1.8, 2.8),
},
"viewability_scores": {
"pct_zero_pixel_ads": (5.0, 15.0),
"pct_stacked_ads": (3.0, 10.0),
"pct_in_viewport_gt_1s": (35.0, 50.0),
"avg_viewport_dwell_seconds": (2.0, 4.0),
},
},
"covering_tracks": {
"referral_urls": {
"pct_referral_domain_mismatch": (8.0, 18.0),
"pct_referral_chain_length_gt_2": (4.0, 8.0),
},
"viewability_scores": {
"pct_zero_pixel_ads": (1.0, 5.0),
"pct_stacked_ads": (1.0, 3.0),
"pct_in_viewport_gt_1s": (45.0, 60.0),
},
},
"dark": {},
},
"click_injection": {
# No referral_urls or viewability_scores overrides for this fraud type.
"normal": {
"conversion_quality": {
"click_to_conversion_seconds_mean": (2.0, 15.0),
"conversion_rate": (15.0, 50.0),
"pct_last_click_attributed": (85.0, 99.0),
"pct_bounce_after_click": (10.0, 25.0),
"avg_pages_per_session": (1.0, 1.5),
},
"click_timestamps": {
"avg_interval_seconds": (5.0, 15.0),
"pct_sub_second_pairs": (0.15, 0.40),
"hourly_entropy": (1.5, 2.5),
},
"device_fingerprints": {
"pct_headless_browser": (5.0, 25.0),
"pct_mismatched_timezone_ip": (10.0, 25.0),
},
"ip_distribution": {
"pct_datacenter_ips": (15.0, 35.0),
},
},
"cautious": {
"conversion_quality": {
"click_to_conversion_seconds_mean": (15.0, 60.0),
"conversion_rate": (8.0, 20.0),
"pct_last_click_attributed": (70.0, 85.0),
"pct_bounce_after_click": (25.0, 40.0),
},
"click_timestamps": {
"avg_interval_seconds": (12.0, 25.0),
"pct_sub_second_pairs": (0.05, 0.15),
"hourly_entropy": (2.2, 3.0),
},
"device_fingerprints": {
"pct_headless_browser": (2.0, 8.0),
"pct_mismatched_timezone_ip": (5.0, 12.0),
},
},
"covering_tracks": {
"conversion_quality": {
"click_to_conversion_seconds_mean": (50.0, 120.0),
"conversion_rate": (5.0, 10.0),
"pct_last_click_attributed": (60.0, 72.0),
},
"click_timestamps": {
"avg_interval_seconds": (20.0, 35.0),
"pct_sub_second_pairs": (0.02, 0.06),
},
},
"dark": {},
},
}
# ── Seeded value generator ──────────────────────────────────────────────────
def _seeded_value(seed_str: str, lo: float, hi: float) -> float:
    """Derive a reproducible value between ``lo`` and ``hi`` from ``seed_str``.

    The first eight hex digits of the SHA-256 digest of ``seed_str`` are read
    as an integer and reduced modulo 10000, giving a fraction in steps of
    0.0001 from 0.0 to 0.9999. That fraction interpolates linearly between
    ``lo`` and ``hi``; the result is rounded to four decimal places.

    Args:
        seed_str: Text that fully determines the output.
        lo: Value returned when the derived fraction is 0.
        hi: Upper end of the interpolation interval.

    Returns:
        The interpolated value, rounded to four decimals.
    """
    digest_prefix = hashlib.sha256(seed_str.encode()).hexdigest()[:8]
    bucket = int(digest_prefix, 16) % 10000
    fraction = bucket / 10000.0
    span = hi - lo
    return round(lo + fraction * span, 4)
# ── Public API ──────────────────────────────────────────────────────────────
def generate_investigation_metrics(
    case_id: str,
    publisher_id: str,
    publisher_cfg: Dict[str, Any],
    tool_name: str,
    adaptation_stage: str,
) -> Dict[str, Any]:
    """Build the deterministic metric report for one tool and publisher.

    Every metric defined for ``tool_name`` in ``LEGIT_RANGES`` is emitted.
    Its bounds come from ``FRAUD_RANGES`` when the publisher is marked
    fraudulent, has a fraud type, the stage is not ``"dark"``, and an
    override exists for that fraud type / stage / tool / metric; otherwise
    the legitimate bounds are used. The value inside the bounds is chosen by
    ``_seeded_value`` from a seed built out of the case, publisher, tool and
    metric names, so repeated calls return the same numbers.

    Args:
        case_id: Case identifier; part of the seed.
        publisher_id: Publisher identifier; part of the seed and echoed back.
        publisher_cfg: Mapping read for ``"is_fraudulent"`` (defaults to
            ``False``) and ``"fraud_type"``.
        tool_name: Investigation tool; must be a key of ``LEGIT_RANGES``.
        adaptation_stage: Stage key used to select the fraud overrides.

    Returns:
        ``{"tool", "publisher_id", "metrics"}`` on success, or
        ``{"error": ...}`` when ``tool_name`` has no legitimate ranges.
    """
    # Read the publisher config first, exactly as before, so a malformed
    # config still fails ahead of the tool-name validation.
    flagged = publisher_cfg.get("is_fraudulent", False)
    scheme = publisher_cfg.get("fraud_type")

    baseline = LEGIT_RANGES.get(tool_name, {})
    if not baseline:
        return {"error": f"Unknown tool: {tool_name}"}

    # Any missing level in the fraud table collapses to "no overrides".
    overrides: Dict[str, tuple] = {}
    if flagged and scheme and adaptation_stage != "dark":
        overrides = (
            FRAUD_RANGES.get(scheme, {})
            .get(adaptation_stage, {})
            .get(tool_name, {})
        )

    # The adaptation stage is deliberately not part of the seed: it only
    # changes the bounds, not the position inside them.
    seed_prefix = f"{case_id}:{publisher_id}:{tool_name}:"
    metrics: Dict[str, Any] = {}
    for name, default_bounds in baseline.items():
        low, high = overrides.get(name, default_bounds)
        metrics[name] = _seeded_value(seed_prefix + name, low, high)

    report: Dict[str, Any] = {
        "tool": tool_name,
        "publisher_id": publisher_id,
        "metrics": metrics,
    }
    return report
def generate_trend_summary() -> str:
    """Return the trend summary text.

    The summary is currently muted, so this is always the empty string.
    """
    summary = ""
    return summary
def generate_alerts() -> list:
    """Return the current alerts.

    Alerts are currently muted, so this is always a new empty list.
    """
    alerts: list = []
    return alerts