Upload folder using huggingface_hub
Browse files- Dockerfile +81 -0
- README.md +130 -5
- __init__.py +16 -0
- cases/easy.json +59 -0
- cases/hard.json +96 -0
- cases/medium.json +96 -0
- client.py +77 -0
- inference.py +425 -0
- models.py +152 -0
- openenv.yaml +7 -0
- pyproject.toml +45 -0
- results.json +7 -0
- server/Ad_Audit_environment.py +527 -0
- server/__init__.py +14 -0
- server/app.py +84 -0
- server/fraud_engine.py +84 -0
- server/grader.py +134 -0
- server/publisher_engine.py +103 -0
- server/requirements.txt +6 -0
- server/response_generator.py +344 -0
- server/step_reward.py +49 -0
- uv.lock +0 -0
Dockerfile
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the BSD-style license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
# Multi-stage build using openenv-base
|
| 8 |
+
# This Dockerfile is flexible and works for both:
|
| 9 |
+
# - In-repo environments (with local OpenEnv sources)
|
| 10 |
+
# - Standalone environments (with openenv from PyPI/Git)
|
| 11 |
+
# The build script (openenv build) handles context detection and sets appropriate build args.
|
| 12 |
+
|
| 13 |
+
ARG BASE_IMAGE=ghcr.io/meta-pytorch/openenv-base:latest
|
| 14 |
+
FROM ${BASE_IMAGE} AS builder
|
| 15 |
+
|
| 16 |
+
WORKDIR /app
|
| 17 |
+
|
| 18 |
+
# Ensure git is available (required for installing dependencies from VCS)
|
| 19 |
+
RUN apt-get update && \
|
| 20 |
+
apt-get install -y --no-install-recommends git && \
|
| 21 |
+
rm -rf /var/lib/apt/lists/*
|
| 22 |
+
|
| 23 |
+
# Build argument to control whether we're building standalone or in-repo
|
| 24 |
+
ARG BUILD_MODE=in-repo
|
| 25 |
+
ARG ENV_NAME=Ad_Audit
|
| 26 |
+
|
| 27 |
+
# Copy environment code (always at root of build context)
|
| 28 |
+
COPY . /app/env
|
| 29 |
+
|
| 30 |
+
# For in-repo builds, openenv is already vendored in the build context
|
| 31 |
+
# For standalone builds, openenv will be installed via pyproject.toml
|
| 32 |
+
WORKDIR /app/env
|
| 33 |
+
|
| 34 |
+
# Ensure uv is available (for local builds where base image lacks it)
|
| 35 |
+
RUN if ! command -v uv >/dev/null 2>&1; then \
|
| 36 |
+
curl -LsSf https://astral.sh/uv/install.sh | sh && \
|
| 37 |
+
mv /root/.local/bin/uv /usr/local/bin/uv && \
|
| 38 |
+
mv /root/.local/bin/uvx /usr/local/bin/uvx; \
|
| 39 |
+
fi
|
| 40 |
+
|
| 41 |
+
# Install dependencies using uv sync
|
| 42 |
+
# If uv.lock exists, use it; otherwise resolve on the fly
|
| 43 |
+
RUN --mount=type=cache,target=/root/.cache/uv \
|
| 44 |
+
if [ -f uv.lock ]; then \
|
| 45 |
+
uv sync --frozen --no-install-project --no-editable; \
|
| 46 |
+
else \
|
| 47 |
+
uv sync --no-install-project --no-editable; \
|
| 48 |
+
fi
|
| 49 |
+
|
| 50 |
+
RUN --mount=type=cache,target=/root/.cache/uv \
|
| 51 |
+
if [ -f uv.lock ]; then \
|
| 52 |
+
uv sync --frozen --no-editable; \
|
| 53 |
+
else \
|
| 54 |
+
uv sync --no-editable; \
|
| 55 |
+
fi
|
| 56 |
+
|
| 57 |
+
# Final runtime stage
|
| 58 |
+
FROM ${BASE_IMAGE}
|
| 59 |
+
|
| 60 |
+
WORKDIR /app
|
| 61 |
+
|
| 62 |
+
# Copy the virtual environment from builder
|
| 63 |
+
COPY --from=builder /app/env/.venv /app/.venv
|
| 64 |
+
|
| 65 |
+
# Copy the environment code
|
| 66 |
+
COPY --from=builder /app/env /app/env
|
| 67 |
+
|
| 68 |
+
# Set PATH to use the virtual environment
|
| 69 |
+
ENV PATH="/app/.venv/bin:$PATH"
|
| 70 |
+
|
| 71 |
+
# Set PYTHONPATH so imports work correctly
|
| 72 |
+
ENV PYTHONPATH="/app/env:$PYTHONPATH"
|
| 73 |
+
|
| 74 |
+
# Health check
|
| 75 |
+
HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
|
| 76 |
+
CMD curl -f http://localhost:8000/health || exit 1
|
| 77 |
+
|
| 78 |
+
# Run the FastAPI server
|
| 79 |
+
# The module path is constructed to work with the /app/env structure
|
| 80 |
+
ENV ENABLE_WEB_INTERFACE=true
|
| 81 |
+
CMD ["sh", "-c", "cd /app/env && uvicorn server.app:app --host 0.0.0.0 --port 8000"]
|
README.md
CHANGED
|
@@ -1,10 +1,135 @@
|
|
| 1 |
---
|
| 2 |
-
title: Ad Audit
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
sdk: docker
|
| 7 |
pinned: false
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
---
|
| 9 |
|
| 10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: Ad Audit Environment
|
| 3 |
+
emoji: π΅οΈ
|
| 4 |
+
colorFrom: red
|
| 5 |
+
colorTo: yellow
|
| 6 |
sdk: docker
|
| 7 |
pinned: false
|
| 8 |
+
app_port: 8000
|
| 9 |
+
base_path: /web
|
| 10 |
+
tags:
|
| 11 |
+
- openenv
|
| 12 |
---
|
| 13 |
|
| 14 |
+
# Ad Audit Environment
|
| 15 |
+
|
| 16 |
+
An RL environment for **detecting advertising fraud** in a simulated 14-day ad campaign. Agents monitor publisher traffic metrics, investigate suspicious patterns, and flag fraudulent publishers while avoiding false positives.
|
| 17 |
+
|
| 18 |
+
## The Challenge
|
| 19 |
+
|
| 20 |
+
You manage a digital ad campaign with multiple publishers. Some are legitimate, some are committing fraud. Each day you see traffic metrics and must decide: monitor, investigate, or flag.
|
| 21 |
+
|
| 22 |
+
**Fraud types:**
|
| 23 |
+
- **Bot Traffic** β CTR spikes dramatically, CVR drops near zero (bots click but never convert)
|
| 24 |
+
- **Click Injection** β CVR becomes abnormally high (fake conversions injected)
|
| 25 |
+
- **Domain Spoofing** β Impressions surge while CVR drops (fake ad inventory)
|
| 26 |
+
|
| 27 |
+
**The catch:** False positives are heavily penalized, investigations cost budget, and fraudsters adapt when investigated.
|
| 28 |
+
|
| 29 |
+
## Quick Start
|
| 30 |
+
|
| 31 |
+
```python
|
| 32 |
+
import asyncio
|
| 33 |
+
from Ad_Audit import AdAuditAction, AdAuditEnv
|
| 34 |
+
|
| 35 |
+
async def main():
|
| 36 |
+
env = await AdAuditEnv.from_docker_image("adaudit-env:latest")
|
| 37 |
+
try:
|
| 38 |
+
result = await env.reset(episode_id="medium")
|
| 39 |
+
obs = result.observation
|
| 40 |
+
print(f"Day {obs.day}: {len(obs.daily_metrics)} publishers")
|
| 41 |
+
|
| 42 |
+
# Monitor day 1
|
| 43 |
+
result = await env.step(AdAuditAction(action_type="monitor"))
|
| 44 |
+
|
| 45 |
+
# Investigate a suspicious publisher
|
| 46 |
+
result = await env.step(AdAuditAction(
|
| 47 |
+
action_type="investigate_publisher",
|
| 48 |
+
publisher_id="pub_003",
|
| 49 |
+
tool="click_timestamps"
|
| 50 |
+
))
|
| 51 |
+
|
| 52 |
+
# Flag fraud with evidence
|
| 53 |
+
result = await env.step(AdAuditAction(
|
| 54 |
+
action_type="flag_fraud",
|
| 55 |
+
publisher_id="pub_003",
|
| 56 |
+
fraud_type="bot_traffic",
|
| 57 |
+
evidence=["click_timestamps", "ip_distribution"]
|
| 58 |
+
))
|
| 59 |
+
print(f"Reward: {result.reward}")
|
| 60 |
+
finally:
|
| 61 |
+
await env.close()
|
| 62 |
+
|
| 63 |
+
asyncio.run(main())
|
| 64 |
+
```
|
| 65 |
+
|
| 66 |
+
## Actions
|
| 67 |
+
|
| 68 |
+
| Action | Description | Cost |
|
| 69 |
+
|--------|-------------|------|
|
| 70 |
+
| `monitor` | Observe metrics, take no action | Free |
|
| 71 |
+
| `investigate_publisher` | Run a tool on one publisher | 1 investigation budget |
|
| 72 |
+
| `flag_fraud` | Flag publisher as fraudulent (irreversible) | Free but false positives penalized |
|
| 73 |
+
| `submit_report` | End the episode early | Free |
|
| 74 |
+
|
| 75 |
+
**Investigation tools:** click_timestamps, ip_distribution, device_fingerprints, referral_urls, viewability_scores, conversion_quality
|
| 76 |
+
|
| 77 |
+
## Observation
|
| 78 |
+
|
| 79 |
+
Each step returns:
|
| 80 |
+
- **daily_metrics** β Per-publisher: impressions, clicks, conversions, spend, CTR, CVR
|
| 81 |
+
- **investigation_results** β Tool output (if investigated)
|
| 82 |
+
- **publisher_status** β Active or flagged
|
| 83 |
+
- **budget_status** β Campaign spend and remaining investigation budget
|
| 84 |
+
|
| 85 |
+
## Tasks
|
| 86 |
+
|
| 87 |
+
| Task | Publishers | Fraudsters | Investigation Budget | Difficulty |
|
| 88 |
+
|------|-----------|------------|---------------------|------------|
|
| 89 |
+
| `easy` | 2 | 1 (bot_traffic) | 10 | Obvious signals |
|
| 90 |
+
| `medium` | 4 | 2 (bot_traffic + click_injection) | 10 | Mixed fraud types |
|
| 91 |
+
| `hard` | 4 | 2 (domain_spoofing + bot_traffic) | 6 | Subtle signals, tight budget |
|
| 92 |
+
|
| 93 |
+
## Scoring
|
| 94 |
+
|
| 95 |
+
Final score (0-1) is weighted:
|
| 96 |
+
- **Fraud detection accuracy** (50%) β Correct flags with right fraud type
|
| 97 |
+
- **Detection timeliness** (30%) β How early fraud was caught
|
| 98 |
+
- **Investigation efficiency** (20%) β Budget usage and false positive avoidance
|
| 99 |
+
|
| 100 |
+
## Deployment
|
| 101 |
+
|
| 102 |
+
```bash
|
| 103 |
+
# Build Docker image
|
| 104 |
+
docker build -t adaudit-env .
|
| 105 |
+
|
| 106 |
+
# Run locally
|
| 107 |
+
docker run -p 8000:8000 adaudit-env
|
| 108 |
+
|
| 109 |
+
# Or without Docker
|
| 110 |
+
ENABLE_WEB_INTERFACE=true python -m server.app
|
| 111 |
+
```
|
| 112 |
+
|
| 113 |
+
**Endpoints:**
|
| 114 |
+
- `/web` β Interactive Gradio UI
|
| 115 |
+
- `/docs` β API documentation
|
| 116 |
+
- `/health` β Health check
|
| 117 |
+
- `/ws` β WebSocket for persistent sessions
|
| 118 |
+
|
| 119 |
+
## Project Structure
|
| 120 |
+
|
| 121 |
+
```
|
| 122 |
+
Ad_Audit/
|
| 123 |
+
βββ inference.py # LLM agent + rule-based fallback
|
| 124 |
+
βββ models.py # Action / Observation / State models
|
| 125 |
+
βββ client.py # WebSocket client (AdAuditEnv)
|
| 126 |
+
βββ cases/ # Task definitions (easy/medium/hard)
|
| 127 |
+
βββ server/
|
| 128 |
+
βββ app.py # FastAPI server
|
| 129 |
+
βββ Ad_Audit_environment.py # Core environment logic
|
| 130 |
+
βββ fraud_engine.py # Suspicion tracking & fraud intensity
|
| 131 |
+
βββ publisher_engine.py # Traffic generation
|
| 132 |
+
βββ response_generator.py # Investigation tool responses
|
| 133 |
+
βββ step_reward.py # Per-step reward calculator
|
| 134 |
+
βββ grader.py # Episode-end scoring
|
| 135 |
+
```
|
__init__.py
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the BSD-style license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
"""Ad Audit Environment."""
|
| 8 |
+
|
| 9 |
+
from .client import AdAuditEnv
|
| 10 |
+
from .models import AdAuditAction, AdAuditObservation
|
| 11 |
+
|
| 12 |
+
__all__ = [
|
| 13 |
+
"AdAuditAction",
|
| 14 |
+
"AdAuditObservation",
|
| 15 |
+
"AdAuditEnv",
|
| 16 |
+
]
|
cases/easy.json
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"case_id": "easy",
|
| 3 |
+
"difficulty": "easy",
|
| 4 |
+
"campaign": {
|
| 5 |
+
"name": "Summer Electronics Sale",
|
| 6 |
+
"duration_days": 14,
|
| 7 |
+
"total_budget": 10000,
|
| 8 |
+
"conversion_value": 10.0,
|
| 9 |
+
"investigation_budget": 10,
|
| 10 |
+
"industry_benchmarks": {
|
| 11 |
+
"avg_ctr": 0.020,
|
| 12 |
+
"avg_cvr": 0.030
|
| 13 |
+
}
|
| 14 |
+
},
|
| 15 |
+
"publishers": {
|
| 16 |
+
"pub_001": {
|
| 17 |
+
"name": "TechReview Daily",
|
| 18 |
+
"is_fraudulent": false,
|
| 19 |
+
"fraud_type": null,
|
| 20 |
+
"base_traffic_rate": 10000,
|
| 21 |
+
"true_ctr": 0.021,
|
| 22 |
+
"true_cvr": 0.031,
|
| 23 |
+
"cpm_rate": 2.0,
|
| 24 |
+
"budget_allocation": 0.5,
|
| 25 |
+
"suspicion_reactivity": 1.0,
|
| 26 |
+
"fraud_schedule": {},
|
| 27 |
+
"fake_ctr": 0.0,
|
| 28 |
+
"fake_cvr": 0.0,
|
| 29 |
+
"day_factors": [1.0, 1.02, 0.98, 1.01, 1.03, 0.97, 0.95, 1.04, 1.01, 0.99, 1.02, 1.03, 0.98, 1.0],
|
| 30 |
+
"noise_factors": [1.01, 0.99, 1.02, 0.98, 1.0, 1.01, 0.99, 1.01, 1.0, 0.99, 1.01, 1.0, 0.99, 1.01]
|
| 31 |
+
},
|
| 32 |
+
"pub_002": {
|
| 33 |
+
"name": "BargainClicks Network",
|
| 34 |
+
"is_fraudulent": true,
|
| 35 |
+
"fraud_type": "bot_traffic",
|
| 36 |
+
"base_traffic_rate": 8000,
|
| 37 |
+
"true_ctr": 0.019,
|
| 38 |
+
"true_cvr": 0.028,
|
| 39 |
+
"cpm_rate": 2.0,
|
| 40 |
+
"budget_allocation": 0.5,
|
| 41 |
+
"suspicion_reactivity": 0.3,
|
| 42 |
+
"fraud_schedule": {
|
| 43 |
+
"start_day": 2,
|
| 44 |
+
"ramp_days": 2,
|
| 45 |
+
"peak_intensity": 3.5
|
| 46 |
+
},
|
| 47 |
+
"fake_ctr": 0.08,
|
| 48 |
+
"fake_cvr": 0.0005,
|
| 49 |
+
"day_factors": [1.0, 1.03, 1.08, 1.15, 1.20, 1.22, 1.20, 1.23, 1.25, 1.23, 1.25, 1.23, 1.22, 1.20],
|
| 50 |
+
"noise_factors": [1.0, 1.01, 0.99, 1.02, 1.0, 0.98, 1.01, 1.0, 0.99, 1.02, 1.0, 0.98, 1.01, 1.0]
|
| 51 |
+
}
|
| 52 |
+
},
|
| 53 |
+
"ground_truth": {
|
| 54 |
+
"fraudulent_publishers": ["pub_002"],
|
| 55 |
+
"fraud_types": {
|
| 56 |
+
"pub_002": "bot_traffic"
|
| 57 |
+
}
|
| 58 |
+
}
|
| 59 |
+
}
|
cases/hard.json
ADDED
|
@@ -0,0 +1,96 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"case_id": "hard",
|
| 3 |
+
"difficulty": "hard",
|
| 4 |
+
"campaign": {
|
| 5 |
+
"name": "Global Auto Insurance Q4 Push",
|
| 6 |
+
"duration_days": 14,
|
| 7 |
+
"total_budget": 20000,
|
| 8 |
+
"conversion_value": 10.0,
|
| 9 |
+
"investigation_budget": 6,
|
| 10 |
+
"industry_benchmarks": {
|
| 11 |
+
"avg_ctr": 0.021,
|
| 12 |
+
"avg_cvr": 0.028
|
| 13 |
+
}
|
| 14 |
+
},
|
| 15 |
+
"publishers": {
|
| 16 |
+
"pub_001": {
|
| 17 |
+
"name": "AutoInsure Direct",
|
| 18 |
+
"is_fraudulent": false,
|
| 19 |
+
"fraud_type": null,
|
| 20 |
+
"base_traffic_rate": 11000,
|
| 21 |
+
"true_ctr": 0.023,
|
| 22 |
+
"true_cvr": 0.033,
|
| 23 |
+
"cpm_rate": 2.0,
|
| 24 |
+
"budget_allocation": 0.25,
|
| 25 |
+
"suspicion_reactivity": 1.0,
|
| 26 |
+
"fraud_schedule": {},
|
| 27 |
+
"fake_ctr": 0.0,
|
| 28 |
+
"fake_cvr": 0.0,
|
| 29 |
+
"day_factors": [1.0, 1.01, 0.99, 1.02, 1.0, 0.97, 0.94, 1.02, 1.01, 0.99, 1.01, 1.02, 0.98, 1.0],
|
| 30 |
+
"noise_factors": [1.0, 0.99, 1.01, 1.0, 1.01, 0.99, 1.0, 1.01, 0.99, 1.0, 1.01, 0.99, 1.0, 1.01]
|
| 31 |
+
},
|
| 32 |
+
"pub_002": {
|
| 33 |
+
"name": "SafeDrive Media",
|
| 34 |
+
"is_fraudulent": false,
|
| 35 |
+
"fraud_type": null,
|
| 36 |
+
"base_traffic_rate": 8000,
|
| 37 |
+
"true_ctr": 0.020,
|
| 38 |
+
"true_cvr": 0.029,
|
| 39 |
+
"cpm_rate": 2.0,
|
| 40 |
+
"budget_allocation": 0.25,
|
| 41 |
+
"suspicion_reactivity": 1.0,
|
| 42 |
+
"fraud_schedule": {},
|
| 43 |
+
"fake_ctr": 0.0,
|
| 44 |
+
"fake_cvr": 0.0,
|
| 45 |
+
"day_factors": [1.0, 0.99, 1.01, 1.0, 1.02, 0.97, 0.94, 1.01, 1.0, 0.99, 1.01, 1.0, 0.98, 1.01],
|
| 46 |
+
"noise_factors": [1.0, 1.01, 0.99, 1.0, 1.01, 0.99, 1.0, 1.01, 0.99, 1.0, 1.01, 0.99, 1.0, 1.01]
|
| 47 |
+
},
|
| 48 |
+
"pub_003": {
|
| 49 |
+
"name": "ShadowNet Ads",
|
| 50 |
+
"is_fraudulent": true,
|
| 51 |
+
"fraud_type": "domain_spoofing",
|
| 52 |
+
"base_traffic_rate": 12000,
|
| 53 |
+
"true_ctr": 0.019,
|
| 54 |
+
"true_cvr": 0.025,
|
| 55 |
+
"cpm_rate": 2.5,
|
| 56 |
+
"budget_allocation": 0.25,
|
| 57 |
+
"suspicion_reactivity": 1.8,
|
| 58 |
+
"fraud_schedule": {
|
| 59 |
+
"start_day": 4,
|
| 60 |
+
"ramp_days": 3,
|
| 61 |
+
"peak_intensity": 1.8
|
| 62 |
+
},
|
| 63 |
+
"fake_ctr": 0.025,
|
| 64 |
+
"fake_cvr": 0.003,
|
| 65 |
+
"day_factors": [1.0, 1.02, 1.04, 1.08, 1.10, 1.08, 1.10, 1.12, 1.10, 1.08, 1.10, 1.12, 1.10, 1.08],
|
| 66 |
+
"noise_factors": [1.0, 1.01, 0.99, 1.02, 1.0, 0.98, 1.01, 1.0, 0.99, 1.01, 1.0, 0.99, 1.01, 1.0]
|
| 67 |
+
},
|
| 68 |
+
"pub_004": {
|
| 69 |
+
"name": "ConvertMax Pro",
|
| 70 |
+
"is_fraudulent": true,
|
| 71 |
+
"fraud_type": "bot_traffic",
|
| 72 |
+
"base_traffic_rate": 9000,
|
| 73 |
+
"true_ctr": 0.021,
|
| 74 |
+
"true_cvr": 0.029,
|
| 75 |
+
"cpm_rate": 2.0,
|
| 76 |
+
"budget_allocation": 0.25,
|
| 77 |
+
"suspicion_reactivity": 1.5,
|
| 78 |
+
"fraud_schedule": {
|
| 79 |
+
"start_day": 5,
|
| 80 |
+
"ramp_days": 3,
|
| 81 |
+
"peak_intensity": 1.5
|
| 82 |
+
},
|
| 83 |
+
"fake_ctr": 0.04,
|
| 84 |
+
"fake_cvr": 0.002,
|
| 85 |
+
"day_factors": [1.0, 1.0, 1.0, 1.01, 1.03, 1.05, 1.06, 1.07, 1.06, 1.07, 1.06, 1.07, 1.06, 1.05],
|
| 86 |
+
"noise_factors": [1.0, 1.01, 0.99, 1.0, 1.01, 0.99, 1.02, 1.0, 0.98, 1.01, 1.0, 0.99, 1.01, 1.0]
|
| 87 |
+
}
|
| 88 |
+
},
|
| 89 |
+
"ground_truth": {
|
| 90 |
+
"fraudulent_publishers": ["pub_003", "pub_004"],
|
| 91 |
+
"fraud_types": {
|
| 92 |
+
"pub_003": "domain_spoofing",
|
| 93 |
+
"pub_004": "bot_traffic"
|
| 94 |
+
}
|
| 95 |
+
}
|
| 96 |
+
}
|
cases/medium.json
ADDED
|
@@ -0,0 +1,96 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"case_id": "medium",
|
| 3 |
+
"difficulty": "medium",
|
| 4 |
+
"campaign": {
|
| 5 |
+
"name": "Holiday Fashion Campaign",
|
| 6 |
+
"duration_days": 14,
|
| 7 |
+
"total_budget": 15000,
|
| 8 |
+
"conversion_value": 10.0,
|
| 9 |
+
"investigation_budget": 10,
|
| 10 |
+
"industry_benchmarks": {
|
| 11 |
+
"avg_ctr": 0.021,
|
| 12 |
+
"avg_cvr": 0.028
|
| 13 |
+
}
|
| 14 |
+
},
|
| 15 |
+
"publishers": {
|
| 16 |
+
"pub_001": {
|
| 17 |
+
"name": "StyleMaven Magazine",
|
| 18 |
+
"is_fraudulent": false,
|
| 19 |
+
"fraud_type": null,
|
| 20 |
+
"base_traffic_rate": 12000,
|
| 21 |
+
"true_ctr": 0.022,
|
| 22 |
+
"true_cvr": 0.032,
|
| 23 |
+
"cpm_rate": 2.5,
|
| 24 |
+
"budget_allocation": 0.30,
|
| 25 |
+
"suspicion_reactivity": 1.0,
|
| 26 |
+
"fraud_schedule": {},
|
| 27 |
+
"fake_ctr": 0.0,
|
| 28 |
+
"fake_cvr": 0.0,
|
| 29 |
+
"day_factors": [1.0, 1.01, 0.99, 1.02, 1.0, 0.97, 0.95, 1.03, 1.01, 0.99, 1.02, 1.0, 0.98, 1.01],
|
| 30 |
+
"noise_factors": [1.0, 0.99, 1.01, 1.0, 0.98, 1.02, 1.0, 0.99, 1.01, 1.0, 0.98, 1.01, 1.0, 0.99]
|
| 31 |
+
},
|
| 32 |
+
"pub_002": {
|
| 33 |
+
"name": "FashionForward Hub",
|
| 34 |
+
"is_fraudulent": false,
|
| 35 |
+
"fraud_type": null,
|
| 36 |
+
"base_traffic_rate": 7000,
|
| 37 |
+
"true_ctr": 0.019,
|
| 38 |
+
"true_cvr": 0.027,
|
| 39 |
+
"cpm_rate": 2.0,
|
| 40 |
+
"budget_allocation": 0.20,
|
| 41 |
+
"suspicion_reactivity": 1.0,
|
| 42 |
+
"fraud_schedule": {},
|
| 43 |
+
"fake_ctr": 0.0,
|
| 44 |
+
"fake_cvr": 0.0,
|
| 45 |
+
"day_factors": [1.0, 0.99, 1.01, 1.0, 1.02, 0.96, 0.94, 1.01, 1.0, 0.99, 1.01, 1.02, 0.98, 1.0],
|
| 46 |
+
"noise_factors": [1.0, 1.01, 0.99, 1.0, 1.01, 0.99, 1.0, 1.01, 0.99, 1.0, 1.01, 0.99, 1.0, 1.01]
|
| 47 |
+
},
|
| 48 |
+
"pub_003": {
|
| 49 |
+
"name": "LuxeAds Premium",
|
| 50 |
+
"is_fraudulent": true,
|
| 51 |
+
"fraud_type": "bot_traffic",
|
| 52 |
+
"base_traffic_rate": 11000,
|
| 53 |
+
"true_ctr": 0.020,
|
| 54 |
+
"true_cvr": 0.026,
|
| 55 |
+
"cpm_rate": 3.0,
|
| 56 |
+
"budget_allocation": 0.25,
|
| 57 |
+
"suspicion_reactivity": 0.5,
|
| 58 |
+
"fraud_schedule": {
|
| 59 |
+
"start_day": 3,
|
| 60 |
+
"ramp_days": 2,
|
| 61 |
+
"peak_intensity": 2.5
|
| 62 |
+
},
|
| 63 |
+
"fake_ctr": 0.06,
|
| 64 |
+
"fake_cvr": 0.001,
|
| 65 |
+
"day_factors": [1.0, 1.01, 1.0, 1.05, 1.10, 1.12, 1.10, 1.13, 1.12, 1.10, 1.12, 1.13, 1.11, 1.10],
|
| 66 |
+
"noise_factors": [1.01, 1.0, 0.99, 1.01, 1.0, 0.99, 1.01, 1.0, 0.98, 1.02, 1.0, 0.99, 1.01, 1.0]
|
| 67 |
+
},
|
| 68 |
+
"pub_004": {
|
| 69 |
+
"name": "ClickBoost Pro",
|
| 70 |
+
"is_fraudulent": true,
|
| 71 |
+
"fraud_type": "click_injection",
|
| 72 |
+
"base_traffic_rate": 8000,
|
| 73 |
+
"true_ctr": 0.018,
|
| 74 |
+
"true_cvr": 0.024,
|
| 75 |
+
"cpm_rate": 2.0,
|
| 76 |
+
"budget_allocation": 0.25,
|
| 77 |
+
"suspicion_reactivity": 0.5,
|
| 78 |
+
"fraud_schedule": {
|
| 79 |
+
"start_day": 3,
|
| 80 |
+
"ramp_days": 3,
|
| 81 |
+
"peak_intensity": 2.0
|
| 82 |
+
},
|
| 83 |
+
"fake_ctr": 0.04,
|
| 84 |
+
"fake_cvr": 0.07,
|
| 85 |
+
"day_factors": [1.0, 1.0, 1.0, 1.03, 1.06, 1.08, 1.10, 1.08, 1.10, 1.08, 1.10, 1.08, 1.10, 1.08],
|
| 86 |
+
"noise_factors": [1.0, 1.01, 0.99, 1.0, 0.98, 1.02, 1.0, 0.99, 1.01, 1.0, 0.99, 1.01, 1.0, 0.99]
|
| 87 |
+
}
|
| 88 |
+
},
|
| 89 |
+
"ground_truth": {
|
| 90 |
+
"fraudulent_publishers": ["pub_003", "pub_004"],
|
| 91 |
+
"fraud_types": {
|
| 92 |
+
"pub_003": "bot_traffic",
|
| 93 |
+
"pub_004": "click_injection"
|
| 94 |
+
}
|
| 95 |
+
}
|
| 96 |
+
}
|
client.py
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the BSD-style license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
"""Ad Audit Environment Client."""
|
| 8 |
+
|
| 9 |
+
from typing import Any, Dict
|
| 10 |
+
|
| 11 |
+
from openenv.core import EnvClient
|
| 12 |
+
from openenv.core.client_types import StepResult
|
| 13 |
+
|
| 14 |
+
try:
|
| 15 |
+
from .models import AdAuditAction, AdAuditObservation, AdAuditState
|
| 16 |
+
except ImportError:
|
| 17 |
+
from models import AdAuditAction, AdAuditObservation, AdAuditState # type: ignore[no-redef]
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
class AdAuditEnv(
|
| 21 |
+
EnvClient[AdAuditAction, AdAuditObservation, AdAuditState]
|
| 22 |
+
):
|
| 23 |
+
"""
|
| 24 |
+
Client for the Ad Audit Environment.
|
| 25 |
+
|
| 26 |
+
This client maintains a persistent WebSocket connection to the environment server,
|
| 27 |
+
enabling efficient multi-step interactions with lower latency.
|
| 28 |
+
Each client instance has its own dedicated environment session on the server.
|
| 29 |
+
|
| 30 |
+
Example with Docker:
|
| 31 |
+
>>> client = await AdAuditEnv.from_docker_image("adaudit-env:latest")
|
| 32 |
+
>>> try:
|
| 33 |
+
... result = await client.reset(episode_id="medium")
|
| 34 |
+
... result = await client.step(AdAuditAction(action_type="monitor"))
|
| 35 |
+
... finally:
|
| 36 |
+
... await client.close()
|
| 37 |
+
"""
|
| 38 |
+
|
| 39 |
+
def _step_payload(self, action: AdAuditAction) -> Dict[str, Any]:
|
| 40 |
+
"""
|
| 41 |
+
Convert AdAuditAction to JSON payload for step message.
|
| 42 |
+
|
| 43 |
+
The server deserializes this via AdAuditAction.model_validate(),
|
| 44 |
+
so we just send the pydantic model_dump with None fields excluded.
|
| 45 |
+
"""
|
| 46 |
+
return action.model_dump(exclude_none=True)
|
| 47 |
+
|
| 48 |
+
def _parse_result(self, payload: Dict[str, Any]) -> StepResult[AdAuditObservation]:
|
| 49 |
+
"""
|
| 50 |
+
Parse server response into StepResult[AdAuditObservation].
|
| 51 |
+
|
| 52 |
+
The server sends:
|
| 53 |
+
{
|
| 54 |
+
"observation": { ... AdAuditObservation fields (minus reward/done/metadata) ... },
|
| 55 |
+
"reward": float | None,
|
| 56 |
+
"done": bool,
|
| 57 |
+
}
|
| 58 |
+
"""
|
| 59 |
+
obs_data = payload.get("observation", {})
|
| 60 |
+
|
| 61 |
+
# Re-inject reward/done so the Observation model has them
|
| 62 |
+
obs_data["reward"] = payload.get("reward")
|
| 63 |
+
obs_data["done"] = payload.get("done", False)
|
| 64 |
+
|
| 65 |
+
observation = AdAuditObservation.model_validate(obs_data)
|
| 66 |
+
|
| 67 |
+
return StepResult(
|
| 68 |
+
observation=observation,
|
| 69 |
+
reward=payload.get("reward"),
|
| 70 |
+
done=payload.get("done", False),
|
| 71 |
+
)
|
| 72 |
+
|
| 73 |
+
def _parse_state(self, payload: Dict[str, Any]) -> AdAuditState:
|
| 74 |
+
"""
|
| 75 |
+
Parse server response into AdAuditState.
|
| 76 |
+
"""
|
| 77 |
+
return AdAuditState.model_validate(payload)
|
inference.py
ADDED
|
@@ -0,0 +1,425 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Inference Script for AdAudit
|
| 3 |
+
===================================
|
| 4 |
+
MANDATORY
|
| 5 |
+
- Before submitting, ensure the following variables are defined in your environment configuration:
|
| 6 |
+
API_BASE_URL The API endpoint for the LLM.
|
| 7 |
+
MODEL_NAME The model identifier to use for inference.
|
| 8 |
+
HF_TOKEN Your Hugging Face / API key.
|
| 9 |
+
LOCAL_IMAGE_NAME The name of the local image to use for the environment if you are using from_docker_image()
|
| 10 |
+
method
|
| 11 |
+
|
| 12 |
+
- Defaults are set only for API_BASE_URL and MODEL_NAME
|
| 13 |
+
(and should reflect your active inference setup):
|
| 14 |
+
API_BASE_URL = os.getenv("API_BASE_URL", "<your-active-endpoint>")
|
| 15 |
+
MODEL_NAME = os.getenv("MODEL_NAME", "<your-active-model>")
|
| 16 |
+
|
| 17 |
+
- The inference script must be named `inference.py` and placed in the root directory of the project
|
| 18 |
+
- Participants must use OpenAI Client for all LLM calls using above variables
|
| 19 |
+
|
| 20 |
+
STDOUT FORMAT
|
| 21 |
+
- The script must emit exactly three line types to stdout, in this order:
|
| 22 |
+
|
| 23 |
+
[START] task=<task_name> env=<benchmark> model=<model_name>
|
| 24 |
+
[STEP] step=<n> action=<action_str> reward=<0.00> done=<true|false> error=<msg|null>
|
| 25 |
+
[END] success=<true|false> steps=<n> score=<score> rewards=<r1,r2,...,rn>
|
| 26 |
+
|
| 27 |
+
Rules:
|
| 28 |
+
- One [START] line at episode begin.
|
| 29 |
+
- One [STEP] line per step, immediately after env.step() returns.
|
| 30 |
+
- One [END] line after env.close(), always emitted (even on exception).
|
| 31 |
+
- reward and rewards are formatted to 2 decimal places.
|
| 32 |
+
- done and success are lowercase booleans: true or false.
|
| 33 |
+
- error is the raw last_action_error string, or null if none.
|
| 34 |
+
- All fields on a single line with no newlines within a line.
|
| 35 |
+
- Each tasks should return score in [0, 1]
|
| 36 |
+
|
| 37 |
+
Example:
|
| 38 |
+
[START] task=click-test env=miniwob model=Qwen3-VL-30B
|
| 39 |
+
[STEP] step=1 action=click('123') reward=0.00 done=false error=null
|
| 40 |
+
[STEP] step=2 action=fill('456','text') reward=0.00 done=false error=null
|
| 41 |
+
[STEP] step=3 action=click('789') reward=1.00 done=true error=null
|
| 42 |
+
[END] success=true steps=3 score=1.00 rewards=0.00,0.00,1.00
|
| 43 |
+
"""
|
| 44 |
+
|
| 45 |
+
import json
|
| 46 |
+
import os
|
| 47 |
+
import re
|
| 48 |
+
from typing import Any, Dict, List, Optional, Tuple
|
| 49 |
+
|
| 50 |
+
from openai import OpenAI
|
| 51 |
+
|
| 52 |
+
from server.Ad_Audit_environment import AdAuditEnv, EPISODE_DAYS
|
| 53 |
+
from models import AdAuditAction
|
| 54 |
+
|
| 55 |
+
# ---------------------------------------------------------------------------
|
| 56 |
+
# Configuration
|
| 57 |
+
# ---------------------------------------------------------------------------
|
| 58 |
+
|
| 59 |
+
# Container / credential configuration, read once at import time.
IMAGE_NAME = os.getenv("LOCAL_IMAGE_NAME")
API_KEY = os.getenv("HF_TOKEN") or os.getenv("API_KEY")

# OpenAI-compatible endpoint and model used for all LLM calls.
API_BASE_URL = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1")
MODEL_NAME = os.getenv("MODEL_NAME", "Qwen/Qwen2.5-7B-Instruct")

# Task selection: ADAUDIT_TASK must be one of easy/medium/hard, else "hard".
_VALID_TASKS = {"easy", "medium", "hard"}
_task_env = os.getenv("ADAUDIT_TASK", "").strip().lower()
TASK_NAME = _task_env if _task_env in _VALID_TASKS else "hard"
BENCHMARK = os.getenv("ADAUDIT_BENCHMARK", "adaudit")
TEMPERATURE = 0.0  # deterministic sampling for reproducible runs
MAX_TOKENS = 2048
HISTORY_WINDOW = 5  # number of (user, assistant) turn pairs kept in LLM context
BASELINE_DAYS = 3  # days the rule-based policy only monitors before acting
SUCCESS_SCORE_THRESHOLD = 0.5  # episode counts as success at/above this score

# Rule-based investigation tools per fraud type (two tools of evidence
# are collected before flagging a publisher).
TOOLS_FOR = {
    "bot_traffic": ["click_timestamps", "ip_distribution"],
    "click_injection": ["conversion_quality", "click_timestamps"],
    "domain_spoofing": ["referral_urls", "viewability_scores"],
}
|
| 81 |
+
|
| 82 |
+
SYSTEM_PROMPT = """\
|
| 83 |
+
You are an ad fraud analyst monitoring a 14-day campaign with multiple publishers.
|
| 84 |
+
Each day you see metrics (impressions, clicks, conversions, CTR, CVR) and take ONE action.
|
| 85 |
+
|
| 86 |
+
## ACTIONS
|
| 87 |
+
|
| 88 |
+
Use publisher IDs (pub_001, pub_002, etc.), never publisher names.
|
| 89 |
+
|
| 90 |
+
1. Monitor β observe metrics, take no action:
|
| 91 |
+
{"action_type": "monitor"}
|
| 92 |
+
2. Investigate β run one tool on one publisher (costs 1 budget):
|
| 93 |
+
{"action_type": "investigate_publisher", "publisher_id": "pub_001", "tool": "click_timestamps"}
|
| 94 |
+
3. Flag fraud β flag a publisher as fraudulent (irreversible):
|
| 95 |
+
{"action_type": "flag_fraud", "publisher_id": "pub_001", "fraud_type": "bot_traffic", "evidence": ["click_timestamps", "ip_distribution"]}
|
| 96 |
+
4. Submit report β end the episode:
|
| 97 |
+
{"action_type": "submit_report", "summary": "..."}
|
| 98 |
+
|
| 99 |
+
Valid tools: click_timestamps, ip_distribution, device_fingerprints, \
|
| 100 |
+
referral_urls, viewability_scores, conversion_quality
|
| 101 |
+
Valid fraud types: bot_traffic, domain_spoofing, click_injection
|
| 102 |
+
|
| 103 |
+
## STRATEGY
|
| 104 |
+
1. Monitor days 1-3 to establish baselines. Then compare each publisher's CTR/CVR against its own baseline. Only publishers with BIG shifts (CTR doubled, CVR crashed, impressions tripled) are suspicious. Stable metrics = clean.
|
| 105 |
+
2. Investigate suspicious publishers with 2 tools before flagging. bot_traffic = CTR spikes + CVR near zero. click_injection = CVR abnormally high (5%+). domain_spoofing = impressions surge + CVR drops.
|
| 106 |
+
3. False positives are heavily penalized. When in doubt, monitor.
|
| 107 |
+
4. Do not submit report until you are confident you have found and flagged ALL fraudsters.
|
| 108 |
+
|
| 109 |
+
Respond with ONLY a JSON action in ```json``` markers.
|
| 110 |
+
"""
|
| 111 |
+
|
| 112 |
+
|
| 113 |
+
# ---------------------------------------------------------------------------
|
| 114 |
+
# Logging helpers
|
| 115 |
+
# ---------------------------------------------------------------------------
|
| 116 |
+
|
| 117 |
+
def log_start(task: str, env: str, model: str) -> None:
    """Emit the single [START] line that opens an episode transcript."""
    header = f"[START] task={task} env={env} model={model}"
    print(header, flush=True)
|
| 119 |
+
|
| 120 |
+
|
| 121 |
+
def log_step(step: int, action: str, reward: float, done: bool, error: Optional[str]) -> None:
    """Emit one [STEP] line immediately after an environment step returns."""
    fields = " ".join(
        (
            f"step={step}",
            f"action={action}",
            f"reward={reward:.2f}",
            f"done={str(done).lower()}",
            f"error={error if error else 'null'}",
        )
    )
    print(f"[STEP] {fields}", flush=True)
|
| 125 |
+
|
| 126 |
+
|
| 127 |
+
def log_end(success: bool, steps: int, score: float, rewards: List[float]) -> None:
    """Emit the final [END] summary line (always printed, even on exception)."""
    reward_csv = ",".join(format(value, ".2f") for value in rewards)
    summary = (
        f"[END] success={str(success).lower()} steps={steps} "
        f"score={score:.3f} rewards={reward_csv}"
    )
    print(summary, flush=True)
|
| 130 |
+
|
| 131 |
+
|
| 132 |
+
# ---------------------------------------------------------------------------
|
| 133 |
+
# Observation formatting (for LLM)
|
| 134 |
+
# ---------------------------------------------------------------------------
|
| 135 |
+
|
| 136 |
+
def format_observation(obs_dict: Dict[str, Any], action_log: Optional[List[str]] = None) -> str:
    """Render an observation dict as the plain-text prompt shown to the LLM.

    Args:
        obs_dict: ``AdAuditObservation.model_dump()`` output for the current day.
        action_log: Optional compact history of past actions to prepend.
            (Fix: previously annotated ``List[str]`` despite the ``None`` default.)

    Returns:
        A newline-joined summary of the day's metrics, any investigation
        result, publisher status, and remaining budget.
    """
    parts: List[str] = []
    parts.append(f"=== DAY {obs_dict['day']} of {obs_dict.get('campaign_day_total', 14)} ===\n")

    if action_log:
        parts.append("Past actions: " + " | ".join(action_log))
        parts.append("")

    if obs_dict.get("daily_metrics"):
        parts.append("Metrics:")
        parts.append(f"{'ID':<10} {'Publisher':<22} {'Impressions':>12} {'Clicks':>8} {'Conversions':>12} {'Spend ($)':>10} {'CTR':>7} {'CVR':>7}")
        for m in obs_dict["daily_metrics"]:
            parts.append(
                f"{m['publisher_id']:<10} {m['name']:<22} {m['impressions']:>12,} {m['clicks']:>8,} "
                f"{m['conversions']:>12,} {m['spend']:>10,.2f} {m['ctr']:>6.2%} {m['cvr']:>6.2%}"
            )
        parts.append("")

    if obs_dict.get("investigation_results"):
        inv = obs_dict["investigation_results"]
        if isinstance(inv, dict):
            if "error" in inv:
                parts.append(f"Investigation ERROR: {inv['error']}")
            else:
                parts.append(f"Investigation ({inv.get('tool', '?')}) for {inv.get('publisher_id', '?')}:")
                # Dump every tool-specific metric except the identifying keys.
                for k, v in inv.items():
                    if k not in ("tool", "publisher_id"):
                        parts.append(f"  {k}: {v}")
        parts.append("")

    pub_status = obs_dict.get("publisher_status", {})
    if pub_status:
        flagged_pubs = [pid for pid, s in pub_status.items() if s == "flagged"]
        active = [pid for pid, s in pub_status.items() if s == "active"]
        if flagged_pubs:
            parts.append(f"FLAGGED: {', '.join(flagged_pubs)}")
        parts.append(f"Active publishers: {', '.join(active)}")

    b = obs_dict.get("budget_status", {})
    if b:
        parts.append(
            f"Budget: ${b.get('remaining',0):,.0f} remaining | "
            f"Investigations left: {b.get('investigation_budget_remaining', 0)}"
        )
    return "\n".join(parts)
|
| 181 |
+
|
| 182 |
+
|
| 183 |
+
# ---------------------------------------------------------------------------
|
| 184 |
+
# Action parsing (for LLM)
|
| 185 |
+
# ---------------------------------------------------------------------------
|
| 186 |
+
|
| 187 |
+
def parse_action(text: str) -> Dict[str, Any]:
    """Extract a JSON action dict from LLM output.

    Tries a fenced ```json``` block first, then scans lines bottom-up for a
    bare one-line JSON object. Falls back to a safe "monitor" action when
    nothing parses.
    """
    fenced = re.search(r"```json\s*(\{.*?\})\s*```", text, re.DOTALL)
    if fenced:
        try:
            return json.loads(fenced.group(1))
        except json.JSONDecodeError:
            pass
    for candidate in reversed(text.strip().split("\n")):
        candidate = candidate.strip()
        if candidate.startswith("{") and candidate.endswith("}"):
            try:
                return json.loads(candidate)
            except json.JSONDecodeError:
                continue
    return {"action_type": "monitor"}
|
| 202 |
+
|
| 203 |
+
|
| 204 |
+
# ---------------------------------------------------------------------------
|
| 205 |
+
# LLM action
|
| 206 |
+
# ---------------------------------------------------------------------------
|
| 207 |
+
|
| 208 |
+
def get_llm_action(
    client: OpenAI,
    obs_text: str,
    history: List[Dict[str, str]],
) -> Optional[Dict[str, Any]]:
    """Ask the LLM for the next action; return None on any failure.

    Builds system + history + current-observation messages, calls the
    chat-completions endpoint, and parses the reply into an action dict.
    Any exception (network, API, empty choices) yields None so the caller
    can fall back to the rule-based policy.
    """
    conversation = [{"role": "system", "content": SYSTEM_PROMPT}]
    conversation += history
    conversation += [{"role": "user", "content": obs_text}]

    try:
        completion = client.chat.completions.create(
            model=MODEL_NAME,
            messages=conversation,
            temperature=TEMPERATURE,
            max_tokens=MAX_TOKENS,
        )
        content = completion.choices[0].message.content or ""
        return parse_action(content)
    except Exception:
        return None
|
| 229 |
+
|
| 230 |
+
|
| 231 |
+
# ---------------------------------------------------------------------------
|
| 232 |
+
# Rule-based fallback
|
| 233 |
+
# ---------------------------------------------------------------------------
|
| 234 |
+
|
| 235 |
+
def score_publishers(metrics_history: Dict[str, List[Dict]]) -> List[Tuple[str, float, str]]:
    """Rank publishers by a heuristic suspicion score, highest first.

    Compares each publisher's recent (last 3 days) CTR/CVR averages against
    its own first-two-day baseline and returns ``(publisher_id, score,
    suspected_fraud_type)`` tuples for scores above 0.2.
    """

    def _avg(rows: List[Dict], key: str) -> float:
        # Mean of one metric over a window of daily rows.
        return sum(row[key] for row in rows) / len(rows)

    suspects: List[Tuple[str, float, str]] = []
    for pub_id, history in metrics_history.items():
        if len(history) < 2:
            continue  # not enough data to form a baseline
        recent_ctr = _avg(history[-3:], "ctr")
        recent_cvr = _avg(history[-3:], "cvr")
        base_ctr = _avg(history[:2], "ctr")
        base_cvr = _avg(history[:2], "cvr")

        suspicion = 0.0
        label = "bot_traffic"

        # CTR inflation relative to the publisher's own baseline.
        if base_ctr > 0:
            rise = recent_ctr / base_ctr
            if rise > 1.5:
                suspicion += 0.4
            if rise > 2.0:
                suspicion += 0.3
        # CVR collapse relative to baseline.
        if base_cvr > 0:
            drop = recent_cvr / base_cvr
            if drop < 0.5:
                suspicion += 0.3
            if drop < 0.2:
                suspicion += 0.3
        # Abnormally high absolute CVR suggests injected conversions.
        if recent_cvr > 0.04:
            suspicion += 0.5
            label = "click_injection"
        # Moderate CTR rise + moderate CVR drop looks like spoofing.
        if base_ctr > 0 and base_cvr > 0:
            rise = recent_ctr / base_ctr
            drop = recent_cvr / base_cvr
            if 1.2 < rise < 2.0 and 0.2 < drop < 0.6:
                if label == "bot_traffic" and suspicion < 0.5:
                    label = "domain_spoofing"

        if suspicion > 0.2:
            suspects.append((pub_id, suspicion, label))

    suspects.sort(key=lambda item: -item[1])
    return suspects
|
| 278 |
+
|
| 279 |
+
|
| 280 |
+
def get_rule_action(
    obs_dict: Dict[str, Any],
    metrics_history: Dict[str, List[Dict]],
    investigated: Dict[str, List[str]],
    flagged: set,
) -> Dict[str, Any]:
    """Deterministic fallback policy: baseline, then investigate, then flag.

    Mutates ``investigated`` (records the tool it is about to run) and
    ``flagged`` (records publishers it is about to flag) so repeated calls
    make progress.
    """
    current_day = obs_dict["day"]
    remaining_budget = obs_dict.get("budget_status", {}).get("investigation_budget_remaining", 0)

    # Spend the first BASELINE_DAYS days just observing.
    if current_day <= BASELINE_DAYS:
        return {"action_type": "monitor"}

    candidates = [
        entry for entry in score_publishers(metrics_history) if entry[0] not in flagged
    ]

    # Flag any suspect that already has two pieces of evidence.
    for pub_id, _score, suspected_type in candidates:
        evidence = investigated.get(pub_id, [])
        if len(evidence) >= 2:
            flagged.add(pub_id)
            return {
                "action_type": "flag_fraud",
                "publisher_id": pub_id,
                "fraud_type": suspected_type,
                "evidence": evidence,
            }

    # Otherwise spend investigation budget on the highest-ranked suspect.
    if remaining_budget > 0:
        for pub_id, _score, suspected_type in candidates:
            if pub_id in flagged:
                continue
            evidence = investigated.get(pub_id, [])
            for tool_name in TOOLS_FOR.get(suspected_type, TOOLS_FOR["bot_traffic"]):
                if tool_name not in evidence:
                    investigated.setdefault(pub_id, []).append(tool_name)
                    return {
                        "action_type": "investigate_publisher",
                        "publisher_id": pub_id,
                        "tool": tool_name,
                    }

    return {"action_type": "monitor"}
|
| 323 |
+
|
| 324 |
+
|
| 325 |
+
# ---------------------------------------------------------------------------
|
| 326 |
+
# Main
|
| 327 |
+
# ---------------------------------------------------------------------------
|
| 328 |
+
|
| 329 |
+
def main() -> None:
    """Run one Ad-Audit episode end to end and emit [START]/[STEP]/[END] lines.

    Prefers the LLM policy; if the client cannot be created or any call
    fails mid-episode, permanently falls back to the deterministic
    rule-based policy for the rest of the run.
    """
    # Try to init LLM client; fall back to rule-based if it fails.
    # models.list() is a cheap connectivity/auth probe before the episode starts.
    llm_client: Optional[OpenAI] = None
    try:
        llm_client = OpenAI(base_url=API_BASE_URL, api_key=API_KEY)
        llm_client.models.list()
    except Exception:
        llm_client = None

    use_rules = llm_client is None

    env = AdAuditEnv()

    rewards: List[float] = []
    steps_taken = 0
    score = 0.0
    success = False

    # LLM state: raw chat turns plus a compact textual action log.
    history: List[Dict[str, str]] = []
    action_log: List[str] = []

    # Rule-based state: per-publisher metric history, tools run, flags made.
    metrics_history: Dict[str, List[Dict]] = {}
    investigated: Dict[str, List[str]] = {}
    flagged: set = set()

    log_start(task=TASK_NAME, env=BENCHMARK, model=MODEL_NAME if not use_rules else "rule-based")

    try:
        obs = env.reset(episode_id=TASK_NAME)
        obs_dict = obs.model_dump()

        while not obs_dict.get("done", False) and steps_taken < EPISODE_DAYS:
            # Track metrics for rule-based fallback — always, so a mid-episode
            # switch from LLM to rules has a full history to score against.
            for m in obs_dict.get("daily_metrics", []):
                metrics_history.setdefault(m["publisher_id"], []).append(m)

            action = None

            if not use_rules:
                obs_text = format_observation(obs_dict, action_log)
                # Keep only the last HISTORY_WINDOW (user, assistant) pairs.
                action = get_llm_action(llm_client, obs_text, history[-HISTORY_WINDOW * 2:])
                if action is None:
                    # LLM failed: switch to rules for the rest of the episode.
                    use_rules = True

            if action is None:
                action = get_rule_action(obs_dict, metrics_history, investigated, flagged)

            # Validate action; malformed dicts become an explicit "invalid" action.
            try:
                action_obj = AdAuditAction(**action)
            except Exception:
                action_obj = AdAuditAction(action_type="invalid")

            # Build compact action log entry, e.g. "D4:flag_fraud(pub_001,bot_traffic)".
            log_entry = f"D{obs_dict['day']}:{action_obj.action_type}"
            if action_obj.publisher_id:
                log_entry += f"({action_obj.publisher_id}"
                if action_obj.tool:
                    log_entry += f",{action_obj.tool}"
                if action_obj.fraud_type:
                    log_entry += f",{action_obj.fraud_type}"
                log_entry += ")"
            action_log.append(log_entry)

            # Update LLM history (only while the LLM policy is still active).
            if not use_rules:
                history.append({"role": "user", "content": obs_text})
                history.append({"role": "assistant", "content": json.dumps(action)})

            # Step environment
            obs = env.step(action_obj)
            obs_dict = obs.model_dump()
            steps_taken += 1

            # NOTE(review): assumes obs_dict["reward"] is numeric when present;
            # a None reward would break the :.2f formatting in log_step — confirm
            # the environment always sets it.
            reward = obs_dict.get("reward", 0.0)
            done = obs_dict.get("done", False)
            error = None
            rewards.append(reward)

            action_str = json.dumps(action, separators=(",", ":"))
            log_step(step=steps_taken, action=action_str, reward=reward, done=done, error=error)

        # Final grading: the env's grader writes "final_score" into state;
        # clamp to [0, 1] per the stdout contract.
        state = env.state
        grader = state.grader_inputs
        score = grader.get("final_score", 0.0)
        score = min(max(score, 0.0), 1.0)
        success = score >= SUCCESS_SCORE_THRESHOLD

    finally:
        # [END] is always emitted, even if reset/step raised.
        log_end(success=success, steps=steps_taken, score=score, rewards=rewards)
|
| 422 |
+
|
| 423 |
+
|
| 424 |
+
if __name__ == "__main__":
|
| 425 |
+
main()
|
models.py
ADDED
|
@@ -0,0 +1,152 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Pydantic models for the AdAudit environment.
|
| 3 |
+
|
| 4 |
+
Defines Action, Observation, and State types that conform to the OpenEnv spec.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
from typing import Any, Dict, List, Literal, Optional
|
| 8 |
+
|
| 9 |
+
from pydantic import BaseModel, Field
|
| 10 |
+
|
| 11 |
+
from openenv.core.env_server.types import Action, Observation, State
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
# ---------------------------------------------------------------------------
|
| 15 |
+
# Action
|
| 16 |
+
# ---------------------------------------------------------------------------
|
| 17 |
+
|
| 18 |
+
class AdAuditAction(Action):
    """Single discrete action the agent takes each day.

    ``action_type`` selects the variant; the optional fields below are only
    meaningful for the variant named in the inline comments. "invalid" is
    used by clients to mark an action that failed validation.
    """

    action_type: Literal[
        "monitor",
        "investigate_publisher",
        "flag_fraud",
        "submit_report",
        "invalid",
    ] = Field(..., description="The type of action to take")

    # Used by both investigate_publisher and flag_fraud.
    publisher_id: Optional[str] = Field(
        default=None, description="Target publisher for investigate/flag actions"
    )

    # investigate_publisher: which diagnostic tool to run on the publisher.
    tool: Optional[Literal[
        "click_timestamps",
        "ip_distribution",
        "device_fingerprints",
        "referral_urls",
        "viewability_scores",
        "conversion_quality",
    ]] = Field(default=None, description="Investigation tool to use")

    # flag_fraud: the suspected fraud category plus the tools cited as evidence.
    fraud_type: Optional[Literal[
        "bot_traffic",
        "domain_spoofing",
        "click_injection",
    ]] = Field(default=None, description="Fraud type to flag")
    evidence: Optional[List[str]] = Field(
        default=None, description="List of tool names used as evidence"
    )

    # submit_report: free-text summary ending the episode.
    summary: Optional[str] = Field(default=None)
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
# ---------------------------------------------------------------------------
|
| 58 |
+
# Observation helpers
|
| 59 |
+
# ---------------------------------------------------------------------------
|
| 60 |
+
|
| 61 |
+
class DailyPublisherMetrics(BaseModel):
    """Traffic metrics for one publisher on one day."""

    publisher_id: str  # stable id, e.g. "pub_001"
    name: str  # human-readable publisher name
    impressions: int
    clicks: int
    conversions: int
    spend: float  # advertiser spend attributed to this publisher (dollars)
    ctr: float  # click-through rate as a fraction (rendered with % formatting)
    cvr: float  # conversion rate as a fraction (rendered with % formatting)
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
class BudgetStatus(BaseModel):
    """Campaign and investigation budget snapshot."""

    total_campaign_budget: float
    spent_so_far: float
    remaining: float  # presumably total minus spent — confirm in the env
    investigation_budget_remaining: int  # investigations left (each costs 1)
    daily_spend_rate: float
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
# ---------------------------------------------------------------------------
|
| 85 |
+
# Observation
|
| 86 |
+
# ---------------------------------------------------------------------------
|
| 87 |
+
|
| 88 |
+
class AdAuditObservation(Observation):
    """What the agent sees after each step.

    Inherits ``done``, ``reward``, and ``metadata`` from the OpenEnv
    ``Observation`` base class. ``reward`` carries the daily P&L.
    """

    # NOTE(review): the description says "1-30" but campaign_day_total
    # defaults to 14 — confirm the intended episode length.
    day: int = Field(..., description="Current campaign day (1-30)")
    campaign_day_total: int = Field(default=14)

    # Per-publisher traffic rows: today's values and episode-to-date totals.
    daily_metrics: List[DailyPublisherMetrics] = Field(default_factory=list)
    cumulative_metrics: List[DailyPublisherMetrics] = Field(default_factory=list)

    trend_data: str = Field(default="", description="Trend summary")
    # Presumably populated only after an investigate_publisher action — confirm.
    investigation_results: Optional[Dict[str, Any]] = Field(
        default=None, description="Structured metrics from investigation tool"
    )
    alerts: List[str] = Field(default_factory=list)

    budget_status: Optional[BudgetStatus] = None
    publisher_status: Dict[str, str] = Field(
        default_factory=dict,
        description="publisher_id -> active|flagged",
    )

    cumulative_reward: float = Field(default=0.0)
    # Presumably set when the episode ends (report submitted / days exhausted).
    done_reason: Optional[str] = Field(default=None)
|
| 115 |
+
|
| 116 |
+
|
| 117 |
+
# ---------------------------------------------------------------------------
|
| 118 |
+
# State (hidden β used for grading / debugging)
|
| 119 |
+
# ---------------------------------------------------------------------------
|
| 120 |
+
|
| 121 |
+
class PublisherState(BaseModel):
    """Public publisher state (visible via /state)."""

    publisher_id: str
    name: str
    is_flagged: bool = False  # whether the agent has flagged this publisher
    budget_allocation: float = 0.0
    tools_used: List[str] = Field(default_factory=list)  # investigation tools run so far
    day_flagged: Optional[int] = None  # campaign day the flag happened, if any
|
| 130 |
+
|
| 131 |
+
|
| 132 |
+
class AdAuditState(State):
    """Full internal state for debugging and grading.

    Inherits ``episode_id`` and ``step_count`` from OpenEnv ``State``.
    """

    case_id: str = ""  # task/case identifier (e.g. "easy", "medium", "hard")
    current_day: int = 0

    publishers: List[PublisherState] = Field(default_factory=list)

    action_history: List[Dict[str, Any]] = Field(default_factory=list)
    daily_rewards: List[float] = Field(default_factory=list)
    cumulative_reward: float = 0.0

    # Investigation budget accounting (each investigate action costs 1).
    investigation_budget_total: int = 0
    investigation_budget_used: int = 0

    flags_submitted: List[Dict[str, Any]] = Field(default_factory=list)

    # Grading outputs; clients read "final_score" from this dict.
    grader_inputs: Dict[str, Any] = Field(default_factory=dict)
|
openenv.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
spec_version: 1
|
| 2 |
+
name: Ad_Audit
|
| 3 |
+
type: space
|
| 4 |
+
runtime: fastapi
|
| 5 |
+
app: server.app:app
|
| 6 |
+
port: 8000
|
| 7 |
+
|
pyproject.toml
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the BSD-style license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
[build-system]
|
| 8 |
+
requires = ["setuptools>=45", "wheel"]
|
| 9 |
+
build-backend = "setuptools.build_meta"
|
| 10 |
+
|
| 11 |
+
[project]
|
| 12 |
+
name = "openenv-Ad_Audit"
|
| 13 |
+
version = "0.1.0"
|
| 14 |
+
description = "Ad Audit environment for OpenEnv"
|
| 15 |
+
requires-python = ">=3.10"
|
| 16 |
+
dependencies = [
|
| 17 |
+
# Core OpenEnv runtime (provides FastAPI server + HTTP client types)
|
| 18 |
+
# install from github
|
| 19 |
+
# "openenv-core[core] @ git+https://github.com/meta-pytorch/OpenEnv.git",
|
| 20 |
+
"openenv-core[core]>=0.2.2",
|
| 21 |
+
# Environment-specific dependencies
|
| 22 |
+
# Add all dependencies needed for your environment here
|
| 23 |
+
# Examples:
|
| 24 |
+
# "numpy>=1.19.0",
|
| 25 |
+
# "torch>=2.0.0",
|
| 26 |
+
# "gymnasium>=0.29.0",
|
| 27 |
+
# "openspiel>=1.0.0",
|
| 28 |
+
# "smolagents>=1.22.0,<2",
|
| 29 |
+
]
|
| 30 |
+
|
| 31 |
+
[project.optional-dependencies]
|
| 32 |
+
dev = [
|
| 33 |
+
"pytest>=8.0.0",
|
| 34 |
+
"pytest-cov>=4.0.0",
|
| 35 |
+
]
|
| 36 |
+
|
| 37 |
+
[project.scripts]
|
| 38 |
+
# Server entry point - enables running via: uv run --project . server
|
| 39 |
+
# or: python -m Ad_Audit.server.app
|
| 40 |
+
server = "Ad_Audit.server.app:main"
|
| 41 |
+
|
| 42 |
+
[tool.setuptools]
|
| 43 |
+
include-package-data = true
|
| 44 |
+
packages = ["Ad_Audit", "Ad_Audit.server"]
|
| 45 |
+
package-dir = { "Ad_Audit" = ".", "Ad_Audit.server" = "server" }
|
results.json
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"hard": {
|
| 3 |
+
"task_id": "hard",
|
| 4 |
+
"steps": 13,
|
| 5 |
+
"final_score": 0.5312230769230769
|
| 6 |
+
}
|
| 7 |
+
}
|
server/Ad_Audit_environment.py
ADDED
|
@@ -0,0 +1,527 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
AdAuditEnv β main environment class.
|
| 3 |
+
|
| 4 |
+
Wires together publisher_engine, fraud_engine, response_generator,
|
| 5 |
+
step_reward, and grader into the OpenEnv Environment interface.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from __future__ import annotations
|
| 9 |
+
|
| 10 |
+
import json
|
| 11 |
+
from pathlib import Path
|
| 12 |
+
from typing import Any, Dict, List, Optional
|
| 13 |
+
from uuid import uuid4
|
| 14 |
+
|
| 15 |
+
from openenv.core.env_server.interfaces import Environment
|
| 16 |
+
|
| 17 |
+
try:
|
| 18 |
+
from ..models import (
|
| 19 |
+
AdAuditAction,
|
| 20 |
+
AdAuditObservation,
|
| 21 |
+
AdAuditState,
|
| 22 |
+
BudgetStatus,
|
| 23 |
+
DailyPublisherMetrics,
|
| 24 |
+
PublisherState,
|
| 25 |
+
)
|
| 26 |
+
from .fraud_engine import (
|
| 27 |
+
decay_suspicion,
|
| 28 |
+
get_adaptation_stage,
|
| 29 |
+
update_suspicion,
|
| 30 |
+
)
|
| 31 |
+
from .publisher_engine import generate_daily_traffic
|
| 32 |
+
from .response_generator import (
|
| 33 |
+
generate_alerts,
|
| 34 |
+
generate_investigation_metrics,
|
| 35 |
+
generate_trend_summary,
|
| 36 |
+
)
|
| 37 |
+
from .step_reward import compute_step_reward
|
| 38 |
+
from .grader import grade_episode
|
| 39 |
+
except ImportError:
|
| 40 |
+
from models import ( # type: ignore[no-redef]
|
| 41 |
+
AdAuditAction,
|
| 42 |
+
AdAuditObservation,
|
| 43 |
+
AdAuditState,
|
| 44 |
+
BudgetStatus,
|
| 45 |
+
DailyPublisherMetrics,
|
| 46 |
+
PublisherState,
|
| 47 |
+
)
|
| 48 |
+
from server.fraud_engine import ( # type: ignore[no-redef]
|
| 49 |
+
decay_suspicion,
|
| 50 |
+
get_adaptation_stage,
|
| 51 |
+
update_suspicion,
|
| 52 |
+
)
|
| 53 |
+
from server.publisher_engine import generate_daily_traffic # type: ignore[no-redef]
|
| 54 |
+
from server.response_generator import ( # type: ignore[no-redef]
|
| 55 |
+
generate_alerts,
|
| 56 |
+
generate_investigation_metrics,
|
| 57 |
+
generate_trend_summary,
|
| 58 |
+
)
|
| 59 |
+
from server.step_reward import compute_step_reward # type: ignore[no-redef]
|
| 60 |
+
from server.grader import grade_episode # type: ignore[no-redef]
|
| 61 |
+
|
| 62 |
+
CASES_DIR = Path(__file__).resolve().parent.parent / "cases"
|
| 63 |
+
|
| 64 |
+
TASK_MAP = {
|
| 65 |
+
"easy": "easy.json",
|
| 66 |
+
"medium": "medium.json",
|
| 67 |
+
"hard": "hard.json",
|
| 68 |
+
}
|
| 69 |
+
|
| 70 |
+
EPISODE_DAYS = 14
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
class _PubInternal:
|
| 74 |
+
"""Hidden per-publisher state (not exposed via /state)."""
|
| 75 |
+
__slots__ = (
|
| 76 |
+
"is_fraudulent", "fraud_type", "suspicion_level", "adaptation_stage",
|
| 77 |
+
"total_fraudulent_spend", "total_legitimate_spend", "total_legitimate_revenue",
|
| 78 |
+
)
|
| 79 |
+
|
| 80 |
+
def __init__(self, is_fraudulent: bool = False, fraud_type: str = None):
|
| 81 |
+
self.is_fraudulent = is_fraudulent
|
| 82 |
+
self.fraud_type = fraud_type
|
| 83 |
+
self.suspicion_level = 0.0
|
| 84 |
+
self.adaptation_stage = "normal"
|
| 85 |
+
self.total_fraudulent_spend = 0.0
|
| 86 |
+
self.total_legitimate_spend = 0.0
|
| 87 |
+
self.total_legitimate_revenue = 0.0
|
| 88 |
+
|
| 89 |
+
|
| 90 |
+
class AdAuditEnv(Environment[AdAuditAction, AdAuditObservation, AdAuditState]):
    """OpenEnv-compatible RL environment for ad fraud detection."""

    # Each session gets its own environment instance, so concurrent
    # WebSocket sessions are safe.
    SUPPORTS_CONCURRENT_SESSIONS = True

    @classmethod
    def get_tasks(cls) -> List[str]:
        # Available task ids are the difficulty levels ("easy"/"medium"/"hard").
        return list(TASK_MAP.keys())

    # Order in which difficulties are served when reset() is called without a
    # task_id (round-robin via self._cycle_index).
    _TASK_CYCLE = ["easy", "medium", "hard"]
|
| 100 |
+
|
| 101 |
+
def __init__(self) -> None:
|
| 102 |
+
super().__init__()
|
| 103 |
+
self._case: Dict[str, Any] = {}
|
| 104 |
+
self._state = AdAuditState()
|
| 105 |
+
self._pub_cfgs: Dict[str, Dict[str, Any]] = {}
|
| 106 |
+
self._pub_names: Dict[str, str] = {}
|
| 107 |
+
self._pub_internal: Dict[str, _PubInternal] = {}
|
| 108 |
+
self._daily_logs: Dict[str, List[Dict[str, Any]]] = {}
|
| 109 |
+
self._step_action: Optional[AdAuditAction] = None
|
| 110 |
+
self._cycle_index: int = 0
|
| 111 |
+
self._invalid_action: bool = False
|
| 112 |
+
|
| 113 |
+
# ------------------------------------------------------------------
|
| 114 |
+
# reset
|
| 115 |
+
# ------------------------------------------------------------------
|
| 116 |
+
|
| 117 |
+
    def reset(
        self,
        seed: Optional[int] = None,
        episode_id: Optional[str] = None,
        **kwargs: Any,
    ) -> AdAuditObservation:
        """Start a new episode and return the day-1 observation.

        Args:
            seed: Accepted for API compatibility but unused — the case files
                pre-bake all randomness, so episodes are deterministic.
            episode_id: Optional external episode id; also used as a task-id
                fallback when kwargs carry no ``task_id``.
            **kwargs: ``task_id`` selects the case ("easy"/"medium"/"hard").
                Without one, difficulties are served round-robin.
        """
        task_id = kwargs.get("task_id") or episode_id
        if not task_id:
            # No explicit task: cycle through difficulties deterministically.
            task_id = self._TASK_CYCLE[self._cycle_index % len(self._TASK_CYCLE)]
            self._cycle_index += 1
        # Unknown task ids fall back to "<task_id>.json" under CASES_DIR.
        case_file = CASES_DIR / TASK_MAP.get(task_id, f"{task_id}.json")
        with open(case_file) as f:
            self._case = json.load(f)

        campaign = self._case["campaign"]
        publishers = self._case["publishers"]

        # Rebuild all per-publisher bookkeeping from scratch.
        pub_states: List[PublisherState] = []
        self._pub_cfgs = {}
        self._pub_names = {}
        self._pub_internal = {}
        self._daily_logs = {}

        for pub_id, cfg in publishers.items():
            self._pub_cfgs[pub_id] = cfg
            self._pub_names[pub_id] = cfg.get("name", pub_id)
            self._daily_logs[pub_id] = []

            # Public state visible to the agent.
            pub_states.append(PublisherState(
                publisher_id=pub_id,
                name=cfg.get("name", pub_id),
                budget_allocation=cfg.get("budget_allocation", 1.0 / len(publishers)),
            ))

            # Hidden ground truth used for simulation and grading only.
            self._pub_internal[pub_id] = _PubInternal(
                is_fraudulent=cfg.get("is_fraudulent", False),
                fraud_type=cfg.get("fraud_type"),
            )

        self._state = AdAuditState(
            episode_id=episode_id or str(uuid4()),
            step_count=0,
            case_id=self._case.get("case_id", task_id),
            current_day=0,
            publishers=pub_states,
            investigation_budget_total=campaign.get("investigation_budget", 8),
            investigation_budget_used=0,
        )

        self._step_action = None
        self._invalid_action = False
        # Simulate day 1 immediately so the first observation has traffic.
        return self._advance_day()
|
| 169 |
+
|
| 170 |
+
# ------------------------------------------------------------------
|
| 171 |
+
# step
|
| 172 |
+
# ------------------------------------------------------------------
|
| 173 |
+
|
| 174 |
+
    def step(
        self,
        action: AdAuditAction,
        timeout_s: Optional[float] = None,
        **kwargs: Any,
    ) -> AdAuditObservation:
        """Apply one agent action, then simulate the next campaign day.

        Invalid actions (unknown action type, bad flag target, failed
        investigation) never raise; they set ``_invalid_action`` and are
        penalized via the "invalid" step reward in ``_advance_day``.
        ``timeout_s`` is accepted for API compatibility but unused here.
        """
        # Stepping past the campaign end just grades and closes the episode.
        if self._state.current_day >= EPISODE_DAYS:
            return self._finalize("Campaign ended.")

        investigation_result: Optional[Dict[str, Any]] = None
        self._invalid_action = False
        at = action.action_type
        # Record the action (even if it turns out invalid) for suspicion
        # decay and grading bookkeeping.
        self._state.action_history.append(action.model_dump(exclude_none=True))
        self._step_action = action

        if at == "monitor":
            # Passive observation: no state change beyond the day advance.
            pass
        elif at == "investigate_publisher":
            investigation_result = self._handle_investigate(action)
            # Handler signals failure via an "error" key rather than raising.
            if investigation_result and "error" in investigation_result:
                self._invalid_action = True
        elif at == "flag_fraud":
            ps = self._get_pub_state(action.publisher_id)
            # A flag needs a known, not-yet-flagged publisher and a fraud type.
            if ps is None or ps.is_flagged or not action.fraud_type:
                self._invalid_action = True
            else:
                self._handle_flag_fraud(action)
        elif at == "submit_report":
            # Early termination: grade immediately instead of advancing a day.
            return self._finalize("Agent submitted report.")
        else:
            self._invalid_action = True

        return self._advance_day(investigation_result=investigation_result)
|
| 207 |
+
|
| 208 |
+
# ------------------------------------------------------------------
|
| 209 |
+
# state property
|
| 210 |
+
# ------------------------------------------------------------------
|
| 211 |
+
|
| 212 |
+
    @property
    def state(self) -> AdAuditState:
        """Current public episode state (hidden per-publisher fraud fields
        live in self._pub_internal and are not included here)."""
        return self._state
|
| 215 |
+
|
| 216 |
+
# ------------------------------------------------------------------
|
| 217 |
+
# Action handlers
|
| 218 |
+
# ------------------------------------------------------------------
|
| 219 |
+
|
| 220 |
+
    def _handle_investigate(self, action: AdAuditAction) -> Optional[Dict[str, Any]]:
        """Run one investigation tool against a publisher.

        Returns either a metrics dict from generate_investigation_metrics or
        a dict with a single "error" key when the request is invalid (the
        caller treats that as an invalid action). Each successful call
        consumes one unit of investigation budget, even when the same tool is
        reused on the same publisher.
        """
        pub_id = action.publisher_id
        tool = action.tool
        if not pub_id or not tool:
            return {"error": "publisher_id and tool are required"}

        ps = self._get_pub_state(pub_id)
        if ps is None:
            valid = [p.publisher_id for p in self._state.publishers]
            return {"error": f"unknown publisher_id: {pub_id}. Valid IDs: {valid}"}

        if ps.is_flagged:
            return {"error": f"{pub_id} is already flagged."}

        budget_remaining = (
            self._state.investigation_budget_total
            - self._state.investigation_budget_used
        )
        if budget_remaining <= 0:
            return {"error": "no investigation budget remaining"}

        self._state.investigation_budget_used += 1

        cfg = self._pub_cfgs.get(pub_id, {})
        hi = self._pub_internal[pub_id]

        # Track distinct tools per publisher (used by the grader's
        # information-value component).
        if tool not in ps.tools_used:
            ps.tools_used.append(tool)

        # Investigating an actual fraudster raises its suspicion, which can
        # push it into a more cautious adaptation stage.
        if hi.is_fraudulent:
            hi.suspicion_level = update_suspicion(
                hi.suspicion_level, tool, cfg.get("suspicion_reactivity", 1.0),
            )
            hi.adaptation_stage = get_adaptation_stage(hi.suspicion_level)

        return generate_investigation_metrics(
            case_id=self._state.case_id,
            publisher_id=pub_id,
            publisher_cfg=cfg,
            tool_name=tool,
            adaptation_stage=hi.adaptation_stage,
        )
|
| 262 |
+
|
| 263 |
+
def _handle_flag_fraud(self, action: AdAuditAction) -> None:
|
| 264 |
+
pub_id = action.publisher_id
|
| 265 |
+
ps = self._get_pub_state(pub_id)
|
| 266 |
+
hi = self._pub_internal[pub_id]
|
| 267 |
+
|
| 268 |
+
ps.is_flagged = True
|
| 269 |
+
ps.day_flagged = self._state.current_day + 1
|
| 270 |
+
|
| 271 |
+
is_correct = hi.is_fraudulent
|
| 272 |
+
type_correct = (action.fraud_type == hi.fraud_type) if is_correct else False
|
| 273 |
+
|
| 274 |
+
self._state.flags_submitted.append({
|
| 275 |
+
"publisher_id": pub_id,
|
| 276 |
+
"fraud_type": action.fraud_type,
|
| 277 |
+
"evidence": action.evidence or [],
|
| 278 |
+
"day": self._state.current_day + 1,
|
| 279 |
+
"correct": is_correct,
|
| 280 |
+
"type_correct": type_correct,
|
| 281 |
+
})
|
| 282 |
+
|
| 283 |
+
# ------------------------------------------------------------------
|
| 284 |
+
# Day advancement
|
| 285 |
+
# ------------------------------------------------------------------
|
| 286 |
+
|
| 287 |
+
    def _advance_day(
        self,
        investigation_result: Optional[Dict[str, Any]] = None,
    ) -> AdAuditObservation:
        """Simulate one campaign day and build the observation for the agent.

        Pipeline: decay suspicion -> generate traffic -> compute the step
        reward for the action just taken -> assemble budget/status views ->
        check termination (grading on episode end) -> emit the observation.
        """
        self._state.current_day += 1
        self._state.step_count = self._state.current_day
        day = self._state.current_day

        # Decay suspicion for fraudulent publishers NOT investigated today;
        # only the most recent action can mark a publisher as investigated.
        investigated_today = set()
        if self._state.action_history:
            last = self._state.action_history[-1]
            if last.get("action_type") == "investigate_publisher" and last.get("publisher_id"):
                investigated_today.add(last["publisher_id"])

        for ps in self._state.publishers:
            hi = self._pub_internal[ps.publisher_id]
            if hi.is_fraudulent and ps.publisher_id not in investigated_today:
                hi.suspicion_level = decay_suspicion(hi.suspicion_level)
                hi.adaptation_stage = get_adaptation_stage(hi.suspicion_level)

        # Generate today's traffic per publisher (flagged publishers are
        # paused and produce zero traffic).
        daily_traffic: List[Dict[str, Any]] = []
        daily_metrics: List[DailyPublisherMetrics] = []
        campaign = self._case["campaign"]
        # NOTE(review): benchmarks currently feed only the TODO'd
        # generate_alerts call below.
        benchmarks = campaign.get("industry_benchmarks", {})

        daily_fraud_spend = 0.0
        for ps in self._state.publishers:
            cfg = self._pub_cfgs.get(ps.publisher_id, {})
            hi = self._pub_internal[ps.publisher_id]
            traffic = generate_daily_traffic(
                day=day, publisher_cfg=cfg,
                budget_allocation=ps.budget_allocation,
                adaptation_stage=hi.adaptation_stage,
                is_paused=ps.is_flagged,
            )
            daily_traffic.append(traffic)
            self._daily_logs[ps.publisher_id].append(traffic)

            # Accumulate hidden spend/revenue totals for grading.
            hi.total_legitimate_spend += traffic["legitimate_spend"]
            hi.total_fraudulent_spend += traffic["fraudulent_spend"]
            hi.total_legitimate_revenue += traffic["legitimate_revenue"]

            # Only unflagged fraudsters leak fraud spend into today's penalty.
            if hi.is_fraudulent and not ps.is_flagged:
                daily_fraud_spend += traffic["fraudulent_spend"]

            daily_metrics.append(DailyPublisherMetrics(
                publisher_id=ps.publisher_id, name=ps.name,
                impressions=traffic["impressions"], clicks=traffic["clicks"],
                conversions=traffic["conversions"], spend=traffic["spend"],
                ctr=traffic["ctr"], cvr=traffic["cvr"],
            ))

        # --- Compute step reward for the action taken this step ---
        action = self._step_action
        total_budget = campaign["total_budget"]

        if action is None:
            # Day 1 (reset): no action was taken, so no reward.
            step_reward = 0.0
        elif self._invalid_action:
            step_reward = compute_step_reward(
                action_type="invalid",
                daily_fraud_spend=daily_fraud_spend,
                total_budget=total_budget,
                day=day,
                episode_days=EPISODE_DAYS,
            )
        elif action.action_type == "flag_fraud":
            # The flag just appended by _handle_flag_fraud carries the
            # correctness verdicts the reward needs.
            last_flag = self._state.flags_submitted[-1] if self._state.flags_submitted else {}
            step_reward = compute_step_reward(
                action_type="flag_fraud",
                flag_correct=last_flag.get("correct"),
                flag_type_correct=last_flag.get("type_correct"),
                daily_fraud_spend=daily_fraud_spend,
                total_budget=total_budget,
                day=day,
                episode_days=EPISODE_DAYS,
            )
        elif action.action_type == "investigate_publisher":
            pub_cfg = self._pub_cfgs.get(action.publisher_id, {})
            step_reward = compute_step_reward(
                action_type="investigate_publisher",
                publisher_cfg=pub_cfg,
                daily_fraud_spend=daily_fraud_spend,
                total_budget=total_budget,
                day=day,
                episode_days=EPISODE_DAYS,
            )
        else:
            step_reward = compute_step_reward(
                action_type=action.action_type,
                daily_fraud_spend=daily_fraud_spend,
                total_budget=total_budget,
                day=day,
                episode_days=EPISODE_DAYS,
            )

        # Trend + alerts (both generators are currently disabled; raw_metrics
        # is prepared as the future input for generate_alerts).
        trend_data = ""  # TODO: generate_trend_summary(self._daily_logs, self._pub_names, day)
        raw_metrics = [
            {"publisher_id": m.publisher_id, "ctr": m.ctr, "cvr": m.cvr,
             "impressions": m.impressions, "clicks": m.clicks}
            for m in daily_metrics
        ]
        alerts = []  # TODO: generate_alerts(raw_metrics, benchmarks, self._pub_names)

        cumulative_metrics = self._compute_cumulative_metrics()

        # Campaign-level budget snapshot from hidden spend totals.
        total_spend = sum(
            hi.total_legitimate_spend + hi.total_fraudulent_spend
            for hi in self._pub_internal.values()
        )
        budget_status = BudgetStatus(
            total_campaign_budget=campaign["total_budget"],
            spent_so_far=round(total_spend, 2),
            remaining=round(campaign["total_budget"] - total_spend, 2),
            investigation_budget_remaining=(
                self._state.investigation_budget_total
                - self._state.investigation_budget_used
            ),
            daily_spend_rate=round(total_spend / day, 2) if day > 0 else 0.0,
        )

        pub_status = {
            ps.publisher_id: ("flagged" if ps.is_flagged else "active")
            for ps in self._state.publishers
        }

        # Termination: fixed horizon reached or campaign budget exhausted.
        done = False
        done_reason: Optional[str] = None

        if day >= EPISODE_DAYS:
            done = True
            done_reason = f"Campaign ended (day {EPISODE_DAYS})."
        elif budget_status.remaining <= 0:
            done = True
            done_reason = "Campaign budget exhausted."

        # On episode end, compute grader (stored separately, not in step reward)
        if done:
            self._state.grader_inputs = grade_episode(
                self._build_grader_state(), self._case,
            )

        # Step rewards are clamped to [0, 1] before being recorded.
        step_reward = round(max(0.0, min(1.0, step_reward)), 4)
        self._state.daily_rewards.append(step_reward)
        self._state.cumulative_reward += step_reward

        return self._apply_transform(AdAuditObservation(
            day=day,
            campaign_day_total=EPISODE_DAYS,
            daily_metrics=daily_metrics,
            cumulative_metrics=cumulative_metrics,
            trend_data=trend_data,
            investigation_results=investigation_result,
            alerts=alerts,
            budget_status=budget_status,
            publisher_status=pub_status,
            cumulative_reward=round(self._state.cumulative_reward, 4),
            done=done,
            done_reason=done_reason,
            reward=step_reward,
        ))
|
| 452 |
+
|
| 453 |
+
# ------------------------------------------------------------------
|
| 454 |
+
# Finalize
|
| 455 |
+
# ------------------------------------------------------------------
|
| 456 |
+
|
| 457 |
+
    def _finalize(self, reason: str) -> AdAuditObservation:
        """End the episode early (report submitted / stepped past the end).

        The grader's final score is used directly as the terminal step
        reward, unlike the daily shaped rewards from _advance_day.
        """
        self._state.grader_inputs = grade_episode(
            self._build_grader_state(), self._case,
        )
        grader_score = self._state.grader_inputs.get("final_score", 0.0)

        step_reward = grader_score
        self._state.daily_rewards.append(step_reward)
        self._state.cumulative_reward += step_reward

        # Terminal observation: no new traffic is simulated; trend_data
        # carries a human-readable summary of the grader outcome.
        return AdAuditObservation(
            day=self._state.current_day,
            campaign_day_total=EPISODE_DAYS,
            trend_data=f"Episode complete. Grader score: {grader_score:.4f}",
            done=True,
            done_reason=reason,
            reward=step_reward,
            cumulative_reward=round(self._state.cumulative_reward, 4),
            publisher_status={
                ps.publisher_id: ("flagged" if ps.is_flagged else "active")
                for ps in self._state.publishers
            },
        )
|
| 480 |
+
|
| 481 |
+
# ------------------------------------------------------------------
|
| 482 |
+
# Helpers
|
| 483 |
+
# ------------------------------------------------------------------
|
| 484 |
+
|
| 485 |
+
def _get_pub_state(self, pub_id: Optional[str]) -> Optional[PublisherState]:
|
| 486 |
+
for ps in self._state.publishers:
|
| 487 |
+
if ps.publisher_id == pub_id:
|
| 488 |
+
return ps
|
| 489 |
+
return None
|
| 490 |
+
|
| 491 |
+
def _build_grader_state(self) -> Dict[str, Any]:
|
| 492 |
+
"""Build the state dict the grader expects, including hidden fields."""
|
| 493 |
+
state_dict = self._state.model_dump()
|
| 494 |
+
# Enrich publisher entries with hidden internal state for grading
|
| 495 |
+
for pub_dict in state_dict["publishers"]:
|
| 496 |
+
pub_id = pub_dict["publisher_id"]
|
| 497 |
+
hi = self._pub_internal[pub_id]
|
| 498 |
+
pub_dict["is_fraudulent"] = hi.is_fraudulent
|
| 499 |
+
pub_dict["fraud_type"] = hi.fraud_type
|
| 500 |
+
pub_dict["suspicion_level"] = hi.suspicion_level
|
| 501 |
+
pub_dict["total_fraudulent_spend"] = hi.total_fraudulent_spend
|
| 502 |
+
pub_dict["total_legitimate_spend"] = hi.total_legitimate_spend
|
| 503 |
+
return state_dict
|
| 504 |
+
|
| 505 |
+
def _compute_cumulative_metrics(self) -> List[DailyPublisherMetrics]:
|
| 506 |
+
result = []
|
| 507 |
+
for ps in self._state.publishers:
|
| 508 |
+
logs = self._daily_logs.get(ps.publisher_id, [])
|
| 509 |
+
if not logs:
|
| 510 |
+
continue
|
| 511 |
+
total_imp = sum(d["impressions"] for d in logs)
|
| 512 |
+
total_clicks = sum(d["clicks"] for d in logs)
|
| 513 |
+
total_conv = sum(d["conversions"] for d in logs)
|
| 514 |
+
total_spend = sum(d["spend"] for d in logs)
|
| 515 |
+
ctr = total_clicks / total_imp if total_imp > 0 else 0.0
|
| 516 |
+
cvr = total_conv / total_clicks if total_clicks > 0 else 0.0
|
| 517 |
+
result.append(DailyPublisherMetrics(
|
| 518 |
+
publisher_id=ps.publisher_id, name=ps.name,
|
| 519 |
+
impressions=total_imp, clicks=total_clicks,
|
| 520 |
+
conversions=total_conv, spend=round(total_spend, 2),
|
| 521 |
+
ctr=round(ctr, 4), cvr=round(cvr, 4),
|
| 522 |
+
))
|
| 523 |
+
return result
|
| 524 |
+
|
| 525 |
+
|
| 526 |
+
# Alias used by app.py and server/__init__.py; keeps the public class name
# stable even though the implementation class is AdAuditEnv.
AdAuditEnvironment = AdAuditEnv
|
server/__init__.py
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the BSD-style license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
"""Ad Audit environment server components."""
|
| 8 |
+
|
| 9 |
+
try:
    # Normal case: imported as part of the package.
    from .Ad_Audit_environment import AdAuditEnvironment
except ImportError:
    # Fallback for flat execution (e.g. running from the repo root).
    from server.Ad_Audit_environment import AdAuditEnvironment  # type: ignore[no-redef]

__all__ = ["AdAuditEnvironment"]
|
server/app.py
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the BSD-style license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
"""
|
| 8 |
+
FastAPI application for the Ad Audit Environment.
|
| 9 |
+
|
| 10 |
+
This module creates an HTTP server that exposes the AdAuditEnvironment
|
| 11 |
+
over HTTP and WebSocket endpoints, compatible with EnvClient.
|
| 12 |
+
|
| 13 |
+
Endpoints:
|
| 14 |
+
- POST /reset: Reset the environment
|
| 15 |
+
- POST /step: Execute an action
|
| 16 |
+
- GET /state: Get current environment state
|
| 17 |
+
- GET /schema: Get action/observation schemas
|
| 18 |
+
- WS /ws: WebSocket endpoint for persistent sessions
|
| 19 |
+
|
| 20 |
+
Usage:
|
| 21 |
+
# Development (with auto-reload):
|
| 22 |
+
uvicorn server.app:app --reload --host 0.0.0.0 --port 8000
|
| 23 |
+
|
| 24 |
+
# Production:
|
| 25 |
+
uvicorn server.app:app --host 0.0.0.0 --port 8000 --workers 4
|
| 26 |
+
|
| 27 |
+
# Or run directly:
|
| 28 |
+
python -m server.app
|
| 29 |
+
"""
|
| 30 |
+
|
| 31 |
+
import os

# Must be set before importing openenv so create_app picks them up: enables
# the web UI and points the README panel at the package-root README.md.
os.environ.setdefault("ENABLE_WEB_INTERFACE", "true")
os.environ.setdefault("ENV_README_PATH", os.path.join(os.path.dirname(os.path.dirname(__file__)), "README.md"))

try:
    from openenv.core.env_server.http_server import create_app
except Exception as e:  # pragma: no cover
    raise ImportError(
        "openenv is required for the web interface. Install dependencies with '\n uv sync\n'"
    ) from e

try:
    # Package-relative imports (running as part of the Ad_Audit package).
    from ..models import AdAuditAction, AdAuditObservation
    from .Ad_Audit_environment import AdAuditEnvironment
except (ImportError, ModuleNotFoundError):
    # Flat-layout fallback (running from the repo root).
    from models import AdAuditAction, AdAuditObservation
    from server.Ad_Audit_environment import AdAuditEnvironment


# Create the app with web interface and README integration
app = create_app(
    AdAuditEnvironment,
    AdAuditAction,
    AdAuditObservation,
    env_name="Ad_Audit",
    max_concurrent_envs=1,  # increase this number to allow more concurrent WebSocket sessions
)
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
def main(host: str = "0.0.0.0", port: int = 8000):
    """
    Entry point for direct execution via uv run or python -m.

    This function enables running the server without Docker:
        uv run --project . server
        uv run --project . server --port 8001
        python -m Ad_Audit.server.app

    Args:
        host: Host address to bind to (default: "0.0.0.0")
        port: Port number to listen on (default: 8000)

    For production deployments, consider using uvicorn directly with
    multiple workers:
        uvicorn Ad_Audit.server.app:app --workers 4
    """
    # Imported lazily so merely importing this module (e.g. for the `app`
    # object) does not require uvicorn.
    import uvicorn

    uvicorn.run(app, host=host, port=port)


if __name__ == "__main__":
    main()
|
server/fraud_engine.py
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Fraud engine β suspicion tracking and fraud intensity for adaptive publishers.
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
from __future__ import annotations
|
| 6 |
+
|
| 7 |
+
from typing import Any, Dict
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
# ββ Suspicion tracking ββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 11 |
+
|
| 12 |
+
def update_suspicion(
    current_level: float,
    tool: str,
    reactivity: float,
) -> float:
    """Raise a fraudster's suspicion after an investigation tool is used.

    Each tool contributes a fixed increment (unknown tools get 0.10), scaled
    by the publisher's ``reactivity``; the result is capped at 1.0.
    """
    tool_bumps = {
        "click_timestamps": 0.15,
        "ip_distribution": 0.12,
        "device_fingerprints": 0.10,
        "referral_urls": 0.10,
        "viewability_scores": 0.08,
        "conversion_quality": 0.10,
    }
    raised = current_level + tool_bumps.get(tool, 0.10) * reactivity
    return raised if raised < 1.0 else 1.0
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
def decay_suspicion(level: float, rate: float = 0.05) -> float:
    """Lower suspicion by ``rate`` for a day without investigation (floor 0)."""
    decayed = level - rate
    return decayed if decayed > 0.0 else 0.0
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
def get_adaptation_stage(suspicion_level: float) -> str:
    """Translate a suspicion level into a behavioral stage name."""
    # Thresholds checked from highest to lowest; first match wins.
    for threshold, stage in ((0.8, "dark"), (0.5, "covering_tracks"), (0.25, "cautious")):
        if suspicion_level >= threshold:
            return stage
    return "normal"
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
# ββ Fraud intensity βββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 49 |
+
|
| 50 |
+
def compute_fraud_intensity(
    day: int,
    fraud_schedule: Dict[str, Any],
    adaptation_stage: str,
) -> float:
    """Return today's fraud aggressiveness multiplier for a publisher.

    ``fraud_schedule`` (from the case profile) provides:
        start_day: first day fraud begins
        ramp_days: days to ramp from 0 up to the peak
        peak_intensity: maximum multiplier on legitimate traffic

    The scheduled value is then dampened by the adaptation stage: a cautious
    fraudster throttles back, and one that has gone "dark" nearly stops.
    """
    start_day = fraud_schedule.get("start_day", 1)
    if day < start_day:
        return 0.0

    ramp_days = fraud_schedule.get("ramp_days", 3)
    peak = fraud_schedule.get("peak_intensity", 1.0)
    elapsed = day - start_day

    # Linear ramp toward the peak, then hold at the peak.
    if ramp_days > 0 and elapsed < ramp_days:
        scheduled = peak * (elapsed / ramp_days)
    else:
        scheduled = peak

    dampening = {
        "normal": 1.0,
        "cautious": 0.7,
        "covering_tracks": 0.4,
        "dark": 0.05,
    }
    return scheduled * dampening.get(adaptation_stage, 1.0)
|
server/grader.py
ADDED
|
@@ -0,0 +1,134 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Episode-end grader [0, 1].
|
| 2 |
+
|
| 3 |
+
Three components:
|
| 4 |
+
1. Fraud detection accuracy (weight 0.50)
|
| 5 |
+
2. Detection timeliness (weight 0.30)
|
| 6 |
+
3. Investigation efficiency (weight 0.20)
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
from __future__ import annotations
|
| 10 |
+
|
| 11 |
+
from typing import Any, Dict, List
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def grade_episode(
    state_dict: Dict[str, Any],
    case_dict: Dict[str, Any],
    episode_days: int = 14,
) -> Dict:
    """Return grading breakdown and final score in [0, 1].

    Components:
        accuracy   (weight 0.50): fraudsters flagged — full credit when the
            fraud type matched, half credit otherwise — minus a half-credit
            penalty per false positive.
        timeliness (weight 0.30): how soon after fraud onset each fraudster
            was flagged, averaged over all fraudsters (0 for missed ones).
        efficiency (weight 0.20): share of distinct tools spent on actual
            fraudsters plus unused budget, minus 0.2 per false positive.

    Args:
        state_dict: Result of _build_grader_state() — enriched AdAuditState dict.
        case_dict: The raw case JSON (with publisher configs).
        episode_days: Campaign length in days (default 14, the historical
            hard-coded value); normalizes timeliness and is the assumed flag
            day when a flag record lacks one.
    """
    publishers = state_dict.get("publishers", [])
    flags_submitted = state_dict.get("flags_submitted", [])
    investigation_budget_used = state_dict.get("investigation_budget_used", 0)
    investigation_budget_total = state_dict.get("investigation_budget_total", 0)

    # Derive fraud info from the enriched publisher entries.
    fraudulent_publishers: List[str] = []
    publisher_internals: Dict[str, Dict] = {}
    tools_used_per_publisher: Dict[str, List[str]] = {}

    case_publishers = case_dict.get("publishers", {})

    for pub in publishers:
        pid = pub["publisher_id"]
        tools_used_per_publisher[pid] = pub.get("tools_used", [])

        if pub.get("is_fraudulent"):
            fraudulent_publishers.append(pid)

            # fraud_start_day comes from the case config's fraud_schedule.
            cfg = case_publishers.get(pid, {})
            fraud_schedule = cfg.get("fraud_schedule", {})
            publisher_internals[pid] = {
                "fraud_start_day": fraud_schedule.get("start_day", 1),
            }

    num_fraudulent = len(fraudulent_publishers)
    if num_fraudulent == 0:
        # Nothing to detect: perfect score by definition.
        return {
            "accuracy": 1.0,
            "timeliness": 1.0,
            "efficiency": 1.0,
            "final_score": 1.0,
            "num_fraudulent": 0,
            "num_flagged_correct": 0,
            "num_false_positives": 0,
        }

    fraudulent_set = set(fraudulent_publishers)

    # Flags keyed by publisher; the environment prevents duplicate flags.
    flag_lookup: Dict[str, Dict] = {f["publisher_id"]: f for f in flags_submitted}

    # Count false positives (flags on legitimate publishers).
    flagged_ids = set(flag_lookup)
    false_positives = [pid for pid in flagged_ids if pid not in fraudulent_set]
    num_false_positives = len(false_positives)

    # ── 1. Fraud Detection Accuracy ─────────────────────────────────────
    accuracy = 0.0
    for pid in fraudulent_publishers:
        if pid in flag_lookup:
            if flag_lookup[pid].get("type_correct"):
                accuracy += 1.0 / num_fraudulent
            else:
                accuracy += 0.5 / num_fraudulent
    # Penalty for false positives.
    accuracy -= num_false_positives * (0.5 / num_fraudulent)
    accuracy = max(0.0, min(1.0, accuracy))

    # ── 2. Detection Timeliness ─────────────────────────────────────────
    timeliness_scores = []
    for pid in fraudulent_publishers:
        internal = publisher_internals.get(pid, {})
        fraud_start = internal.get("fraud_start_day", 1)
        if pid in flag_lookup:
            day_flagged = flag_lookup[pid].get("day", episode_days)
            denom = episode_days - fraud_start
            if denom <= 0:
                # Fraud starting at (or after) the last day: any flag is timely.
                t = 1.0
            else:
                t = 1.0 - (day_flagged - fraud_start) / denom
            timeliness_scores.append(max(0.0, min(1.0, t)))
        else:
            timeliness_scores.append(0.0)
    timeliness = sum(timeliness_scores) / len(timeliness_scores) if timeliness_scores else 0.0

    # ── 3. Investigation Efficiency ─────────────────────────────────────
    total = investigation_budget_used
    # "Useful" = distinct tools used on publishers that really were fraudulent.
    useful = sum(len(tools_used_per_publisher.get(pid, [])) for pid in fraudulent_publishers)

    if total > 0:
        information_value = useful / total
    else:
        information_value = 0.0

    if investigation_budget_total > 0:
        budget_efficiency = 1.0 - (total / investigation_budget_total)
    else:
        budget_efficiency = 1.0

    fp_penalty = num_false_positives * 0.2
    efficiency = 0.5 * information_value + 0.3 * budget_efficiency - fp_penalty
    efficiency = max(0.0, min(1.0, efficiency))

    # ── Final Score ─────────────────────────────────────────────────────
    final = min(1.0, 0.50 * accuracy + 0.30 * timeliness + 0.20 * efficiency)

    return {
        "accuracy": round(accuracy, 4),
        "timeliness": round(timeliness, 4),
        "efficiency": round(efficiency, 4),
        "final_score": round(final, 4),
        "num_fraudulent": num_fraudulent,
        # Fraudsters that were flagged at all (regardless of type match).
        "num_flagged_correct": sum(1 for pid in fraudulent_publishers if pid in flag_lookup),
        "num_false_positives": num_false_positives,
    }
|
server/publisher_engine.py
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Publisher engine β per-publisher traffic generation each day.
|
| 3 |
+
|
| 4 |
+
All randomness is pre-baked into ``day_factors`` and ``noise_factors``
|
| 5 |
+
arrays in the case profile, ensuring full determinism.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from __future__ import annotations
|
| 9 |
+
|
| 10 |
+
from typing import Any, Dict
|
| 11 |
+
|
| 12 |
+
from .fraud_engine import compute_fraud_intensity
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def generate_daily_traffic(
    day: int,
    publisher_cfg: Dict[str, Any],
    budget_allocation: float,
    adaptation_stage: str,
    is_paused: bool,
) -> Dict[str, Any]:
    """Generate one day of traffic for a single publisher.

    Args:
        day: 1-indexed simulation day.
        publisher_cfg: Per-publisher configuration: rates, pre-baked
            ``day_factors``/``noise_factors`` arrays, and optional fraud
            settings (``is_fraudulent``, ``fraud_schedule``, fake rates).
        budget_allocation: Fraction of budget routed to this publisher;
            scales legitimate impressions linearly.
        adaptation_stage: Fraud adaptation stage, forwarded to
            ``compute_fraud_intensity``.
        is_paused: When True the publisher serves nothing and an
            all-zero record is returned.

    Returns:
        Dict with keys: impressions, clicks, conversions, spend,
        ctr, cvr, legitimate_spend, fraudulent_spend, legitimate_revenue.
        Counts are ints; monetary values are rounded to 2 decimals and
        rates to 4.
    """
    if is_paused:
        return _zero_traffic()

    day_idx = day - 1  # 0-indexed into the pre-baked factor arrays

    day_factors = publisher_cfg.get("day_factors", [1.0] * 30)
    noise_factors = publisher_cfg.get("noise_factors", [1.0] * 30)

    # Guard BOTH ends of the range: a day outside the baked arrays must
    # fall back to a neutral 1.0 factor. Checking only the upper bound
    # would let day <= 0 wrap around via Python negative indexing.
    day_factor = day_factors[day_idx] if 0 <= day_idx < len(day_factors) else 1.0
    noise_factor = noise_factors[day_idx] if 0 <= day_idx < len(noise_factors) else 1.0

    base_rate: float = publisher_cfg["base_traffic_rate"]
    true_ctr: float = publisher_cfg["true_ctr"]
    true_cvr: float = publisher_cfg["true_cvr"]
    cpm_rate: float = publisher_cfg.get("cpm_rate", 2.0)
    conversion_value: float = publisher_cfg.get("conversion_value", 10.0)

    # --- Legitimate traffic ---
    # NOTE: noise_factor deliberately compounds down the funnel
    # (impressions -> clicks -> conversions), preserving the original model.
    legit_impressions = base_rate * budget_allocation * day_factor * noise_factor
    legit_clicks = legit_impressions * true_ctr * noise_factor
    legit_conversions = legit_clicks * true_cvr * noise_factor
    legit_spend = legit_impressions * cpm_rate / 1000.0  # CPM = cost per 1000 impressions
    legit_revenue = legit_conversions * conversion_value

    # --- Fraudulent traffic (only for fraudulent publishers) ---
    fraud_impressions = 0.0
    fraud_clicks = 0.0
    fraud_conversions = 0.0
    fraud_spend = 0.0

    if publisher_cfg.get("is_fraudulent", False):
        fraud_schedule = publisher_cfg.get("fraud_schedule", {})
        if fraud_schedule:
            intensity = compute_fraud_intensity(day, fraud_schedule, adaptation_stage)
            if intensity > 0:
                # Fake traffic piggybacks on legitimate volume, using its
                # own (inflated CTR / near-zero CVR) rates.
                fake_ctr = publisher_cfg.get("fake_ctr", 0.045)
                fake_cvr = publisher_cfg.get("fake_cvr", 0.001)

                fraud_impressions = legit_impressions * intensity
                fraud_clicks = fraud_impressions * fake_ctr
                fraud_conversions = fraud_clicks * fake_cvr
                fraud_spend = fraud_impressions * cpm_rate / 1000.0

    total_impressions = int(round(legit_impressions + fraud_impressions))
    total_clicks = int(round(legit_clicks + fraud_clicks))
    total_conversions = int(round(legit_conversions + fraud_conversions))
    total_spend = legit_spend + fraud_spend

    # Observed (blended) rates; guarded against division by zero.
    ctr = total_clicks / total_impressions if total_impressions > 0 else 0.0
    cvr = total_conversions / total_clicks if total_clicks > 0 else 0.0

    return {
        "impressions": total_impressions,
        "clicks": total_clicks,
        "conversions": total_conversions,
        "spend": round(total_spend, 2),
        "ctr": round(ctr, 4),
        "cvr": round(cvr, 4),
        "legitimate_spend": round(legit_spend, 2),
        "fraudulent_spend": round(fraud_spend, 2),
        "legitimate_revenue": round(legit_revenue, 2),
    }


def _zero_traffic() -> Dict[str, Any]:
    """All-zero traffic record returned while a publisher is paused."""
    return {
        "impressions": 0,
        "clicks": 0,
        "conversions": 0,
        "spend": 0.0,
        "ctr": 0.0,
        "cvr": 0.0,
        "legitimate_spend": 0.0,
        "fraudulent_spend": 0.0,
        "legitimate_revenue": 0.0,
    }
|
server/requirements.txt
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
openenv[core]>=0.2.0
|
| 2 |
+
fastapi>=0.115.0
|
| 3 |
+
uvicorn>=0.24.0
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
|
server/response_generator.py
ADDED
|
@@ -0,0 +1,344 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Investigation tool response generator.
|
| 2 |
+
|
| 3 |
+
Returns structured numerical metrics deterministically via SHA256 seeding.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
from __future__ import annotations
|
| 7 |
+
|
| 8 |
+
import hashlib
|
| 9 |
+
from typing import Any, Dict
|
| 10 |
+
|
| 11 |
+
# ── Legitimate metric ranges ────────────────────────────────────────────────
# Baseline (lo, hi) ranges per investigation tool for clean publishers.
# Metric values are drawn deterministically from these ranges by the
# SHA256-seeded generator below.

LEGIT_RANGES: Dict[str, Dict[str, tuple]] = {
    "click_timestamps": {
        "avg_interval_seconds": (30.0, 90.0),
        "interval_std_dev": (15.0, 45.0),
        "pct_clicks_2am_5am": (0.02, 0.08),
        "weekday_weekend_ratio": (1.2, 2.5),
        "pct_sub_second_pairs": (0.0, 0.02),
        "hourly_entropy": (3.0, 3.8),
    },
    "ip_distribution": {
        "unique_ips_per_1000_clicks": (600.0, 950.0),
        "top_ip_pct": (0.5, 3.0),
        "pct_datacenter_ips": (2.0, 8.0),
        "pct_residential_ips": (85.0, 95.0),
        "country_entropy": (1.5, 3.0),
        "pct_ip_subnet_collision": (1.0, 5.0),
    },
    "device_fingerprints": {
        "unique_fps_per_1000_clicks": (700.0, 950.0),
        "top_fp_pct": (0.3, 2.0),
        "pct_headless_browser": (0.0, 0.5),
        "avg_screen_resolutions": (8.0, 25.0),
        "pct_mismatched_timezone_ip": (1.0, 5.0),
        "os_entropy": (1.5, 2.5),
    },
    "referral_urls": {
        "pct_direct_navigation": (15.0, 40.0),
        "pct_referral_domain_mismatch": (1.0, 5.0),
        "unique_referral_domains": (50.0, 200.0),
        "pct_referral_chain_length_gt_2": (1.0, 5.0),
        "referral_domain_entropy": (3.0, 4.5),
    },
    "viewability_scores": {
        "pct_in_viewport_gt_1s": (60.0, 85.0),
        "avg_viewport_dwell_seconds": (3.0, 12.0),
        "pct_zero_pixel_ads": (0.0, 0.5),
        "pct_stacked_ads": (0.0, 1.0),
        "avg_focus_time_seconds": (5.0, 20.0),
        "pct_mouse_nearby": (30.0, 60.0),
    },
    "conversion_quality": {
        "click_to_conversion_seconds_mean": (120.0, 1800.0),
        "conversion_rate": (1.0, 8.0),
        "pct_bounce_after_click": (30.0, 55.0),
        "avg_pages_per_session": (2.5, 6.0),
        "pct_prior_engagement": (20.0, 50.0),
        "pct_last_click_attributed": (40.0, 70.0),
    },
}

# ── Fraud metric ranges ─────────────────────────────────────────────────────
# fraud_type -> adaptation_stage -> tool -> metric -> (lo, hi)
# Only distinctive signals are defined; unlisted combos fall through to LEGIT.
# Stages progress from blatant ("normal") through increasingly subtle
# ("cautious", "covering_tracks") to fully dormant ("dark" = no signal).

FRAUD_RANGES: Dict[str, Dict[str, Dict[str, Dict[str, tuple]]]] = {
    "bot_traffic": {
        # Blatant automation: machine-regular intervals, datacenter IPs,
        # headless browsers, near-zero engagement.
        "normal": {
            "click_timestamps": {
                "avg_interval_seconds": (2.0, 5.0),
                "interval_std_dev": (0.5, 2.0),
                "pct_clicks_2am_5am": (0.20, 0.45),
                "pct_sub_second_pairs": (0.10, 0.35),
                "hourly_entropy": (1.0, 2.0),
            },
            "ip_distribution": {
                "unique_ips_per_1000_clicks": (50.0, 200.0),
                "top_ip_pct": (10.0, 35.0),
                "pct_datacenter_ips": (60.0, 90.0),
                "pct_residential_ips": (10.0, 35.0),
                "pct_ip_subnet_collision": (20.0, 50.0),
            },
            "device_fingerprints": {
                "unique_fps_per_1000_clicks": (20.0, 100.0),
                "top_fp_pct": (15.0, 45.0),
                "pct_headless_browser": (30.0, 80.0),
                "pct_mismatched_timezone_ip": (15.0, 40.0),
                "os_entropy": (0.3, 0.8),
            },
            "referral_urls": {
                "pct_direct_navigation": (60.0, 90.0),
                "unique_referral_domains": (3.0, 15.0),
                "referral_domain_entropy": (0.5, 1.5),
            },
            "viewability_scores": {
                "pct_in_viewport_gt_1s": (10.0, 30.0),
                "avg_viewport_dwell_seconds": (0.2, 1.5),
                "avg_focus_time_seconds": (0.1, 1.0),
                "pct_mouse_nearby": (0.0, 5.0),
            },
            "conversion_quality": {
                "conversion_rate": (0.0, 0.1),
                "pct_bounce_after_click": (85.0, 99.0),
                "avg_pages_per_session": (1.0, 1.2),
                "pct_prior_engagement": (0.0, 2.0),
            },
        },
        # Partially humanized bots: signals shift toward legit baselines.
        "cautious": {
            "click_timestamps": {
                "avg_interval_seconds": (8.0, 20.0),
                "interval_std_dev": (3.0, 10.0),
                "pct_clicks_2am_5am": (0.10, 0.25),
                "pct_sub_second_pairs": (0.05, 0.15),
                "hourly_entropy": (2.0, 2.8),
            },
            "ip_distribution": {
                "unique_ips_per_1000_clicks": (200.0, 400.0),
                "top_ip_pct": (5.0, 15.0),
                "pct_datacenter_ips": (30.0, 55.0),
                "pct_residential_ips": (40.0, 65.0),
                "pct_ip_subnet_collision": (10.0, 25.0),
            },
            "device_fingerprints": {
                "unique_fps_per_1000_clicks": (200.0, 450.0),
                "top_fp_pct": (5.0, 15.0),
                "pct_headless_browser": (10.0, 30.0),
                "pct_mismatched_timezone_ip": (8.0, 20.0),
                "os_entropy": (0.8, 1.3),
            },
            "referral_urls": {
                "pct_direct_navigation": (40.0, 65.0),
                "unique_referral_domains": (15.0, 40.0),
                "referral_domain_entropy": (1.5, 2.5),
            },
            "viewability_scores": {
                "pct_in_viewport_gt_1s": (25.0, 45.0),
                "avg_viewport_dwell_seconds": (1.0, 3.0),
                "avg_focus_time_seconds": (1.0, 3.0),
                "pct_mouse_nearby": (5.0, 15.0),
            },
            "conversion_quality": {
                "conversion_rate": (0.1, 0.5),
                "pct_bounce_after_click": (70.0, 85.0),
                "avg_pages_per_session": (1.2, 1.8),
                "pct_prior_engagement": (2.0, 8.0),
            },
        },
        # Near-legit residue: only faint anomalies remain, and only on a
        # subset of tools (others fall through to LEGIT_RANGES).
        "covering_tracks": {
            "click_timestamps": {
                "avg_interval_seconds": (18.0, 35.0),
                "interval_std_dev": (8.0, 18.0),
                "pct_clicks_2am_5am": (0.06, 0.12),
                "pct_sub_second_pairs": (0.02, 0.06),
                "hourly_entropy": (2.5, 3.2),
            },
            "ip_distribution": {
                "unique_ips_per_1000_clicks": (400.0, 600.0),
                "top_ip_pct": (3.0, 8.0),
                "pct_datacenter_ips": (12.0, 25.0),
                "pct_residential_ips": (65.0, 82.0),
                "pct_ip_subnet_collision": (5.0, 12.0),
            },
            "device_fingerprints": {
                "unique_fps_per_1000_clicks": (450.0, 650.0),
                "top_fp_pct": (2.0, 6.0),
                "pct_headless_browser": (2.0, 8.0),
                "pct_mismatched_timezone_ip": (4.0, 10.0),
                "os_entropy": (1.2, 1.8),
            },
            "conversion_quality": {
                "conversion_rate": (0.5, 1.5),
                "pct_bounce_after_click": (55.0, 70.0),
                "avg_pages_per_session": (1.5, 2.5),
            },
        },
        "dark": {},  # no fraud signals — looks legit
    },
    "domain_spoofing": {
        "normal": {
            "referral_urls": {
                "pct_referral_domain_mismatch": (40.0, 75.0),
                "pct_referral_chain_length_gt_2": (15.0, 35.0),
                "referral_domain_entropy": (0.8, 1.8),
            },
            "viewability_scores": {
                "pct_zero_pixel_ads": (15.0, 45.0),
                "pct_stacked_ads": (10.0, 30.0),
                "pct_in_viewport_gt_1s": (15.0, 35.0),
                "avg_viewport_dwell_seconds": (0.5, 2.0),
            },
            "ip_distribution": {
                "pct_datacenter_ips": (25.0, 50.0),
                "pct_residential_ips": (45.0, 70.0),
            },
            "device_fingerprints": {
                "pct_headless_browser": (5.0, 20.0),
            },
            "click_timestamps": {
                "avg_interval_seconds": (15.0, 35.0),
                "pct_clicks_2am_5am": (0.10, 0.20),
            },
            "conversion_quality": {
                "pct_bounce_after_click": (65.0, 85.0),
                "avg_pages_per_session": (1.2, 2.0),
            },
        },
        "cautious": {
            "referral_urls": {
                "pct_referral_domain_mismatch": (20.0, 40.0),
                "pct_referral_chain_length_gt_2": (8.0, 18.0),
                "referral_domain_entropy": (1.8, 2.8),
            },
            "viewability_scores": {
                "pct_zero_pixel_ads": (5.0, 15.0),
                "pct_stacked_ads": (3.0, 10.0),
                "pct_in_viewport_gt_1s": (35.0, 50.0),
                "avg_viewport_dwell_seconds": (2.0, 4.0),
            },
        },
        "covering_tracks": {
            "referral_urls": {
                "pct_referral_domain_mismatch": (8.0, 18.0),
                "pct_referral_chain_length_gt_2": (4.0, 8.0),
            },
            "viewability_scores": {
                "pct_zero_pixel_ads": (1.0, 5.0),
                "pct_stacked_ads": (1.0, 3.0),
                "pct_in_viewport_gt_1s": (45.0, 60.0),
            },
        },
        "dark": {},
    },
    "click_injection": {
        # Signature signal: implausibly short click-to-conversion times
        # and near-total last-click attribution capture.
        "normal": {
            "conversion_quality": {
                "click_to_conversion_seconds_mean": (2.0, 15.0),
                "conversion_rate": (15.0, 50.0),
                "pct_last_click_attributed": (85.0, 99.0),
                "pct_bounce_after_click": (10.0, 25.0),
                "avg_pages_per_session": (1.0, 1.5),
            },
            "click_timestamps": {
                "avg_interval_seconds": (5.0, 15.0),
                "pct_sub_second_pairs": (0.15, 0.40),
                "hourly_entropy": (1.5, 2.5),
            },
            "device_fingerprints": {
                "pct_headless_browser": (5.0, 25.0),
                "pct_mismatched_timezone_ip": (10.0, 25.0),
            },
            "ip_distribution": {
                "pct_datacenter_ips": (15.0, 35.0),
            },
        },
        "cautious": {
            "conversion_quality": {
                "click_to_conversion_seconds_mean": (15.0, 60.0),
                "conversion_rate": (8.0, 20.0),
                "pct_last_click_attributed": (70.0, 85.0),
                "pct_bounce_after_click": (25.0, 40.0),
            },
            "click_timestamps": {
                "avg_interval_seconds": (12.0, 25.0),
                "pct_sub_second_pairs": (0.05, 0.15),
                "hourly_entropy": (2.2, 3.0),
            },
            "device_fingerprints": {
                "pct_headless_browser": (2.0, 8.0),
                "pct_mismatched_timezone_ip": (5.0, 12.0),
            },
        },
        "covering_tracks": {
            "conversion_quality": {
                "click_to_conversion_seconds_mean": (50.0, 120.0),
                "conversion_rate": (5.0, 10.0),
                "pct_last_click_attributed": (60.0, 72.0),
            },
            "click_timestamps": {
                "avg_interval_seconds": (20.0, 35.0),
                "pct_sub_second_pairs": (0.02, 0.06),
            },
        },
        "dark": {},
    },
}
|
| 287 |
+
|
| 288 |
+
|
| 289 |
+
# ── Seeded value generator ──────────────────────────────────────────────────

def _seeded_value(seed_str: str, lo: float, hi: float) -> float:
    """Map *seed_str* deterministically onto a value in ``[lo, hi]``.

    The first 8 hex digits of SHA256(seed_str) are reduced modulo 10000
    to a fraction in [0, 1), which interpolates the range.
    """
    digest = hashlib.sha256(seed_str.encode()).hexdigest()
    fraction = (int(digest[:8], 16) % 10000) / 10000.0
    return round(lo + fraction * (hi - lo), 4)


# ── Public API ──────────────────────────────────────────────────────────────

def generate_investigation_metrics(
    case_id: str,
    publisher_id: str,
    publisher_cfg: Dict[str, Any],
    tool_name: str,
    adaptation_stage: str,
) -> Dict[str, Any]:
    """Return structured numerical metrics for an investigation tool.

    Fraudulent publishers outside the "dark" stage draw any metric
    listed in FRAUD_RANGES from the fraud-specific range; every other
    metric falls back to the legitimate baseline in LEGIT_RANGES.
    Values are fully deterministic per (case, publisher, tool, metric).
    """
    baseline_ranges = LEGIT_RANGES.get(tool_name, {})
    if not baseline_ranges:
        return {"error": f"Unknown tool: {tool_name}"}

    # Fraud overrides apply only to active fraudsters that are not lying dormant.
    fraud_overrides: Dict[str, tuple] = {}
    fraud_type = publisher_cfg.get("fraud_type")
    if (
        publisher_cfg.get("is_fraudulent", False)
        and fraud_type
        and adaptation_stage != "dark"
    ):
        fraud_overrides = (
            FRAUD_RANGES.get(fraud_type, {})
            .get(adaptation_stage, {})
            .get(tool_name, {})
        )

    metrics: Dict[str, Any] = {}
    for metric_name, baseline in baseline_ranges.items():
        lo, hi = fraud_overrides.get(metric_name, baseline)
        seed = f"{case_id}:{publisher_id}:{tool_name}:{metric_name}"
        metrics[metric_name] = _seeded_value(seed, lo, hi)

    return {
        "tool": tool_name,
        "publisher_id": publisher_id,
        "metrics": metrics,
    }
|
| 335 |
+
|
| 336 |
+
|
| 337 |
+
def generate_trend_summary() -> str:
    """Trend-summary hook; currently muted, so always the empty string."""
    summary = ""
    return summary
|
| 340 |
+
|
| 341 |
+
|
| 342 |
+
def generate_alerts() -> list:
    """Alert hook; currently muted, so always a fresh empty list."""
    alerts: list = []
    return alerts
|
server/step_reward.py
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Per-step reward calculator [0, 1]."""
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
def compute_step_reward(
|
| 5 |
+
action_type: str,
|
| 6 |
+
daily_fraud_spend: float,
|
| 7 |
+
total_budget: float,
|
| 8 |
+
publisher_cfg: dict | None = None,
|
| 9 |
+
flag_correct: bool | None = None,
|
| 10 |
+
flag_type_correct: bool | None = None,
|
| 11 |
+
day: int = 1,
|
| 12 |
+
episode_days: int = 14,
|
| 13 |
+
) -> float:
|
| 14 |
+
"""Return a reward in [0.0, 1.0] for a single step.
|
| 15 |
+
|
| 16 |
+
Centered at 0.5 (neutral). Rewards scale with timing and precision.
|
| 17 |
+
"""
|
| 18 |
+
if action_type == "monitor":
|
| 19 |
+
if daily_fraud_spend > 0:
|
| 20 |
+
# Active unflagged fraud β penalty grows with time (urgency)
|
| 21 |
+
progress = day / episode_days # 0.07 on day 1, 1.0 on day 14
|
| 22 |
+
penalty = 0.10 + 0.20 * progress # 0.10 early, up to 0.30 late
|
| 23 |
+
return max(0.05, 0.50 - penalty)
|
| 24 |
+
return 0.50
|
| 25 |
+
|
| 26 |
+
if action_type == "investigate_publisher":
|
| 27 |
+
if publisher_cfg is not None and publisher_cfg.get("is_fraudulent"):
|
| 28 |
+
# Investigating a real fraudster β reward scales with how early
|
| 29 |
+
early_bonus = max(0.0, (episode_days - day) / episode_days) * 0.10
|
| 30 |
+
return min(1.0, 0.55 + early_bonus)
|
| 31 |
+
# Investigating a clean publisher β wastes budget
|
| 32 |
+
return 0.35
|
| 33 |
+
|
| 34 |
+
if action_type == "flag_fraud":
|
| 35 |
+
if flag_correct is True and flag_type_correct is True:
|
| 36 |
+
# Perfect flag β bonus for catching it early
|
| 37 |
+
early_bonus = max(0.0, (episode_days - day) / episode_days) * 0.05
|
| 38 |
+
return min(1.0, 0.95 + early_bonus)
|
| 39 |
+
if flag_correct is True:
|
| 40 |
+
# Right publisher, wrong type
|
| 41 |
+
return 0.70
|
| 42 |
+
# False positive β heavy penalty
|
| 43 |
+
return 0.05
|
| 44 |
+
|
| 45 |
+
if action_type == "submit_report":
|
| 46 |
+
return 0.50
|
| 47 |
+
|
| 48 |
+
# invalid / malformed
|
| 49 |
+
return 0.05
|
uv.lock
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|