v2: Add in-match opponent profiling and adaptive parameter tuning
Browse files- submission.py +147 -0
submission.py
CHANGED
|
@@ -3350,6 +3350,85 @@ def plan_moves(world, deadline=None):
|
|
| 3350 |
_agent_step = 0
|
| 3351 |
|
| 3352 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3353 |
def _read(obs, key, default=None):
|
| 3354 |
if isinstance(obs, dict):
|
| 3355 |
return obs.get(key, default)
|
|
@@ -3386,8 +3465,76 @@ def build_world(obs, inferred_step=None):
|
|
| 3386 |
|
| 3387 |
def agent(obs, config=None):
|
| 3388 |
global _agent_step
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3389 |
_agent_step += 1
|
| 3390 |
start_time = time.perf_counter()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3391 |
world = build_world(obs, inferred_step=_agent_step - 1)
|
| 3392 |
if not world.my_planets:
|
| 3393 |
return []
|
|
|
|
| 3350 |
_agent_step = 0
|
| 3351 |
|
| 3352 |
|
| 3353 |
+
# ============================================================
|
| 3354 |
+
# In-Match Opponent Profiler
|
| 3355 |
+
# ============================================================
|
| 3356 |
+
|
| 3357 |
+
class _OpponentProfilerImpl:
|
| 3358 |
+
"""Tracks opponent behavior during a match for real-time adaptation.
|
| 3359 |
+
|
| 3360 |
+
Maintains exponential moving averages of:
|
| 3361 |
+
- Aggression: fleet launch rate (how often they attack)
|
| 3362 |
+
- Expansion: planet capture speed
|
| 3363 |
+
- Strength balance: whether we're ahead or behind
|
| 3364 |
+
"""
|
| 3365 |
+
|
| 3366 |
+
def __init__(self):
|
| 3367 |
+
self.alpha = 0.08 # EMA smoothing factor
|
| 3368 |
+
self.aggression = 0.5
|
| 3369 |
+
self.expansion_rate = 0.5
|
| 3370 |
+
self.prev_enemy_planets = 0
|
| 3371 |
+
self.prev_enemy_fleets = 0
|
| 3372 |
+
self.prev_my_ships = 0
|
| 3373 |
+
self.prev_enemy_ships = 0
|
| 3374 |
+
self.my_planet_count = 0
|
| 3375 |
+
self.enemy_planet_count = 0
|
| 3376 |
+
self.step_count = 0
|
| 3377 |
+
self.current_step = 0
|
| 3378 |
+
|
| 3379 |
+
def update(self, obs):
|
| 3380 |
+
get = obs.get if isinstance(obs, dict) else lambda k, d=None: getattr(obs, k, d)
|
| 3381 |
+
player = int(get("player", 0) or 0)
|
| 3382 |
+
step = int(get("step", 0) or 0)
|
| 3383 |
+
planets = get("planets") or []
|
| 3384 |
+
fleets = get("fleets") or []
|
| 3385 |
+
|
| 3386 |
+
self.current_step = step
|
| 3387 |
+
|
| 3388 |
+
enemy_planets = sum(1 for p in planets if p[1] not in (-1, player))
|
| 3389 |
+
enemy_fleets = sum(1 for f in fleets if f[1] != player)
|
| 3390 |
+
my_ships = sum(p[5] for p in planets if p[1] == player)
|
| 3391 |
+
my_ships += sum(f[6] for f in fleets if f[1] == player)
|
| 3392 |
+
enemy_ships = sum(p[5] for p in planets if p[1] not in (-1, player))
|
| 3393 |
+
enemy_ships += sum(f[6] for f in fleets if f[1] != player)
|
| 3394 |
+
|
| 3395 |
+
self.my_planet_count = sum(1 for p in planets if p[1] == player)
|
| 3396 |
+
self.enemy_planet_count = enemy_planets
|
| 3397 |
+
|
| 3398 |
+
if self.step_count > 0:
|
| 3399 |
+
# Track fleet launch frequency
|
| 3400 |
+
fleet_delta = max(0, enemy_fleets - self.prev_enemy_fleets)
|
| 3401 |
+
self.aggression = (1 - self.alpha) * self.aggression + self.alpha * min(1.0, fleet_delta / 4.0)
|
| 3402 |
+
|
| 3403 |
+
# Track expansion rate
|
| 3404 |
+
planet_delta = enemy_planets - self.prev_enemy_planets
|
| 3405 |
+
expansion_signal = max(0.0, min(1.0, (planet_delta + 1) / 3.0))
|
| 3406 |
+
self.expansion_rate = (1 - self.alpha) * self.expansion_rate + self.alpha * expansion_signal
|
| 3407 |
+
|
| 3408 |
+
self.prev_enemy_planets = enemy_planets
|
| 3409 |
+
self.prev_enemy_fleets = enemy_fleets
|
| 3410 |
+
self.prev_my_ships = my_ships
|
| 3411 |
+
self.prev_enemy_ships = enemy_ships
|
| 3412 |
+
self.step_count += 1
|
| 3413 |
+
|
| 3414 |
+
def get_profile(self):
|
| 3415 |
+
total = self.prev_my_ships + self.prev_enemy_ships
|
| 3416 |
+
my_share = self.prev_my_ships / max(1, total)
|
| 3417 |
+
|
| 3418 |
+
return {
|
| 3419 |
+
"aggression": self.aggression,
|
| 3420 |
+
"expansion_rate": self.expansion_rate,
|
| 3421 |
+
"we_are_ahead": my_share > 0.55,
|
| 3422 |
+
"we_are_behind": my_share < 0.42,
|
| 3423 |
+
"enemy_expanding_fast": self.expansion_rate > 0.6,
|
| 3424 |
+
"late_game": self.current_step > 350,
|
| 3425 |
+
"confident": self.step_count > 30,
|
| 3426 |
+
}
|
| 3427 |
+
|
| 3428 |
+
|
| 3429 |
+
# Module-level profiler instance. agent() feeds it every observation via
# _profiler.update(obs), so its statistics persist between agent() calls.
_profiler = _OpponentProfilerImpl()
|
| 3430 |
+
|
| 3431 |
+
|
| 3432 |
def _read(obs, key, default=None):
|
| 3433 |
if isinstance(obs, dict):
|
| 3434 |
return obs.get(key, default)
|
|
|
|
| 3465 |
|
| 3466 |
def agent(obs, config=None):
|
| 3467 |
global _agent_step
|
| 3468 |
+
global HOSTILE_TARGET_VALUE_MULT, ELIMINATION_BONUS, PROACTIVE_DEFENSE_RATIO
|
| 3469 |
+
global FINISHING_HOSTILE_VALUE_MULT, WEAK_ENEMY_THRESHOLD, ATTACK_COST_TURN_WEIGHT
|
| 3470 |
+
global HOSTILE_MARGIN_BASE, FOUR_PLAYER_TARGET_MARGIN, FINISHING_HOSTILE_SEND_BONUS
|
| 3471 |
+
global STATIC_HOSTILE_VALUE_MULT, GANG_UP_VALUE_MULT, EXPOSED_PLANET_VALUE_MULT
|
| 3472 |
+
global REINFORCE_VALUE_MULT, DEFENSE_SHIP_VALUE, BEHIND_DOMINATION, AHEAD_DOMINATION
|
| 3473 |
+
global LATE_REMAINING_TURNS, REAR_SEND_RATIO_TWO_PLAYER, COMET_VALUE_MULT, SNIPE_VALUE_MULT
|
| 3474 |
+
|
| 3475 |
_agent_step += 1
|
| 3476 |
start_time = time.perf_counter()
|
| 3477 |
+
|
| 3478 |
+
# ---- In-match opponent profiling & adaptive parameter adjustment ----
|
| 3479 |
+
_profiler.update(obs)
|
| 3480 |
+
|
| 3481 |
+
if _agent_step > 20: # wait for enough data
|
| 3482 |
+
profile = _profiler.get_profile()
|
| 3483 |
+
|
| 3484 |
+
# Adapt based on opponent aggression
|
| 3485 |
+
if profile["aggression"] > 0.6:
|
| 3486 |
+
# Opponent is very aggressive → be more defensive
|
| 3487 |
+
PROACTIVE_DEFENSE_RATIO = 0.38
|
| 3488 |
+
DEFENSE_SHIP_VALUE = 0.70
|
| 3489 |
+
HOSTILE_TARGET_VALUE_MULT = 1.80
|
| 3490 |
+
REINFORCE_VALUE_MULT = 1.55
|
| 3491 |
+
BEHIND_DOMINATION = -0.30
|
| 3492 |
+
elif profile["aggression"] < 0.3:
|
| 3493 |
+
# Opponent is passive/turtle → be very aggressive, expand fast
|
| 3494 |
+
PROACTIVE_DEFENSE_RATIO = 0.15
|
| 3495 |
+
DEFENSE_SHIP_VALUE = 0.40
|
| 3496 |
+
HOSTILE_TARGET_VALUE_MULT = 2.30
|
| 3497 |
+
ELIMINATION_BONUS = 70.0
|
| 3498 |
+
EXPOSED_PLANET_VALUE_MULT = 2.5
|
| 3499 |
+
GANG_UP_VALUE_MULT = 1.6
|
| 3500 |
+
else:
|
| 3501 |
+
# Balanced opponent → use tuned defaults
|
| 3502 |
+
PROACTIVE_DEFENSE_RATIO = 0.28
|
| 3503 |
+
DEFENSE_SHIP_VALUE = 0.55
|
| 3504 |
+
HOSTILE_TARGET_VALUE_MULT = 2.05
|
| 3505 |
+
ELIMINATION_BONUS = 55.0
|
| 3506 |
+
EXPOSED_PLANET_VALUE_MULT = 2.0
|
| 3507 |
+
GANG_UP_VALUE_MULT = 1.4
|
| 3508 |
+
|
| 3509 |
+
# Adapt based on relative strength
|
| 3510 |
+
if profile["we_are_ahead"]:
|
| 3511 |
+
# We're ahead → play safe, consolidate, don't overextend
|
| 3512 |
+
ATTACK_COST_TURN_WEIGHT = 0.55
|
| 3513 |
+
BEHIND_DOMINATION = -0.15
|
| 3514 |
+
AHEAD_DOMINATION = 0.20
|
| 3515 |
+
REAR_SEND_RATIO_TWO_PLAYER = 0.55
|
| 3516 |
+
elif profile["we_are_behind"]:
|
| 3517 |
+
# We're behind → be more aggressive, take risks
|
| 3518 |
+
ATTACK_COST_TURN_WEIGHT = 0.40
|
| 3519 |
+
BEHIND_DOMINATION = -0.30
|
| 3520 |
+
WEAK_ENEMY_THRESHOLD = 140
|
| 3521 |
+
FINISHING_HOSTILE_VALUE_MULT = 1.5
|
| 3522 |
+
FINISHING_HOSTILE_SEND_BONUS = 7
|
| 3523 |
+
SNIPE_VALUE_MULT = 1.3
|
| 3524 |
+
|
| 3525 |
+
# Adapt based on expansion rate
|
| 3526 |
+
if profile["enemy_expanding_fast"]:
|
| 3527 |
+
# Enemy is expanding fast → contest neutrals more aggressively
|
| 3528 |
+
COMET_VALUE_MULT = 0.85
|
| 3529 |
+
STATIC_HOSTILE_VALUE_MULT = 1.85
|
| 3530 |
+
FOUR_PLAYER_TARGET_MARGIN = 1
|
| 3531 |
+
|
| 3532 |
+
# Late-game adaptation
|
| 3533 |
+
if profile["late_game"]:
|
| 3534 |
+
LATE_REMAINING_TURNS = 80
|
| 3535 |
+
ELIMINATION_BONUS = max(ELIMINATION_BONUS, 65.0)
|
| 3536 |
+
FINISHING_HOSTILE_VALUE_MULT = max(FINISHING_HOSTILE_VALUE_MULT, 1.4)
|
| 3537 |
+
|
| 3538 |
world = build_world(obs, inferred_step=_agent_step - 1)
|
| 3539 |
if not world.my_planets:
|
| 3540 |
return []
|