# Source: ryworld-vln-discrete/eval_results/scripts/ground_truth_analysis.py
# (HuggingFace listing header: uploader ttll0928, commit 4a61963,
#  "Initial release: RyWorld VLN stage1 discrete step15000 ckpt + eval pkg")
"""Ground-truth per-episode agent.step analysis from eval.log."""
import bisect
import re
import sys
from collections import Counter
# Read the whole eval log named on the command line.
# Fixed: the original used a bare open() whose handle was never closed;
# a context manager guarantees the file is released.
with open(sys.argv[1]) as f:
    eval_log = f.read()
# Pattern for actual executed action (only count the unique one, not the dup INFO line)
# Each agent.step produces 2 lines with same timestamp; use the one without file path
action_re = re.compile(
    r"^\[(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3})\]\[INFO\] get \d+ actions :\[\[(-?\d+)\]\]",
    re.M,
)
# (timestamp_str, action_class) per agent.step call, in log order
actions = [(m.group(1), int(m.group(2))) for m in action_re.finditer(eval_log)]
print(f"Total agent.step calls: {len(actions)}")
# Episode boundaries
finishes = list(
re.finditer(
r"\[(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3})\].*?\[(\d+)/30\]\[step_index:(\d+)\] "
r"finish: \[trajectory_id:(\S+?)\]\[duration:([\d.]+) s\]\[step_count:(\d+)\]\[fps:([\d.]+)\]\[result:(\w+)\]",
eval_log,
)
)
print(f"Episodes finished: {len(finishes)}")
# For each episode (1-indexed), assign actions whose timestamp <= episode finish timestamp
# but > previous episode finish timestamp
boundaries = [m.group(1) for m in finishes] # finish timestamps
ep_info = [(m.group(2), m.group(4), m.group(6), m.group(8), m.group(5)) for m in finishes]
# (idx, tid, step_count, result, duration)
def in_episode(action_ts, ep_idx, bounds=None):
    """Return True if *action_ts* falls inside episode *ep_idx*.

    An action belongs to episode i when its timestamp is later than
    episode i-1's finish timestamp and no later than episode i's
    (prev_finish_ts < action_ts <= this_finish_ts; episode 0 has no
    lower bound).

    Args:
        action_ts: "YYYY-MM-DD HH:MM:SS,mmm" timestamp string; for this
            fixed-width format lexicographic order equals time order.
        ep_idx: 0-based episode index.
        bounds: list of finish timestamps, one per episode.  Defaults to
            the module-level ``boundaries`` for backward compatibility;
            passing it explicitly makes the function self-contained.
    """
    if bounds is None:
        bounds = boundaries
    if ep_idx == 0:
        return action_ts <= bounds[0]
    return bounds[ep_idx - 1] < action_ts <= bounds[ep_idx]
# bucket: assign each action to its episode via binary search over the finish
# timestamps.  Log timestamps are chronological, so boundaries is sorted and
# bisect_left(boundaries, ts) is exactly the first episode i satisfying
# prev_finish < ts <= boundaries[i] — same rule as the original linear scan,
# but O(log E) per action instead of O(E).
ep_actions = {i: [] for i in range(len(finishes))}
for ts, cls in actions:
    ep = bisect.bisect_left(boundaries, ts)
    if ep < len(finishes):  # ts after the last finish -> in-progress episode, skip
        ep_actions[ep].append(cls)
# Per-episode summary table: one row per finished episode.
print()
print(f"{'ep':>3} {'tid':<25} {'result':<14} {'sim_step':>9} {'agent_step':>10} {'first_Stop':>11} {'F%':>5} {'TL%':>5} {'TR%':>5} {'Stop%':>5}")
print("-" * 125)
for i, (idx, tid, sim_step, result, dur) in enumerate(ep_info):
    acts = ep_actions[i]
    n = len(acts)
    if n == 0:
        # no action line fell inside this episode's time window
        print(f"{idx:>3} {tid[:25]:<25} {result[:14]:<14} NO ACTIONS")
        continue
    counts = Counter(acts)
    # per-class percentages for the table columns; class->name mapping is
    # assumed from the header labels (1=F, 2=TL, 3=TR, 0=Stop) — TODO confirm.
    # (replaces a PEP8-E731 lambda assigned to a name)
    f_pct, tl_pct, tr_pct, stop_pct = (100 * counts.get(k, 0) / n for k in (1, 2, 3, 0))
    # 1-based position of the first Stop (class 0) action, or "never"
    first_stop = next((j for j, a in enumerate(acts) if a == 0), None)
    fst = f"{first_stop+1}/{n}" if first_stop is not None else "never"
    # shorten the trajectory id: keep the first half of its "_"-separated parts
    # (split once instead of twice as in the original)
    parts = tid.split("_")
    short = "_".join(parts[: len(parts) // 2])
    print(
        f"{idx:>3} {short[:25]:<25} {result[:14]:<14} {sim_step:>9} {n:>10} {fst:>11} "
        f"{f_pct:>5.1f} {tl_pct:>5.1f} {tr_pct:>5.1f} {stop_pct:>5.1f}"
    )
# Actions logged after the final finish line belong to a still-running episode.
last_finish_ts = boundaries[-1] if boundaries else None
if last_finish_ts is None:
    # no episode has finished yet: every action is in-progress
    in_progress = [cls for _ts, cls in actions]
else:
    in_progress = [cls for ts, cls in actions if ts > last_finish_ts]
print(f"\n[in-progress episode]: {len(in_progress)} agent.step calls so far")
if in_progress:
    dist = Counter(in_progress)
    print(f" class dist: {dict(dist)}")