#!/usr/bin/env python3
"""
Analyze segment lengths in the recognition dataset.
For each annotation file, computes segment lengths in:
- Raw frames (at 100Hz sampling rate)
- Downsampled frames (downsample=5 -> 20Hz effective)
Reports statistics and distribution relative to window_frames used in training.
"""
import os
import sys
import json
import re
import numpy as np
from collections import defaultdict
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from data.dataset import DATASET_DIR, TRAIN_VOLS, VAL_VOLS, TEST_VOLS
# Root directory that holds one sub-directory of annotation JSON files per
# volume. The path is written as a shell-style placeholder, which Python does
# not expand on its own, so resolve it from the environment here. When
# PULSE_ROOT is not set, expandvars() leaves the text unchanged, i.e. the value
# is the same literal string as before.
ANNOTATION_DIR = os.path.expandvars("${PULSE_ROOT}")
SAMPLING_RATE = 100  # Hz, rate of the raw recordings
DOWNSAMPLE = 5  # downsample factor: 100 Hz / 5 = 20 Hz effective rate
def parse_timestamp(ts_str):
    """Convert an ``MM:SS`` or ``HH:MM:SS`` string into whole seconds.

    Surrounding whitespace is ignored. A string with any other number of
    colon-separated fields yields 0. Fields that are not integers raise
    ``ValueError`` (from ``int``).
    """
    fields = ts_str.strip().split(':')
    if len(fields) not in (2, 3):
        return 0

    # Fold the fields as base-60 digits: ((h * 60) + m) * 60 + s.
    seconds = 0
    for field in fields:
        seconds = seconds * 60 + int(field)
    return seconds
def main():
    """Print segment-length statistics for every annotated scenario.

    For each volume in TRAIN_VOLS, VAL_VOLS and TEST_VOLS, reads the ``.json``
    files under ``ANNOTATION_DIR/<vol>`` whose scenario also has a directory
    under ``DATASET_DIR/<vol>``. Each entry of the file's ``"segments"`` list
    is expected to carry a ``"timestamp"`` of the form ``"start - end"`` with
    ``MM:SS`` or ``HH:MM:SS`` endpoints. Segments whose timestamp is missing,
    malformed, or not strictly increasing are counted as skipped.

    The report (written to stdout) covers durations in seconds, raw frames at
    SAMPLING_RATE, downsampled frames at SAMPLING_RATE / DOWNSAMPLE, a
    comparison against several window sizes, per-split summaries and a text
    histogram. If no valid segment is found, only the header is printed.
    Nothing is returned.
    """
    banner = "=" * 70
    rule = "-" * 70
    percentiles = [5, 10, 25, 50, 75, 90, 95]

    # Compiled once; it is applied to every segment of every file.
    timestamp_re = re.compile(r'(\d+:\d+(?::\d+)?)\s*-\s*(\d+:\d+(?::\d+)?)')

    all_vols = TRAIN_VOLS + VAL_VOLS + TEST_VOLS

    # Collect segment lengths
    raw_lengths_sec = []     # in seconds
    raw_lengths_frames = []  # in raw frames (SAMPLING_RATE)
    ds_lengths_frames = []   # in downsampled frames
    split_stats = defaultdict(list)  # split -> list of downsampled lengths
    total_scenarios = 0
    total_segments = 0
    skipped_segments = 0

    for vol in sorted(all_vols):
        # Determine split
        if vol in TRAIN_VOLS:
            split = 'train'
        elif vol in VAL_VOLS:
            split = 'val'
        else:
            split = 'test'

        ann_vol_dir = os.path.join(ANNOTATION_DIR, vol)
        if not os.path.isdir(ann_vol_dir):
            print(f"WARNING: No annotation dir for {vol}")
            continue

        for ann_file in sorted(os.listdir(ann_vol_dir)):
            if not ann_file.endswith('.json'):
                continue
            # Strip only the trailing extension; str.replace would also remove
            # any '.json' occurring in the middle of the file name.
            scenario = os.path.splitext(ann_file)[0]
            ann_path = os.path.join(ann_vol_dir, ann_file)

            # Only count scenarios that also exist in the dataset directory.
            scenario_dir = os.path.join(DATASET_DIR, vol, scenario)
            if not os.path.isdir(scenario_dir):
                continue

            with open(ann_path, encoding="utf-8") as f:
                ann = json.load(f)
            total_scenarios += 1

            for seg in ann.get('segments', []):
                # A missing or non-string timestamp is a bad timestamp, not a
                # reason to abort the whole analysis.
                raw_ts = seg.get('timestamp')
                m = timestamp_re.match(raw_ts) if isinstance(raw_ts, str) else None
                if not m:
                    skipped_segments += 1
                    continue

                start_sec = parse_timestamp(m.group(1))
                end_sec = parse_timestamp(m.group(2))
                if end_sec <= start_sec:
                    skipped_segments += 1
                    continue

                duration_sec = end_sec - start_sec
                raw_frames = duration_sec * SAMPLING_RATE
                # Truncate each endpoint to a downsampled frame index first,
                # then take the difference.
                ds_frames = (int(end_sec * SAMPLING_RATE / DOWNSAMPLE)
                             - int(start_sec * SAMPLING_RATE / DOWNSAMPLE))

                raw_lengths_sec.append(duration_sec)
                raw_lengths_frames.append(raw_frames)
                ds_lengths_frames.append(ds_frames)
                split_stats[split].append(ds_frames)
                total_segments += 1

    print(banner)
    print("SEGMENT LENGTH ANALYSIS FOR RECOGNITION DATASET")
    print(banner)
    print(f"\nTotal scenarios: {total_scenarios}")
    print(f"Total valid segments: {total_segments}")
    print(f"Skipped segments (bad timestamp): {skipped_segments}")
    print(f"Sampling rate: {SAMPLING_RATE} Hz")
    print(f"Downsample factor: {DOWNSAMPLE}")
    print(f"Effective rate after downsample: {SAMPLING_RATE / DOWNSAMPLE} Hz")

    # min()/max()/percentile() raise on empty arrays and every percentage
    # below divides by the segment count, so stop here when there is no data.
    if total_segments == 0:
        print("\nNo valid segments found; nothing to analyze.")
        return

    # Convert to numpy
    raw_sec = np.array(raw_lengths_sec)
    raw_fr = np.array(raw_lengths_frames)
    ds_fr = np.array(ds_lengths_frames)

    # --- Raw seconds ---
    print("\n" + rule)
    print("SEGMENT DURATION (seconds)")
    print(rule)
    print(f" Min: {raw_sec.min():.1f}s")
    print(f" Max: {raw_sec.max():.1f}s")
    print(f" Mean: {raw_sec.mean():.2f}s")
    print(f" Median: {np.median(raw_sec):.1f}s")
    print(f" Std: {raw_sec.std():.2f}s")
    # Percentiles
    for p in percentiles:
        print(f" P{p:2d}: {np.percentile(raw_sec, p):.1f}s")

    # --- Raw frames (100Hz) ---
    print("\n" + rule)
    print("SEGMENT LENGTH (raw frames @ 100Hz)")
    print(rule)
    print(f" Min: {raw_fr.min()}")
    print(f" Max: {raw_fr.max()}")
    print(f" Mean: {raw_fr.mean():.1f}")
    print(f" Median: {np.median(raw_fr):.0f}")

    # --- Downsampled frames ---
    print("\n" + rule)
    print(f"SEGMENT LENGTH (downsampled frames @ {SAMPLING_RATE/DOWNSAMPLE:.0f}Hz)")
    print(rule)
    print(f" Min: {ds_fr.min()}")
    print(f" Max: {ds_fr.max()}")
    print(f" Mean: {ds_fr.mean():.1f}")
    print(f" Median: {np.median(ds_fr):.0f}")
    print(f" Std: {ds_fr.std():.1f}")
    for p in percentiles:
        print(f" P{p:2d}: {np.percentile(ds_fr, p):.0f}")

    # --- Comparison with window_frames ---
    print("\n" + rule)
    print("COMPARISON WITH window_frames SETTINGS")
    print(rule)
    # Common window_sec values and their corresponding window_frames.
    # Segments exactly as long as the window fall in neither bucket.
    for window_sec in [5.0, 10.0, 15.0, 20.0, 30.0]:
        wf = int(window_sec * SAMPLING_RATE / DOWNSAMPLE)
        shorter = (ds_fr < wf).sum()
        longer = (ds_fr > wf).sum()
        pct_shorter = 100.0 * shorter / len(ds_fr)
        pct_longer = 100.0 * longer / len(ds_fr)
        print(f"\n window_sec={window_sec:5.1f}s -> window_frames={wf}")
        print(f" Segments SHORTER than window: {shorter:4d} ({pct_shorter:5.1f}%) -> will be PADDED")
        print(f" Segments LONGER than window: {longer:4d} ({pct_longer:5.1f}%) -> will be CENTER-CROPPED")

    # --- Thresholds in downsampled frames ---
    print("\n" + rule)
    print("PERCENTAGE SHORTER THAN THRESHOLDS (downsampled frames)")
    print(rule)
    for thresh in [20, 40, 60, 100, 200, 300, 400, 500, 1000, 2000]:
        pct = 100.0 * (ds_fr < thresh).sum() / len(ds_fr)
        print(f" < {thresh:5d} frames ({thresh * DOWNSAMPLE / SAMPLING_RATE:6.1f}s): {pct:5.1f}%")

    # --- Per-split stats ---
    print("\n" + rule)
    print("PER-SPLIT STATISTICS (downsampled frames)")
    print(rule)
    for split in ['train', 'val', 'test']:
        arr = np.array(split_stats[split])
        if len(arr) == 0:
            print(f" {split}: no segments")
            continue
        print(f"\n {split.upper()} ({len(arr)} segments):")
        print(f" Min={arr.min()}, Max={arr.max()}, Mean={arr.mean():.1f}, Median={np.median(arr):.0f}")

    # --- Histogram (text-based) ---
    print("\n" + rule)
    print("HISTOGRAM OF SEGMENT DURATIONS (seconds)")
    print(rule)
    bins = [0, 1, 2, 3, 4, 5, 7, 10, 15, 20, 30, 60, 120, 300, 600]
    for lo, hi in zip(bins, bins[1:]):
        count = ((raw_sec >= lo) & (raw_sec < hi)).sum()
        pct = 100.0 * count / len(raw_sec)
        bar = '#' * int(pct / 2)  # one '#' per 2 percentage points
        print(f" [{lo:4d}-{hi:4d})s: {count:5d} ({pct:5.1f}%) {bar}")
    # Last bin: >= 600
    count = (raw_sec >= bins[-1]).sum()
    pct = 100.0 * count / len(raw_sec)
    bar = '#' * int(pct / 2)
    print(f" [{bins[-1]:4d}+ )s: {count:5d} ({pct:5.1f}%) {bar}")

    # --- Key insight ---
    print("\n" + banner)
    print("KEY INSIGHTS")
    print(banner)
    median_sec = np.median(raw_sec)
    mean_sec = raw_sec.mean()
    print(f" Median segment duration: {median_sec:.1f}s ({median_sec * SAMPLING_RATE / DOWNSAMPLE:.0f} ds-frames)")
    print(f" Mean segment duration: {mean_sec:.1f}s ({mean_sec * SAMPLING_RATE / DOWNSAMPLE:.0f} ds-frames)")
    print()

    # Suggest optimal window
    p95_sec = np.percentile(raw_sec, 95)
    wf15 = int(15.0 * SAMPLING_RATE / DOWNSAMPLE)
    print(f" 95th percentile duration: {p95_sec:.1f}s")
    print(f" -> A window of {p95_sec:.0f}s would cover 95% of segments without cropping")
    print(f" -> Current default window_sec=15.0 -> window_frames={wf15}")
    pct_crop = 100.0 * (ds_fr > wf15).sum() / len(ds_fr)
    pct_pad = 100.0 * (ds_fr < wf15).sum() / len(ds_fr)
    print(f" {pct_pad:.1f}% segments padded, {pct_crop:.1f}% center-cropped")
# Run the analysis only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|