| |
| """ |
| Analyze segment lengths in the recognition dataset. |
| |
| For each annotation file, computes segment lengths in: |
| - Raw frames (at 100Hz sampling rate) |
| - Downsampled frames (downsample=5 -> 20Hz effective) |
| |
| Reports statistics and distribution relative to window_frames used in training. |
| """ |
|
|
| import os |
| import sys |
| import json |
| import re |
| import numpy as np |
| from collections import defaultdict |
|
|
| sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) |
| from data.dataset import DATASET_DIR, TRAIN_VOLS, VAL_VOLS, TEST_VOLS |
|
|
# Root directory that holds one sub-directory of annotation JSON files per
# volume. Python does not expand shell-style variables in string literals, so
# the reference is resolved explicitly here; a variable that is not set in the
# environment is left in the string unchanged.
ANNOTATION_DIR = os.path.expandvars("${PULSE_ROOT}")
# Sampling rate of the raw signal, in Hz.
SAMPLING_RATE = 100
# Downsampling factor; the effective rate is SAMPLING_RATE / DOWNSAMPLE.
DOWNSAMPLE = 5
|
|
|
|
def parse_timestamp(ts_str):
    """Convert an ``MM:SS`` or ``HH:MM:SS`` string into a number of seconds.

    Surrounding whitespace is stripped before the string is split on ``:``.

    Args:
        ts_str: Timestamp text with two or three colon-separated integer
            fields.

    Returns:
        The total number of seconds as an ``int``, or ``0`` when the string
        does not have exactly two or three colon-separated fields.

    Raises:
        ValueError: If a field of a two- or three-field timestamp is not a
            valid integer.
    """
    fields = ts_str.strip().split(':')
    if len(fields) not in (2, 3):
        return 0

    # Base-60 fold: every step promotes the running total by one unit
    # (seconds -> minutes -> hours) before adding the next field.
    seconds = 0
    for field in fields:
        seconds = seconds * 60 + int(field)
    return seconds
|
|
|
|
def _print_section(title, rule="-"):
    """Print a blank line, then *title* framed by two 70-character rules."""
    print("\n" + rule * 70)
    print(title)
    print(rule * 70)


def main():
    """Scan all annotation files and print segment-length statistics.

    For every volume in the train/val/test lists, each ``*.json`` file in the
    volume's annotation directory is read (provided a matching scenario
    directory exists under ``DATASET_DIR``). The ``timestamp`` of every entry
    in the file's ``segments`` list is parsed as a ``start - end`` range, and
    the segment length is recorded in seconds, raw frames and downsampled
    frames. Segments whose timestamp is missing, unparsable, or whose end is
    not after its start are counted as skipped.

    The collected lengths are then summarised on stdout: overall statistics,
    percentiles, comparison against several window sizes, per-split
    statistics and a text histogram. When no valid segment is found, only the
    counters are printed and the function returns early.
    """
    # Compiled once instead of being looked up for every segment. Matches
    # "M:S - M:S" or "H:M:S - H:M:S" at the start of the timestamp text.
    timestamp_range = re.compile(
        r'(\d+:\d+(?::\d+)?)\s*-\s*(\d+:\d+(?::\d+)?)')

    # De-duplicate so that a volume listed in more than one split is scanned
    # (and counted) only once; it is attributed to the first matching split.
    all_vols = sorted(set(TRAIN_VOLS + VAL_VOLS + TEST_VOLS))

    raw_lengths_sec = []
    raw_lengths_frames = []
    ds_lengths_frames = []

    # Downsampled segment lengths grouped by split name.
    split_stats = defaultdict(list)

    total_scenarios = 0
    total_segments = 0
    skipped_segments = 0

    for vol in all_vols:
        if vol in TRAIN_VOLS:
            split = 'train'
        elif vol in VAL_VOLS:
            split = 'val'
        else:
            split = 'test'

        ann_vol_dir = os.path.join(ANNOTATION_DIR, vol)
        if not os.path.isdir(ann_vol_dir):
            print(f"WARNING: No annotation dir for {vol}")
            continue

        for ann_file in sorted(os.listdir(ann_vol_dir)):
            if not ann_file.endswith('.json'):
                continue
            # Strip only the trailing extension; str.replace would also
            # remove any '.json' occurring in the middle of the name.
            scenario = ann_file[:-len('.json')]
            ann_path = os.path.join(ann_vol_dir, ann_file)

            # Only annotations with a matching scenario directory count.
            scenario_dir = os.path.join(DATASET_DIR, vol, scenario)
            if not os.path.isdir(scenario_dir):
                continue

            # Explicit encoding: the platform default is not always UTF-8.
            with open(ann_path, encoding='utf-8') as f:
                ann = json.load(f)

            total_scenarios += 1

            for seg in ann.get('segments', []):
                # A segment without a string timestamp is treated like an
                # unparsable one rather than aborting the whole analysis.
                timestamp = seg.get('timestamp') if isinstance(seg, dict) else None
                m = timestamp_range.match(timestamp) if isinstance(timestamp, str) else None
                if not m:
                    skipped_segments += 1
                    continue

                start_sec = parse_timestamp(m.group(1))
                end_sec = parse_timestamp(m.group(2))

                # Empty or reversed ranges carry no usable length.
                if end_sec <= start_sec:
                    skipped_segments += 1
                    continue

                duration_sec = end_sec - start_sec
                raw_frames = duration_sec * SAMPLING_RATE
                # Difference of the truncated downsampled start/end indices.
                ds_frames = (int(end_sec * SAMPLING_RATE / DOWNSAMPLE)
                             - int(start_sec * SAMPLING_RATE / DOWNSAMPLE))

                raw_lengths_sec.append(duration_sec)
                raw_lengths_frames.append(raw_frames)
                ds_lengths_frames.append(ds_frames)
                split_stats[split].append(ds_frames)
                total_segments += 1

    raw_sec = np.array(raw_lengths_sec)
    raw_fr = np.array(raw_lengths_frames)
    ds_fr = np.array(ds_lengths_frames)

    print("=" * 70)
    print("SEGMENT LENGTH ANALYSIS FOR RECOGNITION DATASET")
    print("=" * 70)
    print(f"\nTotal scenarios: {total_scenarios}")
    print(f"Total valid segments: {total_segments}")
    print(f"Skipped segments (bad timestamp): {skipped_segments}")
    print(f"Sampling rate: {SAMPLING_RATE} Hz")
    print(f"Downsample factor: {DOWNSAMPLE}")
    print(f"Effective rate after downsample: {SAMPLING_RATE / DOWNSAMPLE} Hz")

    # min()/max()/percentile() raise on empty arrays, so stop here when
    # nothing was collected instead of crashing.
    if total_segments == 0:
        print("\nNo valid segments found; nothing to analyze.")
        return

    n_segments = len(ds_fr)
    percentiles = [5, 10, 25, 50, 75, 90, 95]

    _print_section("SEGMENT DURATION (seconds)")
    print(f" Min: {raw_sec.min():.1f}s")
    print(f" Max: {raw_sec.max():.1f}s")
    print(f" Mean: {raw_sec.mean():.2f}s")
    print(f" Median: {np.median(raw_sec):.1f}s")
    print(f" Std: {raw_sec.std():.2f}s")

    for p in percentiles:
        print(f" P{p:2d}: {np.percentile(raw_sec, p):.1f}s")

    _print_section(f"SEGMENT LENGTH (raw frames @ {SAMPLING_RATE}Hz)")
    print(f" Min: {raw_fr.min()}")
    print(f" Max: {raw_fr.max()}")
    print(f" Mean: {raw_fr.mean():.1f}")
    print(f" Median: {np.median(raw_fr):.0f}")

    _print_section(
        f"SEGMENT LENGTH (downsampled frames @ {SAMPLING_RATE/DOWNSAMPLE:.0f}Hz)")
    print(f" Min: {ds_fr.min()}")
    print(f" Max: {ds_fr.max()}")
    print(f" Mean: {ds_fr.mean():.1f}")
    print(f" Median: {np.median(ds_fr):.0f}")
    print(f" Std: {ds_fr.std():.1f}")

    for p in percentiles:
        print(f" P{p:2d}: {np.percentile(ds_fr, p):.0f}")

    _print_section("COMPARISON WITH window_frames SETTINGS")

    for window_sec in [5.0, 10.0, 15.0, 20.0, 30.0]:
        wf = int(window_sec * SAMPLING_RATE / DOWNSAMPLE)
        shorter = (ds_fr < wf).sum()
        longer = (ds_fr > wf).sum()
        pct_shorter = 100.0 * shorter / n_segments
        pct_longer = 100.0 * longer / n_segments
        print(f"\n window_sec={window_sec:5.1f}s -> window_frames={wf}")
        print(f" Segments SHORTER than window: {shorter:4d} ({pct_shorter:5.1f}%) -> will be PADDED")
        print(f" Segments LONGER than window: {longer:4d} ({pct_longer:5.1f}%) -> will be CENTER-CROPPED")

    _print_section("PERCENTAGE SHORTER THAN THRESHOLDS (downsampled frames)")
    for thresh in [20, 40, 60, 100, 200, 300, 400, 500, 1000, 2000]:
        pct = 100.0 * (ds_fr < thresh).sum() / n_segments
        print(f" < {thresh:5d} frames ({thresh * DOWNSAMPLE / SAMPLING_RATE:6.1f}s): {pct:5.1f}%")

    _print_section("PER-SPLIT STATISTICS (downsampled frames)")
    for split in ['train', 'val', 'test']:
        arr = np.array(split_stats[split])
        if len(arr) == 0:
            print(f" {split}: no segments")
            continue
        print(f"\n {split.upper()} ({len(arr)} segments):")
        print(f" Min={arr.min()}, Max={arr.max()}, Mean={arr.mean():.1f}, Median={np.median(arr):.0f}")

    _print_section("HISTOGRAM OF SEGMENT DURATIONS (seconds)")
    bins = [0, 1, 2, 3, 4, 5, 7, 10, 15, 20, 30, 60, 120, 300, 600]
    # Half-open buckets [lo, hi); one '#' per two percentage points.
    for lo, hi in zip(bins, bins[1:]):
        count = ((raw_sec >= lo) & (raw_sec < hi)).sum()
        pct = 100.0 * count / len(raw_sec)
        bar = '#' * int(pct / 2)
        print(f" [{lo:4d}-{hi:4d})s: {count:5d} ({pct:5.1f}%) {bar}")

    # Open-ended last bucket for everything at or beyond the final edge.
    count = (raw_sec >= bins[-1]).sum()
    pct = 100.0 * count / len(raw_sec)
    bar = '#' * int(pct / 2)
    print(f" [{bins[-1]:4d}+ )s: {count:5d} ({pct:5.1f}%) {bar}")

    _print_section("KEY INSIGHTS", rule="=")
    median_sec = np.median(raw_sec)
    mean_sec = raw_sec.mean()
    print(f" Median segment duration: {median_sec:.1f}s ({median_sec * SAMPLING_RATE / DOWNSAMPLE:.0f} ds-frames)")
    print(f" Mean segment duration: {mean_sec:.1f}s ({mean_sec * SAMPLING_RATE / DOWNSAMPLE:.0f} ds-frames)")
    print()

    p95_sec = np.percentile(raw_sec, 95)
    # Single definition of the default window so the printed value and the
    # computed frame count cannot drift apart.
    default_window_sec = 15.0
    wf15 = int(default_window_sec * SAMPLING_RATE / DOWNSAMPLE)
    print(f" 95th percentile duration: {p95_sec:.1f}s")
    print(f" -> A window of {p95_sec:.0f}s would cover 95% of segments without cropping")
    print(f" -> Current default window_sec={default_window_sec} -> window_frames={wf15}")
    pct_crop = 100.0 * (ds_fr > wf15).sum() / n_segments
    pct_pad = 100.0 * (ds_fr < wf15).sum() / n_segments
    print(f" {pct_pad:.1f}% segments padded, {pct_crop:.1f}% center-cropped")
|
|
|
|
# Run the analysis only when this file is executed as a script, so that
# importing the module does not trigger the scan.
if __name__ == '__main__':
    main()
|
|