# frame_namer.py
"""
Smart Frame Naming Module
Automatically names frames based on their content/pose
"""
import cv2
import numpy as np
from typing import List
from scipy.spatial import distance
# Optional imports for advanced features
try:
import torch
from transformers import pipeline
HAS_TRANSFORMERS = True
except ImportError:
HAS_TRANSFORMERS = False
torch = None
pipeline = None
class FrameNamer:
    """Intelligent frame naming based on pose analysis.

    Analyzes per-frame silhouette features (bounding box, centroid, area)
    to classify a frame sequence as one animation type and generate
    descriptive frame names such as ``walk_01`` or ``attack_strike``.
    """

    def __init__(self):
        # Keywords per animation type; currently unused by the geometric
        # detector, reserved for a future text/image classifier.
        self.pose_keywords = {
            'idle': ['standing', 'still', 'neutral', 'waiting'],
            'walk': ['walking', 'moving', 'step'],
            'run': ['running', 'fast', 'sprint'],
            'jump': ['jumping', 'leap', 'air'],
            'attack': ['attacking', 'strike', 'hit', 'swing'],
            'hurt': ['hurt', 'damage', 'hit', 'pain'],
            'die': ['dying', 'dead', 'fall'],
            'cast': ['casting', 'spell', 'magic'],
            'block': ['blocking', 'defend', 'guard'],
            'shoot': ['shooting', 'bow', 'arrow', 'ranged']
        }
        # Initialize pose classifier if available
        self.classifier = None
        self._init_classifier()

    def _init_classifier(self):
        """Initialize image classifier for pose detection."""
        try:
            # Placeholder: in production a custom-trained pose model
            # would be loaded here.
            self.classifier = None
        except Exception as e:
            print(f"Could not load classifier: {e}")
            self.classifier = None

    def name_frames(self, frames: List[np.ndarray]) -> List[str]:
        """Generate intelligent names for frames.

        Args:
            frames: List of frame images (BGRA, BGR or grayscale arrays).

        Returns:
            List of frame names, one per input frame.
        """
        if len(frames) == 0:
            return []
        # Analyze each frame, then classify the sequence as a whole.
        frame_features = [self._extract_features(frame) for frame in frames]
        animation_type = self._detect_animation_type(frame_features)
        total = len(frames)
        return [self._name_for_frame(animation_type, i, total)
                for i in range(total)]

    @staticmethod
    def _name_for_frame(animation_type: str, i: int, total: int) -> str:
        """Build the name for frame ``i`` (0-based) of ``total`` frames."""
        # Plain sequential naming for types with no special key frames.
        if animation_type in ('idle', 'walk', 'run', 'hurt', 'block'):
            return f"{animation_type}_{i + 1:02d}"
        if animation_type == 'jump':
            if i == 0:
                return "jump_start"
            if i == total - 1:
                return "jump_land"
            return f"jump_{i:02d}"
        if animation_type == 'attack':
            # Check order matters for short sequences: windup wins at
            # i == 0, strike at the midpoint, recover on the last frame.
            if i == 0:
                return "attack_windup"
            if i == total // 2:
                return "attack_strike"
            if i == total - 1:
                return "attack_recover"
            return f"attack_{i:02d}"
        if animation_type == 'die':
            if i == total - 1:
                return "die_dead"
            return f"die_{i + 1:02d}"
        if animation_type in ('cast', 'shoot'):
            first, last = {
                'cast': ("cast_start", "cast_release"),
                'shoot': ("shoot_draw", "shoot_release"),
            }[animation_type]
            if i == 0:
                return first
            if i == total - 1:
                return last
            return f"{animation_type}_{i:02d}"
        # Unknown type: generic numbered frame.
        return f"frame_{i + 1:03d}"

    def _extract_features(self, frame: np.ndarray) -> dict:
        """Extract silhouette features from a frame for analysis.

        Args:
            frame: Input frame image; BGRA (alpha channel used as the
                content mask), BGR, or single-channel grayscale.

        Returns:
            Dictionary with bbox, width/height, center, aspect ratio,
            centroid, pixel area and center-of-mass height ratio. Falls
            back to whole-frame defaults when the frame is empty.
        """
        features = {}
        # Build a boolean foreground mask. Using a boolean mask in every
        # branch keeps 'area' a true pixel count (the previous 0/255
        # uint8 mask inflated it 255x in the non-alpha path).
        if frame.ndim == 3 and frame.shape[2] == 4:
            # Alpha channel marks the sprite content directly.
            mask = frame[:, :, 3] > 10
        else:
            if frame.ndim == 3:
                gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            else:
                # Fix: accept 2-D grayscale input instead of crashing
                # in cvtColor.
                gray = frame
            # Same cut-off as the alpha branch (threshold at 10).
            mask = gray > 10
        # Foreground pixel coordinates as (row, col) pairs.
        coords = np.column_stack(np.where(mask))
        if len(coords) > 0:
            y_min, x_min = coords.min(axis=0)
            y_max, x_max = coords.max(axis=0)
            features['bbox'] = (x_min, y_min, x_max, y_max)
            features['width'] = x_max - x_min
            features['height'] = y_max - y_min
            features['center_x'] = (x_min + x_max) / 2
            features['center_y'] = (y_min + y_max) / 2
            features['aspect_ratio'] = features['width'] / max(features['height'], 1)
            # Centroid of the binary mask: mean foreground coordinate,
            # equivalent to image moments m10/m00 and m01/m00 on a binary
            # image (coords is non-empty here, so this is always defined).
            features['centroid_x'] = float(coords[:, 1].mean())
            features['centroid_y'] = float(coords[:, 0].mean())
            # Pixel count (area)
            features['area'] = int(mask.sum())
            # Center of mass height ratio (0 = top of frame, 1 = bottom)
            features['com_height_ratio'] = features['centroid_y'] / frame.shape[0]
        else:
            # Empty frame: fall back to whole-frame geometry.
            features['bbox'] = (0, 0, frame.shape[1], frame.shape[0])
            features['width'] = frame.shape[1]
            features['height'] = frame.shape[0]
            features['center_x'] = frame.shape[1] / 2
            features['center_y'] = frame.shape[0] / 2
            features['aspect_ratio'] = 1.0
            features['centroid_x'] = frame.shape[1] / 2
            features['centroid_y'] = frame.shape[0] / 2
            features['area'] = 0
            features['com_height_ratio'] = 0.5
        return features

    def _detect_animation_type(self, frame_features: List[dict]) -> str:
        """Detect the type of animation based on frame features.

        Args:
            frame_features: List of feature dictionaries from
                :meth:`_extract_features`, in frame order.

        Returns:
            Animation type string (one of the keys of ``pose_keywords``,
            or 'idle' as the default).
        """
        if len(frame_features) < 2:
            return 'idle'
        # Per-step motion metrics between consecutive frames.
        center_x_changes = []
        center_y_changes = []
        area_changes = []
        com_height_changes = []
        for prev, curr in zip(frame_features, frame_features[1:]):
            center_x_changes.append(abs(curr['center_x'] - prev['center_x']))
            center_y_changes.append(abs(curr['center_y'] - prev['center_y']))
            area_changes.append(abs(curr['area'] - prev['area']))
            com_height_changes.append(abs(curr['com_height_ratio'] - prev['com_height_ratio']))
        avg_x_change = np.mean(center_x_changes) if center_x_changes else 0
        avg_y_change = np.mean(center_y_changes) if center_y_changes else 0
        avg_area_change = np.mean(area_changes) if area_changes else 0
        # Size variation across the whole sequence.
        heights = [f['height'] for f in frame_features]
        height_variance = np.var(heights)
        max_height = max(heights)
        height_range = max_height - min(heights)
        widths = [f['width'] for f in frame_features]
        width_variance = np.var(widths)
        first = frame_features[0]
        last = frame_features[-1]
        # Heuristics, checked in priority order:
        # Jump: vertical motion dominates and the silhouette height varies.
        if avg_y_change > avg_x_change * 1.5 and height_range > max_height * 0.15:
            return 'jump'
        # Attack: large area changes (e.g. a weapon swinging in and out).
        if avg_area_change > np.mean([f['area'] for f in frame_features]) * 0.1:
            return 'attack'
        # Hurt/Die: center of mass moves downward; 'die' if the sprite
        # also collapses (final height well below the initial height).
        if last['com_height_ratio'] > first['com_height_ratio'] + 0.1:
            if last['height'] < first['height'] * 0.7:
                return 'die'
            return 'hurt'
        # Cast: height peaks mid-sequence (arms raised then lowered).
        if height_variance > max_height * 0.1:
            mid = frame_features[len(frame_features) // 2]
            if mid['height'] > first['height'] and mid['height'] > last['height']:
                return 'cast'
        # Block: stance widens noticeably relative to the first frame.
        if first['width'] * 1.2 < max(widths):
            return 'block'
        # Shoot: moderate width variation (one arm extending).
        if width_variance > np.mean(widths) * 0.05:
            return 'shoot'
        # Run vs walk: horizontal speed relative to sprite width.
        if avg_x_change > first['width'] * 0.15:
            return 'run'
        elif avg_x_change > first['width'] * 0.05:
            return 'walk'
        # Default to idle
        return 'idle'

    def _get_pose_variation(self, features: dict, all_features: List[dict],
                            index: int) -> str:
        """Get pose variation descriptor.

        Args:
            features: Current frame features (unused by this simple
                positional heuristic).
            all_features: All frame features.
            index: Current frame index.

        Returns:
            'start', 'mid' or 'end' depending on the frame's position.
        """
        if index == 0:
            return 'start'
        elif index == len(all_features) - 1:
            return 'end'
        else:
            return 'mid'

    def suggest_animation_name(self, frames: List[np.ndarray]) -> str:
        """Suggest a name for the entire animation.

        Args:
            frames: List of frame images.

        Returns:
            Suggested animation name, e.g. ``character_walk``.
        """
        animation_type = self._detect_animation_type(
            [self._extract_features(f) for f in frames])
        suggestions = {
            'idle': 'character_idle',
            'walk': 'character_walk',
            'run': 'character_run',
            'jump': 'character_jump',
            'attack': 'character_attack',
            'hurt': 'character_hurt',
            'die': 'character_die',
            'cast': 'character_cast_spell',
            'block': 'character_block',
            'shoot': 'character_shoot'
        }
        return suggestions.get(animation_type, 'character_animation')