| import pandas as pd |
| import numpy as np |
| import random |
| from datetime import datetime, timedelta |
| import logging |
|
|
| |
| logger = logging.getLogger(__name__) |
|
|
def _build_timestamps(start_time, end_time):
    """Return 10-minute-spaced timestamps from start_time through end_time, inclusive."""
    timestamps = []
    current = start_time
    while current <= end_time:
        timestamps.append(current)
        current += timedelta(minutes=10)
    return timestamps


def _last_known_values(existing_data, agent_id):
    """Return (last_apr, last_roi) for *agent_id*, seeding random defaults when missing.

    Reads the most recent 'APR'/'ROI' rows for the agent from *existing_data*;
    any value that is absent or NaN is replaced with a small random seed in
    [-0.1, 0.1] so the generated series always has a starting point.
    """
    last_apr = None
    last_roi = None

    if not existing_data.empty:
        apr_rows = existing_data[(existing_data['agent_id'] == agent_id) &
                                 (existing_data['metric_type'] == 'APR')]
        if not apr_rows.empty:
            last_apr = apr_rows['apr'].iloc[-1]

        roi_rows = existing_data[(existing_data['agent_id'] == agent_id) &
                                 (existing_data['metric_type'] == 'ROI')]
        if not roi_rows.empty:
            last_roi = roi_rows['roi'].iloc[-1]

    if last_apr is None or pd.isna(last_apr):
        last_apr = random.uniform(-0.1, 0.1)
    if last_roi is None or pd.isna(last_roi):
        last_roi = random.uniform(-0.1, 0.1)

    return last_apr, last_roi


def _generate_apr_series(last_apr, num_points):
    """Random-walk APR series of *num_points* values starting exactly at *last_apr*.

    The walk is shaped by 3-5 randomly-directed trend periods, Gaussian noise
    and 30% momentum from the previous step, clamped to [-0.5, 1.0].
    """
    values = [last_apr]

    num_trends = random.randint(3, 5)
    # Guard against num_points < num_trends, which would make every period empty.
    period_length = max(1, num_points // num_trends)

    trend_periods = [
        {
            'start': i * period_length,
            'end': min((i + 1) * period_length, num_points),
            'direction': random.choice([-1, 0, 1]),
            'strength': random.uniform(0.01, 0.03),
        }
        for i in range(num_trends)
    ]

    for i in range(1, num_points):
        # Points past the last trend period fall back to a neutral trend.
        trend = next((t for t in trend_periods if t['start'] <= i < t['end']),
                     {'direction': 0, 'strength': 0.01})

        base_change = trend['direction'] * trend['strength']
        random_change = random.normalvariate(0, 0.01)
        prev_change = 0 if i == 1 else values[i - 1] - values[i - 2]
        momentum = 0.3 * prev_change

        new_value = values[i - 1] + base_change + random_change + momentum
        values.append(max(-0.5, min(1.0, new_value)))

    return values


def _generate_adjusted_apr_series(apr_values):
    """Adjusted APR = APR plus a small offset whose sign is re-rolled every 5 points.

    Offset magnitude is re-drawn from [0.05, 0.15] at every point; results are
    clamped to [-0.5, 1.0] like the APR series.
    """
    adjusted = []
    offset_direction = 1  # explicitly initialized; re-rolled immediately at i == 0
    for i, apr_value in enumerate(apr_values):
        if i % 5 == 0:
            offset_direction = 1 if random.random() > 0.5 else -1
        offset = offset_direction * random.uniform(0.05, 0.15)
        adjusted.append(max(-0.5, min(1.0, apr_value + offset)))
    return adjusted


def _generate_roi_series(last_roi, num_points):
    """ROI series of *num_points* values drifting between shuffled targets in [-0.01, 0].

    Five evenly-spaced targets in [-0.01, 0] are visited in random order, with
    tiny per-step jitter; every value is clamped to [-0.01, 0]. Starts from
    *last_roi* when it already lies in range, else from -0.005.
    """
    targets = [-0.01 + i * 0.0025 for i in range(5)]
    random.shuffle(targets)
    segment_length = num_points // len(targets)

    if last_roi is None or pd.isna(last_roi) or last_roi < -0.01 or last_roi > 0:
        current_value = -0.005  # mid-range restart when out of band
    else:
        current_value = last_roi

    roi_values = [current_value]

    for segment_idx, target in enumerate(targets):
        start_idx = segment_idx * segment_length
        end_idx = min((segment_idx + 1) * segment_length, num_points)
        steps = end_idx - start_idx
        if steps <= 0:
            continue  # possible when num_points < 5

        current_value = roi_values[-1]
        step_change = (target - current_value) / steps

        for _ in range(steps):
            new_value = current_value + step_change + random.uniform(-0.0005, 0.0005)
            new_value = max(-0.01, min(0.0, new_value))
            roi_values.append(new_value)
            current_value = new_value

    # Top up with small jitter if integer division left the series short.
    while len(roi_values) < num_points:
        new_value = roi_values[-1] + random.uniform(-0.001, 0.001)
        roi_values.append(max(-0.01, min(0.0, new_value)))

    return roi_values[:num_points]


def generate_continuous_random_data(existing_data, end_time=None):
    """
    Generate authentic-looking random data that continues from existing data,
    with adjusted APR following APR with a small offset.

    Emits one 'APR' row and one 'ROI' row per agent per 10-minute timestamp
    between the end of *existing_data* and *end_time*.

    Args:
        existing_data: DataFrame with columns ``timestamp``, ``agent_id``,
            ``agent_name``, ``metric_type``, ``apr``, ``adjusted_apr`` and
            ``roi``.  May be empty, in which case a 30-day history for a
            single dummy agent is synthesized.
        end_time: Optional end time (defaults to current time).

    Returns:
        DataFrame of dummy data points flagged ``is_dummy=True``; an empty
        DataFrame when there is no gap to fill.
    """
    if end_time is None:
        end_time = datetime.now()

    # Continue 10 minutes after the last known point, or fabricate 30 days
    # of history when there is no existing data at all.
    if not existing_data.empty:
        start_time = existing_data['timestamp'].max() + timedelta(minutes=10)
    else:
        start_time = end_time - timedelta(days=30)

    timestamps = _build_timestamps(start_time, end_time)
    if not timestamps:
        return pd.DataFrame()

    if not existing_data.empty:
        unique_agents = existing_data[['agent_id', 'agent_name']].drop_duplicates().to_dict('records')
    else:
        unique_agents = [{'agent_id': 'dummy_agent', 'agent_name': 'Dummy Agent'}]

    num_points = len(timestamps)
    dummy_data_list = []

    for agent in unique_agents:
        agent_id = agent['agent_id']

        # NOTE(review): the original also fetched a "last adjusted APR" here,
        # but it was never used — the adjusted series is derived purely from
        # the APR series below, so that dead code has been removed.
        last_apr, last_roi = _last_known_values(existing_data, agent_id)

        apr_values = _generate_apr_series(last_apr, num_points)
        adjusted_apr_values = _generate_adjusted_apr_series(apr_values)
        roi_values = _generate_roi_series(last_roi, num_points)

        for i, timestamp in enumerate(timestamps):
            dummy_data_list.append({
                'timestamp': timestamp,
                'apr': apr_values[i],
                'adjusted_apr': adjusted_apr_values[i],
                'roi': None,
                'agent_id': agent_id,
                'agent_name': agent['agent_name'],
                'is_dummy': True,
                'metric_type': 'APR'
            })
            dummy_data_list.append({
                'timestamp': timestamp,
                'apr': None,
                'adjusted_apr': None,
                'roi': roi_values[i],
                'agent_id': agent_id,
                'agent_name': agent['agent_name'],
                'is_dummy': True,
                'metric_type': 'ROI'
            })

    return pd.DataFrame(dummy_data_list)
|
|