| import pandas as pd |
| import numpy as np |
| import random |
| from datetime import datetime, timedelta |
| import logging |
|
|
| |
| logger = logging.getLogger(__name__) |
|
|
def _build_timestamps(start_time, end_time):
    """Return 10-minute-spaced timestamps from start_time through end_time, inclusive."""
    timestamps = []
    current = start_time
    while current <= end_time:
        timestamps.append(current)
        current += timedelta(minutes=10)
    return timestamps


def _last_known_values(existing_data, agent_id):
    """Return (last_apr, last_roi) for *agent_id*, seeding random defaults when missing.

    Reads the most recent 'APR'/'ROI' rows for the agent from *existing_data*;
    any value that is absent or NaN is replaced with a small random seed in
    [-0.1, 0.1] so the generated series always has a starting point.
    """
    last_apr = None
    last_roi = None

    if not existing_data.empty:
        apr_rows = existing_data[(existing_data['agent_id'] == agent_id) &
                                 (existing_data['metric_type'] == 'APR')]
        if not apr_rows.empty:
            last_apr = apr_rows['apr'].iloc[-1]

        roi_rows = existing_data[(existing_data['agent_id'] == agent_id) &
                                 (existing_data['metric_type'] == 'ROI')]
        if not roi_rows.empty:
            last_roi = roi_rows['roi'].iloc[-1]

    if last_apr is None or pd.isna(last_apr):
        last_apr = random.uniform(-0.1, 0.1)
    if last_roi is None or pd.isna(last_roi):
        last_roi = random.uniform(-0.1, 0.1)

    return last_apr, last_roi


def _generate_apr_series(last_apr, num_points):
    """Random-walk APR series of *num_points* values starting exactly at *last_apr*.

    The walk is shaped by 3-5 randomly-directed trend periods, Gaussian noise
    and 30% momentum from the previous step, clamped to [-0.5, 1.0].
    """
    values = [last_apr]

    num_trends = random.randint(3, 5)
    # Guard against num_points < num_trends, which would make every period empty.
    period_length = max(1, num_points // num_trends)

    trend_periods = [
        {
            'start': i * period_length,
            'end': min((i + 1) * period_length, num_points),
            'direction': random.choice([-1, 0, 1]),
            'strength': random.uniform(0.01, 0.03),
        }
        for i in range(num_trends)
    ]

    for i in range(1, num_points):
        # Points past the last trend period fall back to a neutral trend.
        trend = next((t for t in trend_periods if t['start'] <= i < t['end']),
                     {'direction': 0, 'strength': 0.01})

        base_change = trend['direction'] * trend['strength']
        random_change = random.normalvariate(0, 0.01)
        prev_change = 0 if i == 1 else values[i - 1] - values[i - 2]
        momentum = 0.3 * prev_change

        new_value = values[i - 1] + base_change + random_change + momentum
        values.append(max(-0.5, min(1.0, new_value)))

    return values


def _generate_adjusted_apr_series(apr_values):
    """Adjusted APR = APR plus a small offset whose sign is re-rolled every 5 points.

    Offset magnitude is re-drawn from [0.05, 0.15] at every point; results are
    clamped to [-0.5, 1.0] like the APR series.
    """
    adjusted = []
    offset_direction = 1  # explicitly initialized; re-rolled immediately at i == 0
    for i, apr_value in enumerate(apr_values):
        if i % 5 == 0:
            offset_direction = 1 if random.random() > 0.5 else -1
        offset = offset_direction * random.uniform(0.05, 0.15)
        adjusted.append(max(-0.5, min(1.0, apr_value + offset)))
    return adjusted


def _generate_roi_series(last_roi, num_points):
    """ROI series of *num_points* values drifting between shuffled targets in [-0.01, 0].

    Five evenly-spaced targets in [-0.01, 0] are visited in random order, with
    tiny per-step jitter; every value is clamped to [-0.01, 0]. Starts from
    *last_roi* when it already lies in range, else from -0.005.
    """
    targets = [-0.01 + i * 0.0025 for i in range(5)]
    random.shuffle(targets)
    segment_length = num_points // len(targets)

    if last_roi is None or pd.isna(last_roi) or last_roi < -0.01 or last_roi > 0:
        current_value = -0.005  # mid-range restart when out of band
    else:
        current_value = last_roi

    roi_values = [current_value]

    for segment_idx, target in enumerate(targets):
        start_idx = segment_idx * segment_length
        end_idx = min((segment_idx + 1) * segment_length, num_points)
        steps = end_idx - start_idx
        if steps <= 0:
            continue  # possible when num_points < 5

        current_value = roi_values[-1]
        step_change = (target - current_value) / steps

        for _ in range(steps):
            new_value = current_value + step_change + random.uniform(-0.0005, 0.0005)
            new_value = max(-0.01, min(0.0, new_value))
            roi_values.append(new_value)
            current_value = new_value

    # Top up with small jitter if integer division left the series short.
    while len(roi_values) < num_points:
        new_value = roi_values[-1] + random.uniform(-0.001, 0.001)
        roi_values.append(max(-0.01, min(0.0, new_value)))

    return roi_values[:num_points]


def generate_continuous_random_data(existing_data, end_time=None):
    """
    Generate authentic-looking random data that continues from existing data,
    with adjusted APR following APR with a small offset.

    Emits one 'APR' row and one 'ROI' row per agent per 10-minute timestamp
    between the end of *existing_data* and *end_time*.

    Args:
        existing_data: DataFrame with columns ``timestamp``, ``agent_id``,
            ``agent_name``, ``metric_type``, ``apr``, ``adjusted_apr`` and
            ``roi``.  May be empty, in which case a 30-day history for a
            single dummy agent is synthesized.
        end_time: Optional end time (defaults to current time).

    Returns:
        DataFrame of dummy data points flagged ``is_dummy=True``; an empty
        DataFrame when there is no gap to fill.
    """
    if end_time is None:
        end_time = datetime.now()

    # Continue 10 minutes after the last known point, or fabricate 30 days
    # of history when there is no existing data at all.
    if not existing_data.empty:
        start_time = existing_data['timestamp'].max() + timedelta(minutes=10)
    else:
        start_time = end_time - timedelta(days=30)

    timestamps = _build_timestamps(start_time, end_time)
    if not timestamps:
        return pd.DataFrame()

    if not existing_data.empty:
        unique_agents = existing_data[['agent_id', 'agent_name']].drop_duplicates().to_dict('records')
    else:
        unique_agents = [{'agent_id': 'dummy_agent', 'agent_name': 'Dummy Agent'}]

    num_points = len(timestamps)
    dummy_data_list = []

    for agent in unique_agents:
        agent_id = agent['agent_id']

        # NOTE(review): the original also fetched a "last adjusted APR" here,
        # but it was never used — the adjusted series is derived purely from
        # the APR series below, so that dead code has been removed.
        last_apr, last_roi = _last_known_values(existing_data, agent_id)

        apr_values = _generate_apr_series(last_apr, num_points)
        adjusted_apr_values = _generate_adjusted_apr_series(apr_values)
        roi_values = _generate_roi_series(last_roi, num_points)

        for i, timestamp in enumerate(timestamps):
            dummy_data_list.append({
                'timestamp': timestamp,
                'apr': apr_values[i],
                'adjusted_apr': adjusted_apr_values[i],
                'roi': None,
                'agent_id': agent_id,
                'agent_name': agent['agent_name'],
                'is_dummy': True,
                'metric_type': 'APR'
            })
            dummy_data_list.append({
                'timestamp': timestamp,
                'apr': None,
                'adjusted_apr': None,
                'roi': roi_values[i],
                'agent_id': agent_id,
                'agent_name': agent['agent_name'],
                'is_dummy': True,
                'metric_type': 'ROI'
            })

    return pd.DataFrame(dummy_data_list)
|
|