# ========================================= # Entity Encoders # ========================================= # These are generated offline/streaming and are the "vocabulary" for the model. # Embedding of a wallet's relationships, behavior, and history. = [ // Data from the 'wallet_profiles' table (Wallet-level lifetime and daily/weekly stats) wallet_profiles_row: [ // Core Info & Timestamps age, // No Contextual wallet_address, // Primary wallet identifier // 7. NEW: Deployed Token Aggregates (8 Features) deployed_tokens_count, // Total tokens created deployed_tokens_migrated_pct, // % that migrated deployed_tokens_avg_lifetime_sec, // Avg duration before dev selling deployed_tokens_avg_peak_mc_usd, // Avg peak marketcap deployed_tokens_median_peak_mc_usd, // Metadata & Balances balance, // Current SOL balance // Lifetime Transaction Counts (Total history) transfers_in_count, // Total native transfers received transfers_out_count, // Total native transfers sent spl_transfers_in_count, // Total SPL token transfers received spl_transfers_out_count,// Total SPL token transfers sent // Lifetime Trading Stats (Total history) total_buys_count, // Total buys across all tokens total_sells_count, // Total sells across all tokens total_winrate, // Overall trading winrate // 1-Day Stats (Realized P&L, Counts, Averages, Volume, Fees, Winrate) stats_1d_realized_profit_sol, stats_1d_realized_profit_pnl, stats_1d_buy_count, stats_1d_sell_count, stats_1d_transfer_in_count, stats_1d_transfer_out_count, stats_1d_avg_holding_period, stats_1d_total_bought_cost_sol, stats_1d_total_sold_income_sol, stats_1d_total_fee, stats_1d_winrate, stats_1d_tokens_traded, // 7-Day Stats (Realized P&L, Counts, Averages, Volume, Fees, Winrate) stats_7d_realized_profit_sol, stats_7d_realized_profit_pnl, stats_7d_buy_count, stats_7d_sell_count, stats_7d_transfer_in_count, stats_7d_transfer_out_count, stats_7d_avg_holding_period, stats_7d_total_bought_cost_sol, stats_7d_total_sold_income_sol, 
stats_7d_total_fee, stats_7d_winrate, stats_7d_tokens_traded, // 30-day stats are omitted: they are not useful in this context ], // Data from the 'wallet_socials' table (Social media and profile info) wallet_socials_row: [ has_pf_profile, has_twitter, has_telegram, is_exchange_wallet, username, ], // Data from the 'wallet_holdings' table (Token-level statistics for held tokens) wallet_holdings_pool: [ , holding_time, // How long the wallet has held the token (only tokens it currently holds or recently traded are checked) balance_pct_to_supply, // Current quantity of the token held // History (Amounts & Costs) history_bought_amount_sol, // Total amount of token bought bought_amount_sol_pct_to_native_balance, // Whether the wallet traded a large share of its own balance // History (Counts) history_total_buys, // Total number of buy transactions history_total_sells, // Total number of sell transactions // Profit and Loss realized_profit_pnl, // Realized P&L as a percentage realized_profit_sol, // Transfers (Non-trade movements) history_transfer_in, history_transfer_out, avarage_trade_gap_seconds, total_priority_fees, // Total tips + Priority Fees ] ] # Multimodal embedding of a token's identity = [, , , , protocol_id] # Text embedding MultiModal processor. # Multimodal VIT encoder. # ----------------------------------------- # 1. TradeEncoder # ----------------------------------------- # Captures large-size trades from any wallet. [timestamp, 'LargeTrade', relative_ts, , trade_direction, sol_amount, dex_platform_id, priority_fee, mev_protection, token_amount_pct_of_holding, quote_amount_pct_of_holding, slippage, price_impact, success, is_bundle, total_usd] # Captures the high-signal "Dev Sold or Bought" event. 
[timestamp, 'Deployer_Trade', relative_ts, , trade_direction, sol_amount, dex_platform_id, priority_fee, mev_protection, token_amount_pct_of_holding, quote_amount_pct_of_holding, slippage, price_impact, success, is_bundle, total_usd] # Captures *all* trades from pre-defined high-P&L/win-rate, KOL and known wallets. [timestamp, 'SmartWallet_Trade', relative_ts, , trade_direction, sol_amount, dex_platform_id, priority_fee, mev_protection, token_amount_pct_of_holding, quote_amount_pct_of_holding, slippage, price_impact, success, is_bundle, total_usd] # Raw trades. Loaded in H/B/H Prefix (first ~10k) and Suffix (last ~5k). [timestamp, 'Trade', relative_ts, , trade_direction, sol_amount, dex_platform_id, priority_fee, mev_protection, token_amount_pct_of_holding, quote_amount_pct_of_holding, slippage, price_impact, success, is_bundle, total_usd] # ----------------------------------------- # 2. TransferEncoder # ----------------------------------------- # Raw transfers. Loaded in H/B/H Prefix (all in first ~10k trade window) and Suffix (all in last ~5k trade window). [timestamp, 'Transfer', relative_ts, , , token_amount, transfer_pct_of_total_supply, transfer_pct_of_holding, priority_fee] # Captures scarce, large transfers *after* the initial launch window. [timestamp, 'LargeTransfer', relative_ts, , , token_amount, transfer_pct_of_total_supply, transfer_pct_of_holding, priority_fee] # ----------------------------------------- # 3. LifecycleEncoder # ----------------------------------------- # The T0 event. [timestamp, 'Mint', 0, , ] # ----------------------------------------- # 4. PoolEncoder # ----------------------------------------- # Signals migration from launchpad to a real pool. [timestamp, 'PoolCreated', relative_ts, , protocol_id, , base_amount, quote_amount, quote_pct_to_main_pool_balance, base_pct_to_main_pool_balance] # Signals LP addition or removal. 
[timestamp, 'LiquidityChange', relative_ts, , , change_type_id, quote_amount, quote_pct_to_current_pool_balance] # Signals creator/dev taking platform fees. [timestamp, 'FeeCollected', relative_ts, , sol_amount, token_amount] # ----------------------------------------- # SupplyEncoder # ----------------------------------------- # Signals a supply reduction. [timestamp, 'TokenBurn', relative_ts, , amount_pct_of_total_supply, amount_tokens_burned] # Signals locked supply, e.g., for team/marketing. [timestamp, 'SupplyLock', relative_ts, , amount_pct_of_total_supply, lock_duration] # ----------------------------------------- # ChartEncoder # ----------------------------------------- # (The "Sliding Window") This is the new chart event. [timestamp, 'Chart_Segment', relative_ts, OHLC_segment, chart_interval_id] # ----------------------------------------- # PulseEncoder # ----------------------------------------- # It is a low-frequency event (Dynamic Interval: 5min, 15min, or 1hr based on token age). [timestamp, 'OnChain_Snapshot', relative_ts, total_holders, smart_traders, kols, holder_growth_rate, top_10_holder_pct, sniper_holding_pct, rat_wallets_holding_pct, bundle_holding_pct, current_market_cap, liquidity, volume, buy_count, sell_count, total_txns, global_fees_paid] # ----------------------------------------- # HoldersListEncoder # ----------------------------------------- # Transformer-based embedding of the top holders (WalletEmbeddings + Pct). # Token-specific holder analysis. [timestamp, 'HolderSnapshot', relative_ts, ] # ----------------------------------------- # ChainSnapshotEncoder # ----------------------------------------- # Broad chain-level market conditions. [timestamp, 'ChainSnapshot', relative_ts, native_token_price_usd, gas_fee] # Launchpad market regime (using absolute, log-normalized values). 
[timestamp, 'Lighthouse_Snapshot', relative_ts, protocol_id, timeframe_id, total_volume, total_transactions, total_traders, total_tokens_created, total_migrations] # ----------------------------------------- # TokenTrendingListEncoder # ----------------------------------------- # Fires *per token* on a trending list. The high-attention "meta" signal. [timestamp, 'TrendingToken', relative_ts, , list_source_id, timeframe_id, rank] # Fires *per token* on the boosted list. [timestamp, 'BoostedToken', relative_ts, , total_boost_amount, rank] # ----------------------------------------- # LaunchpadThreadEncoder # ----------------------------------------- # On-platform social signal (Pump.fun comments). [timestamp, 'PumpReply', relative_ts, , ] # ----------------------------------------- # CTEncoder # ----------------------------------------- # Off-platform social signal (Twitter). [timestamp, 'XPost', relative_ts, , , ] [timestamp, 'XRetweet', relative_ts, , , , ] [timestamp, 'XReply', relative_ts, , , , ] [timestamp, 'XQuoteTweet', relative_ts, , , , , ] # ----------------------------------------- # GlobalTrendingEncoder # ----------------------------------------- # Broader cultural trend signal (TikTok). [timestamp, 'TikTok_Trending_Hashtag', relative_ts, , rank] # Broader cultural trend signal (Twitter). [timestamp, 'XTrending_Hashtag', relative_ts, , rank] # ----------------------------------------- # TrackerEncoder # ----------------------------------------- # Retail marketing signal (Paid groups). [timestamp, 'AlphaGroup_Call', relative_ts, group_id] [timestamp, 'Call_Channel', relative_ts, channel_id] # High-impact catalyst event. [timestamp, 'CexListing', relative_ts, exchange_id] # High-impact catalyst event. 
[timestamp, 'Migrated', relative_ts, protocol_id] # ----------------------------------------- # Dex Encoder # ----------------------------------------- [timestamp, 'DexBoost_Paid', relative_ts, amount, total_amount_on_token] [timestamp, 'DexProfile_Updated', relative_ts, has_changed_website_flag, has_changed_twitter_flag, has_changed_telegram_flag, has_changed_description_flag, , , ] ### **Global Context Injection** ### **Token Role Embedding** + Subject_Token_Role + Trending_Token_Role + Quote_Token_Role # **Links** ### `TransferLink` ``` ['signature', 'source', 'destination', 'mint', 'timestamp'] ``` ----- ### `BundleTradeLink` ``` ['signatures', 'wallet_a', 'wallet_b', 'mint', 'slot', 'timestamp'] ``` ----- ### `CopiedTradeLink` ``` ['leader_buy_sig', 'leader_sell_sig', 'follower_buy_sig', 'follower_sell_sig', 'follower', 'leader', 'mint', 'time_gap_on_buy_sec', 'time_gap_on_sell_sec', 'leader_pnl', 'follower_pnl', 'leader_buy_total', 'leader_sell_total', 'follower_buy_total', 'follower_sell_total', 'follower_buy_slippage', 'follower_sell_slippage'] ``` ----- ### `CoordinatedActivityLink` ``` ['leader_first_sig', 'leader_second_sig', 'follower_first_sig', 'follower_second_sig', 'follower', 'leader', 'mint', 'time_gap_on_first_sec', 'time_gap_on_second_sec'] ``` ----- ### `MintedLink` ``` ['signature', 'timestamp', 'buy_amount'] ``` ----- ### `SnipedLink` ``` ['signature', 'rank', 'sniped_amount'] ``` ----- ### `LockedSupplyLink` ``` ['signature', 'amount', 'unlock_timestamp'] ``` ----- ### `BurnedLink` ``` ['signature', 'amount', 'timestamp'] ``` ----- ### `ProvidedLiquidityLink` ``` ['signature', 'wallet', 'token', 'pool_address', 'amount_base', 'amount_quote', 'timestamp'] ``` ----- ### `WhaleOfLink` ``` ['wallet', 'token', 'holding_pct_at_creation', 'ath_usd_at_creation'] ``` ----- ### `TopTraderOfLink` ``` ['wallet', 'token', 'pnl_at_creation', 'ath_usd_at_creation'] ``` /////
def __gettestitem__(self, idx: int) -> Dict[str, Any]:
    """
    Generates a single complex data item, structured for the MemecoinCollator.

    The item is hand-written mock data: three wallets (addrW1..addrW3), four
    token entries (tknA..tknD), one event of each event type plus the special
    'MIDDLE' and 'RECENT' context markers, and one link of each graph-link type.

    NOTE: This currently returns the same mock data regardless of `idx`
    (`idx` is never read). The only non-constant part is 'labels', which is
    drawn with `torch.randn` on every call.

    NOTE(review): the mock sequence is not ordered by 'timestamp' or by
    'relative_ts' (e.g. the first event has timestamp 1729711350 while the
    fourth has 1729711180) -- confirm whether the consumer sorts events or
    expects them pre-sorted.

    Args:
        idx: Index of the requested item. Unused.

    Returns:
        A dict with the keys 'event_sequence', 'wallets', 'tokens',
        'embedding_pooler', 'graph_links', 'labels' and 'labels_mask'.
        'labels' / 'labels_mask' have length `self.num_outputs`, or are empty
        tensors when `self.num_outputs` is not positive.

    Side effects:
        Prints two progress messages to stdout and registers every raw
        text/image (and one `None`) with a fresh `EmbeddingPooler`.
    """
    # --- 1. Setup Pooler and Define Raw Data ---
    # Every pooler.get_idx(...) call below registers a raw item with this pooler.
    # NOTE(review): presumably get_idx de-duplicates equal inputs and hands out
    # indices in first-seen order -- confirm against EmbeddingPooler before
    # reordering any of the calls in this method.
    pooler = EmbeddingPooler()

    # --- 5. Create Mock Raw Batch Data (FIXED) ---
    print("Creating mock raw batch...")

    # (Wallet profiles, socials, holdings definitions are unchanged)
    # Wallet 1: an active wallet with non-zero lifetime, 1-day and 7-day stats.
    profile1 = {
        'wallet_address': 'addrW1', 'age': 1.5e7, 'balance': 10.5,
        'deployed_tokens_count': 2, 'deployed_tokens_migrated_pct': 0.5,
        'deployed_tokens_avg_lifetime_sec': 36000.0,
        'deployed_tokens_avg_peak_mc_usd': 100000.0,
        'deployed_tokens_median_peak_mc_usd': 50000.0,
        'transfers_in_count': 10, 'transfers_out_count': 5,
        'spl_transfers_in_count': 20, 'spl_transfers_out_count': 15,
        'total_buys_count': 50, 'total_sells_count': 40, 'total_winrate': 0.6,
        'stats_1d_realized_profit_sol': 1.2, 'stats_1d_realized_profit_pnl': 0.1,
        'stats_1d_buy_count': 5, 'stats_1d_sell_count': 3,
        'stats_1d_transfer_in_count': 2, 'stats_1d_transfer_out_count': 1,
        'stats_1d_avg_holding_period': 3600,
        'stats_1d_total_bought_cost_sol': 10.0, 'stats_1d_total_sold_income_sol': 11.2,
        'stats_1d_total_fee': 0.1, 'stats_1d_winrate': 0.7, 'stats_1d_tokens_traded': 4,
        'stats_7d_realized_profit_sol': 5.0, 'stats_7d_realized_profit_pnl': 0.2,
        'stats_7d_buy_count': 20, 'stats_7d_sell_count': 15,
        'stats_7d_transfer_in_count': 8, 'stats_7d_transfer_out_count': 4,
        'stats_7d_avg_holding_period': 7200,
        'stats_7d_total_bought_cost_sol': 40.0, 'stats_7d_total_sold_income_sol': 45.0,
        'stats_7d_total_fee': 0.5, 'stats_7d_winrate': 0.65, 'stats_7d_tokens_traded': 10,
    }
    social1 = {'has_pf_profile': True, 'has_twitter': True, 'has_telegram': False, 'is_exchange_wallet': False, 'username': 'trader_one'}
    # A single holding row for wallet 1, keyed by the main token's address.
    holdings1 = [
        {'mint_address': 'tknA', 'holding_time': 3600.0, 'realized_profit_sol': 5.2,
         'total_priority_fees': 0.05, 'balance_pct_to_supply': 0.01,
         'history_bought_amount_sol': 10, 'bought_amount_sol_pct_to_native_balance': 0.5,
         'history_total_buys': 5, 'history_total_sells': 2, 'realized_profit_pnl': 0.52,
         'history_transfer_in': 1, 'history_transfer_out': 0,
         'avarage_trade_gap_seconds': 300},
    ]

    # Wallet 2: an almost empty profile (all trading stats are zero) with no holdings.
    profile2 = {
        'wallet_address': 'addrW2', 'age': 1e6, 'balance': 1.0,
        'deployed_tokens_count': 0, 'deployed_tokens_migrated_pct': 0.0,
        'deployed_tokens_avg_lifetime_sec': 0.0,
        'deployed_tokens_avg_peak_mc_usd': 0.0,
        'deployed_tokens_median_peak_mc_usd': 0.0,
        'transfers_in_count': 1, 'transfers_out_count': 0,
        'spl_transfers_in_count': 0, 'spl_transfers_out_count': 0,
        'total_buys_count': 0, 'total_sells_count': 0, 'total_winrate': 0.0,
        'stats_1d_realized_profit_sol': 0.0, 'stats_1d_realized_profit_pnl': 0.0,
        'stats_1d_buy_count': 0, 'stats_1d_sell_count': 0,
        'stats_1d_transfer_in_count': 0, 'stats_1d_transfer_out_count': 0,
        'stats_1d_avg_holding_period': 0,
        'stats_1d_total_bought_cost_sol': 0.0, 'stats_1d_total_sold_income_sol': 0.0,
        'stats_1d_total_fee': 0.0, 'stats_1d_winrate': 0.0, 'stats_1d_tokens_traded': 0,
        'stats_7d_realized_profit_sol': 0.0, 'stats_7d_realized_profit_pnl': 0.0,
        'stats_7d_buy_count': 0, 'stats_7d_sell_count': 0,
        'stats_7d_transfer_in_count': 0, 'stats_7d_transfer_out_count': 0,
        'stats_7d_avg_holding_period': 0,
        'stats_7d_total_bought_cost_sol': 0.0, 'stats_7d_total_sold_income_sol': 0.0,
        'stats_7d_total_fee': 0.0, 'stats_7d_winrate': 0.0, 'stats_7d_tokens_traded': 0,
    }
    social2 = {'has_pf_profile': False, 'has_twitter': False, 'has_telegram': False, 'is_exchange_wallet': True, 'username': 'cex_wallet'}
    holdings2 = []

    # Define raw data and get their indices
    tokenA_data = {
        'address_emb_idx': pooler.get_idx('tknA'),
        'name_emb_idx': pooler.get_idx('Token A'),
        'symbol_emb_idx': pooler.get_idx('TKA'),
        'image_emb_idx': pooler.get_idx(Image.new('RGB',(256,256), color='blue')),
        'protocol': 1
    }

    # Add wallet usernames to the pool
    wallet1_user_idx = pooler.get_idx(social1['username'])
    wallet2_user_idx = pooler.get_idx(social2['username'])
    social1['username_emb_idx'] = wallet1_user_idx
    social2['username_emb_idx'] = wallet2_user_idx

    # --- NEW: Add a third wallet for social tests ---
    social3 = {'has_pf_profile': False, 'has_twitter': True, 'has_telegram': True, 'is_exchange_wallet': False, 'username': 'social_butterfly'}
    wallet3_user_idx = pooler.get_idx(social3['username'])
    social3['username_emb_idx'] = wallet3_user_idx

    # Create the final pre-computed data structures
    # NOTE(review): tokenB/C/D are built from exactly the same inputs as
    # tokenA_data ('tknA', 'Token A', 'TKA', a blue 256x256 image). Confirm
    # whether distinct mock tokens were intended for the quote / trending /
    # boosted roles they are mapped to in item['tokens'].
    tokenB_data = {
        'address_emb_idx': pooler.get_idx('tknA'),
        'name_emb_idx': pooler.get_idx('Token A'),
        'symbol_emb_idx': pooler.get_idx('TKA'),
        'image_emb_idx': pooler.get_idx(Image.new('RGB',(256,256), color='blue')),
        'protocol': 1
    }
    tokenC_data = {
        'address_emb_idx': pooler.get_idx('tknA'),
        'name_emb_idx': pooler.get_idx('Token A'),
        'symbol_emb_idx': pooler.get_idx('TKA'),
        'image_emb_idx': pooler.get_idx(Image.new('RGB',(256,256), color='blue')),
        'protocol': 1
    }
    tokenD_data = {
        'address_emb_idx': pooler.get_idx('tknA'),
        'name_emb_idx': pooler.get_idx('Token A'),
        'symbol_emb_idx': pooler.get_idx('TKA'),
        'image_emb_idx': pooler.get_idx(Image.new('RGB',(256,256), color='blue')),
        'protocol': 1
    }

    item = {
        'event_sequence': [
            {'event_type': 'XPost', # NEW
             'timestamp': 1729711350,
             'relative_ts': -25,
             'wallet_address': 'addrW1', # Author
             'text_emb_idx': pooler.get_idx('This is the main tweet about $TKA'),
             'media_emb_idx': pooler.get_idx(Image.new('RGB', (100,100), color='cyan'))
            },
            {'event_type': 'XReply', # NEW
             'timestamp': 1729711360,
             'relative_ts': -35,
             'wallet_address': 'addrW2', # Replier
             'text_emb_idx': pooler.get_idx('This is a reply to the main tweet'),
             'media_emb_idx': pooler.get_idx(None), # No media in reply
             'main_tweet_text_emb_idx': pooler.get_idx('This is the main tweet about $TKA')
            },
            {'event_type': 'XRetweet', # NEW
             'timestamp': 1729711370,
             'relative_ts': -40,
             'wallet_address': 'addrW3', # The retweeter
             'original_author_wallet_address': 'addrW1', # The original author
             'original_post_text_emb_idx': pooler.get_idx('This is the main tweet about $TKA'),
             # A new Image object is created here rather than reusing the XPost one.
             'original_post_media_emb_idx': pooler.get_idx(Image.new('RGB', (100,100), color='cyan'))
            },
            # --- CORRECTED: Test a pre-launch event with negative relative_ts ---
            {'event_type': 'Transfer',
             'timestamp': 1729711180,
             'relative_ts': -10, # Negative relative_ts indicates pre-launch
             'wallet_address': 'addrW2',
             'destination_wallet_address': 'addrW1',
             'token_address': 'tknA',
             'token_amount': 1000.0,
             'transfer_pct_of_total_supply': 0.0,
             'transfer_pct_of_holding': 0.0,
             'priority_fee': 0.0
            },
            # The T0 event: relative_ts is 0 by definition.
            {'event_type': 'Mint',
             'timestamp': 1729711190,
             'relative_ts': 0,
             'wallet_address': 'addrW1',
             'token_address': 'tknA'},
            {'event_type': 'Chart_Segment',
             'timestamp': 1729711200,
             'relative_ts': 60,
             'opens': [1.0]*OHLC_SEQ_LEN,
             'closes': [1.1]*OHLC_SEQ_LEN,
             'i': '1s'}, # This is high-def (segment 0) by default
            {'event_type': 'Chart_Segment',
             'timestamp': 1729711260,
             'relative_ts': 120,
             'opens': [1.0]*OHLC_SEQ_LEN,
             'closes': [1.1]*OHLC_SEQ_LEN,
             'i': '1s'}, # You can mark this as blurry
            {'event_type': 'Transfer',
             'timestamp': 1729711210,
             'relative_ts': 20,
             'wallet_address': 'addrW1', # Source
             'destination_wallet_address': 'addrW2', # Destination
             'token_address': 'tknA', # Need token for context? (Optional, depends on design)
             'token_amount': 500.0,
             'transfer_pct_of_total_supply': 0.005,
             'transfer_pct_of_holding': 0.1,
             'priority_fee': 0.0001
            },
            {'event_type': 'Trade',
             'timestamp': 1729711220,
             'relative_ts': 30,
             'wallet_address': 'addrW1',
             'token_address': 'tknA',
             'trade_direction': 0,
             'sol_amount': 0.5,
             # --- FIXED: Pass the integer ID directly ---
             'dex_platform_id': vocab.DEX_TO_ID['Axiom'],
             'priority_fee': 0.0002,
             'mev_protection': False,
             'token_amount_pct_of_holding': 0.05,
             'quote_amount_pct_of_holding': 0.02,
             'slippage': 0.01,
             'price_impact': 0.005,
             'success': True,
             'is_bundle': False,
             'total_usd': 75.0
            },
            {'event_type': 'Deployer_Trade', # NEW: Testing a trade variant
             'timestamp': 1729711230,
             'relative_ts': 40,
             'wallet_address': 'addrW1', # The creator wallet
             'token_address': 'tknA',
             'trade_direction': 1,
             'sol_amount': 0.2,
             # --- FIXED: Pass the integer ID directly ---
             'dex_platform_id': vocab.DEX_TO_ID['Trojan'],
             'priority_fee': 0.0005,
             'mev_protection': True,
             'token_amount_pct_of_holding': 0.1,
             'quote_amount_pct_of_holding': 0.0,
             'slippage': 0.02,
             'price_impact': 0.01,
             'success': True,
             'is_bundle': False,
             'total_usd': 30.0
            },
            {'event_type': 'SmartWallet_Trade', # NEW
             'timestamp': 1729711240,
             'relative_ts': 50,
             'wallet_address': 'addrW1', # A known smart wallet
             'token_address': 'tknA',
             'trade_direction': 0,
             'sol_amount': 1.5,
             # --- FIXED: Pass the integer ID directly ---
             'dex_platform_id': vocab.DEX_TO_ID['Axiom'],
             'priority_fee': 0.001,
             'mev_protection': True,
             'token_amount_pct_of_holding': 0.2,
             'quote_amount_pct_of_holding': 0.1,
             'slippage': 0.01,
             'price_impact': 0.008,
             'success': True,
             'is_bundle': False,
             'total_usd': 225.0
            },
            {'event_type': 'LargeTrade', # NEW
             'timestamp': 1729711250,
             'relative_ts': 60,
             'wallet_address': 'addrW2', # Some other wallet
             'token_address': 'tknA',
             'trade_direction': 0,
             'sol_amount': 10.0,
             # --- FIXED: Pass the integer ID directly ---
             'dex_platform_id': vocab.DEX_TO_ID['OXK'],
             'priority_fee': 0.002,
             'mev_protection': False,
             'token_amount_pct_of_holding': 0.8,
             'quote_amount_pct_of_holding': 0.5,
             'slippage': 0.03,
             'price_impact': 0.05,
             'success': True,
             'is_bundle': False,
             'total_usd': 1500.0
            },
            # Same timestamp as the second Chart_Segment above, with a different relative_ts.
            {'event_type': 'Chart_Segment',
             'timestamp': 1729711260,
             'relative_ts': 70,
             'opens': [1.0]*OHLC_SEQ_LEN,
             'closes': [1.1]*OHLC_SEQ_LEN,
             'i': '1s'},
            # NOTE(review): the PoolCreated spec earlier in this file also lists
            # quote_pct_to_main_pool_balance / base_pct_to_main_pool_balance,
            # which are not provided here -- confirm they are optional.
            {'event_type': 'PoolCreated', # NEW
             'timestamp': 1729711270,
             'relative_ts': 80,
             'wallet_address': 'addrW1',
             'protocol_id': vocab.PROTOCOL_TO_ID['Raydium CPMM'],
             'quote_token_address': 'tknB',
             'base_amount': 1000000.0,
             'quote_amount': 10.0
            },
            {'event_type': 'LiquidityChange', # NEW
             'timestamp': 1729711280,
             'relative_ts': 90,
             'wallet_address': 'addrW2',
             'quote_token_address': 'tknB',
             'change_type_id': 0, # 0 for 'add'
             'quote_amount': 2.0
            },
            {'event_type': 'FeeCollected', # NEW
             'timestamp': 1729711290,
             'relative_ts': 100,
             'wallet_address': 'addrW1', # The recipient (e.g., dev wallet)
             'sol_amount': 0.1
            },
            {'event_type': 'TokenBurn', # NEW
             'timestamp': 1729711300,
             'relative_ts': 110,
             'wallet_address': 'addrW2', # The burner wallet
             'amount_pct_of_total_supply': 0.01, # 1% of supply
             'amount_tokens_burned': 10000000.0
            },
            {'event_type': 'SupplyLock', # NEW
             'timestamp': 1729711310,
             'relative_ts': 120,
             'wallet_address': 'addrW1', # The locker wallet
             'amount_pct_of_total_supply': 0.10, # 10% of supply
             'lock_duration': 2592000 # 30 days in seconds
            },
            {'event_type': 'HolderSnapshot', # NEW
             'timestamp': 1729711320,
             'relative_ts': 130,
             # This is a pointer to the pre-computed embedding
             # In a real system, this would be the index of the embedding
             'holders': [ # Raw holder data
                 {'wallet': 'addrW1', 'holding_pct': 0.15},
                 {'wallet': 'addrW2', 'holding_pct': 0.05},
                 # Add more mock holders if needed
             ]
            },
            # NOTE(review): the OnChain_Snapshot spec earlier in this file lists a
            # `liquidity` field between current_market_cap and volume; it is not
            # provided here -- confirm whether that is intentional.
            {'event_type': 'OnChain_Snapshot', # NEW
             'timestamp': 1729711320,
             'relative_ts': 130,
             'total_holders': 500,
             'smart_traders': 25,
             'kols': 3,
             'holder_growth_rate': 0.15,
             'top_10_holder_pct': 0.22,
             'sniper_holding_pct': 0.05,
             'rat_wallets_holding_pct': 0.02,
             'bundle_holding_pct': 0.01,
             'current_market_cap': 150000.0,
             'volume': 50000.0,
             'buy_count': 120,
             'sell_count': 80,
             'total_txns': 200,
             'global_fees_paid': 1.5
            },
            {'event_type': 'TrendingToken', # NEW
             'timestamp': 1729711330,
             'relative_ts': 140,
             'token_address': 'tknC', # The token that is trending
             'list_source_id': vocab.TRENDING_LIST_SOURCE_TO_ID['Phantom'],
             'timeframe_id': vocab.TRENDING_LIST_TIMEFRAME_TO_ID['1h'],
             'rank': 3
            },
            {'event_type': 'BoostedToken', # NEW
             'timestamp': 1729711340,
             'relative_ts': 150,
             'token_address': 'tknD', # The token that is boosted
             'total_boost_amount': 5000.0,
             'rank': 1
            },
            {'event_type': 'XQuoteTweet', # NEW
             'timestamp': 1729711380,
             'relative_ts': 190,
             'wallet_address': 'addrW3', # The quoter
             'quoter_text_emb_idx': pooler.get_idx('Wow, look at this! $TKA'),
             'original_author_wallet_address': 'addrW1', # The original author
             'original_post_text_emb_idx': pooler.get_idx('This is the main tweet about $TKA'),
             'original_post_media_emb_idx': pooler.get_idx(Image.new('RGB', (100,100), color='cyan'))
            },
            # --- NEW: Add special context tokens ---
            # 'MIDDLE' (and 'RECENT' further down) carry only a timestamp and relative_ts.
            {'event_type': 'MIDDLE', 'timestamp': 1729711500, 'relative_ts': 195},
            {'event_type': 'PumpReply', # NEW
             'timestamp': 1729711390,
             'relative_ts': 200,
             'wallet_address': 'addrW2', # The user who replied
             'reply_text_emb_idx': pooler.get_idx('to the moon!')
            },
            {'event_type': 'DexBoost_Paid', # NEW
             'timestamp': 1729711400,
             'relative_ts': 210,
             'amount': 5.0, # e.g., 5 Boost
             'total_amount_on_token': 25.0 # 25 Boost Points
            },
            {'event_type': 'DexProfile_Updated', # NEW
             'timestamp': 1729711410,
             'relative_ts': 220,
             'has_changed_website_flag': True,
             'has_changed_twitter_flag': False,
             'has_changed_telegram_flag': True,
             'has_changed_description_flag': True,
             # Pre-computed text embeddings
             'website_emb_idx': pooler.get_idx('new-token-website.com'),
             'twitter_link_emb_idx': pooler.get_idx('old_handle'), # No change, so old link
             'telegram_link_emb_idx': pooler.get_idx('new_tg_group'),
             'description_emb_idx': pooler.get_idx('This is the new and improved token description.')
            },
            {'event_type': 'AlphaGroup_Call', # NEW
             'timestamp': 1729711420,
             'relative_ts': 230,
             'group_id': vocab.ALPHA_GROUPS_TO_ID['Potion']
            },
            # NOTE(review): the event spec earlier in this file names this event
            # 'Call_Channel', not 'Channel_Call' -- confirm which spelling the
            # consumer expects.
            {'event_type': 'Channel_Call', # NEW
             'timestamp': 1729711430,
             'relative_ts': 240,
             'channel_id': vocab.CALL_CHANNELS_TO_ID['MarcosCalls']
            },
            {'event_type': 'RECENT', 'timestamp': 1729711510, 'relative_ts': 245},
            {'event_type': 'CexListing', # NEW
             'timestamp': 1729711440,
             'relative_ts': 250,
             'exchange_id': vocab.EXCHANGES_TO_ID['mexc']
            },
            {'event_type': 'TikTok_Trending_Hashtag', # NEW
             'timestamp': 1729711450,
             'relative_ts': 260,
             'hashtag_name_emb_idx': pooler.get_idx('CryptoTok'),
             'rank': 5
            },
            {'event_type': 'XTrending_Hashtag', # NEW
             'timestamp': 1729711460,
             'relative_ts': 270,
             'hashtag_name_emb_idx': pooler.get_idx('SolanaMemes'),
             'rank': 2
            },
            {'event_type': 'ChainSnapshot', # NEW
             'timestamp': 1729711470,
             'relative_ts': 280,
             'native_token_price_usd': 150.75,
             'gas_fee': 0.00015 # Example gas fee
            },
            {'event_type': 'Lighthouse_Snapshot', # NEW
             'timestamp': 1729711480,
             'relative_ts': 290,
             'protocol_id': vocab.PROTOCOL_TO_ID['Pump V1'],
             'timeframe_id': vocab.LIGHTHOUSE_TIMEFRAME_TO_ID['1h'],
             'total_volume': 1.2e6,
             'total_transactions': 5000,
             'total_traders': 1200,
             'total_tokens_created': 85,
             'total_migrations': 70
            },
            {'event_type': 'Migrated', # NEW
             'timestamp': 1729711490,
             'relative_ts': 300,
             'protocol_id': vocab.PROTOCOL_TO_ID['Raydium CPMM']
            },
        ],
        # Per-wallet raw rows, keyed by the wallet addresses used in the events above.
        'wallets': {
            'addrW1': {'profile': profile1, 'socials': social1, 'holdings': holdings1},
            'addrW2': {'profile': profile2, 'socials': social2, 'holdings': holdings2},
            # --- NEW: Add wallet 3 data ---
            'addrW3': {
                'profile': {**profile2, 'wallet_address': 'addrW3'}, # Reuse profile2 but change address
                'socials': social3,
                'holdings': []
            }
        },
        'tokens': {
            'tknA': tokenA_data, # Main token
            'tknB': tokenB_data, # Quote token
            'tknC': tokenC_data, # Trending token
            'tknD': tokenD_data # Boosted token
        },
        # --- NEW: The pre-computed embedding pool is generated after collecting all items
        'embedding_pooler': pooler, # Pass the pooler to generate the tensor later

        # --- NEW: Expanded graph_links to test all encoders ---
        # --- FIXED: Removed useless logging fields as per user request ---
        # Each link type carries a list of 'links' (feature dicts) and a list of 'edges'.
        # NOTE(review): presumably links[i] describes edges[i] -- confirm against
        # the consumer.
        'graph_links': {
            'TransferLink': {'links': [{'timestamp': 1729711205}], 'edges': [('addrW1', 'addrW2')]}, # Keep timestamp
            'BundleTradeLink': {'links': [{'timestamp': 1729711215}], 'edges': [('addrW1', 'addrW2')]}, # Keep timestamp
            'CopiedTradeLink': {'links': [
                {'time_gap_on_buy_sec': 10, 'time_gap_on_sell_sec': 120, 'leader_pnl': 5.0, 'follower_pnl': 4.0, 'follower_buy_total': 100, 'follower_sell_total': 120}
            ], 'edges': [('addrW1', 'addrW2')]},
            'CoordinatedActivityLink': {'links': [
                {'time_gap_on_first_sec': 5, 'time_gap_on_second_sec': 8}
            ], 'edges': [('addrW1', 'addrW2')]},
            'MintedLink': {'links': [
                {'timestamp': 1729711200, 'buy_amount': 1e9}
            ], 'edges': [('addrW1', 'tknA')]},
            'SnipedLink': {'links': [
                {'rank': 1, 'sniped_amount': 5e8}
            ], 'edges': [('addrW1', 'tknA')]},
            'LockedSupplyLink': {'links': [
                {'amount': 1e10} # Only amount is needed
            ], 'edges': [('addrW1', 'tknA')]},
            'BurnedLink': {'links': [
                {'timestamp': 1729711300} # Only timestamp is needed
            ], 'edges': [('addrW2', 'tknA')]},
            'ProvidedLiquidityLink': {'links': [
                {'timestamp': 1729711250} # Only timestamp is needed
            ], 'edges': [('addrW1', 'tknA')]},
            'WhaleOfLink': {'links': [
                {} # Just the existence of the link is the feature
            ], 'edges': [('addrW1', 'tknA')]},
            'TopTraderOfLink': {'links': [
                {'pnl_at_creation': 50000.0} # Only PnL is needed
            ], 'edges': [('addrW2', 'tknA')]}
        },

        # --- FIXED: Removed chart_segments dictionary ---
        # Random targets with an all-ones mask; empty tensors when there are no outputs.
        'labels': torch.randn(self.num_outputs) if self.num_outputs > 0 else torch.zeros(0),
        'labels_mask': torch.ones(self.num_outputs) if self.num_outputs > 0 else torch.zeros(0)
    }

    print("Mock raw batch created.")

    return item