=========================================
Entity Encoders
=========================================
These are generated offline/streaming and are the "vocabulary" for the model.
# Embedding of a wallet's relationships, behavior, and history.
<WalletEmbedding> = [
// Data from the 'wallet_profiles' table (Wallet-level lifetime and daily/weekly stats)
wallet_profiles_row: [
// Core Info & Timestamps
age, // No Contextual
wallet_address, // Primary wallet identifier
// 7. NEW: Deployed Token Aggregates (8 Features)
deployed_tokens_count, // Total tokens created
deployed_tokens_migrated_pct, // % that migrated
deployed_tokens_avg_lifetime_sec, // Avg duration before dev selling
deployed_tokens_avg_peak_mc_usd, // Avg peak marketcap
deployed_tokens_median_peak_mc_usd,
// Metadata & Balances
balance, // Current SOL balance
// Lifetime Transaction Counts (Total history)
transfers_in_count, // Total native transfers received
transfers_out_count, // Total native transfers sent
spl_transfers_in_count, // Total SPL token transfers received
spl_transfers_out_count,// Total SPL token transfers sent
// Lifetime Trading Stats (Total history)
total_buys_count, // Total buys across all tokens
total_sells_count, // Total sells across all tokens
total_winrate, // Overall trading winrate
// 1-Day Stats (Realized P&L, Counts, Averages, Volume, Fees, Winrate)
stats_1d_realized_profit_sol,
stats_1d_realized_profit_pnl,
stats_1d_buy_count,
stats_1d_sell_count,
stats_1d_transfer_in_count,
stats_1d_transfer_out_count,
stats_1d_avg_holding_period,
stats_1d_total_bought_cost_sol,
stats_1d_total_sold_income_sol,
stats_1d_total_fee,
stats_1d_winrate,
stats_1d_tokens_traded,
// 7-Day Stats (Realized P&L, Counts, Averages, Volume, Fees, Winrate)
stats_7d_realized_profit_sol,
stats_7d_realized_profit_pnl,
stats_7d_buy_count,
stats_7d_sell_count,
stats_7d_transfer_in_count,
stats_7d_transfer_out_count,
stats_7d_avg_holding_period,
stats_7d_total_bought_cost_sol,
stats_7d_total_sold_income_sol,
stats_7d_total_fee,
stats_7d_winrate,
stats_7d_tokens_traded,
// 30-day stats are omitted: they are too useless in this context
],
// Data from the 'wallet_socials' table (Social media and profile info)
wallet_socials_row: [
has_pf_profile,
has_twitter,
has_telegram,
is_exchange_wallet,
username,
],
// Data from the 'wallet_holdings' table (Token-level statistics for held tokens)
wallet_holdings_pool: [
<TokenVibeEmbedding>,
holding_time, // How long the wallet has held the token (only tokens it currently holds or has recently traded are considered)
balance_pct_to_supply, // Current holding of the token, relative to the token's total supply
// History (Amounts & Costs)
history_bought_amount_sol, // Total amount of token bought
bought_amount_sol_pct_to_native_balance, // Whether the wallet spent a large share of its native (SOL) balance on this token
// History (Counts)
history_total_buys, // Total number of buy transactions
history_total_sells, // Total number of sell transactions
// Profit and Loss
realized_profit_pnl, // Realized P&L as a percentage
realized_profit_sol,
// Transfers (Non-trade movements)
history_transfer_in,
history_transfer_out,
avarage_trade_gap_seconds,
total_priority_fees, // Total tips + Priority Fees
]
]
# Multimodal embedding of a token's identity
<TokenVibeEmbedding> = [<AddressEmbedding>, <NameEmbedding>, <SymbolEmbedding>, <ImageEmbedding>, protocol_id]
# Text embedding MultiModal processor. # Multimodal VIT encoder.
-----------------------------------------
1. TradeEncoder
-----------------------------------------
Captures large-size trades from any wallet.
[timestamp, 'LargeTrade', relative_ts, , trade_direction, sol_amount, dex_platform_id, priority_fee, mev_protection, token_amount_pct_of_holding, quote_amount_pct_of_holding, slippage, price_impact, success, is_bundle, total_usd]
Captures the high-signal "Dev Sold or Bought" event.
[timestamp, 'Deployer_Trade', relative_ts, , trade_direction, sol_amount, dex_platform_id, priority_fee, mev_protection, token_amount_pct_of_holding, quote_amount_pct_of_holding, slippage, price_impact, success, is_bundle, total_usd]
Captures all trades from pre-defined high-P&L/win-rate, KOL, and known wallets.
[timestamp, 'SmartWallet_Trade', relative_ts, , trade_direction, sol_amount, dex_platform_id, priority_fee, mev_protection, token_amount_pct_of_holding, quote_amount_pct_of_holding, slippage, price_impact, success, is_bundle, total_usd]
Raw trades. Loaded in H/B/H Prefix (first ~10k) and Suffix (last ~5k).
[timestamp, 'Trade', relative_ts, , trade_direction, sol_amount, dex_platform_id, priority_fee, mev_protection, token_amount_pct_of_holding, quote_amount_pct_of_holding, slippage, price_impact, success, is_bundle, total_usd]
-----------------------------------------
2. TransferEncoder
-----------------------------------------
Raw transfers. Loaded in H/B/H Prefix (all in first ~10k trade window) and Suffix (all in last ~5k trade window).
[timestamp, 'Transfer', relative_ts, , , token_amount, transfer_pct_of_total_supply, transfer_pct_of_holding, priority_fee]
Captures scarce, large transfers after the initial launch window.
[timestamp, 'LargeTransfer', relative_ts, , , token_amount, transfer_pct_of_total_supply, transfer_pct_of_holding, priority_fee]
-----------------------------------------
3. LifecycleEncoder
-----------------------------------------
The T0 event.
[timestamp, 'Mint', 0, , ]
-----------------------------------------
4. PoolEncoder
-----------------------------------------
Signals migration from launchpad to a real pool.
[timestamp, 'PoolCreated', relative_ts, , protocol_id, , base_amount, quote_amount, quote_pct_to_main_pool_balance, base_pct_to_main_pool_balance]
Signals LP addition or removal.
[timestamp, 'LiquidityChange', relative_ts, , , change_type_id, quote_amount, quote_pct_to_current_pool_balance]
Signals creator/dev taking platform fees.
[timestamp, 'FeeCollected', relative_ts, , sol_amount, token_amount]
-----------------------------------------
SupplyEncoder
-----------------------------------------
Signals a supply reduction.
[timestamp, 'TokenBurn', relative_ts, , amount_pct_of_total_supply, amount_tokens_burned]
Signals locked supply, e.g., for team/marketing.
[timestamp, 'SupplyLock', relative_ts, , amount_pct_of_total_supply, lock_duration]
-----------------------------------------
ChartEncoder
-----------------------------------------
(The "Sliding Window") This is the new chart event.
[timestamp, 'Chart_Segment', relative_ts, OHLC_segment, chart_interval_id]
-----------------------------------------
PulseEncoder
-----------------------------------------
It is a low-frequency event (Dynamic Interval: 5min, 15min, or 1hr based on token age).
[timestamp, 'OnChain_Snapshot', relative_ts, total_holders, smart_traders, kols, holder_growth_rate, top_10_holder_pct, sniper_holding_pct, rat_wallets_holding_pct, bundle_holding_pct, current_market_cap, liquidity, volume, buy_count, sell_count, total_txns, global_fees_paid]
-----------------------------------------
HoldersListEncoder
-----------------------------------------
# Transformer-based embedding of the top holders (WalletEmbeddings + Pct).
Token-specific holder analysis.
[timestamp, 'HolderSnapshot', relative_ts, ]
-----------------------------------------
ChainSnapshotEncoder
-----------------------------------------
Broad chain-level market conditions.
[timestamp, 'ChainSnapshot', relative_ts, native_token_price_usd, gas_fee]
Launchpad market regime (using absolute, log-normalized values).
[timestamp, 'Lighthouse_Snapshot', relative_ts, protocol_id, timeframe_id, total_volume, total_transactions, total_traders, total_tokens_created, total_migrations]
-----------------------------------------
TokenTrendingListEncoder
-----------------------------------------
Fires per token on a trending list. The high-attention "meta" signal.
[timestamp, 'TrendingToken', relative_ts, , list_source_id, timeframe_id, rank]
Fires per token on the boosted list.
[timestamp, 'BoostedToken', relative_ts, , total_boost_amount, rank]
-----------------------------------------
LaunchpadThreadEncoder
-----------------------------------------
On-platform social signal (Pump.fun comments).
[timestamp, 'PumpReply', relative_ts, , ]
-----------------------------------------
CTEncoder
-----------------------------------------
Off-platform social signal (Twitter).
[timestamp, 'XPost', relative_ts, , , ] [timestamp, 'XRetweet', relative_ts, , , , ] [timestamp, 'XReply', relative_ts, , , , ] [timestamp, 'XQuoteTweet', relative_ts, , , , , ]
-----------------------------------------
GlobalTrendingEncoder
-----------------------------------------
Broader cultural trend signal (TikTok).
[timestamp, 'TikTok_Trending_Hashtag', relative_ts, , rank]
Broader cultural trend signal (Twitter).
[timestamp, 'XTrending_Hashtag', relative_ts, , rank]
-----------------------------------------
TrackerEncoder
-----------------------------------------
Retail marketing signal (Paid groups).
[timestamp, 'AlphaGroup_Call', relative_ts, group_id]
[timestamp, 'Call_Channel', relative_ts, channel_id]
High-impact catalyst event.
[timestamp, 'CexListing', relative_ts, exchange_id]
High-impact catalyst event.
[timestamp, 'Migrated', relative_ts, protocol_id]
-----------------------------------------
Dex Encoder
-----------------------------------------
[timestamp, 'DexBoost_Paid', relative_ts, amount, total_amount_on_token]
[timestamp, 'DexProfile_Updated', relative_ts, has_changed_website_flag, has_changed_twitter_flag, has_changed_telegram_flag, has_changed_description_flag, , , ]
Global Context Injection
Token Role Embedding
+ Subject_Token_Role
+ Trending_Token_Role
+ Quote_Token_Role
Links
TransferLink
['signature', 'source', 'destination', 'mint', 'timestamp']
BundleTradeLink
['signatures', 'wallet_a', 'wallet_b', 'mint', 'slot', 'timestamp']
CopiedTradeLink
['leader_buy_sig', 'leader_sell_sig', 'follower_buy_sig', 'follower_sell_sig', 'follower', 'leader', 'mint', 'time_gap_on_buy_sec', 'time_gap_on_sell_sec', 'leader_pnl', 'follower_pnl', 'leader_buy_total', 'leader_sell_total', 'follower_buy_total', 'follower_sell_total', 'follower_buy_slippage', 'follower_sell_slippage']
CoordinatedActivityLink
['leader_first_sig', 'leader_second_sig', 'follower_first_sig', 'follower_second_sig', 'follower', 'leader', 'mint', 'time_gap_on_first_sec', 'time_gap_on_second_sec']
MintedLink
['signature', 'timestamp', 'buy_amount']
SnipedLink
['signature', 'rank', 'sniped_amount']
LockedSupplyLink
['signature', 'amount', 'unlock_timestamp']
BurnedLink
['signature', 'amount', 'timestamp']
ProvidedLiquidityLink
['signature', 'wallet', 'token', 'pool_address', 'amount_base', 'amount_quote', 'timestamp']
WhaleOfLink
['wallet', 'token', 'holding_pct_at_creation', 'ath_usd_at_creation']
TopTraderOfLink
['wallet', 'token', 'pnl_at_creation', 'ath_usd_at_creation']
/////
def __gettestitem__(self, idx: int) -> Dict[str, Any]:
    """
    Build one hand-written mock item, structured for the MemecoinCollator.

    The item holds a sequence of mock events covering many event types,
    three mock wallets, four mock tokens and one example of each graph-link
    type. Text and image payloads are not stored in the events themselves;
    they are registered with a fresh ``EmbeddingPooler`` and the events keep
    the value returned by ``pooler.get_idx`` instead.

    NOTE: ``idx`` is currently ignored, so every call builds the same mock
    structure (only the random ``labels`` tensor differs between calls).

    Args:
        idx: Requested item index. Unused.

    Returns:
        A dict with the keys ``event_sequence``, ``wallets``, ``tokens``,
        ``embedding_pooler``, ``graph_links``, ``labels`` and
        ``labels_mask``. ``labels`` and ``labels_mask`` have
        ``self.num_outputs`` elements, or are empty tensors when
        ``self.num_outputs`` is not positive.
    """
    # NOTE(review): pooler.get_idx presumably hands out indices in call
    # order, so the order of the get_idx calls below is kept exactly as it
    # was originally written -- confirm before reordering anything.
    pooler = EmbeddingPooler()

    print("Creating mock raw batch...")

    # --- Wallet 1: an active trader that has also deployed tokens ---
    profile1 = {
        'wallet_address': 'addrW1', 'age': 1.5e7, 'balance': 10.5,
        'deployed_tokens_count': 2,
        'deployed_tokens_migrated_pct': 0.5,
        'deployed_tokens_avg_lifetime_sec': 36000.0,
        'deployed_tokens_avg_peak_mc_usd': 100000.0,
        'deployed_tokens_median_peak_mc_usd': 50000.0,
        'transfers_in_count': 10, 'transfers_out_count': 5,
        'spl_transfers_in_count': 20, 'spl_transfers_out_count': 15,
        'total_buys_count': 50, 'total_sells_count': 40, 'total_winrate': 0.6,
        'stats_1d_realized_profit_sol': 1.2,
        'stats_1d_realized_profit_pnl': 0.1,
        'stats_1d_buy_count': 5,
        'stats_1d_sell_count': 3,
        'stats_1d_transfer_in_count': 2,
        'stats_1d_transfer_out_count': 1,
        'stats_1d_avg_holding_period': 3600,
        'stats_1d_total_bought_cost_sol': 10.0,
        'stats_1d_total_sold_income_sol': 11.2,
        'stats_1d_total_fee': 0.1,
        'stats_1d_winrate': 0.7,
        'stats_1d_tokens_traded': 4,
        'stats_7d_realized_profit_sol': 5.0,
        'stats_7d_realized_profit_pnl': 0.2,
        'stats_7d_buy_count': 20,
        'stats_7d_sell_count': 15,
        'stats_7d_transfer_in_count': 8,
        'stats_7d_transfer_out_count': 4,
        'stats_7d_avg_holding_period': 7200,
        'stats_7d_total_bought_cost_sol': 40.0,
        'stats_7d_total_sold_income_sol': 45.0,
        'stats_7d_total_fee': 0.5,
        'stats_7d_winrate': 0.65,
        'stats_7d_tokens_traded': 10,
    }
    social1 = {'has_pf_profile': True, 'has_twitter': True, 'has_telegram': False, 'is_exchange_wallet': False, 'username': 'trader_one'}
    holdings1 = [
        {
            'mint_address': 'tknA',
            'holding_time': 3600.0,
            'realized_profit_sol': 5.2,
            'total_priority_fees': 0.05,
            'balance_pct_to_supply': 0.01,
            'history_bought_amount_sol': 10,
            'bought_amount_sol_pct_to_native_balance': 0.5,
            'history_total_buys': 5,
            'history_total_sells': 2,
            'realized_profit_pnl': 0.52,
            'history_transfer_in': 1,
            'history_transfer_out': 0,
            'avarage_trade_gap_seconds': 300,
        },
    ]

    # --- Wallet 2: an exchange wallet with an all-zero trading history ---
    profile2 = {
        'wallet_address': 'addrW2', 'age': 1e6, 'balance': 1.0,
        'deployed_tokens_count': 0,
        'deployed_tokens_migrated_pct': 0.0,
        'deployed_tokens_avg_lifetime_sec': 0.0,
        'deployed_tokens_avg_peak_mc_usd': 0.0,
        'deployed_tokens_median_peak_mc_usd': 0.0,
        'transfers_in_count': 1, 'transfers_out_count': 0,
        'spl_transfers_in_count': 0, 'spl_transfers_out_count': 0,
        'total_buys_count': 0, 'total_sells_count': 0, 'total_winrate': 0.0,
        'stats_1d_realized_profit_sol': 0.0,
        'stats_1d_realized_profit_pnl': 0.0,
        'stats_1d_buy_count': 0,
        'stats_1d_sell_count': 0,
        'stats_1d_transfer_in_count': 0,
        'stats_1d_transfer_out_count': 0,
        'stats_1d_avg_holding_period': 0,
        'stats_1d_total_bought_cost_sol': 0.0,
        'stats_1d_total_sold_income_sol': 0.0,
        'stats_1d_total_fee': 0.0,
        'stats_1d_winrate': 0.0,
        'stats_1d_tokens_traded': 0,
        'stats_7d_realized_profit_sol': 0.0,
        'stats_7d_realized_profit_pnl': 0.0,
        'stats_7d_buy_count': 0,
        'stats_7d_sell_count': 0,
        'stats_7d_transfer_in_count': 0,
        'stats_7d_transfer_out_count': 0,
        'stats_7d_avg_holding_period': 0,
        'stats_7d_total_bought_cost_sol': 0.0,
        'stats_7d_total_sold_income_sol': 0.0,
        'stats_7d_total_fee': 0.0,
        'stats_7d_winrate': 0.0,
        'stats_7d_tokens_traded': 0,
    }
    social2 = {'has_pf_profile': False, 'has_twitter': False, 'has_telegram': False, 'is_exchange_wallet': True, 'username': 'cex_wallet'}
    holdings2 = []

    def _mock_token_data() -> Dict[str, Any]:
        # Registers one token's raw address/name/symbol/image with the pooler
        # and returns the resulting index record.
        # NOTE(review): all four mock tokens (tknA..tknD) are built from
        # token A's raw data ('tknA', 'Token A', 'TKA', blue image). This
        # matches the original mock; confirm whether distinct data per
        # token was intended.
        return {
            'address_emb_idx': pooler.get_idx('tknA'),
            'name_emb_idx': pooler.get_idx('Token A'),
            'symbol_emb_idx': pooler.get_idx('TKA'),
            'image_emb_idx': pooler.get_idx(Image.new('RGB',(256,256), color='blue')),
            'protocol': 1
        }

    tokenA_data = _mock_token_data()

    # Register the wallet usernames and store the returned index next to
    # the raw username on each socials record.
    social1['username_emb_idx'] = pooler.get_idx(social1['username'])
    social2['username_emb_idx'] = pooler.get_idx(social2['username'])

    # Third wallet, referenced by the XRetweet / XQuoteTweet events below.
    social3 = {'has_pf_profile': False, 'has_twitter': True, 'has_telegram': True, 'is_exchange_wallet': False, 'username': 'social_butterfly'}
    social3['username_emb_idx'] = pooler.get_idx(social3['username'])

    tokenB_data = _mock_token_data()
    tokenC_data = _mock_token_data()
    tokenD_data = _mock_token_data()

    # Text of the mock "main" tweet; the reply, retweet and quote events
    # below all refer back to it.
    main_tweet_text = 'This is the main tweet about $TKA'

    # NOTE(review): the events are not sorted by timestamp, and several
    # relative_ts values (the X* events, the first two Chart_Segment events,
    # MIDDLE and RECENT) do not equal timestamp minus the Mint timestamp.
    # The values are kept exactly as in the original mock -- confirm whether
    # that is intentional.
    item = {
        'event_sequence': [
            {
                'event_type': 'XPost',
                'timestamp': 1729711350,
                'relative_ts': -25,
                'wallet_address': 'addrW1',  # Author
                'text_emb_idx': pooler.get_idx(main_tweet_text),
                'media_emb_idx': pooler.get_idx(Image.new('RGB', (100,100), color='cyan'))
            },
            {
                'event_type': 'XReply',
                'timestamp': 1729711360,
                'relative_ts': -35,
                'wallet_address': 'addrW2',  # Replier
                'text_emb_idx': pooler.get_idx('This is a reply to the main tweet'),
                'media_emb_idx': pooler.get_idx(None),  # No media in reply
                'main_tweet_text_emb_idx': pooler.get_idx(main_tweet_text)
            },
            {
                'event_type': 'XRetweet',
                'timestamp': 1729711370,
                'relative_ts': -40,
                'wallet_address': 'addrW3',  # The retweeter
                'original_author_wallet_address': 'addrW1',  # The original author
                'original_post_text_emb_idx': pooler.get_idx(main_tweet_text),
                'original_post_media_emb_idx': pooler.get_idx(Image.new('RGB', (100,100), color='cyan'))
            },
            # Pre-launch event: a negative relative_ts places it before the Mint.
            {
                'event_type': 'Transfer',
                'timestamp': 1729711180,
                'relative_ts': -10,
                'wallet_address': 'addrW2',
                'destination_wallet_address': 'addrW1',
                'token_address': 'tknA',
                'token_amount': 1000.0,
                'transfer_pct_of_total_supply': 0.0,
                'transfer_pct_of_holding': 0.0,
                'priority_fee': 0.0
            },
            {
                'event_type': 'Mint',
                'timestamp': 1729711190,
                'relative_ts': 0,
                'wallet_address': 'addrW1',
                'token_address': 'tknA'
            },
            {
                'event_type': 'Chart_Segment',
                'timestamp': 1729711200,
                'relative_ts': 60,
                'opens': [1.0]*OHLC_SEQ_LEN,
                'closes': [1.1]*OHLC_SEQ_LEN,
                'i': '1s'
            },
            {
                'event_type': 'Chart_Segment',
                'timestamp': 1729711260,
                'relative_ts': 120,
                'opens': [1.0]*OHLC_SEQ_LEN,
                'closes': [1.1]*OHLC_SEQ_LEN,
                'i': '1s'
            },
            {
                'event_type': 'Transfer',
                'timestamp': 1729711210,
                'relative_ts': 20,
                'wallet_address': 'addrW1',  # Source
                'destination_wallet_address': 'addrW2',  # Destination
                'token_address': 'tknA',
                'token_amount': 500.0,
                'transfer_pct_of_total_supply': 0.005,
                'transfer_pct_of_holding': 0.1,
                'priority_fee': 0.0001
            },
            # The four trade variants take the integer platform ID directly.
            {
                'event_type': 'Trade',
                'timestamp': 1729711220,
                'relative_ts': 30,
                'wallet_address': 'addrW1',
                'token_address': 'tknA',
                'trade_direction': 0,
                'sol_amount': 0.5,
                'dex_platform_id': vocab.DEX_TO_ID['Axiom'],
                'priority_fee': 0.0002,
                'mev_protection': False,
                'token_amount_pct_of_holding': 0.05,
                'quote_amount_pct_of_holding': 0.02,
                'slippage': 0.01,
                'price_impact': 0.005,
                'success': True,
                'is_bundle': False,
                'total_usd': 75.0
            },
            {
                'event_type': 'Deployer_Trade',
                'timestamp': 1729711230,
                'relative_ts': 40,
                'wallet_address': 'addrW1',  # The creator wallet
                'token_address': 'tknA',
                'trade_direction': 1,
                'sol_amount': 0.2,
                'dex_platform_id': vocab.DEX_TO_ID['Trojan'],
                'priority_fee': 0.0005,
                'mev_protection': True,
                'token_amount_pct_of_holding': 0.1,
                'quote_amount_pct_of_holding': 0.0,
                'slippage': 0.02,
                'price_impact': 0.01,
                'success': True,
                'is_bundle': False,
                'total_usd': 30.0
            },
            {
                'event_type': 'SmartWallet_Trade',
                'timestamp': 1729711240,
                'relative_ts': 50,
                'wallet_address': 'addrW1',  # A known smart wallet
                'token_address': 'tknA',
                'trade_direction': 0,
                'sol_amount': 1.5,
                'dex_platform_id': vocab.DEX_TO_ID['Axiom'],
                'priority_fee': 0.001,
                'mev_protection': True,
                'token_amount_pct_of_holding': 0.2,
                'quote_amount_pct_of_holding': 0.1,
                'slippage': 0.01,
                'price_impact': 0.008,
                'success': True,
                'is_bundle': False,
                'total_usd': 225.0
            },
            {
                'event_type': 'LargeTrade',
                'timestamp': 1729711250,
                'relative_ts': 60,
                'wallet_address': 'addrW2',  # Some other wallet
                'token_address': 'tknA',
                'trade_direction': 0,
                'sol_amount': 10.0,
                'dex_platform_id': vocab.DEX_TO_ID['OXK'],
                'priority_fee': 0.002,
                'mev_protection': False,
                'token_amount_pct_of_holding': 0.8,
                'quote_amount_pct_of_holding': 0.5,
                'slippage': 0.03,
                'price_impact': 0.05,
                'success': True,
                'is_bundle': False,
                'total_usd': 1500.0
            },
            {
                'event_type': 'Chart_Segment',
                'timestamp': 1729711260,
                'relative_ts': 70,
                'opens': [1.0]*OHLC_SEQ_LEN,
                'closes': [1.1]*OHLC_SEQ_LEN,
                'i': '1s'
            },
            {
                'event_type': 'PoolCreated',
                'timestamp': 1729711270,
                'relative_ts': 80,
                'wallet_address': 'addrW1',
                'protocol_id': vocab.PROTOCOL_TO_ID['Raydium CPMM'],
                'quote_token_address': 'tknB',
                'base_amount': 1000000.0,
                'quote_amount': 10.0
            },
            {
                'event_type': 'LiquidityChange',
                'timestamp': 1729711280,
                'relative_ts': 90,
                'wallet_address': 'addrW2',
                'quote_token_address': 'tknB',
                'change_type_id': 0,  # 0 for 'add'
                'quote_amount': 2.0
            },
            {
                'event_type': 'FeeCollected',
                'timestamp': 1729711290,
                'relative_ts': 100,
                'wallet_address': 'addrW1',  # The recipient (e.g., dev wallet)
                'sol_amount': 0.1
            },
            {
                'event_type': 'TokenBurn',
                'timestamp': 1729711300,
                'relative_ts': 110,
                'wallet_address': 'addrW2',  # The burner wallet
                'amount_pct_of_total_supply': 0.01,  # 1% of supply
                'amount_tokens_burned': 10000000.0
            },
            {
                'event_type': 'SupplyLock',
                'timestamp': 1729711310,
                'relative_ts': 120,
                'wallet_address': 'addrW1',  # The locker wallet
                'amount_pct_of_total_supply': 0.10,  # 10% of supply
                'lock_duration': 2592000  # 30 days in seconds
            },
            {
                'event_type': 'HolderSnapshot',
                'timestamp': 1729711320,
                'relative_ts': 130,
                # Raw holder data (wallet + share held); add more mock
                # holders here if needed.
                'holders': [
                    {'wallet': 'addrW1', 'holding_pct': 0.15},
                    {'wallet': 'addrW2', 'holding_pct': 0.05},
                ]
            },
            {
                'event_type': 'OnChain_Snapshot',
                'timestamp': 1729711320,
                'relative_ts': 130,
                'total_holders': 500,
                'smart_traders': 25,
                'kols': 3,
                'holder_growth_rate': 0.15,
                'top_10_holder_pct': 0.22,
                'sniper_holding_pct': 0.05,
                'rat_wallets_holding_pct': 0.02,
                'bundle_holding_pct': 0.01,
                'current_market_cap': 150000.0,
                'volume': 50000.0,
                'buy_count': 120,
                'sell_count': 80,
                'total_txns': 200,
                'global_fees_paid': 1.5
            },
            {
                'event_type': 'TrendingToken',
                'timestamp': 1729711330,
                'relative_ts': 140,
                'token_address': 'tknC',  # The token that is trending
                'list_source_id': vocab.TRENDING_LIST_SOURCE_TO_ID['Phantom'],
                'timeframe_id': vocab.TRENDING_LIST_TIMEFRAME_TO_ID['1h'],
                'rank': 3
            },
            {
                'event_type': 'BoostedToken',
                'timestamp': 1729711340,
                'relative_ts': 150,
                'token_address': 'tknD',  # The token that is boosted
                'total_boost_amount': 5000.0,
                'rank': 1
            },
            {
                'event_type': 'XQuoteTweet',
                'timestamp': 1729711380,
                'relative_ts': 190,
                'wallet_address': 'addrW3',  # The quoter
                'quoter_text_emb_idx': pooler.get_idx('Wow, look at this! $TKA'),
                'original_author_wallet_address': 'addrW1',  # The original author
                'original_post_text_emb_idx': pooler.get_idx(main_tweet_text),
                'original_post_media_emb_idx': pooler.get_idx(Image.new('RGB', (100,100), color='cyan'))
            },
            # Special context token (carries only a type and timestamps).
            {'event_type': 'MIDDLE', 'timestamp': 1729711500, 'relative_ts': 195},
            {
                'event_type': 'PumpReply',
                'timestamp': 1729711390,
                'relative_ts': 200,
                'wallet_address': 'addrW2',  # The user who replied
                'reply_text_emb_idx': pooler.get_idx('to the moon!')
            },
            {
                'event_type': 'DexBoost_Paid',
                'timestamp': 1729711400,
                'relative_ts': 210,
                'amount': 5.0,  # e.g., 5 Boost
                'total_amount_on_token': 25.0  # 25 Boost Points
            },
            {
                'event_type': 'DexProfile_Updated',
                'timestamp': 1729711410,
                'relative_ts': 220,
                'has_changed_website_flag': True,
                'has_changed_twitter_flag': False,
                'has_changed_telegram_flag': True,
                'has_changed_description_flag': True,
                # Text payloads are registered with the pooler like any other text.
                'website_emb_idx': pooler.get_idx('new-token-website.com'),
                'twitter_link_emb_idx': pooler.get_idx('old_handle'),  # No change, so old link
                'telegram_link_emb_idx': pooler.get_idx('new_tg_group'),
                'description_emb_idx': pooler.get_idx('This is the new and improved token description.')
            },
            {
                'event_type': 'AlphaGroup_Call',
                'timestamp': 1729711420,
                'relative_ts': 230,
                'group_id': vocab.ALPHA_GROUPS_TO_ID['Potion']
            },
            # NOTE(review): the event catalogue earlier in this file names
            # this event 'Call_Channel', while the mock uses 'Channel_Call'.
            # Kept as in the original -- verify against the event vocabulary.
            {
                'event_type': 'Channel_Call',
                'timestamp': 1729711430,
                'relative_ts': 240,
                'channel_id': vocab.CALL_CHANNELS_TO_ID['MarcosCalls']
            },
            # Special context token (carries only a type and timestamps).
            {'event_type': 'RECENT', 'timestamp': 1729711510, 'relative_ts': 245},
            {
                'event_type': 'CexListing',
                'timestamp': 1729711440,
                'relative_ts': 250,
                'exchange_id': vocab.EXCHANGES_TO_ID['mexc']
            },
            {
                'event_type': 'TikTok_Trending_Hashtag',
                'timestamp': 1729711450,
                'relative_ts': 260,
                'hashtag_name_emb_idx': pooler.get_idx('CryptoTok'),
                'rank': 5
            },
            {
                'event_type': 'XTrending_Hashtag',
                'timestamp': 1729711460,
                'relative_ts': 270,
                'hashtag_name_emb_idx': pooler.get_idx('SolanaMemes'),
                'rank': 2
            },
            {
                'event_type': 'ChainSnapshot',
                'timestamp': 1729711470,
                'relative_ts': 280,
                'native_token_price_usd': 150.75,
                'gas_fee': 0.00015  # Example gas fee
            },
            {
                'event_type': 'Lighthouse_Snapshot',
                'timestamp': 1729711480,
                'relative_ts': 290,
                'protocol_id': vocab.PROTOCOL_TO_ID['Pump V1'],
                'timeframe_id': vocab.LIGHTHOUSE_TIMEFRAME_TO_ID['1h'],
                'total_volume': 1.2e6,
                'total_transactions': 5000,
                'total_traders': 1200,
                'total_tokens_created': 85,
                'total_migrations': 70
            },
            {
                'event_type': 'Migrated',
                'timestamp': 1729711490,
                'relative_ts': 300,
                'protocol_id': vocab.PROTOCOL_TO_ID['Raydium CPMM']
            },
        ],
        'wallets': {
            'addrW1': {'profile': profile1, 'socials': social1, 'holdings': holdings1},
            'addrW2': {'profile': profile2, 'socials': social2, 'holdings': holdings2},
            'addrW3': {
                # Reuses profile2's values with only the address replaced.
                'profile': {**profile2, 'wallet_address': 'addrW3'},
                'socials': social3,
                'holdings': []
            }
        },
        'tokens': {
            'tknA': tokenA_data,  # Main token
            'tknB': tokenB_data,  # Quote token
            'tknC': tokenC_data,  # Trending token
            'tknD': tokenD_data   # Boosted token
        },
        # The pooler itself is handed over so the embedding tensor can be
        # generated later, after all items have been collected.
        'embedding_pooler': pooler,
        # One example per link type; each entry pairs a list of link
        # attribute dicts with a list of (source, target) edges.
        'graph_links': {
            'TransferLink': {
                'links': [{'timestamp': 1729711205}],
                'edges': [('addrW1', 'addrW2')]
            },
            'BundleTradeLink': {
                'links': [{'timestamp': 1729711215}],
                'edges': [('addrW1', 'addrW2')]
            },
            'CopiedTradeLink': {
                'links': [
                    {'time_gap_on_buy_sec': 10, 'time_gap_on_sell_sec': 120, 'leader_pnl': 5.0, 'follower_pnl': 4.0, 'follower_buy_total': 100, 'follower_sell_total': 120}
                ],
                'edges': [('addrW1', 'addrW2')]
            },
            'CoordinatedActivityLink': {
                'links': [
                    {'time_gap_on_first_sec': 5, 'time_gap_on_second_sec': 8}
                ],
                'edges': [('addrW1', 'addrW2')]
            },
            'MintedLink': {
                'links': [
                    {'timestamp': 1729711200, 'buy_amount': 1e9}
                ],
                'edges': [('addrW1', 'tknA')]
            },
            'SnipedLink': {
                'links': [
                    {'rank': 1, 'sniped_amount': 5e8}
                ],
                'edges': [('addrW1', 'tknA')]
            },
            'LockedSupplyLink': {
                'links': [
                    {'amount': 1e10}  # Only amount is needed
                ],
                'edges': [('addrW1', 'tknA')]
            },
            'BurnedLink': {
                'links': [
                    {'timestamp': 1729711300}  # Only timestamp is needed
                ],
                'edges': [('addrW2', 'tknA')]
            },
            'ProvidedLiquidityLink': {
                'links': [
                    {'timestamp': 1729711250}  # Only timestamp is needed
                ],
                'edges': [('addrW1', 'tknA')]
            },
            'WhaleOfLink': {
                'links': [
                    {}  # Just the existence of the link is the feature
                ],
                'edges': [('addrW1', 'tknA')]
            },
            'TopTraderOfLink': {
                'links': [
                    {'pnl_at_creation': 50000.0}  # Only PnL is needed
                ],
                'edges': [('addrW2', 'tknA')]
            }
        },
        # Random targets plus an all-ones mask, or empty tensors when the
        # dataset is configured with no outputs.
        'labels': torch.randn(self.num_outputs) if self.num_outputs > 0 else torch.zeros(0),
        'labels_mask': torch.ones(self.num_outputs) if self.num_outputs > 0 else torch.zeros(0)
    }

    print("Mock raw batch created.")

    return item