doulfa committed on
Commit
b004b0c
·
verified ·
1 Parent(s): 8aeacc5

Upload trading_bot/data_fetcher.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. trading_bot/data_fetcher.py +118 -0
trading_bot/data_fetcher.py ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Data Fetcher - Market Data from Multiple Sources
3
+ ==================================================
4
+ Fetches OHLCV data from exchanges via ccxt.
5
+ """
6
+ import pandas as pd
7
+ import numpy as np
8
+ from datetime import datetime, timedelta
9
+ from typing import Optional, List
10
+
11
+
12
def fetch_ohlcv_ccxt(symbol: str = 'BTC/USDT', timeframe: str = '1h',
                     exchange_name: str = 'binance', limit: int = 1000,
                     since: Optional[datetime] = None) -> pd.DataFrame:
    """Fetch up to ``limit`` OHLCV candles from an exchange via ccxt.

    Candles are requested in pages of at most 1000 rows. Each page starts one
    millisecond after the last candle of the previous page, and no page asks
    for more rows than are still missing, so the total requested never
    exceeds ``limit``.

    Args:
        symbol: Market symbol passed to ``exchange.fetch_ohlcv``.
        timeframe: Candle interval passed to ``exchange.fetch_ohlcv``.
        exchange_name: Name of the attribute on the ``ccxt`` module that holds
            the exchange class (looked up with ``getattr``).
        limit: Maximum number of candles to request in total.
        since: Start of the requested range. Converted with
            ``since.timestamp()``, so a naive datetime is interpreted in the
            local timezone. When omitted and ``limit`` fits in one page, the
            exchange decides which candles to return. When omitted and
            ``limit`` needs several pages, the start is set to ``limit``
            timeframes before the exchange clock so forward pagination has
            something to walk over.

    Returns:
        DataFrame indexed by ``timestamp`` (parsed from epoch milliseconds)
        with columns ``open``, ``high``, ``low``, ``close``, ``volume``,
        de-duplicated on the index (last occurrence wins) and sorted
        ascending. Empty when the exchange returns no candles.

    Raises:
        AttributeError: If ``ccxt`` has no attribute named ``exchange_name``.
    """
    # Imported here rather than at module level, so ccxt is only needed when
    # this function is actually called.
    import ccxt

    max_batch = 1000  # largest page size requested from the exchange

    exchange_class = getattr(ccxt, exchange_name)
    exchange = exchange_class({'enableRateLimit': True})

    if since is not None:
        since_ms = int(since.timestamp() * 1000)
    elif limit > max_batch:
        # NOTE(review): without a start time exchanges typically answer with
        # their most recent candles, after which the forward cursor below has
        # nothing left to fetch. Anchor the window `limit` bars in the past so
        # multi-page requests can be satisfied. Confirm per exchange.
        timeframe_ms = int(exchange.parse_timeframe(timeframe) * 1000)
        since_ms = int(exchange.milliseconds()) - limit * timeframe_ms
    else:
        since_ms = None

    all_ohlcv = []
    while len(all_ohlcv) < limit:
        # Never ask for more rows than are still missing.
        request_size = min(limit - len(all_ohlcv), max_batch)
        batch = exchange.fetch_ohlcv(symbol, timeframe, since=since_ms, limit=request_size)
        if not batch:
            break
        all_ohlcv.extend(batch)
        # Advance the cursor just past the last candle received.
        since_ms = batch[-1][0] + 1
        if len(batch) < request_size:
            # A short page means the exchange has no further data.
            break

    df = pd.DataFrame(all_ohlcv, columns=['timestamp', 'open', 'high', 'low', 'close', 'volume'])
    df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')
    df.set_index('timestamp', inplace=True)
    df = df[~df.index.duplicated(keep='last')]
    df.sort_index(inplace=True)

    return df
47
+
48
+
49
def generate_sample_data(days: int = 365, timeframe: str = '1h',
                         start_price: float = 40000, volatility: float = 0.02,
                         seed: Optional[int] = 42,
                         end: Optional[datetime] = None) -> pd.DataFrame:
    """Generate synthetic OHLCV data for testing.

    Closing prices follow a multiplicative random walk whose drift and
    volatility depend on a per-bar regime label ('trending' or 'ranging').
    Each bar opens at the previous bar's close (the first bar opens at
    ``start_price``). Wicks are added as non-negative noise around the bar
    body, and volume is scaled up on bars with large log-price moves.

    Random numbers come from a private ``numpy.random.RandomState`` so the
    global NumPy random state is left untouched.

    Args:
        days: Length of the series in days.
        timeframe: One of '1m', '5m', '15m', '30m', '1h', '4h', '1d'.
            Any other value falls back to 60-minute bars.
        start_price: Close (and open) of the first bar.
        volatility: Base volatility; multiplied by 1.2 in trending bars and
            0.6 in ranging bars, and scaled by ``sqrt(minutes / 1440)``.
        seed: Seed for the private random generator. ``None`` gives a
            different series on every call.
        end: Timestamp of the last bar. Defaults to ``datetime.now()``.

    Returns:
        DataFrame with columns ``open``, ``high``, ``low``, ``close``,
        ``volume`` indexed by evenly spaced timestamps ending at ``end``.

    Raises:
        ValueError: If ``days`` and ``timeframe`` yield fewer than one bar.
    """
    tf_minutes = {
        '1m': 1, '5m': 5, '15m': 15, '30m': 30,
        '1h': 60, '4h': 240, '1d': 1440
    }
    minutes = tf_minutes.get(timeframe, 60)
    total_bars = int(days * 24 * 60 / minutes)
    if total_bars < 1:
        raise ValueError(
            f"days={days!r} with timeframe={timeframe!r} yields no bars"
        )

    # Local generator: reproducible without reseeding numpy's global state.
    rng = np.random.RandomState(seed)

    # Regime switching: trending vs ranging. One geometric draw per bar is
    # compared against the time already spent in the current regime.
    regimes = []
    current_regime = 'trending'
    regime_length = 0
    for _ in range(total_bars):
        regime_length += 1
        if current_regime == 'trending' and regime_length > rng.geometric(1/100):
            current_regime = 'ranging'
            regime_length = 0
        elif current_regime == 'ranging' and regime_length > rng.geometric(1/50):
            current_regime = 'trending'
            regime_length = 0
        regimes.append(current_regime)

    # Generate closing prices bar by bar.
    bar_scale = np.sqrt(minutes / 1440)
    prices = [start_price]
    for i in range(1, total_bars):
        if regimes[i] == 'trending':
            drift = rng.choice([-1, 1]) * 0.0002
            vol = volatility * 1.2
        else:
            drift = 0
            vol = volatility * 0.6

        ret = drift + vol * rng.randn() * bar_scale
        prices.append(prices[-1] * (1 + ret))

    closes = np.array(prices, dtype=float)

    # Generate OHLCV
    last_bar = datetime.now() if end is None else end
    dates = pd.date_range(end=last_bar, periods=total_bars, freq=f'{minutes}min')

    # Each bar opens where the previous one closed, so bars have real bodies.
    opens = np.empty_like(closes)
    opens[0] = closes[0]
    opens[1:] = closes[:-1]

    noise = np.abs(rng.randn(total_bars)) * volatility * closes * 0.5
    body_high = np.maximum(opens, closes)
    body_low = np.minimum(opens, closes)
    highs = body_high + noise
    # Cap the lower wick at 10% below the body so the low cannot go negative
    # (for positive prices) and never rises above the open or the close.
    lows = np.maximum(body_low - noise, body_low * 0.9)

    # Volume with patterns
    base_volume = 1000
    volume = base_volume * (1 + np.abs(rng.randn(total_bars)) * 2)
    # Higher volume on big moves
    moves = np.abs(np.diff(np.log(closes), prepend=np.log(closes[0])))
    volume *= (1 + moves * 50)

    df = pd.DataFrame({
        'open': opens,
        'high': highs,
        'low': lows,
        'close': closes,
        'volume': volume
    }, index=dates)

    return df