# main.py
"""KLCI analysis pipeline.

Downloads price history for the configured symbol, derives technical
indicators (RSI, MACD, Bollinger Bands), scores a sample sentence with
FinBERT, asks Dolly for a free-text prediction, submits a market order through
Alpaca when the prediction mentions "buy" or "sell", and plots the result.
"""
import os
import warnings

from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModelForSequenceClassification
from alpaca_trade_api.rest import REST, TimeFrame
import yfinance as yf
import pandas as pd
import pandas_ta as ta
import torch
import matplotlib.pyplot as plt

# Suppress specific FutureWarning
warnings.filterwarnings("ignore", category=FutureWarning)

# Configuration
# SECURITY: credentials must never live in source control. They are read from
# the environment instead; any key pair that was previously committed to this
# file should be treated as leaked and revoked.
# NOTE(review): the variable names below follow the Alpaca SDK convention;
# the SDK is expected to reject missing credentials -- confirm.
ALPACA_API_KEY = os.environ.get('APCA_API_KEY_ID')
ALPACA_SECRET_KEY = os.environ.get('APCA_API_SECRET_KEY')
ALPACA_BASE_URL = os.environ.get('APCA_API_BASE_URL', 'https://paper-api.alpaca.markets')
MODEL_NAME = "databricks/dolly-v2-3b"
FINBERT_MODEL_NAME = "yiyanghkust/finbert-tone"
SYMBOL = '^KLSE'  # KLCI index symbol
START_DATE = '2020-01-01'
END_DATE = '2024-10-31'

# Target column -> prefix of the column name produced by ta.bbands(). Matching
# on the prefix avoids depending on the exact suffix (length / std-dev), which
# is not guaranteed to be identical across pandas_ta versions.
_BBAND_PREFIXES = {
    'BB_upper': 'BBU_',
    'BB_middle': 'BBM_',
    'BB_lower': 'BBL_',
}

# Initialize Alpaca API
api = REST(ALPACA_API_KEY, ALPACA_SECRET_KEY, ALPACA_BASE_URL)

# Load Models
dolly_tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# Loaded in float32 (full precision). The cached checkpoint is reused instead
# of forcing a fresh multi-gigabyte download on every run. Gradient
# checkpointing is not enabled: it is a training-time memory optimisation and
# this script only runs inference.
dolly_model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype=torch.float32)
dolly_model.eval()

print("Loading FinBERT model...")
finbert_tokenizer = AutoTokenizer.from_pretrained(FINBERT_MODEL_NAME)
finbert_model = AutoModelForSequenceClassification.from_pretrained(FINBERT_MODEL_NAME)


# Step 1: Fetch KLCI Data
def fetch_stock_data(symbol, start_date, end_date):
    """Download price history for *symbol* and attach technical indicators.

    Args:
        symbol: Ticker understood by yfinance.
        start_date: Start of the range, passed to ``yf.download`` as ``start``.
        end_date: End of the range, passed to ``yf.download`` as ``end``.

    Returns:
        The downloaded DataFrame with indicator columns added by
        ``add_technical_indicators``.

    Raises:
        ValueError: If nothing could be downloaded.
    """
    print("Fetching stock data...")
    data = yf.download(symbol, start=start_date, end=end_date)
    if data is None or data.empty:
        raise ValueError("Stock data could not be fetched. Please check the symbol and date range.")

    # Some yfinance versions return (field, ticker) MultiIndex columns even for
    # a single ticker, which would make data['Close'] a DataFrame rather than
    # a Series. Keep only the field level so the indicator code sees 'Close'.
    # NOTE(review): assumes a single ticker is requested -- confirm if this is
    # ever called with several symbols.
    if isinstance(data.columns, pd.MultiIndex):
        data.columns = data.columns.get_level_values(0)

    return add_technical_indicators(data)


# Step 2: Add Technical Indicators
def add_technical_indicators(data):
    """Add RSI, MACD and Bollinger Band columns to *data* in place.

    Args:
        data: DataFrame with a ``Close`` column.

    Returns:
        The same DataFrame. ``RSI`` is always assigned; ``MACD`` and the
        ``BB_upper`` / ``BB_middle`` / ``BB_lower`` columns are only present
        when pandas_ta produced usable output for them.
    """
    print("Adding technical indicators...")
    data['RSI'] = ta.rsi(data['Close'], length=14)

    # Calculate MACD and select only the MACD line
    macd = ta.macd(data['Close'], fast=12, slow=26)
    if macd is not None and 'MACD_12_26_9' in macd.columns:
        data['MACD'] = macd['MACD_12_26_9']
    else:
        print("MACD data not available.")

    # Add Bollinger Bands and handle potential missing columns
    bbands = ta.bbands(data['Close'])
    if bbands is None:
        print("Bollinger Bands data not available.")
        return data

    bands = {}
    for target, prefix in _BBAND_PREFIXES.items():
        source = next((col for col in bbands.columns if str(col).startswith(prefix)), None)
        if source is not None:
            bands[target] = bbands[source]

    # A rolling band always starts with NaNs during its warm-up window, so
    # "contains any NaN" would reject every result. Only discard the bands
    # when one is missing altogether or carries no values at all.
    unusable = len(bands) != len(_BBAND_PREFIXES) or any(
        series.isnull().all() for series in bands.values()
    )
    if unusable:
        print("Bollinger Bands data is incomplete or has NaNs. Dropping these columns.")
        data.drop(columns=list(_BBAND_PREFIXES), inplace=True, errors='ignore')
    else:
        for target, series in bands.items():
            data[target] = series

    return data


# Step 3: Analyze Sentiment using FinBERT
def analyze_sentiment(text):
    """Classify the tone of *text* with FinBERT.

    Args:
        text: Text to classify.

    Returns:
        The capitalised label of the highest-scoring class, taken from the
        model's own ``id2label`` mapping.
    """
    print("Analyzing sentiment...")
    # Truncate so over-long input cannot exceed the encoder's position limit.
    # NOTE(review): 512 is the usual BERT limit -- confirm for this checkpoint.
    inputs = finbert_tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
    with torch.no_grad():
        outputs = finbert_model(**inputs)
    predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
    sentiment = torch.argmax(predictions).item()
    # Read the label from the checkpoint configuration rather than a
    # hard-coded list, whose order need not match the model's class order.
    label = finbert_model.config.id2label[sentiment]
    return str(label).capitalize()


# Step 4: Generate Dolly Prediction
def generate_prediction(prompt, max_new_tokens=100):
    """Generate a free-text continuation of *prompt* with Dolly.

    Args:
        prompt: Text to continue.
        max_new_tokens: Upper bound on the number of generated tokens. This
            bounds only the continuation, so a long prompt does not use up
            the budget.

    Returns:
        The decoded output sequence with special tokens removed.
    """
    print("Generating prediction...")
    inputs = dolly_tokenizer(prompt, return_tensors="pt")
    outputs = dolly_model.generate(**inputs, max_new_tokens=max_new_tokens)
    return dolly_tokenizer.decode(outputs[0], skip_special_tokens=True)


# Step 5: Execute Trade with Alpaca
def execute_trade(signal, symbol='Genting Bhd', qty=1):
    """Submit a market order through Alpaca for a ``"buy"`` or ``"sell"`` signal.

    Any other signal is reported and ignored. Errors raised while submitting
    are printed rather than propagated.

    Args:
        signal: ``"buy"`` or ``"sell"``.
        symbol: Symbol to trade.
        qty: Number of shares.
    """
    # NOTE(review): the default symbol looks like a company name rather than a
    # ticker, and main() passes an index symbol -- confirm that the broker
    # accepts either before relying on this.
    if signal not in ("buy", "sell"):
        print(f"Unknown trade signal: {signal}")
        return
    try:
        print(f"Executing trade signal: {signal}")
        api.submit_order(symbol=symbol, qty=qty, side=signal, type='market', time_in_force='gtc')
        print(f"Executed {signal} order for {qty} shares of {symbol}.")
    except Exception as e:
        print(f"Error executing trade: {e}")


# Step 6: Plot KLCI Data with Technical Indicators
def plot_klci(data):
    """Plot the close price and, when all three are present, the Bollinger Bands.

    Args:
        data: DataFrame with a ``Close`` column and optional ``BB_upper``,
            ``BB_middle`` and ``BB_lower`` columns.
    """
    plt.figure(figsize=(14, 7))
    plt.plot(data['Close'], label='KLCI Close Price', color='blue')
    if all(col in data.columns for col in ('BB_upper', 'BB_middle', 'BB_lower')):
        plt.plot(data['BB_upper'], label='Bollinger Upper Band', color='red')
        plt.plot(data['BB_middle'], label='Bollinger Middle Band', color='green')
        plt.plot(data['BB_lower'], label='Bollinger Lower Band', color='red')
    plt.title('KLCI with Technical Indicators')
    plt.legend()
    plt.show()


# Step 7: Main Function to Run Pipeline
def main():
    """Run the pipeline: fetch data, score sentiment, predict, trade and plot."""
    print("initializing main function...")
    try:
        # 1. Fetch and Prepare Data
        klci_data = fetch_stock_data(SYMBOL, START_DATE, END_DATE)

        # 2. Run FinBERT Sentiment Analysis
        sample_text = "The market sentiment is bullish for KLCI."  # Example text
        sentiment = analyze_sentiment(sample_text)
        print(f"Sentiment: {sentiment}")

        # 3. Generate Dolly Prediction
        # Only reference indicator columns that were actually produced, so a
        # missing MACD or Bollinger column cannot raise a KeyError here.
        indicators = ['RSI', 'MACD', 'BB_upper', 'BB_middle', 'BB_lower']
        available_columns = [col for col in indicators if col in klci_data.columns]
        indicator_data = klci_data[available_columns].iloc[-1].to_dict()
        prompt = f"The market trend for KLCI with sentiment {sentiment} and indicators {indicator_data}"
        prediction = generate_prediction(prompt)
        print(f"Dolly Prediction: {prediction}")

        # 4. Decide Buy/Sell based on Prediction and Execute Trade
        lowered = prediction.lower()
        if "buy" in lowered:
            execute_trade("buy", symbol=SYMBOL, qty=1)
        elif "sell" in lowered:
            execute_trade("sell", symbol=SYMBOL, qty=1)
        else:
            print("No clear trade signal from prediction.")

        # 5. Plot KLCI Data with Indicators
        if available_columns:
            plot_klci(klci_data)
    except Exception as e:
        # Top-level boundary: report the failure instead of crashing the script.
        print(f"An error occured: {e}")


# Run the main function
if __name__ == "__main__":
    main()