# Ken Sang Tang
# Update app.py
# 168efed verified
# main.py
import os
import warnings

import matplotlib.pyplot as plt
import pandas as pd
import pandas_ta as ta
import torch
import yfinance as yf
from alpaca_trade_api.rest import REST, TimeFrame
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModelForSequenceClassification
# Suppress FutureWarning noise for the rest of the run.
warnings.filterwarnings("ignore", category=FutureWarning)

# Configuration
# Credentials are read from the environment so that no secret lives in source
# control. The variable names follow the alpaca-trade-api convention
# (APCA_API_KEY_ID / APCA_API_SECRET_KEY) -- confirm against the SDK README.
# NOTE: the key pair that used to be hard-coded here has been exposed and
# should be revoked and regenerated in the Alpaca dashboard.
ALPACA_API_KEY = os.environ.get('APCA_API_KEY_ID')
ALPACA_SECRET_KEY = os.environ.get('APCA_API_SECRET_KEY')
ALPACA_BASE_URL = 'https://paper-api.alpaca.markets'
MODEL_NAME = "databricks/dolly-v2-3b"
FINBERT_MODEL_NAME = "yiyanghkust/finbert-tone"
SYMBOL = '^KLSE'  # KLCI index symbol
START_DATE = '2020-01-01'
END_DATE = '2024-10-31'

# Initialize Alpaca API
api = REST(ALPACA_API_KEY, ALPACA_SECRET_KEY, ALPACA_BASE_URL)
# Load Models
print("Loading Dolly model...")
dolly_tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# Weights are loaded in full (float32) precision. The local Hugging Face cache
# is reused instead of forcing a fresh multi-gigabyte download on every start.
dolly_model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype=torch.float32)
# NOTE(review): gradient checkpointing reduces memory during training
# (backward passes); it is not expected to help this inference-only script.
# Kept to preserve the original setup -- consider removing.
dolly_model.gradient_checkpointing_enable()

print("Loading FinBERT model...")
finbert_tokenizer = AutoTokenizer.from_pretrained(FINBERT_MODEL_NAME)
finbert_model = AutoModelForSequenceClassification.from_pretrained(FINBERT_MODEL_NAME)
# Step 1: Fetch KLCI Data
def fetch_stock_data(symbol, start_date, end_date):
    """Download price history for *symbol* and attach technical indicators.

    Args:
        symbol: Ticker understood by yfinance (e.g. ``'^KLSE'``).
        start_date: Start of the date range, passed to ``yf.download``.
        end_date: End of the date range, passed to ``yf.download``.

    Returns:
        The downloaded DataFrame after ``add_technical_indicators`` has been
        applied to it.

    Raises:
        ValueError: If nothing could be downloaded.
    """
    print("Fetching stock data...")
    data = yf.download(symbol, start=start_date, end=end_date)
    if data is None or data.empty:
        raise ValueError("Stock data could not be fetched. Please check the symbol and date range.")

    # Recent yfinance releases return (field, ticker) MultiIndex columns even
    # for a single symbol; flatten them so data['Close'] is a Series, which is
    # what the indicator functions expect.
    if isinstance(data.columns, pd.MultiIndex):
        data.columns = data.columns.get_level_values(0)

    return add_technical_indicators(data)
# Step 2: Add Technical Indicators
def add_technical_indicators(data):
    """Add RSI, MACD and Bollinger Band columns to *data* in place.

    Columns written (when the underlying indicator is available):
    ``RSI``, ``MACD``, ``BB_upper``, ``BB_middle``, ``BB_lower``.

    Args:
        data: DataFrame with a ``Close`` column.

    Returns:
        The same DataFrame, with the indicator columns added.
    """
    print("Adding technical indicators...")
    close = data['Close']
    data['RSI'] = ta.rsi(close, length=14)

    # Calculate MACD and keep only the MACD line.
    macd = ta.macd(close, fast=12, slow=26)
    if macd is not None and 'MACD_12_26_9' in macd.columns:
        data['MACD'] = macd['MACD_12_26_9']
    else:
        print("MACD data not available.")

    bbands = ta.bbands(close)
    if bbands is None:
        print("Bollinger Bands data not available.")
        return data

    # Match band columns by prefix rather than by exact name, because the
    # suffix (e.g. "_20_2.0") depends on parameters and library version.
    prefixes = {'BB_upper': 'BBU_', 'BB_middle': 'BBM_', 'BB_lower': 'BBL_'}
    bands = {}
    for target, prefix in prefixes.items():
        source = next((col for col in bbands.columns if str(col).startswith(prefix)), None)
        if source is not None:
            bands[target] = bbands[source]

    # Rolling indicators always start with NaNs (warm-up window), so only
    # reject the bands when one is missing or entirely empty. Rejecting on
    # any NaN would discard the bands every time.
    usable = len(bands) == len(prefixes) and not any(
        series.isnull().all() for series in bands.values()
    )
    if usable:
        for target, series in bands.items():
            data[target] = series
    else:
        print("Bollinger Bands data is incomplete. Dropping these columns.")
        data.drop(columns=list(prefixes), inplace=True, errors='ignore')
    return data
# Step 3: Analyze Sentiment using FinBERT
def analyze_sentiment(text):
    """Classify *text* with the FinBERT model and return its sentiment label.

    Args:
        text: Text to classify.

    Returns:
        ``"Negative"``, ``"Neutral"`` or ``"Positive"`` for the class with the
        highest probability.
    """
    print("Analyzing sentiment...")
    # Truncate long inputs; BERT-style encoders are typically limited to 512
    # positions (TODO confirm for this checkpoint).
    inputs = finbert_tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
    with torch.no_grad():  # inference only, no gradients needed
        outputs = finbert_model(**inputs)
    predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
    sentiment = torch.argmax(predictions).item()

    # Take the class names from the model configuration instead of assuming an
    # order. If the config only carries generic "LABEL_n" names, fall back to
    # the order given on the finbert-tone model card
    # (0 = neutral, 1 = positive, 2 = negative).
    label = str(finbert_model.config.id2label.get(sentiment, ""))
    if not label or label.upper().startswith("LABEL_"):
        label = ("Neutral", "Positive", "Negative")[sentiment]
    return label.capitalize()
# Step 4: Generate Dolly Prediction
def generate_prediction(prompt, max_new_tokens=100):
    """Generate a text continuation of *prompt* with the Dolly model.

    Args:
        prompt: Text to condition the generation on.
        max_new_tokens: Upper bound on the number of tokens generated in
            addition to the prompt.

    Returns:
        The decoded first output sequence with special tokens removed.
    """
    print("Generating prediction...")
    inputs = dolly_tokenizer(prompt, return_tensors="pt")
    # Bound the number of *new* tokens. A total-length cap (max_length) also
    # counts the prompt, so a long prompt would leave little or no room for
    # any generated text.
    outputs = dolly_model.generate(**inputs, max_new_tokens=max_new_tokens)
    return dolly_tokenizer.decode(outputs[0], skip_special_tokens=True)
# Step 5: Execute Trade with Alpaca
def execute_trade(signal, symbol='Genting Bhd', qty=1):
    """Submit a market order through the Alpaca client for a buy/sell signal.

    Any other signal is announced but results in no order. Errors raised
    while submitting are caught and printed rather than propagated.

    Args:
        signal: ``"buy"`` or ``"sell"``.
        symbol: Symbol passed to ``api.submit_order``.
        qty: Quantity passed to ``api.submit_order``.
    """
    try:
        print(f"Executing trade signal: {signal}")
        for side in ("buy", "sell"):
            if signal == side:
                api.submit_order(
                    symbol=symbol,
                    qty=qty,
                    side=side,
                    type='market',
                    time_in_force='gtc',
                )
                print(f"Executed {side} order for {qty} shares of {symbol}.")
                break
    except Exception as e:
        print(f"Error executing trade: {e}")
# Step 6: Plot KLCI Data with Technical Indicators
def plot_klci(data):
    """Plot the closing price and, when all three are present, the Bollinger Bands.

    Args:
        data: DataFrame with a ``Close`` column and optionally the
            ``BB_upper`` / ``BB_middle`` / ``BB_lower`` columns.
    """
    # (column, legend label, line colour) for each band.
    band_styles = (
        ('BB_upper', 'Bollinger Upper Band', 'red'),
        ('BB_middle', 'Bollinger Middle Band', 'green'),
        ('BB_lower', 'Bollinger Lower Band', 'red'),
    )

    plt.figure(figsize=(14, 7))
    plt.plot(data['Close'], label='KLCI Close Price', color='blue')

    # The bands are drawn only as a complete set.
    if all(column in data.columns for column, _, _ in band_styles):
        for column, legend_label, line_color in band_styles:
            plt.plot(data[column], label=legend_label, color=line_color)

    plt.title('KLCI with Technical Indicators')
    plt.legend()
    plt.show()
# Step 7: Main Function to Run Pipeline
def main():
    """Run the pipeline: fetch data, score sentiment, prompt Dolly, trade, plot.

    Any exception raised along the way is caught and printed, so this
    function does not propagate errors to the caller.
    """
    print("initializing main function...")
    try:
        # 1. Fetch and Prepare Data
        klci_data = fetch_stock_data(SYMBOL, START_DATE, END_DATE)

        # 2. Run FinBERT Sentiment Analysis
        sample_text = "The market sentiment is bullish for KLCI."  # Example text
        sentiment = analyze_sentiment(sample_text)
        print(f"Sentiment: {sentiment}")

        # 3. Generate Dolly Prediction
        # Only reference indicator columns that were actually produced; any of
        # them may be missing if the indicator could not be computed.
        indicators = ['RSI', 'MACD', 'BB_upper', 'BB_middle', 'BB_lower']
        available_columns = [col for col in indicators if col in klci_data.columns]
        indicator_data = klci_data[available_columns].iloc[-1].to_dict()
        prompt = f"The market trend for KLCI with sentiment {sentiment} and indicators {indicator_data}"
        prediction = generate_prediction(prompt)
        print(f"Dolly Prediction: {prediction}")

        # 4. Decide Buy/Sell based on Prediction and Execute Trade
        # NOTE(review): SYMBOL is an index ticker; confirm that the broker
        # accepts it as a tradable symbol.
        prediction_text = prediction.lower()
        if "buy" in prediction_text:
            execute_trade("buy", symbol=SYMBOL, qty=1)
        elif "sell" in prediction_text:
            execute_trade("sell", symbol=SYMBOL, qty=1)
        else:
            print("No clear trade signal from prediction.")

        # 5. Plot KLCI Data with Indicators
        if available_columns:
            plot_klci(klci_data)
    except Exception as e:
        print(f"An error occurred: {e}")
# Run the pipeline only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()