File size: 6,629 Bytes
38d6fb7
 
 
 
 
 
 
7b2cec5
3b29b3b
184ce6a
 
3b29b3b
 
 
f88901d
38d6fb7
 
 
 
 
 
 
 
23fda02
38d6fb7
 
 
 
 
 
168efed
47d5964
 
38d6fb7
 
 
 
 
 
 
b607e46
 
38d6fb7
 
 
 
 
 
 
19d8247
 
 
b607e46
19d8247
 
 
 
 
 
 
df4937d
 
 
 
 
 
b607e46
 
df4937d
184ce6a
 
df4937d
38d6fb7
 
 
 
 
 
 
 
 
 
7b2cec5
38d6fb7
 
 
 
 
 
f88901d
38d6fb7
47d5964
184ce6a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b607e46
 
 
 
184ce6a
 
 
7b2cec5
184ce6a
38d6fb7
168efed
 
23fda02
 
 
83d6aef
 
 
 
 
38d6fb7
23fda02
 
 
 
38d6fb7
23fda02
 
 
 
 
83d6aef
 
 
 
 
23fda02
 
38d6fb7
23fda02
 
 
 
 
 
 
184ce6a
23fda02
83d6aef
 
23fda02
 
 
184ce6a
7b2cec5
38d6fb7
 
 
f88901d
bb46f09
f88901d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
# main.py

from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModelForSequenceClassification
from alpaca_trade_api.rest import REST, TimeFrame
import yfinance as yf
import pandas as pd
import pandas_ta as ta
import torch
import warnings
import matplotlib.pyplot as plt


# Silence every FutureWarning process-wide; the filter matches the whole
# category rather than one specific warning.
warnings.filterwarnings("ignore", category=FutureWarning)

# Configuration
# NOTE(review): the Alpaca credentials below are hard-coded in source.
# Consider loading them from the environment and rotating these values.
ALPACA_API_KEY = 'AKRII2NASCQ4UYYNMUBJ'
ALPACA_SECRET_KEY = 'amTJvuxDsojAZFVU3wEt6IZPBN9L5VvhfbwR28fj'
ALPACA_BASE_URL = 'https://paper-api.alpaca.markets'
MODEL_NAME = "databricks/dolly-v2-3b"  # checkpoint loaded as dolly_tokenizer / dolly_model
FINBERT_MODEL_NAME = "yiyanghkust/finbert-tone"  # checkpoint loaded as finbert_tokenizer / finbert_model
SYMBOL = '^KLSE'  # KLCI index symbol; main() uses it for both the download and the order
START_DATE = '2020-01-01'  # forwarded as `start` to yf.download
END_DATE = '2024-10-31'  # forwarded as `end` to yf.download

# Create the Alpaca REST client that execute_trade() submits orders through.
api = REST(ALPACA_API_KEY, ALPACA_SECRET_KEY, ALPACA_BASE_URL)

# Load Models
# These calls run at import time, so importing this module loads both models.
dolly_tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# torch.float32 is full (single) precision, not half precision.
# force_download=True asks for the checkpoint to be fetched again instead of
# reusing a cached copy.
dolly_model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype=torch.float32, force_download=True)
# NOTE(review): gradient checkpointing is a training-time memory option;
# presumably it has no effect on the generate()-only use in this file — confirm.
dolly_model.gradient_checkpointing_enable()
print("Loading FinBERT model...")
# FinBERT tokenizer and classifier used by analyze_sentiment().
finbert_tokenizer = AutoTokenizer.from_pretrained(FINBERT_MODEL_NAME)
finbert_model = AutoModelForSequenceClassification.from_pretrained(FINBERT_MODEL_NAME)

# Step 1: Fetch KLCI Data
def fetch_stock_data(symbol, start_date, end_date):
    """Download price history for *symbol* and attach technical indicators.

    Args:
        symbol: Ticker passed to ``yf.download`` (e.g. ``'^KLSE'``).
        start_date: Forwarded to ``yf.download`` as ``start``.
        end_date: Forwarded to ``yf.download`` as ``end``.

    Returns:
        The downloaded DataFrame after ``add_technical_indicators`` has added
        its indicator columns.

    Raises:
        ValueError: If the download yields no rows.
    """
    print("Fetching stock data...")
    data = yf.download(symbol, start=start_date, end=end_date)
    if data is None or data.empty:
        raise ValueError("Stock data could not be fetched. Please check the symbol and date range.")

    # Recent yfinance releases return MultiIndex columns (field, ticker) even
    # for a single symbol. In that layout data['Close'] is a DataFrame rather
    # than a Series, which the indicator functions cannot consume. Flatten to
    # plain field names, but only when doing so is unambiguous.
    if isinstance(data.columns, pd.MultiIndex):
        field_names = data.columns.get_level_values(0)
        if field_names.is_unique:
            data.columns = field_names

    return add_technical_indicators(data)

# Step 2: Add Technical Indicators
def add_technical_indicators(data):
    """Add RSI, MACD and Bollinger Band columns to *data* in place.

    Args:
        data: DataFrame with a ``'Close'`` column. It is mutated.

    Returns:
        The same DataFrame. ``'RSI'`` is always assigned; ``'MACD'`` is added
        when the MACD line is available; ``'BB_upper'``, ``'BB_middle'`` and
        ``'BB_lower'`` are added only when all three bands hold at least one
        value.
    """
    print("Adding technical indicators...")
    data['RSI'] = ta.rsi(data['Close'], length=14)

    # Calculate MACD and keep only the MACD line.
    macd = ta.macd(data['Close'], fast=12, slow=26)
    if macd is not None and 'MACD_12_26_9' in macd.columns:
        data['MACD'] = macd['MACD_12_26_9']
    else:
        print("MACD data not available.")

    bbands = ta.bbands(data['Close'])
    if bbands is None:
        print("Bollinger Bands data not available.")
        return data

    # Match band columns by prefix: the suffix encodes length/std and is not
    # spelled the same way by every pandas_ta release.
    band_prefixes = {'BB_upper': 'BBU_', 'BB_middle': 'BBM_', 'BB_lower': 'BBL_'}
    bands = {}
    for target, prefix in band_prefixes.items():
        source = next(
            (col for col in bbands.columns if str(col).startswith(prefix)),
            None,
        )
        # A rolling indicator is NaN for its warm-up rows by construction, so
        # leading NaNs are expected; only an entirely empty band is unusable.
        if source is not None and bbands[source].notna().any():
            bands[target] = bbands[source]

    if len(bands) == len(band_prefixes):
        for target, series in bands.items():
            data[target] = series
    else:
        print("Bollinger Bands data is incomplete or has NaNs. Dropping these columns.")
        data.drop(columns=list(band_prefixes), inplace=True, errors='ignore')

    return data

# Step 3: Analyze Sentiment using FinBERT
def analyze_sentiment(text):
    """Classify the tone of *text* with the FinBERT model.

    Args:
        text: A single string to classify.

    Returns:
        One of ``"Negative"``, ``"Neutral"`` or ``"Positive"``.
    """
    print("Analyzing sentiment...")
    # Truncate so over-long text cannot exceed BERT's 512-position limit.
    inputs = finbert_tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
    with torch.no_grad():  # inference only; no autograd graph needed
        logits = finbert_model(**inputs).logits
    # argmax over logits equals argmax over softmax probabilities.
    class_index = int(torch.argmax(logits, dim=-1)[0])

    # Take the label from the checkpoint's own mapping instead of assuming an
    # order: finbert-tone's model card lists 0=neutral, 1=positive, 2=negative,
    # which differs from a Negative/Neutral/Positive list.
    id2label = getattr(finbert_model.config, "id2label", None) or {}
    label = id2label.get(class_index)
    if label is None or str(label).upper().startswith("LABEL_"):
        # No meaningful names in the config; fall back to the model-card order.
        label = ("Neutral", "Positive", "Negative")[class_index]
    return str(label).capitalize()

# Step 4: Generate Dolly Prediction
def generate_prediction(prompt):
    """Generate a text continuation of *prompt* with the Dolly model.

    Args:
        prompt: Text to continue.

    Returns:
        The decoded first output sequence with special tokens removed.
    """
    print("Generating prediction...")
    inputs = dolly_tokenizer(prompt, return_tensors="pt")
    # Budget new tokens instead of total length: max_length counts the prompt
    # too, so a long indicator prompt would leave no room to generate.
    outputs = dolly_model.generate(**inputs, max_new_tokens=100)
    return dolly_tokenizer.decode(outputs[0], skip_special_tokens=True)

# Step 5: Execute Trade with Alpaca
def execute_trade(signal, symbol='Genting Bhd', qty=1):
    """Submit a market order through the module-level Alpaca client.

    Args:
        signal: ``"buy"`` or ``"sell"``; any other value submits nothing.
        symbol: Symbol forwarded to ``api.submit_order``.
            NOTE(review): the default looks like a company name rather than
            a ticker — confirm.
        qty: Quantity forwarded to ``api.submit_order``.

    Returns:
        None. Any exception raised along the way is printed, not propagated.
    """
    try:
        print(f"Executing trade signal: {signal}")
        # Arguments shared by both sides of the trade.
        order = dict(symbol=symbol, qty=qty, type='market', time_in_force='gtc')
        if signal == "buy":
            api.submit_order(side='buy', **order)
            print(f"Executed buy order for {qty} shares of {symbol}.")
        elif signal == "sell":
            api.submit_order(side='sell', **order)
            print(f"Executed sell order for {qty} shares of {symbol}.")
    except Exception as err:
        print(f"Error executing trade: {err}")

# Step 6: Plot KLCI Data with Technical Indicators
def plot_klci(data):
    """Show a chart of the close price, plus Bollinger Bands when present.

    Args:
        data: DataFrame with a ``'Close'`` column. The bands are drawn only
            if ``'BB_upper'``, ``'BB_middle'`` and ``'BB_lower'`` all exist.
    """
    # (column, legend label, line colour) for each band, in drawing order.
    band_styles = (
        ('BB_upper', 'Bollinger Upper Band', 'red'),
        ('BB_middle', 'Bollinger Middle Band', 'green'),
        ('BB_lower', 'Bollinger Lower Band', 'red'),
    )

    plt.figure(figsize=(14, 7))
    plt.plot(data['Close'], label='KLCI Close Price', color='blue')

    if all(column in data.columns for column, _, _ in band_styles):
        for column, legend_label, line_color in band_styles:
            plt.plot(data[column], label=legend_label, color=line_color)

    plt.title('KLCI with Technical Indicators')
    plt.legend()
    plt.show()

# Step 7: Main Function to Run Pipeline
def main():
    """Run the pipeline: fetch data, score sentiment, predict, trade, plot.

    Any exception raised by a step is caught and printed, so this function
    always returns None.

    NOTE(review): orders are submitted for ``SYMBOL`` (an index symbol) —
    confirm the broker accepts it.
    """
    print("initializing main function...")

    try:
        # 1. Fetch and Prepare Data
        klci_data = fetch_stock_data(SYMBOL, START_DATE, END_DATE)

        # 2. Run FinBERT Sentiment Analysis
        sample_text = "The market sentiment is bullish for KLCI."  # Example text
        sentiment = analyze_sentiment(sample_text)
        print(f"Sentiment: {sentiment}")

        # 3. Generate Dolly Prediction
        # Use only the indicator columns that exist, so a missing MACD or
        # Bollinger column cannot raise KeyError and nothing is listed twice.
        indicators = ['RSI', 'MACD', 'BB_upper', 'BB_middle', 'BB_lower']
        available_columns = [col for col in indicators if col in klci_data.columns]
        indicator_data = klci_data[available_columns].iloc[-1].to_dict()
        prompt = f"The market trend for KLCI with sentiment {sentiment} and indicators {indicator_data}"

        prediction = generate_prediction(prompt)
        print(f"Dolly Prediction: {prediction}")

        # 4. Decide Buy/Sell based on Prediction and Execute Trade
        decision_text = prediction.lower()
        if "buy" in decision_text:
            execute_trade("buy", symbol=SYMBOL, qty=1)
        elif "sell" in decision_text:
            execute_trade("sell", symbol=SYMBOL, qty=1)
        else:
            print("No clear trade signal from prediction.")

        # 5. Plot KLCI Data with Indicators
        if available_columns:
            plot_klci(klci_data)

    except Exception as e:
        print(f"An error occured: {e}")


# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()