Ken Sang Tang committed on
Commit
bb46f09
·
verified ·
1 Parent(s): 4dbac4d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +74 -7
app.py CHANGED
@@ -1,17 +1,84 @@
1
- from transformers import AutoTokenizer, AutoModelForCausalLM
 
 
 
 
2
  import gradio as gr
 
 
3
 
4
- # Load a suitable LLM model (replace with the chosen model name)
5
- model_name = "databricks/dolly-v2-3b" # Example: you can replace with a financial model
6
  tokenizer = AutoTokenizer.from_pretrained(model_name)
7
  model = AutoModelForCausalLM.from_pretrained(model_name)
8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  def generate_response(prompt):
10
- inputs = tokenizer(prompt, return_tensors="pt")
11
- outputs = model.generate(**inputs, max_length=100)
 
 
 
 
 
12
  response = tokenizer.decode(outputs[0], skip_special_tokens=True)
13
  return response
14
 
15
- # Set up Gradio interface
16
- gr.Interface(fn=generate_response, inputs="text", outputs="text").launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
 
1
+ # app.py
2
+
3
+ from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments
4
+ import yfinance as yf
5
+ import talib as ta
6
  import gradio as gr
7
+ import pandas as pd
8
+ import torch
9
 
10
# Step 1: Load Model and Tokenizer
# These run at module import time; `tokenizer` and `model` are module-level
# names used by the functions defined further down. Both are loaded from the
# same checkpoint name.
model_name = "databricks/dolly-v2-3b"  # Update with chosen model if different
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
14
 
15
+ # Step 2: Financial Data Fetching and Indicators
16
def fetch_stock_data(symbol="^KLSE", start="2023-01-01", end="2023-10-30"):
    """Fetch historical price data for a stock symbol via yfinance.

    Args:
        symbol: Ticker symbol passed to ``yf.download``. Defaults to
            ``"^KLSE"``.
        start: Start of the date window, forwarded unchanged as the
            ``start`` argument of ``yf.download``.
        end: End of the date window, forwarded unchanged as the ``end``
            argument of ``yf.download``.

    Returns:
        Whatever ``yf.download`` returns for the requested window; callers
        in this file index it with ``data['Close']``.
    """
    # The window used to be hard-coded; the defaults keep the old behaviour
    # for existing zero-argument / symbol-only callers.
    data = yf.download(symbol, start=start, end=end)
    return data
20
+
21
def calculate_macd(data):
    """Calculate the MACD line and its signal line for the given stock data.

    Args:
        data: Price table with a ``'Close'`` column, as returned by
            ``fetch_stock_data``.

    Returns:
        A ``(macd, macd_signal)`` pair as produced by ``ta.MACD``; the third
        output of ``ta.MACD`` (the histogram) is discarded.
    """
    close = data['Close']
    # With MultiIndex columns (NOTE(review): recent yfinance releases appear
    # to return these even for a single ticker - confirm against the pinned
    # version) ``data['Close']`` is a 2-D frame with one column per ticker.
    # TA-Lib needs a 1-D input, so take the first (only) ticker column.
    if getattr(close, "ndim", 1) == 2:
        close = close.iloc[:, 0]
    # TA-Lib only accepts double-precision input.
    close = close.astype("float64")
    macd, macd_signal, _ = ta.MACD(close)
    return macd, macd_signal
25
+
26
+ # Step 3: Fine-Tuning (Sample Training Setup, modify dataset path as needed)
27
def fine_tune_model():
    """Fine-tune the module-level causal LM on a small prompt/response set.

    Each training example is the prompt followed by its response, tokenized
    as one sequence. The labels are a copy of the input ids with padding
    positions masked out, which is the layout a causal-LM loss expects
    (inputs and labels of identical length).

    The module-level ``model`` is trained in place and checkpoints are
    written under ``./results``. Returns ``None``.
    """
    # Example dataset, replace with actual financial dataset
    dataset = pd.DataFrame({
        "prompt": ["Explain KLCI's MACD trend.", "Predict KLCI based on SMA."],
        "response": ["KLCI MACD shows bullish trend.", "KLCI SMA indicates resistance."],
    })

    # Batch padding needs a pad token; fall back to EOS when the tokenizer
    # does not define one.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    # Previously prompts and responses were tokenized separately and the
    # response ids were never handed to the Trainer, so no loss could be
    # computed. Join each pair into a single training sequence instead.
    eos = tokenizer.eos_token or ""
    texts = [
        f"{prompt} {response}{eos}"
        for prompt, response in zip(dataset["prompt"], dataset["response"])
    ]
    encoded = tokenizer(texts, padding=True, truncation=True, return_tensors="pt")

    # -100 is the index ignored by the cross-entropy loss, so padding does
    # not contribute to training.
    labels = encoded["input_ids"].clone()
    labels[encoded["attention_mask"] == 0] = -100

    class _PromptResponseDataset(torch.utils.data.Dataset):
        """Map-style dataset yielding one feature dict per example."""

        def __len__(self):
            return labels.size(0)

        def __getitem__(self, index):
            return {
                "input_ids": encoded["input_ids"][index],
                "attention_mask": encoded["attention_mask"][index],
                "labels": labels[index],
            }

    # Training arguments. No evaluation strategy is set: there is no eval
    # dataset, and requesting per-epoch evaluation without one is an error.
    training_args = TrainingArguments(
        output_dir="./results",
        per_device_train_batch_size=2,
        num_train_epochs=3,
        weight_decay=0.01,
    )

    # Set up Trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=_PromptResponseDataset(),
    )
    trainer.train()
55
+
56
+ # Step 4: Response Generation with Dynamic Prompting
57
def generate_response(prompt):
    """Generate a model reply to ``prompt`` with the latest MACD reading prepended.

    Stock data is fetched with ``fetch_stock_data()`` and passed through
    ``calculate_macd``. When a usable latest MACD/signal pair exists, a
    sentence stating both values is placed in front of the user's prompt;
    otherwise the prompt is sent to the model unchanged.

    Args:
        prompt: The user's question or instruction text.

    Returns:
        The decoded model output (special tokens skipped). This is the full
        decoded sequence returned by ``model.generate``.
    """
    import math  # local import: only needed for the NaN guard below

    data = fetch_stock_data()
    macd, macd_signal = calculate_macd(data)

    financial_prompt = prompt
    if len(macd) and len(macd_signal):
        # Positional access: ``series[-1]`` on a date-indexed Series is a
        # label lookup, not "last element".
        latest_macd = float(macd.iloc[-1])
        latest_signal = float(macd_signal.iloc[-1])
        # Skip the prefix rather than telling the model the value is "nan".
        if not (math.isnan(latest_macd) or math.isnan(latest_signal)):
            financial_prompt = f"The KLCI MACD value is {latest_macd:.2f} with signal {latest_signal:.2f}. {prompt}"

    inputs = tokenizer(financial_prompt, return_tensors="pt")
    # max_length would count the prompt tokens too, leaving little or no
    # room for a reply once the MACD prefix and user text are included;
    # max_new_tokens bounds only the generated continuation.
    outputs = model.generate(**inputs, max_new_tokens=50)
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response
67
 
68
+ # Step 5: Deploying with Gradio Interface
69
def build_interface():
    """Create the text-in / text-out Gradio app and launch it.

    The app routes the submitted text to ``generate_response`` and shows the
    returned string. Returns ``None``.
    """
    demo = gr.Interface(fn=generate_response, inputs="text", outputs="text")
    # share=True asks Gradio for a public link in addition to the local one.
    demo.launch(share=True)
76
+
77
+ # Uncomment the below line to run fine-tuning when needed
78
+ # fine_tune_model()
79
+
80
# Run Gradio interface
# Launch the UI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    build_interface()
83
+
84