{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Stock Price Forecasting with ARIMA and LSTM\n",
    "\n",
    "## Objective\n",
    "Build and compare time series forecasting models for stock price prediction.\n",
    "\n",
    "- **Dataset**: Synthetic daily stock prices (5 years, 2019-01-01 to 2024-01-01)\n",
    "- **Models**: ARIMA, SARIMA, LSTM\n",
    "- **Metrics**: RMSE, MAE, MAPE\n",
    "\n",
    "> **Note**: The cells below currently fit and evaluate the ARIMA model only; SARIMA and LSTM are still TODO."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "from statsmodels.tsa.arima.model import ARIMA\n",
    "from statsmodels.tsa.stattools import adfuller, acf, pacf\n",
    "from statsmodels.graphics.tsaplots import plot_acf, plot_pacf\n",
    "from sklearn.metrics import mean_squared_error, mean_absolute_error\n",
    "import warnings\n",
    "# NOTE: blanket suppression keeps the output short, but it also hides any\n",
    "# warnings statsmodels raises while fitting. Remove it when debugging a fit.\n",
    "warnings.filterwarnings('ignore')\n",
    "\n",
    "# Generate synthetic stock data (seeded so Restart & Run All reproduces it)\n",
    "np.random.seed(42)\n",
    "dates = pd.date_range('2019-01-01', '2024-01-01', freq='D')\n",
    "n = len(dates)\n",
    "\n",
    "# Price = linear trend + sinusoidal seasonality + Gaussian noise\n",
    "trend = np.linspace(100, 200, n)\n",
    "seasonal = 10 * np.sin(np.linspace(0, 10*np.pi, n))\n",
    "noise = np.random.normal(0, 5, n)\n",
    "prices = trend + seasonal + noise\n",
    "prices = np.maximum(prices, 50)  # Floor at 50 so prices stay positive\n",
    "\n",
    "# Build the date-indexed frame in one expression (no inplace mutation),\n",
    "# so re-running this cell always yields the same fresh df.\n",
    "df = pd.DataFrame({\n",
    "    'Date': dates,\n",
    "    'Close': prices,\n",
    "    'Volume': np.random.randint(1000000, 10000000, n)\n",
    "}).set_index('Date')\n",
    "\n",
    "print(f'Dataset shape: {df.shape}')\n",
    "print(f'Date range: {df.index.min()} to {df.index.max()}')\n",
    "print(f'Mean price: ${df.Close.mean():.2f}')\n",
    "print(f'Price volatility (std): ${df.Close.std():.2f}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Stationarity test (Augmented Dickey-Fuller; null hypothesis: unit root)\n",
    "ADF_ALPHA = 0.05  # significance level used to reject the unit-root null\n",
    "\n",
    "result = adfuller(df['Close'])\n",
    "adf_stat, p_value, critical_values = result[0], result[1], result[4]\n",
    "print('ADF Statistic:', adf_stat)\n",
    "print('p-value:', p_value)\n",
    "print('Critical Values:', critical_values)\n",
    "\n",
    "if p_value > ADF_ALPHA:\n",
    "    print('\\nSeries is NON-STATIONARY. Differencing required.')\n",
    "    # Column assignment aligns on the index, so the first row is NaN by\n",
    "    # construction; calling .dropna() before assigning would be undone.\n",
    "    df['Close_diff'] = df['Close'].diff()\n",
    "else:\n",
    "    print('\\nSeries is STATIONARY.')\n",
    "\n",
    "# Calculate returns (percent) and trailing moving averages\n",
    "df['Returns'] = df['Close'].pct_change() * 100\n",
    "df['MA_7'] = df['Close'].rolling(window=7).mean()\n",
    "df['MA_30'] = df['Close'].rolling(window=30).mean()\n",
    "\n",
    "print(f'\\nAverage daily return: {df.Returns.mean():.3f}%')\n",
    "print(f'Return volatility: {df.Returns.std():.3f}%')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Train-test split (80-20), chronological: earliest rows train, latest rows test\n",
    "TRAIN_FRACTION = 0.8\n",
    "ARIMA_ORDER = (5, 1, 2)  # (p, d, q)\n",
    "\n",
    "train_size = int(len(df) * TRAIN_FRACTION)\n",
    "# .copy() gives independent frames, so adding the Forecast column below\n",
    "# writes to test itself rather than to a slice of df.\n",
    "train = df.iloc[:train_size].copy()\n",
    "test = df.iloc[train_size:].copy()\n",
    "assert len(train) + len(test) == len(df), 'split must cover every row exactly once'\n",
    "\n",
    "print(f'Training set: {len(train)} days')\n",
    "print(f'Test set: {len(test)} days')\n",
    "\n",
    "# Fit ARIMA model\n",
    "model = ARIMA(train['Close'], order=ARIMA_ORDER)\n",
    "model_fit = model.fit()\n",
    "\n",
    "print('\\nARIMA Model Summary:')\n",
    "print(model_fit.summary())\n",
    "\n",
    "# Forecast the whole test horizon in a single multi-step call\n",
    "forecast = model_fit.forecast(steps=len(test))\n",
    "# Assign by position; the forecast has exactly len(test) values\n",
    "test['Forecast'] = forecast.values\n",
    "\n",
    "# Calculate errors\n",
    "rmse = np.sqrt(mean_squared_error(test['Close'], test['Forecast']))\n",
    "mae = mean_absolute_error(test['Close'], test['Forecast'])\n",
    "mape = np.mean(np.abs((test['Close'] - test['Forecast']) / test['Close'])) * 100\n",
    "\n",
    "print('\\nModel Performance:')\n",
    "print(f'RMSE: ${rmse:.2f}')\n",
    "print(f'MAE: ${mae:.2f}')\n",
    "print(f'MAPE: {mape:.2f}%')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}