{
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "# Stock Price Forecasting with ARIMA and LSTM\n",
        "\n",
        "## Objective\n",
        "Build and compare time series forecasting models for stock price prediction.\n",
        "\n",
        "- **Dataset**: Synthetic daily stock prices (5 years)\n",
        "- **Models**: ARIMA, SARIMA, LSTM\n",
        "- **Metrics**: RMSE, MAE, MAPE"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 1,
      "metadata": {},
      "outputs": [],
      "source": [
        "import pandas as pd\n",
        "import numpy as np\n",
        "import matplotlib.pyplot as plt\n",
        "from statsmodels.tsa.arima.model import ARIMA\n",
        "from statsmodels.tsa.stattools import adfuller, acf, pacf\n",
        "from statsmodels.graphics.tsaplots import plot_acf, plot_pacf\n",
        "from sklearn.metrics import mean_squared_error, mean_absolute_error\n",
        "import warnings\n",
        "warnings.filterwarnings('ignore')\n",
        "\n",
        "# Synthetic daily price series; the seed fixes both random draws below\n",
        "np.random.seed(42)\n",
        "dates = pd.date_range('2019-01-01', '2024-01-01', freq='D')\n",
        "n = dates.size\n",
        "\n",
        "# Components: linear drift from 100 to 200, five sine cycles of amplitude 10,\n",
        "# and Gaussian noise with standard deviation 5\n",
        "trend = np.linspace(100, 200, n)\n",
        "seasonal = 10 * np.sin(np.linspace(0, 10*np.pi, n))\n",
        "noise = np.random.normal(0, 5, n)\n",
        "\n",
        "# Floor the combined series at 50 so prices stay positive\n",
        "prices = np.clip(trend + seasonal + noise, 50, None)\n",
        "\n",
        "# Volume is drawn after the noise so the random stream is consumed in a fixed order\n",
        "volume = np.random.randint(1_000_000, 10_000_000, n)\n",
        "df = pd.DataFrame({'Date': dates, 'Close': prices, 'Volume': volume}).set_index('Date')\n",
        "\n",
        "# Quick summary of the generated frame\n",
        "first_day, last_day = df.index.min(), df.index.max()\n",
        "mean_price, price_std = df['Close'].mean(), df['Close'].std()\n",
        "print(f'Dataset shape: {df.shape}')\n",
        "print(f'Date range: {first_day} to {last_day}')\n",
        "print(f'Mean price: ${mean_price:.2f}')\n",
        "print(f'Price volatility (std): ${price_std:.2f}')"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 2,
      "metadata": {},
      "outputs": [],
      "source": [
        "# Stationarity test (Augmented Dickey-Fuller; the null hypothesis is a unit root)\n",
        "ALPHA = 0.05  # significance level used to reject the unit-root null\n",
        "result = adfuller(df['Close'])\n",
        "print('ADF Statistic:', result[0])\n",
        "print('p-value:', result[1])\n",
        "print('Critical Values:', result[4])\n",
        "\n",
        "if result[1] > ALPHA:\n",
        "    print('\\nSeries is NON-STATIONARY. Differencing required.')\n",
        "    # diff() leaves NaN in the first row. Calling dropna() before assigning is\n",
        "    # undone by index alignment, so NaN is dropped where the column is consumed.\n",
        "    df['Close_diff'] = df['Close'].diff()\n",
        "    # Confirm that one round of differencing is enough\n",
        "    diff_p_value = adfuller(df['Close_diff'].dropna())[1]\n",
        "    print(f'p-value after first differencing: {diff_p_value:.4g}')\n",
        "else:\n",
        "    print('\\nSeries is STATIONARY.')\n",
        "\n",
        "# Percentage returns and trailing moving averages (their leading rows are NaN)\n",
        "df['Returns'] = df['Close'].pct_change() * 100\n",
        "df['MA_7'] = df['Close'].rolling(window=7).mean()\n",
        "df['MA_30'] = df['Close'].rolling(window=30).mean()\n",
        "\n",
        "# mean() and std() skip the NaN in the first Returns row\n",
        "mean_return, return_std = df['Returns'].mean(), df['Returns'].std()\n",
        "print(f'\\nAverage daily return: {mean_return:.3f}%')\n",
        "print(f'Return volatility: {return_std:.3f}%')"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 3,
      "metadata": {},
      "outputs": [],
      "source": [
        "# Chronological train-test split (80-20); time series must not be shuffled\n",
        "TRAIN_FRACTION = 0.8\n",
        "ARIMA_ORDER = (5, 1, 2)  # (p, d, q)\n",
        "\n",
        "train_size = int(len(df) * TRAIN_FRACTION)\n",
        "# .copy() makes both splits independent frames, so adding the Forecast column\n",
        "# below never writes into a slice of df (the SettingWithCopy pattern)\n",
        "train = df.iloc[:train_size].copy()\n",
        "test = df.iloc[train_size:].copy()\n",
        "assert len(train) + len(test) == len(df), 'split must cover every row exactly once'\n",
        "\n",
        "print(f'Training set: {len(train)} days')\n",
        "print(f'Test set: {len(test)} days')\n",
        "\n",
        "# Fit ARIMA model on the training closes only\n",
        "model = ARIMA(train['Close'], order=ARIMA_ORDER)\n",
        "model_fit = model.fit()\n",
        "\n",
        "print('\\nARIMA Model Summary:')\n",
        "print(model_fit.summary())\n",
        "\n",
        "# Multi-step forecast covering the whole test horizon\n",
        "forecast = model_fit.forecast(steps=len(test))\n",
        "# Assign by position: only the forecast values are used, not its own index\n",
        "test['Forecast'] = np.asarray(forecast)\n",
        "\n",
        "# Calculate errors on the held-out period\n",
        "actual, predicted = test['Close'], test['Forecast']\n",
        "rmse = np.sqrt(mean_squared_error(actual, predicted))\n",
        "mae = mean_absolute_error(actual, predicted)\n",
        "mape = np.mean(np.abs((actual - predicted) / actual)) * 100\n",
        "\n",
        "print('\\nModel Performance:')\n",
        "print(f'RMSE: ${rmse:.2f}')\n",
        "print(f'MAE: ${mae:.2f}')\n",
        "print(f'MAPE: {mape:.2f}%')"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 4
}