{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" } }, "cells": [ { "cell_type": "code", "execution_count": null, "metadata": { "id": "2eLLHNlMyFkT" }, "outputs": [], "source": [ "import tensorflow as tf\n", "import pickle\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "import pandas as pd\n", "from sklearn.model_selection import train_test_split\n", "import kagglehub\n", "from sklearn.metrics import accuracy_score\n", "from sklearn.preprocessing import StandardScaler\n", "\n", "# Download the house-price dataset from Kaggle.\n", "path = kagglehub.dataset_download(\"shree1992/housedata\")\n", "\n", "print(\"Path to dataset files:\", path)\n", "df = pd.read_csv(path + \"/data.csv\")\n", "# Keep only numeric feature columns: drop the target, identifiers, dates and year fields.\n", "X = df.drop([\"price\", \"street\", \"city\", \"statezip\", \"country\", \"date\", \"yr_built\", \"yr_renovated\", \"waterfront\"], axis=1)\n", "y = df[\"price\"]\n", "# Fixed random_state so the split (and every downstream number) is reproducible.\n", "X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size=0.1, random_state=42)\n", "print(X_train.shape, Y_train.shape, X_test.shape, Y_test.shape)\n", "scaler = StandardScaler()\n", "X_train = scaler.fit_transform(X_train)\n", "X_test = scaler.transform(X_test)\n", "# Express prices in millions so the gradients are well-conditioned.\n", "Y_train = Y_train/1e6\n", "Y_test = Y_test/1e6\n" ] }, { "cell_type": "code", "source": [ "\n", "# Initialise parameters: one weight per feature, a single bias term.\n", "w = np.zeros(X_train.shape[1])\n", "b = np.zeros(1)\n", "\n", "def linear_func(X_train, w, b):\n", "    \"\"\"Linear model prediction: X @ w + b.\"\"\"\n", "    return np.dot(X_train, w) + b\n", "\n", "def mean_squared_error_cost_func(X_train, Y_train, w, b):\n", "    \"\"\"Half mean-squared-error cost, computed sample by sample.\"\"\"\n", "    m = X_train.shape[0]\n", "    # Positional indexing: a pandas Series keeps its shuffled labels after\n", "    # train_test_split, so label-based Y_train[i,] would be wrong/raise.\n", "    Y_np = np.asarray(Y_train)\n", "    summation = 0\n", "    for i in range(m):\n", "        summation += ((np.dot(w, X_train[i]) + b) - Y_np[i])**2\n", "    MSE = (1/(2*m))*(summation)\n", "    return MSE" ], "metadata": { "id": "Vz6pFm1J4-4c" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "NON-Vectorized" ], "metadata": { "id": "GHrilIG-qJt-" } }, { "cell_type": "code", "source": 
[ "alpha = 0.0001  # learning rate\n", "\n", "def mean_squared_error_cost_func_deriv(X_train, Y_train, w, b):\n", "    \"\"\"One gradient-descent step on (w, b), with explicit per-sample loops.\"\"\"\n", "    X_train_np = X_train\n", "    Y_train_np = Y_train.values\n", "    m = X_train_np.shape[0]\n", "    summation = 0\n", "    for i in range(m):\n", "        summation += (((np.dot(w, X_train_np[i]) + b) - Y_train_np[i]) * X_train_np[i])\n", "    w = w - (alpha*((1/m)*summation))\n", "\n", "    # NOTE: the bias gradient below is computed with the already-updated w\n", "    # (sequential rather than simultaneous update) — kept as in the original.\n", "    summation = 0\n", "    for i in range(m):\n", "        summation += ((np.dot(w, X_train_np[i]) + b) - Y_train_np[i])\n", "    b = b - (alpha*((1/m)*summation))\n", "    return w, b\n", "\n", "def gradient_descent(alpha, w, b, X_train, Y_train):\n", "    \"\"\"Run 1000 update steps, logging the training MSE every 100 iterations.\"\"\"\n", "    for i in range(1000):\n", "        w, b = mean_squared_error_cost_func_deriv(X_train, Y_train, w, b)\n", "        if i % 100 == 0:\n", "            preds = np.dot(X_train, w) + b  # include the bias in the diagnostic\n", "            MSE = np.mean((preds - Y_train)**2)\n", "            print(f\"Iteration {i}, MSE: {MSE}, w: {w}, b: {b}\")\n", "    return w, b\n", "\n", "W, B = gradient_descent(alpha, w, b, X_train, Y_train)\n", "\n", "def predict(x, W, B):\n", "    \"\"\"Predict one value per row of x with the trained parameters.\"\"\"\n", "    y_pred = np.zeros(x.shape[0])\n", "    for i in range(x.shape[0]):\n", "        y_pred[i] = linear_func(x[i], W, B)\n", "    return y_pred\n", "\n", "\n", "y_pred = predict(X_test, W, B)\n", "\n", "\n" ], "metadata": { "id": "cn4BqyN6-h9_" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "Vectorized" ], "metadata": { "id": "acKPA93MqM9j" } }, { "cell_type": "code", "source": [ "alpha = 0.0001  # learning rate\n", "\n", "def mean_squared_error_cost_func_deriv(X_train, Y_train, w, b):\n", "    \"\"\"One vectorized gradient-descent step; w and b are updated in place.\"\"\"\n", "    m = X_train.shape[0]\n", "    predictions = np.dot(X_train, w) + b\n", "    errors = predictions - Y_train\n", "\n", "    dw = (1 / m) * np.dot(X_train.T, errors)\n", "    db = (1 / m) * np.sum(errors)\n", "\n", "    w -= alpha * dw\n", "    b -= alpha * db\n", "\n", "    return w, b\n", "\n", "def gradient_descent(alpha, w, b, X_train, Y_train):\n", "    \"\"\"Run 1000 vectorized update steps, logging training MSE every 100 iterations.\"\"\"\n", "    for i in range(1000):\n", "        w, b = mean_squared_error_cost_func_deriv(X_train, Y_train, w, b)\n", "        if i % 100 == 0:\n", "            preds = np.dot(X_train, w) + b  # include the bias in the diagnostic\n", "            MSE = 
np.mean((preds - Y_train)**2)\n", "            print(f\"Iteration {i}, MSE: {MSE}, w: {w}, b: {b}\")\n", "    return w, b\n", "\n", "W, B = gradient_descent(alpha, w, b, X_train, Y_train)\n", "\n", "def predict(x, W, B):\n", "    \"\"\"Predict one value per row of x with the trained parameters.\"\"\"\n", "    y_pred = np.zeros(x.shape[0])\n", "    for i in range(x.shape[0]):\n", "        y_pred[i] = linear_func(x[i], W, B)\n", "    return y_pred\n", "\n", "\n", "y_pred = predict(X_test, W, B)" ], "metadata": { "id": "PFikMk1ZqGsK" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "from sklearn.metrics import mean_squared_error\n", "mse = mean_squared_error(Y_test.values, y_pred)\n", "print(\"Mean Squared Error:\", mse)\n", "print(Y_test.values)\n", "print(\"BREAK\")\n", "print(y_pred)\n", "print(W, B)\n", "\n", "# Visualise the fit against the first (standardised) feature.\n", "X_feature = X_train[:, 0]\n", "x_range = np.linspace(X_feature.min(), X_feature.max(), 100)\n", "\n", "# Use the TRAINED parameters W, B — the initial w, b would draw the wrong line.\n", "y_line = W[0] * x_range + B\n", "plt.scatter(X_feature, Y_train, color='blue', label='Training Data')\n", "\n", "plt.plot(x_range, y_line, color='red', label='Linear Regression Line')\n", "plt.xlabel('Feature 1')\n", "plt.ylabel('Y')\n", "plt.title('Linear Regression (Feature 1)')\n", "plt.legend()\n", "plt.show()\n" ], "metadata": { "id": "AE7epvEkcuGQ" }, "execution_count": null, "outputs": [] } ] }