{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "302d85be",
   "metadata": {},
   "outputs": [],
   "source": [
    "import yfinance as yf\n",
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 168,
   "id": "3d36bd11",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import json\n",
    "from datetime import datetime, timedelta\n",
    "\n",
    "# Load the JSON data from the file\n",
    "# file_path = 'snow_volume_5_min'\n",
    "# file_path = 'AAPL_Volume_Footprint.txt'\n",
    "# file_path = 'MMM_Volume_Footprint.txt'\n",
    "# file_path = 'META_Volume_Footprint.txt'\n",
    "file_path = 'NKE_Volume_Footprint.txt'\n",
    "# file_path = 'CVS_Volume_Footprint.txt'\n",
    "\n",
    "\n",
    "with open(file_path, 'r') as file:\n",
    "    file_content = file.read()\n",
    "\n",
    "# Strip everything after the ~m~98~m~ marker\n",
    "# marker = '~m~98~m~'\n",
    "# marker = '~m~152~m~'\n",
    "\n",
    "marker = '~m~99~m~'\n",
    "# marker = '~m~98~m~'\n",
    "file_content = file_content.split(marker)[0]\n",
    "\n",
    "# file_content\n",
    "\n",
    "main_data = json.loads(file_content)\n",
    "\n",
    "# data_section = main_data['p'][1]['st1']['ns']['d']\n",
    "# data_section = main_data['p'][1]['st16']['ns']['d']\n",
    "data_section = main_data['p'][1]['st22']['ns']['d']\n",
    "# data_section = main_data['p'][1]['st1']['ns']['d']\n",
    "\n",
    "nested_data = json.loads(data_section)\n",
    "footprint_levels = nested_data['graphicsCmds']['create']['footprintLevels']\n",
    "df = pd.DataFrame(footprint_levels[0]['data'])\n",
    "\n",
    "footprints = nested_data['graphicsCmds']['create']['footprints']\n",
    "\n",
    "df1 = pd.DataFrame(footprints[0]['data'])\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 169,
   "id": "ee55ebb3",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>index</th>\n",
       "      <th>poc</th>\n",
       "      <th>val</th>\n",
       "      <th>vah</th>\n",
       "      <th>levels</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>81.17</td>\n",
       "      <td>81.16</td>\n",
       "      <td>81.18</td>\n",
       "      <td>[{'buyVolume': 1072.0, 'sellVolume': 1388.0, '...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>81.21</td>\n",
       "      <td>81.16</td>\n",
       "      <td>81.24</td>\n",
       "      <td>[{'buyVolume': 0.0, 'sellVolume': 1253.0, 'imb...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>81.25</td>\n",
       "      <td>81.24</td>\n",
       "      <td>81.28</td>\n",
       "      <td>[{'buyVolume': 2115.0, 'sellVolume': 961.0, 'i...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>5</td>\n",
       "      <td>3</td>\n",
       "      <td>81.25</td>\n",
       "      <td>81.22</td>\n",
       "      <td>81.26</td>\n",
       "      <td>[{'buyVolume': 836.0, 'sellVolume': 533.0, 'im...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>6</td>\n",
       "      <td>4</td>\n",
       "      <td>81.23</td>\n",
       "      <td>81.22</td>\n",
       "      <td>81.26</td>\n",
       "      <td>[{'buyVolume': 1816.0, 'sellVolume': 0.0, 'imb...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2830</th>\n",
       "      <td>6699</td>\n",
       "      <td>2830</td>\n",
       "      <td>78.07</td>\n",
       "      <td>78.06</td>\n",
       "      <td>78.08</td>\n",
       "      <td>[{'buyVolume': 2138.0, 'sellVolume': 3137.0, '...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2831</th>\n",
       "      <td>6700</td>\n",
       "      <td>2831</td>\n",
       "      <td>78.13</td>\n",
       "      <td>78.12</td>\n",
       "      <td>78.16</td>\n",
       "      <td>[{'buyVolume': 2696.0, 'sellVolume': 6590.0, '...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2832</th>\n",
       "      <td>6701</td>\n",
       "      <td>2832</td>\n",
       "      <td>78.23</td>\n",
       "      <td>78.20</td>\n",
       "      <td>78.24</td>\n",
       "      <td>[{'buyVolume': 604.0, 'sellVolume': 83.0, 'imb...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2833</th>\n",
       "      <td>6706</td>\n",
       "      <td>2833</td>\n",
       "      <td>78.27</td>\n",
       "      <td>78.24</td>\n",
       "      <td>78.32</td>\n",
       "      <td>[{'buyVolume': 1773.0, 'sellVolume': 1903.0, '...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2834</th>\n",
       "      <td>6707</td>\n",
       "      <td>2834</td>\n",
       "      <td>78.45</td>\n",
       "      <td>78.40</td>\n",
       "      <td>78.50</td>\n",
       "      <td>[{'buyVolume': 865.0, 'sellVolume': 3717.0, 'i...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2835 rows × 6 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        id  index    poc    val    vah  \\\n",
       "0        2      0  81.17  81.16  81.18   \n",
       "1        3      1  81.21  81.16  81.24   \n",
       "2        4      2  81.25  81.24  81.28   \n",
       "3        5      3  81.25  81.22  81.26   \n",
       "4        6      4  81.23  81.22  81.26   \n",
       "...    ...    ...    ...    ...    ...   \n",
       "2830  6699   2830  78.07  78.06  78.08   \n",
       "2831  6700   2831  78.13  78.12  78.16   \n",
       "2832  6701   2832  78.23  78.20  78.24   \n",
       "2833  6706   2833  78.27  78.24  78.32   \n",
       "2834  6707   2834  78.45  78.40  78.50   \n",
       "\n",
       "                                                 levels  \n",
       "0     [{'buyVolume': 1072.0, 'sellVolume': 1388.0, '...  \n",
       "1     [{'buyVolume': 0.0, 'sellVolume': 1253.0, 'imb...  \n",
       "2     [{'buyVolume': 2115.0, 'sellVolume': 961.0, 'i...  \n",
       "3     [{'buyVolume': 836.0, 'sellVolume': 533.0, 'im...  \n",
       "4     [{'buyVolume': 1816.0, 'sellVolume': 0.0, 'imb...  \n",
       "...                                                 ...  \n",
       "2830  [{'buyVolume': 2138.0, 'sellVolume': 3137.0, '...  \n",
       "2831  [{'buyVolume': 2696.0, 'sellVolume': 6590.0, '...  \n",
       "2832  [{'buyVolume': 604.0, 'sellVolume': 83.0, 'imb...  \n",
       "2833  [{'buyVolume': 1773.0, 'sellVolume': 1903.0, '...  \n",
       "2834  [{'buyVolume': 865.0, 'sellVolume': 3717.0, 'i...  \n",
       "\n",
       "[2835 rows x 6 columns]"
      ]
     },
     "execution_count": 169,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 170,
   "id": "9204ce99",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>startIndex</th>\n",
       "      <th>type</th>\n",
       "      <th>price</th>\n",
       "      <th>extendTo</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>13</td>\n",
       "      <td>5</td>\n",
       "      <td>buy</td>\n",
       "      <td>81.38</td>\n",
       "      <td>6.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>8</td>\n",
       "      <td>5</td>\n",
       "      <td>buy</td>\n",
       "      <td>81.28</td>\n",
       "      <td>7.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>9</td>\n",
       "      <td>5</td>\n",
       "      <td>buy</td>\n",
       "      <td>81.30</td>\n",
       "      <td>7.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>10</td>\n",
       "      <td>5</td>\n",
       "      <td>buy</td>\n",
       "      <td>81.32</td>\n",
       "      <td>7.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>11</td>\n",
       "      <td>5</td>\n",
       "      <td>buy</td>\n",
       "      <td>81.34</td>\n",
       "      <td>7.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   id  startIndex type  price  extendTo\n",
       "0  13           5  buy  81.38       6.0\n",
       "1   8           5  buy  81.28       7.0\n",
       "2   9           5  buy  81.30       7.0\n",
       "3  10           5  buy  81.32       7.0\n",
       "4  11           5  buy  81.34       7.0"
      ]
     },
     "execution_count": 170,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 171,
   "id": "3fee201c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>index</th>\n",
       "      <th>poc</th>\n",
       "      <th>val</th>\n",
       "      <th>vah</th>\n",
       "      <th>levels</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>81.17</td>\n",
       "      <td>81.16</td>\n",
       "      <td>81.18</td>\n",
       "      <td>[{'buyVolume': 1072.0, 'sellVolume': 1388.0, '...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>81.21</td>\n",
       "      <td>81.16</td>\n",
       "      <td>81.24</td>\n",
       "      <td>[{'buyVolume': 0.0, 'sellVolume': 1253.0, 'imb...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>81.25</td>\n",
       "      <td>81.24</td>\n",
       "      <td>81.28</td>\n",
       "      <td>[{'buyVolume': 2115.0, 'sellVolume': 961.0, 'i...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>5</td>\n",
       "      <td>3</td>\n",
       "      <td>81.25</td>\n",
       "      <td>81.22</td>\n",
       "      <td>81.26</td>\n",
       "      <td>[{'buyVolume': 836.0, 'sellVolume': 533.0, 'im...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>6</td>\n",
       "      <td>4</td>\n",
       "      <td>81.23</td>\n",
       "      <td>81.22</td>\n",
       "      <td>81.26</td>\n",
       "      <td>[{'buyVolume': 1816.0, 'sellVolume': 0.0, 'imb...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   id  index    poc    val    vah  \\\n",
       "0   2      0  81.17  81.16  81.18   \n",
       "1   3      1  81.21  81.16  81.24   \n",
       "2   4      2  81.25  81.24  81.28   \n",
       "3   5      3  81.25  81.22  81.26   \n",
       "4   6      4  81.23  81.22  81.26   \n",
       "\n",
       "                                              levels  \n",
       "0  [{'buyVolume': 1072.0, 'sellVolume': 1388.0, '...  \n",
       "1  [{'buyVolume': 0.0, 'sellVolume': 1253.0, 'imb...  \n",
       "2  [{'buyVolume': 2115.0, 'sellVolume': 961.0, 'i...  \n",
       "3  [{'buyVolume': 836.0, 'sellVolume': 533.0, 'im...  \n",
       "4  [{'buyVolume': 1816.0, 'sellVolume': 0.0, 'imb...  "
      ]
     },
     "execution_count": 171,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df1.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 172,
   "id": "e808128e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>timestamp</th>\n",
       "      <th>price</th>\n",
       "      <th>buyVolume</th>\n",
       "      <th>sellVolume</th>\n",
       "      <th>imbalance</th>\n",
       "      <th>index</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>19250</th>\n",
       "      <td>2024-12-04 15:45:00</td>\n",
       "      <td>78.18</td>\n",
       "      <td>276.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>buy</td>\n",
       "      <td>2832</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19251</th>\n",
       "      <td>2024-12-04 15:45:00</td>\n",
       "      <td>78.20</td>\n",
       "      <td>181.0</td>\n",
       "      <td>497.0</td>\n",
       "      <td>buy</td>\n",
       "      <td>2832</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19252</th>\n",
       "      <td>2024-12-04 15:45:00</td>\n",
       "      <td>78.22</td>\n",
       "      <td>3586.0</td>\n",
       "      <td>499.0</td>\n",
       "      <td>buy</td>\n",
       "      <td>2832</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19253</th>\n",
       "      <td>2024-12-04 15:45:00</td>\n",
       "      <td>78.24</td>\n",
       "      <td>513.0</td>\n",
       "      <td>138.0</td>\n",
       "      <td></td>\n",
       "      <td>2832</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19254</th>\n",
       "      <td>2024-12-04 15:50:00</td>\n",
       "      <td>78.24</td>\n",
       "      <td>1773.0</td>\n",
       "      <td>1903.0</td>\n",
       "      <td></td>\n",
       "      <td>2833</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19255</th>\n",
       "      <td>2024-12-04 15:50:00</td>\n",
       "      <td>78.26</td>\n",
       "      <td>2938.0</td>\n",
       "      <td>2038.0</td>\n",
       "      <td></td>\n",
       "      <td>2833</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19256</th>\n",
       "      <td>2024-12-04 15:50:00</td>\n",
       "      <td>78.28</td>\n",
       "      <td>1250.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td></td>\n",
       "      <td>2833</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19257</th>\n",
       "      <td>2024-12-04 15:50:00</td>\n",
       "      <td>78.30</td>\n",
       "      <td>428.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>buy</td>\n",
       "      <td>2833</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19258</th>\n",
       "      <td>2024-12-04 15:50:00</td>\n",
       "      <td>78.32</td>\n",
       "      <td>1241.0</td>\n",
       "      <td>1163.0</td>\n",
       "      <td>buy</td>\n",
       "      <td>2833</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19259</th>\n",
       "      <td>2024-12-04 15:50:00</td>\n",
       "      <td>78.34</td>\n",
       "      <td>651.0</td>\n",
       "      <td>113.0</td>\n",
       "      <td></td>\n",
       "      <td>2833</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19260</th>\n",
       "      <td>2024-12-04 15:50:00</td>\n",
       "      <td>78.36</td>\n",
       "      <td>449.0</td>\n",
       "      <td>668.0</td>\n",
       "      <td>buy</td>\n",
       "      <td>2833</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19261</th>\n",
       "      <td>2024-12-04 15:50:00</td>\n",
       "      <td>78.38</td>\n",
       "      <td>823.0</td>\n",
       "      <td>570.0</td>\n",
       "      <td></td>\n",
       "      <td>2833</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19262</th>\n",
       "      <td>2024-12-04 15:55:00</td>\n",
       "      <td>78.34</td>\n",
       "      <td>865.0</td>\n",
       "      <td>3717.0</td>\n",
       "      <td></td>\n",
       "      <td>2834</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19263</th>\n",
       "      <td>2024-12-04 15:55:00</td>\n",
       "      <td>78.36</td>\n",
       "      <td>4654.0</td>\n",
       "      <td>4021.0</td>\n",
       "      <td></td>\n",
       "      <td>2834</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19264</th>\n",
       "      <td>2024-12-04 15:55:00</td>\n",
       "      <td>78.38</td>\n",
       "      <td>4378.0</td>\n",
       "      <td>1201.0</td>\n",
       "      <td></td>\n",
       "      <td>2834</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19265</th>\n",
       "      <td>2024-12-04 15:55:00</td>\n",
       "      <td>78.40</td>\n",
       "      <td>478.0</td>\n",
       "      <td>1584.0</td>\n",
       "      <td></td>\n",
       "      <td>2834</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19266</th>\n",
       "      <td>2024-12-04 15:55:00</td>\n",
       "      <td>78.42</td>\n",
       "      <td>1472.0</td>\n",
       "      <td>4033.0</td>\n",
       "      <td></td>\n",
       "      <td>2834</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19267</th>\n",
       "      <td>2024-12-04 15:55:00</td>\n",
       "      <td>78.44</td>\n",
       "      <td>5457.0</td>\n",
       "      <td>7033.0</td>\n",
       "      <td></td>\n",
       "      <td>2834</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19268</th>\n",
       "      <td>2024-12-04 15:55:00</td>\n",
       "      <td>78.46</td>\n",
       "      <td>4136.0</td>\n",
       "      <td>2046.0</td>\n",
       "      <td></td>\n",
       "      <td>2834</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19269</th>\n",
       "      <td>2024-12-04 15:55:00</td>\n",
       "      <td>78.48</td>\n",
       "      <td>4377.0</td>\n",
       "      <td>1388.0</td>\n",
       "      <td></td>\n",
       "      <td>2834</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                timestamp  price  buyVolume  sellVolume imbalance  index\n",
       "19250 2024-12-04 15:45:00  78.18      276.0         0.0       buy   2832\n",
       "19251 2024-12-04 15:45:00  78.20      181.0       497.0       buy   2832\n",
       "19252 2024-12-04 15:45:00  78.22     3586.0       499.0       buy   2832\n",
       "19253 2024-12-04 15:45:00  78.24      513.0       138.0             2832\n",
       "19254 2024-12-04 15:50:00  78.24     1773.0      1903.0             2833\n",
       "19255 2024-12-04 15:50:00  78.26     2938.0      2038.0             2833\n",
       "19256 2024-12-04 15:50:00  78.28     1250.0         0.0             2833\n",
       "19257 2024-12-04 15:50:00  78.30      428.0         0.0       buy   2833\n",
       "19258 2024-12-04 15:50:00  78.32     1241.0      1163.0       buy   2833\n",
       "19259 2024-12-04 15:50:00  78.34      651.0       113.0             2833\n",
       "19260 2024-12-04 15:50:00  78.36      449.0       668.0       buy   2833\n",
       "19261 2024-12-04 15:50:00  78.38      823.0       570.0             2833\n",
       "19262 2024-12-04 15:55:00  78.34      865.0      3717.0             2834\n",
       "19263 2024-12-04 15:55:00  78.36     4654.0      4021.0             2834\n",
       "19264 2024-12-04 15:55:00  78.38     4378.0      1201.0             2834\n",
       "19265 2024-12-04 15:55:00  78.40      478.0      1584.0             2834\n",
       "19266 2024-12-04 15:55:00  78.42     1472.0      4033.0             2834\n",
       "19267 2024-12-04 15:55:00  78.44     5457.0      7033.0             2834\n",
       "19268 2024-12-04 15:55:00  78.46     4136.0      2046.0             2834\n",
       "19269 2024-12-04 15:55:00  78.48     4377.0      1388.0             2834"
      ]
     },
     "execution_count": 172,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# # Define the trading holidays\n",
    "# trading_holidays = [\n",
    "#     datetime(2024, 1, 1),   # Monday, January 1 - New Year's Day\n",
    "#     datetime(2024, 1, 15),  # Monday, January 15 - Martin Luther King Jr. Day\n",
    "#     datetime(2024, 2, 19),  # Monday, February 19 - Presidents' Day\n",
    "#     datetime(2024, 3, 29),  # Friday, March 29 - Good Friday\n",
    "#     datetime(2024, 5, 27),  # Monday, May 27 - Memorial Day\n",
    "#     datetime(2024, 6, 19),  # Wednesday, June 19 - Juneteenth National Independence Day\n",
    "#     datetime(2024, 7, 4),   # Thursday, July 4 - Independence Day\n",
    "#     datetime(2024, 9, 2),   # Monday, September 2 - Labor Day\n",
    "#     datetime(2024, 11, 28), # Thursday, November 28 - Thanksgiving Day\n",
    "#     datetime(2024, 12, 25), # Wednesday, December 25 - Christmas Day\n",
    "#     datetime(2025, 1, 1),   # Wednesday, January 1 - New Year's Day\n",
    "#     datetime(2025, 1, 20),  # Monday, January 20 - Martin Luther King Jr. Day\n",
    "#     datetime(2025, 2, 17),  # Monday, February 17 - Presidents' Day\n",
    "#     datetime(2025, 4, 18),  # Friday, April 18 - Good Friday\n",
    "#     datetime(2025, 5, 26),  # Monday, May 26 - Memorial Day\n",
    "#     datetime(2025, 6, 19),  # Thursday, June 19 - Juneteenth National Independence Day\n",
    "#     datetime(2025, 7, 4),   # Friday, July 4 - Independence Day\n",
    "#     datetime(2025, 9, 1),   # Monday, September 1 - Labor Day\n",
    "#     datetime(2025, 11, 27), # Thursday, November 27 - Thanksgiving Day\n",
    "#     datetime(2025, 12, 25)  # Thursday, December 25 - Christmas Day\n",
    "# ]\n",
    "\n",
    "# # Initialize the base timestamp for index 2245\n",
    "# base_timestamp = datetime.strptime('2024-12-02 15:55:00', '%Y-%m-%d %H:%M:%S')\n",
    "# current_timestamp = base_timestamp\n",
    "\n",
    "# # Initialize a dictionary to store timestamps for each index\n",
    "# # index_to_timestamp = {2245: current_timestamp}\n",
    "# index_to_timestamp = {max(df1['index']): current_timestamp}\n",
    "\n",
    "# # Market hours\n",
    "# market_open = timedelta(hours=9, minutes=30)\n",
    "# market_close = timedelta(hours=15, minutes=55)\n",
    "# day_increment = timedelta(days=1)\n",
    "# weekend_days = [5, 6]  # Saturday and Sunday\n",
    "\n",
    "# # Calculate the timestamps backward in 5-minute intervals, excluding weekends and outside market hours\n",
    "# # for index in range(2244, -1, -1):\n",
    "# for index in range(max(df1['index'])-1, -1, -1):\n",
    "#     # Subtract 5 minutes\n",
    "#     current_timestamp -= timedelta(minutes=5)\n",
    "    \n",
    "#     # Check if current timestamp is before market open\n",
    "#     while (current_timestamp.time() < (datetime.min + market_open).time() or\n",
    "#            current_timestamp.time() > (datetime.min + market_close).time() or\n",
    "#            current_timestamp.weekday() in weekend_days or\n",
    "#            current_timestamp.date() in [holiday.date() for holiday in trading_holidays]):\n",
    "#         # Move to previous trading day if before market open\n",
    "#         if current_timestamp.time() < (datetime.min + market_open).time():\n",
    "#             current_timestamp = datetime.combine(current_timestamp.date() - day_increment, (datetime.min + market_close).time())\n",
    "#         else:\n",
    "#             # Otherwise, just subtract 5 minutes\n",
    "#             current_timestamp -= timedelta(minutes=5)\n",
    "        \n",
    "#         # Skip weekends and trading holidays\n",
    "#         while current_timestamp.weekday() in weekend_days or current_timestamp.date() in [holiday.date() for holiday in trading_holidays]:\n",
    "#             current_timestamp -= day_increment\n",
    "#             current_timestamp = datetime.combine(current_timestamp.date(), (datetime.min + market_close).time())\n",
    "    \n",
    "#     # Assign the calculated timestamp to the index\n",
    "#     index_to_timestamp[index] = current_timestamp\n",
    "\n",
    "# # Create a list to hold the time series data\n",
    "# time_series_data = []\n",
    "\n",
    "# # Iterate over df1 and extract levels data\n",
    "# for i, row in df1.iterrows():\n",
    "#     timestamp = index_to_timestamp.get(row['index'])\n",
    "    \n",
    "#     if timestamp:\n",
    "#         levels = row['levels']\n",
    "#         for level in levels:\n",
    "#             time_series_data.append({\n",
    "#                 'timestamp': timestamp,\n",
    "#                 'price': level['price'],\n",
    "#                 'buyVolume': level['buyVolume'],\n",
    "#                 'sellVolume': level['sellVolume'],\n",
    "#                 'imbalance': level['imbalance'],\n",
    "#                 'index': row['index']\n",
    "#             })\n",
    "\n",
    "# # Create the dataframe from the time series data\n",
    "# series_df = pd.DataFrame(time_series_data)\n",
    "\n",
    "# series_df.tail(20)\n",
    "\n",
    "\n",
    "# Define the trading holidays\n",
    "trading_holidays = [\n",
    "    datetime(2024, 1, 1),   # Monday, January 1 - New Year's Day\n",
    "    datetime(2024, 1, 15),  # Monday, January 15 - Martin Luther King Jr. Day\n",
    "    datetime(2024, 2, 19),  # Monday, February 19 - Presidents' Day\n",
    "    datetime(2024, 3, 29),  # Friday, March 29 - Good Friday\n",
    "    datetime(2024, 5, 27),  # Monday, May 27 - Memorial Day\n",
    "    datetime(2024, 6, 19),  # Wednesday, June 19 - Juneteenth National Independence Day\n",
    "    datetime(2024, 7, 4),   # Thursday, July 4 - Independence Day\n",
    "    datetime(2024, 9, 2),   # Monday, September 2 - Labor Day\n",
    "    datetime(2024, 11, 28), # Thursday, November 28 - Thanksgiving Day\n",
    "    datetime(2024, 12, 25), # Wednesday, December 25 - Christmas Day\n",
    "    datetime(2025, 1, 1),   # Wednesday, January 1 - New Year's Day\n",
    "    datetime(2025, 1, 20),  # Monday, January 20 - Martin Luther King Jr. Day\n",
    "    datetime(2025, 2, 17),  # Monday, February 17 - Presidents' Day\n",
    "    datetime(2025, 4, 18),  # Friday, April 18 - Good Friday\n",
    "    datetime(2025, 5, 26),  # Monday, May 26 - Memorial Day\n",
    "    datetime(2025, 6, 19),  # Thursday, June 19 - Juneteenth National Independence Day\n",
    "    datetime(2025, 7, 4),   # Friday, July 4 - Independence Day\n",
    "    datetime(2025, 9, 1),   # Monday, September 1 - Labor Day\n",
    "    datetime(2025, 11, 27), # Thursday, November 27 - Thanksgiving Day\n",
    "    datetime(2025, 12, 25)  # Thursday, December 25 - Christmas Day\n",
    "]\n",
    "\n",
    "# Initialize the base timestamp for index 2245\n",
    "base_timestamp = datetime.strptime('2024-12-04 15:55:00', '%Y-%m-%d %H:%M:%S')\n",
    "current_timestamp = base_timestamp\n",
    "\n",
    "# Initialize a dictionary to store timestamps for each index\n",
    "# index_to_timestamp = {2245: current_timestamp}\n",
    "index_to_timestamp = {max(df1['index']): current_timestamp}\n",
    "\n",
    "# Market hours\n",
    "market_open = timedelta(hours=9, minutes=30)\n",
    "market_close = timedelta(hours=15, minutes=55)\n",
    "special_close = {\n",
    "    datetime(2024, 11, 29).date(): timedelta(hours=12, minutes=55)  # Special close time on 2024-11-29\n",
    "}\n",
    "day_increment = timedelta(days=1)\n",
    "weekend_days = [5, 6]  # Saturday and Sunday\n",
    "\n",
    "# Calculate the timestamps backward in 5-minute intervals, excluding weekends and outside market hours\n",
    "# for index in range(2244, -1, -1):\n",
    "for index in range(max(df1['index'])-1, -1, -1):\n",
    "    # Subtract 5 minutes\n",
    "    current_timestamp -= timedelta(minutes=5)\n",
    "    \n",
    "    # Check if current timestamp is before market open\n",
    "    while (current_timestamp.time() < (datetime.min + market_open).time() or\n",
    "           current_timestamp.time() > (datetime.min + special_close.get(current_timestamp.date(), market_close)).time() or\n",
    "           current_timestamp.weekday() in weekend_days or\n",
    "           current_timestamp.date() in [holiday.date() for holiday in trading_holidays]):\n",
    "        # Move to previous trading day if before market open\n",
    "        if current_timestamp.time() < (datetime.min + market_open).time():\n",
    "            current_timestamp = datetime.combine(current_timestamp.date() - day_increment, (datetime.min + special_close.get(current_timestamp.date() - day_increment, market_close)).time())\n",
    "        else:\n",
    "            # Otherwise, just subtract 5 minutes\n",
    "            current_timestamp -= timedelta(minutes=5)\n",
    "        \n",
    "        # Skip weekends and trading holidays\n",
    "        while current_timestamp.weekday() in weekend_days or current_timestamp.date() in [holiday.date() for holiday in trading_holidays]:\n",
    "            current_timestamp -= day_increment\n",
    "            current_timestamp = datetime.combine(current_timestamp.date(), (datetime.min + special_close.get(current_timestamp.date(), market_close)).time())\n",
    "    \n",
    "    # Assign the calculated timestamp to the index\n",
    "    index_to_timestamp[index] = current_timestamp\n",
    "\n",
    "# Create a list to hold the time series data\n",
    "time_series_data = []\n",
    "\n",
    "# Iterate over df1 and extract levels data\n",
    "for i, row in df1.iterrows():\n",
    "    timestamp = index_to_timestamp.get(row['index'])\n",
    "    \n",
    "    if timestamp:\n",
    "        levels = row['levels']\n",
    "        for level in levels:\n",
    "            time_series_data.append({\n",
    "                'timestamp': timestamp,\n",
    "                'price': level['price'],\n",
    "                'buyVolume': level['buyVolume'],\n",
    "                'sellVolume': level['sellVolume'],\n",
    "                'imbalance': level['imbalance'],\n",
    "                'index': row['index']\n",
    "            })\n",
    "\n",
    "# Create the dataframe from the time series data\n",
    "series_df = pd.DataFrame(time_series_data)\n",
    "\n",
    "series_df.tail(20)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 173,
   "id": "ccb6e9dd",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>timestamp</th>\n",
       "      <th>price</th>\n",
       "      <th>buyVolume</th>\n",
       "      <th>sellVolume</th>\n",
       "      <th>imbalance</th>\n",
       "      <th>index</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>18670</th>\n",
       "      <td>2024-12-03 14:55:00</td>\n",
       "      <td>78.96</td>\n",
       "      <td>0.0</td>\n",
       "      <td>156.0</td>\n",
       "      <td></td>\n",
       "      <td>2744</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18671</th>\n",
       "      <td>2024-12-03 15:00:00</td>\n",
       "      <td>78.96</td>\n",
       "      <td>227.0</td>\n",
       "      <td>1918.0</td>\n",
       "      <td></td>\n",
       "      <td>2745</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18672</th>\n",
       "      <td>2024-12-03 15:00:00</td>\n",
       "      <td>78.98</td>\n",
       "      <td>1366.0</td>\n",
       "      <td>161.0</td>\n",
       "      <td></td>\n",
       "      <td>2745</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18673</th>\n",
       "      <td>2024-12-03 15:05:00</td>\n",
       "      <td>78.98</td>\n",
       "      <td>1228.0</td>\n",
       "      <td>1333.0</td>\n",
       "      <td></td>\n",
       "      <td>2746</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18674</th>\n",
       "      <td>2024-12-03 15:05:00</td>\n",
       "      <td>79.00</td>\n",
       "      <td>566.0</td>\n",
       "      <td>255.0</td>\n",
       "      <td></td>\n",
       "      <td>2746</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19265</th>\n",
       "      <td>2024-12-04 15:55:00</td>\n",
       "      <td>78.40</td>\n",
       "      <td>478.0</td>\n",
       "      <td>1584.0</td>\n",
       "      <td></td>\n",
       "      <td>2834</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19266</th>\n",
       "      <td>2024-12-04 15:55:00</td>\n",
       "      <td>78.42</td>\n",
       "      <td>1472.0</td>\n",
       "      <td>4033.0</td>\n",
       "      <td></td>\n",
       "      <td>2834</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19267</th>\n",
       "      <td>2024-12-04 15:55:00</td>\n",
       "      <td>78.44</td>\n",
       "      <td>5457.0</td>\n",
       "      <td>7033.0</td>\n",
       "      <td></td>\n",
       "      <td>2834</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19268</th>\n",
       "      <td>2024-12-04 15:55:00</td>\n",
       "      <td>78.46</td>\n",
       "      <td>4136.0</td>\n",
       "      <td>2046.0</td>\n",
       "      <td></td>\n",
       "      <td>2834</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19269</th>\n",
       "      <td>2024-12-04 15:55:00</td>\n",
       "      <td>78.48</td>\n",
       "      <td>4377.0</td>\n",
       "      <td>1388.0</td>\n",
       "      <td></td>\n",
       "      <td>2834</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>600 rows × 6 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                timestamp  price  buyVolume  sellVolume imbalance  index\n",
       "18670 2024-12-03 14:55:00  78.96        0.0       156.0             2744\n",
       "18671 2024-12-03 15:00:00  78.96      227.0      1918.0             2745\n",
       "18672 2024-12-03 15:00:00  78.98     1366.0       161.0             2745\n",
       "18673 2024-12-03 15:05:00  78.98     1228.0      1333.0             2746\n",
       "18674 2024-12-03 15:05:00  79.00      566.0       255.0             2746\n",
       "...                   ...    ...        ...         ...       ...    ...\n",
       "19265 2024-12-04 15:55:00  78.40      478.0      1584.0             2834\n",
       "19266 2024-12-04 15:55:00  78.42     1472.0      4033.0             2834\n",
       "19267 2024-12-04 15:55:00  78.44     5457.0      7033.0             2834\n",
       "19268 2024-12-04 15:55:00  78.46     4136.0      2046.0             2834\n",
       "19269 2024-12-04 15:55:00  78.48     4377.0      1388.0             2834\n",
       "\n",
       "[600 rows x 6 columns]"
      ]
     },
     "execution_count": 173,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "series_df.tail(600)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 177,
   "id": "71d91b66",
   "metadata": {},
   "outputs": [],
   "source": [
    "series_df.to_csv('nke_5_min_series_data.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 178,
   "id": "4e1436c2",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Filter the DataFrame\n",
    "filtered_df = series_df[series_df['timestamp'] >= '2024-12-04 09:30:00']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 179,
   "id": "1e7f8c72",
   "metadata": {},
   "outputs": [],
   "source": [
    "filtered_df.reset_index(inplace=True,drop=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 180,
   "id": "7b01d454",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sort by timestamp and price ascending\n",
    "filtered_df = filtered_df.sort_values(by=['timestamp', 'price']).reset_index(drop=True)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 181,
   "id": "d3a75a98",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "timestamp\n",
       "2024-12-04 09:30:00    40\n",
       "2024-12-04 09:35:00    19\n",
       "2024-12-04 10:05:00    18\n",
       "2024-12-04 09:40:00    12\n",
       "2024-12-04 09:55:00    12\n",
       "                       ..\n",
       "2024-12-04 15:00:00     3\n",
       "2024-12-04 13:35:00     3\n",
       "2024-12-04 13:25:00     3\n",
       "2024-12-04 14:20:00     2\n",
       "2024-12-04 13:45:00     2\n",
       "Name: count, Length: 78, dtype: int64"
      ]
     },
     "execution_count": 181,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "filtered_df['timestamp'].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 182,
   "id": "1a234672",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>timestamp</th>\n",
       "      <th>price</th>\n",
       "      <th>buyVolume</th>\n",
       "      <th>sellVolume</th>\n",
       "      <th>imbalance</th>\n",
       "      <th>index</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2024-12-04 09:30:00</td>\n",
       "      <td>77.10</td>\n",
       "      <td>597.0</td>\n",
       "      <td>119.0</td>\n",
       "      <td></td>\n",
       "      <td>2757</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2024-12-04 09:30:00</td>\n",
       "      <td>77.12</td>\n",
       "      <td>1306.0</td>\n",
       "      <td>985.0</td>\n",
       "      <td>both</td>\n",
       "      <td>2757</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2024-12-04 09:30:00</td>\n",
       "      <td>77.14</td>\n",
       "      <td>156.0</td>\n",
       "      <td>2510.0</td>\n",
       "      <td>sell</td>\n",
       "      <td>2757</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2024-12-04 09:30:00</td>\n",
       "      <td>77.16</td>\n",
       "      <td>100.0</td>\n",
       "      <td>709.0</td>\n",
       "      <td>sell</td>\n",
       "      <td>2757</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2024-12-04 09:30:00</td>\n",
       "      <td>77.18</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1658.0</td>\n",
       "      <td>sell</td>\n",
       "      <td>2757</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>531</th>\n",
       "      <td>2024-12-04 15:55:00</td>\n",
       "      <td>78.40</td>\n",
       "      <td>478.0</td>\n",
       "      <td>1584.0</td>\n",
       "      <td></td>\n",
       "      <td>2834</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>532</th>\n",
       "      <td>2024-12-04 15:55:00</td>\n",
       "      <td>78.42</td>\n",
       "      <td>1472.0</td>\n",
       "      <td>4033.0</td>\n",
       "      <td></td>\n",
       "      <td>2834</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>533</th>\n",
       "      <td>2024-12-04 15:55:00</td>\n",
       "      <td>78.44</td>\n",
       "      <td>5457.0</td>\n",
       "      <td>7033.0</td>\n",
       "      <td></td>\n",
       "      <td>2834</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>534</th>\n",
       "      <td>2024-12-04 15:55:00</td>\n",
       "      <td>78.46</td>\n",
       "      <td>4136.0</td>\n",
       "      <td>2046.0</td>\n",
       "      <td></td>\n",
       "      <td>2834</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>535</th>\n",
       "      <td>2024-12-04 15:55:00</td>\n",
       "      <td>78.48</td>\n",
       "      <td>4377.0</td>\n",
       "      <td>1388.0</td>\n",
       "      <td></td>\n",
       "      <td>2834</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>536 rows × 6 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "              timestamp  price  buyVolume  sellVolume imbalance  index\n",
       "0   2024-12-04 09:30:00  77.10      597.0       119.0             2757\n",
       "1   2024-12-04 09:30:00  77.12     1306.0       985.0      both   2757\n",
       "2   2024-12-04 09:30:00  77.14      156.0      2510.0      sell   2757\n",
       "3   2024-12-04 09:30:00  77.16      100.0       709.0      sell   2757\n",
       "4   2024-12-04 09:30:00  77.18        0.0      1658.0      sell   2757\n",
       "..                  ...    ...        ...         ...       ...    ...\n",
       "531 2024-12-04 15:55:00  78.40      478.0      1584.0             2834\n",
       "532 2024-12-04 15:55:00  78.42     1472.0      4033.0             2834\n",
       "533 2024-12-04 15:55:00  78.44     5457.0      7033.0             2834\n",
       "534 2024-12-04 15:55:00  78.46     4136.0      2046.0             2834\n",
       "535 2024-12-04 15:55:00  78.48     4377.0      1388.0             2834\n",
       "\n",
       "[536 rows x 6 columns]"
      ]
     },
     "execution_count": 182,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "filtered_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 183,
   "id": "e0ca7553",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>timestamp</th>\n",
       "      <th>price</th>\n",
       "      <th>buyVolume</th>\n",
       "      <th>sellVolume</th>\n",
       "      <th>imbalance</th>\n",
       "      <th>index</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "Empty DataFrame\n",
       "Columns: [timestamp, price, buyVolume, sellVolume, imbalance, index]\n",
       "Index: []"
      ]
     },
     "execution_count": 183,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "filtered_df[filtered_df['timestamp'] == '2024-12-02 09:30:00']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 184,
   "id": "a2366d90",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\saite\\AppData\\Local\\Temp\\ipykernel_10332\\915788632.py:44: FutureWarning: The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.\n",
      "  filtered_df = filtered_df.groupby('timestamp', group_keys=False).apply(calculate_poc)\n",
      "C:\\Users\\saite\\AppData\\Local\\Temp\\ipykernel_10332\\915788632.py:44: DeprecationWarning: DataFrameGroupBy.apply operated on the grouping columns. This behavior is deprecated, and in a future version of pandas the grouping columns will be excluded from the operation. Either pass `include_groups=False` to exclude the groupings or explicitly select the grouping columns after groupby to silence this warning.\n",
      "  filtered_df = filtered_df.groupby('timestamp', group_keys=False).apply(calculate_poc)\n",
      "C:\\Users\\saite\\AppData\\Local\\Temp\\ipykernel_10332\\915788632.py:70: DeprecationWarning: DataFrameGroupBy.apply operated on the grouping columns. This behavior is deprecated, and in a future version of pandas the grouping columns will be excluded from the operation. Either pass `include_groups=False` to exclude the groupings or explicitly select the grouping columns after groupby to silence this warning.\n",
      "  filtered_df = filtered_df.groupby('timestamp', group_keys=False).apply(calculate_imbalances)\n",
      "C:\\Users\\saite\\AppData\\Local\\Temp\\ipykernel_10332\\915788632.py:132: DeprecationWarning: DataFrameGroupBy.apply operated on the grouping columns. This behavior is deprecated, and in a future version of pandas the grouping columns will be excluded from the operation. Either pass `include_groups=False` to exclude the groupings or explicitly select the grouping columns after groupby to silence this warning.\n",
      "  filtered_df = filtered_df.groupby('timestamp').apply(check_auctions).reset_index(drop=True)\n"
     ]
    }
   ],
   "source": [
    "\n",
    "\n",
    "# Calculate total volume at each price level\n",
    "filtered_df['totalVolume'] = filtered_df['buyVolume'] + filtered_df['sellVolume']\n",
    "\n",
    "# # Group by timestamp and identify the Point of Control (POC) for each 5-minute interval\n",
    "# def calculate_poc(group):\n",
    "#     poc_price = group.loc[group['totalVolume'].idxmax(), 'price']\n",
    "#     group['poc'] = poc_price\n",
    "    \n",
    "#     # Calculate highest bid stacked imbalance and ask stacked imbalance\n",
    "#     group['highest_bid_stacked_imbalance'] = group['buyVolume'].max()\n",
    "#     group['highest_ask_stacked_imbalance'] = group['sellVolume'].max()\n",
    "    \n",
    "#     return group\n",
    "\n",
    "# filtered_df = filtered_df.groupby('timestamp', group_keys=False).apply(calculate_poc)\n",
    "\n",
    "# Group by timestamp and identify the Point of Control (POC) for each 5-minute interval\n",
    "def calculate_poc(group):\n",
    "    poc_price = group.loc[group['totalVolume'].idxmax(), 'price']\n",
    "    group['poc'] = poc_price\n",
    "    \n",
    "    # Calculate highest bid stacked imbalance and ask stacked imbalance\n",
    "    group['highest_bid_stacked_imbalance'] = group['buyVolume'].max()\n",
    "    group['highest_ask_stacked_imbalance'] = group['sellVolume'].max()\n",
    "    \n",
    "    # Calculate highest ask imbalance stack price (consider imbalance as 'sell' or 'both')\n",
    "    ask_imbalance_filter = group[(group['imbalance'] == 'sell') | (group['imbalance'] == 'both')]\n",
    "    if not ask_imbalance_filter.empty:\n",
    "        highest_ask_imbalance_stack_price = ask_imbalance_filter.loc[ask_imbalance_filter['sellVolume'].idxmax(), 'price']\n",
    "    else:\n",
    "        highest_ask_imbalance_stack_price = None\n",
    "    group['highest_ask_imbalance_stack_price'] = highest_ask_imbalance_stack_price\n",
    "    \n",
    "    # Calculate highest bid imbalance stack price (consider imbalance as 'buy' or 'both')\n",
    "    bid_imbalance_filter = group[(group['imbalance'] == 'buy') | (group['imbalance'] == 'both')]\n",
    "    if not bid_imbalance_filter.empty:\n",
    "        highest_bid_imbalance_stack_price = bid_imbalance_filter.loc[bid_imbalance_filter['buyVolume'].idxmax(), 'price']\n",
    "    else:\n",
    "        highest_bid_imbalance_stack_price = None\n",
    "    group['highest_bid_imbalance_stack_price'] = highest_bid_imbalance_stack_price\n",
    "    \n",
    "    return group\n",
    "\n",
    "filtered_df = filtered_df.groupby('timestamp', group_keys=False).apply(calculate_poc)\n",
    "\n",
    "\n",
    "# Calculate delta (buyVolume - sellVolume)\n",
    "filtered_df['delta'] = filtered_df['buyVolume'] - filtered_df['sellVolume']\n",
    "\n",
    "# Calculate total ask imbalance count and highest stacked imbalance count\n",
    "def calculate_imbalances(group):\n",
    "    # Total ask imbalance count (where imbalance is 'sell' or 'both')\n",
    "    ask_imbalance_count = ((group['imbalance'] == 'sell') | (group['imbalance'] == 'both')).sum()\n",
    "    group['total_ask_imbalance_count'] = ask_imbalance_count\n",
    "\n",
    "    # Highest stacked ask imbalance count (consecutive 'sell' or 'both' imbalance)\n",
    "    max_stacked_ask_imbalance = ((group['imbalance'] == 'sell') | (group['imbalance'] == 'both')).astype(int).groupby(((group['imbalance'] != 'sell') & (group['imbalance'] != 'both')).cumsum()).cumsum().max()\n",
    "    group['highest_stacked_ask_imbalance'] = max_stacked_ask_imbalance\n",
    "\n",
    "    # Total bid imbalance count (where imbalance is 'buy' or 'both')\n",
    "    bid_imbalance_count = ((group['imbalance'] == 'buy') | (group['imbalance'] == 'both')).sum()\n",
    "    group['total_bid_imbalance_count'] = bid_imbalance_count\n",
    "\n",
    "    # Highest stacked bid imbalance count (consecutive 'buy' or 'both' imbalance)\n",
    "    max_stacked_bid_imbalance = ((group['imbalance'] == 'buy') | (group['imbalance'] == 'both')).astype(int).groupby(((group['imbalance'] != 'buy') & (group['imbalance'] != 'both')).cumsum()).cumsum().max()\n",
    "    group['highest_stacked_bid_imbalance'] = max_stacked_bid_imbalance\n",
    "\n",
    "    return group\n",
    "\n",
    "filtered_df = filtered_df.groupby('timestamp', group_keys=False).apply(calculate_imbalances)\n",
    "\n",
    "filtered_df = filtered_df.sort_values(by=['timestamp', 'price']).reset_index(drop=True)\n",
    "\n",
    "def add_support_resistance_by_timestamp(df):\n",
    "    # Initialize lists for final results\n",
    "    support_levels_per_timestamp = []\n",
    "    resistance_levels_per_timestamp = []\n",
    "\n",
    "    # Group by timestamp\n",
    "    grouped = df.groupby('timestamp')\n",
    "\n",
    "    for timestamp, group in grouped:\n",
    "        support_levels = []\n",
    "        resistance_levels = []\n",
    "        \n",
    "        # Group consecutive rows with the same imbalance\n",
    "        for _, sub_group in group.groupby((group['imbalance'] != group['imbalance'].shift()).cumsum()):\n",
    "            if len(sub_group) >= 3:  # Ensure the sub-group has at least 3 rows\n",
    "                prices = sub_group['price'].tolist()\n",
    "                imbalance_type = sub_group['imbalance'].iloc[0]\n",
    "                \n",
    "                # Identify support and resistance levels based on imbalance type\n",
    "                if all(sub_group['imbalance'].isin(['buy', 'both'])):\n",
    "                    support_levels.append([round(p, 2) for p in prices])\n",
    "                elif all(sub_group['imbalance'].isin(['sell', 'both'])):\n",
    "                    resistance_levels.append([round(p, 2) for p in prices])\n",
    "\n",
    "        # Store levels for the current timestamp\n",
    "        support_levels_per_timestamp.append((timestamp, support_levels))\n",
    "        resistance_levels_per_timestamp.append((timestamp, resistance_levels))\n",
    "    \n",
    "    # Create new DataFrame columns\n",
    "    df['support_imbalance'] = df['timestamp'].map(\n",
    "        dict((timestamp, levels) for timestamp, levels in support_levels_per_timestamp)\n",
    "    )\n",
    "    df['resistance_imbalance'] = df['timestamp'].map(\n",
    "        dict((timestamp, levels) for timestamp, levels in resistance_levels_per_timestamp)\n",
    "    )\n",
    "\n",
    "    return df\n",
    "\n",
    "# Apply the function to the filtered DataFrame\n",
    "filtered_df = add_support_resistance_by_timestamp(filtered_df)\n",
    "\n",
    "# Sort by timestamp and price\n",
    "filtered_df = filtered_df.sort_values(by=['timestamp', 'price']).reset_index(drop=True)\n",
    "\n",
    "# Group by timestamp and check for unfinished bid and ask auctions\n",
    "def check_auctions(group):\n",
    "    min_price_row = group.loc[group['price'].idxmin()]\n",
    "    max_price_row = group.loc[group['price'].idxmax()]\n",
    "    \n",
    "    buy_auction_status = 'incomplete' if min_price_row['buyVolume'] > 0 and min_price_row['sellVolume'] > 0 else 'complete'\n",
    "    sell_auction_status = 'incomplete' if max_price_row['buyVolume'] > 0 and max_price_row['sellVolume'] > 0 else 'complete'\n",
    "    \n",
    "    group['buy_auction_status'] = buy_auction_status\n",
    "    group['sell_auction_status'] = sell_auction_status\n",
    "    \n",
    "    return group\n",
    "\n",
    "# Apply the auction check function to each group\n",
    "filtered_df = filtered_df.groupby('timestamp').apply(check_auctions).reset_index(drop=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 185,
   "id": "85bdf0b4",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>timestamp</th>\n",
       "      <th>price</th>\n",
       "      <th>buyVolume</th>\n",
       "      <th>sellVolume</th>\n",
       "      <th>imbalance</th>\n",
       "      <th>index</th>\n",
       "      <th>totalVolume</th>\n",
       "      <th>poc</th>\n",
       "      <th>highest_bid_stacked_imbalance</th>\n",
       "      <th>highest_ask_stacked_imbalance</th>\n",
       "      <th>...</th>\n",
       "      <th>highest_bid_imbalance_stack_price</th>\n",
       "      <th>delta</th>\n",
       "      <th>total_ask_imbalance_count</th>\n",
       "      <th>highest_stacked_ask_imbalance</th>\n",
       "      <th>total_bid_imbalance_count</th>\n",
       "      <th>highest_stacked_bid_imbalance</th>\n",
       "      <th>support_imbalance</th>\n",
       "      <th>resistance_imbalance</th>\n",
       "      <th>buy_auction_status</th>\n",
       "      <th>sell_auction_status</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2024-12-04 09:30:00</td>\n",
       "      <td>77.10</td>\n",
       "      <td>597.0</td>\n",
       "      <td>119.0</td>\n",
       "      <td></td>\n",
       "      <td>2757</td>\n",
       "      <td>716.0</td>\n",
       "      <td>77.38</td>\n",
       "      <td>3760.0</td>\n",
       "      <td>3415.0</td>\n",
       "      <td>...</td>\n",
       "      <td>77.4</td>\n",
       "      <td>478.0</td>\n",
       "      <td>18</td>\n",
       "      <td>7</td>\n",
       "      <td>5</td>\n",
       "      <td>2</td>\n",
       "      <td>[]</td>\n",
       "      <td>[[77.14, 77.16, 77.18, 77.2, 77.22, 77.24], [7...</td>\n",
       "      <td>incomplete</td>\n",
       "      <td>complete</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2024-12-04 09:30:00</td>\n",
       "      <td>77.12</td>\n",
       "      <td>1306.0</td>\n",
       "      <td>985.0</td>\n",
       "      <td>both</td>\n",
       "      <td>2757</td>\n",
       "      <td>2291.0</td>\n",
       "      <td>77.38</td>\n",
       "      <td>3760.0</td>\n",
       "      <td>3415.0</td>\n",
       "      <td>...</td>\n",
       "      <td>77.4</td>\n",
       "      <td>321.0</td>\n",
       "      <td>18</td>\n",
       "      <td>7</td>\n",
       "      <td>5</td>\n",
       "      <td>2</td>\n",
       "      <td>[]</td>\n",
       "      <td>[[77.14, 77.16, 77.18, 77.2, 77.22, 77.24], [7...</td>\n",
       "      <td>incomplete</td>\n",
       "      <td>complete</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2024-12-04 09:30:00</td>\n",
       "      <td>77.14</td>\n",
       "      <td>156.0</td>\n",
       "      <td>2510.0</td>\n",
       "      <td>sell</td>\n",
       "      <td>2757</td>\n",
       "      <td>2666.0</td>\n",
       "      <td>77.38</td>\n",
       "      <td>3760.0</td>\n",
       "      <td>3415.0</td>\n",
       "      <td>...</td>\n",
       "      <td>77.4</td>\n",
       "      <td>-2354.0</td>\n",
       "      <td>18</td>\n",
       "      <td>7</td>\n",
       "      <td>5</td>\n",
       "      <td>2</td>\n",
       "      <td>[]</td>\n",
       "      <td>[[77.14, 77.16, 77.18, 77.2, 77.22, 77.24], [7...</td>\n",
       "      <td>incomplete</td>\n",
       "      <td>complete</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2024-12-04 09:30:00</td>\n",
       "      <td>77.16</td>\n",
       "      <td>100.0</td>\n",
       "      <td>709.0</td>\n",
       "      <td>sell</td>\n",
       "      <td>2757</td>\n",
       "      <td>809.0</td>\n",
       "      <td>77.38</td>\n",
       "      <td>3760.0</td>\n",
       "      <td>3415.0</td>\n",
       "      <td>...</td>\n",
       "      <td>77.4</td>\n",
       "      <td>-609.0</td>\n",
       "      <td>18</td>\n",
       "      <td>7</td>\n",
       "      <td>5</td>\n",
       "      <td>2</td>\n",
       "      <td>[]</td>\n",
       "      <td>[[77.14, 77.16, 77.18, 77.2, 77.22, 77.24], [7...</td>\n",
       "      <td>incomplete</td>\n",
       "      <td>complete</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2024-12-04 09:30:00</td>\n",
       "      <td>77.18</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1658.0</td>\n",
       "      <td>sell</td>\n",
       "      <td>2757</td>\n",
       "      <td>1658.0</td>\n",
       "      <td>77.38</td>\n",
       "      <td>3760.0</td>\n",
       "      <td>3415.0</td>\n",
       "      <td>...</td>\n",
       "      <td>77.4</td>\n",
       "      <td>-1658.0</td>\n",
       "      <td>18</td>\n",
       "      <td>7</td>\n",
       "      <td>5</td>\n",
       "      <td>2</td>\n",
       "      <td>[]</td>\n",
       "      <td>[[77.14, 77.16, 77.18, 77.2, 77.22, 77.24], [7...</td>\n",
       "      <td>incomplete</td>\n",
       "      <td>complete</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>531</th>\n",
       "      <td>2024-12-04 15:55:00</td>\n",
       "      <td>78.40</td>\n",
       "      <td>478.0</td>\n",
       "      <td>1584.0</td>\n",
       "      <td></td>\n",
       "      <td>2834</td>\n",
       "      <td>2062.0</td>\n",
       "      <td>78.44</td>\n",
       "      <td>5457.0</td>\n",
       "      <td>7033.0</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>-1106.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>[]</td>\n",
       "      <td>[]</td>\n",
       "      <td>incomplete</td>\n",
       "      <td>incomplete</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>532</th>\n",
       "      <td>2024-12-04 15:55:00</td>\n",
       "      <td>78.42</td>\n",
       "      <td>1472.0</td>\n",
       "      <td>4033.0</td>\n",
       "      <td></td>\n",
       "      <td>2834</td>\n",
       "      <td>5505.0</td>\n",
       "      <td>78.44</td>\n",
       "      <td>5457.0</td>\n",
       "      <td>7033.0</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>-2561.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>[]</td>\n",
       "      <td>[]</td>\n",
       "      <td>incomplete</td>\n",
       "      <td>incomplete</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>533</th>\n",
       "      <td>2024-12-04 15:55:00</td>\n",
       "      <td>78.44</td>\n",
       "      <td>5457.0</td>\n",
       "      <td>7033.0</td>\n",
       "      <td></td>\n",
       "      <td>2834</td>\n",
       "      <td>12490.0</td>\n",
       "      <td>78.44</td>\n",
       "      <td>5457.0</td>\n",
       "      <td>7033.0</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>-1576.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>[]</td>\n",
       "      <td>[]</td>\n",
       "      <td>incomplete</td>\n",
       "      <td>incomplete</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>534</th>\n",
       "      <td>2024-12-04 15:55:00</td>\n",
       "      <td>78.46</td>\n",
       "      <td>4136.0</td>\n",
       "      <td>2046.0</td>\n",
       "      <td></td>\n",
       "      <td>2834</td>\n",
       "      <td>6182.0</td>\n",
       "      <td>78.44</td>\n",
       "      <td>5457.0</td>\n",
       "      <td>7033.0</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2090.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>[]</td>\n",
       "      <td>[]</td>\n",
       "      <td>incomplete</td>\n",
       "      <td>incomplete</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>535</th>\n",
       "      <td>2024-12-04 15:55:00</td>\n",
       "      <td>78.48</td>\n",
       "      <td>4377.0</td>\n",
       "      <td>1388.0</td>\n",
       "      <td></td>\n",
       "      <td>2834</td>\n",
       "      <td>5765.0</td>\n",
       "      <td>78.44</td>\n",
       "      <td>5457.0</td>\n",
       "      <td>7033.0</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2989.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>[]</td>\n",
       "      <td>[]</td>\n",
       "      <td>incomplete</td>\n",
       "      <td>incomplete</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>536 rows × 21 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "              timestamp  price  buyVolume  sellVolume imbalance  index  \\\n",
       "0   2024-12-04 09:30:00  77.10      597.0       119.0             2757   \n",
       "1   2024-12-04 09:30:00  77.12     1306.0       985.0      both   2757   \n",
       "2   2024-12-04 09:30:00  77.14      156.0      2510.0      sell   2757   \n",
       "3   2024-12-04 09:30:00  77.16      100.0       709.0      sell   2757   \n",
       "4   2024-12-04 09:30:00  77.18        0.0      1658.0      sell   2757   \n",
       "..                  ...    ...        ...         ...       ...    ...   \n",
       "531 2024-12-04 15:55:00  78.40      478.0      1584.0             2834   \n",
       "532 2024-12-04 15:55:00  78.42     1472.0      4033.0             2834   \n",
       "533 2024-12-04 15:55:00  78.44     5457.0      7033.0             2834   \n",
       "534 2024-12-04 15:55:00  78.46     4136.0      2046.0             2834   \n",
       "535 2024-12-04 15:55:00  78.48     4377.0      1388.0             2834   \n",
       "\n",
       "     totalVolume    poc  highest_bid_stacked_imbalance  \\\n",
       "0          716.0  77.38                         3760.0   \n",
       "1         2291.0  77.38                         3760.0   \n",
       "2         2666.0  77.38                         3760.0   \n",
       "3          809.0  77.38                         3760.0   \n",
       "4         1658.0  77.38                         3760.0   \n",
       "..           ...    ...                            ...   \n",
       "531       2062.0  78.44                         5457.0   \n",
       "532       5505.0  78.44                         5457.0   \n",
       "533      12490.0  78.44                         5457.0   \n",
       "534       6182.0  78.44                         5457.0   \n",
       "535       5765.0  78.44                         5457.0   \n",
       "\n",
       "     highest_ask_stacked_imbalance  ...  highest_bid_imbalance_stack_price  \\\n",
       "0                           3415.0  ...                               77.4   \n",
       "1                           3415.0  ...                               77.4   \n",
       "2                           3415.0  ...                               77.4   \n",
       "3                           3415.0  ...                               77.4   \n",
       "4                           3415.0  ...                               77.4   \n",
       "..                             ...  ...                                ...   \n",
       "531                         7033.0  ...                                NaN   \n",
       "532                         7033.0  ...                                NaN   \n",
       "533                         7033.0  ...                                NaN   \n",
       "534                         7033.0  ...                                NaN   \n",
       "535                         7033.0  ...                                NaN   \n",
       "\n",
       "      delta  total_ask_imbalance_count  highest_stacked_ask_imbalance  \\\n",
       "0     478.0                         18                              7   \n",
       "1     321.0                         18                              7   \n",
       "2   -2354.0                         18                              7   \n",
       "3    -609.0                         18                              7   \n",
       "4   -1658.0                         18                              7   \n",
       "..      ...                        ...                            ...   \n",
       "531 -1106.0                          0                              0   \n",
       "532 -2561.0                          0                              0   \n",
       "533 -1576.0                          0                              0   \n",
       "534  2090.0                          0                              0   \n",
       "535  2989.0                          0                              0   \n",
       "\n",
       "     total_bid_imbalance_count  highest_stacked_bid_imbalance  \\\n",
       "0                            5                              2   \n",
       "1                            5                              2   \n",
       "2                            5                              2   \n",
       "3                            5                              2   \n",
       "4                            5                              2   \n",
       "..                         ...                            ...   \n",
       "531                          0                              0   \n",
       "532                          0                              0   \n",
       "533                          0                              0   \n",
       "534                          0                              0   \n",
       "535                          0                              0   \n",
       "\n",
       "     support_imbalance                               resistance_imbalance  \\\n",
       "0                   []  [[77.14, 77.16, 77.18, 77.2, 77.22, 77.24], [7...   \n",
       "1                   []  [[77.14, 77.16, 77.18, 77.2, 77.22, 77.24], [7...   \n",
       "2                   []  [[77.14, 77.16, 77.18, 77.2, 77.22, 77.24], [7...   \n",
       "3                   []  [[77.14, 77.16, 77.18, 77.2, 77.22, 77.24], [7...   \n",
       "4                   []  [[77.14, 77.16, 77.18, 77.2, 77.22, 77.24], [7...   \n",
       "..                 ...                                                ...   \n",
       "531                 []                                                 []   \n",
       "532                 []                                                 []   \n",
       "533                 []                                                 []   \n",
       "534                 []                                                 []   \n",
       "535                 []                                                 []   \n",
       "\n",
       "    buy_auction_status sell_auction_status  \n",
       "0           incomplete            complete  \n",
       "1           incomplete            complete  \n",
       "2           incomplete            complete  \n",
       "3           incomplete            complete  \n",
       "4           incomplete            complete  \n",
       "..                 ...                 ...  \n",
       "531         incomplete          incomplete  \n",
       "532         incomplete          incomplete  \n",
       "533         incomplete          incomplete  \n",
       "534         incomplete          incomplete  \n",
       "535         incomplete          incomplete  \n",
       "\n",
       "[536 rows x 21 columns]"
      ]
     },
     "execution_count": 185,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "filtered_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 186,
   "id": "1f47fb07",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>timestamp</th>\n",
       "      <th>price</th>\n",
       "      <th>buyVolume</th>\n",
       "      <th>sellVolume</th>\n",
       "      <th>imbalance</th>\n",
       "      <th>index</th>\n",
       "      <th>totalVolume</th>\n",
       "      <th>poc</th>\n",
       "      <th>highest_bid_stacked_imbalance</th>\n",
       "      <th>highest_ask_stacked_imbalance</th>\n",
       "      <th>...</th>\n",
       "      <th>highest_bid_imbalance_stack_price</th>\n",
       "      <th>delta</th>\n",
       "      <th>total_ask_imbalance_count</th>\n",
       "      <th>highest_stacked_ask_imbalance</th>\n",
       "      <th>total_bid_imbalance_count</th>\n",
       "      <th>highest_stacked_bid_imbalance</th>\n",
       "      <th>support_imbalance</th>\n",
       "      <th>resistance_imbalance</th>\n",
       "      <th>buy_auction_status</th>\n",
       "      <th>sell_auction_status</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2024-12-04 09:30:00</td>\n",
       "      <td>77.12</td>\n",
       "      <td>1306.0</td>\n",
       "      <td>985.0</td>\n",
       "      <td>both</td>\n",
       "      <td>2757</td>\n",
       "      <td>2291.0</td>\n",
       "      <td>77.38</td>\n",
       "      <td>3760.0</td>\n",
       "      <td>3415.0</td>\n",
       "      <td>...</td>\n",
       "      <td>77.40</td>\n",
       "      <td>321.0</td>\n",
       "      <td>18</td>\n",
       "      <td>7</td>\n",
       "      <td>5</td>\n",
       "      <td>2</td>\n",
       "      <td>[]</td>\n",
       "      <td>[[77.14, 77.16, 77.18, 77.2, 77.22, 77.24], [7...</td>\n",
       "      <td>incomplete</td>\n",
       "      <td>complete</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>103</th>\n",
       "      <td>2024-12-04 09:55:00</td>\n",
       "      <td>77.54</td>\n",
       "      <td>2391.0</td>\n",
       "      <td>1477.0</td>\n",
       "      <td>both</td>\n",
       "      <td>2762</td>\n",
       "      <td>3868.0</td>\n",
       "      <td>77.54</td>\n",
       "      <td>2855.0</td>\n",
       "      <td>1477.0</td>\n",
       "      <td>...</td>\n",
       "      <td>77.50</td>\n",
       "      <td>914.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>9</td>\n",
       "      <td>5</td>\n",
       "      <td>[[77.36, 77.38, 77.4, 77.42, 77.44], [77.48, 7...</td>\n",
       "      <td>[]</td>\n",
       "      <td>complete</td>\n",
       "      <td>complete</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>146</th>\n",
       "      <td>2024-12-04 10:15:00</td>\n",
       "      <td>77.76</td>\n",
       "      <td>906.0</td>\n",
       "      <td>11.0</td>\n",
       "      <td>both</td>\n",
       "      <td>2766</td>\n",
       "      <td>917.0</td>\n",
       "      <td>77.82</td>\n",
       "      <td>1604.0</td>\n",
       "      <td>1717.0</td>\n",
       "      <td>...</td>\n",
       "      <td>77.76</td>\n",
       "      <td>895.0</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>[]</td>\n",
       "      <td>[]</td>\n",
       "      <td>complete</td>\n",
       "      <td>complete</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>164</th>\n",
       "      <td>2024-12-04 10:25:00</td>\n",
       "      <td>77.94</td>\n",
       "      <td>2203.0</td>\n",
       "      <td>1934.0</td>\n",
       "      <td>both</td>\n",
       "      <td>2768</td>\n",
       "      <td>4137.0</td>\n",
       "      <td>77.92</td>\n",
       "      <td>4363.0</td>\n",
       "      <td>2211.0</td>\n",
       "      <td>...</td>\n",
       "      <td>77.92</td>\n",
       "      <td>269.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>[]</td>\n",
       "      <td>[]</td>\n",
       "      <td>incomplete</td>\n",
       "      <td>incomplete</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>218</th>\n",
       "      <td>2024-12-04 11:05:00</td>\n",
       "      <td>77.88</td>\n",
       "      <td>486.0</td>\n",
       "      <td>601.0</td>\n",
       "      <td>both</td>\n",
       "      <td>2776</td>\n",
       "      <td>1087.0</td>\n",
       "      <td>77.84</td>\n",
       "      <td>1400.0</td>\n",
       "      <td>1199.0</td>\n",
       "      <td>...</td>\n",
       "      <td>77.88</td>\n",
       "      <td>-115.0</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>[]</td>\n",
       "      <td>[]</td>\n",
       "      <td>incomplete</td>\n",
       "      <td>complete</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>272</th>\n",
       "      <td>2024-12-04 11:45:00</td>\n",
       "      <td>78.18</td>\n",
       "      <td>1117.0</td>\n",
       "      <td>2176.0</td>\n",
       "      <td>both</td>\n",
       "      <td>2784</td>\n",
       "      <td>3293.0</td>\n",
       "      <td>78.18</td>\n",
       "      <td>1767.0</td>\n",
       "      <td>2176.0</td>\n",
       "      <td>...</td>\n",
       "      <td>78.22</td>\n",
       "      <td>-1059.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>[]</td>\n",
       "      <td>[]</td>\n",
       "      <td>complete</td>\n",
       "      <td>complete</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>365</th>\n",
       "      <td>2024-12-04 12:55:00</td>\n",
       "      <td>78.02</td>\n",
       "      <td>367.0</td>\n",
       "      <td>120.0</td>\n",
       "      <td>both</td>\n",
       "      <td>2798</td>\n",
       "      <td>487.0</td>\n",
       "      <td>77.96</td>\n",
       "      <td>737.0</td>\n",
       "      <td>381.0</td>\n",
       "      <td>...</td>\n",
       "      <td>78.02</td>\n",
       "      <td>247.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>[]</td>\n",
       "      <td>[]</td>\n",
       "      <td>complete</td>\n",
       "      <td>complete</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>435</th>\n",
       "      <td>2024-12-04 14:10:00</td>\n",
       "      <td>77.96</td>\n",
       "      <td>1160.0</td>\n",
       "      <td>1394.0</td>\n",
       "      <td>both</td>\n",
       "      <td>2813</td>\n",
       "      <td>2554.0</td>\n",
       "      <td>77.96</td>\n",
       "      <td>1160.0</td>\n",
       "      <td>1394.0</td>\n",
       "      <td>...</td>\n",
       "      <td>77.96</td>\n",
       "      <td>-234.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>[]</td>\n",
       "      <td>[]</td>\n",
       "      <td>complete</td>\n",
       "      <td>complete</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>8 rows × 21 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "              timestamp  price  buyVolume  sellVolume imbalance  index  \\\n",
       "1   2024-12-04 09:30:00  77.12     1306.0       985.0      both   2757   \n",
       "103 2024-12-04 09:55:00  77.54     2391.0      1477.0      both   2762   \n",
       "146 2024-12-04 10:15:00  77.76      906.0        11.0      both   2766   \n",
       "164 2024-12-04 10:25:00  77.94     2203.0      1934.0      both   2768   \n",
       "218 2024-12-04 11:05:00  77.88      486.0       601.0      both   2776   \n",
       "272 2024-12-04 11:45:00  78.18     1117.0      2176.0      both   2784   \n",
       "365 2024-12-04 12:55:00  78.02      367.0       120.0      both   2798   \n",
       "435 2024-12-04 14:10:00  77.96     1160.0      1394.0      both   2813   \n",
       "\n",
       "     totalVolume    poc  highest_bid_stacked_imbalance  \\\n",
       "1         2291.0  77.38                         3760.0   \n",
       "103       3868.0  77.54                         2855.0   \n",
       "146        917.0  77.82                         1604.0   \n",
       "164       4137.0  77.92                         4363.0   \n",
       "218       1087.0  77.84                         1400.0   \n",
       "272       3293.0  78.18                         1767.0   \n",
       "365        487.0  77.96                          737.0   \n",
       "435       2554.0  77.96                         1160.0   \n",
       "\n",
       "     highest_ask_stacked_imbalance  ...  highest_bid_imbalance_stack_price  \\\n",
       "1                           3415.0  ...                              77.40   \n",
       "103                         1477.0  ...                              77.50   \n",
       "146                         1717.0  ...                              77.76   \n",
       "164                         2211.0  ...                              77.92   \n",
       "218                         1199.0  ...                              77.88   \n",
       "272                         2176.0  ...                              78.22   \n",
       "365                          381.0  ...                              78.02   \n",
       "435                         1394.0  ...                              77.96   \n",
       "\n",
       "      delta  total_ask_imbalance_count  highest_stacked_ask_imbalance  \\\n",
       "1     321.0                         18                              7   \n",
       "103   914.0                          1                              1   \n",
       "146   895.0                          2                              1   \n",
       "164   269.0                          1                              1   \n",
       "218  -115.0                          4                              3   \n",
       "272 -1059.0                          1                              1   \n",
       "365   247.0                          1                              1   \n",
       "435  -234.0                          1                              1   \n",
       "\n",
       "     total_bid_imbalance_count  highest_stacked_bid_imbalance  \\\n",
       "1                            5                              2   \n",
       "103                          9                              5   \n",
       "146                          1                              1   \n",
       "164                          3                              3   \n",
       "218                          1                              1   \n",
       "272                          2                              1   \n",
       "365                          1                              1   \n",
       "435                          2                              2   \n",
       "\n",
       "                                     support_imbalance  \\\n",
       "1                                                   []   \n",
       "103  [[77.36, 77.38, 77.4, 77.42, 77.44], [77.48, 7...   \n",
       "146                                                 []   \n",
       "164                                                 []   \n",
       "218                                                 []   \n",
       "272                                                 []   \n",
       "365                                                 []   \n",
       "435                                                 []   \n",
       "\n",
       "                                  resistance_imbalance buy_auction_status  \\\n",
       "1    [[77.14, 77.16, 77.18, 77.2, 77.22, 77.24], [7...         incomplete   \n",
       "103                                                 []           complete   \n",
       "146                                                 []           complete   \n",
       "164                                                 []         incomplete   \n",
       "218                                                 []         incomplete   \n",
       "272                                                 []           complete   \n",
       "365                                                 []           complete   \n",
       "435                                                 []           complete   \n",
       "\n",
       "    sell_auction_status  \n",
       "1              complete  \n",
       "103            complete  \n",
       "146            complete  \n",
       "164          incomplete  \n",
       "218            complete  \n",
       "272            complete  \n",
       "365            complete  \n",
       "435            complete  \n",
       "\n",
       "[8 rows x 21 columns]"
      ]
     },
     "execution_count": 186,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "filtered_df[filtered_df['imbalance'] == 'both']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 187,
   "id": "039d89b2",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>timestamp</th>\n",
       "      <th>buy_auction_status</th>\n",
       "      <th>sell_auction_status</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2024-12-04 09:30:00</td>\n",
       "      <td>incomplete</td>\n",
       "      <td>complete</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40</th>\n",
       "      <td>2024-12-04 09:35:00</td>\n",
       "      <td>incomplete</td>\n",
       "      <td>complete</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>59</th>\n",
       "      <td>2024-12-04 09:40:00</td>\n",
       "      <td>complete</td>\n",
       "      <td>complete</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>71</th>\n",
       "      <td>2024-12-04 09:45:00</td>\n",
       "      <td>complete</td>\n",
       "      <td>incomplete</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>82</th>\n",
       "      <td>2024-12-04 09:50:00</td>\n",
       "      <td>complete</td>\n",
       "      <td>complete</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>504</th>\n",
       "      <td>2024-12-04 15:35:00</td>\n",
       "      <td>incomplete</td>\n",
       "      <td>complete</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>509</th>\n",
       "      <td>2024-12-04 15:40:00</td>\n",
       "      <td>incomplete</td>\n",
       "      <td>incomplete</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>513</th>\n",
       "      <td>2024-12-04 15:45:00</td>\n",
       "      <td>incomplete</td>\n",
       "      <td>incomplete</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>520</th>\n",
       "      <td>2024-12-04 15:50:00</td>\n",
       "      <td>incomplete</td>\n",
       "      <td>incomplete</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>528</th>\n",
       "      <td>2024-12-04 15:55:00</td>\n",
       "      <td>incomplete</td>\n",
       "      <td>incomplete</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>78 rows × 3 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "              timestamp buy_auction_status sell_auction_status\n",
       "0   2024-12-04 09:30:00         incomplete            complete\n",
       "40  2024-12-04 09:35:00         incomplete            complete\n",
       "59  2024-12-04 09:40:00           complete            complete\n",
       "71  2024-12-04 09:45:00           complete          incomplete\n",
       "82  2024-12-04 09:50:00           complete            complete\n",
       "..                  ...                ...                 ...\n",
       "504 2024-12-04 15:35:00         incomplete            complete\n",
       "509 2024-12-04 15:40:00         incomplete          incomplete\n",
       "513 2024-12-04 15:45:00         incomplete          incomplete\n",
       "520 2024-12-04 15:50:00         incomplete          incomplete\n",
       "528 2024-12-04 15:55:00         incomplete          incomplete\n",
       "\n",
       "[78 rows x 3 columns]"
      ]
     },
     "execution_count": 187,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "filtered_df[['timestamp','buy_auction_status','sell_auction_status']].drop_duplicates()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 188,
   "id": "84a6f333",
   "metadata": {},
   "outputs": [],
   "source": [
    "import ast\n",
    "\n",
    "# Define a function that converts the value to a list if needed\n",
    "def safe_literal_eval(val):\n",
    "    if isinstance(val, str):\n",
    "        try:\n",
    "            return ast.literal_eval(val)\n",
    "        except ValueError:\n",
    "            print(f\"Error in evaluating: {val}\")\n",
    "            return val  # Optionally handle bad strings gracefully\n",
    "    return val\n",
    "\n",
    "# Apply to the columns\n",
    "filtered_df['support_imbalance'] = filtered_df['support_imbalance'].apply(safe_literal_eval)\n",
    "filtered_df['resistance_imbalance'] = filtered_df['resistance_imbalance'].apply(safe_literal_eval)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 189,
   "id": "248390d3",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0      []\n",
       "1      []\n",
       "2      []\n",
       "3      []\n",
       "4      []\n",
       "       ..\n",
       "531    []\n",
       "532    []\n",
       "533    []\n",
       "534    []\n",
       "535    []\n",
       "Name: support_imbalance, Length: 536, dtype: object"
      ]
     },
     "execution_count": 189,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "filtered_df['support_imbalance']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 190,
   "id": "ccd75649",
   "metadata": {},
   "outputs": [],
   "source": [
    "# filtered_df['support_imbalance_count'] = filtered_df['support_imbalance'].apply(lambda x: len(set(x[0])) if isinstance(x, list) and len(x) > 0 and isinstance(x[0], list) else 0)\n",
    "# filtered_df['resistance_imbalance_count'] = filtered_df['resistance_imbalance'].apply(lambda x: len(set(x[0])) if isinstance(x, list) and len(x) > 0 and isinstance(x[0], list) else 0)\n",
    "\n",
    "\n",
    "import itertools\n",
    "\n",
    "# Flatten the nested list and then take the set of unique elements\n",
    "filtered_df['support_imbalance_count'] = filtered_df['support_imbalance'].apply(\n",
    "    lambda x: len(set(itertools.chain.from_iterable(x))) if isinstance(x, list) else 0\n",
    ")\n",
    "\n",
    "filtered_df['resistance_imbalance_count'] = filtered_df['resistance_imbalance'].apply(\n",
    "    lambda x: len(set(itertools.chain.from_iterable(x))) if isinstance(x, list) else 0\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 191,
   "id": "c9820a16",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>support_imbalance</th>\n",
       "      <th>resistance_imbalance</th>\n",
       "      <th>support_imbalance_count</th>\n",
       "      <th>resistance_imbalance_count</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>[]</td>\n",
       "      <td>[[77.14, 77.16, 77.18, 77.2, 77.22, 77.24], [7...</td>\n",
       "      <td>0</td>\n",
       "      <td>17</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>[]</td>\n",
       "      <td>[[77.14, 77.16, 77.18, 77.2, 77.22, 77.24], [7...</td>\n",
       "      <td>0</td>\n",
       "      <td>17</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>[]</td>\n",
       "      <td>[[77.14, 77.16, 77.18, 77.2, 77.22, 77.24], [7...</td>\n",
       "      <td>0</td>\n",
       "      <td>17</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>[]</td>\n",
       "      <td>[[77.14, 77.16, 77.18, 77.2, 77.22, 77.24], [7...</td>\n",
       "      <td>0</td>\n",
       "      <td>17</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>[]</td>\n",
       "      <td>[[77.14, 77.16, 77.18, 77.2, 77.22, 77.24], [7...</td>\n",
       "      <td>0</td>\n",
       "      <td>17</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>531</th>\n",
       "      <td>[]</td>\n",
       "      <td>[]</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>532</th>\n",
       "      <td>[]</td>\n",
       "      <td>[]</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>533</th>\n",
       "      <td>[]</td>\n",
       "      <td>[]</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>534</th>\n",
       "      <td>[]</td>\n",
       "      <td>[]</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>535</th>\n",
       "      <td>[]</td>\n",
       "      <td>[]</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>536 rows × 4 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "    support_imbalance                               resistance_imbalance  \\\n",
       "0                  []  [[77.14, 77.16, 77.18, 77.2, 77.22, 77.24], [7...   \n",
       "1                  []  [[77.14, 77.16, 77.18, 77.2, 77.22, 77.24], [7...   \n",
       "2                  []  [[77.14, 77.16, 77.18, 77.2, 77.22, 77.24], [7...   \n",
       "3                  []  [[77.14, 77.16, 77.18, 77.2, 77.22, 77.24], [7...   \n",
       "4                  []  [[77.14, 77.16, 77.18, 77.2, 77.22, 77.24], [7...   \n",
       "..                ...                                                ...   \n",
       "531                []                                                 []   \n",
       "532                []                                                 []   \n",
       "533                []                                                 []   \n",
       "534                []                                                 []   \n",
       "535                []                                                 []   \n",
       "\n",
       "     support_imbalance_count  resistance_imbalance_count  \n",
       "0                          0                          17  \n",
       "1                          0                          17  \n",
       "2                          0                          17  \n",
       "3                          0                          17  \n",
       "4                          0                          17  \n",
       "..                       ...                         ...  \n",
       "531                        0                           0  \n",
       "532                        0                           0  \n",
       "533                        0                           0  \n",
       "534                        0                           0  \n",
       "535                        0                           0  \n",
       "\n",
       "[536 rows x 4 columns]"
      ]
     },
     "execution_count": 191,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "filtered_df[['support_imbalance','resistance_imbalance','support_imbalance_count','resistance_imbalance_count']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 192,
   "id": "acc8670e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[]"
      ]
     },
     "execution_count": 192,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# filtered_df[filtered_df['timestamp'] == '2024-12-04 09:40:00'][['resistance_imbalance']]\n",
    "filtered_df.loc[59,'resistance_imbalance']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 193,
   "id": "91e912d2",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-12-04 09:30:00\n",
      "[]\n",
      "77.1\n",
      "77.12\n",
      "77.14\n",
      "77.16\n",
      "77.18\n",
      "77.2\n",
      "77.22\n",
      "77.24\n",
      "77.26\n",
      "77.28\n",
      "77.3\n",
      "77.32\n",
      "77.34\n",
      "77.36\n",
      "77.38\n",
      "77.4\n",
      "77.42\n",
      "77.44\n",
      "77.46\n",
      "77.48\n",
      "77.5\n",
      "77.52\n",
      "77.54\n",
      "77.56\n",
      "77.58\n",
      "77.6\n",
      "77.62\n",
      "77.64\n",
      "77.66\n",
      "77.68\n",
      "77.7\n",
      "77.72\n",
      "77.74\n",
      "77.76\n",
      "77.78\n",
      "77.8\n",
      "77.82\n",
      "77.84\n",
      "77.86\n",
      "77.88\n",
      "2024-12-04 09:35:00\n",
      "[]\n",
      "77.26\n",
      "77.28\n",
      "77.3\n",
      "77.32\n",
      "77.34\n",
      "77.36\n",
      "77.38\n",
      "77.4\n",
      "77.42\n",
      "77.44\n",
      "77.46\n",
      "77.48\n",
      "77.5\n",
      "77.52\n",
      "77.54\n",
      "77.56\n",
      "77.58\n",
      "77.6\n",
      "77.62\n",
      "2024-12-04 09:40:00\n",
      "[]\n",
      "77.38\n",
      "77.4\n",
      "77.42\n",
      "77.44\n",
      "77.46\n",
      "77.48\n",
      "77.5\n",
      "77.52\n",
      "77.54\n",
      "77.56\n",
      "77.58\n",
      "77.6\n",
      "2024-12-04 09:45:00\n",
      "[77.42, 77.44, 77.46, 77.48]\n",
      "77.36\n",
      "77.38\n",
      "77.4\n",
      "77.42\n",
      "77.44\n",
      "77.46\n",
      "77.48\n",
      "77.5\n",
      "77.52\n",
      "77.54\n",
      "77.56\n",
      "2024-12-04 09:50:00\n",
      "[]\n",
      "77.32\n",
      "77.34\n",
      "77.36\n",
      "77.38\n",
      "77.4\n",
      "77.42\n",
      "77.44\n",
      "77.46\n",
      "77.48\n",
      "77.5\n",
      "77.52\n",
      "2024-12-04 09:55:00\n",
      "[]\n",
      "77.34\n",
      "77.36\n",
      "77.38\n",
      "77.4\n",
      "77.42\n",
      "77.44\n",
      "77.46\n",
      "77.48\n",
      "77.5\n",
      "77.52\n",
      "77.54\n",
      "77.56\n",
      "2024-12-04 10:00:00\n",
      "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.52]\n",
      "77.52\n",
      "77.54\n",
      "77.56\n",
      "77.58\n",
      "77.6\n",
      "77.62\n",
      "77.64\n",
      "77.66\n",
      "77.68\n",
      "2024-12-04 10:05:00\n",
      "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.58]\n",
      "77.58\n",
      "77.6\n",
      "77.62\n",
      "77.64\n",
      "77.66\n",
      "77.68\n",
      "77.7\n",
      "77.72\n",
      "77.74\n",
      "77.76\n",
      "77.78\n",
      "77.8\n",
      "77.82\n",
      "77.84\n",
      "77.86\n",
      "77.88\n",
      "77.9\n",
      "77.92\n",
      "2024-12-04 10:10:00\n",
      "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64, 77.74, 77.76, 77.78]\n",
      "77.74\n",
      "77.76\n",
      "77.78\n",
      "77.8\n",
      "77.82\n",
      "77.84\n",
      "77.86\n",
      "77.88\n",
      "77.9\n",
      "77.92\n",
      "2024-12-04 10:15:00\n",
      "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n",
      "77.68\n",
      "77.7\n",
      "77.72\n",
      "77.74\n",
      "77.76\n",
      "77.78\n",
      "77.8\n",
      "77.82\n",
      "77.84\n",
      "77.86\n",
      "2024-12-04 10:20:00\n",
      "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n",
      "77.8\n",
      "77.82\n",
      "77.84\n",
      "77.86\n",
      "77.88\n",
      "77.9\n",
      "77.92\n",
      "77.94\n",
      "77.96\n",
      "2024-12-04 10:25:00\n",
      "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64, 77.88, 77.9, 77.92, 77.94]\n",
      "77.88\n",
      "77.9\n",
      "77.92\n",
      "77.94\n",
      "77.96\n",
      "77.98\n",
      "2024-12-04 10:30:00\n",
      "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n",
      "77.76\n",
      "77.78\n",
      "77.8\n",
      "77.82\n",
      "77.84\n",
      "77.86\n",
      "77.88\n",
      "77.9\n",
      "2024-12-04 10:35:00\n",
      "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n",
      "77.78\n",
      "77.8\n",
      "77.82\n",
      "77.84\n",
      "77.86\n",
      "2024-12-04 10:40:00\n",
      "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n",
      "77.76\n",
      "77.78\n",
      "77.8\n",
      "77.82\n",
      "77.84\n",
      "2024-12-04 10:45:00\n",
      "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n",
      "77.66\n",
      "77.68\n",
      "77.7\n",
      "77.72\n",
      "77.74\n",
      "77.76\n",
      "77.78\n",
      "77.8\n",
      "77.82\n",
      "77.84\n",
      "77.86\n",
      "2024-12-04 10:50:00\n",
      "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n",
      "77.74\n",
      "77.76\n",
      "77.78\n",
      "77.8\n",
      "77.82\n",
      "2024-12-04 10:55:00\n",
      "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n",
      "77.78\n",
      "77.8\n",
      "77.82\n",
      "77.84\n",
      "77.86\n",
      "77.88\n",
      "77.9\n",
      "77.92\n",
      "2024-12-04 11:00:00\n",
      "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64, 77.88, 77.9]\n",
      "77.92\n",
      "77.94\n",
      "77.96\n",
      "77.98\n",
      "78.0\n",
      "78.02\n",
      "2024-12-04 11:05:00\n",
      "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64, 77.88, 77.9]\n",
      "77.82\n",
      "77.84\n",
      "77.86\n",
      "77.88\n",
      "77.9\n",
      "77.92\n",
      "77.94\n",
      "2024-12-04 11:10:00\n",
      "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n",
      "77.82\n",
      "77.84\n",
      "77.86\n",
      "77.88\n",
      "77.92\n",
      "77.94\n",
      "77.96\n",
      "77.98\n",
      "78.0\n",
      "2024-12-04 11:15:00\n",
      "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64, 77.86, 77.88, 77.92, 77.94, 77.96]\n",
      "78.0\n",
      "78.02\n",
      "78.04\n",
      "78.06\n",
      "78.08\n",
      "78.1\n",
      "78.12\n",
      "78.14\n",
      "78.16\n",
      "78.18\n",
      "2024-12-04 11:20:00\n",
      "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64, 77.86, 77.88, 77.92, 77.94, 77.96]\n",
      "78.08\n",
      "78.1\n",
      "78.12\n",
      "78.14\n",
      "78.16\n",
      "78.18\n",
      "78.2\n",
      "2024-12-04 11:25:00\n",
      "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64, 77.86, 77.88, 77.92, 77.94, 77.96]\n",
      "78.12\n",
      "78.14\n",
      "78.16\n",
      "78.18\n",
      "78.2\n",
      "78.22\n",
      "78.24\n",
      "78.26\n",
      "2024-12-04 11:30:00\n",
      "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64, 77.86, 77.88, 77.92, 77.94, 77.96]\n",
      "78.18\n",
      "78.2\n",
      "78.22\n",
      "78.24\n",
      "2024-12-04 11:35:00\n",
      "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64, 77.86, 77.88, 77.92, 77.94, 77.96]\n",
      "78.12\n",
      "78.14\n",
      "78.16\n",
      "78.18\n",
      "78.2\n",
      "78.22\n",
      "2024-12-04 11:40:00\n",
      "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64, 77.86, 77.88, 77.92, 77.94, 77.96]\n",
      "78.2\n",
      "78.22\n",
      "78.24\n",
      "78.26\n",
      "78.28\n",
      "2024-12-04 11:45:00\n",
      "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64, 77.86, 77.88, 77.92, 77.94, 77.96]\n",
      "78.16\n",
      "78.18\n",
      "78.2\n",
      "78.22\n",
      "78.24\n",
      "78.26\n",
      "2024-12-04 11:50:00\n",
      "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64, 77.86, 77.88, 77.92, 77.94, 77.96]\n",
      "78.2\n",
      "78.22\n",
      "78.24\n",
      "78.26\n",
      "2024-12-04 11:55:00\n",
      "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64, 77.86, 77.88, 77.92, 77.94, 77.96]\n",
      "78.2\n",
      "78.22\n",
      "78.24\n",
      "78.26\n",
      "2024-12-04 12:00:00\n",
      "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64, 77.86, 77.88, 77.92, 77.94, 77.96]\n",
      "78.1\n",
      "78.12\n",
      "78.14\n",
      "78.16\n",
      "78.18\n",
      "78.2\n",
      "78.22\n",
      "2024-12-04 12:05:00\n",
      "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64, 77.86, 77.88, 77.92, 77.94, 77.96]\n",
      "78.12\n",
      "78.14\n",
      "78.16\n",
      "78.18\n",
      "78.2\n",
      "2024-12-04 12:10:00\n",
      "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64, 77.86, 77.88, 77.92, 77.94, 77.96]\n",
      "78.18\n",
      "78.2\n",
      "78.22\n",
      "78.24\n",
      "78.26\n",
      "78.28\n",
      "2024-12-04 12:15:00\n",
      "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64, 77.86, 77.88, 77.92, 77.94, 77.96]\n",
      "78.12\n",
      "78.14\n",
      "78.16\n",
      "78.18\n",
      "78.2\n",
      "78.22\n",
      "78.24\n",
      "2024-12-04 12:20:00\n",
      "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64, 77.86, 77.88, 77.92, 77.94, 77.96]\n",
      "77.98\n",
      "78.0\n",
      "78.02\n",
      "78.04\n",
      "78.06\n",
      "78.08\n",
      "78.1\n",
      "78.12\n",
      "78.14\n",
      "78.16\n",
      "2024-12-04 12:25:00\n",
      "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64, 77.86, 77.88, 77.92, 77.94, 77.96]\n",
      "77.96\n",
      "77.98\n",
      "78.0\n",
      "78.02\n",
      "78.04\n",
      "78.06\n",
      "78.08\n",
      "2024-12-04 12:30:00\n",
      "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64, 77.86, 77.88, 77.92, 77.94]\n",
      "77.82\n",
      "77.84\n",
      "77.86\n",
      "77.88\n",
      "77.9\n",
      "77.92\n",
      "77.94\n",
      "77.96\n",
      "77.98\n",
      "78.0\n",
      "78.02\n",
      "78.04\n",
      "2024-12-04 12:35:00\n",
      "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n",
      "77.8\n",
      "77.82\n",
      "77.84\n",
      "77.86\n",
      "77.88\n",
      "2024-12-04 12:40:00\n",
      "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n",
      "77.78\n",
      "77.8\n",
      "77.82\n",
      "77.84\n",
      "2024-12-04 12:45:00\n",
      "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n",
      "77.82\n",
      "77.84\n",
      "77.86\n",
      "77.88\n",
      "77.9\n",
      "77.92\n",
      "77.94\n",
      "77.96\n",
      "77.98\n",
      "2024-12-04 12:50:00\n",
      "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64, 77.92, 77.94, 77.96]\n",
      "77.96\n",
      "77.98\n",
      "78.0\n",
      "78.02\n",
      "2024-12-04 12:55:00\n",
      "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64, 77.92, 77.94]\n",
      "77.94\n",
      "77.96\n",
      "77.98\n",
      "78.0\n",
      "78.02\n",
      "78.04\n",
      "2024-12-04 13:00:00\n",
      "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64, 77.92]\n",
      "77.86\n",
      "77.88\n",
      "77.9\n",
      "77.92\n",
      "77.94\n",
      "77.96\n",
      "2024-12-04 13:05:00\n",
      "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n",
      "77.86\n",
      "77.88\n",
      "77.9\n",
      "77.92\n",
      "77.94\n",
      "77.96\n",
      "77.98\n",
      "2024-12-04 13:10:00\n",
      "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n",
      "77.88\n",
      "77.9\n",
      "77.92\n",
      "77.94\n",
      "77.96\n",
      "77.98\n",
      "2024-12-04 13:15:00\n",
      "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n",
      "77.84\n",
      "77.86\n",
      "77.88\n",
      "77.9\n",
      "77.92\n",
      "2024-12-04 13:20:00\n",
      "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n",
      "77.82\n",
      "77.84\n",
      "77.86\n",
      "77.88\n",
      "2024-12-04 13:25:00\n",
      "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n",
      "77.8\n",
      "77.82\n",
      "77.84\n",
      "2024-12-04 13:30:00\n",
      "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n",
      "77.78\n",
      "77.8\n",
      "77.82\n",
      "77.84\n",
      "77.86\n",
      "2024-12-04 13:35:00\n",
      "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n",
      "77.8\n",
      "77.82\n",
      "77.84\n",
      "2024-12-04 13:40:00\n",
      "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n",
      "77.8\n",
      "77.82\n",
      "77.84\n",
      "77.86\n",
      "2024-12-04 13:45:00\n",
      "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n",
      "77.8\n",
      "77.82\n",
      "2024-12-04 13:50:00\n",
      "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n",
      "77.76\n",
      "77.78\n",
      "77.8\n",
      "77.82\n",
      "2024-12-04 13:55:00\n",
      "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n",
      "77.8\n",
      "77.82\n",
      "77.84\n",
      "77.86\n",
      "2024-12-04 14:00:00\n",
      "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n",
      "77.82\n",
      "77.84\n",
      "77.86\n",
      "77.88\n",
      "77.9\n",
      "77.92\n",
      "77.94\n",
      "2024-12-04 14:05:00\n",
      "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64, 77.88, 77.9, 77.92]\n",
      "77.88\n",
      "77.9\n",
      "77.92\n",
      "77.94\n",
      "77.96\n",
      "77.98\n",
      "2024-12-04 14:10:00\n",
      "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n",
      "77.92\n",
      "77.94\n",
      "77.96\n",
      "77.98\n",
      "2024-12-04 14:15:00\n",
      "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n",
      "77.9\n",
      "77.92\n",
      "77.94\n",
      "77.96\n",
      "2024-12-04 14:20:00\n",
      "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n",
      "77.96\n",
      "77.98\n",
      "2024-12-04 14:25:00\n",
      "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n",
      "77.98\n",
      "78.0\n",
      "78.02\n",
      "78.04\n",
      "2024-12-04 14:30:00\n",
      "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n",
      "78.02\n",
      "78.04\n",
      "78.06\n",
      "78.08\n",
      "2024-12-04 14:35:00\n",
      "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n",
      "78.08\n",
      "78.1\n",
      "78.12\n",
      "78.14\n",
      "2024-12-04 14:40:00\n",
      "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n",
      "78.04\n",
      "78.06\n",
      "78.08\n",
      "78.1\n",
      "78.12\n",
      "2024-12-04 14:45:00\n",
      "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n",
      "78.02\n",
      "78.04\n",
      "78.06\n",
      "78.08\n",
      "2024-12-04 14:50:00\n",
      "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n",
      "78.0\n",
      "78.02\n",
      "78.04\n",
      "2024-12-04 14:55:00\n",
      "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n",
      "78.02\n",
      "78.04\n",
      "78.06\n",
      "78.08\n",
      "2024-12-04 15:00:00\n",
      "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n",
      "78.04\n",
      "78.06\n",
      "78.08\n",
      "2024-12-04 15:05:00\n",
      "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n",
      "78.1\n",
      "78.12\n",
      "78.14\n",
      "78.16\n",
      "2024-12-04 15:10:00\n",
      "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n",
      "78.14\n",
      "78.16\n",
      "78.18\n",
      "78.2\n",
      "78.22\n",
      "78.24\n",
      "2024-12-04 15:15:00\n",
      "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n",
      "78.08\n",
      "78.1\n",
      "78.12\n",
      "78.14\n",
      "78.16\n",
      "2024-12-04 15:20:00\n",
      "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n",
      "78.08\n",
      "78.1\n",
      "78.12\n",
      "78.14\n",
      "78.16\n",
      "78.18\n",
      "2024-12-04 15:25:00\n",
      "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64, 78.12, 78.14, 78.16]\n",
      "78.12\n",
      "78.14\n",
      "78.16\n",
      "78.18\n",
      "2024-12-04 15:30:00\n",
      "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n",
      "78.06\n",
      "78.08\n",
      "78.1\n",
      "78.12\n",
      "78.14\n",
      "2024-12-04 15:35:00\n",
      "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n",
      "78.04\n",
      "78.06\n",
      "78.08\n",
      "78.1\n",
      "78.12\n",
      "2024-12-04 15:40:00\n",
      "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n",
      "78.08\n",
      "78.1\n",
      "78.12\n",
      "78.14\n",
      "2024-12-04 15:45:00\n",
      "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n",
      "78.12\n",
      "78.14\n",
      "78.16\n",
      "78.18\n",
      "78.2\n",
      "78.22\n",
      "78.24\n",
      "2024-12-04 15:50:00\n",
      "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64, 78.16, 78.18, 78.2, 78.22]\n",
      "78.24\n",
      "78.26\n",
      "78.28\n",
      "78.3\n",
      "78.32\n",
      "78.34\n",
      "78.36\n",
      "78.38\n",
      "2024-12-04 15:55:00\n",
      "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64, 78.16, 78.18, 78.2, 78.22]\n",
      "78.34\n",
      "78.36\n",
      "78.38\n",
      "78.4\n",
      "78.42\n",
      "78.44\n",
      "78.46\n",
      "78.48\n"
     ]
    }
   ],
   "source": [
    "# Initialize active support and resistance levels\n",
    "active_support_levels = [item for sublist in filtered_df.loc[0, 'support_imbalance'] for item in sublist]\n",
    "active_resistance_levels = [item for sublist in filtered_df.loc[0, 'resistance_imbalance'] for item in sublist]\n",
    "\n",
    "# Function to update active support and resistance levels\n",
    "def update_active_levels(active_levels, traded_price):\n",
    "    # Remove levels that have been breached\n",
    "    return [level for level in active_levels if level != traded_price]\n",
    "\n",
    "# Group by timestamp and update active levels\n",
    "active_levels_df = []\n",
    "for timestamp, group in filtered_df.groupby('timestamp'):\n",
    "    print(timestamp)\n",
    "    print(active_support_levels)\n",
    "    for idx, row in group.iterrows():\n",
    "        traded_price = round(row['price'],2)\n",
    "        print(traded_price)\n",
    "        # Add new support and resistance levels from the current row\n",
    "        new_support_levels = [item for sublist in row['support_imbalance'] for item in sublist]\n",
    "        new_resistance_levels = [item for sublist in row['resistance_imbalance'] for item in sublist]\n",
    "        \n",
    "        # Update active support and resistance levels with new levels\n",
    "        active_support_levels = list(set(active_support_levels + new_support_levels))\n",
    "        active_resistance_levels = list(set(active_resistance_levels + new_resistance_levels))\n",
    "        \n",
    "        # Update support levels\n",
    "        active_support_levels = update_active_levels(active_support_levels, traded_price)\n",
    "        \n",
    "        # Update resistance levels\n",
    "        active_resistance_levels = update_active_levels(active_resistance_levels, traded_price)\n",
    "    \n",
    "    # Sort the active support and resistance levels\n",
    "    active_support_levels = sorted(active_support_levels)\n",
    "    active_resistance_levels = sorted(active_resistance_levels)\n",
    "    \n",
    "    # Append the updated levels to the dataframe\n",
    "    active_levels_df.append({\n",
    "        'timestamp': timestamp,\n",
    "        'active_support_levels': active_support_levels,\n",
    "        'active_resistance_levels': active_resistance_levels\n",
    "    })\n",
    "\n",
    "# Create a DataFrame for active levels\n",
    "active_levels_df = pd.DataFrame(active_levels_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 194,
   "id": "600f3efa",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>index</th>\n",
       "      <th>timestamp</th>\n",
       "      <th>active_support_levels</th>\n",
       "      <th>active_resistance_levels</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>2024-12-04 09:30:00</td>\n",
       "      <td>[]</td>\n",
       "      <td>[77.14, 77.16, 77.18, 77.2, 77.22, 77.24, 77.2...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>2024-12-04 09:35:00</td>\n",
       "      <td>[]</td>\n",
       "      <td>[77.14, 77.16, 77.18, 77.2, 77.22, 77.24]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>2024-12-04 09:40:00</td>\n",
       "      <td>[77.42, 77.44, 77.46, 77.48]</td>\n",
       "      <td>[77.14, 77.16, 77.18, 77.2, 77.22, 77.24]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>2024-12-04 09:45:00</td>\n",
       "      <td>[]</td>\n",
       "      <td>[77.14, 77.16, 77.18, 77.2, 77.22, 77.24]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>2024-12-04 09:50:00</td>\n",
       "      <td>[]</td>\n",
       "      <td>[77.14, 77.16, 77.18, 77.2, 77.22, 77.24]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>73</th>\n",
       "      <td>73</td>\n",
       "      <td>2024-12-04 15:35:00</td>\n",
       "      <td>[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5...</td>\n",
       "      <td>[77.14, 77.16, 77.18, 77.2, 77.22, 77.24, 77.72]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>74</th>\n",
       "      <td>74</td>\n",
       "      <td>2024-12-04 15:40:00</td>\n",
       "      <td>[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5...</td>\n",
       "      <td>[77.14, 77.16, 77.18, 77.2, 77.22, 77.24, 77.72]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75</th>\n",
       "      <td>75</td>\n",
       "      <td>2024-12-04 15:45:00</td>\n",
       "      <td>[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5...</td>\n",
       "      <td>[77.14, 77.16, 77.18, 77.2, 77.22, 77.24, 77.72]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>76</th>\n",
       "      <td>76</td>\n",
       "      <td>2024-12-04 15:50:00</td>\n",
       "      <td>[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5...</td>\n",
       "      <td>[77.14, 77.16, 77.18, 77.2, 77.22, 77.24, 77.72]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>77</th>\n",
       "      <td>77</td>\n",
       "      <td>2024-12-04 15:55:00</td>\n",
       "      <td>[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5...</td>\n",
       "      <td>[77.14, 77.16, 77.18, 77.2, 77.22, 77.24, 77.72]</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>78 rows × 4 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "    index           timestamp  \\\n",
       "0       0 2024-12-04 09:30:00   \n",
       "1       1 2024-12-04 09:35:00   \n",
       "2       2 2024-12-04 09:40:00   \n",
       "3       3 2024-12-04 09:45:00   \n",
       "4       4 2024-12-04 09:50:00   \n",
       "..    ...                 ...   \n",
       "73     73 2024-12-04 15:35:00   \n",
       "74     74 2024-12-04 15:40:00   \n",
       "75     75 2024-12-04 15:45:00   \n",
       "76     76 2024-12-04 15:50:00   \n",
       "77     77 2024-12-04 15:55:00   \n",
       "\n",
       "                                active_support_levels  \\\n",
       "0                                                  []   \n",
       "1                                                  []   \n",
       "2                        [77.42, 77.44, 77.46, 77.48]   \n",
       "3                                                  []   \n",
       "4                                                  []   \n",
       "..                                                ...   \n",
       "73  [77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5...   \n",
       "74  [77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5...   \n",
       "75  [77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5...   \n",
       "76  [77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5...   \n",
       "77  [77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5...   \n",
       "\n",
       "                             active_resistance_levels  \n",
       "0   [77.14, 77.16, 77.18, 77.2, 77.22, 77.24, 77.2...  \n",
       "1           [77.14, 77.16, 77.18, 77.2, 77.22, 77.24]  \n",
       "2           [77.14, 77.16, 77.18, 77.2, 77.22, 77.24]  \n",
       "3           [77.14, 77.16, 77.18, 77.2, 77.22, 77.24]  \n",
       "4           [77.14, 77.16, 77.18, 77.2, 77.22, 77.24]  \n",
       "..                                                ...  \n",
       "73   [77.14, 77.16, 77.18, 77.2, 77.22, 77.24, 77.72]  \n",
       "74   [77.14, 77.16, 77.18, 77.2, 77.22, 77.24, 77.72]  \n",
       "75   [77.14, 77.16, 77.18, 77.2, 77.22, 77.24, 77.72]  \n",
       "76   [77.14, 77.16, 77.18, 77.2, 77.22, 77.24, 77.72]  \n",
       "77   [77.14, 77.16, 77.18, 77.2, 77.22, 77.24, 77.72]  \n",
       "\n",
       "[78 rows x 4 columns]"
      ]
     },
     "execution_count": 194,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "temp_df = active_levels_df\n",
    "\n",
    "temp_df['active_support_levels'] = temp_df['active_support_levels'].apply(str)\n",
    "temp_df['active_resistance_levels'] = temp_df['active_resistance_levels'].apply(str)\n",
    "temp_df = temp_df.drop_duplicates().reset_index()\n",
    "\n",
    "temp_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 195,
   "id": "1d9c8662",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'[77.14, 77.16, 77.18, 77.2, 77.22, 77.24, 77.72]'"
      ]
     },
     "execution_count": 195,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "temp_df.loc[77,'active_resistance_levels']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 197,
   "id": "90d27fd9",
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "# Convert 'support_imbalance' and 'resistance_imbalance' columns to strings\n",
    "filtered_df['support_imbalance'] = filtered_df['support_imbalance'].apply(str)\n",
    "filtered_df['resistance_imbalance'] = filtered_df['resistance_imbalance'].apply(str)\n",
    "\n",
    "# Reset index and filter required columns\n",
    "temp_filter_df = filtered_df[['timestamp','poc','highest_bid_stacked_imbalance','highest_ask_stacked_imbalance','highest_ask_imbalance_stack_price','highest_bid_imbalance_stack_price',\n",
    "                              \n",
    "                              'total_ask_imbalance_count','highest_stacked_ask_imbalance',\n",
    "                              'total_bid_imbalance_count','highest_stacked_bid_imbalance', \n",
    "                              'support_imbalance_count','resistance_imbalance_count',\n",
    "                              'support_imbalance', 'resistance_imbalance']].drop_duplicates().reset_index(drop=True)\n",
    "\n",
    "# Create consecutive POC flag and count highest consecutive POC\n",
    "temp_filter_df['consecutive_poc_flag'] = temp_filter_df['poc'].eq(temp_filter_df['poc'].shift())\n",
    "\n",
    "# Calculate the highest consecutive POC count\n",
    "temp_filter_df['highest_consecutive_poc_count'] = temp_filter_df['poc'].groupby((temp_filter_df['poc'] != temp_filter_df['poc'].shift()).cumsum()).transform('count')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 198,
   "id": "9de5cc77",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>timestamp</th>\n",
       "      <th>poc</th>\n",
       "      <th>highest_bid_stacked_imbalance</th>\n",
       "      <th>highest_ask_stacked_imbalance</th>\n",
       "      <th>highest_ask_imbalance_stack_price</th>\n",
       "      <th>highest_bid_imbalance_stack_price</th>\n",
       "      <th>total_ask_imbalance_count</th>\n",
       "      <th>highest_stacked_ask_imbalance</th>\n",
       "      <th>total_bid_imbalance_count</th>\n",
       "      <th>highest_stacked_bid_imbalance</th>\n",
       "      <th>support_imbalance_count</th>\n",
       "      <th>resistance_imbalance_count</th>\n",
       "      <th>support_imbalance</th>\n",
       "      <th>resistance_imbalance</th>\n",
       "      <th>consecutive_poc_flag</th>\n",
       "      <th>highest_consecutive_poc_count</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2024-12-04 09:30:00</td>\n",
       "      <td>77.38</td>\n",
       "      <td>3760.0</td>\n",
       "      <td>3415.0</td>\n",
       "      <td>77.34</td>\n",
       "      <td>77.40</td>\n",
       "      <td>18</td>\n",
       "      <td>7</td>\n",
       "      <td>5</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>17</td>\n",
       "      <td>[]</td>\n",
       "      <td>[[77.14, 77.16, 77.18, 77.2, 77.22, 77.24], [7...</td>\n",
       "      <td>False</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2024-12-04 09:35:00</td>\n",
       "      <td>77.56</td>\n",
       "      <td>2322.0</td>\n",
       "      <td>2573.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>77.52</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>6</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>[]</td>\n",
       "      <td>[]</td>\n",
       "      <td>False</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2024-12-04 09:40:00</td>\n",
       "      <td>77.52</td>\n",
       "      <td>3673.0</td>\n",
       "      <td>4166.0</td>\n",
       "      <td>77.56</td>\n",
       "      <td>77.48</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>[[77.42, 77.44, 77.46, 77.48]]</td>\n",
       "      <td>[]</td>\n",
       "      <td>False</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2024-12-04 09:45:00</td>\n",
       "      <td>77.44</td>\n",
       "      <td>5001.0</td>\n",
       "      <td>1856.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>77.44</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>6</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>[]</td>\n",
       "      <td>[]</td>\n",
       "      <td>False</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2024-12-04 09:50:00</td>\n",
       "      <td>77.36</td>\n",
       "      <td>992.0</td>\n",
       "      <td>1419.0</td>\n",
       "      <td>77.40</td>\n",
       "      <td>77.48</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>[]</td>\n",
       "      <td>[]</td>\n",
       "      <td>False</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>73</th>\n",
       "      <td>2024-12-04 15:35:00</td>\n",
       "      <td>78.06</td>\n",
       "      <td>2138.0</td>\n",
       "      <td>3137.0</td>\n",
       "      <td>78.08</td>\n",
       "      <td>78.12</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>[]</td>\n",
       "      <td>[]</td>\n",
       "      <td>False</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>74</th>\n",
       "      <td>2024-12-04 15:40:00</td>\n",
       "      <td>78.12</td>\n",
       "      <td>3081.0</td>\n",
       "      <td>6590.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>[]</td>\n",
       "      <td>[]</td>\n",
       "      <td>False</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75</th>\n",
       "      <td>2024-12-04 15:45:00</td>\n",
       "      <td>78.22</td>\n",
       "      <td>3586.0</td>\n",
       "      <td>499.0</td>\n",
       "      <td>78.12</td>\n",
       "      <td>78.22</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>[[78.16, 78.18, 78.2, 78.22]]</td>\n",
       "      <td>[]</td>\n",
       "      <td>False</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>76</th>\n",
       "      <td>2024-12-04 15:50:00</td>\n",
       "      <td>78.26</td>\n",
       "      <td>2938.0</td>\n",
       "      <td>2038.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>78.32</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>[]</td>\n",
       "      <td>[]</td>\n",
       "      <td>False</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>77</th>\n",
       "      <td>2024-12-04 15:55:00</td>\n",
       "      <td>78.44</td>\n",
       "      <td>5457.0</td>\n",
       "      <td>7033.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>[]</td>\n",
       "      <td>[]</td>\n",
       "      <td>False</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>78 rows × 16 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "             timestamp    poc  highest_bid_stacked_imbalance  \\\n",
       "0  2024-12-04 09:30:00  77.38                         3760.0   \n",
       "1  2024-12-04 09:35:00  77.56                         2322.0   \n",
       "2  2024-12-04 09:40:00  77.52                         3673.0   \n",
       "3  2024-12-04 09:45:00  77.44                         5001.0   \n",
       "4  2024-12-04 09:50:00  77.36                          992.0   \n",
       "..                 ...    ...                            ...   \n",
       "73 2024-12-04 15:35:00  78.06                         2138.0   \n",
       "74 2024-12-04 15:40:00  78.12                         3081.0   \n",
       "75 2024-12-04 15:45:00  78.22                         3586.0   \n",
       "76 2024-12-04 15:50:00  78.26                         2938.0   \n",
       "77 2024-12-04 15:55:00  78.44                         5457.0   \n",
       "\n",
       "    highest_ask_stacked_imbalance  highest_ask_imbalance_stack_price  \\\n",
       "0                          3415.0                              77.34   \n",
       "1                          2573.0                                NaN   \n",
       "2                          4166.0                              77.56   \n",
       "3                          1856.0                                NaN   \n",
       "4                          1419.0                              77.40   \n",
       "..                            ...                                ...   \n",
       "73                         3137.0                              78.08   \n",
       "74                         6590.0                                NaN   \n",
       "75                          499.0                              78.12   \n",
       "76                         2038.0                                NaN   \n",
       "77                         7033.0                                NaN   \n",
       "\n",
       "    highest_bid_imbalance_stack_price  total_ask_imbalance_count  \\\n",
       "0                               77.40                         18   \n",
       "1                               77.52                          0   \n",
       "2                               77.48                          2   \n",
       "3                               77.44                          0   \n",
       "4                               77.48                          2   \n",
       "..                                ...                        ...   \n",
       "73                              78.12                          1   \n",
       "74                                NaN                          0   \n",
       "75                              78.22                          1   \n",
       "76                              78.32                          0   \n",
       "77                                NaN                          0   \n",
       "\n",
       "    highest_stacked_ask_imbalance  total_bid_imbalance_count  \\\n",
       "0                               7                          5   \n",
       "1                               0                          6   \n",
       "2                               2                          4   \n",
       "3                               0                          6   \n",
       "4                               2                          2   \n",
       "..                            ...                        ...   \n",
       "73                              1                          1   \n",
       "74                              0                          0   \n",
       "75                              1                          4   \n",
       "76                              0                          3   \n",
       "77                              0                          0   \n",
       "\n",
       "    highest_stacked_bid_imbalance  support_imbalance_count  \\\n",
       "0                               2                        0   \n",
       "1                               2                        0   \n",
       "2                               4                        4   \n",
       "3                               2                        0   \n",
       "4                               2                        0   \n",
       "..                            ...                      ...   \n",
       "73                              1                        0   \n",
       "74                              0                        0   \n",
       "75                              4                        4   \n",
       "76                              2                        0   \n",
       "77                              0                        0   \n",
       "\n",
       "    resistance_imbalance_count               support_imbalance  \\\n",
       "0                           17                              []   \n",
       "1                            0                              []   \n",
       "2                            0  [[77.42, 77.44, 77.46, 77.48]]   \n",
       "3                            0                              []   \n",
       "4                            0                              []   \n",
       "..                         ...                             ...   \n",
       "73                           0                              []   \n",
       "74                           0                              []   \n",
       "75                           0   [[78.16, 78.18, 78.2, 78.22]]   \n",
       "76                           0                              []   \n",
       "77                           0                              []   \n",
       "\n",
       "                                 resistance_imbalance  consecutive_poc_flag  \\\n",
       "0   [[77.14, 77.16, 77.18, 77.2, 77.22, 77.24], [7...                 False   \n",
       "1                                                  []                 False   \n",
       "2                                                  []                 False   \n",
       "3                                                  []                 False   \n",
       "4                                                  []                 False   \n",
       "..                                                ...                   ...   \n",
       "73                                                 []                 False   \n",
       "74                                                 []                 False   \n",
       "75                                                 []                 False   \n",
       "76                                                 []                 False   \n",
       "77                                                 []                 False   \n",
       "\n",
       "    highest_consecutive_poc_count  \n",
       "0                               1  \n",
       "1                               1  \n",
       "2                               1  \n",
       "3                               1  \n",
       "4                               1  \n",
       "..                            ...  \n",
       "73                              1  \n",
       "74                              1  \n",
       "75                              1  \n",
       "76                              1  \n",
       "77                              1  \n",
       "\n",
       "[78 rows x 16 columns]"
      ]
     },
     "execution_count": 198,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "temp_filter_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 201,
   "id": "bcdef9d5",
   "metadata": {},
   "outputs": [],
   "source": [
    "import yfinance as yf\n",
    "\n",
    "temp_filter_df['timestamp'] = pd.to_datetime(temp_filter_df['timestamp'])\n",
    "\n",
    "# Downloading NKE data from yfinance in 5-minute intervals (only available for the last 60 days)\n",
    "stock_data = yf.download('NKE', interval='5m', period='5d', progress=False)\n",
    "\n",
    "# Resetting the index of downloaded NKE data and renaming columns\n",
    "stock_data.reset_index(inplace=True)\n",
    "stock_data['timestamp'] = pd.to_datetime(stock_data['Datetime']).dt.tz_localize(None)\n",
    "\n",
    "# Rounding Open, High, Low, Close, and Adj Close columns to two decimals\n",
    "stock_data[['Open', 'High', 'Low', 'Close', 'Adj Close']] = stock_data[['Open', 'High', 'Low', 'Close', 'Adj Close']].round(2)\n",
    "\n",
    "# Left join the existing dataframe with NKE data\n",
    "merged_df = pd.merge(temp_filter_df, stock_data, how='left', on='timestamp')\n",
    "\n",
    "# Adding a new column 'poc_direction'\n",
    "merged_df['poc_direction'] = merged_df.apply(lambda row: 'bullish' if row['Adj Close'] >= row['poc'] else 'bearish', axis=1)\n",
    "merged_df['highest_ask_imbalance_price_direction'] = merged_df.apply(\n",
    "    lambda row: '' if np.isnan(row['highest_ask_imbalance_stack_price']) else \n",
    "                ('bullish' if row['Adj Close'] >= row['highest_ask_imbalance_stack_price'] else 'bearish'),\n",
    "    axis=1\n",
    ")\n",
    "merged_df['highest_bid_imbalance_price_direction'] = merged_df.apply(\n",
    "    lambda row: '' if np.isnan(row['highest_bid_imbalance_stack_price']) else \n",
    "                ('bullish' if row['Adj Close'] >= row['highest_bid_imbalance_stack_price'] else 'bearish'),\n",
    "    axis=1\n",
    ")\n",
    "\n",
    "merged_df['total_bid_ask_count_direction'] = merged_df.apply(\n",
    "    lambda row: 'bearish' if row['total_ask_imbalance_count'] > row['total_bid_imbalance_count'] else \n",
    "                ('neutral' if row['total_ask_imbalance_count'] == row['total_bid_imbalance_count'] else 'bullish'),\n",
    "    axis=1\n",
    ")\n",
    "\n",
    "merged_df['imbalance_support_resistance_direction'] = merged_df.apply(\n",
    "    lambda row: 'bearish' if row['resistance_imbalance_count'] > row['support_imbalance_count'] else \n",
    "                ('neutral' if row['resistance_imbalance_count'] == row['support_imbalance_count'] else 'bullish'),\n",
    "    axis=1\n",
    ")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 202,
   "id": "8580b4cb",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>timestamp</th>\n",
       "      <th>poc</th>\n",
       "      <th>Adj Close</th>\n",
       "      <th>poc_direction</th>\n",
       "      <th>highest_ask_imbalance_price_direction</th>\n",
       "      <th>highest_bid_imbalance_price_direction</th>\n",
       "      <th>total_bid_ask_count_direction</th>\n",
       "      <th>imbalance_support_resistance_direction</th>\n",
       "      <th>support_imbalance_count</th>\n",
       "      <th>resistance_imbalance_count</th>\n",
       "      <th>highest_ask_imbalance_stack_price</th>\n",
       "      <th>highest_bid_imbalance_stack_price</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2024-12-04 09:30:00</td>\n",
       "      <td>77.38</td>\n",
       "      <td>77.50</td>\n",
       "      <td>bullish</td>\n",
       "      <td>bullish</td>\n",
       "      <td>bullish</td>\n",
       "      <td>bearish</td>\n",
       "      <td>bearish</td>\n",
       "      <td>0</td>\n",
       "      <td>17</td>\n",
       "      <td>77.34</td>\n",
       "      <td>77.40</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2024-12-04 09:35:00</td>\n",
       "      <td>77.56</td>\n",
       "      <td>77.64</td>\n",
       "      <td>bullish</td>\n",
       "      <td></td>\n",
       "      <td>bullish</td>\n",
       "      <td>bullish</td>\n",
       "      <td>neutral</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>77.52</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2024-12-04 09:40:00</td>\n",
       "      <td>77.52</td>\n",
       "      <td>77.56</td>\n",
       "      <td>bullish</td>\n",
       "      <td>bullish</td>\n",
       "      <td>bullish</td>\n",
       "      <td>bullish</td>\n",
       "      <td>bullish</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>77.56</td>\n",
       "      <td>77.48</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2024-12-04 09:45:00</td>\n",
       "      <td>77.44</td>\n",
       "      <td>77.53</td>\n",
       "      <td>bullish</td>\n",
       "      <td></td>\n",
       "      <td>bullish</td>\n",
       "      <td>bullish</td>\n",
       "      <td>neutral</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>77.44</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2024-12-04 09:50:00</td>\n",
       "      <td>77.36</td>\n",
       "      <td>77.36</td>\n",
       "      <td>bullish</td>\n",
       "      <td>bearish</td>\n",
       "      <td>bearish</td>\n",
       "      <td>neutral</td>\n",
       "      <td>neutral</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>77.40</td>\n",
       "      <td>77.48</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>2024-12-04 09:55:00</td>\n",
       "      <td>77.54</td>\n",
       "      <td>77.56</td>\n",
       "      <td>bullish</td>\n",
       "      <td>bullish</td>\n",
       "      <td>bullish</td>\n",
       "      <td>bullish</td>\n",
       "      <td>bullish</td>\n",
       "      <td>8</td>\n",
       "      <td>0</td>\n",
       "      <td>77.54</td>\n",
       "      <td>77.50</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>2024-12-04 10:00:00</td>\n",
       "      <td>77.58</td>\n",
       "      <td>77.69</td>\n",
       "      <td>bullish</td>\n",
       "      <td>bullish</td>\n",
       "      <td>bullish</td>\n",
       "      <td>bullish</td>\n",
       "      <td>bullish</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>77.66</td>\n",
       "      <td>77.58</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>2024-12-04 10:05:00</td>\n",
       "      <td>77.78</td>\n",
       "      <td>77.94</td>\n",
       "      <td>bullish</td>\n",
       "      <td>bullish</td>\n",
       "      <td>bullish</td>\n",
       "      <td>bullish</td>\n",
       "      <td>bullish</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>77.82</td>\n",
       "      <td>77.74</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>2024-12-04 10:10:00</td>\n",
       "      <td>77.80</td>\n",
       "      <td>77.77</td>\n",
       "      <td>bearish</td>\n",
       "      <td>bearish</td>\n",
       "      <td>bearish</td>\n",
       "      <td>bearish</td>\n",
       "      <td>neutral</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>77.86</td>\n",
       "      <td>77.80</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>2024-12-04 10:15:00</td>\n",
       "      <td>77.82</td>\n",
       "      <td>77.84</td>\n",
       "      <td>bullish</td>\n",
       "      <td>bullish</td>\n",
       "      <td>bullish</td>\n",
       "      <td>bearish</td>\n",
       "      <td>neutral</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>77.84</td>\n",
       "      <td>77.76</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>2024-12-04 10:20:00</td>\n",
       "      <td>77.88</td>\n",
       "      <td>77.98</td>\n",
       "      <td>bullish</td>\n",
       "      <td></td>\n",
       "      <td>bullish</td>\n",
       "      <td>bullish</td>\n",
       "      <td>bullish</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>77.88</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>2024-12-04 10:25:00</td>\n",
       "      <td>77.92</td>\n",
       "      <td>77.92</td>\n",
       "      <td>bullish</td>\n",
       "      <td>bearish</td>\n",
       "      <td>bullish</td>\n",
       "      <td>bullish</td>\n",
       "      <td>neutral</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>77.94</td>\n",
       "      <td>77.92</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>2024-12-04 10:30:00</td>\n",
       "      <td>77.78</td>\n",
       "      <td>77.84</td>\n",
       "      <td>bullish</td>\n",
       "      <td>bullish</td>\n",
       "      <td>bullish</td>\n",
       "      <td>bearish</td>\n",
       "      <td>neutral</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>77.80</td>\n",
       "      <td>77.78</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>2024-12-04 10:35:00</td>\n",
       "      <td>77.82</td>\n",
       "      <td>77.80</td>\n",
       "      <td>bearish</td>\n",
       "      <td>bearish</td>\n",
       "      <td>bearish</td>\n",
       "      <td>neutral</td>\n",
       "      <td>neutral</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>77.82</td>\n",
       "      <td>77.86</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>2024-12-04 10:40:00</td>\n",
       "      <td>77.78</td>\n",
       "      <td>77.81</td>\n",
       "      <td>bullish</td>\n",
       "      <td></td>\n",
       "      <td>bullish</td>\n",
       "      <td>bullish</td>\n",
       "      <td>neutral</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>77.78</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             timestamp    poc  Adj Close poc_direction  \\\n",
       "0  2024-12-04 09:30:00  77.38      77.50       bullish   \n",
       "1  2024-12-04 09:35:00  77.56      77.64       bullish   \n",
       "2  2024-12-04 09:40:00  77.52      77.56       bullish   \n",
       "3  2024-12-04 09:45:00  77.44      77.53       bullish   \n",
       "4  2024-12-04 09:50:00  77.36      77.36       bullish   \n",
       "5  2024-12-04 09:55:00  77.54      77.56       bullish   \n",
       "6  2024-12-04 10:00:00  77.58      77.69       bullish   \n",
       "7  2024-12-04 10:05:00  77.78      77.94       bullish   \n",
       "8  2024-12-04 10:10:00  77.80      77.77       bearish   \n",
       "9  2024-12-04 10:15:00  77.82      77.84       bullish   \n",
       "10 2024-12-04 10:20:00  77.88      77.98       bullish   \n",
       "11 2024-12-04 10:25:00  77.92      77.92       bullish   \n",
       "12 2024-12-04 10:30:00  77.78      77.84       bullish   \n",
       "13 2024-12-04 10:35:00  77.82      77.80       bearish   \n",
       "14 2024-12-04 10:40:00  77.78      77.81       bullish   \n",
       "\n",
       "   highest_ask_imbalance_price_direction  \\\n",
       "0                                bullish   \n",
       "1                                          \n",
       "2                                bullish   \n",
       "3                                          \n",
       "4                                bearish   \n",
       "5                                bullish   \n",
       "6                                bullish   \n",
       "7                                bullish   \n",
       "8                                bearish   \n",
       "9                                bullish   \n",
       "10                                         \n",
       "11                               bearish   \n",
       "12                               bullish   \n",
       "13                               bearish   \n",
       "14                                         \n",
       "\n",
       "   highest_bid_imbalance_price_direction total_bid_ask_count_direction  \\\n",
       "0                                bullish                       bearish   \n",
       "1                                bullish                       bullish   \n",
       "2                                bullish                       bullish   \n",
       "3                                bullish                       bullish   \n",
       "4                                bearish                       neutral   \n",
       "5                                bullish                       bullish   \n",
       "6                                bullish                       bullish   \n",
       "7                                bullish                       bullish   \n",
       "8                                bearish                       bearish   \n",
       "9                                bullish                       bearish   \n",
       "10                               bullish                       bullish   \n",
       "11                               bullish                       bullish   \n",
       "12                               bullish                       bearish   \n",
       "13                               bearish                       neutral   \n",
       "14                               bullish                       bullish   \n",
       "\n",
       "   imbalance_support_resistance_direction  support_imbalance_count  \\\n",
       "0                                 bearish                        0   \n",
       "1                                 neutral                        0   \n",
       "2                                 bullish                        4   \n",
       "3                                 neutral                        0   \n",
       "4                                 neutral                        0   \n",
       "5                                 bullish                        8   \n",
       "6                                 bullish                        3   \n",
       "7                                 bullish                        6   \n",
       "8                                 neutral                        0   \n",
       "9                                 neutral                        0   \n",
       "10                                bullish                        5   \n",
       "11                                neutral                        0   \n",
       "12                                neutral                        0   \n",
       "13                                neutral                        0   \n",
       "14                                neutral                        0   \n",
       "\n",
       "    resistance_imbalance_count  highest_ask_imbalance_stack_price  \\\n",
       "0                           17                              77.34   \n",
       "1                            0                                NaN   \n",
       "2                            0                              77.56   \n",
       "3                            0                                NaN   \n",
       "4                            0                              77.40   \n",
       "5                            0                              77.54   \n",
       "6                            0                              77.66   \n",
       "7                            0                              77.82   \n",
       "8                            0                              77.86   \n",
       "9                            0                              77.84   \n",
       "10                           0                                NaN   \n",
       "11                           0                              77.94   \n",
       "12                           0                              77.80   \n",
       "13                           0                              77.82   \n",
       "14                           0                                NaN   \n",
       "\n",
       "    highest_bid_imbalance_stack_price  \n",
       "0                               77.40  \n",
       "1                               77.52  \n",
       "2                               77.48  \n",
       "3                               77.44  \n",
       "4                               77.48  \n",
       "5                               77.50  \n",
       "6                               77.58  \n",
       "7                               77.74  \n",
       "8                               77.80  \n",
       "9                               77.76  \n",
       "10                              77.88  \n",
       "11                              77.92  \n",
       "12                              77.78  \n",
       "13                              77.86  \n",
       "14                              77.78  "
      ]
     },
     "execution_count": 202,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "merged_df[['timestamp','poc','Adj Close','poc_direction','highest_ask_imbalance_price_direction','highest_bid_imbalance_price_direction','total_bid_ask_count_direction','imbalance_support_resistance_direction','support_imbalance_count','resistance_imbalance_count','highest_ask_imbalance_stack_price','highest_bid_imbalance_stack_price']].head(15)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 95,
   "id": "5f85e064",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['timestamp', 'poc', 'highest_bid_stacked_imbalance',\n",
       "       'highest_ask_stacked_imbalance', 'highest_ask_imbalance_stack_price',\n",
       "       'highest_bid_imbalance_stack_price', 'total_ask_imbalance_count',\n",
       "       'highest_stacked_ask_imbalance', 'total_bid_imbalance_count',\n",
       "       'highest_stacked_bid_imbalance', 'support_imbalance',\n",
       "       'resistance_imbalance', 'consecutive_poc_flag',\n",
       "       'highest_consecutive_poc_count', 'Datetime', 'Open', 'High', 'Low',\n",
       "       'Close', 'Adj Close', 'Volume', 'poc_direction',\n",
       "       'highest_ask_imbalance_price_direction',\n",
       "       'highest_bid_imbalance_price_direction'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 95,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "merged_df.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "433923fd",
   "metadata": {},
   "outputs": [],
   "source": [
    "filtered_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 242,
   "id": "0aaace44",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'[[576.3, 576.47, 576.64], [577.15, 577.32, 577.49, 577.66, 577.83]]'"
      ]
     },
     "execution_count": 242,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "filtered_df.loc[0,'resistance_imbalance'] "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 172,
   "id": "37b59f94",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[[580.55, 580.72, 580.89],\n",
       " [581.23, 581.4, 581.57, 581.74, 581.91, 582.08, 582.25, 582.42, 582.59],\n",
       " [582.76, 582.93, 583.1, 583.27, 583.44],\n",
       " [583.61, 583.78, 583.95, 584.12, 584.29, 584.46],\n",
       " [584.8, 584.97, 585.14, 585.31, 585.48],\n",
       " [585.99, 586.16, 586.33, 586.5, 586.67, 586.84],\n",
       " [586.67, 586.84, 587.01, 587.18],\n",
       " [588.2, 588.37, 588.54],\n",
       " [588.88, 589.05, 589.22],\n",
       " [589.73, 589.9, 590.07, 590.24],\n",
       " [591.26, 591.43, 591.6],\n",
       " [592.28, 592.45, 592.62],\n",
       " [593.81, 593.98, 594.15],\n",
       " [591.43, 591.6, 591.77, 591.94]]"
      ]
     },
     "execution_count": 172,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "temp_filter_df.loc[0,'support_imbalance']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "id": "5c967fc2",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>timestamp</th>\n",
       "      <th>poc</th>\n",
       "      <th>highest_bid_stacked_imbalance</th>\n",
       "      <th>highest_ask_stacked_imbalance</th>\n",
       "      <th>total_ask_imbalance_count</th>\n",
       "      <th>highest_stacked_ask_imbalance</th>\n",
       "      <th>total_bid_imbalance_count</th>\n",
       "      <th>highest_stacked_bid_imbalance</th>\n",
       "      <th>consecutive_poc_flag</th>\n",
       "      <th>highest_consecutive_poc_count</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>2024-12-02 10:35:00</td>\n",
       "      <td>587.35</td>\n",
       "      <td>4426.0</td>\n",
       "      <td>861.0</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>True</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>46</th>\n",
       "      <td>2024-12-02 13:20:00</td>\n",
       "      <td>591.09</td>\n",
       "      <td>4249.0</td>\n",
       "      <td>1262.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>True</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>65</th>\n",
       "      <td>2024-12-02 14:55:00</td>\n",
       "      <td>590.92</td>\n",
       "      <td>2789.0</td>\n",
       "      <td>3055.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>True</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             timestamp     poc  highest_bid_stacked_imbalance  \\\n",
       "13 2024-12-02 10:35:00  587.35                         4426.0   \n",
       "46 2024-12-02 13:20:00  591.09                         4249.0   \n",
       "65 2024-12-02 14:55:00  590.92                         2789.0   \n",
       "\n",
       "    highest_ask_stacked_imbalance  total_ask_imbalance_count  \\\n",
       "13                          861.0                          2   \n",
       "46                         1262.0                          0   \n",
       "65                         3055.0                          1   \n",
       "\n",
       "    highest_stacked_ask_imbalance  total_bid_imbalance_count  \\\n",
       "13                              1                          1   \n",
       "46                              0                          2   \n",
       "65                              1                          0   \n",
       "\n",
       "    highest_stacked_bid_imbalance  consecutive_poc_flag  \\\n",
       "13                              1                  True   \n",
       "46                              1                  True   \n",
       "65                              0                  True   \n",
       "\n",
       "    highest_consecutive_poc_count  \n",
       "13                              2  \n",
       "46                              2  \n",
       "65                              2  "
      ]
     },
     "execution_count": 68,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "temp_filter_df[temp_filter_df['consecutive_poc_flag']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "60f8fea8",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9d066931",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8fe2eed0",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "id": "610f5a11",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\saite\\AppData\\Local\\Temp\\ipykernel_32296\\1965833506.py:38: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  filtered_df['minute'] = filtered_df['timestamp'].dt.minute  # Add minute to the original DataFrame\n"
     ]
    }
   ],
   "source": [
    "# Group by minute to analyze reversal and initiation\n",
    "def analyze_minute(group):\n",
    "    bid_prices = group.loc[group['imbalance'] == \"sell\", \"price\"]\n",
    "    ask_prices = group.loc[group['imbalance'] == \"buy\", \"price\"]\n",
    "    close_price = group['price'].iloc[-1]  # Last price in the minute\n",
    "\n",
    "    # Initialize comment\n",
    "    comment = \"\"\n",
    "\n",
    "    # Check if close price below ask prices (buyers absorbed by sellers)\n",
    "    if not ask_prices.empty and close_price < ask_prices.min():\n",
    "        comment = \"Buyers absorbed by sellers.\"\n",
    "\n",
    "    # Check if close price above bid prices (sellers absorbed by buyers)\n",
    "    elif not bid_prices.empty and close_price > bid_prices.max():\n",
    "        comment = \"Sellers absorbed by buyers.\"\n",
    "\n",
    "    # Fallback comment\n",
    "    if comment == \"\":\n",
    "        comment = \"No significant absorption detected.\"\n",
    "\n",
    "    return pd.Series({\n",
    "        \"minute\": group['timestamp'].iloc[0].minute,  # Add the minute column explicitly\n",
    "        \"close_price\": close_price,\n",
    "        \"comment\": comment\n",
    "    })\n",
    "\n",
    "# Apply analysis for each minute\n",
    "result_df = filtered_df.groupby(filtered_df['timestamp'].dt.minute).apply(analyze_minute).reset_index(drop=True)\n",
    "\n",
    "# Ensure 'minute' column in result_df exists\n",
    "if 'minute' in result_df.columns:\n",
    "    result_df.rename(columns={\"minute\": \"grouped_minute\"}, inplace=True)\n",
    "else:\n",
    "    raise ValueError(\"The 'minute' column was not created in result_df.\")\n",
    "\n",
    "# Add the grouped analysis back to the original DataFrame by merging on the minute\n",
    "filtered_df['minute'] = filtered_df['timestamp'].dt.minute  # Add minute to the original DataFrame\n",
    "final_df = filtered_df.merge(result_df, how=\"left\", left_on=\"minute\", right_on=\"grouped_minute\")\n",
    "\n",
    "# Drop unnecessary columns to clean up\n",
    "final_df.drop(columns=[\"minute\", \"grouped_minute\"], inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "id": "c35f27d8",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>timestamp</th>\n",
       "      <th>price</th>\n",
       "      <th>buyVolume</th>\n",
       "      <th>sellVolume</th>\n",
       "      <th>imbalance</th>\n",
       "      <th>index</th>\n",
       "      <th>close_price</th>\n",
       "      <th>comment</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2024-11-29 09:30:00</td>\n",
       "      <td>128.24</td>\n",
       "      <td>1032.0</td>\n",
       "      <td>756.0</td>\n",
       "      <td>sell</td>\n",
       "      <td>4606</td>\n",
       "      <td>132.02</td>\n",
       "      <td>Buyers absorbed by sellers.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2024-11-29 09:30:00</td>\n",
       "      <td>128.38</td>\n",
       "      <td>0.0</td>\n",
       "      <td>904.0</td>\n",
       "      <td></td>\n",
       "      <td>4606</td>\n",
       "      <td>132.02</td>\n",
       "      <td>Buyers absorbed by sellers.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2024-11-29 09:30:00</td>\n",
       "      <td>128.10</td>\n",
       "      <td>74.0</td>\n",
       "      <td>657.0</td>\n",
       "      <td></td>\n",
       "      <td>4606</td>\n",
       "      <td>132.02</td>\n",
       "      <td>Buyers absorbed by sellers.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2024-11-29 09:30:00</td>\n",
       "      <td>127.96</td>\n",
       "      <td>1219.0</td>\n",
       "      <td>585.0</td>\n",
       "      <td>sell</td>\n",
       "      <td>4606</td>\n",
       "      <td>132.02</td>\n",
       "      <td>Buyers absorbed by sellers.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2024-11-29 09:30:00</td>\n",
       "      <td>127.82</td>\n",
       "      <td>1024.0</td>\n",
       "      <td>1495.0</td>\n",
       "      <td></td>\n",
       "      <td>4606</td>\n",
       "      <td>132.02</td>\n",
       "      <td>Buyers absorbed by sellers.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>202</th>\n",
       "      <td>2024-11-29 12:50:00</td>\n",
       "      <td>133.56</td>\n",
       "      <td>1081.0</td>\n",
       "      <td>3640.0</td>\n",
       "      <td></td>\n",
       "      <td>4646</td>\n",
       "      <td>133.56</td>\n",
       "      <td>Sellers absorbed by buyers.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>203</th>\n",
       "      <td>2024-11-29 12:55:00</td>\n",
       "      <td>133.56</td>\n",
       "      <td>11117.0</td>\n",
       "      <td>11835.0</td>\n",
       "      <td>sell</td>\n",
       "      <td>4647</td>\n",
       "      <td>133.28</td>\n",
       "      <td>Buyers absorbed by sellers.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>204</th>\n",
       "      <td>2024-11-29 12:55:00</td>\n",
       "      <td>133.70</td>\n",
       "      <td>945.0</td>\n",
       "      <td>661.0</td>\n",
       "      <td></td>\n",
       "      <td>4647</td>\n",
       "      <td>133.28</td>\n",
       "      <td>Buyers absorbed by sellers.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>205</th>\n",
       "      <td>2024-11-29 12:55:00</td>\n",
       "      <td>133.42</td>\n",
       "      <td>3027.0</td>\n",
       "      <td>4472.0</td>\n",
       "      <td>buy</td>\n",
       "      <td>4647</td>\n",
       "      <td>133.28</td>\n",
       "      <td>Buyers absorbed by sellers.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>206</th>\n",
       "      <td>2024-11-29 12:55:00</td>\n",
       "      <td>133.28</td>\n",
       "      <td>0.0</td>\n",
       "      <td>124.0</td>\n",
       "      <td></td>\n",
       "      <td>4647</td>\n",
       "      <td>133.28</td>\n",
       "      <td>Buyers absorbed by sellers.</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>207 rows × 8 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "              timestamp   price  buyVolume  sellVolume imbalance  index  \\\n",
       "0   2024-11-29 09:30:00  128.24     1032.0       756.0      sell   4606   \n",
       "1   2024-11-29 09:30:00  128.38        0.0       904.0             4606   \n",
       "2   2024-11-29 09:30:00  128.10       74.0       657.0             4606   \n",
       "3   2024-11-29 09:30:00  127.96     1219.0       585.0      sell   4606   \n",
       "4   2024-11-29 09:30:00  127.82     1024.0      1495.0             4606   \n",
       "..                  ...     ...        ...         ...       ...    ...   \n",
       "202 2024-11-29 12:50:00  133.56     1081.0      3640.0             4646   \n",
       "203 2024-11-29 12:55:00  133.56    11117.0     11835.0      sell   4647   \n",
       "204 2024-11-29 12:55:00  133.70      945.0       661.0             4647   \n",
       "205 2024-11-29 12:55:00  133.42     3027.0      4472.0       buy   4647   \n",
       "206 2024-11-29 12:55:00  133.28        0.0       124.0             4647   \n",
       "\n",
       "     close_price                      comment  \n",
       "0         132.02  Buyers absorbed by sellers.  \n",
       "1         132.02  Buyers absorbed by sellers.  \n",
       "2         132.02  Buyers absorbed by sellers.  \n",
       "3         132.02  Buyers absorbed by sellers.  \n",
       "4         132.02  Buyers absorbed by sellers.  \n",
       "..           ...                          ...  \n",
       "202       133.56  Sellers absorbed by buyers.  \n",
       "203       133.28  Buyers absorbed by sellers.  \n",
       "204       133.28  Buyers absorbed by sellers.  \n",
       "205       133.28  Buyers absorbed by sellers.  \n",
       "206       133.28  Buyers absorbed by sellers.  \n",
       "\n",
       "[207 rows x 8 columns]"
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "final_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "id": "3ce12eeb",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>timestamp</th>\n",
       "      <th>comment</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2024-11-29 09:30:00</td>\n",
       "      <td>Buyers absorbed by sellers.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>2024-11-29 09:35:00</td>\n",
       "      <td>Sellers absorbed by buyers.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>2024-11-29 09:40:00</td>\n",
       "      <td>No significant absorption detected.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>2024-11-29 09:45:00</td>\n",
       "      <td>Sellers absorbed by buyers.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>2024-11-29 09:50:00</td>\n",
       "      <td>Sellers absorbed by buyers.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>2024-11-29 09:55:00</td>\n",
       "      <td>Buyers absorbed by sellers.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>2024-11-29 10:00:00</td>\n",
       "      <td>Sellers absorbed by buyers.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>2024-11-29 10:05:00</td>\n",
       "      <td>No significant absorption detected.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>35</th>\n",
       "      <td>2024-11-29 10:10:00</td>\n",
       "      <td>No significant absorption detected.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>42</th>\n",
       "      <td>2024-11-29 10:15:00</td>\n",
       "      <td>No significant absorption detected.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>49</th>\n",
       "      <td>2024-11-29 10:20:00</td>\n",
       "      <td>Buyers absorbed by sellers.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>53</th>\n",
       "      <td>2024-11-29 10:25:00</td>\n",
       "      <td>Sellers absorbed by buyers.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>57</th>\n",
       "      <td>2024-11-29 10:30:00</td>\n",
       "      <td>Buyers absorbed by sellers.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>61</th>\n",
       "      <td>2024-11-29 10:35:00</td>\n",
       "      <td>Sellers absorbed by buyers.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>69</th>\n",
       "      <td>2024-11-29 10:40:00</td>\n",
       "      <td>No significant absorption detected.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>72</th>\n",
       "      <td>2024-11-29 10:45:00</td>\n",
       "      <td>Sellers absorbed by buyers.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75</th>\n",
       "      <td>2024-11-29 10:50:00</td>\n",
       "      <td>Sellers absorbed by buyers.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>80</th>\n",
       "      <td>2024-11-29 10:55:00</td>\n",
       "      <td>Buyers absorbed by sellers.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>83</th>\n",
       "      <td>2024-11-29 11:00:00</td>\n",
       "      <td>Sellers absorbed by buyers.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>90</th>\n",
       "      <td>2024-11-29 11:05:00</td>\n",
       "      <td>No significant absorption detected.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>93</th>\n",
       "      <td>2024-11-29 11:10:00</td>\n",
       "      <td>No significant absorption detected.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>96</th>\n",
       "      <td>2024-11-29 11:15:00</td>\n",
       "      <td>No significant absorption detected.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>100</th>\n",
       "      <td>2024-11-29 11:20:00</td>\n",
       "      <td>Buyers absorbed by sellers.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>113</th>\n",
       "      <td>2024-11-29 11:25:00</td>\n",
       "      <td>Sellers absorbed by buyers.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>120</th>\n",
       "      <td>2024-11-29 11:30:00</td>\n",
       "      <td>Buyers absorbed by sellers.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>130</th>\n",
       "      <td>2024-11-29 11:35:00</td>\n",
       "      <td>Sellers absorbed by buyers.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>135</th>\n",
       "      <td>2024-11-29 11:40:00</td>\n",
       "      <td>No significant absorption detected.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>141</th>\n",
       "      <td>2024-11-29 11:45:00</td>\n",
       "      <td>Sellers absorbed by buyers.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>145</th>\n",
       "      <td>2024-11-29 11:50:00</td>\n",
       "      <td>Sellers absorbed by buyers.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>150</th>\n",
       "      <td>2024-11-29 11:55:00</td>\n",
       "      <td>Buyers absorbed by sellers.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>153</th>\n",
       "      <td>2024-11-29 12:00:00</td>\n",
       "      <td>Sellers absorbed by buyers.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>157</th>\n",
       "      <td>2024-11-29 12:05:00</td>\n",
       "      <td>No significant absorption detected.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>160</th>\n",
       "      <td>2024-11-29 12:10:00</td>\n",
       "      <td>No significant absorption detected.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>162</th>\n",
       "      <td>2024-11-29 12:15:00</td>\n",
       "      <td>No significant absorption detected.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>166</th>\n",
       "      <td>2024-11-29 12:20:00</td>\n",
       "      <td>Buyers absorbed by sellers.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>170</th>\n",
       "      <td>2024-11-29 12:25:00</td>\n",
       "      <td>Sellers absorbed by buyers.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>183</th>\n",
       "      <td>2024-11-29 12:30:00</td>\n",
       "      <td>Buyers absorbed by sellers.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>186</th>\n",
       "      <td>2024-11-29 12:35:00</td>\n",
       "      <td>Sellers absorbed by buyers.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>193</th>\n",
       "      <td>2024-11-29 12:40:00</td>\n",
       "      <td>No significant absorption detected.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>196</th>\n",
       "      <td>2024-11-29 12:45:00</td>\n",
       "      <td>Sellers absorbed by buyers.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>199</th>\n",
       "      <td>2024-11-29 12:50:00</td>\n",
       "      <td>Sellers absorbed by buyers.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>203</th>\n",
       "      <td>2024-11-29 12:55:00</td>\n",
       "      <td>Buyers absorbed by sellers.</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "              timestamp                              comment\n",
       "0   2024-11-29 09:30:00          Buyers absorbed by sellers.\n",
       "7   2024-11-29 09:35:00          Sellers absorbed by buyers.\n",
       "11  2024-11-29 09:40:00  No significant absorption detected.\n",
       "15  2024-11-29 09:45:00          Sellers absorbed by buyers.\n",
       "19  2024-11-29 09:50:00          Sellers absorbed by buyers.\n",
       "23  2024-11-29 09:55:00          Buyers absorbed by sellers.\n",
       "26  2024-11-29 10:00:00          Sellers absorbed by buyers.\n",
       "30  2024-11-29 10:05:00  No significant absorption detected.\n",
       "35  2024-11-29 10:10:00  No significant absorption detected.\n",
       "42  2024-11-29 10:15:00  No significant absorption detected.\n",
       "49  2024-11-29 10:20:00          Buyers absorbed by sellers.\n",
       "53  2024-11-29 10:25:00          Sellers absorbed by buyers.\n",
       "57  2024-11-29 10:30:00          Buyers absorbed by sellers.\n",
       "61  2024-11-29 10:35:00          Sellers absorbed by buyers.\n",
       "69  2024-11-29 10:40:00  No significant absorption detected.\n",
       "72  2024-11-29 10:45:00          Sellers absorbed by buyers.\n",
       "75  2024-11-29 10:50:00          Sellers absorbed by buyers.\n",
       "80  2024-11-29 10:55:00          Buyers absorbed by sellers.\n",
       "83  2024-11-29 11:00:00          Sellers absorbed by buyers.\n",
       "90  2024-11-29 11:05:00  No significant absorption detected.\n",
       "93  2024-11-29 11:10:00  No significant absorption detected.\n",
       "96  2024-11-29 11:15:00  No significant absorption detected.\n",
       "100 2024-11-29 11:20:00          Buyers absorbed by sellers.\n",
       "113 2024-11-29 11:25:00          Sellers absorbed by buyers.\n",
       "120 2024-11-29 11:30:00          Buyers absorbed by sellers.\n",
       "130 2024-11-29 11:35:00          Sellers absorbed by buyers.\n",
       "135 2024-11-29 11:40:00  No significant absorption detected.\n",
       "141 2024-11-29 11:45:00          Sellers absorbed by buyers.\n",
       "145 2024-11-29 11:50:00          Sellers absorbed by buyers.\n",
       "150 2024-11-29 11:55:00          Buyers absorbed by sellers.\n",
       "153 2024-11-29 12:00:00          Sellers absorbed by buyers.\n",
       "157 2024-11-29 12:05:00  No significant absorption detected.\n",
       "160 2024-11-29 12:10:00  No significant absorption detected.\n",
       "162 2024-11-29 12:15:00  No significant absorption detected.\n",
       "166 2024-11-29 12:20:00          Buyers absorbed by sellers.\n",
       "170 2024-11-29 12:25:00          Sellers absorbed by buyers.\n",
       "183 2024-11-29 12:30:00          Buyers absorbed by sellers.\n",
       "186 2024-11-29 12:35:00          Sellers absorbed by buyers.\n",
       "193 2024-11-29 12:40:00  No significant absorption detected.\n",
       "196 2024-11-29 12:45:00          Sellers absorbed by buyers.\n",
       "199 2024-11-29 12:50:00          Sellers absorbed by buyers.\n",
       "203 2024-11-29 12:55:00          Buyers absorbed by sellers."
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "temp_df = final_df[['timestamp','comment']].drop_duplicates()\n",
    "\n",
    "temp_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "2ce9b495",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>timestamp</th>\n",
       "      <th>price</th>\n",
       "      <th>buyVolume</th>\n",
       "      <th>sellVolume</th>\n",
       "      <th>imbalance</th>\n",
       "      <th>index</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2024-11-29 09:30:00</td>\n",
       "      <td>234.78</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1193.0</td>\n",
       "      <td></td>\n",
       "      <td>2906</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2024-11-29 09:30:00</td>\n",
       "      <td>234.84</td>\n",
       "      <td>1145.0</td>\n",
       "      <td>1269.0</td>\n",
       "      <td></td>\n",
       "      <td>2906</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2024-11-29 09:30:00</td>\n",
       "      <td>234.90</td>\n",
       "      <td>2290.0</td>\n",
       "      <td>967.0</td>\n",
       "      <td></td>\n",
       "      <td>2906</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2024-11-29 09:30:00</td>\n",
       "      <td>234.96</td>\n",
       "      <td>2290.0</td>\n",
       "      <td>646.0</td>\n",
       "      <td></td>\n",
       "      <td>2906</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2024-11-29 09:30:00</td>\n",
       "      <td>235.02</td>\n",
       "      <td>382.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td></td>\n",
       "      <td>2906</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>280</th>\n",
       "      <td>2024-11-29 12:55:00</td>\n",
       "      <td>237.30</td>\n",
       "      <td>5388.0</td>\n",
       "      <td>1265.0</td>\n",
       "      <td>buy</td>\n",
       "      <td>2947</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>281</th>\n",
       "      <td>2024-11-29 12:55:00</td>\n",
       "      <td>237.36</td>\n",
       "      <td>13753.0</td>\n",
       "      <td>21027.0</td>\n",
       "      <td>buy</td>\n",
       "      <td>2947</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>282</th>\n",
       "      <td>2024-11-29 12:55:00</td>\n",
       "      <td>237.42</td>\n",
       "      <td>23940.0</td>\n",
       "      <td>27431.0</td>\n",
       "      <td></td>\n",
       "      <td>2947</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>283</th>\n",
       "      <td>2024-11-29 12:55:00</td>\n",
       "      <td>237.48</td>\n",
       "      <td>24117.0</td>\n",
       "      <td>11987.0</td>\n",
       "      <td>sell</td>\n",
       "      <td>2947</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>284</th>\n",
       "      <td>2024-11-29 12:55:00</td>\n",
       "      <td>237.54</td>\n",
       "      <td>350.0</td>\n",
       "      <td>553.0</td>\n",
       "      <td></td>\n",
       "      <td>2947</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>285 rows × 6 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "              timestamp   price  buyVolume  sellVolume imbalance  index\n",
       "0   2024-11-29 09:30:00  234.78        0.0      1193.0             2906\n",
       "1   2024-11-29 09:30:00  234.84     1145.0      1269.0             2906\n",
       "2   2024-11-29 09:30:00  234.90     2290.0       967.0             2906\n",
       "3   2024-11-29 09:30:00  234.96     2290.0       646.0             2906\n",
       "4   2024-11-29 09:30:00  235.02      382.0         0.0             2906\n",
       "..                  ...     ...        ...         ...       ...    ...\n",
       "280 2024-11-29 12:55:00  237.30     5388.0      1265.0       buy   2947\n",
       "281 2024-11-29 12:55:00  237.36    13753.0     21027.0       buy   2947\n",
       "282 2024-11-29 12:55:00  237.42    23940.0     27431.0             2947\n",
       "283 2024-11-29 12:55:00  237.48    24117.0     11987.0      sell   2947\n",
       "284 2024-11-29 12:55:00  237.54      350.0       553.0             2947\n",
       "\n",
       "[285 rows x 6 columns]"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "filtered_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "38a4465e",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\saite\\AppData\\Local\\Temp\\ipykernel_32296\\2088222860.py:4: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  filtered_df['imbalance'] = filtered_df['imbalance'].fillna('missing_value')\n",
      "C:\\Users\\saite\\AppData\\Local\\Temp\\ipykernel_32296\\2088222860.py:7: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  filtered_df['imbalance'] = filtered_df['imbalance'].apply(lambda x: 1 if x == 'buy' else -1 if x == 'sell' else 0)\n",
      "C:\\Users\\saite\\AppData\\Local\\Temp\\ipykernel_32296\\2088222860.py:10: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  filtered_df['timestamp'] = filtered_df['timestamp'].astype(np.int64) // 10**9\n",
      "C:\\Users\\saite\\AppData\\Local\\Temp\\ipykernel_32296\\2088222860.py:21: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  filtered_df['anomaly'] = model.predict(filtered_df[features])\n"
     ]
    }
   ],
   "source": [
    "from sklearn.ensemble import IsolationForest\n",
    "\n",
    "# Replace missing values in 'imbalance' with a specific value (e.g., 'missing_value')\n",
    "filtered_df['imbalance'] = filtered_df['imbalance'].fillna('missing_value')\n",
    "\n",
    "# Encode categorical 'imbalance' feature\n",
    "filtered_df['imbalance'] = filtered_df['imbalance'].apply(lambda x: 1 if x == 'buy' else -1 if x == 'sell' else 0)\n",
    "\n",
    "# Convert the timestamp to numerical format (e.g., seconds since epoch)\n",
    "filtered_df['timestamp'] = filtered_df['timestamp'].astype(np.int64) // 10**9\n",
    "\n",
    "# Select relevant features for anomaly detection\n",
    "features = ['timestamp', 'price', 'buyVolume', 'sellVolume', 'imbalance']\n",
    "\n",
    "# # Initialize and fit the Isolation Forest model\n",
    "# model = IsolationForest(contamination='auto', random_state=42)\n",
    "model = IsolationForest(contamination=0.03, random_state=42)\n",
    "model.fit(filtered_df[features])\n",
    "\n",
    "# Predict anomalies\n",
    "filtered_df['anomaly'] = model.predict(filtered_df[features])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "63a670ed",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "anomaly\n",
       " 1    276\n",
       "-1      9\n",
       "Name: count, dtype: int64"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "filtered_df['anomaly'].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "489e5c5e",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\saite\\AppData\\Local\\Temp\\ipykernel_32296\\1014059852.py:2: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  filtered_df['timestamp'] = pd.to_datetime(filtered_df['timestamp'], unit='s')\n"
     ]
    }
   ],
   "source": [
    "# Convert the numerical timestamp back to normal datetime format\n",
    "filtered_df['timestamp'] = pd.to_datetime(filtered_df['timestamp'], unit='s')\n",
    "\n",
    "filtered_anomaly = filtered_df[filtered_df['anomaly'] == -1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "c19f3ef3",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "timestamp\n",
       "2024-11-29 12:55:00    3\n",
       "2024-11-29 09:30:00    2\n",
       "2024-11-29 10:25:00    1\n",
       "2024-11-29 11:10:00    1\n",
       "2024-11-29 11:50:00    1\n",
       "2024-11-29 12:50:00    1\n",
       "Name: count, dtype: int64"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "filtered_anomaly['timestamp'].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "33bc2737",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>timestamp</th>\n",
       "      <th>price</th>\n",
       "      <th>buyVolume</th>\n",
       "      <th>sellVolume</th>\n",
       "      <th>imbalance</th>\n",
       "      <th>index</th>\n",
       "      <th>anomaly</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>2024-11-29 09:30:00</td>\n",
       "      <td>234.12</td>\n",
       "      <td>2746.0</td>\n",
       "      <td>14200.0</td>\n",
       "      <td>0</td>\n",
       "      <td>2906</td>\n",
       "      <td>-1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>2024-11-29 09:30:00</td>\n",
       "      <td>233.94</td>\n",
       "      <td>0.0</td>\n",
       "      <td>5707.0</td>\n",
       "      <td>-1</td>\n",
       "      <td>2906</td>\n",
       "      <td>-1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>103</th>\n",
       "      <td>2024-11-29 10:25:00</td>\n",
       "      <td>235.26</td>\n",
       "      <td>5064.0</td>\n",
       "      <td>11759.0</td>\n",
       "      <td>-1</td>\n",
       "      <td>2917</td>\n",
       "      <td>-1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>150</th>\n",
       "      <td>2024-11-29 11:10:00</td>\n",
       "      <td>235.98</td>\n",
       "      <td>54888.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "      <td>2926</td>\n",
       "      <td>-1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>196</th>\n",
       "      <td>2024-11-29 11:50:00</td>\n",
       "      <td>237.00</td>\n",
       "      <td>74386.0</td>\n",
       "      <td>1535.0</td>\n",
       "      <td>1</td>\n",
       "      <td>2934</td>\n",
       "      <td>-1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>264</th>\n",
       "      <td>2024-11-29 12:50:00</td>\n",
       "      <td>237.42</td>\n",
       "      <td>96558.0</td>\n",
       "      <td>1084.0</td>\n",
       "      <td>1</td>\n",
       "      <td>2946</td>\n",
       "      <td>-1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>281</th>\n",
       "      <td>2024-11-29 12:55:00</td>\n",
       "      <td>237.36</td>\n",
       "      <td>13753.0</td>\n",
       "      <td>21027.0</td>\n",
       "      <td>1</td>\n",
       "      <td>2947</td>\n",
       "      <td>-1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>282</th>\n",
       "      <td>2024-11-29 12:55:00</td>\n",
       "      <td>237.42</td>\n",
       "      <td>23940.0</td>\n",
       "      <td>27431.0</td>\n",
       "      <td>0</td>\n",
       "      <td>2947</td>\n",
       "      <td>-1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>283</th>\n",
       "      <td>2024-11-29 12:55:00</td>\n",
       "      <td>237.48</td>\n",
       "      <td>24117.0</td>\n",
       "      <td>11987.0</td>\n",
       "      <td>-1</td>\n",
       "      <td>2947</td>\n",
       "      <td>-1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "              timestamp   price  buyVolume  sellVolume  imbalance  index  \\\n",
       "13  2024-11-29 09:30:00  234.12     2746.0     14200.0          0   2906   \n",
       "18  2024-11-29 09:30:00  233.94        0.0      5707.0         -1   2906   \n",
       "103 2024-11-29 10:25:00  235.26     5064.0     11759.0         -1   2917   \n",
       "150 2024-11-29 11:10:00  235.98    54888.0         0.0          1   2926   \n",
       "196 2024-11-29 11:50:00  237.00    74386.0      1535.0          1   2934   \n",
       "264 2024-11-29 12:50:00  237.42    96558.0      1084.0          1   2946   \n",
       "281 2024-11-29 12:55:00  237.36    13753.0     21027.0          1   2947   \n",
       "282 2024-11-29 12:55:00  237.42    23940.0     27431.0          0   2947   \n",
       "283 2024-11-29 12:55:00  237.48    24117.0     11987.0         -1   2947   \n",
       "\n",
       "     anomaly  \n",
       "13        -1  \n",
       "18        -1  \n",
       "103       -1  \n",
       "150       -1  \n",
       "196       -1  \n",
       "264       -1  \n",
       "281       -1  \n",
       "282       -1  \n",
       "283       -1  "
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "filtered_anomaly"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "ac1b4ebf",
   "metadata": {},
   "outputs": [
    {
     "ename": "ValueError",
     "evalue": "Found array with 0 sample(s) (shape=(0, 4)) while a minimum of 1 is required by MinMaxScaler.",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mValueError\u001b[0m                                Traceback (most recent call last)",
      "Cell \u001b[1;32mIn[18], line 22\u001b[0m\n\u001b[0;32m     20\u001b[0m \u001b[38;5;66;03m# Normalize the data\u001b[39;00m\n\u001b[0;32m     21\u001b[0m scaler \u001b[38;5;241m=\u001b[39m MinMaxScaler()\n\u001b[1;32m---> 22\u001b[0m filtered_df[features] \u001b[38;5;241m=\u001b[39m \u001b[43mscaler\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfit_transform\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfiltered_df\u001b[49m\u001b[43m[\u001b[49m\u001b[43mfeatures\u001b[49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[1;32mc:\\Users\\saite\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\sklearn\\utils\\_set_output.py:316\u001b[0m, in \u001b[0;36m_wrap_method_output.<locals>.wrapped\u001b[1;34m(self, X, *args, **kwargs)\u001b[0m\n\u001b[0;32m    314\u001b[0m \u001b[38;5;129m@wraps\u001b[39m(f)\n\u001b[0;32m    315\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mwrapped\u001b[39m(\u001b[38;5;28mself\u001b[39m, X, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m--> 316\u001b[0m     data_to_wrap \u001b[38;5;241m=\u001b[39m f(\u001b[38;5;28mself\u001b[39m, X, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m    317\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(data_to_wrap, \u001b[38;5;28mtuple\u001b[39m):\n\u001b[0;32m    318\u001b[0m         \u001b[38;5;66;03m# only wrap the first output for cross decomposition\u001b[39;00m\n\u001b[0;32m    319\u001b[0m         return_tuple \u001b[38;5;241m=\u001b[39m (\n\u001b[0;32m    320\u001b[0m             _wrap_data_with_container(method, data_to_wrap[\u001b[38;5;241m0\u001b[39m], X, \u001b[38;5;28mself\u001b[39m),\n\u001b[0;32m    321\u001b[0m             \u001b[38;5;241m*\u001b[39mdata_to_wrap[\u001b[38;5;241m1\u001b[39m:],\n\u001b[0;32m    322\u001b[0m         )\n",
      "File \u001b[1;32mc:\\Users\\saite\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\sklearn\\base.py:1098\u001b[0m, in \u001b[0;36mTransformerMixin.fit_transform\u001b[1;34m(self, X, y, **fit_params)\u001b[0m\n\u001b[0;32m   1083\u001b[0m         warnings\u001b[38;5;241m.\u001b[39mwarn(\n\u001b[0;32m   1084\u001b[0m             (\n\u001b[0;32m   1085\u001b[0m                 \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThis object (\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m) has a `transform`\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m   (...)\u001b[0m\n\u001b[0;32m   1093\u001b[0m             \u001b[38;5;167;01mUserWarning\u001b[39;00m,\n\u001b[0;32m   1094\u001b[0m         )\n\u001b[0;32m   1096\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m y \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m   1097\u001b[0m     \u001b[38;5;66;03m# fit method of arity 1 (unsupervised transformation)\u001b[39;00m\n\u001b[1;32m-> 1098\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mfit(X, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mfit_params)\u001b[38;5;241m.\u001b[39mtransform(X)\n\u001b[0;32m   1099\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m   1100\u001b[0m     \u001b[38;5;66;03m# fit method of arity 2 (supervised transformation)\u001b[39;00m\n\u001b[0;32m   1101\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mfit(X, y, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mfit_params)\u001b[38;5;241m.\u001b[39mtransform(X)\n",
      "File \u001b[1;32mc:\\Users\\saite\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\sklearn\\preprocessing\\_data.py:450\u001b[0m, in \u001b[0;36mMinMaxScaler.fit\u001b[1;34m(self, X, y)\u001b[0m\n\u001b[0;32m    448\u001b[0m \u001b[38;5;66;03m# Reset internal state before fitting\u001b[39;00m\n\u001b[0;32m    449\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_reset()\n\u001b[1;32m--> 450\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpartial_fit\u001b[49m\u001b[43m(\u001b[49m\u001b[43mX\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[1;32mc:\\Users\\saite\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\sklearn\\base.py:1473\u001b[0m, in \u001b[0;36m_fit_context.<locals>.decorator.<locals>.wrapper\u001b[1;34m(estimator, *args, **kwargs)\u001b[0m\n\u001b[0;32m   1466\u001b[0m     estimator\u001b[38;5;241m.\u001b[39m_validate_params()\n\u001b[0;32m   1468\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m config_context(\n\u001b[0;32m   1469\u001b[0m     skip_parameter_validation\u001b[38;5;241m=\u001b[39m(\n\u001b[0;32m   1470\u001b[0m         prefer_skip_nested_validation \u001b[38;5;129;01mor\u001b[39;00m global_skip_validation\n\u001b[0;32m   1471\u001b[0m     )\n\u001b[0;32m   1472\u001b[0m ):\n\u001b[1;32m-> 1473\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m fit_method(estimator, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n",
      "File \u001b[1;32mc:\\Users\\saite\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\sklearn\\preprocessing\\_data.py:490\u001b[0m, in \u001b[0;36mMinMaxScaler.partial_fit\u001b[1;34m(self, X, y)\u001b[0m\n\u001b[0;32m    487\u001b[0m xp, _ \u001b[38;5;241m=\u001b[39m get_namespace(X)\n\u001b[0;32m    489\u001b[0m first_pass \u001b[38;5;241m=\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mn_samples_seen_\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m--> 490\u001b[0m X \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_validate_data\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m    491\u001b[0m \u001b[43m    \u001b[49m\u001b[43mX\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m    492\u001b[0m \u001b[43m    \u001b[49m\u001b[43mreset\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfirst_pass\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m    493\u001b[0m \u001b[43m    \u001b[49m\u001b[43mdtype\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_array_api\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msupported_float_dtypes\u001b[49m\u001b[43m(\u001b[49m\u001b[43mxp\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m    494\u001b[0m \u001b[43m    \u001b[49m\u001b[43mforce_all_finite\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mallow-nan\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[0;32m    495\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m    497\u001b[0m data_min \u001b[38;5;241m=\u001b[39m _array_api\u001b[38;5;241m.\u001b[39m_nanmin(X, axis\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m0\u001b[39m, xp\u001b[38;5;241m=\u001b[39mxp)\n\u001b[0;32m    498\u001b[0m data_max \u001b[38;5;241m=\u001b[39m _array_api\u001b[38;5;241m.\u001b[39m_nanmax(X, axis\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m0\u001b[39m, xp\u001b[38;5;241m=\u001b[39mxp)\n",
      "File \u001b[1;32mc:\\Users\\saite\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\sklearn\\base.py:633\u001b[0m, in \u001b[0;36mBaseEstimator._validate_data\u001b[1;34m(self, X, y, reset, validate_separately, cast_to_ndarray, **check_params)\u001b[0m\n\u001b[0;32m    631\u001b[0m         out \u001b[38;5;241m=\u001b[39m X, y\n\u001b[0;32m    632\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m no_val_X \u001b[38;5;129;01mand\u001b[39;00m no_val_y:\n\u001b[1;32m--> 633\u001b[0m     out \u001b[38;5;241m=\u001b[39m check_array(X, input_name\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mX\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mcheck_params)\n\u001b[0;32m    634\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m no_val_X \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m no_val_y:\n\u001b[0;32m    635\u001b[0m     out \u001b[38;5;241m=\u001b[39m _check_y(y, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mcheck_params)\n",
      "File \u001b[1;32mc:\\Users\\saite\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\sklearn\\utils\\validation.py:1087\u001b[0m, in \u001b[0;36mcheck_array\u001b[1;34m(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_writeable, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator, input_name)\u001b[0m\n\u001b[0;32m   1085\u001b[0m     n_samples \u001b[38;5;241m=\u001b[39m _num_samples(array)\n\u001b[0;32m   1086\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m n_samples \u001b[38;5;241m<\u001b[39m ensure_min_samples:\n\u001b[1;32m-> 1087\u001b[0m         \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[0;32m   1088\u001b[0m             \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mFound array with \u001b[39m\u001b[38;5;132;01m%d\u001b[39;00m\u001b[38;5;124m sample(s) (shape=\u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m) while a\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m   1089\u001b[0m             \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m minimum of \u001b[39m\u001b[38;5;132;01m%d\u001b[39;00m\u001b[38;5;124m is required\u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m   1090\u001b[0m             \u001b[38;5;241m%\u001b[39m (n_samples, array\u001b[38;5;241m.\u001b[39mshape, ensure_min_samples, context)\n\u001b[0;32m   1091\u001b[0m         )\n\u001b[0;32m   1093\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m ensure_min_features \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m array\u001b[38;5;241m.\u001b[39mndim \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m2\u001b[39m:\n\u001b[0;32m   1094\u001b[0m     n_features \u001b[38;5;241m=\u001b[39m array\u001b[38;5;241m.\u001b[39mshape[\u001b[38;5;241m1\u001b[39m]\n",
      "\u001b[1;31mValueError\u001b[0m: Found array with 0 sample(s) (shape=(0, 4)) while a minimum of 1 is required by MinMaxScaler."
     ]
    }
   ],
   "source": [
    "from sklearn.preprocessing import MinMaxScaler\n",
    "import matplotlib.pyplot as plt\n",
    "from keras.models import Sequential\n",
    "from keras.layers import LSTM, Dense, Dropout, RepeatVector, TimeDistributed\n",
    "\n",
    "\n",
    "# Filter the DataFrame\n",
    "filtered_df = series_df[(series_df['timestamp'] >= '2024-11-29 09:30:00') & (series_df['timestamp'] < '2024-07-23 15:50:00')]\n",
    "filtered_df.reset_index(inplace=True,drop=True)\n",
    "\n",
    "# Replace missing values in 'imbalance' with a specific value (e.g., 'missing_value')\n",
    "filtered_df['imbalance'] = filtered_df['imbalance'].fillna('missing_value')\n",
    "\n",
    "# Encode categorical 'imbalance' feature\n",
    "filtered_df['imbalance'] = filtered_df['imbalance'].apply(lambda x: 1 if x == 'buy' else -1 if x == 'sell' else 0)\n",
    "\n",
    "# Select relevant features for anomaly detection\n",
    "features = ['price', 'buyVolume', 'sellVolume', 'imbalance']\n",
    "\n",
    "# Normalize the data\n",
    "scaler = MinMaxScaler()\n",
    "filtered_df[features] = scaler.fit_transform(filtered_df[features])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "id": "8b4ce733",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Model: \"sequential_1\"\n",
      "_________________________________________________________________\n",
      " Layer (type)                Output Shape              Param #   \n",
      "=================================================================\n",
      " lstm_4 (LSTM)               (None, 10, 128)           68096     \n",
      "                                                                 \n",
      " dropout_2 (Dropout)         (None, 10, 128)           0         \n",
      "                                                                 \n",
      " lstm_5 (LSTM)               (None, 64)                49408     \n",
      "                                                                 \n",
      " repeat_vector_1 (RepeatVect  (None, 10, 64)           0         \n",
      " or)                                                             \n",
      "                                                                 \n",
      " lstm_6 (LSTM)               (None, 10, 64)            33024     \n",
      "                                                                 \n",
      " dropout_3 (Dropout)         (None, 10, 64)            0         \n",
      "                                                                 \n",
      " lstm_7 (LSTM)               (None, 10, 128)           98816     \n",
      "                                                                 \n",
      " time_distributed_1 (TimeDis  (None, 10, 4)            516       \n",
      " tributed)                                                       \n",
      "                                                                 \n",
      "=================================================================\n",
      "Total params: 249,860\n",
      "Trainable params: 249,860\n",
      "Non-trainable params: 0\n",
      "_________________________________________________________________\n",
      "Epoch 1/50\n",
      "12/12 [==============================] - 5s 67ms/step - loss: 0.1506 - val_loss: 0.1098\n",
      "Epoch 2/50\n",
      "12/12 [==============================] - 0s 20ms/step - loss: 0.0920 - val_loss: 0.1008\n",
      "Epoch 3/50\n",
      "12/12 [==============================] - 0s 19ms/step - loss: 0.0751 - val_loss: 0.0820\n",
      "Epoch 4/50\n",
      "12/12 [==============================] - 0s 19ms/step - loss: 0.0689 - val_loss: 0.0710\n",
      "Epoch 5/50\n",
      "12/12 [==============================] - 0s 19ms/step - loss: 0.0645 - val_loss: 0.0639\n",
      "Epoch 6/50\n",
      "12/12 [==============================] - 0s 20ms/step - loss: 0.0573 - val_loss: 0.0510\n",
      "Epoch 7/50\n",
      "12/12 [==============================] - 0s 27ms/step - loss: 0.0519 - val_loss: 0.0426\n",
      "Epoch 8/50\n",
      "12/12 [==============================] - 0s 23ms/step - loss: 0.0483 - val_loss: 0.0374\n",
      "Epoch 9/50\n",
      "12/12 [==============================] - 0s 22ms/step - loss: 0.0453 - val_loss: 0.0381\n",
      "Epoch 10/50\n",
      "12/12 [==============================] - 0s 26ms/step - loss: 0.0433 - val_loss: 0.0352\n",
      "Epoch 11/50\n",
      "12/12 [==============================] - 0s 28ms/step - loss: 0.0433 - val_loss: 0.0355\n",
      "Epoch 12/50\n",
      "12/12 [==============================] - 0s 26ms/step - loss: 0.0417 - val_loss: 0.0353\n",
      "Epoch 13/50\n",
      "12/12 [==============================] - 0s 26ms/step - loss: 0.0403 - val_loss: 0.0360\n",
      "Epoch 14/50\n",
      "12/12 [==============================] - 0s 22ms/step - loss: 0.0395 - val_loss: 0.0359\n",
      "Epoch 15/50\n",
      "12/12 [==============================] - 0s 24ms/step - loss: 0.0435 - val_loss: 0.0338\n",
      "Epoch 16/50\n",
      "12/12 [==============================] - 0s 27ms/step - loss: 0.0405 - val_loss: 0.0362\n",
      "Epoch 17/50\n",
      "12/12 [==============================] - 0s 24ms/step - loss: 0.0404 - val_loss: 0.0358\n",
      "Epoch 18/50\n",
      "12/12 [==============================] - 0s 24ms/step - loss: 0.0399 - val_loss: 0.0452\n",
      "Epoch 19/50\n",
      "12/12 [==============================] - 0s 24ms/step - loss: 0.0400 - val_loss: 0.0309\n",
      "Epoch 20/50\n",
      "12/12 [==============================] - 0s 27ms/step - loss: 0.0395 - val_loss: 0.0341\n",
      "Epoch 21/50\n",
      "12/12 [==============================] - 0s 34ms/step - loss: 0.0388 - val_loss: 0.0345\n",
      "Epoch 22/50\n",
      "12/12 [==============================] - 0s 31ms/step - loss: 0.0381 - val_loss: 0.0345\n",
      "Epoch 23/50\n",
      "12/12 [==============================] - 0s 41ms/step - loss: 0.0386 - val_loss: 0.0312\n",
      "Epoch 24/50\n",
      "12/12 [==============================] - 1s 44ms/step - loss: 0.0380 - val_loss: 0.0333\n",
      "Epoch 25/50\n",
      "12/12 [==============================] - 1s 46ms/step - loss: 0.0391 - val_loss: 0.0331\n",
      "Epoch 26/50\n",
      "12/12 [==============================] - 1s 67ms/step - loss: 0.0382 - val_loss: 0.0341\n",
      "Epoch 27/50\n",
      "12/12 [==============================] - 1s 73ms/step - loss: 0.0382 - val_loss: 0.0326\n",
      "Epoch 28/50\n",
      "12/12 [==============================] - 1s 94ms/step - loss: 0.0396 - val_loss: 0.0363\n",
      "Epoch 29/50\n",
      "12/12 [==============================] - 3s 282ms/step - loss: 0.0413 - val_loss: 0.0337\n",
      "Epoch 30/50\n",
      "12/12 [==============================] - 8s 712ms/step - loss: 0.0398 - val_loss: 0.0322\n",
      "Epoch 31/50\n",
      "12/12 [==============================] - 9s 730ms/step - loss: 0.0410 - val_loss: 0.0368\n",
      "Epoch 32/50\n",
      "12/12 [==============================] - 8s 715ms/step - loss: 0.0382 - val_loss: 0.0313\n",
      "Epoch 33/50\n",
      "12/12 [==============================] - 9s 777ms/step - loss: 0.0374 - val_loss: 0.0321\n",
      "Epoch 34/50\n",
      "12/12 [==============================] - 8s 683ms/step - loss: 0.0382 - val_loss: 0.0310\n",
      "Epoch 35/50\n",
      "12/12 [==============================] - 2s 189ms/step - loss: 0.0366 - val_loss: 0.0314\n",
      "Epoch 36/50\n",
      "12/12 [==============================] - 2s 127ms/step - loss: 0.0365 - val_loss: 0.0303\n",
      "Epoch 37/50\n",
      "12/12 [==============================] - 2s 141ms/step - loss: 0.0364 - val_loss: 0.0318\n",
      "Epoch 38/50\n",
      "12/12 [==============================] - 1s 106ms/step - loss: 0.0354 - val_loss: 0.0304\n",
      "Epoch 39/50\n",
      "12/12 [==============================] - 1s 107ms/step - loss: 0.0347 - val_loss: 0.0315\n",
      "Epoch 40/50\n",
      "12/12 [==============================] - 1s 97ms/step - loss: 0.0341 - val_loss: 0.0305\n",
      "Epoch 41/50\n",
      "12/12 [==============================] - 1s 83ms/step - loss: 0.0336 - val_loss: 0.0292\n",
      "Epoch 42/50\n",
      "12/12 [==============================] - 1s 80ms/step - loss: 0.0330 - val_loss: 0.0291\n",
      "Epoch 43/50\n",
      "12/12 [==============================] - 1s 73ms/step - loss: 0.0329 - val_loss: 0.0289\n",
      "Epoch 44/50\n",
      "12/12 [==============================] - 1s 80ms/step - loss: 0.0316 - val_loss: 0.0286\n",
      "Epoch 45/50\n",
      "12/12 [==============================] - 1s 68ms/step - loss: 0.0310 - val_loss: 0.0283\n",
      "Epoch 46/50\n",
      "12/12 [==============================] - 1s 62ms/step - loss: 0.0304 - val_loss: 0.0275\n",
      "Epoch 47/50\n",
      "12/12 [==============================] - 1s 61ms/step - loss: 0.0308 - val_loss: 0.0297\n",
      "Epoch 48/50\n",
      "12/12 [==============================] - 1s 65ms/step - loss: 0.0316 - val_loss: 0.0280\n",
      "Epoch 49/50\n",
      "12/12 [==============================] - 1s 63ms/step - loss: 0.0335 - val_loss: 0.0314\n",
      "Epoch 50/50\n",
      "12/12 [==============================] - 1s 59ms/step - loss: 0.0319 - val_loss: 0.0277\n"
     ]
    }
   ],
   "source": [
    "# Prepare the data for LSTM (we'll use sequences)\n",
    "def create_sequences(data, seq_length):\n",
    "    xs, ys = [], []\n",
    "    for i in range(len(data) - seq_length):\n",
    "        x = data[i:(i + seq_length)]\n",
    "        y = data[i + seq_length]\n",
    "        xs.append(x)\n",
    "        ys.append(y)\n",
    "    return np.array(xs), np.array(ys)\n",
    "\n",
    "SEQ_LENGTH = 10  # Length of the sequences for LSTM\n",
    "X, y = create_sequences(filtered_df[features].values, SEQ_LENGTH)\n",
    "\n",
    "# Build LSTM Autoencoder Model\n",
    "model = Sequential([\n",
    "    LSTM(128, activation='relu', input_shape=(SEQ_LENGTH, len(features)), return_sequences=True),\n",
    "    Dropout(0.2),\n",
    "    LSTM(64, activation='relu', return_sequences=False),\n",
    "    RepeatVector(SEQ_LENGTH),\n",
    "    LSTM(64, activation='relu', return_sequences=True),\n",
    "    Dropout(0.2),\n",
    "    LSTM(128, activation='relu', return_sequences=True),\n",
    "    TimeDistributed(Dense(len(features)))\n",
    "])\n",
    "\n",
    "model.compile(optimizer='adam', loss='mse')\n",
    "model.summary()\n",
    "\n",
    "# Train the model\n",
    "history = model.fit(X, X, epochs=50, batch_size=32, validation_split=0.1, shuffle=False)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "id": "d5a3d586",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "14/14 [==============================] - 0s 7ms/step\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/var/folders/gp/x0f5sgq13db073j79l3_2tb40000gn/T/ipykernel_3751/1529233682.py:17: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  filtered_df['anomaly'] = anomaly_scores\n"
     ]
    }
   ],
   "source": [
    "# Predict the reconstruction\n",
    "X_pred = model.predict(X)\n",
    "\n",
    "# Calculate the reconstruction error\n",
    "reconstruction_error = np.mean(np.abs(X_pred - X), axis=(1, 2))\n",
    "\n",
    "# Threshold for anomaly detection\n",
    "threshold = np.mean(reconstruction_error) + 3 * np.std(reconstruction_error)\n",
    "\n",
    "# Detect anomalies\n",
    "anomalies = reconstruction_error > threshold\n",
    "\n",
    "# Append anomaly scores to the original DataFrame\n",
    "anomaly_scores = np.zeros(len(filtered_df))\n",
    "anomaly_scores[SEQ_LENGTH:] = anomalies\n",
    "\n",
    "filtered_df['anomaly'] = anomaly_scores"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "id": "b6619a44",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/var/folders/gp/x0f5sgq13db073j79l3_2tb40000gn/T/ipykernel_3751/984967263.py:2: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  filtered_df[features] = scaler.inverse_transform(filtered_df[features])\n"
     ]
    }
   ],
   "source": [
    "\n",
    "# Inverse transform the normalized features back to their original scale\n",
    "filtered_df[features] = scaler.inverse_transform(filtered_df[features])\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "id": "8f58a560",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>timestamp</th>\n",
       "      <th>price</th>\n",
       "      <th>buyVolume</th>\n",
       "      <th>sellVolume</th>\n",
       "      <th>imbalance</th>\n",
       "      <th>index</th>\n",
       "      <th>anomaly</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>138</th>\n",
       "      <td>2024-07-23 11:00:00</td>\n",
       "      <td>206.48</td>\n",
       "      <td>8.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2293</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "              timestamp   price  buyVolume  sellVolume  imbalance  index  \\\n",
       "138 2024-07-23 11:00:00  206.48        8.0         0.0        1.0   2293   \n",
       "\n",
       "     anomaly  \n",
       "138      1.0  "
      ]
     },
     "execution_count": 76,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "filtered_df[filtered_df['anomaly'] == 1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a1bc4efe",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}