{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "302d85be", "metadata": {}, "outputs": [], "source": [ "import yfinance as yf\n", "import pandas as pd\n", "import numpy as np" ] }, { "cell_type": "code", "execution_count": 168, "id": "3d36bd11", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import json\n", "from datetime import datetime, timedelta\n", "\n", "# Load the JSON data from the file\n", "# file_path = 'snow_volume_5_min'\n", "# file_path = 'AAPL_Volume_Footprint.txt'\n", "# file_path = 'MMM_Volume_Footprint.txt'\n", "# file_path = 'META_Volume_Footprint.txt'\n", "file_path = 'NKE_Volume_Footprint.txt'\n", "# file_path = 'CVS_Volume_Footprint.txt'\n", "\n", "\n", "with open(file_path, 'r') as file:\n", " file_content = file.read()\n", "\n", "# Strip everything after the ~m~98~m~ marker\n", "# marker = '~m~98~m~'\n", "# marker = '~m~152~m~'\n", "\n", "marker = '~m~99~m~'\n", "# marker = '~m~98~m~'\n", "file_content = file_content.split(marker)[0]\n", "\n", "# file_content\n", "\n", "main_data = json.loads(file_content)\n", "\n", "# data_section = main_data['p'][1]['st1']['ns']['d']\n", "# data_section = main_data['p'][1]['st16']['ns']['d']\n", "data_section = main_data['p'][1]['st22']['ns']['d']\n", "# data_section = main_data['p'][1]['st1']['ns']['d']\n", "\n", "nested_data = json.loads(data_section)\n", "footprint_levels = nested_data['graphicsCmds']['create']['footprintLevels']\n", "df = pd.DataFrame(footprint_levels[0]['data'])\n", "\n", "footprints = nested_data['graphicsCmds']['create']['footprints']\n", "\n", "df1 = pd.DataFrame(footprints[0]['data'])\n" ] }, { "cell_type": "code", "execution_count": 169, "id": "ee55ebb3", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idindexpocvalvahlevels
02081.1781.1681.18[{'buyVolume': 1072.0, 'sellVolume': 1388.0, '...
13181.2181.1681.24[{'buyVolume': 0.0, 'sellVolume': 1253.0, 'imb...
24281.2581.2481.28[{'buyVolume': 2115.0, 'sellVolume': 961.0, 'i...
35381.2581.2281.26[{'buyVolume': 836.0, 'sellVolume': 533.0, 'im...
46481.2381.2281.26[{'buyVolume': 1816.0, 'sellVolume': 0.0, 'imb...
.....................
28306699283078.0778.0678.08[{'buyVolume': 2138.0, 'sellVolume': 3137.0, '...
28316700283178.1378.1278.16[{'buyVolume': 2696.0, 'sellVolume': 6590.0, '...
28326701283278.2378.2078.24[{'buyVolume': 604.0, 'sellVolume': 83.0, 'imb...
28336706283378.2778.2478.32[{'buyVolume': 1773.0, 'sellVolume': 1903.0, '...
28346707283478.4578.4078.50[{'buyVolume': 865.0, 'sellVolume': 3717.0, 'i...
\n", "

2835 rows × 6 columns

\n", "
" ], "text/plain": [ " id index poc val vah \\\n", "0 2 0 81.17 81.16 81.18 \n", "1 3 1 81.21 81.16 81.24 \n", "2 4 2 81.25 81.24 81.28 \n", "3 5 3 81.25 81.22 81.26 \n", "4 6 4 81.23 81.22 81.26 \n", "... ... ... ... ... ... \n", "2830 6699 2830 78.07 78.06 78.08 \n", "2831 6700 2831 78.13 78.12 78.16 \n", "2832 6701 2832 78.23 78.20 78.24 \n", "2833 6706 2833 78.27 78.24 78.32 \n", "2834 6707 2834 78.45 78.40 78.50 \n", "\n", " levels \n", "0 [{'buyVolume': 1072.0, 'sellVolume': 1388.0, '... \n", "1 [{'buyVolume': 0.0, 'sellVolume': 1253.0, 'imb... \n", "2 [{'buyVolume': 2115.0, 'sellVolume': 961.0, 'i... \n", "3 [{'buyVolume': 836.0, 'sellVolume': 533.0, 'im... \n", "4 [{'buyVolume': 1816.0, 'sellVolume': 0.0, 'imb... \n", "... ... \n", "2830 [{'buyVolume': 2138.0, 'sellVolume': 3137.0, '... \n", "2831 [{'buyVolume': 2696.0, 'sellVolume': 6590.0, '... \n", "2832 [{'buyVolume': 604.0, 'sellVolume': 83.0, 'imb... \n", "2833 [{'buyVolume': 1773.0, 'sellVolume': 1903.0, '... \n", "2834 [{'buyVolume': 865.0, 'sellVolume': 3717.0, 'i... \n", "\n", "[2835 rows x 6 columns]" ] }, "execution_count": 169, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df1" ] }, { "cell_type": "code", "execution_count": 170, "id": "9204ce99", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idstartIndextypepriceextendTo
0135buy81.386.0
185buy81.287.0
295buy81.307.0
3105buy81.327.0
4115buy81.347.0
\n", "
" ], "text/plain": [ " id startIndex type price extendTo\n", "0 13 5 buy 81.38 6.0\n", "1 8 5 buy 81.28 7.0\n", "2 9 5 buy 81.30 7.0\n", "3 10 5 buy 81.32 7.0\n", "4 11 5 buy 81.34 7.0" ] }, "execution_count": 170, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head()" ] }, { "cell_type": "code", "execution_count": 171, "id": "3fee201c", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idindexpocvalvahlevels
02081.1781.1681.18[{'buyVolume': 1072.0, 'sellVolume': 1388.0, '...
13181.2181.1681.24[{'buyVolume': 0.0, 'sellVolume': 1253.0, 'imb...
24281.2581.2481.28[{'buyVolume': 2115.0, 'sellVolume': 961.0, 'i...
35381.2581.2281.26[{'buyVolume': 836.0, 'sellVolume': 533.0, 'im...
46481.2381.2281.26[{'buyVolume': 1816.0, 'sellVolume': 0.0, 'imb...
\n", "
" ], "text/plain": [ " id index poc val vah \\\n", "0 2 0 81.17 81.16 81.18 \n", "1 3 1 81.21 81.16 81.24 \n", "2 4 2 81.25 81.24 81.28 \n", "3 5 3 81.25 81.22 81.26 \n", "4 6 4 81.23 81.22 81.26 \n", "\n", " levels \n", "0 [{'buyVolume': 1072.0, 'sellVolume': 1388.0, '... \n", "1 [{'buyVolume': 0.0, 'sellVolume': 1253.0, 'imb... \n", "2 [{'buyVolume': 2115.0, 'sellVolume': 961.0, 'i... \n", "3 [{'buyVolume': 836.0, 'sellVolume': 533.0, 'im... \n", "4 [{'buyVolume': 1816.0, 'sellVolume': 0.0, 'imb... " ] }, "execution_count": 171, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df1.head()" ] }, { "cell_type": "code", "execution_count": 172, "id": "e808128e", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
timestamppricebuyVolumesellVolumeimbalanceindex
192502024-12-04 15:45:0078.18276.00.0buy2832
192512024-12-04 15:45:0078.20181.0497.0buy2832
192522024-12-04 15:45:0078.223586.0499.0buy2832
192532024-12-04 15:45:0078.24513.0138.02832
192542024-12-04 15:50:0078.241773.01903.02833
192552024-12-04 15:50:0078.262938.02038.02833
192562024-12-04 15:50:0078.281250.00.02833
192572024-12-04 15:50:0078.30428.00.0buy2833
192582024-12-04 15:50:0078.321241.01163.0buy2833
192592024-12-04 15:50:0078.34651.0113.02833
192602024-12-04 15:50:0078.36449.0668.0buy2833
192612024-12-04 15:50:0078.38823.0570.02833
192622024-12-04 15:55:0078.34865.03717.02834
192632024-12-04 15:55:0078.364654.04021.02834
192642024-12-04 15:55:0078.384378.01201.02834
192652024-12-04 15:55:0078.40478.01584.02834
192662024-12-04 15:55:0078.421472.04033.02834
192672024-12-04 15:55:0078.445457.07033.02834
192682024-12-04 15:55:0078.464136.02046.02834
192692024-12-04 15:55:0078.484377.01388.02834
\n", "
" ], "text/plain": [ " timestamp price buyVolume sellVolume imbalance index\n", "19250 2024-12-04 15:45:00 78.18 276.0 0.0 buy 2832\n", "19251 2024-12-04 15:45:00 78.20 181.0 497.0 buy 2832\n", "19252 2024-12-04 15:45:00 78.22 3586.0 499.0 buy 2832\n", "19253 2024-12-04 15:45:00 78.24 513.0 138.0 2832\n", "19254 2024-12-04 15:50:00 78.24 1773.0 1903.0 2833\n", "19255 2024-12-04 15:50:00 78.26 2938.0 2038.0 2833\n", "19256 2024-12-04 15:50:00 78.28 1250.0 0.0 2833\n", "19257 2024-12-04 15:50:00 78.30 428.0 0.0 buy 2833\n", "19258 2024-12-04 15:50:00 78.32 1241.0 1163.0 buy 2833\n", "19259 2024-12-04 15:50:00 78.34 651.0 113.0 2833\n", "19260 2024-12-04 15:50:00 78.36 449.0 668.0 buy 2833\n", "19261 2024-12-04 15:50:00 78.38 823.0 570.0 2833\n", "19262 2024-12-04 15:55:00 78.34 865.0 3717.0 2834\n", "19263 2024-12-04 15:55:00 78.36 4654.0 4021.0 2834\n", "19264 2024-12-04 15:55:00 78.38 4378.0 1201.0 2834\n", "19265 2024-12-04 15:55:00 78.40 478.0 1584.0 2834\n", "19266 2024-12-04 15:55:00 78.42 1472.0 4033.0 2834\n", "19267 2024-12-04 15:55:00 78.44 5457.0 7033.0 2834\n", "19268 2024-12-04 15:55:00 78.46 4136.0 2046.0 2834\n", "19269 2024-12-04 15:55:00 78.48 4377.0 1388.0 2834" ] }, "execution_count": 172, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# # Define the trading holidays\n", "# trading_holidays = [\n", "# datetime(2024, 1, 1), # Monday, January 1 - New Year's Day\n", "# datetime(2024, 1, 15), # Monday, January 15 - Martin Luther King Jr. Day\n", "# datetime(2024, 2, 19), # Monday, February 19 - Presidents' Day\n", "# datetime(2024, 3, 29), # Friday, March 29 - Good Friday\n", "# datetime(2024, 5, 27), # Monday, May 27 - Memorial Day\n", "# datetime(2024, 6, 19), # Wednesday, June 19 - Juneteenth National Independence Day\n", "# datetime(2024, 7, 4), # Thursday, July 4 - Independence Day\n", "# datetime(2024, 9, 2), # Monday, September 2 - Labor Day\n", "# datetime(2024, 11, 28), # Thursday, November 28 - Thanksgiving Day\n", "# datetime(2024, 12, 25), # Wednesday, December 25 - Christmas Day\n", "# datetime(2025, 1, 1), # Wednesday, January 1 - New Year's Day\n", "# datetime(2025, 1, 20), # Monday, January 20 - Martin Luther King Jr. Day\n", "# datetime(2025, 2, 17), # Monday, February 17 - Presidents' Day\n", "# datetime(2025, 4, 18), # Friday, April 18 - Good Friday\n", "# datetime(2025, 5, 26), # Monday, May 26 - Memorial Day\n", "# datetime(2025, 6, 19), # Thursday, June 19 - Juneteenth National Independence Day\n", "# datetime(2025, 7, 4), # Friday, July 4 - Independence Day\n", "# datetime(2025, 9, 1), # Monday, September 1 - Labor Day\n", "# datetime(2025, 11, 27), # Thursday, November 27 - Thanksgiving Day\n", "# datetime(2025, 12, 25) # Thursday, December 25 - Christmas Day\n", "# ]\n", "\n", "# # Initialize the base timestamp for index 2245\n", "# base_timestamp = datetime.strptime('2024-12-02 15:55:00', '%Y-%m-%d %H:%M:%S')\n", "# current_timestamp = base_timestamp\n", "\n", "# # Initialize a dictionary to store timestamps for each index\n", "# # index_to_timestamp = {2245: current_timestamp}\n", "# index_to_timestamp = {max(df1['index']): current_timestamp}\n", "\n", "# # Market hours\n", "# market_open = timedelta(hours=9, minutes=30)\n", "# market_close = timedelta(hours=15, minutes=55)\n", "# day_increment = timedelta(days=1)\n", "# weekend_days = [5, 6] # Saturday and Sunday\n", "\n", "# # Calculate the timestamps backward in 5-minute intervals, excluding weekends and outside market hours\n", "# # for index in range(2244, -1, -1):\n", "# for index in range(max(df1['index'])-1, -1, -1):\n", "# # Subtract 5 minutes\n", "# current_timestamp -= timedelta(minutes=5)\n", " \n", "# # Check if current timestamp is before market open\n", "# while (current_timestamp.time() < (datetime.min + market_open).time() or\n", "# current_timestamp.time() > (datetime.min + market_close).time() or\n", "# current_timestamp.weekday() in weekend_days or\n", "# current_timestamp.date() in [holiday.date() for holiday in trading_holidays]):\n", "# # Move to previous trading day if before market open\n", "# if current_timestamp.time() < (datetime.min + market_open).time():\n", "# current_timestamp = datetime.combine(current_timestamp.date() - day_increment, (datetime.min + market_close).time())\n", "# else:\n", "# # Otherwise, just subtract 5 minutes\n", "# current_timestamp -= timedelta(minutes=5)\n", " \n", "# # Skip weekends and trading holidays\n", "# while current_timestamp.weekday() in weekend_days or current_timestamp.date() in [holiday.date() for holiday in trading_holidays]:\n", "# current_timestamp -= day_increment\n", "# current_timestamp = datetime.combine(current_timestamp.date(), (datetime.min + market_close).time())\n", " \n", "# # Assign the calculated timestamp to the index\n", "# index_to_timestamp[index] = current_timestamp\n", "\n", "# # Create a list to hold the time series data\n", "# time_series_data = []\n", "\n", "# # Iterate over df1 and extract levels data\n", "# for i, row in df1.iterrows():\n", "# timestamp = index_to_timestamp.get(row['index'])\n", " \n", "# if timestamp:\n", "# levels = row['levels']\n", "# for level in levels:\n", "# time_series_data.append({\n", "# 'timestamp': timestamp,\n", "# 'price': level['price'],\n", "# 'buyVolume': level['buyVolume'],\n", "# 'sellVolume': level['sellVolume'],\n", "# 'imbalance': level['imbalance'],\n", "# 'index': row['index']\n", "# })\n", "\n", "# # Create the dataframe from the time series data\n", "# series_df = pd.DataFrame(time_series_data)\n", "\n", "# series_df.tail(20)\n", "\n", "\n", "# Define the trading holidays\n", "trading_holidays = [\n", " datetime(2024, 1, 1), # Monday, January 1 - New Year's Day\n", " datetime(2024, 1, 15), # Monday, January 15 - Martin Luther King Jr. Day\n", " datetime(2024, 2, 19), # Monday, February 19 - Presidents' Day\n", " datetime(2024, 3, 29), # Friday, March 29 - Good Friday\n", " datetime(2024, 5, 27), # Monday, May 27 - Memorial Day\n", " datetime(2024, 6, 19), # Wednesday, June 19 - Juneteenth National Independence Day\n", " datetime(2024, 7, 4), # Thursday, July 4 - Independence Day\n", " datetime(2024, 9, 2), # Monday, September 2 - Labor Day\n", " datetime(2024, 11, 28), # Thursday, November 28 - Thanksgiving Day\n", " datetime(2024, 12, 25), # Wednesday, December 25 - Christmas Day\n", " datetime(2025, 1, 1), # Wednesday, January 1 - New Year's Day\n", " datetime(2025, 1, 20), # Monday, January 20 - Martin Luther King Jr. Day\n", " datetime(2025, 2, 17), # Monday, February 17 - Presidents' Day\n", " datetime(2025, 4, 18), # Friday, April 18 - Good Friday\n", " datetime(2025, 5, 26), # Monday, May 26 - Memorial Day\n", " datetime(2025, 6, 19), # Thursday, June 19 - Juneteenth National Independence Day\n", " datetime(2025, 7, 4), # Friday, July 4 - Independence Day\n", " datetime(2025, 9, 1), # Monday, September 1 - Labor Day\n", " datetime(2025, 11, 27), # Thursday, November 27 - Thanksgiving Day\n", " datetime(2025, 12, 25) # Thursday, December 25 - Christmas Day\n", "]\n", "\n", "# Initialize the base timestamp for index 2245\n", "base_timestamp = datetime.strptime('2024-12-04 15:55:00', '%Y-%m-%d %H:%M:%S')\n", "current_timestamp = base_timestamp\n", "\n", "# Initialize a dictionary to store timestamps for each index\n", "# index_to_timestamp = {2245: current_timestamp}\n", "index_to_timestamp = {max(df1['index']): current_timestamp}\n", "\n", "# Market hours\n", "market_open = timedelta(hours=9, minutes=30)\n", "market_close = timedelta(hours=15, minutes=55)\n", "special_close = {\n", " datetime(2024, 11, 29).date(): timedelta(hours=12, minutes=55) # Special close time on 2024-11-29\n", "}\n", "day_increment = timedelta(days=1)\n", "weekend_days = [5, 6] # Saturday and Sunday\n", "\n", "# Calculate the timestamps backward in 5-minute intervals, excluding weekends and outside market hours\n", "# for index in range(2244, -1, -1):\n", "for index in range(max(df1['index'])-1, -1, -1):\n", " # Subtract 5 minutes\n", " current_timestamp -= timedelta(minutes=5)\n", " \n", " # Check if current timestamp is before market open\n", " while (current_timestamp.time() < (datetime.min + market_open).time() or\n", " current_timestamp.time() > (datetime.min + special_close.get(current_timestamp.date(), market_close)).time() or\n", " current_timestamp.weekday() in weekend_days or\n", " current_timestamp.date() in [holiday.date() for holiday in trading_holidays]):\n", " # Move to previous trading day if before market open\n", " if current_timestamp.time() < (datetime.min + market_open).time():\n", " current_timestamp = datetime.combine(current_timestamp.date() - day_increment, (datetime.min + special_close.get(current_timestamp.date() - day_increment, market_close)).time())\n", " else:\n", " # Otherwise, just subtract 5 minutes\n", " current_timestamp -= timedelta(minutes=5)\n", " \n", " # Skip weekends and trading holidays\n", " while current_timestamp.weekday() in weekend_days or current_timestamp.date() in [holiday.date() for holiday in trading_holidays]:\n", " current_timestamp -= day_increment\n", " current_timestamp = datetime.combine(current_timestamp.date(), (datetime.min + special_close.get(current_timestamp.date(), market_close)).time())\n", " \n", " # Assign the calculated timestamp to the index\n", " index_to_timestamp[index] = current_timestamp\n", "\n", "# Create a list to hold the time series data\n", "time_series_data = []\n", "\n", "# Iterate over df1 and extract levels data\n", "for i, row in df1.iterrows():\n", " timestamp = index_to_timestamp.get(row['index'])\n", " \n", " if timestamp:\n", " levels = row['levels']\n", " for level in levels:\n", " time_series_data.append({\n", " 'timestamp': timestamp,\n", " 'price': level['price'],\n", " 'buyVolume': level['buyVolume'],\n", " 'sellVolume': level['sellVolume'],\n", " 'imbalance': level['imbalance'],\n", " 'index': row['index']\n", " })\n", "\n", "# Create the dataframe from the time series data\n", "series_df = pd.DataFrame(time_series_data)\n", "\n", "series_df.tail(20)\n" ] }, { "cell_type": "code", "execution_count": 173, "id": "ccb6e9dd", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
timestamppricebuyVolumesellVolumeimbalanceindex
186702024-12-03 14:55:0078.960.0156.02744
186712024-12-03 15:00:0078.96227.01918.02745
186722024-12-03 15:00:0078.981366.0161.02745
186732024-12-03 15:05:0078.981228.01333.02746
186742024-12-03 15:05:0079.00566.0255.02746
.....................
192652024-12-04 15:55:0078.40478.01584.02834
192662024-12-04 15:55:0078.421472.04033.02834
192672024-12-04 15:55:0078.445457.07033.02834
192682024-12-04 15:55:0078.464136.02046.02834
192692024-12-04 15:55:0078.484377.01388.02834
\n", "

600 rows × 6 columns

\n", "
" ], "text/plain": [ " timestamp price buyVolume sellVolume imbalance index\n", "18670 2024-12-03 14:55:00 78.96 0.0 156.0 2744\n", "18671 2024-12-03 15:00:00 78.96 227.0 1918.0 2745\n", "18672 2024-12-03 15:00:00 78.98 1366.0 161.0 2745\n", "18673 2024-12-03 15:05:00 78.98 1228.0 1333.0 2746\n", "18674 2024-12-03 15:05:00 79.00 566.0 255.0 2746\n", "... ... ... ... ... ... ...\n", "19265 2024-12-04 15:55:00 78.40 478.0 1584.0 2834\n", "19266 2024-12-04 15:55:00 78.42 1472.0 4033.0 2834\n", "19267 2024-12-04 15:55:00 78.44 5457.0 7033.0 2834\n", "19268 2024-12-04 15:55:00 78.46 4136.0 2046.0 2834\n", "19269 2024-12-04 15:55:00 78.48 4377.0 1388.0 2834\n", "\n", "[600 rows x 6 columns]" ] }, "execution_count": 173, "metadata": {}, "output_type": "execute_result" } ], "source": [ "series_df.tail(600)" ] }, { "cell_type": "code", "execution_count": 177, "id": "71d91b66", "metadata": {}, "outputs": [], "source": [ "series_df.to_csv('nke_5_min_series_data.csv')" ] }, { "cell_type": "code", "execution_count": 178, "id": "4e1436c2", "metadata": {}, "outputs": [], "source": [ "# Filter the DataFrame\n", "filtered_df = series_df[series_df['timestamp'] >= '2024-12-04 09:30:00']" ] }, { "cell_type": "code", "execution_count": 179, "id": "1e7f8c72", "metadata": {}, "outputs": [], "source": [ "filtered_df.reset_index(inplace=True,drop=True)" ] }, { "cell_type": "code", "execution_count": 180, "id": "7b01d454", "metadata": {}, "outputs": [], "source": [ "# Sort by timestamp and price ascending\n", "filtered_df = filtered_df.sort_values(by=['timestamp', 'price']).reset_index(drop=True)\n" ] }, { "cell_type": "code", "execution_count": 181, "id": "d3a75a98", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "timestamp\n", "2024-12-04 09:30:00 40\n", "2024-12-04 09:35:00 19\n", "2024-12-04 10:05:00 18\n", "2024-12-04 09:40:00 12\n", "2024-12-04 09:55:00 12\n", " ..\n", "2024-12-04 15:00:00 3\n", "2024-12-04 13:35:00 3\n", "2024-12-04 13:25:00 3\n", "2024-12-04 14:20:00 2\n", "2024-12-04 13:45:00 2\n", "Name: count, Length: 78, dtype: int64" ] }, "execution_count": 181, "metadata": {}, "output_type": "execute_result" } ], "source": [ "filtered_df['timestamp'].value_counts()" ] }, { "cell_type": "code", "execution_count": 182, "id": "1a234672", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
timestamppricebuyVolumesellVolumeimbalanceindex
02024-12-04 09:30:0077.10597.0119.02757
12024-12-04 09:30:0077.121306.0985.0both2757
22024-12-04 09:30:0077.14156.02510.0sell2757
32024-12-04 09:30:0077.16100.0709.0sell2757
42024-12-04 09:30:0077.180.01658.0sell2757
.....................
5312024-12-04 15:55:0078.40478.01584.02834
5322024-12-04 15:55:0078.421472.04033.02834
5332024-12-04 15:55:0078.445457.07033.02834
5342024-12-04 15:55:0078.464136.02046.02834
5352024-12-04 15:55:0078.484377.01388.02834
\n", "

536 rows × 6 columns

\n", "
" ], "text/plain": [ " timestamp price buyVolume sellVolume imbalance index\n", "0 2024-12-04 09:30:00 77.10 597.0 119.0 2757\n", "1 2024-12-04 09:30:00 77.12 1306.0 985.0 both 2757\n", "2 2024-12-04 09:30:00 77.14 156.0 2510.0 sell 2757\n", "3 2024-12-04 09:30:00 77.16 100.0 709.0 sell 2757\n", "4 2024-12-04 09:30:00 77.18 0.0 1658.0 sell 2757\n", ".. ... ... ... ... ... ...\n", "531 2024-12-04 15:55:00 78.40 478.0 1584.0 2834\n", "532 2024-12-04 15:55:00 78.42 1472.0 4033.0 2834\n", "533 2024-12-04 15:55:00 78.44 5457.0 7033.0 2834\n", "534 2024-12-04 15:55:00 78.46 4136.0 2046.0 2834\n", "535 2024-12-04 15:55:00 78.48 4377.0 1388.0 2834\n", "\n", "[536 rows x 6 columns]" ] }, "execution_count": 182, "metadata": {}, "output_type": "execute_result" } ], "source": [ "filtered_df" ] }, { "cell_type": "code", "execution_count": 183, "id": "e0ca7553", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
timestamppricebuyVolumesellVolumeimbalanceindex
\n", "
" ], "text/plain": [ "Empty DataFrame\n", "Columns: [timestamp, price, buyVolume, sellVolume, imbalance, index]\n", "Index: []" ] }, "execution_count": 183, "metadata": {}, "output_type": "execute_result" } ], "source": [ "filtered_df[filtered_df['timestamp'] == '2024-12-02 09:30:00']" ] }, { "cell_type": "code", "execution_count": 184, "id": "a2366d90", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\saite\\AppData\\Local\\Temp\\ipykernel_10332\\915788632.py:44: FutureWarning: The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.\n", " filtered_df = filtered_df.groupby('timestamp', group_keys=False).apply(calculate_poc)\n", "C:\\Users\\saite\\AppData\\Local\\Temp\\ipykernel_10332\\915788632.py:44: DeprecationWarning: DataFrameGroupBy.apply operated on the grouping columns. This behavior is deprecated, and in a future version of pandas the grouping columns will be excluded from the operation. Either pass `include_groups=False` to exclude the groupings or explicitly select the grouping columns after groupby to silence this warning.\n", " filtered_df = filtered_df.groupby('timestamp', group_keys=False).apply(calculate_poc)\n", "C:\\Users\\saite\\AppData\\Local\\Temp\\ipykernel_10332\\915788632.py:70: DeprecationWarning: DataFrameGroupBy.apply operated on the grouping columns. This behavior is deprecated, and in a future version of pandas the grouping columns will be excluded from the operation. Either pass `include_groups=False` to exclude the groupings or explicitly select the grouping columns after groupby to silence this warning.\n", " filtered_df = filtered_df.groupby('timestamp', group_keys=False).apply(calculate_imbalances)\n", "C:\\Users\\saite\\AppData\\Local\\Temp\\ipykernel_10332\\915788632.py:132: DeprecationWarning: DataFrameGroupBy.apply operated on the grouping columns. This behavior is deprecated, and in a future version of pandas the grouping columns will be excluded from the operation. Either pass `include_groups=False` to exclude the groupings or explicitly select the grouping columns after groupby to silence this warning.\n", " filtered_df = filtered_df.groupby('timestamp').apply(check_auctions).reset_index(drop=True)\n" ] } ], "source": [ "\n", "\n", "# Calculate total volume at each price level\n", "filtered_df['totalVolume'] = filtered_df['buyVolume'] + filtered_df['sellVolume']\n", "\n", "# # Group by timestamp and identify the Point of Control (POC) for each 5-minute interval\n", "# def calculate_poc(group):\n", "# poc_price = group.loc[group['totalVolume'].idxmax(), 'price']\n", "# group['poc'] = poc_price\n", " \n", "# # Calculate highest bid stacked imbalance and ask stacked imbalance\n", "# group['highest_bid_stacked_imbalance'] = group['buyVolume'].max()\n", "# group['highest_ask_stacked_imbalance'] = group['sellVolume'].max()\n", " \n", "# return group\n", "\n", "# filtered_df = filtered_df.groupby('timestamp', group_keys=False).apply(calculate_poc)\n", "\n", "# Group by timestamp and identify the Point of Control (POC) for each 5-minute interval\n", "def calculate_poc(group):\n", " poc_price = group.loc[group['totalVolume'].idxmax(), 'price']\n", " group['poc'] = poc_price\n", " \n", " # Calculate highest bid stacked imbalance and ask stacked imbalance\n", " group['highest_bid_stacked_imbalance'] = group['buyVolume'].max()\n", " group['highest_ask_stacked_imbalance'] = group['sellVolume'].max()\n", " \n", " # Calculate highest ask imbalance stack price (consider imbalance as 'sell' or 'both')\n", " ask_imbalance_filter = group[(group['imbalance'] == 'sell') | (group['imbalance'] == 'both')]\n", " if not ask_imbalance_filter.empty:\n", " highest_ask_imbalance_stack_price = ask_imbalance_filter.loc[ask_imbalance_filter['sellVolume'].idxmax(), 'price']\n", " else:\n", " highest_ask_imbalance_stack_price = None\n", " group['highest_ask_imbalance_stack_price'] = highest_ask_imbalance_stack_price\n", " \n", " # Calculate highest bid imbalance stack price (consider imbalance as 'buy' or 'both')\n", " bid_imbalance_filter = group[(group['imbalance'] == 'buy') | (group['imbalance'] == 'both')]\n", " if not bid_imbalance_filter.empty:\n", " highest_bid_imbalance_stack_price = bid_imbalance_filter.loc[bid_imbalance_filter['buyVolume'].idxmax(), 'price']\n", " else:\n", " highest_bid_imbalance_stack_price = None\n", " group['highest_bid_imbalance_stack_price'] = highest_bid_imbalance_stack_price\n", " \n", " return group\n", "\n", "filtered_df = filtered_df.groupby('timestamp', group_keys=False).apply(calculate_poc)\n", "\n", "\n", "# Calculate delta (buyVolume - sellVolume)\n", "filtered_df['delta'] = filtered_df['buyVolume'] - filtered_df['sellVolume']\n", "\n", "# Calculate total ask imbalance count and highest stacked imbalance count\n", "def calculate_imbalances(group):\n", " # Total ask imbalance count (where imbalance is 'sell' or 'both')\n", " ask_imbalance_count = ((group['imbalance'] == 'sell') | (group['imbalance'] == 'both')).sum()\n", " group['total_ask_imbalance_count'] = ask_imbalance_count\n", "\n", " # Highest stacked ask imbalance count (consecutive 'sell' or 'both' imbalance)\n", " max_stacked_ask_imbalance = ((group['imbalance'] == 'sell') | (group['imbalance'] == 'both')).astype(int).groupby(((group['imbalance'] != 'sell') & (group['imbalance'] != 'both')).cumsum()).cumsum().max()\n", " group['highest_stacked_ask_imbalance'] = max_stacked_ask_imbalance\n", "\n", " # Total bid imbalance count (where imbalance is 'buy' or 'both')\n", " bid_imbalance_count = ((group['imbalance'] == 'buy') | (group['imbalance'] == 'both')).sum()\n", " group['total_bid_imbalance_count'] = bid_imbalance_count\n", "\n", " # Highest stacked bid imbalance count (consecutive 'buy' or 'both' imbalance)\n", " max_stacked_bid_imbalance = ((group['imbalance'] == 'buy') | (group['imbalance'] == 'both')).astype(int).groupby(((group['imbalance'] != 'buy') & (group['imbalance'] != 'both')).cumsum()).cumsum().max()\n", " group['highest_stacked_bid_imbalance'] = max_stacked_bid_imbalance\n", "\n", " return group\n", "\n", "filtered_df = filtered_df.groupby('timestamp', group_keys=False).apply(calculate_imbalances)\n", "\n", "filtered_df = filtered_df.sort_values(by=['timestamp', 'price']).reset_index(drop=True)\n", "\n", "def add_support_resistance_by_timestamp(df):\n", " # Initialize lists for final results\n", " support_levels_per_timestamp = []\n", " resistance_levels_per_timestamp = []\n", "\n", " # Group by timestamp\n", " grouped = df.groupby('timestamp')\n", "\n", " for timestamp, group in grouped:\n", " support_levels = []\n", " resistance_levels = []\n", " \n", " # Group consecutive rows with the same imbalance\n", " for _, sub_group in group.groupby((group['imbalance'] != group['imbalance'].shift()).cumsum()):\n", " if len(sub_group) >= 3: # Ensure the sub-group has at least 3 rows\n", " prices = sub_group['price'].tolist()\n", " imbalance_type = sub_group['imbalance'].iloc[0]\n", " \n", " # Identify support and resistance levels based on imbalance type\n", " if all(sub_group['imbalance'].isin(['buy', 'both'])):\n", " support_levels.append([round(p, 2) for p in prices])\n", " elif all(sub_group['imbalance'].isin(['sell', 'both'])):\n", " resistance_levels.append([round(p, 2) for p in prices])\n", "\n", " # Store levels for the current timestamp\n", " support_levels_per_timestamp.append((timestamp, support_levels))\n", " resistance_levels_per_timestamp.append((timestamp, resistance_levels))\n", " \n", " # Create new DataFrame columns\n", " df['support_imbalance'] = df['timestamp'].map(\n", " dict((timestamp, levels) for timestamp, levels in support_levels_per_timestamp)\n", " )\n", " df['resistance_imbalance'] = df['timestamp'].map(\n", " dict((timestamp, levels) for timestamp, levels in resistance_levels_per_timestamp)\n", " )\n", "\n", " return df\n", "\n", "# Apply the function to the filtered DataFrame\n", "filtered_df = add_support_resistance_by_timestamp(filtered_df)\n", "\n", "# Sort by timestamp and price\n", "filtered_df = filtered_df.sort_values(by=['timestamp', 'price']).reset_index(drop=True)\n", "\n", "# Group by timestamp and check for unfinished bid and ask auctions\n", "def check_auctions(group):\n", " min_price_row = group.loc[group['price'].idxmin()]\n", " max_price_row = group.loc[group['price'].idxmax()]\n", " \n", " buy_auction_status = 'incomplete' if min_price_row['buyVolume'] > 0 and min_price_row['sellVolume'] > 0 else 'complete'\n", " sell_auction_status = 'incomplete' if max_price_row['buyVolume'] > 0 and max_price_row['sellVolume'] > 0 else 'complete'\n", " \n", " group['buy_auction_status'] = buy_auction_status\n", " group['sell_auction_status'] = sell_auction_status\n", " \n", " return group\n", "\n", "# Apply the auction check function to each group\n", "filtered_df = filtered_df.groupby('timestamp').apply(check_auctions).reset_index(drop=True)" ] }, { "cell_type": "code", "execution_count": 185, "id": "85bdf0b4", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
timestamppricebuyVolumesellVolumeimbalanceindextotalVolumepochighest_bid_stacked_imbalancehighest_ask_stacked_imbalance...highest_bid_imbalance_stack_pricedeltatotal_ask_imbalance_counthighest_stacked_ask_imbalancetotal_bid_imbalance_counthighest_stacked_bid_imbalancesupport_imbalanceresistance_imbalancebuy_auction_statussell_auction_status
02024-12-04 09:30:0077.10597.0119.02757716.077.383760.03415.0...77.4478.018752[][[77.14, 77.16, 77.18, 77.2, 77.22, 77.24], [7...incompletecomplete
12024-12-04 09:30:0077.121306.0985.0both27572291.077.383760.03415.0...77.4321.018752[][[77.14, 77.16, 77.18, 77.2, 77.22, 77.24], [7...incompletecomplete
22024-12-04 09:30:0077.14156.02510.0sell27572666.077.383760.03415.0...77.4-2354.018752[][[77.14, 77.16, 77.18, 77.2, 77.22, 77.24], [7...incompletecomplete
32024-12-04 09:30:0077.16100.0709.0sell2757809.077.383760.03415.0...77.4-609.018752[][[77.14, 77.16, 77.18, 77.2, 77.22, 77.24], [7...incompletecomplete
42024-12-04 09:30:0077.180.01658.0sell27571658.077.383760.03415.0...77.4-1658.018752[][[77.14, 77.16, 77.18, 77.2, 77.22, 77.24], [7...incompletecomplete
..................................................................
5312024-12-04 15:55:0078.40478.01584.028342062.078.445457.07033.0...NaN-1106.00000[][]incompleteincomplete
5322024-12-04 15:55:0078.421472.04033.028345505.078.445457.07033.0...NaN-2561.00000[][]incompleteincomplete
5332024-12-04 15:55:0078.445457.07033.0283412490.078.445457.07033.0...NaN-1576.00000[][]incompleteincomplete
5342024-12-04 15:55:0078.464136.02046.028346182.078.445457.07033.0...NaN2090.00000[][]incompleteincomplete
5352024-12-04 15:55:0078.484377.01388.028345765.078.445457.07033.0...NaN2989.00000[][]incompleteincomplete
\n", "

536 rows × 21 columns

\n", "
" ], "text/plain": [ " timestamp price buyVolume sellVolume imbalance index \\\n", "0 2024-12-04 09:30:00 77.10 597.0 119.0 2757 \n", "1 2024-12-04 09:30:00 77.12 1306.0 985.0 both 2757 \n", "2 2024-12-04 09:30:00 77.14 156.0 2510.0 sell 2757 \n", "3 2024-12-04 09:30:00 77.16 100.0 709.0 sell 2757 \n", "4 2024-12-04 09:30:00 77.18 0.0 1658.0 sell 2757 \n", ".. ... ... ... ... ... ... \n", "531 2024-12-04 15:55:00 78.40 478.0 1584.0 2834 \n", "532 2024-12-04 15:55:00 78.42 1472.0 4033.0 2834 \n", "533 2024-12-04 15:55:00 78.44 5457.0 7033.0 2834 \n", "534 2024-12-04 15:55:00 78.46 4136.0 2046.0 2834 \n", "535 2024-12-04 15:55:00 78.48 4377.0 1388.0 2834 \n", "\n", " totalVolume poc highest_bid_stacked_imbalance \\\n", "0 716.0 77.38 3760.0 \n", "1 2291.0 77.38 3760.0 \n", "2 2666.0 77.38 3760.0 \n", "3 809.0 77.38 3760.0 \n", "4 1658.0 77.38 3760.0 \n", ".. ... ... ... \n", "531 2062.0 78.44 5457.0 \n", "532 5505.0 78.44 5457.0 \n", "533 12490.0 78.44 5457.0 \n", "534 6182.0 78.44 5457.0 \n", "535 5765.0 78.44 5457.0 \n", "\n", " highest_ask_stacked_imbalance ... highest_bid_imbalance_stack_price \\\n", "0 3415.0 ... 77.4 \n", "1 3415.0 ... 77.4 \n", "2 3415.0 ... 77.4 \n", "3 3415.0 ... 77.4 \n", "4 3415.0 ... 77.4 \n", ".. ... ... ... \n", "531 7033.0 ... NaN \n", "532 7033.0 ... NaN \n", "533 7033.0 ... NaN \n", "534 7033.0 ... NaN \n", "535 7033.0 ... NaN \n", "\n", " delta total_ask_imbalance_count highest_stacked_ask_imbalance \\\n", "0 478.0 18 7 \n", "1 321.0 18 7 \n", "2 -2354.0 18 7 \n", "3 -609.0 18 7 \n", "4 -1658.0 18 7 \n", ".. ... ... ... \n", "531 -1106.0 0 0 \n", "532 -2561.0 0 0 \n", "533 -1576.0 0 0 \n", "534 2090.0 0 0 \n", "535 2989.0 0 0 \n", "\n", " total_bid_imbalance_count highest_stacked_bid_imbalance \\\n", "0 5 2 \n", "1 5 2 \n", "2 5 2 \n", "3 5 2 \n", "4 5 2 \n", ".. ... ... \n", "531 0 0 \n", "532 0 0 \n", "533 0 0 \n", "534 0 0 \n", "535 0 0 \n", "\n", " support_imbalance resistance_imbalance \\\n", "0 [] [[77.14, 77.16, 77.18, 77.2, 77.22, 77.24], [7... \n", "1 [] [[77.14, 77.16, 77.18, 77.2, 77.22, 77.24], [7... \n", "2 [] [[77.14, 77.16, 77.18, 77.2, 77.22, 77.24], [7... \n", "3 [] [[77.14, 77.16, 77.18, 77.2, 77.22, 77.24], [7... \n", "4 [] [[77.14, 77.16, 77.18, 77.2, 77.22, 77.24], [7... \n", ".. ... ... \n", "531 [] [] \n", "532 [] [] \n", "533 [] [] \n", "534 [] [] \n", "535 [] [] \n", "\n", " buy_auction_status sell_auction_status \n", "0 incomplete complete \n", "1 incomplete complete \n", "2 incomplete complete \n", "3 incomplete complete \n", "4 incomplete complete \n", ".. ... ... \n", "531 incomplete incomplete \n", "532 incomplete incomplete \n", "533 incomplete incomplete \n", "534 incomplete incomplete \n", "535 incomplete incomplete \n", "\n", "[536 rows x 21 columns]" ] }, "execution_count": 185, "metadata": {}, "output_type": "execute_result" } ], "source": [ "filtered_df" ] }, { "cell_type": "code", "execution_count": 186, "id": "1f47fb07", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
timestamppricebuyVolumesellVolumeimbalanceindextotalVolumepochighest_bid_stacked_imbalancehighest_ask_stacked_imbalance...highest_bid_imbalance_stack_pricedeltatotal_ask_imbalance_counthighest_stacked_ask_imbalancetotal_bid_imbalance_counthighest_stacked_bid_imbalancesupport_imbalanceresistance_imbalancebuy_auction_statussell_auction_status
12024-12-04 09:30:0077.121306.0985.0both27572291.077.383760.03415.0...77.40321.018752[][[77.14, 77.16, 77.18, 77.2, 77.22, 77.24], [7...incompletecomplete
1032024-12-04 09:55:0077.542391.01477.0both27623868.077.542855.01477.0...77.50914.01195[[77.36, 77.38, 77.4, 77.42, 77.44], [77.48, 7...[]completecomplete
1462024-12-04 10:15:0077.76906.011.0both2766917.077.821604.01717.0...77.76895.02111[][]completecomplete
1642024-12-04 10:25:0077.942203.01934.0both27684137.077.924363.02211.0...77.92269.01133[][]incompleteincomplete
2182024-12-04 11:05:0077.88486.0601.0both27761087.077.841400.01199.0...77.88-115.04311[][]incompletecomplete
2722024-12-04 11:45:0078.181117.02176.0both27843293.078.181767.02176.0...78.22-1059.01121[][]completecomplete
3652024-12-04 12:55:0078.02367.0120.0both2798487.077.96737.0381.0...78.02247.01111[][]completecomplete
4352024-12-04 14:10:0077.961160.01394.0both28132554.077.961160.01394.0...77.96-234.01122[][]completecomplete
\n", "

8 rows × 21 columns

\n", "
" ], "text/plain": [ " timestamp price buyVolume sellVolume imbalance index \\\n", "1 2024-12-04 09:30:00 77.12 1306.0 985.0 both 2757 \n", "103 2024-12-04 09:55:00 77.54 2391.0 1477.0 both 2762 \n", "146 2024-12-04 10:15:00 77.76 906.0 11.0 both 2766 \n", "164 2024-12-04 10:25:00 77.94 2203.0 1934.0 both 2768 \n", "218 2024-12-04 11:05:00 77.88 486.0 601.0 both 2776 \n", "272 2024-12-04 11:45:00 78.18 1117.0 2176.0 both 2784 \n", "365 2024-12-04 12:55:00 78.02 367.0 120.0 both 2798 \n", "435 2024-12-04 14:10:00 77.96 1160.0 1394.0 both 2813 \n", "\n", " totalVolume poc highest_bid_stacked_imbalance \\\n", "1 2291.0 77.38 3760.0 \n", "103 3868.0 77.54 2855.0 \n", "146 917.0 77.82 1604.0 \n", "164 4137.0 77.92 4363.0 \n", "218 1087.0 77.84 1400.0 \n", "272 3293.0 78.18 1767.0 \n", "365 487.0 77.96 737.0 \n", "435 2554.0 77.96 1160.0 \n", "\n", " highest_ask_stacked_imbalance ... highest_bid_imbalance_stack_price \\\n", "1 3415.0 ... 77.40 \n", "103 1477.0 ... 77.50 \n", "146 1717.0 ... 77.76 \n", "164 2211.0 ... 77.92 \n", "218 1199.0 ... 77.88 \n", "272 2176.0 ... 78.22 \n", "365 381.0 ... 78.02 \n", "435 1394.0 ... 77.96 \n", "\n", " delta total_ask_imbalance_count highest_stacked_ask_imbalance \\\n", "1 321.0 18 7 \n", "103 914.0 1 1 \n", "146 895.0 2 1 \n", "164 269.0 1 1 \n", "218 -115.0 4 3 \n", "272 -1059.0 1 1 \n", "365 247.0 1 1 \n", "435 -234.0 1 1 \n", "\n", " total_bid_imbalance_count highest_stacked_bid_imbalance \\\n", "1 5 2 \n", "103 9 5 \n", "146 1 1 \n", "164 3 3 \n", "218 1 1 \n", "272 2 1 \n", "365 1 1 \n", "435 2 2 \n", "\n", " support_imbalance \\\n", "1 [] \n", "103 [[77.36, 77.38, 77.4, 77.42, 77.44], [77.48, 7... \n", "146 [] \n", "164 [] \n", "218 [] \n", "272 [] \n", "365 [] \n", "435 [] \n", "\n", " resistance_imbalance buy_auction_status \\\n", "1 [[77.14, 77.16, 77.18, 77.2, 77.22, 77.24], [7... incomplete \n", "103 [] complete \n", "146 [] complete \n", "164 [] incomplete \n", "218 [] incomplete \n", "272 [] complete \n", "365 [] complete \n", "435 [] complete \n", "\n", " sell_auction_status \n", "1 complete \n", "103 complete \n", "146 complete \n", "164 incomplete \n", "218 complete \n", "272 complete \n", "365 complete \n", "435 complete \n", "\n", "[8 rows x 21 columns]" ] }, "execution_count": 186, "metadata": {}, "output_type": "execute_result" } ], "source": [ "filtered_df[filtered_df['imbalance'] == 'both']" ] }, { "cell_type": "code", "execution_count": 187, "id": "039d89b2", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
timestampbuy_auction_statussell_auction_status
02024-12-04 09:30:00incompletecomplete
402024-12-04 09:35:00incompletecomplete
592024-12-04 09:40:00completecomplete
712024-12-04 09:45:00completeincomplete
822024-12-04 09:50:00completecomplete
............
5042024-12-04 15:35:00incompletecomplete
5092024-12-04 15:40:00incompleteincomplete
5132024-12-04 15:45:00incompleteincomplete
5202024-12-04 15:50:00incompleteincomplete
5282024-12-04 15:55:00incompleteincomplete
\n", "

78 rows × 3 columns

\n", "
" ], "text/plain": [ " timestamp buy_auction_status sell_auction_status\n", "0 2024-12-04 09:30:00 incomplete complete\n", "40 2024-12-04 09:35:00 incomplete complete\n", "59 2024-12-04 09:40:00 complete complete\n", "71 2024-12-04 09:45:00 complete incomplete\n", "82 2024-12-04 09:50:00 complete complete\n", ".. ... ... ...\n", "504 2024-12-04 15:35:00 incomplete complete\n", "509 2024-12-04 15:40:00 incomplete incomplete\n", "513 2024-12-04 15:45:00 incomplete incomplete\n", "520 2024-12-04 15:50:00 incomplete incomplete\n", "528 2024-12-04 15:55:00 incomplete incomplete\n", "\n", "[78 rows x 3 columns]" ] }, "execution_count": 187, "metadata": {}, "output_type": "execute_result" } ], "source": [ "filtered_df[['timestamp','buy_auction_status','sell_auction_status']].drop_duplicates()" ] }, { "cell_type": "code", "execution_count": 188, "id": "84a6f333", "metadata": {}, "outputs": [], "source": [ "import ast\n", "\n", "# Define a function that converts the value to a list if needed\n", "def safe_literal_eval(val):\n", " if isinstance(val, str):\n", " try:\n", " return ast.literal_eval(val)\n", " except ValueError:\n", " print(f\"Error in evaluating: {val}\")\n", " return val # Optionally handle bad strings gracefully\n", " return val\n", "\n", "# Apply to the columns\n", "filtered_df['support_imbalance'] = filtered_df['support_imbalance'].apply(safe_literal_eval)\n", "filtered_df['resistance_imbalance'] = filtered_df['resistance_imbalance'].apply(safe_literal_eval)" ] }, { "cell_type": "code", "execution_count": 189, "id": "248390d3", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 []\n", "1 []\n", "2 []\n", "3 []\n", "4 []\n", " ..\n", "531 []\n", "532 []\n", "533 []\n", "534 []\n", "535 []\n", "Name: support_imbalance, Length: 536, dtype: object" ] }, "execution_count": 189, "metadata": {}, "output_type": "execute_result" } ], "source": [ "filtered_df['support_imbalance']" ] }, { "cell_type": "code", "execution_count": 190, "id": "ccd75649", "metadata": {}, "outputs": [], "source": [ "# filtered_df['support_imbalance_count'] = filtered_df['support_imbalance'].apply(lambda x: len(set(x[0])) if isinstance(x, list) and len(x) > 0 and isinstance(x[0], list) else 0)\n", "# filtered_df['resistance_imbalance_count'] = filtered_df['resistance_imbalance'].apply(lambda x: len(set(x[0])) if isinstance(x, list) and len(x) > 0 and isinstance(x[0], list) else 0)\n", "\n", "\n", "import itertools\n", "\n", "# Flatten the nested list and then take the set of unique elements\n", "filtered_df['support_imbalance_count'] = filtered_df['support_imbalance'].apply(\n", " lambda x: len(set(itertools.chain.from_iterable(x))) if isinstance(x, list) else 0\n", ")\n", "\n", "filtered_df['resistance_imbalance_count'] = filtered_df['resistance_imbalance'].apply(\n", " lambda x: len(set(itertools.chain.from_iterable(x))) if isinstance(x, list) else 0\n", ")" ] }, { "cell_type": "code", "execution_count": 191, "id": "c9820a16", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
support_imbalanceresistance_imbalancesupport_imbalance_countresistance_imbalance_count
0[][[77.14, 77.16, 77.18, 77.2, 77.22, 77.24], [7...017
1[][[77.14, 77.16, 77.18, 77.2, 77.22, 77.24], [7...017
2[][[77.14, 77.16, 77.18, 77.2, 77.22, 77.24], [7...017
3[][[77.14, 77.16, 77.18, 77.2, 77.22, 77.24], [7...017
4[][[77.14, 77.16, 77.18, 77.2, 77.22, 77.24], [7...017
...............
531[][]00
532[][]00
533[][]00
534[][]00
535[][]00
\n", "

536 rows × 4 columns

\n", "
" ], "text/plain": [ " support_imbalance resistance_imbalance \\\n", "0 [] [[77.14, 77.16, 77.18, 77.2, 77.22, 77.24], [7... \n", "1 [] [[77.14, 77.16, 77.18, 77.2, 77.22, 77.24], [7... \n", "2 [] [[77.14, 77.16, 77.18, 77.2, 77.22, 77.24], [7... \n", "3 [] [[77.14, 77.16, 77.18, 77.2, 77.22, 77.24], [7... \n", "4 [] [[77.14, 77.16, 77.18, 77.2, 77.22, 77.24], [7... \n", ".. ... ... \n", "531 [] [] \n", "532 [] [] \n", "533 [] [] \n", "534 [] [] \n", "535 [] [] \n", "\n", " support_imbalance_count resistance_imbalance_count \n", "0 0 17 \n", "1 0 17 \n", "2 0 17 \n", "3 0 17 \n", "4 0 17 \n", ".. ... ... \n", "531 0 0 \n", "532 0 0 \n", "533 0 0 \n", "534 0 0 \n", "535 0 0 \n", "\n", "[536 rows x 4 columns]" ] }, "execution_count": 191, "metadata": {}, "output_type": "execute_result" } ], "source": [ "filtered_df[['support_imbalance','resistance_imbalance','support_imbalance_count','resistance_imbalance_count']]" ] }, { "cell_type": "code", "execution_count": 192, "id": "acc8670e", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[]" ] }, "execution_count": 192, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# filtered_df[filtered_df['timestamp'] == '2024-12-04 09:40:00'][['resistance_imbalance']]\n", "filtered_df.loc[59,'resistance_imbalance']" ] }, { "cell_type": "code", "execution_count": 193, "id": "91e912d2", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "2024-12-04 09:30:00\n", "[]\n", "77.1\n", "77.12\n", "77.14\n", "77.16\n", "77.18\n", "77.2\n", "77.22\n", "77.24\n", "77.26\n", "77.28\n", "77.3\n", "77.32\n", "77.34\n", "77.36\n", "77.38\n", "77.4\n", "77.42\n", "77.44\n", "77.46\n", "77.48\n", "77.5\n", "77.52\n", "77.54\n", "77.56\n", "77.58\n", "77.6\n", "77.62\n", "77.64\n", "77.66\n", "77.68\n", "77.7\n", "77.72\n", "77.74\n", "77.76\n", "77.78\n", "77.8\n", "77.82\n", "77.84\n", "77.86\n", "77.88\n", "2024-12-04 09:35:00\n", "[]\n", "77.26\n", "77.28\n", "77.3\n", "77.32\n", "77.34\n", "77.36\n", "77.38\n", "77.4\n", "77.42\n", "77.44\n", "77.46\n", "77.48\n", "77.5\n", "77.52\n", "77.54\n", "77.56\n", "77.58\n", "77.6\n", "77.62\n", "2024-12-04 09:40:00\n", "[]\n", "77.38\n", "77.4\n", "77.42\n", "77.44\n", "77.46\n", "77.48\n", "77.5\n", "77.52\n", "77.54\n", "77.56\n", "77.58\n", "77.6\n", "2024-12-04 09:45:00\n", "[77.42, 77.44, 77.46, 77.48]\n", "77.36\n", "77.38\n", "77.4\n", "77.42\n", "77.44\n", "77.46\n", "77.48\n", "77.5\n", "77.52\n", "77.54\n", "77.56\n", "2024-12-04 09:50:00\n", "[]\n", "77.32\n", "77.34\n", "77.36\n", "77.38\n", "77.4\n", "77.42\n", "77.44\n", "77.46\n", "77.48\n", "77.5\n", "77.52\n", "2024-12-04 09:55:00\n", "[]\n", "77.34\n", "77.36\n", "77.38\n", "77.4\n", "77.42\n", "77.44\n", "77.46\n", "77.48\n", "77.5\n", "77.52\n", "77.54\n", "77.56\n", "2024-12-04 10:00:00\n", "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.52]\n", "77.52\n", "77.54\n", "77.56\n", "77.58\n", "77.6\n", "77.62\n", "77.64\n", "77.66\n", "77.68\n", "2024-12-04 10:05:00\n", "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.58]\n", "77.58\n", "77.6\n", "77.62\n", "77.64\n", "77.66\n", "77.68\n", "77.7\n", "77.72\n", "77.74\n", "77.76\n", "77.78\n", "77.8\n", "77.82\n", "77.84\n", "77.86\n", "77.88\n", "77.9\n", "77.92\n", "2024-12-04 10:10:00\n", "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64, 77.74, 77.76, 77.78]\n", "77.74\n", "77.76\n", "77.78\n", "77.8\n", "77.82\n", "77.84\n", "77.86\n", "77.88\n", "77.9\n", "77.92\n", "2024-12-04 10:15:00\n", "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n", "77.68\n", "77.7\n", "77.72\n", "77.74\n", "77.76\n", "77.78\n", "77.8\n", "77.82\n", "77.84\n", "77.86\n", "2024-12-04 10:20:00\n", "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n", "77.8\n", "77.82\n", "77.84\n", "77.86\n", "77.88\n", "77.9\n", "77.92\n", "77.94\n", "77.96\n", "2024-12-04 10:25:00\n", "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64, 77.88, 77.9, 77.92, 77.94]\n", "77.88\n", "77.9\n", "77.92\n", "77.94\n", "77.96\n", "77.98\n", "2024-12-04 10:30:00\n", "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n", "77.76\n", "77.78\n", "77.8\n", "77.82\n", "77.84\n", "77.86\n", "77.88\n", "77.9\n", "2024-12-04 10:35:00\n", "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n", "77.78\n", "77.8\n", "77.82\n", "77.84\n", "77.86\n", "2024-12-04 10:40:00\n", "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n", "77.76\n", "77.78\n", "77.8\n", "77.82\n", "77.84\n", "2024-12-04 10:45:00\n", "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n", "77.66\n", "77.68\n", "77.7\n", "77.72\n", "77.74\n", "77.76\n", "77.78\n", "77.8\n", "77.82\n", "77.84\n", "77.86\n", "2024-12-04 10:50:00\n", "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n", "77.74\n", "77.76\n", "77.78\n", "77.8\n", "77.82\n", "2024-12-04 10:55:00\n", "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n", "77.78\n", "77.8\n", "77.82\n", "77.84\n", "77.86\n", "77.88\n", "77.9\n", "77.92\n", "2024-12-04 11:00:00\n", "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64, 77.88, 77.9]\n", "77.92\n", "77.94\n", "77.96\n", "77.98\n", "78.0\n", "78.02\n", "2024-12-04 11:05:00\n", "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64, 77.88, 77.9]\n", "77.82\n", "77.84\n", "77.86\n", "77.88\n", "77.9\n", "77.92\n", "77.94\n", "2024-12-04 11:10:00\n", "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n", "77.82\n", "77.84\n", "77.86\n", "77.88\n", "77.92\n", "77.94\n", "77.96\n", "77.98\n", "78.0\n", "2024-12-04 11:15:00\n", "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64, 77.86, 77.88, 77.92, 77.94, 77.96]\n", "78.0\n", "78.02\n", "78.04\n", "78.06\n", "78.08\n", "78.1\n", "78.12\n", "78.14\n", "78.16\n", "78.18\n", "2024-12-04 11:20:00\n", "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64, 77.86, 77.88, 77.92, 77.94, 77.96]\n", "78.08\n", "78.1\n", "78.12\n", "78.14\n", "78.16\n", "78.18\n", "78.2\n", "2024-12-04 11:25:00\n", "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64, 77.86, 77.88, 77.92, 77.94, 77.96]\n", "78.12\n", "78.14\n", "78.16\n", "78.18\n", "78.2\n", "78.22\n", "78.24\n", "78.26\n", "2024-12-04 11:30:00\n", "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64, 77.86, 77.88, 77.92, 77.94, 77.96]\n", "78.18\n", "78.2\n", "78.22\n", "78.24\n", "2024-12-04 11:35:00\n", "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64, 77.86, 77.88, 77.92, 77.94, 77.96]\n", "78.12\n", "78.14\n", "78.16\n", "78.18\n", "78.2\n", "78.22\n", "2024-12-04 11:40:00\n", "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64, 77.86, 77.88, 77.92, 77.94, 77.96]\n", "78.2\n", "78.22\n", "78.24\n", "78.26\n", "78.28\n", "2024-12-04 11:45:00\n", "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64, 77.86, 77.88, 77.92, 77.94, 77.96]\n", "78.16\n", "78.18\n", "78.2\n", "78.22\n", "78.24\n", "78.26\n", "2024-12-04 11:50:00\n", "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64, 77.86, 77.88, 77.92, 77.94, 77.96]\n", "78.2\n", "78.22\n", "78.24\n", "78.26\n", "2024-12-04 11:55:00\n", "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64, 77.86, 77.88, 77.92, 77.94, 77.96]\n", "78.2\n", "78.22\n", "78.24\n", "78.26\n", "2024-12-04 12:00:00\n", "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64, 77.86, 77.88, 77.92, 77.94, 77.96]\n", "78.1\n", "78.12\n", "78.14\n", "78.16\n", "78.18\n", "78.2\n", "78.22\n", "2024-12-04 12:05:00\n", "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64, 77.86, 77.88, 77.92, 77.94, 77.96]\n", "78.12\n", "78.14\n", "78.16\n", "78.18\n", "78.2\n", "2024-12-04 12:10:00\n", "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64, 77.86, 77.88, 77.92, 77.94, 77.96]\n", "78.18\n", "78.2\n", "78.22\n", "78.24\n", "78.26\n", "78.28\n", "2024-12-04 12:15:00\n", "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64, 77.86, 77.88, 77.92, 77.94, 77.96]\n", "78.12\n", "78.14\n", "78.16\n", "78.18\n", "78.2\n", "78.22\n", "78.24\n", "2024-12-04 12:20:00\n", "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64, 77.86, 77.88, 77.92, 77.94, 77.96]\n", "77.98\n", "78.0\n", "78.02\n", "78.04\n", "78.06\n", "78.08\n", "78.1\n", "78.12\n", "78.14\n", "78.16\n", "2024-12-04 12:25:00\n", "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64, 77.86, 77.88, 77.92, 77.94, 77.96]\n", "77.96\n", "77.98\n", "78.0\n", "78.02\n", "78.04\n", "78.06\n", "78.08\n", "2024-12-04 12:30:00\n", "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64, 77.86, 77.88, 77.92, 77.94]\n", "77.82\n", "77.84\n", "77.86\n", "77.88\n", "77.9\n", "77.92\n", "77.94\n", "77.96\n", "77.98\n", "78.0\n", "78.02\n", "78.04\n", "2024-12-04 12:35:00\n", "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n", "77.8\n", "77.82\n", "77.84\n", "77.86\n", "77.88\n", "2024-12-04 12:40:00\n", "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n", "77.78\n", "77.8\n", "77.82\n", "77.84\n", "2024-12-04 12:45:00\n", "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n", "77.82\n", "77.84\n", "77.86\n", "77.88\n", "77.9\n", "77.92\n", "77.94\n", "77.96\n", "77.98\n", "2024-12-04 12:50:00\n", "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64, 77.92, 77.94, 77.96]\n", "77.96\n", "77.98\n", "78.0\n", "78.02\n", "2024-12-04 12:55:00\n", "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64, 77.92, 77.94]\n", "77.94\n", "77.96\n", "77.98\n", "78.0\n", "78.02\n", "78.04\n", "2024-12-04 13:00:00\n", "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64, 77.92]\n", "77.86\n", "77.88\n", "77.9\n", "77.92\n", "77.94\n", "77.96\n", "2024-12-04 13:05:00\n", "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n", "77.86\n", "77.88\n", "77.9\n", "77.92\n", "77.94\n", "77.96\n", "77.98\n", "2024-12-04 13:10:00\n", "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n", "77.88\n", "77.9\n", "77.92\n", "77.94\n", "77.96\n", "77.98\n", "2024-12-04 13:15:00\n", "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n", "77.84\n", "77.86\n", "77.88\n", "77.9\n", "77.92\n", "2024-12-04 13:20:00\n", "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n", "77.82\n", "77.84\n", "77.86\n", "77.88\n", "2024-12-04 13:25:00\n", "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n", "77.8\n", "77.82\n", "77.84\n", "2024-12-04 13:30:00\n", "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n", "77.78\n", "77.8\n", "77.82\n", "77.84\n", "77.86\n", "2024-12-04 13:35:00\n", "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n", "77.8\n", "77.82\n", "77.84\n", "2024-12-04 13:40:00\n", "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n", "77.8\n", "77.82\n", "77.84\n", "77.86\n", "2024-12-04 13:45:00\n", "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n", "77.8\n", "77.82\n", "2024-12-04 13:50:00\n", "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n", "77.76\n", "77.78\n", "77.8\n", "77.82\n", "2024-12-04 13:55:00\n", "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n", "77.8\n", "77.82\n", "77.84\n", "77.86\n", "2024-12-04 14:00:00\n", "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n", "77.82\n", "77.84\n", "77.86\n", "77.88\n", "77.9\n", "77.92\n", "77.94\n", "2024-12-04 14:05:00\n", "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64, 77.88, 77.9, 77.92]\n", "77.88\n", "77.9\n", "77.92\n", "77.94\n", "77.96\n", "77.98\n", "2024-12-04 14:10:00\n", "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n", "77.92\n", "77.94\n", "77.96\n", "77.98\n", "2024-12-04 14:15:00\n", "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n", "77.9\n", "77.92\n", "77.94\n", "77.96\n", "2024-12-04 14:20:00\n", "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n", "77.96\n", "77.98\n", "2024-12-04 14:25:00\n", "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n", "77.98\n", "78.0\n", "78.02\n", "78.04\n", "2024-12-04 14:30:00\n", "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n", "78.02\n", "78.04\n", "78.06\n", "78.08\n", "2024-12-04 14:35:00\n", "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n", "78.08\n", "78.1\n", "78.12\n", "78.14\n", "2024-12-04 14:40:00\n", "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n", "78.04\n", "78.06\n", "78.08\n", "78.1\n", "78.12\n", "2024-12-04 14:45:00\n", "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n", "78.02\n", "78.04\n", "78.06\n", "78.08\n", "2024-12-04 14:50:00\n", "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n", "78.0\n", "78.02\n", "78.04\n", "2024-12-04 14:55:00\n", "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n", "78.02\n", "78.04\n", "78.06\n", "78.08\n", "2024-12-04 15:00:00\n", "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n", "78.04\n", "78.06\n", "78.08\n", "2024-12-04 15:05:00\n", "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n", "78.1\n", "78.12\n", "78.14\n", "78.16\n", "2024-12-04 15:10:00\n", "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n", "78.14\n", "78.16\n", "78.18\n", "78.2\n", "78.22\n", "78.24\n", "2024-12-04 15:15:00\n", "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n", "78.08\n", "78.1\n", "78.12\n", "78.14\n", "78.16\n", "2024-12-04 15:20:00\n", "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n", "78.08\n", "78.1\n", "78.12\n", "78.14\n", "78.16\n", "78.18\n", "2024-12-04 15:25:00\n", "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64, 78.12, 78.14, 78.16]\n", "78.12\n", "78.14\n", "78.16\n", "78.18\n", "2024-12-04 15:30:00\n", "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n", "78.06\n", "78.08\n", "78.1\n", "78.12\n", "78.14\n", "2024-12-04 15:35:00\n", "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n", "78.04\n", "78.06\n", "78.08\n", "78.1\n", "78.12\n", "2024-12-04 15:40:00\n", "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n", "78.08\n", "78.1\n", "78.12\n", "78.14\n", "2024-12-04 15:45:00\n", "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64]\n", "78.12\n", "78.14\n", "78.16\n", "78.18\n", "78.2\n", "78.22\n", "78.24\n", "2024-12-04 15:50:00\n", "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64, 78.16, 78.18, 78.2, 78.22]\n", "78.24\n", "78.26\n", "78.28\n", "78.3\n", "78.32\n", "78.34\n", "78.36\n", "78.38\n", "2024-12-04 15:55:00\n", "[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5, 77.54, 77.56, 77.6, 77.62, 77.64, 78.16, 78.18, 78.2, 78.22]\n", "78.34\n", "78.36\n", "78.38\n", "78.4\n", "78.42\n", "78.44\n", "78.46\n", "78.48\n" ] } ], "source": [ "# Initialize active support and resistance levels\n", "active_support_levels = [item for sublist in filtered_df.loc[0, 'support_imbalance'] for item in sublist]\n", "active_resistance_levels = [item for sublist in filtered_df.loc[0, 'resistance_imbalance'] for item in sublist]\n", "\n", "# Function to update active support and resistance levels\n", "def update_active_levels(active_levels, traded_price):\n", " # Remove levels that have been breached\n", " return [level for level in active_levels if level != traded_price]\n", "\n", "# Group by timestamp and update active levels\n", "active_levels_df = []\n", "for timestamp, group in filtered_df.groupby('timestamp'):\n", " print(timestamp)\n", " print(active_support_levels)\n", " for idx, row in group.iterrows():\n", " traded_price = round(row['price'],2)\n", " print(traded_price)\n", " # Add new support and resistance levels from the current row\n", " new_support_levels = [item for sublist in row['support_imbalance'] for item in sublist]\n", " new_resistance_levels = [item for sublist in row['resistance_imbalance'] for item in sublist]\n", " \n", " # Update active support and resistance levels with new levels\n", " active_support_levels = list(set(active_support_levels + new_support_levels))\n", " active_resistance_levels = list(set(active_resistance_levels + new_resistance_levels))\n", " \n", " # Update support levels\n", " active_support_levels = update_active_levels(active_support_levels, traded_price)\n", " \n", " # Update resistance levels\n", " active_resistance_levels = update_active_levels(active_resistance_levels, traded_price)\n", " \n", " # Sort the active support and resistance levels\n", " active_support_levels = sorted(active_support_levels)\n", " active_resistance_levels = sorted(active_resistance_levels)\n", " \n", " # Append the updated levels to the dataframe\n", " active_levels_df.append({\n", " 'timestamp': timestamp,\n", " 'active_support_levels': active_support_levels,\n", " 'active_resistance_levels': active_resistance_levels\n", " })\n", "\n", "# Create a DataFrame for active levels\n", "active_levels_df = pd.DataFrame(active_levels_df)" ] }, { "cell_type": "code", "execution_count": 194, "id": "600f3efa", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
indextimestampactive_support_levelsactive_resistance_levels
002024-12-04 09:30:00[][77.14, 77.16, 77.18, 77.2, 77.22, 77.24, 77.2...
112024-12-04 09:35:00[][77.14, 77.16, 77.18, 77.2, 77.22, 77.24]
222024-12-04 09:40:00[77.42, 77.44, 77.46, 77.48][77.14, 77.16, 77.18, 77.2, 77.22, 77.24]
332024-12-04 09:45:00[][77.14, 77.16, 77.18, 77.2, 77.22, 77.24]
442024-12-04 09:50:00[][77.14, 77.16, 77.18, 77.2, 77.22, 77.24]
...............
73732024-12-04 15:35:00[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5...[77.14, 77.16, 77.18, 77.2, 77.22, 77.24, 77.72]
74742024-12-04 15:40:00[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5...[77.14, 77.16, 77.18, 77.2, 77.22, 77.24, 77.72]
75752024-12-04 15:45:00[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5...[77.14, 77.16, 77.18, 77.2, 77.22, 77.24, 77.72]
76762024-12-04 15:50:00[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5...[77.14, 77.16, 77.18, 77.2, 77.22, 77.24, 77.72]
77772024-12-04 15:55:00[77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5...[77.14, 77.16, 77.18, 77.2, 77.22, 77.24, 77.72]
\n", "

78 rows × 4 columns

\n", "
" ], "text/plain": [ " index timestamp \\\n", "0 0 2024-12-04 09:30:00 \n", "1 1 2024-12-04 09:35:00 \n", "2 2 2024-12-04 09:40:00 \n", "3 3 2024-12-04 09:45:00 \n", "4 4 2024-12-04 09:50:00 \n", ".. ... ... \n", "73 73 2024-12-04 15:35:00 \n", "74 74 2024-12-04 15:40:00 \n", "75 75 2024-12-04 15:45:00 \n", "76 76 2024-12-04 15:50:00 \n", "77 77 2024-12-04 15:55:00 \n", "\n", " active_support_levels \\\n", "0 [] \n", "1 [] \n", "2 [77.42, 77.44, 77.46, 77.48] \n", "3 [] \n", "4 [] \n", ".. ... \n", "73 [77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5... \n", "74 [77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5... \n", "75 [77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5... \n", "76 [77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5... \n", "77 [77.36, 77.38, 77.4, 77.42, 77.44, 77.48, 77.5... \n", "\n", " active_resistance_levels \n", "0 [77.14, 77.16, 77.18, 77.2, 77.22, 77.24, 77.2... \n", "1 [77.14, 77.16, 77.18, 77.2, 77.22, 77.24] \n", "2 [77.14, 77.16, 77.18, 77.2, 77.22, 77.24] \n", "3 [77.14, 77.16, 77.18, 77.2, 77.22, 77.24] \n", "4 [77.14, 77.16, 77.18, 77.2, 77.22, 77.24] \n", ".. ... \n", "73 [77.14, 77.16, 77.18, 77.2, 77.22, 77.24, 77.72] \n", "74 [77.14, 77.16, 77.18, 77.2, 77.22, 77.24, 77.72] \n", "75 [77.14, 77.16, 77.18, 77.2, 77.22, 77.24, 77.72] \n", "76 [77.14, 77.16, 77.18, 77.2, 77.22, 77.24, 77.72] \n", "77 [77.14, 77.16, 77.18, 77.2, 77.22, 77.24, 77.72] \n", "\n", "[78 rows x 4 columns]" ] }, "execution_count": 194, "metadata": {}, "output_type": "execute_result" } ], "source": [ "temp_df = active_levels_df\n", "\n", "temp_df['active_support_levels'] = temp_df['active_support_levels'].apply(str)\n", "temp_df['active_resistance_levels'] = temp_df['active_resistance_levels'].apply(str)\n", "temp_df = temp_df.drop_duplicates().reset_index()\n", "\n", "temp_df" ] }, { "cell_type": "code", "execution_count": 195, "id": "1d9c8662", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'[77.14, 77.16, 77.18, 77.2, 77.22, 77.24, 77.72]'" ] }, "execution_count": 195, "metadata": {}, "output_type": "execute_result" } ], "source": [ "temp_df.loc[77,'active_resistance_levels']" ] }, { "cell_type": "code", "execution_count": 197, "id": "90d27fd9", "metadata": {}, "outputs": [], "source": [ "\n", "# Convert 'support_imbalance' and 'resistance_imbalance' columns to strings\n", "filtered_df['support_imbalance'] = filtered_df['support_imbalance'].apply(str)\n", "filtered_df['resistance_imbalance'] = filtered_df['resistance_imbalance'].apply(str)\n", "\n", "# Reset index and filter required columns\n", "temp_filter_df = filtered_df[['timestamp','poc','highest_bid_stacked_imbalance','highest_ask_stacked_imbalance','highest_ask_imbalance_stack_price','highest_bid_imbalance_stack_price',\n", " \n", " 'total_ask_imbalance_count','highest_stacked_ask_imbalance',\n", " 'total_bid_imbalance_count','highest_stacked_bid_imbalance', \n", " 'support_imbalance_count','resistance_imbalance_count',\n", " 'support_imbalance', 'resistance_imbalance']].drop_duplicates().reset_index(drop=True)\n", "\n", "# Create consecutive POC flag and count highest consecutive POC\n", "temp_filter_df['consecutive_poc_flag'] = temp_filter_df['poc'].eq(temp_filter_df['poc'].shift())\n", "\n", "# Calculate the highest consecutive POC count\n", "temp_filter_df['highest_consecutive_poc_count'] = temp_filter_df['poc'].groupby((temp_filter_df['poc'] != temp_filter_df['poc'].shift()).cumsum()).transform('count')" ] }, { "cell_type": "code", "execution_count": 198, "id": "9de5cc77", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
timestamppochighest_bid_stacked_imbalancehighest_ask_stacked_imbalancehighest_ask_imbalance_stack_pricehighest_bid_imbalance_stack_pricetotal_ask_imbalance_counthighest_stacked_ask_imbalancetotal_bid_imbalance_counthighest_stacked_bid_imbalancesupport_imbalance_countresistance_imbalance_countsupport_imbalanceresistance_imbalanceconsecutive_poc_flaghighest_consecutive_poc_count
02024-12-04 09:30:0077.383760.03415.077.3477.4018752017[][[77.14, 77.16, 77.18, 77.2, 77.22, 77.24], [7...False1
12024-12-04 09:35:0077.562322.02573.0NaN77.52006200[][]False1
22024-12-04 09:40:0077.523673.04166.077.5677.48224440[[77.42, 77.44, 77.46, 77.48]][]False1
32024-12-04 09:45:0077.445001.01856.0NaN77.44006200[][]False1
42024-12-04 09:50:0077.36992.01419.077.4077.48222200[][]False1
...................................................
732024-12-04 15:35:0078.062138.03137.078.0878.12111100[][]False1
742024-12-04 15:40:0078.123081.06590.0NaNNaN000000[][]False1
752024-12-04 15:45:0078.223586.0499.078.1278.22114440[[78.16, 78.18, 78.2, 78.22]][]False1
762024-12-04 15:50:0078.262938.02038.0NaN78.32003200[][]False1
772024-12-04 15:55:0078.445457.07033.0NaNNaN000000[][]False1
\n", "

78 rows × 16 columns

\n", "
" ], "text/plain": [ " timestamp poc highest_bid_stacked_imbalance \\\n", "0 2024-12-04 09:30:00 77.38 3760.0 \n", "1 2024-12-04 09:35:00 77.56 2322.0 \n", "2 2024-12-04 09:40:00 77.52 3673.0 \n", "3 2024-12-04 09:45:00 77.44 5001.0 \n", "4 2024-12-04 09:50:00 77.36 992.0 \n", ".. ... ... ... \n", "73 2024-12-04 15:35:00 78.06 2138.0 \n", "74 2024-12-04 15:40:00 78.12 3081.0 \n", "75 2024-12-04 15:45:00 78.22 3586.0 \n", "76 2024-12-04 15:50:00 78.26 2938.0 \n", "77 2024-12-04 15:55:00 78.44 5457.0 \n", "\n", " highest_ask_stacked_imbalance highest_ask_imbalance_stack_price \\\n", "0 3415.0 77.34 \n", "1 2573.0 NaN \n", "2 4166.0 77.56 \n", "3 1856.0 NaN \n", "4 1419.0 77.40 \n", ".. ... ... \n", "73 3137.0 78.08 \n", "74 6590.0 NaN \n", "75 499.0 78.12 \n", "76 2038.0 NaN \n", "77 7033.0 NaN \n", "\n", " highest_bid_imbalance_stack_price total_ask_imbalance_count \\\n", "0 77.40 18 \n", "1 77.52 0 \n", "2 77.48 2 \n", "3 77.44 0 \n", "4 77.48 2 \n", ".. ... ... \n", "73 78.12 1 \n", "74 NaN 0 \n", "75 78.22 1 \n", "76 78.32 0 \n", "77 NaN 0 \n", "\n", " highest_stacked_ask_imbalance total_bid_imbalance_count \\\n", "0 7 5 \n", "1 0 6 \n", "2 2 4 \n", "3 0 6 \n", "4 2 2 \n", ".. ... ... \n", "73 1 1 \n", "74 0 0 \n", "75 1 4 \n", "76 0 3 \n", "77 0 0 \n", "\n", " highest_stacked_bid_imbalance support_imbalance_count \\\n", "0 2 0 \n", "1 2 0 \n", "2 4 4 \n", "3 2 0 \n", "4 2 0 \n", ".. ... ... \n", "73 1 0 \n", "74 0 0 \n", "75 4 4 \n", "76 2 0 \n", "77 0 0 \n", "\n", " resistance_imbalance_count support_imbalance \\\n", "0 17 [] \n", "1 0 [] \n", "2 0 [[77.42, 77.44, 77.46, 77.48]] \n", "3 0 [] \n", "4 0 [] \n", ".. ... ... \n", "73 0 [] \n", "74 0 [] \n", "75 0 [[78.16, 78.18, 78.2, 78.22]] \n", "76 0 [] \n", "77 0 [] \n", "\n", " resistance_imbalance consecutive_poc_flag \\\n", "0 [[77.14, 77.16, 77.18, 77.2, 77.22, 77.24], [7... False \n", "1 [] False \n", "2 [] False \n", "3 [] False \n", "4 [] False \n", ".. ... ... \n", "73 [] False \n", "74 [] False \n", "75 [] False \n", "76 [] False \n", "77 [] False \n", "\n", " highest_consecutive_poc_count \n", "0 1 \n", "1 1 \n", "2 1 \n", "3 1 \n", "4 1 \n", ".. ... \n", "73 1 \n", "74 1 \n", "75 1 \n", "76 1 \n", "77 1 \n", "\n", "[78 rows x 16 columns]" ] }, "execution_count": 198, "metadata": {}, "output_type": "execute_result" } ], "source": [ "temp_filter_df" ] }, { "cell_type": "code", "execution_count": 201, "id": "bcdef9d5", "metadata": {}, "outputs": [], "source": [ "import yfinance as yf\n", "\n", "temp_filter_df['timestamp'] = pd.to_datetime(temp_filter_df['timestamp'])\n", "\n", "# Downloading NKE data from yfinance in 5-minute intervals (only available for the last 60 days)\n", "stock_data = yf.download('NKE', interval='5m', period='5d', progress=False)\n", "\n", "# Resetting the index of downloaded NKE data and renaming columns\n", "stock_data.reset_index(inplace=True)\n", "stock_data['timestamp'] = pd.to_datetime(stock_data['Datetime']).dt.tz_localize(None)\n", "\n", "# Rounding Open, High, Low, Close, and Adj Close columns to two decimals\n", "stock_data[['Open', 'High', 'Low', 'Close', 'Adj Close']] = stock_data[['Open', 'High', 'Low', 'Close', 'Adj Close']].round(2)\n", "\n", "# Left join the existing dataframe with NKE data\n", "merged_df = pd.merge(temp_filter_df, stock_data, how='left', on='timestamp')\n", "\n", "# Adding a new column 'poc_direction'\n", "merged_df['poc_direction'] = merged_df.apply(lambda row: 'bullish' if row['Adj Close'] >= row['poc'] else 'bearish', axis=1)\n", "merged_df['highest_ask_imbalance_price_direction'] = merged_df.apply(\n", " lambda row: '' if np.isnan(row['highest_ask_imbalance_stack_price']) else \n", " ('bullish' if row['Adj Close'] >= row['highest_ask_imbalance_stack_price'] else 'bearish'),\n", " axis=1\n", ")\n", "merged_df['highest_bid_imbalance_price_direction'] = merged_df.apply(\n", " lambda row: '' if np.isnan(row['highest_bid_imbalance_stack_price']) else \n", " ('bullish' if row['Adj Close'] >= row['highest_bid_imbalance_stack_price'] else 'bearish'),\n", " axis=1\n", ")\n", "\n", "merged_df['total_bid_ask_count_direction'] = merged_df.apply(\n", " lambda row: 'bearish' if row['total_ask_imbalance_count'] > row['total_bid_imbalance_count'] else \n", " ('neutral' if row['total_ask_imbalance_count'] == row['total_bid_imbalance_count'] else 'bullish'),\n", " axis=1\n", ")\n", "\n", "merged_df['imbalance_support_resistance_direction'] = merged_df.apply(\n", " lambda row: 'bearish' if row['resistance_imbalance_count'] > row['support_imbalance_count'] else \n", " ('neutral' if row['resistance_imbalance_count'] == row['support_imbalance_count'] else 'bullish'),\n", " axis=1\n", ")\n" ] }, { "cell_type": "code", "execution_count": 202, "id": "8580b4cb", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
timestamppocAdj Closepoc_directionhighest_ask_imbalance_price_directionhighest_bid_imbalance_price_directiontotal_bid_ask_count_directionimbalance_support_resistance_directionsupport_imbalance_countresistance_imbalance_counthighest_ask_imbalance_stack_pricehighest_bid_imbalance_stack_price
02024-12-04 09:30:0077.3877.50bullishbullishbullishbearishbearish01777.3477.40
12024-12-04 09:35:0077.5677.64bullishbullishbullishneutral00NaN77.52
22024-12-04 09:40:0077.5277.56bullishbullishbullishbullishbullish4077.5677.48
32024-12-04 09:45:0077.4477.53bullishbullishbullishneutral00NaN77.44
42024-12-04 09:50:0077.3677.36bullishbearishbearishneutralneutral0077.4077.48
52024-12-04 09:55:0077.5477.56bullishbullishbullishbullishbullish8077.5477.50
62024-12-04 10:00:0077.5877.69bullishbullishbullishbullishbullish3077.6677.58
72024-12-04 10:05:0077.7877.94bullishbullishbullishbullishbullish6077.8277.74
82024-12-04 10:10:0077.8077.77bearishbearishbearishbearishneutral0077.8677.80
92024-12-04 10:15:0077.8277.84bullishbullishbullishbearishneutral0077.8477.76
102024-12-04 10:20:0077.8877.98bullishbullishbullishbullish50NaN77.88
112024-12-04 10:25:0077.9277.92bullishbearishbullishbullishneutral0077.9477.92
122024-12-04 10:30:0077.7877.84bullishbullishbullishbearishneutral0077.8077.78
132024-12-04 10:35:0077.8277.80bearishbearishbearishneutralneutral0077.8277.86
142024-12-04 10:40:0077.7877.81bullishbullishbullishneutral00NaN77.78
\n", "
" ], "text/plain": [ " timestamp poc Adj Close poc_direction \\\n", "0 2024-12-04 09:30:00 77.38 77.50 bullish \n", "1 2024-12-04 09:35:00 77.56 77.64 bullish \n", "2 2024-12-04 09:40:00 77.52 77.56 bullish \n", "3 2024-12-04 09:45:00 77.44 77.53 bullish \n", "4 2024-12-04 09:50:00 77.36 77.36 bullish \n", "5 2024-12-04 09:55:00 77.54 77.56 bullish \n", "6 2024-12-04 10:00:00 77.58 77.69 bullish \n", "7 2024-12-04 10:05:00 77.78 77.94 bullish \n", "8 2024-12-04 10:10:00 77.80 77.77 bearish \n", "9 2024-12-04 10:15:00 77.82 77.84 bullish \n", "10 2024-12-04 10:20:00 77.88 77.98 bullish \n", "11 2024-12-04 10:25:00 77.92 77.92 bullish \n", "12 2024-12-04 10:30:00 77.78 77.84 bullish \n", "13 2024-12-04 10:35:00 77.82 77.80 bearish \n", "14 2024-12-04 10:40:00 77.78 77.81 bullish \n", "\n", " highest_ask_imbalance_price_direction \\\n", "0 bullish \n", "1 \n", "2 bullish \n", "3 \n", "4 bearish \n", "5 bullish \n", "6 bullish \n", "7 bullish \n", "8 bearish \n", "9 bullish \n", "10 \n", "11 bearish \n", "12 bullish \n", "13 bearish \n", "14 \n", "\n", " highest_bid_imbalance_price_direction total_bid_ask_count_direction \\\n", "0 bullish bearish \n", "1 bullish bullish \n", "2 bullish bullish \n", "3 bullish bullish \n", "4 bearish neutral \n", "5 bullish bullish \n", "6 bullish bullish \n", "7 bullish bullish \n", "8 bearish bearish \n", "9 bullish bearish \n", "10 bullish bullish \n", "11 bullish bullish \n", "12 bullish bearish \n", "13 bearish neutral \n", "14 bullish bullish \n", "\n", " imbalance_support_resistance_direction support_imbalance_count \\\n", "0 bearish 0 \n", "1 neutral 0 \n", "2 bullish 4 \n", "3 neutral 0 \n", "4 neutral 0 \n", "5 bullish 8 \n", "6 bullish 3 \n", "7 bullish 6 \n", "8 neutral 0 \n", "9 neutral 0 \n", "10 bullish 5 \n", "11 neutral 0 \n", "12 neutral 0 \n", "13 neutral 0 \n", "14 neutral 0 \n", "\n", " resistance_imbalance_count highest_ask_imbalance_stack_price \\\n", "0 17 77.34 \n", "1 0 NaN \n", "2 0 77.56 \n", "3 0 NaN \n", "4 0 77.40 \n", "5 0 77.54 \n", "6 0 77.66 \n", "7 0 77.82 \n", "8 0 77.86 \n", "9 0 77.84 \n", "10 0 NaN \n", "11 0 77.94 \n", "12 0 77.80 \n", "13 0 77.82 \n", "14 0 NaN \n", "\n", " highest_bid_imbalance_stack_price \n", "0 77.40 \n", "1 77.52 \n", "2 77.48 \n", "3 77.44 \n", "4 77.48 \n", "5 77.50 \n", "6 77.58 \n", "7 77.74 \n", "8 77.80 \n", "9 77.76 \n", "10 77.88 \n", "11 77.92 \n", "12 77.78 \n", "13 77.86 \n", "14 77.78 " ] }, "execution_count": 202, "metadata": {}, "output_type": "execute_result" } ], "source": [ "merged_df[['timestamp','poc','Adj Close','poc_direction','highest_ask_imbalance_price_direction','highest_bid_imbalance_price_direction','total_bid_ask_count_direction','imbalance_support_resistance_direction','support_imbalance_count','resistance_imbalance_count','highest_ask_imbalance_stack_price','highest_bid_imbalance_stack_price']].head(15)" ] }, { "cell_type": "code", "execution_count": 95, "id": "5f85e064", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['timestamp', 'poc', 'highest_bid_stacked_imbalance',\n", " 'highest_ask_stacked_imbalance', 'highest_ask_imbalance_stack_price',\n", " 'highest_bid_imbalance_stack_price', 'total_ask_imbalance_count',\n", " 'highest_stacked_ask_imbalance', 'total_bid_imbalance_count',\n", " 'highest_stacked_bid_imbalance', 'support_imbalance',\n", " 'resistance_imbalance', 'consecutive_poc_flag',\n", " 'highest_consecutive_poc_count', 'Datetime', 'Open', 'High', 'Low',\n", " 'Close', 'Adj Close', 'Volume', 'poc_direction',\n", " 'highest_ask_imbalance_price_direction',\n", " 'highest_bid_imbalance_price_direction'],\n", " dtype='object')" ] }, "execution_count": 95, "metadata": {}, "output_type": "execute_result" } ], "source": [ "merged_df.columns" ] }, { "cell_type": "code", "execution_count": null, "id": "433923fd", "metadata": {}, "outputs": [], "source": [ "filtered_df" ] }, { "cell_type": "code", "execution_count": 242, "id": "0aaace44", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'[[576.3, 576.47, 576.64], [577.15, 577.32, 577.49, 577.66, 577.83]]'" ] }, "execution_count": 242, "metadata": {}, "output_type": "execute_result" } ], "source": [ "filtered_df.loc[0,'resistance_imbalance'] " ] }, { "cell_type": "code", "execution_count": 172, "id": "37b59f94", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[[580.55, 580.72, 580.89],\n", " [581.23, 581.4, 581.57, 581.74, 581.91, 582.08, 582.25, 582.42, 582.59],\n", " [582.76, 582.93, 583.1, 583.27, 583.44],\n", " [583.61, 583.78, 583.95, 584.12, 584.29, 584.46],\n", " [584.8, 584.97, 585.14, 585.31, 585.48],\n", " [585.99, 586.16, 586.33, 586.5, 586.67, 586.84],\n", " [586.67, 586.84, 587.01, 587.18],\n", " [588.2, 588.37, 588.54],\n", " [588.88, 589.05, 589.22],\n", " [589.73, 589.9, 590.07, 590.24],\n", " [591.26, 591.43, 591.6],\n", " [592.28, 592.45, 592.62],\n", " [593.81, 593.98, 594.15],\n", " [591.43, 591.6, 591.77, 591.94]]" ] }, "execution_count": 172, "metadata": {}, "output_type": "execute_result" } ], "source": [ "temp_filter_df.loc[0,'support_imbalance']" ] }, { "cell_type": "code", "execution_count": 68, "id": "5c967fc2", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
timestamppochighest_bid_stacked_imbalancehighest_ask_stacked_imbalancetotal_ask_imbalance_counthighest_stacked_ask_imbalancetotal_bid_imbalance_counthighest_stacked_bid_imbalanceconsecutive_poc_flaghighest_consecutive_poc_count
132024-12-02 10:35:00587.354426.0861.02111True2
462024-12-02 13:20:00591.094249.01262.00021True2
652024-12-02 14:55:00590.922789.03055.01100True2
\n", "
" ], "text/plain": [ " timestamp poc highest_bid_stacked_imbalance \\\n", "13 2024-12-02 10:35:00 587.35 4426.0 \n", "46 2024-12-02 13:20:00 591.09 4249.0 \n", "65 2024-12-02 14:55:00 590.92 2789.0 \n", "\n", " highest_ask_stacked_imbalance total_ask_imbalance_count \\\n", "13 861.0 2 \n", "46 1262.0 0 \n", "65 3055.0 1 \n", "\n", " highest_stacked_ask_imbalance total_bid_imbalance_count \\\n", "13 1 1 \n", "46 0 2 \n", "65 1 0 \n", "\n", " highest_stacked_bid_imbalance consecutive_poc_flag \\\n", "13 1 True \n", "46 1 True \n", "65 0 True \n", "\n", " highest_consecutive_poc_count \n", "13 2 \n", "46 2 \n", "65 2 " ] }, "execution_count": 68, "metadata": {}, "output_type": "execute_result" } ], "source": [ "temp_filter_df[temp_filter_df['consecutive_poc_flag']]" ] }, { "cell_type": "code", "execution_count": null, "id": "60f8fea8", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "9d066931", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "8fe2eed0", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 39, "id": "610f5a11", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\saite\\AppData\\Local\\Temp\\ipykernel_32296\\1965833506.py:38: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " filtered_df['minute'] = filtered_df['timestamp'].dt.minute # Add minute to the original DataFrame\n" ] } ], "source": [ "# Group by minute to analyze reversal and initiation\n", "def analyze_minute(group):\n", " bid_prices = group.loc[group['imbalance'] == \"sell\", \"price\"]\n", " ask_prices = group.loc[group['imbalance'] == \"buy\", \"price\"]\n", " close_price = group['price'].iloc[-1] # Last price in the minute\n", "\n", " # Initialize comment\n", " comment = \"\"\n", "\n", " # Check if close price below ask prices (buyers absorbed by sellers)\n", " if not ask_prices.empty and close_price < ask_prices.min():\n", " comment = \"Buyers absorbed by sellers.\"\n", "\n", " # Check if close price above bid prices (sellers absorbed by buyers)\n", " elif not bid_prices.empty and close_price > bid_prices.max():\n", " comment = \"Sellers absorbed by buyers.\"\n", "\n", " # Fallback comment\n", " if comment == \"\":\n", " comment = \"No significant absorption detected.\"\n", "\n", " return pd.Series({\n", " \"minute\": group['timestamp'].iloc[0].minute, # Add the minute column explicitly\n", " \"close_price\": close_price,\n", " \"comment\": comment\n", " })\n", "\n", "# Apply analysis for each minute\n", "result_df = filtered_df.groupby(filtered_df['timestamp'].dt.minute).apply(analyze_minute).reset_index(drop=True)\n", "\n", "# Ensure 'minute' column in result_df exists\n", "if 'minute' in result_df.columns:\n", " result_df.rename(columns={\"minute\": \"grouped_minute\"}, inplace=True)\n", "else:\n", " raise ValueError(\"The 'minute' column was not created in result_df.\")\n", "\n", "# Add the grouped analysis back to the original DataFrame by merging on the minute\n", "filtered_df['minute'] = filtered_df['timestamp'].dt.minute # Add minute to the original DataFrame\n", "final_df = filtered_df.merge(result_df, how=\"left\", left_on=\"minute\", right_on=\"grouped_minute\")\n", "\n", "# Drop unnecessary columns to clean up\n", "final_df.drop(columns=[\"minute\", \"grouped_minute\"], inplace=True)" ] }, { "cell_type": "code", "execution_count": 40, "id": "c35f27d8", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
timestamppricebuyVolumesellVolumeimbalanceindexclose_pricecomment
02024-11-29 09:30:00128.241032.0756.0sell4606132.02Buyers absorbed by sellers.
12024-11-29 09:30:00128.380.0904.04606132.02Buyers absorbed by sellers.
22024-11-29 09:30:00128.1074.0657.04606132.02Buyers absorbed by sellers.
32024-11-29 09:30:00127.961219.0585.0sell4606132.02Buyers absorbed by sellers.
42024-11-29 09:30:00127.821024.01495.04606132.02Buyers absorbed by sellers.
...........................
2022024-11-29 12:50:00133.561081.03640.04646133.56Sellers absorbed by buyers.
2032024-11-29 12:55:00133.5611117.011835.0sell4647133.28Buyers absorbed by sellers.
2042024-11-29 12:55:00133.70945.0661.04647133.28Buyers absorbed by sellers.
2052024-11-29 12:55:00133.423027.04472.0buy4647133.28Buyers absorbed by sellers.
2062024-11-29 12:55:00133.280.0124.04647133.28Buyers absorbed by sellers.
\n", "

207 rows × 8 columns

\n", "
" ], "text/plain": [ " timestamp price buyVolume sellVolume imbalance index \\\n", "0 2024-11-29 09:30:00 128.24 1032.0 756.0 sell 4606 \n", "1 2024-11-29 09:30:00 128.38 0.0 904.0 4606 \n", "2 2024-11-29 09:30:00 128.10 74.0 657.0 4606 \n", "3 2024-11-29 09:30:00 127.96 1219.0 585.0 sell 4606 \n", "4 2024-11-29 09:30:00 127.82 1024.0 1495.0 4606 \n", ".. ... ... ... ... ... ... \n", "202 2024-11-29 12:50:00 133.56 1081.0 3640.0 4646 \n", "203 2024-11-29 12:55:00 133.56 11117.0 11835.0 sell 4647 \n", "204 2024-11-29 12:55:00 133.70 945.0 661.0 4647 \n", "205 2024-11-29 12:55:00 133.42 3027.0 4472.0 buy 4647 \n", "206 2024-11-29 12:55:00 133.28 0.0 124.0 4647 \n", "\n", " close_price comment \n", "0 132.02 Buyers absorbed by sellers. \n", "1 132.02 Buyers absorbed by sellers. \n", "2 132.02 Buyers absorbed by sellers. \n", "3 132.02 Buyers absorbed by sellers. \n", "4 132.02 Buyers absorbed by sellers. \n", ".. ... ... \n", "202 133.56 Sellers absorbed by buyers. \n", "203 133.28 Buyers absorbed by sellers. \n", "204 133.28 Buyers absorbed by sellers. \n", "205 133.28 Buyers absorbed by sellers. \n", "206 133.28 Buyers absorbed by sellers. \n", "\n", "[207 rows x 8 columns]" ] }, "execution_count": 40, "metadata": {}, "output_type": "execute_result" } ], "source": [ "final_df" ] }, { "cell_type": "code", "execution_count": 41, "id": "3ce12eeb", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
timestampcomment
02024-11-29 09:30:00Buyers absorbed by sellers.
72024-11-29 09:35:00Sellers absorbed by buyers.
112024-11-29 09:40:00No significant absorption detected.
152024-11-29 09:45:00Sellers absorbed by buyers.
192024-11-29 09:50:00Sellers absorbed by buyers.
232024-11-29 09:55:00Buyers absorbed by sellers.
262024-11-29 10:00:00Sellers absorbed by buyers.
302024-11-29 10:05:00No significant absorption detected.
352024-11-29 10:10:00No significant absorption detected.
422024-11-29 10:15:00No significant absorption detected.
492024-11-29 10:20:00Buyers absorbed by sellers.
532024-11-29 10:25:00Sellers absorbed by buyers.
572024-11-29 10:30:00Buyers absorbed by sellers.
612024-11-29 10:35:00Sellers absorbed by buyers.
692024-11-29 10:40:00No significant absorption detected.
722024-11-29 10:45:00Sellers absorbed by buyers.
752024-11-29 10:50:00Sellers absorbed by buyers.
802024-11-29 10:55:00Buyers absorbed by sellers.
832024-11-29 11:00:00Sellers absorbed by buyers.
902024-11-29 11:05:00No significant absorption detected.
932024-11-29 11:10:00No significant absorption detected.
962024-11-29 11:15:00No significant absorption detected.
1002024-11-29 11:20:00Buyers absorbed by sellers.
1132024-11-29 11:25:00Sellers absorbed by buyers.
1202024-11-29 11:30:00Buyers absorbed by sellers.
1302024-11-29 11:35:00Sellers absorbed by buyers.
1352024-11-29 11:40:00No significant absorption detected.
1412024-11-29 11:45:00Sellers absorbed by buyers.
1452024-11-29 11:50:00Sellers absorbed by buyers.
1502024-11-29 11:55:00Buyers absorbed by sellers.
1532024-11-29 12:00:00Sellers absorbed by buyers.
1572024-11-29 12:05:00No significant absorption detected.
1602024-11-29 12:10:00No significant absorption detected.
1622024-11-29 12:15:00No significant absorption detected.
1662024-11-29 12:20:00Buyers absorbed by sellers.
1702024-11-29 12:25:00Sellers absorbed by buyers.
1832024-11-29 12:30:00Buyers absorbed by sellers.
1862024-11-29 12:35:00Sellers absorbed by buyers.
1932024-11-29 12:40:00No significant absorption detected.
1962024-11-29 12:45:00Sellers absorbed by buyers.
1992024-11-29 12:50:00Sellers absorbed by buyers.
2032024-11-29 12:55:00Buyers absorbed by sellers.
\n", "
" ], "text/plain": [ " timestamp comment\n", "0 2024-11-29 09:30:00 Buyers absorbed by sellers.\n", "7 2024-11-29 09:35:00 Sellers absorbed by buyers.\n", "11 2024-11-29 09:40:00 No significant absorption detected.\n", "15 2024-11-29 09:45:00 Sellers absorbed by buyers.\n", "19 2024-11-29 09:50:00 Sellers absorbed by buyers.\n", "23 2024-11-29 09:55:00 Buyers absorbed by sellers.\n", "26 2024-11-29 10:00:00 Sellers absorbed by buyers.\n", "30 2024-11-29 10:05:00 No significant absorption detected.\n", "35 2024-11-29 10:10:00 No significant absorption detected.\n", "42 2024-11-29 10:15:00 No significant absorption detected.\n", "49 2024-11-29 10:20:00 Buyers absorbed by sellers.\n", "53 2024-11-29 10:25:00 Sellers absorbed by buyers.\n", "57 2024-11-29 10:30:00 Buyers absorbed by sellers.\n", "61 2024-11-29 10:35:00 Sellers absorbed by buyers.\n", "69 2024-11-29 10:40:00 No significant absorption detected.\n", "72 2024-11-29 10:45:00 Sellers absorbed by buyers.\n", "75 2024-11-29 10:50:00 Sellers absorbed by buyers.\n", "80 2024-11-29 10:55:00 Buyers absorbed by sellers.\n", "83 2024-11-29 11:00:00 Sellers absorbed by buyers.\n", "90 2024-11-29 11:05:00 No significant absorption detected.\n", "93 2024-11-29 11:10:00 No significant absorption detected.\n", "96 2024-11-29 11:15:00 No significant absorption detected.\n", "100 2024-11-29 11:20:00 Buyers absorbed by sellers.\n", "113 2024-11-29 11:25:00 Sellers absorbed by buyers.\n", "120 2024-11-29 11:30:00 Buyers absorbed by sellers.\n", "130 2024-11-29 11:35:00 Sellers absorbed by buyers.\n", "135 2024-11-29 11:40:00 No significant absorption detected.\n", "141 2024-11-29 11:45:00 Sellers absorbed by buyers.\n", "145 2024-11-29 11:50:00 Sellers absorbed by buyers.\n", "150 2024-11-29 11:55:00 Buyers absorbed by sellers.\n", "153 2024-11-29 12:00:00 Sellers absorbed by buyers.\n", "157 2024-11-29 12:05:00 No significant absorption detected.\n", "160 2024-11-29 12:10:00 No significant absorption detected.\n", "162 2024-11-29 12:15:00 No significant absorption detected.\n", "166 2024-11-29 12:20:00 Buyers absorbed by sellers.\n", "170 2024-11-29 12:25:00 Sellers absorbed by buyers.\n", "183 2024-11-29 12:30:00 Buyers absorbed by sellers.\n", "186 2024-11-29 12:35:00 Sellers absorbed by buyers.\n", "193 2024-11-29 12:40:00 No significant absorption detected.\n", "196 2024-11-29 12:45:00 Sellers absorbed by buyers.\n", "199 2024-11-29 12:50:00 Sellers absorbed by buyers.\n", "203 2024-11-29 12:55:00 Buyers absorbed by sellers." ] }, "execution_count": 41, "metadata": {}, "output_type": "execute_result" } ], "source": [ "temp_df = final_df[['timestamp','comment']].drop_duplicates()\n", "\n", "temp_df" ] }, { "cell_type": "code", "execution_count": 10, "id": "2ce9b495", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
timestamppricebuyVolumesellVolumeimbalanceindex
02024-11-29 09:30:00234.780.01193.02906
12024-11-29 09:30:00234.841145.01269.02906
22024-11-29 09:30:00234.902290.0967.02906
32024-11-29 09:30:00234.962290.0646.02906
42024-11-29 09:30:00235.02382.00.02906
.....................
2802024-11-29 12:55:00237.305388.01265.0buy2947
2812024-11-29 12:55:00237.3613753.021027.0buy2947
2822024-11-29 12:55:00237.4223940.027431.02947
2832024-11-29 12:55:00237.4824117.011987.0sell2947
2842024-11-29 12:55:00237.54350.0553.02947
\n", "

285 rows × 6 columns

\n", "
" ], "text/plain": [ " timestamp price buyVolume sellVolume imbalance index\n", "0 2024-11-29 09:30:00 234.78 0.0 1193.0 2906\n", "1 2024-11-29 09:30:00 234.84 1145.0 1269.0 2906\n", "2 2024-11-29 09:30:00 234.90 2290.0 967.0 2906\n", "3 2024-11-29 09:30:00 234.96 2290.0 646.0 2906\n", "4 2024-11-29 09:30:00 235.02 382.0 0.0 2906\n", ".. ... ... ... ... ... ...\n", "280 2024-11-29 12:55:00 237.30 5388.0 1265.0 buy 2947\n", "281 2024-11-29 12:55:00 237.36 13753.0 21027.0 buy 2947\n", "282 2024-11-29 12:55:00 237.42 23940.0 27431.0 2947\n", "283 2024-11-29 12:55:00 237.48 24117.0 11987.0 sell 2947\n", "284 2024-11-29 12:55:00 237.54 350.0 553.0 2947\n", "\n", "[285 rows x 6 columns]" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "filtered_df" ] }, { "cell_type": "code", "execution_count": 11, "id": "38a4465e", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\saite\\AppData\\Local\\Temp\\ipykernel_32296\\2088222860.py:4: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " filtered_df['imbalance'] = filtered_df['imbalance'].fillna('missing_value')\n", "C:\\Users\\saite\\AppData\\Local\\Temp\\ipykernel_32296\\2088222860.py:7: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " filtered_df['imbalance'] = filtered_df['imbalance'].apply(lambda x: 1 if x == 'buy' else -1 if x == 'sell' else 0)\n", "C:\\Users\\saite\\AppData\\Local\\Temp\\ipykernel_32296\\2088222860.py:10: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " filtered_df['timestamp'] = filtered_df['timestamp'].astype(np.int64) // 10**9\n", "C:\\Users\\saite\\AppData\\Local\\Temp\\ipykernel_32296\\2088222860.py:21: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " filtered_df['anomaly'] = model.predict(filtered_df[features])\n" ] } ], "source": [ "from sklearn.ensemble import IsolationForest\n", "\n", "# Replace missing values in 'imbalance' with a specific value (e.g., 'missing_value')\n", "filtered_df['imbalance'] = filtered_df['imbalance'].fillna('missing_value')\n", "\n", "# Encode categorical 'imbalance' feature\n", "filtered_df['imbalance'] = filtered_df['imbalance'].apply(lambda x: 1 if x == 'buy' else -1 if x == 'sell' else 0)\n", "\n", "# Convert the timestamp to numerical format (e.g., seconds since epoch)\n", "filtered_df['timestamp'] = filtered_df['timestamp'].astype(np.int64) // 10**9\n", "\n", "# Select relevant features for anomaly detection\n", "features = ['timestamp', 'price', 'buyVolume', 'sellVolume', 'imbalance']\n", "\n", "# # Initialize and fit the Isolation Forest model\n", "# model = IsolationForest(contamination='auto', random_state=42)\n", "model = IsolationForest(contamination=0.03, random_state=42)\n", "model.fit(filtered_df[features])\n", "\n", "# Predict anomalies\n", "filtered_df['anomaly'] = model.predict(filtered_df[features])" ] }, { "cell_type": "code", "execution_count": 12, "id": "63a670ed", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "anomaly\n", " 1 276\n", "-1 9\n", "Name: count, dtype: int64" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "filtered_df['anomaly'].value_counts()" ] }, { "cell_type": "code", "execution_count": 13, "id": "489e5c5e", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\saite\\AppData\\Local\\Temp\\ipykernel_32296\\1014059852.py:2: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " filtered_df['timestamp'] = pd.to_datetime(filtered_df['timestamp'], unit='s')\n" ] } ], "source": [ "# Convert the numerical timestamp back to normal datetime format\n", "filtered_df['timestamp'] = pd.to_datetime(filtered_df['timestamp'], unit='s')\n", "\n", "filtered_anomaly = filtered_df[filtered_df['anomaly'] == -1]" ] }, { "cell_type": "code", "execution_count": 14, "id": "c19f3ef3", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "timestamp\n", "2024-11-29 12:55:00 3\n", "2024-11-29 09:30:00 2\n", "2024-11-29 10:25:00 1\n", "2024-11-29 11:10:00 1\n", "2024-11-29 11:50:00 1\n", "2024-11-29 12:50:00 1\n", "Name: count, dtype: int64" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "filtered_anomaly['timestamp'].value_counts()" ] }, { "cell_type": "code", "execution_count": 15, "id": "33bc2737", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
timestamppricebuyVolumesellVolumeimbalanceindexanomaly
132024-11-29 09:30:00234.122746.014200.002906-1
182024-11-29 09:30:00233.940.05707.0-12906-1
1032024-11-29 10:25:00235.265064.011759.0-12917-1
1502024-11-29 11:10:00235.9854888.00.012926-1
1962024-11-29 11:50:00237.0074386.01535.012934-1
2642024-11-29 12:50:00237.4296558.01084.012946-1
2812024-11-29 12:55:00237.3613753.021027.012947-1
2822024-11-29 12:55:00237.4223940.027431.002947-1
2832024-11-29 12:55:00237.4824117.011987.0-12947-1
\n", "
" ], "text/plain": [ " timestamp price buyVolume sellVolume imbalance index \\\n", "13 2024-11-29 09:30:00 234.12 2746.0 14200.0 0 2906 \n", "18 2024-11-29 09:30:00 233.94 0.0 5707.0 -1 2906 \n", "103 2024-11-29 10:25:00 235.26 5064.0 11759.0 -1 2917 \n", "150 2024-11-29 11:10:00 235.98 54888.0 0.0 1 2926 \n", "196 2024-11-29 11:50:00 237.00 74386.0 1535.0 1 2934 \n", "264 2024-11-29 12:50:00 237.42 96558.0 1084.0 1 2946 \n", "281 2024-11-29 12:55:00 237.36 13753.0 21027.0 1 2947 \n", "282 2024-11-29 12:55:00 237.42 23940.0 27431.0 0 2947 \n", "283 2024-11-29 12:55:00 237.48 24117.0 11987.0 -1 2947 \n", "\n", " anomaly \n", "13 -1 \n", "18 -1 \n", "103 -1 \n", "150 -1 \n", "196 -1 \n", "264 -1 \n", "281 -1 \n", "282 -1 \n", "283 -1 " ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "filtered_anomaly" ] }, { "cell_type": "code", "execution_count": 18, "id": "ac1b4ebf", "metadata": {}, "outputs": [ { "ename": "ValueError", "evalue": "Found array with 0 sample(s) (shape=(0, 4)) while a minimum of 1 is required by MinMaxScaler.", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)", "Cell \u001b[1;32mIn[18], line 22\u001b[0m\n\u001b[0;32m 20\u001b[0m \u001b[38;5;66;03m# Normalize the data\u001b[39;00m\n\u001b[0;32m 21\u001b[0m scaler \u001b[38;5;241m=\u001b[39m MinMaxScaler()\n\u001b[1;32m---> 22\u001b[0m filtered_df[features] \u001b[38;5;241m=\u001b[39m \u001b[43mscaler\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfit_transform\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfiltered_df\u001b[49m\u001b[43m[\u001b[49m\u001b[43mfeatures\u001b[49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[1;32mc:\\Users\\saite\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\sklearn\\utils\\_set_output.py:316\u001b[0m, in \u001b[0;36m_wrap_method_output..wrapped\u001b[1;34m(self, X, *args, **kwargs)\u001b[0m\n\u001b[0;32m 314\u001b[0m \u001b[38;5;129m@wraps\u001b[39m(f)\n\u001b[0;32m 315\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mwrapped\u001b[39m(\u001b[38;5;28mself\u001b[39m, X, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m--> 316\u001b[0m data_to_wrap \u001b[38;5;241m=\u001b[39m f(\u001b[38;5;28mself\u001b[39m, X, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m 317\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(data_to_wrap, \u001b[38;5;28mtuple\u001b[39m):\n\u001b[0;32m 318\u001b[0m \u001b[38;5;66;03m# only wrap the first output for cross decomposition\u001b[39;00m\n\u001b[0;32m 319\u001b[0m return_tuple \u001b[38;5;241m=\u001b[39m (\n\u001b[0;32m 320\u001b[0m _wrap_data_with_container(method, data_to_wrap[\u001b[38;5;241m0\u001b[39m], X, \u001b[38;5;28mself\u001b[39m),\n\u001b[0;32m 321\u001b[0m \u001b[38;5;241m*\u001b[39mdata_to_wrap[\u001b[38;5;241m1\u001b[39m:],\n\u001b[0;32m 322\u001b[0m )\n", "File \u001b[1;32mc:\\Users\\saite\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\sklearn\\base.py:1098\u001b[0m, in \u001b[0;36mTransformerMixin.fit_transform\u001b[1;34m(self, X, y, **fit_params)\u001b[0m\n\u001b[0;32m 1083\u001b[0m warnings\u001b[38;5;241m.\u001b[39mwarn(\n\u001b[0;32m 1084\u001b[0m (\n\u001b[0;32m 1085\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThis object (\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m) has a `transform`\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 1093\u001b[0m \u001b[38;5;167;01mUserWarning\u001b[39;00m,\n\u001b[0;32m 1094\u001b[0m )\n\u001b[0;32m 1096\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m y \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m 1097\u001b[0m \u001b[38;5;66;03m# fit method of arity 1 (unsupervised transformation)\u001b[39;00m\n\u001b[1;32m-> 1098\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mfit(X, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mfit_params)\u001b[38;5;241m.\u001b[39mtransform(X)\n\u001b[0;32m 1099\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 1100\u001b[0m \u001b[38;5;66;03m# fit method of arity 2 (supervised transformation)\u001b[39;00m\n\u001b[0;32m 1101\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mfit(X, y, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mfit_params)\u001b[38;5;241m.\u001b[39mtransform(X)\n", "File \u001b[1;32mc:\\Users\\saite\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\sklearn\\preprocessing\\_data.py:450\u001b[0m, in \u001b[0;36mMinMaxScaler.fit\u001b[1;34m(self, X, y)\u001b[0m\n\u001b[0;32m 448\u001b[0m \u001b[38;5;66;03m# Reset internal state before fitting\u001b[39;00m\n\u001b[0;32m 449\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_reset()\n\u001b[1;32m--> 450\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpartial_fit\u001b[49m\u001b[43m(\u001b[49m\u001b[43mX\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[1;32mc:\\Users\\saite\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\sklearn\\base.py:1473\u001b[0m, in \u001b[0;36m_fit_context..decorator..wrapper\u001b[1;34m(estimator, *args, **kwargs)\u001b[0m\n\u001b[0;32m 1466\u001b[0m estimator\u001b[38;5;241m.\u001b[39m_validate_params()\n\u001b[0;32m 1468\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m config_context(\n\u001b[0;32m 1469\u001b[0m skip_parameter_validation\u001b[38;5;241m=\u001b[39m(\n\u001b[0;32m 1470\u001b[0m prefer_skip_nested_validation \u001b[38;5;129;01mor\u001b[39;00m global_skip_validation\n\u001b[0;32m 1471\u001b[0m )\n\u001b[0;32m 1472\u001b[0m ):\n\u001b[1;32m-> 1473\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m fit_method(estimator, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n", "File \u001b[1;32mc:\\Users\\saite\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\sklearn\\preprocessing\\_data.py:490\u001b[0m, in \u001b[0;36mMinMaxScaler.partial_fit\u001b[1;34m(self, X, y)\u001b[0m\n\u001b[0;32m 487\u001b[0m xp, _ \u001b[38;5;241m=\u001b[39m get_namespace(X)\n\u001b[0;32m 489\u001b[0m first_pass \u001b[38;5;241m=\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mn_samples_seen_\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m--> 490\u001b[0m X \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_validate_data\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 491\u001b[0m \u001b[43m \u001b[49m\u001b[43mX\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 492\u001b[0m \u001b[43m \u001b[49m\u001b[43mreset\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfirst_pass\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 493\u001b[0m \u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_array_api\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msupported_float_dtypes\u001b[49m\u001b[43m(\u001b[49m\u001b[43mxp\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 494\u001b[0m \u001b[43m \u001b[49m\u001b[43mforce_all_finite\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mallow-nan\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[0;32m 495\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 497\u001b[0m data_min \u001b[38;5;241m=\u001b[39m _array_api\u001b[38;5;241m.\u001b[39m_nanmin(X, axis\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m0\u001b[39m, xp\u001b[38;5;241m=\u001b[39mxp)\n\u001b[0;32m 498\u001b[0m data_max \u001b[38;5;241m=\u001b[39m _array_api\u001b[38;5;241m.\u001b[39m_nanmax(X, axis\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m0\u001b[39m, xp\u001b[38;5;241m=\u001b[39mxp)\n", "File \u001b[1;32mc:\\Users\\saite\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\sklearn\\base.py:633\u001b[0m, in \u001b[0;36mBaseEstimator._validate_data\u001b[1;34m(self, X, y, reset, validate_separately, cast_to_ndarray, **check_params)\u001b[0m\n\u001b[0;32m 631\u001b[0m out \u001b[38;5;241m=\u001b[39m X, y\n\u001b[0;32m 632\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m no_val_X \u001b[38;5;129;01mand\u001b[39;00m no_val_y:\n\u001b[1;32m--> 633\u001b[0m out \u001b[38;5;241m=\u001b[39m check_array(X, input_name\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mX\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mcheck_params)\n\u001b[0;32m 634\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m no_val_X \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m no_val_y:\n\u001b[0;32m 635\u001b[0m out \u001b[38;5;241m=\u001b[39m _check_y(y, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mcheck_params)\n", "File \u001b[1;32mc:\\Users\\saite\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\sklearn\\utils\\validation.py:1087\u001b[0m, in \u001b[0;36mcheck_array\u001b[1;34m(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_writeable, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator, input_name)\u001b[0m\n\u001b[0;32m 1085\u001b[0m n_samples \u001b[38;5;241m=\u001b[39m _num_samples(array)\n\u001b[0;32m 1086\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m n_samples \u001b[38;5;241m<\u001b[39m ensure_min_samples:\n\u001b[1;32m-> 1087\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[0;32m 1088\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mFound array with \u001b[39m\u001b[38;5;132;01m%d\u001b[39;00m\u001b[38;5;124m sample(s) (shape=\u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m) while a\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 1089\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m minimum of \u001b[39m\u001b[38;5;132;01m%d\u001b[39;00m\u001b[38;5;124m is required\u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 1090\u001b[0m \u001b[38;5;241m%\u001b[39m (n_samples, array\u001b[38;5;241m.\u001b[39mshape, ensure_min_samples, context)\n\u001b[0;32m 1091\u001b[0m )\n\u001b[0;32m 1093\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m ensure_min_features \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m array\u001b[38;5;241m.\u001b[39mndim \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m2\u001b[39m:\n\u001b[0;32m 1094\u001b[0m n_features \u001b[38;5;241m=\u001b[39m array\u001b[38;5;241m.\u001b[39mshape[\u001b[38;5;241m1\u001b[39m]\n", "\u001b[1;31mValueError\u001b[0m: Found array with 0 sample(s) (shape=(0, 4)) while a minimum of 1 is required by MinMaxScaler." ] } ], "source": [ "from sklearn.preprocessing import MinMaxScaler\n", "import matplotlib.pyplot as plt\n", "from keras.models import Sequential\n", "from keras.layers import LSTM, Dense, Dropout, RepeatVector, TimeDistributed\n", "\n", "\n", "# Filter the DataFrame\n", "filtered_df = series_df[(series_df['timestamp'] >= '2024-11-29 09:30:00') & (series_df['timestamp'] < '2024-07-23 15:50:00')]\n", "filtered_df.reset_index(inplace=True,drop=True)\n", "\n", "# Replace missing values in 'imbalance' with a specific value (e.g., 'missing_value')\n", "filtered_df['imbalance'] = filtered_df['imbalance'].fillna('missing_value')\n", "\n", "# Encode categorical 'imbalance' feature\n", "filtered_df['imbalance'] = filtered_df['imbalance'].apply(lambda x: 1 if x == 'buy' else -1 if x == 'sell' else 0)\n", "\n", "# Select relevant features for anomaly detection\n", "features = ['price', 'buyVolume', 'sellVolume', 'imbalance']\n", "\n", "# Normalize the data\n", "scaler = MinMaxScaler()\n", "filtered_df[features] = scaler.fit_transform(filtered_df[features])" ] }, { "cell_type": "code", "execution_count": 71, "id": "8b4ce733", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Model: \"sequential_1\"\n", "_________________________________________________________________\n", " Layer (type) Output Shape Param # \n", "=================================================================\n", " lstm_4 (LSTM) (None, 10, 128) 68096 \n", " \n", " dropout_2 (Dropout) (None, 10, 128) 0 \n", " \n", " lstm_5 (LSTM) (None, 64) 49408 \n", " \n", " repeat_vector_1 (RepeatVect (None, 10, 64) 0 \n", " or) \n", " \n", " lstm_6 (LSTM) (None, 10, 64) 33024 \n", " \n", " dropout_3 (Dropout) (None, 10, 64) 0 \n", " \n", " lstm_7 (LSTM) (None, 10, 128) 98816 \n", " \n", " time_distributed_1 (TimeDis (None, 10, 4) 516 \n", " tributed) \n", " \n", "=================================================================\n", "Total params: 249,860\n", "Trainable params: 249,860\n", "Non-trainable params: 0\n", "_________________________________________________________________\n", "Epoch 1/50\n", "12/12 [==============================] - 5s 67ms/step - loss: 0.1506 - val_loss: 0.1098\n", "Epoch 2/50\n", "12/12 [==============================] - 0s 20ms/step - loss: 0.0920 - val_loss: 0.1008\n", "Epoch 3/50\n", "12/12 [==============================] - 0s 19ms/step - loss: 0.0751 - val_loss: 0.0820\n", "Epoch 4/50\n", "12/12 [==============================] - 0s 19ms/step - loss: 0.0689 - val_loss: 0.0710\n", "Epoch 5/50\n", "12/12 [==============================] - 0s 19ms/step - loss: 0.0645 - val_loss: 0.0639\n", "Epoch 6/50\n", "12/12 [==============================] - 0s 20ms/step - loss: 0.0573 - val_loss: 0.0510\n", "Epoch 7/50\n", "12/12 [==============================] - 0s 27ms/step - loss: 0.0519 - val_loss: 0.0426\n", "Epoch 8/50\n", "12/12 [==============================] - 0s 23ms/step - loss: 0.0483 - val_loss: 0.0374\n", "Epoch 9/50\n", "12/12 [==============================] - 0s 22ms/step - loss: 0.0453 - val_loss: 0.0381\n", "Epoch 10/50\n", "12/12 [==============================] - 0s 26ms/step - loss: 0.0433 - val_loss: 0.0352\n", "Epoch 11/50\n", "12/12 [==============================] - 0s 28ms/step - loss: 0.0433 - val_loss: 0.0355\n", "Epoch 12/50\n", "12/12 [==============================] - 0s 26ms/step - loss: 0.0417 - val_loss: 0.0353\n", "Epoch 13/50\n", "12/12 [==============================] - 0s 26ms/step - loss: 0.0403 - val_loss: 0.0360\n", "Epoch 14/50\n", "12/12 [==============================] - 0s 22ms/step - loss: 0.0395 - val_loss: 0.0359\n", "Epoch 15/50\n", "12/12 [==============================] - 0s 24ms/step - loss: 0.0435 - val_loss: 0.0338\n", "Epoch 16/50\n", "12/12 [==============================] - 0s 27ms/step - loss: 0.0405 - val_loss: 0.0362\n", "Epoch 17/50\n", "12/12 [==============================] - 0s 24ms/step - loss: 0.0404 - val_loss: 0.0358\n", "Epoch 18/50\n", "12/12 [==============================] - 0s 24ms/step - loss: 0.0399 - val_loss: 0.0452\n", "Epoch 19/50\n", "12/12 [==============================] - 0s 24ms/step - loss: 0.0400 - val_loss: 0.0309\n", "Epoch 20/50\n", "12/12 [==============================] - 0s 27ms/step - loss: 0.0395 - val_loss: 0.0341\n", "Epoch 21/50\n", "12/12 [==============================] - 0s 34ms/step - loss: 0.0388 - val_loss: 0.0345\n", "Epoch 22/50\n", "12/12 [==============================] - 0s 31ms/step - loss: 0.0381 - val_loss: 0.0345\n", "Epoch 23/50\n", "12/12 [==============================] - 0s 41ms/step - loss: 0.0386 - val_loss: 0.0312\n", "Epoch 24/50\n", "12/12 [==============================] - 1s 44ms/step - loss: 0.0380 - val_loss: 0.0333\n", "Epoch 25/50\n", "12/12 [==============================] - 1s 46ms/step - loss: 0.0391 - val_loss: 0.0331\n", "Epoch 26/50\n", "12/12 [==============================] - 1s 67ms/step - loss: 0.0382 - val_loss: 0.0341\n", "Epoch 27/50\n", "12/12 [==============================] - 1s 73ms/step - loss: 0.0382 - val_loss: 0.0326\n", "Epoch 28/50\n", "12/12 [==============================] - 1s 94ms/step - loss: 0.0396 - val_loss: 0.0363\n", "Epoch 29/50\n", "12/12 [==============================] - 3s 282ms/step - loss: 0.0413 - val_loss: 0.0337\n", "Epoch 30/50\n", "12/12 [==============================] - 8s 712ms/step - loss: 0.0398 - val_loss: 0.0322\n", "Epoch 31/50\n", "12/12 [==============================] - 9s 730ms/step - loss: 0.0410 - val_loss: 0.0368\n", "Epoch 32/50\n", "12/12 [==============================] - 8s 715ms/step - loss: 0.0382 - val_loss: 0.0313\n", "Epoch 33/50\n", "12/12 [==============================] - 9s 777ms/step - loss: 0.0374 - val_loss: 0.0321\n", "Epoch 34/50\n", "12/12 [==============================] - 8s 683ms/step - loss: 0.0382 - val_loss: 0.0310\n", "Epoch 35/50\n", "12/12 [==============================] - 2s 189ms/step - loss: 0.0366 - val_loss: 0.0314\n", "Epoch 36/50\n", "12/12 [==============================] - 2s 127ms/step - loss: 0.0365 - val_loss: 0.0303\n", "Epoch 37/50\n", "12/12 [==============================] - 2s 141ms/step - loss: 0.0364 - val_loss: 0.0318\n", "Epoch 38/50\n", "12/12 [==============================] - 1s 106ms/step - loss: 0.0354 - val_loss: 0.0304\n", "Epoch 39/50\n", "12/12 [==============================] - 1s 107ms/step - loss: 0.0347 - val_loss: 0.0315\n", "Epoch 40/50\n", "12/12 [==============================] - 1s 97ms/step - loss: 0.0341 - val_loss: 0.0305\n", "Epoch 41/50\n", "12/12 [==============================] - 1s 83ms/step - loss: 0.0336 - val_loss: 0.0292\n", "Epoch 42/50\n", "12/12 [==============================] - 1s 80ms/step - loss: 0.0330 - val_loss: 0.0291\n", "Epoch 43/50\n", "12/12 [==============================] - 1s 73ms/step - loss: 0.0329 - val_loss: 0.0289\n", "Epoch 44/50\n", "12/12 [==============================] - 1s 80ms/step - loss: 0.0316 - val_loss: 0.0286\n", "Epoch 45/50\n", "12/12 [==============================] - 1s 68ms/step - loss: 0.0310 - val_loss: 0.0283\n", "Epoch 46/50\n", "12/12 [==============================] - 1s 62ms/step - loss: 0.0304 - val_loss: 0.0275\n", "Epoch 47/50\n", "12/12 [==============================] - 1s 61ms/step - loss: 0.0308 - val_loss: 0.0297\n", "Epoch 48/50\n", "12/12 [==============================] - 1s 65ms/step - loss: 0.0316 - val_loss: 0.0280\n", "Epoch 49/50\n", "12/12 [==============================] - 1s 63ms/step - loss: 0.0335 - val_loss: 0.0314\n", "Epoch 50/50\n", "12/12 [==============================] - 1s 59ms/step - loss: 0.0319 - val_loss: 0.0277\n" ] } ], "source": [ "# Prepare the data for LSTM (we'll use sequences)\n", "def create_sequences(data, seq_length):\n", " xs, ys = [], []\n", " for i in range(len(data) - seq_length):\n", " x = data[i:(i + seq_length)]\n", " y = data[i + seq_length]\n", " xs.append(x)\n", " ys.append(y)\n", " return np.array(xs), np.array(ys)\n", "\n", "SEQ_LENGTH = 10 # Length of the sequences for LSTM\n", "X, y = create_sequences(filtered_df[features].values, SEQ_LENGTH)\n", "\n", "# Build LSTM Autoencoder Model\n", "model = Sequential([\n", " LSTM(128, activation='relu', input_shape=(SEQ_LENGTH, len(features)), return_sequences=True),\n", " Dropout(0.2),\n", " LSTM(64, activation='relu', return_sequences=False),\n", " RepeatVector(SEQ_LENGTH),\n", " LSTM(64, activation='relu', return_sequences=True),\n", " Dropout(0.2),\n", " LSTM(128, activation='relu', return_sequences=True),\n", " TimeDistributed(Dense(len(features)))\n", "])\n", "\n", "model.compile(optimizer='adam', loss='mse')\n", "model.summary()\n", "\n", "# Train the model\n", "history = model.fit(X, X, epochs=50, batch_size=32, validation_split=0.1, shuffle=False)\n" ] }, { "cell_type": "code", "execution_count": 73, "id": "d5a3d586", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "14/14 [==============================] - 0s 7ms/step\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/var/folders/gp/x0f5sgq13db073j79l3_2tb40000gn/T/ipykernel_3751/1529233682.py:17: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " filtered_df['anomaly'] = anomaly_scores\n" ] } ], "source": [ "# Predict the reconstruction\n", "X_pred = model.predict(X)\n", "\n", "# Calculate the reconstruction error\n", "reconstruction_error = np.mean(np.abs(X_pred - X), axis=(1, 2))\n", "\n", "# Threshold for anomaly detection\n", "threshold = np.mean(reconstruction_error) + 3 * np.std(reconstruction_error)\n", "\n", "# Detect anomalies\n", "anomalies = reconstruction_error > threshold\n", "\n", "# Append anomaly scores to the original DataFrame\n", "anomaly_scores = np.zeros(len(filtered_df))\n", "anomaly_scores[SEQ_LENGTH:] = anomalies\n", "\n", "filtered_df['anomaly'] = anomaly_scores" ] }, { "cell_type": "code", "execution_count": 75, "id": "b6619a44", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/var/folders/gp/x0f5sgq13db073j79l3_2tb40000gn/T/ipykernel_3751/984967263.py:2: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " filtered_df[features] = scaler.inverse_transform(filtered_df[features])\n" ] } ], "source": [ "\n", "# Inverse transform the normalized features back to their original scale\n", "filtered_df[features] = scaler.inverse_transform(filtered_df[features])\n" ] }, { "cell_type": "code", "execution_count": 76, "id": "8f58a560", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
timestamppricebuyVolumesellVolumeimbalanceindexanomaly
1382024-07-23 11:00:00206.488.00.01.022931.0
\n", "
" ], "text/plain": [ " timestamp price buyVolume sellVolume imbalance index \\\n", "138 2024-07-23 11:00:00 206.48 8.0 0.0 1.0 2293 \n", "\n", " anomaly \n", "138 1.0 " ] }, "execution_count": 76, "metadata": {}, "output_type": "execute_result" } ], "source": [ "filtered_df[filtered_df['anomaly'] == 1]" ] }, { "cell_type": "code", "execution_count": null, "id": "a1bc4efe", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.0" } }, "nbformat": 4, "nbformat_minor": 5 }