Delete Analysis_code
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- Analysis_code/0.air_data_merge.ipynb +0 -1029
- Analysis_code/1.data_merge.ipynb +0 -0
- Analysis_code/2.eda_preproccesing.ipynb +0 -3
- Analysis_code/3.oversampling.ipynb +0 -0
- Analysis_code/__pycache__/code.cpython-312.pyc +0 -0
- Analysis_code/__pycache__/deepgbm.cpython-312.pyc +0 -0
- Analysis_code/__pycache__/deepgbm.cpython-38.pyc +0 -0
- Analysis_code/__pycache__/deepgbm.cpython-39.pyc +0 -0
- Analysis_code/__pycache__/ft_transformer.cpython-312.pyc +0 -0
- Analysis_code/__pycache__/ft_transformer.cpython-38.pyc +0 -0
- Analysis_code/__pycache__/ft_transformer.cpython-39.pyc +0 -0
- Analysis_code/__pycache__/resnet_like.cpython-312.pyc +0 -0
- Analysis_code/__pycache__/resnet_like.cpython-38.pyc +0 -0
- Analysis_code/__pycache__/resnet_like.cpython-39.pyc +0 -0
- Analysis_code/best_deepgbm_model.pth +0 -3
- Analysis_code/best_model_f1.pth +0 -3
- Analysis_code/deepgbm.py +0 -47
- Analysis_code/deeplearning_model_binary.ipynb +0 -0
- Analysis_code/deeplearning_model_multi.ipynb +0 -0
- Analysis_code/final_test/final.ipynb +0 -1143
- Analysis_code/find_reason/ busan_trend.ipynb +0 -0
- Analysis_code/find_reason/ daegu_trend.ipynb +0 -0
- Analysis_code/find_reason/ gwangju_trend.ipynb +0 -0
- Analysis_code/find_reason/ incheon_trend.ipynb +0 -0
- Analysis_code/find_reason/ seoul_trend.ipynb +0 -0
- Analysis_code/find_reason/daejeon_trend.ipynb +0 -0
- Analysis_code/find_reason/make_trend_plot.ipynb +0 -0
- Analysis_code/find_reason/wasserstein_distance.ipynb +0 -541
- Analysis_code/ft_transformer.py +0 -56
- Analysis_code/make_oversample_data/gan_sample_10000_1.py +0 -181
- Analysis_code/make_oversample_data/gan_sample_10000_2.py +0 -182
- Analysis_code/make_oversample_data/gan_sample_10000_3.py +0 -182
- Analysis_code/make_oversample_data/gan_sample_20000_1.py +0 -183
- Analysis_code/make_oversample_data/gan_sample_20000_2.py +0 -183
- Analysis_code/make_oversample_data/gan_sample_20000_3.py +0 -183
- Analysis_code/make_oversample_data/gan_sample_7000_1.py +0 -180
- Analysis_code/make_oversample_data/gan_sample_7000_2.py +0 -182
- Analysis_code/make_oversample_data/gan_sample_7000_3.py +0 -182
- Analysis_code/make_oversample_data/oversampling_code.py +0 -355
- Analysis_code/make_oversample_data/smote_sample_1.py +0 -53
- Analysis_code/make_oversample_data/smote_sample_2.py +0 -53
- Analysis_code/make_oversample_data/smote_sample_3.py +0 -53
- Analysis_code/make_train_test.ipynb +0 -1099
- Analysis_code/model_result/best_sample/ensemble_best_sample.csv +0 -157
- Analysis_code/model_result/deepgbm_sampled_data_test.csv +0 -31
- Analysis_code/model_result/ft_transformer_sampled_data_test.csv +0 -31
- Analysis_code/model_result/lightgbm_sampled_data_test.csv +0 -31
- Analysis_code/model_result/resnet_like_sampled_data_test.csv +0 -31
- Analysis_code/model_result/xgboost_sampled_data_test.csv +0 -31
- Analysis_code/model_visualize.ipynb +0 -0
Analysis_code/0.air_data_merge.ipynb
DELETED
|
@@ -1,1029 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"cells": [
|
| 3 |
-
{
|
| 4 |
-
"cell_type": "code",
|
| 5 |
-
"execution_count": 1,
|
| 6 |
-
"metadata": {},
|
| 7 |
-
"outputs": [
|
| 8 |
-
{
|
| 9 |
-
"name": "stderr",
|
| 10 |
-
"output_type": "stream",
|
| 11 |
-
"text": [
|
| 12 |
-
"/opt/conda/lib/python3.8/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
|
| 13 |
-
" from .autonotebook import tqdm as notebook_tqdm\n"
|
| 14 |
-
]
|
| 15 |
-
}
|
| 16 |
-
],
|
| 17 |
-
"source": [
|
| 18 |
-
"import os\n",
|
| 19 |
-
"import numpy as np\n",
|
| 20 |
-
"import pandas as pd\n",
|
| 21 |
-
"import natsort\n",
|
| 22 |
-
"from datetime import datetime\n",
|
| 23 |
-
"from tqdm.auto import tqdm"
|
| 24 |
-
]
|
| 25 |
-
},
|
| 26 |
-
{
|
| 27 |
-
"cell_type": "code",
|
| 28 |
-
"execution_count": 2,
|
| 29 |
-
"metadata": {},
|
| 30 |
-
"outputs": [],
|
| 31 |
-
"source": [
|
| 32 |
-
"def get_data(year):\n",
|
| 33 |
-
" files = natsort.natsorted(os.listdir(f'../data/대기질/{year}/'))\n",
|
| 34 |
-
" data = []\n",
|
| 35 |
-
" for file in tqdm(files, desc=f\"Reading files...({len(files)})\"):\n",
|
| 36 |
-
" data.append(pd.read_excel(f'../data/대기질/{year}/{file}', usecols=[\"지역\", '망', \"측정소코드\", \"측정소명\", \"측정일시\", \"O3\", \"NO2\", \"PM10\", \"PM25\", \"주소\"]))\n",
|
| 37 |
-
"\n",
|
| 38 |
-
" return pd.concat(data)"
|
| 39 |
-
]
|
| 40 |
-
},
|
| 41 |
-
{
|
| 42 |
-
"cell_type": "code",
|
| 43 |
-
"execution_count": 3,
|
| 44 |
-
"metadata": {},
|
| 45 |
-
"outputs": [],
|
| 46 |
-
"source": [
|
| 47 |
-
"# 합친 데이터에 날짜 정보를 추가한다.\n",
|
| 48 |
-
"def add_date(df):\n",
|
| 49 |
-
"\n",
|
| 50 |
-
" df[\"측정일시\"] = df[\"측정일시\"].astype(str).str[:10]\n",
|
| 51 |
-
" df[\"측정일시\"] = pd.to_datetime(df[\"측정일시\"], format='%Y%m%d%H', errors=\"coerce\")\n",
|
| 52 |
-
"\n",
|
| 53 |
-
" df[\"year\"] = df[\"측정일시\"].dt.year\n",
|
| 54 |
-
" df[\"month\"] = df[\"측정일시\"].dt.month\n",
|
| 55 |
-
" df[\"day\"] = df[\"측정일시\"].dt.day\n",
|
| 56 |
-
" df[\"hour\"] = df[\"측정일시\"].dt.hour\n",
|
| 57 |
-
"\n",
|
| 58 |
-
" return df"
|
| 59 |
-
]
|
| 60 |
-
},
|
| 61 |
-
{
|
| 62 |
-
"cell_type": "code",
|
| 63 |
-
"execution_count": 4,
|
| 64 |
-
"metadata": {},
|
| 65 |
-
"outputs": [
|
| 66 |
-
{
|
| 67 |
-
"name": "stderr",
|
| 68 |
-
"output_type": "stream",
|
| 69 |
-
"text": [
|
| 70 |
-
" 0%| | 0/6 [00:00<?, ?it/s]\n",
|
| 71 |
-
"Reading files...(13): 0%| | 0/13 [00:00<?, ?it/s]\u001b[A\n",
|
| 72 |
-
"Reading files...(13): 8%|▊ | 1/13 [00:34<06:57, 34.80s/it]\u001b[A\n",
|
| 73 |
-
"Reading files...(13): 15%|█▌ | 2/13 [01:12<06:41, 36.47s/it]\u001b[A\n",
|
| 74 |
-
"Reading files...(13): 23%|██▎ | 3/13 [01:47<05:58, 35.89s/it]\u001b[A\n",
|
| 75 |
-
"Reading files...(13): 31%|███ | 4/13 [02:23<05:23, 35.96s/it]\u001b[A\n",
|
| 76 |
-
"Reading files...(13): 38%|███▊ | 5/13 [02:59<04:47, 35.92s/it]\u001b[A\n",
|
| 77 |
-
"Reading files...(13): 46%|████▌ | 6/13 [03:35<04:12, 36.09s/it]\u001b[A\n",
|
| 78 |
-
"Reading files...(13): 62%|██████▏ | 8/13 [04:12<02:16, 27.35s/it]\u001b[A\n",
|
| 79 |
-
"Reading files...(13): 69%|██████▉ | 9/13 [04:46<01:56, 29.05s/it]\u001b[A\n",
|
| 80 |
-
"Reading files...(13): 77%|███████▋ | 10/13 [05:21<01:31, 30.55s/it]\u001b[A\n",
|
| 81 |
-
"Reading files...(13): 85%|████████▍ | 11/13 [05:58<01:04, 32.46s/it]\u001b[A\n",
|
| 82 |
-
"Reading files...(13): 92%|█████████▏| 12/13 [06:37<00:34, 34.28s/it]\u001b[A\n",
|
| 83 |
-
"Reading files...(13): 100%|██████████| 13/13 [07:08<00:00, 32.93s/it]\u001b[A\n",
|
| 84 |
-
" 17%|█▋ | 1/6 [07:18<36:30, 438.18s/it]\n",
|
| 85 |
-
"Reading files...(13): 0%| | 0/13 [00:00<?, ?it/s]\u001b[A\n",
|
| 86 |
-
"Reading files...(13): 8%|▊ | 1/13 [00:43<08:41, 43.43s/it]\u001b[A\n",
|
| 87 |
-
"Reading files...(13): 15%|█▌ | 2/13 [01:26<07:56, 43.29s/it]\u001b[A\n",
|
| 88 |
-
"Reading files...(13): 23%|██▎ | 3/13 [02:07<07:02, 42.22s/it]\u001b[A\n",
|
| 89 |
-
"Reading files...(13): 31%|███ | 4/13 [02:50<06:23, 42.66s/it]\u001b[A\n",
|
| 90 |
-
"Reading files...(13): 38%|███▊ | 5/13 [03:28<05:27, 40.90s/it]\u001b[A\n",
|
| 91 |
-
"Reading files...(13): 46%|████▌ | 6/13 [04:15<04:59, 42.79s/it]\u001b[A\n",
|
| 92 |
-
"Reading files...(13): 54%|█████▍ | 7/13 [04:58<04:18, 43.14s/it]\u001b[A\n",
|
| 93 |
-
"Reading files...(13): 62%|██████▏ | 8/13 [05:43<03:37, 43.47s/it]\u001b[A\n",
|
| 94 |
-
"Reading files...(13): 69%|██████▉ | 9/13 [06:28<02:55, 43.96s/it]\u001b[A\n",
|
| 95 |
-
"Reading files...(13): 77%|███████▋ | 10/13 [07:12<02:12, 44.01s/it]\u001b[A\n",
|
| 96 |
-
"Reading files...(13): 85%|████████▍ | 11/13 [07:52<01:25, 42.90s/it]\u001b[A\n",
|
| 97 |
-
"Reading files...(13): 100%|██████████| 13/13 [08:34<00:00, 39.61s/it]\u001b[A\n",
|
| 98 |
-
" 33%|███▎ | 2/6 [16:05<32:42, 490.55s/it]\n",
|
| 99 |
-
"Reading files...(13): 0%| | 0/13 [00:00<?, ?it/s]\u001b[A\n",
|
| 100 |
-
"Reading files...(13): 8%|▊ | 1/13 [00:49<09:56, 49.74s/it]\u001b[A\n",
|
| 101 |
-
"Reading files...(13): 15%|█▌ | 2/13 [01:43<09:31, 51.98s/it]\u001b[A\n",
|
| 102 |
-
"Reading files...(13): 23%|██▎ | 3/13 [02:33<08:29, 50.96s/it]\u001b[A\n",
|
| 103 |
-
"Reading files...(13): 31%|███ | 4/13 [03:23<07:38, 50.95s/it]\u001b[A\n",
|
| 104 |
-
"Reading files...(13): 38%|███▊ | 5/13 [04:13<06:43, 50.46s/it]\u001b[A\n",
|
| 105 |
-
"Reading files...(13): 46%|████▌ | 6/13 [04:58<05:40, 48.71s/it]\u001b[A\n",
|
| 106 |
-
"Reading files...(13): 54%|█████▍ | 7/13 [05:50<04:57, 49.66s/it]\u001b[A\n",
|
| 107 |
-
"Reading files...(13): 62%|██████▏ | 8/13 [06:45<04:16, 51.29s/it]\u001b[A\n",
|
| 108 |
-
"Reading files...(13): 77%|███████▋ | 10/13 [07:38<01:58, 39.46s/it]\u001b[A\n",
|
| 109 |
-
"Reading files...(13): 85%|████████▍ | 11/13 [08:30<01:25, 42.79s/it]\u001b[A\n",
|
| 110 |
-
"Reading files...(13): 92%|█████████▏| 12/13 [09:26<00:46, 46.32s/it]\u001b[A\n",
|
| 111 |
-
"Reading files...(13): 100%|██████████| 13/13 [10:13<00:00, 47.19s/it]\u001b[A\n",
|
| 112 |
-
" 50%|█████ | 3/6 [26:32<27:38, 552.96s/it]\n",
|
| 113 |
-
"Reading files...(13): 0%| | 0/13 [00:00<?, ?it/s]\u001b[A\n",
|
| 114 |
-
"Reading files...(13): 8%|▊ | 1/13 [00:59<11:48, 59.01s/it]\u001b[A\n",
|
| 115 |
-
"Reading files...(13): 15%|█▌ | 2/13 [01:56<10:40, 58.19s/it]\u001b[A\n",
|
| 116 |
-
"Reading files...(13): 23%|██▎ | 3/13 [02:53<09:37, 57.77s/it]\u001b[A\n",
|
| 117 |
-
"Reading files...(13): 31%|███ | 4/13 [03:52<08:41, 58.00s/it]\u001b[A\n",
|
| 118 |
-
"Reading files...(13): 38%|███▊ | 5/13 [04:44<07:26, 55.77s/it]\u001b[A\n",
|
| 119 |
-
"Reading files...(13): 46%|████▌ | 6/13 [05:40<06:32, 56.05s/it]\u001b[A\n",
|
| 120 |
-
"Reading files...(13): 54%|█████▍ | 7/13 [06:36<05:36, 56.06s/it]\u001b[A\n",
|
| 121 |
-
"Reading files...(13): 62%|██████▏ | 8/13 [07:33<04:42, 56.42s/it]\u001b[A\n",
|
| 122 |
-
"Reading files...(13): 69%|██████▉ | 9/13 [08:34<03:51, 57.76s/it]\u001b[A\n",
|
| 123 |
-
"Reading files...(13): 77%|███████▋ | 10/13 [09:35<02:56, 58.75s/it]\u001b[A\n",
|
| 124 |
-
"Reading files...(13): 92%|█████████▏| 12/13 [10:33<00:44, 44.84s/it]\u001b[A\n",
|
| 125 |
-
"Reading files...(13): 100%|██████████| 13/13 [11:32<00:00, 53.29s/it]\u001b[A\n",
|
| 126 |
-
" 67%|██████▋ | 4/6 [38:20<20:28, 614.26s/it]\n",
|
| 127 |
-
"Reading files...(13): 0%| | 0/13 [00:00<?, ?it/s]\u001b[A\n",
|
| 128 |
-
"Reading files...(13): 8%|▊ | 1/13 [00:59<11:57, 59.79s/it]\u001b[A\n",
|
| 129 |
-
"Reading files...(13): 15%|█▌ | 2/13 [02:01<11:07, 60.67s/it]\u001b[A\n",
|
| 130 |
-
"Reading files...(13): 23%|██▎ | 3/13 [03:02<10:10, 61.02s/it]\u001b[A\n",
|
| 131 |
-
"Reading files...(13): 31%|███ | 4/13 [03:57<08:48, 58.74s/it]\u001b[A\n",
|
| 132 |
-
"Reading files...(13): 38%|███▊ | 5/13 [04:57<07:53, 59.18s/it]\u001b[A\n",
|
| 133 |
-
"Reading files...(13): 46%|████▌ | 6/13 [06:00<07:03, 60.45s/it]\u001b[A\n",
|
| 134 |
-
"Reading files...(13): 54%|█████▍ | 7/13 [07:00<06:02, 60.38s/it]\u001b[A\n",
|
| 135 |
-
"Reading files...(13): 62%|██████▏ | 8/13 [08:02<05:04, 60.85s/it]\u001b[A\n",
|
| 136 |
-
"Reading files...(13): 69%|██████▉ | 9/13 [09:04<04:04, 61.03s/it]\u001b[A\n",
|
| 137 |
-
"Reading files...(13): 77%|███████▋ | 10/13 [10:04<03:02, 60.67s/it]\u001b[A\n",
|
| 138 |
-
"Reading files...(13): 92%|█████████▏| 12/13 [11:06<00:46, 46.76s/it]\u001b[A\n",
|
| 139 |
-
"Reading files...(13): 100%|██████████| 13/13 [12:09<00:00, 56.08s/it]\u001b[A\n",
|
| 140 |
-
" 83%|████████▎ | 5/6 [50:46<11:01, 661.78s/it]\n",
|
| 141 |
-
"Reading files...(13): 0%| | 0/13 [00:00<?, ?it/s]\u001b[A\n",
|
| 142 |
-
"Reading files...(13): 8%|▊ | 1/13 [01:03<12:46, 63.88s/it]\u001b[A\n",
|
| 143 |
-
"Reading files...(13): 15%|█▌ | 2/13 [02:08<11:50, 64.56s/it]\u001b[A\n",
|
| 144 |
-
"Reading files...(13): 23%|██▎ | 3/13 [03:10<10:32, 63.22s/it]\u001b[A\n",
|
| 145 |
-
"Reading files...(13): 31%|███ | 4/13 [04:07<09:05, 60.63s/it]\u001b[A\n",
|
| 146 |
-
"Reading files...(13): 38%|███▊ | 5/13 [05:09<08:11, 61.41s/it]\u001b[A\n",
|
| 147 |
-
"Reading files...(13): 46%|████▌ | 6/13 [06:12<07:13, 61.92s/it]\u001b[A\n",
|
| 148 |
-
"Reading files...(13): 54%|█████▍ | 7/13 [07:13<06:09, 61.50s/it]\u001b[A\n",
|
| 149 |
-
"Reading files...(13): 62%|██████▏ | 8/13 [08:15<05:08, 61.64s/it]\u001b[A\n",
|
| 150 |
-
"Reading files...(13): 69%|██████▉ | 9/13 [09:17<04:07, 61.81s/it]\u001b[A\n",
|
| 151 |
-
"Reading files...(13): 77%|███████▋ | 10/13 [10:19<03:05, 61.96s/it]\u001b[A\n",
|
| 152 |
-
"Reading files...(13): 92%|█████████▏| 12/13 [11:23<00:47, 47.75s/it]\u001b[A\n",
|
| 153 |
-
"Reading files...(13): 100%|██████████| 13/13 [12:27<00:00, 57.50s/it]\u001b[A\n",
|
| 154 |
-
"100%|██████████| 6/6 [1:03:31<00:00, 635.28s/it]\n"
|
| 155 |
-
]
|
| 156 |
-
}
|
| 157 |
-
],
|
| 158 |
-
"source": [
|
| 159 |
-
"import os\n",
|
| 160 |
-
"import pandas as pd\n",
|
| 161 |
-
"from tqdm.auto import tqdm\n",
|
| 162 |
-
"\n",
|
| 163 |
-
"# 대기질 데이터를 불러와서 하나의 파일로 합친다.\n",
|
| 164 |
-
"def get_data(year):\n",
|
| 165 |
-
" directory = f'../data/대기질/{year}/'\n",
|
| 166 |
-
" files = os.listdir(directory)\n",
|
| 167 |
-
" data = []\n",
|
| 168 |
-
" \n",
|
| 169 |
-
" # 파일 목록에서 디렉토리를 제외하고 오직 Excel 파일만 처리\n",
|
| 170 |
-
" for file in tqdm(files, desc=f\"Reading files...({len(files)})\"):\n",
|
| 171 |
-
" file_path = os.path.join(directory, file)\n",
|
| 172 |
-
" if os.path.isfile(file_path) and file_path.endswith(('.xls', '.xlsx')): # Excel 파일 확장자만 허용\n",
|
| 173 |
-
" data.append(pd.read_excel(file_path, usecols=[\"지역\", '망', \"측정소코드\", \"측정소명\", \"측정일시\", \"O3\", \"NO2\", \"PM10\", \"PM25\", \"주소\"]))\n",
|
| 174 |
-
" \n",
|
| 175 |
-
" return pd.concat(data)\n",
|
| 176 |
-
"\n",
|
| 177 |
-
"years = [2018, 2019, 2020,2021,2022,2023] # 2018년부터 2023년까지의 데이터를 합친다.\n",
|
| 178 |
-
"for year in tqdm(years):\n",
|
| 179 |
-
" data = get_data(year)\n",
|
| 180 |
-
" data = add_date(data)\n",
|
| 181 |
-
" data.reset_index(drop=True, inplace=True)\n",
|
| 182 |
-
" data.to_feather(f\"../data/대기질/{year}.feather\")\n"
|
| 183 |
-
]
|
| 184 |
-
},
|
| 185 |
-
{
|
| 186 |
-
"cell_type": "code",
|
| 187 |
-
"execution_count": 6,
|
| 188 |
-
"metadata": {},
|
| 189 |
-
"outputs": [
|
| 190 |
-
{
|
| 191 |
-
"data": {
|
| 192 |
-
"text/html": [
|
| 193 |
-
"<div>\n",
|
| 194 |
-
"<style scoped>\n",
|
| 195 |
-
" .dataframe tbody tr th:only-of-type {\n",
|
| 196 |
-
" vertical-align: middle;\n",
|
| 197 |
-
" }\n",
|
| 198 |
-
"\n",
|
| 199 |
-
" .dataframe tbody tr th {\n",
|
| 200 |
-
" vertical-align: top;\n",
|
| 201 |
-
" }\n",
|
| 202 |
-
"\n",
|
| 203 |
-
" .dataframe thead th {\n",
|
| 204 |
-
" text-align: right;\n",
|
| 205 |
-
" }\n",
|
| 206 |
-
"</style>\n",
|
| 207 |
-
"<table border=\"1\" class=\"dataframe\">\n",
|
| 208 |
-
" <thead>\n",
|
| 209 |
-
" <tr style=\"text-align: right;\">\n",
|
| 210 |
-
" <th></th>\n",
|
| 211 |
-
" <th>지역</th>\n",
|
| 212 |
-
" <th>망</th>\n",
|
| 213 |
-
" <th>측정소코드</th>\n",
|
| 214 |
-
" <th>측정소명</th>\n",
|
| 215 |
-
" <th>측정일시</th>\n",
|
| 216 |
-
" <th>O3</th>\n",
|
| 217 |
-
" <th>NO2</th>\n",
|
| 218 |
-
" <th>PM10</th>\n",
|
| 219 |
-
" <th>PM25</th>\n",
|
| 220 |
-
" <th>주소</th>\n",
|
| 221 |
-
" <th>year</th>\n",
|
| 222 |
-
" <th>month</th>\n",
|
| 223 |
-
" <th>day</th>\n",
|
| 224 |
-
" <th>hour</th>\n",
|
| 225 |
-
" </tr>\n",
|
| 226 |
-
" </thead>\n",
|
| 227 |
-
" <tbody>\n",
|
| 228 |
-
" <tr>\n",
|
| 229 |
-
" <th>0</th>\n",
|
| 230 |
-
" <td>서울 중구</td>\n",
|
| 231 |
-
" <td>도시대기</td>\n",
|
| 232 |
-
" <td>111121</td>\n",
|
| 233 |
-
" <td>중구</td>\n",
|
| 234 |
-
" <td>2023-07-01 01:00:00</td>\n",
|
| 235 |
-
" <td>0.0249</td>\n",
|
| 236 |
-
" <td>0.0188</td>\n",
|
| 237 |
-
" <td>21.0</td>\n",
|
| 238 |
-
" <td>19.0</td>\n",
|
| 239 |
-
" <td>서울 중구 덕수궁길 15</td>\n",
|
| 240 |
-
" <td>2023.0</td>\n",
|
| 241 |
-
" <td>7.0</td>\n",
|
| 242 |
-
" <td>1.0</td>\n",
|
| 243 |
-
" <td>1.0</td>\n",
|
| 244 |
-
" </tr>\n",
|
| 245 |
-
" <tr>\n",
|
| 246 |
-
" <th>1</th>\n",
|
| 247 |
-
" <td>서울 중구</td>\n",
|
| 248 |
-
" <td>도시대기</td>\n",
|
| 249 |
-
" <td>111121</td>\n",
|
| 250 |
-
" <td>중구</td>\n",
|
| 251 |
-
" <td>2023-07-01 02:00:00</td>\n",
|
| 252 |
-
" <td>0.0263</td>\n",
|
| 253 |
-
" <td>0.0163</td>\n",
|
| 254 |
-
" <td>18.0</td>\n",
|
| 255 |
-
" <td>15.0</td>\n",
|
| 256 |
-
" <td>서울 중구 덕수궁길 15</td>\n",
|
| 257 |
-
" <td>2023.0</td>\n",
|
| 258 |
-
" <td>7.0</td>\n",
|
| 259 |
-
" <td>1.0</td>\n",
|
| 260 |
-
" <td>2.0</td>\n",
|
| 261 |
-
" </tr>\n",
|
| 262 |
-
" <tr>\n",
|
| 263 |
-
" <th>2</th>\n",
|
| 264 |
-
" <td>서울 중구</td>\n",
|
| 265 |
-
" <td>도시대기</td>\n",
|
| 266 |
-
" <td>111121</td>\n",
|
| 267 |
-
" <td>중구</td>\n",
|
| 268 |
-
" <td>2023-07-01 03:00:00</td>\n",
|
| 269 |
-
" <td>0.0218</td>\n",
|
| 270 |
-
" <td>0.0192</td>\n",
|
| 271 |
-
" <td>24.0</td>\n",
|
| 272 |
-
" <td>21.0</td>\n",
|
| 273 |
-
" <td>서울 중구 덕수궁길 15</td>\n",
|
| 274 |
-
" <td>2023.0</td>\n",
|
| 275 |
-
" <td>7.0</td>\n",
|
| 276 |
-
" <td>1.0</td>\n",
|
| 277 |
-
" <td>3.0</td>\n",
|
| 278 |
-
" </tr>\n",
|
| 279 |
-
" <tr>\n",
|
| 280 |
-
" <th>3</th>\n",
|
| 281 |
-
" <td>서울 중구</td>\n",
|
| 282 |
-
" <td>도시대기</td>\n",
|
| 283 |
-
" <td>111121</td>\n",
|
| 284 |
-
" <td>중구</td>\n",
|
| 285 |
-
" <td>2023-07-01 04:00:00</td>\n",
|
| 286 |
-
" <td>0.0131</td>\n",
|
| 287 |
-
" <td>0.0214</td>\n",
|
| 288 |
-
" <td>25.0</td>\n",
|
| 289 |
-
" <td>19.0</td>\n",
|
| 290 |
-
" <td>서울 중구 덕수궁길 15</td>\n",
|
| 291 |
-
" <td>2023.0</td>\n",
|
| 292 |
-
" <td>7.0</td>\n",
|
| 293 |
-
" <td>1.0</td>\n",
|
| 294 |
-
" <td>4.0</td>\n",
|
| 295 |
-
" </tr>\n",
|
| 296 |
-
" <tr>\n",
|
| 297 |
-
" <th>4</th>\n",
|
| 298 |
-
" <td>서울 중구</td>\n",
|
| 299 |
-
" <td>도시대기</td>\n",
|
| 300 |
-
" <td>111121</td>\n",
|
| 301 |
-
" <td>중구</td>\n",
|
| 302 |
-
" <td>2023-07-01 05:00:00</td>\n",
|
| 303 |
-
" <td>0.0131</td>\n",
|
| 304 |
-
" <td>0.0160</td>\n",
|
| 305 |
-
" <td>25.0</td>\n",
|
| 306 |
-
" <td>21.0</td>\n",
|
| 307 |
-
" <td>서울 중구 덕수궁길 15</td>\n",
|
| 308 |
-
" <td>2023.0</td>\n",
|
| 309 |
-
" <td>7.0</td>\n",
|
| 310 |
-
" <td>1.0</td>\n",
|
| 311 |
-
" <td>5.0</td>\n",
|
| 312 |
-
" </tr>\n",
|
| 313 |
-
" <tr>\n",
|
| 314 |
-
" <th>5</th>\n",
|
| 315 |
-
" <td>서울 중구</td>\n",
|
| 316 |
-
" <td>도시대기</td>\n",
|
| 317 |
-
" <td>111121</td>\n",
|
| 318 |
-
" <td>중구</td>\n",
|
| 319 |
-
" <td>2023-07-01 06:00:00</td>\n",
|
| 320 |
-
" <td>0.0115</td>\n",
|
| 321 |
-
" <td>0.0196</td>\n",
|
| 322 |
-
" <td>23.0</td>\n",
|
| 323 |
-
" <td>18.0</td>\n",
|
| 324 |
-
" <td>서울 중구 덕수궁길 15</td>\n",
|
| 325 |
-
" <td>2023.0</td>\n",
|
| 326 |
-
" <td>7.0</td>\n",
|
| 327 |
-
" <td>1.0</td>\n",
|
| 328 |
-
" <td>6.0</td>\n",
|
| 329 |
-
" </tr>\n",
|
| 330 |
-
" <tr>\n",
|
| 331 |
-
" <th>6</th>\n",
|
| 332 |
-
" <td>서울 중구</td>\n",
|
| 333 |
-
" <td>도시대기</td>\n",
|
| 334 |
-
" <td>111121</td>\n",
|
| 335 |
-
" <td>중구</td>\n",
|
| 336 |
-
" <td>2023-07-01 07:00:00</td>\n",
|
| 337 |
-
" <td>0.0094</td>\n",
|
| 338 |
-
" <td>0.0230</td>\n",
|
| 339 |
-
" <td>26.0</td>\n",
|
| 340 |
-
" <td>21.0</td>\n",
|
| 341 |
-
" <td>서울 중구 덕수궁길 15</td>\n",
|
| 342 |
-
" <td>2023.0</td>\n",
|
| 343 |
-
" <td>7.0</td>\n",
|
| 344 |
-
" <td>1.0</td>\n",
|
| 345 |
-
" <td>7.0</td>\n",
|
| 346 |
-
" </tr>\n",
|
| 347 |
-
" <tr>\n",
|
| 348 |
-
" <th>7</th>\n",
|
| 349 |
-
" <td>서울 중구</td>\n",
|
| 350 |
-
" <td>도시대기</td>\n",
|
| 351 |
-
" <td>111121</td>\n",
|
| 352 |
-
" <td>중구</td>\n",
|
| 353 |
-
" <td>2023-07-01 08:00:00</td>\n",
|
| 354 |
-
" <td>0.0222</td>\n",
|
| 355 |
-
" <td>0.0175</td>\n",
|
| 356 |
-
" <td>26.0</td>\n",
|
| 357 |
-
" <td>20.0</td>\n",
|
| 358 |
-
" <td>서울 중구 덕수궁길 15</td>\n",
|
| 359 |
-
" <td>2023.0</td>\n",
|
| 360 |
-
" <td>7.0</td>\n",
|
| 361 |
-
" <td>1.0</td>\n",
|
| 362 |
-
" <td>8.0</td>\n",
|
| 363 |
-
" </tr>\n",
|
| 364 |
-
" <tr>\n",
|
| 365 |
-
" <th>8</th>\n",
|
| 366 |
-
" <td>서울 중구</td>\n",
|
| 367 |
-
" <td>도시대기</td>\n",
|
| 368 |
-
" <td>111121</td>\n",
|
| 369 |
-
" <td>중구</td>\n",
|
| 370 |
-
" <td>2023-07-01 09:00:00</td>\n",
|
| 371 |
-
" <td>0.0396</td>\n",
|
| 372 |
-
" <td>0.0153</td>\n",
|
| 373 |
-
" <td>27.0</td>\n",
|
| 374 |
-
" <td>20.0</td>\n",
|
| 375 |
-
" <td>서울 중구 덕수궁길 15</td>\n",
|
| 376 |
-
" <td>2023.0</td>\n",
|
| 377 |
-
" <td>7.0</td>\n",
|
| 378 |
-
" <td>1.0</td>\n",
|
| 379 |
-
" <td>9.0</td>\n",
|
| 380 |
-
" </tr>\n",
|
| 381 |
-
" <tr>\n",
|
| 382 |
-
" <th>9</th>\n",
|
| 383 |
-
" <td>서울 중구</td>\n",
|
| 384 |
-
" <td>도시대기</td>\n",
|
| 385 |
-
" <td>111121</td>\n",
|
| 386 |
-
" <td>중구</td>\n",
|
| 387 |
-
" <td>2023-07-01 10:00:00</td>\n",
|
| 388 |
-
" <td>0.0530</td>\n",
|
| 389 |
-
" <td>0.0105</td>\n",
|
| 390 |
-
" <td>19.0</td>\n",
|
| 391 |
-
" <td>16.0</td>\n",
|
| 392 |
-
" <td>서울 중구 덕수궁길 15</td>\n",
|
| 393 |
-
" <td>2023.0</td>\n",
|
| 394 |
-
" <td>7.0</td>\n",
|
| 395 |
-
" <td>1.0</td>\n",
|
| 396 |
-
" <td>10.0</td>\n",
|
| 397 |
-
" </tr>\n",
|
| 398 |
-
" <tr>\n",
|
| 399 |
-
" <th>10</th>\n",
|
| 400 |
-
" <td>서울 중구</td>\n",
|
| 401 |
-
" <td>도시대기</td>\n",
|
| 402 |
-
" <td>111121</td>\n",
|
| 403 |
-
" <td>중구</td>\n",
|
| 404 |
-
" <td>2023-07-01 11:00:00</td>\n",
|
| 405 |
-
" <td>0.0607</td>\n",
|
| 406 |
-
" <td>0.0090</td>\n",
|
| 407 |
-
" <td>20.0</td>\n",
|
| 408 |
-
" <td>20.0</td>\n",
|
| 409 |
-
" <td>서울 중구 덕수궁길 15</td>\n",
|
| 410 |
-
" <td>2023.0</td>\n",
|
| 411 |
-
" <td>7.0</td>\n",
|
| 412 |
-
" <td>1.0</td>\n",
|
| 413 |
-
" <td>11.0</td>\n",
|
| 414 |
-
" </tr>\n",
|
| 415 |
-
" <tr>\n",
|
| 416 |
-
" <th>11</th>\n",
|
| 417 |
-
" <td>서울 중구</td>\n",
|
| 418 |
-
" <td>도시대기</td>\n",
|
| 419 |
-
" <td>111121</td>\n",
|
| 420 |
-
" <td>중구</td>\n",
|
| 421 |
-
" <td>2023-07-01 12:00:00</td>\n",
|
| 422 |
-
" <td>0.0688</td>\n",
|
| 423 |
-
" <td>0.0114</td>\n",
|
| 424 |
-
" <td>20.0</td>\n",
|
| 425 |
-
" <td>17.0</td>\n",
|
| 426 |
-
" <td>서울 중구 덕수궁길 15</td>\n",
|
| 427 |
-
" <td>2023.0</td>\n",
|
| 428 |
-
" <td>7.0</td>\n",
|
| 429 |
-
" <td>1.0</td>\n",
|
| 430 |
-
" <td>12.0</td>\n",
|
| 431 |
-
" </tr>\n",
|
| 432 |
-
" <tr>\n",
|
| 433 |
-
" <th>12</th>\n",
|
| 434 |
-
" <td>서울 중구</td>\n",
|
| 435 |
-
" <td>도시대기</td>\n",
|
| 436 |
-
" <td>111121</td>\n",
|
| 437 |
-
" <td>중구</td>\n",
|
| 438 |
-
" <td>2023-07-01 13:00:00</td>\n",
|
| 439 |
-
" <td>0.0758</td>\n",
|
| 440 |
-
" <td>0.0101</td>\n",
|
| 441 |
-
" <td>23.0</td>\n",
|
| 442 |
-
" <td>17.0</td>\n",
|
| 443 |
-
" <td>서울 중구 덕수궁길 15</td>\n",
|
| 444 |
-
" <td>2023.0</td>\n",
|
| 445 |
-
" <td>7.0</td>\n",
|
| 446 |
-
" <td>1.0</td>\n",
|
| 447 |
-
" <td>13.0</td>\n",
|
| 448 |
-
" </tr>\n",
|
| 449 |
-
" <tr>\n",
|
| 450 |
-
" <th>13</th>\n",
|
| 451 |
-
" <td>서울 중구</td>\n",
|
| 452 |
-
" <td>도시대기</td>\n",
|
| 453 |
-
" <td>111121</td>\n",
|
| 454 |
-
" <td>중구</td>\n",
|
| 455 |
-
" <td>2023-07-01 14:00:00</td>\n",
|
| 456 |
-
" <td>0.0743</td>\n",
|
| 457 |
-
" <td>0.0093</td>\n",
|
| 458 |
-
" <td>20.0</td>\n",
|
| 459 |
-
" <td>17.0</td>\n",
|
| 460 |
-
" <td>서울 중구 덕수궁길 15</td>\n",
|
| 461 |
-
" <td>2023.0</td>\n",
|
| 462 |
-
" <td>7.0</td>\n",
|
| 463 |
-
" <td>1.0</td>\n",
|
| 464 |
-
" <td>14.0</td>\n",
|
| 465 |
-
" </tr>\n",
|
| 466 |
-
" <tr>\n",
|
| 467 |
-
" <th>14</th>\n",
|
| 468 |
-
" <td>서울 중구</td>\n",
|
| 469 |
-
" <td>도시대기</td>\n",
|
| 470 |
-
" <td>111121</td>\n",
|
| 471 |
-
" <td>중구</td>\n",
|
| 472 |
-
" <td>2023-07-01 15:00:00</td>\n",
|
| 473 |
-
" <td>0.0749</td>\n",
|
| 474 |
-
" <td>0.0100</td>\n",
|
| 475 |
-
" <td>19.0</td>\n",
|
| 476 |
-
" <td>11.0</td>\n",
|
| 477 |
-
" <td>서울 중구 덕수궁길 15</td>\n",
|
| 478 |
-
" <td>2023.0</td>\n",
|
| 479 |
-
" <td>7.0</td>\n",
|
| 480 |
-
" <td>1.0</td>\n",
|
| 481 |
-
" <td>15.0</td>\n",
|
| 482 |
-
" </tr>\n",
|
| 483 |
-
" <tr>\n",
|
| 484 |
-
" <th>15</th>\n",
|
| 485 |
-
" <td>서울 중구</td>\n",
|
| 486 |
-
" <td>도시대기</td>\n",
|
| 487 |
-
" <td>111121</td>\n",
|
| 488 |
-
" <td>중구</td>\n",
|
| 489 |
-
" <td>2023-07-01 16:00:00</td>\n",
|
| 490 |
-
" <td>0.0716</td>\n",
|
| 491 |
-
" <td>0.0092</td>\n",
|
| 492 |
-
" <td>19.0</td>\n",
|
| 493 |
-
" <td>15.0</td>\n",
|
| 494 |
-
" <td>서울 중구 덕수궁길 15</td>\n",
|
| 495 |
-
" <td>2023.0</td>\n",
|
| 496 |
-
" <td>7.0</td>\n",
|
| 497 |
-
" <td>1.0</td>\n",
|
| 498 |
-
" <td>16.0</td>\n",
|
| 499 |
-
" </tr>\n",
|
| 500 |
-
" <tr>\n",
|
| 501 |
-
" <th>16</th>\n",
|
| 502 |
-
" <td>서울 중구</td>\n",
|
| 503 |
-
" <td>도시대기</td>\n",
|
| 504 |
-
" <td>111121</td>\n",
|
| 505 |
-
" <td>중구</td>\n",
|
| 506 |
-
" <td>2023-07-01 17:00:00</td>\n",
|
| 507 |
-
" <td>0.0613</td>\n",
|
| 508 |
-
" <td>0.0099</td>\n",
|
| 509 |
-
" <td>18.0</td>\n",
|
| 510 |
-
" <td>15.0</td>\n",
|
| 511 |
-
" <td>서울 중구 덕수궁길 15</td>\n",
|
| 512 |
-
" <td>2023.0</td>\n",
|
| 513 |
-
" <td>7.0</td>\n",
|
| 514 |
-
" <td>1.0</td>\n",
|
| 515 |
-
" <td>17.0</td>\n",
|
| 516 |
-
" </tr>\n",
|
| 517 |
-
" <tr>\n",
|
| 518 |
-
" <th>17</th>\n",
|
| 519 |
-
" <td>서울 중구</td>\n",
|
| 520 |
-
" <td>도시대기</td>\n",
|
| 521 |
-
" <td>111121</td>\n",
|
| 522 |
-
" <td>중구</td>\n",
|
| 523 |
-
" <td>2023-07-01 18:00:00</td>\n",
|
| 524 |
-
" <td>0.0496</td>\n",
|
| 525 |
-
" <td>0.0098</td>\n",
|
| 526 |
-
" <td>18.0</td>\n",
|
| 527 |
-
" <td>14.0</td>\n",
|
| 528 |
-
" <td>서울 중구 덕수궁길 15</td>\n",
|
| 529 |
-
" <td>2023.0</td>\n",
|
| 530 |
-
" <td>7.0</td>\n",
|
| 531 |
-
" <td>1.0</td>\n",
|
| 532 |
-
" <td>18.0</td>\n",
|
| 533 |
-
" </tr>\n",
|
| 534 |
-
" <tr>\n",
|
| 535 |
-
" <th>18</th>\n",
|
| 536 |
-
" <td>서울 중구</td>\n",
|
| 537 |
-
" <td>도시대기</td>\n",
|
| 538 |
-
" <td>111121</td>\n",
|
| 539 |
-
" <td>중구</td>\n",
|
| 540 |
-
" <td>2023-07-01 19:00:00</td>\n",
|
| 541 |
-
" <td>0.0473</td>\n",
|
| 542 |
-
" <td>0.0124</td>\n",
|
| 543 |
-
" <td>17.0</td>\n",
|
| 544 |
-
" <td>17.0</td>\n",
|
| 545 |
-
" <td>서울 중구 덕수궁길 15</td>\n",
|
| 546 |
-
" <td>2023.0</td>\n",
|
| 547 |
-
" <td>7.0</td>\n",
|
| 548 |
-
" <td>1.0</td>\n",
|
| 549 |
-
" <td>19.0</td>\n",
|
| 550 |
-
" </tr>\n",
|
| 551 |
-
" <tr>\n",
|
| 552 |
-
" <th>19</th>\n",
|
| 553 |
-
" <td>서울 중구</td>\n",
|
| 554 |
-
" <td>도시대기</td>\n",
|
| 555 |
-
" <td>111121</td>\n",
|
| 556 |
-
" <td>중구</td>\n",
|
| 557 |
-
" <td>2023-07-01 20:00:00</td>\n",
|
| 558 |
-
" <td>0.0498</td>\n",
|
| 559 |
-
" <td>0.0170</td>\n",
|
| 560 |
-
" <td>17.0</td>\n",
|
| 561 |
-
" <td>15.0</td>\n",
|
| 562 |
-
" <td>서울 중구 덕수궁길 15</td>\n",
|
| 563 |
-
" <td>2023.0</td>\n",
|
| 564 |
-
" <td>7.0</td>\n",
|
| 565 |
-
" <td>1.0</td>\n",
|
| 566 |
-
" <td>20.0</td>\n",
|
| 567 |
-
" </tr>\n",
|
| 568 |
-
" <tr>\n",
|
| 569 |
-
" <th>20</th>\n",
|
| 570 |
-
" <td>서울 중구</td>\n",
|
| 571 |
-
" <td>도시대기</td>\n",
|
| 572 |
-
" <td>111121</td>\n",
|
| 573 |
-
" <td>중구</td>\n",
|
| 574 |
-
" <td>2023-07-01 21:00:00</td>\n",
|
| 575 |
-
" <td>0.0616</td>\n",
|
| 576 |
-
" <td>0.0134</td>\n",
|
| 577 |
-
" <td>23.0</td>\n",
|
| 578 |
-
" <td>20.0</td>\n",
|
| 579 |
-
" <td>서울 중구 덕수궁길 15</td>\n",
|
| 580 |
-
" <td>2023.0</td>\n",
|
| 581 |
-
" <td>7.0</td>\n",
|
| 582 |
-
" <td>1.0</td>\n",
|
| 583 |
-
" <td>21.0</td>\n",
|
| 584 |
-
" </tr>\n",
|
| 585 |
-
" <tr>\n",
|
| 586 |
-
" <th>21</th>\n",
|
| 587 |
-
" <td>서울 중구</td>\n",
|
| 588 |
-
" <td>도시대기</td>\n",
|
| 589 |
-
" <td>111121</td>\n",
|
| 590 |
-
" <td>중구</td>\n",
|
| 591 |
-
" <td>2023-07-01 22:00:00</td>\n",
|
| 592 |
-
" <td>0.0543</td>\n",
|
| 593 |
-
" <td>0.0109</td>\n",
|
| 594 |
-
" <td>18.0</td>\n",
|
| 595 |
-
" <td>16.0</td>\n",
|
| 596 |
-
" <td>서울 중구 덕수궁길 15</td>\n",
|
| 597 |
-
" <td>2023.0</td>\n",
|
| 598 |
-
" <td>7.0</td>\n",
|
| 599 |
-
" <td>1.0</td>\n",
|
| 600 |
-
" <td>22.0</td>\n",
|
| 601 |
-
" </tr>\n",
|
| 602 |
-
" <tr>\n",
|
| 603 |
-
" <th>22</th>\n",
|
| 604 |
-
" <td>서울 중구</td>\n",
|
| 605 |
-
" <td>도시대기</td>\n",
|
| 606 |
-
" <td>111121</td>\n",
|
| 607 |
-
" <td>중구</td>\n",
|
| 608 |
-
" <td>2023-07-01 23:00:00</td>\n",
|
| 609 |
-
" <td>0.0507</td>\n",
|
| 610 |
-
" <td>0.0113</td>\n",
|
| 611 |
-
" <td>17.0</td>\n",
|
| 612 |
-
" <td>16.0</td>\n",
|
| 613 |
-
" <td>서울 중구 덕수궁길 15</td>\n",
|
| 614 |
-
" <td>2023.0</td>\n",
|
| 615 |
-
" <td>7.0</td>\n",
|
| 616 |
-
" <td>1.0</td>\n",
|
| 617 |
-
" <td>23.0</td>\n",
|
| 618 |
-
" </tr>\n",
|
| 619 |
-
" <tr>\n",
|
| 620 |
-
" <th>23</th>\n",
|
| 621 |
-
" <td>서울 중구</td>\n",
|
| 622 |
-
" <td>도시대기</td>\n",
|
| 623 |
-
" <td>111121</td>\n",
|
| 624 |
-
" <td>중구</td>\n",
|
| 625 |
-
" <td>NaT</td>\n",
|
| 626 |
-
" <td>0.0427</td>\n",
|
| 627 |
-
" <td>0.0125</td>\n",
|
| 628 |
-
" <td>17.0</td>\n",
|
| 629 |
-
" <td>16.0</td>\n",
|
| 630 |
-
" <td>서울 중구 덕수궁길 15</td>\n",
|
| 631 |
-
" <td>NaN</td>\n",
|
| 632 |
-
" <td>NaN</td>\n",
|
| 633 |
-
" <td>NaN</td>\n",
|
| 634 |
-
" <td>NaN</td>\n",
|
| 635 |
-
" </tr>\n",
|
| 636 |
-
" <tr>\n",
|
| 637 |
-
" <th>24</th>\n",
|
| 638 |
-
" <td>서울 중구</td>\n",
|
| 639 |
-
" <td>도시대기</td>\n",
|
| 640 |
-
" <td>111121</td>\n",
|
| 641 |
-
" <td>중구</td>\n",
|
| 642 |
-
" <td>2023-07-02 01:00:00</td>\n",
|
| 643 |
-
" <td>0.0334</td>\n",
|
| 644 |
-
" <td>0.0148</td>\n",
|
| 645 |
-
" <td>21.0</td>\n",
|
| 646 |
-
" <td>20.0</td>\n",
|
| 647 |
-
" <td>서울 중구 덕수궁길 15</td>\n",
|
| 648 |
-
" <td>2023.0</td>\n",
|
| 649 |
-
" <td>7.0</td>\n",
|
| 650 |
-
" <td>2.0</td>\n",
|
| 651 |
-
" <td>1.0</td>\n",
|
| 652 |
-
" </tr>\n",
|
| 653 |
-
" <tr>\n",
|
| 654 |
-
" <th>25</th>\n",
|
| 655 |
-
" <td>서울 중구</td>\n",
|
| 656 |
-
" <td>도시대기</td>\n",
|
| 657 |
-
" <td>111121</td>\n",
|
| 658 |
-
" <td>중구</td>\n",
|
| 659 |
-
" <td>2023-07-02 02:00:00</td>\n",
|
| 660 |
-
" <td>0.0337</td>\n",
|
| 661 |
-
" <td>0.0133</td>\n",
|
| 662 |
-
" <td>22.0</td>\n",
|
| 663 |
-
" <td>18.0</td>\n",
|
| 664 |
-
" <td>서울 중구 덕수궁길 15</td>\n",
|
| 665 |
-
" <td>2023.0</td>\n",
|
| 666 |
-
" <td>7.0</td>\n",
|
| 667 |
-
" <td>2.0</td>\n",
|
| 668 |
-
" <td>2.0</td>\n",
|
| 669 |
-
" </tr>\n",
|
| 670 |
-
" <tr>\n",
|
| 671 |
-
" <th>26</th>\n",
|
| 672 |
-
" <td>서울 중구</td>\n",
|
| 673 |
-
" <td>도시대기</td>\n",
|
| 674 |
-
" <td>111121</td>\n",
|
| 675 |
-
" <td>중구</td>\n",
|
| 676 |
-
" <td>2023-07-02 03:00:00</td>\n",
|
| 677 |
-
" <td>0.0260</td>\n",
|
| 678 |
-
" <td>0.0162</td>\n",
|
| 679 |
-
" <td>25.0</td>\n",
|
| 680 |
-
" <td>20.0</td>\n",
|
| 681 |
-
" <td>서울 중구 덕수궁길 15</td>\n",
|
| 682 |
-
" <td>2023.0</td>\n",
|
| 683 |
-
" <td>7.0</td>\n",
|
| 684 |
-
" <td>2.0</td>\n",
|
| 685 |
-
" <td>3.0</td>\n",
|
| 686 |
-
" </tr>\n",
|
| 687 |
-
" <tr>\n",
|
| 688 |
-
" <th>27</th>\n",
|
| 689 |
-
" <td>서울 중구</td>\n",
|
| 690 |
-
" <td>도시대기</td>\n",
|
| 691 |
-
" <td>111121</td>\n",
|
| 692 |
-
" <td>중구</td>\n",
|
| 693 |
-
" <td>2023-07-02 04:00:00</td>\n",
|
| 694 |
-
" <td>0.0195</td>\n",
|
| 695 |
-
" <td>0.0179</td>\n",
|
| 696 |
-
" <td>22.0</td>\n",
|
| 697 |
-
" <td>18.0</td>\n",
|
| 698 |
-
" <td>서울 중구 덕수궁길 15</td>\n",
|
| 699 |
-
" <td>2023.0</td>\n",
|
| 700 |
-
" <td>7.0</td>\n",
|
| 701 |
-
" <td>2.0</td>\n",
|
| 702 |
-
" <td>4.0</td>\n",
|
| 703 |
-
" </tr>\n",
|
| 704 |
-
" <tr>\n",
|
| 705 |
-
" <th>28</th>\n",
|
| 706 |
-
" <td>서울 중구</td>\n",
|
| 707 |
-
" <td>도시대기</td>\n",
|
| 708 |
-
" <td>111121</td>\n",
|
| 709 |
-
" <td>중구</td>\n",
|
| 710 |
-
" <td>2023-07-02 05:00:00</td>\n",
|
| 711 |
-
" <td>0.0171</td>\n",
|
| 712 |
-
" <td>0.0170</td>\n",
|
| 713 |
-
" <td>19.0</td>\n",
|
| 714 |
-
" <td>17.0</td>\n",
|
| 715 |
-
" <td>서울 중구 덕수궁길 15</td>\n",
|
| 716 |
-
" <td>2023.0</td>\n",
|
| 717 |
-
" <td>7.0</td>\n",
|
| 718 |
-
" <td>2.0</td>\n",
|
| 719 |
-
" <td>5.0</td>\n",
|
| 720 |
-
" </tr>\n",
|
| 721 |
-
" <tr>\n",
|
| 722 |
-
" <th>29</th>\n",
|
| 723 |
-
" <td>서울 중구</td>\n",
|
| 724 |
-
" <td>도시대기</td>\n",
|
| 725 |
-
" <td>111121</td>\n",
|
| 726 |
-
" <td>중구</td>\n",
|
| 727 |
-
" <td>2023-07-02 06:00:00</td>\n",
|
| 728 |
-
" <td>0.0181</td>\n",
|
| 729 |
-
" <td>0.0145</td>\n",
|
| 730 |
-
" <td>14.0</td>\n",
|
| 731 |
-
" <td>10.0</td>\n",
|
| 732 |
-
" <td>서울 중구 덕수궁길 15</td>\n",
|
| 733 |
-
" <td>2023.0</td>\n",
|
| 734 |
-
" <td>7.0</td>\n",
|
| 735 |
-
" <td>2.0</td>\n",
|
| 736 |
-
" <td>6.0</td>\n",
|
| 737 |
-
" </tr>\n",
|
| 738 |
-
" <tr>\n",
|
| 739 |
-
" <th>30</th>\n",
|
| 740 |
-
" <td>서울 중구</td>\n",
|
| 741 |
-
" <td>도시대기</td>\n",
|
| 742 |
-
" <td>111121</td>\n",
|
| 743 |
-
" <td>중구</td>\n",
|
| 744 |
-
" <td>2023-07-02 07:00:00</td>\n",
|
| 745 |
-
" <td>0.0174</td>\n",
|
| 746 |
-
" <td>0.0156</td>\n",
|
| 747 |
-
" <td>11.0</td>\n",
|
| 748 |
-
" <td>10.0</td>\n",
|
| 749 |
-
" <td>서울 중구 덕수궁길 15</td>\n",
|
| 750 |
-
" <td>2023.0</td>\n",
|
| 751 |
-
" <td>7.0</td>\n",
|
| 752 |
-
" <td>2.0</td>\n",
|
| 753 |
-
" <td>7.0</td>\n",
|
| 754 |
-
" </tr>\n",
|
| 755 |
-
" <tr>\n",
|
| 756 |
-
" <th>31</th>\n",
|
| 757 |
-
" <td>서울 중구</td>\n",
|
| 758 |
-
" <td>도시대기</td>\n",
|
| 759 |
-
" <td>111121</td>\n",
|
| 760 |
-
" <td>중구</td>\n",
|
| 761 |
-
" <td>2023-07-02 08:00:00</td>\n",
|
| 762 |
-
" <td>0.0213</td>\n",
|
| 763 |
-
" <td>0.0147</td>\n",
|
| 764 |
-
" <td>12.0</td>\n",
|
| 765 |
-
" <td>9.0</td>\n",
|
| 766 |
-
" <td>서울 중구 덕수궁길 15</td>\n",
|
| 767 |
-
" <td>2023.0</td>\n",
|
| 768 |
-
" <td>7.0</td>\n",
|
| 769 |
-
" <td>2.0</td>\n",
|
| 770 |
-
" <td>8.0</td>\n",
|
| 771 |
-
" </tr>\n",
|
| 772 |
-
" <tr>\n",
|
| 773 |
-
" <th>32</th>\n",
|
| 774 |
-
" <td>서울 중구</td>\n",
|
| 775 |
-
" <td>도시대기</td>\n",
|
| 776 |
-
" <td>111121</td>\n",
|
| 777 |
-
" <td>중구</td>\n",
|
| 778 |
-
" <td>2023-07-02 09:00:00</td>\n",
|
| 779 |
-
" <td>0.0267</td>\n",
|
| 780 |
-
" <td>0.0143</td>\n",
|
| 781 |
-
" <td>11.0</td>\n",
|
| 782 |
-
" <td>10.0</td>\n",
|
| 783 |
-
" <td>서울 중구 덕수궁길 15</td>\n",
|
| 784 |
-
" <td>2023.0</td>\n",
|
| 785 |
-
" <td>7.0</td>\n",
|
| 786 |
-
" <td>2.0</td>\n",
|
| 787 |
-
" <td>9.0</td>\n",
|
| 788 |
-
" </tr>\n",
|
| 789 |
-
" <tr>\n",
|
| 790 |
-
" <th>33</th>\n",
|
| 791 |
-
" <td>서울 중구</td>\n",
|
| 792 |
-
" <td>도시대기</td>\n",
|
| 793 |
-
" <td>111121</td>\n",
|
| 794 |
-
" <td>중구</td>\n",
|
| 795 |
-
" <td>2023-07-02 10:00:00</td>\n",
|
| 796 |
-
" <td>0.0289</td>\n",
|
| 797 |
-
" <td>0.0155</td>\n",
|
| 798 |
-
" <td>12.0</td>\n",
|
| 799 |
-
" <td>9.0</td>\n",
|
| 800 |
-
" <td>서울 중구 덕수궁길 15</td>\n",
|
| 801 |
-
" <td>2023.0</td>\n",
|
| 802 |
-
" <td>7.0</td>\n",
|
| 803 |
-
" <td>2.0</td>\n",
|
| 804 |
-
" <td>10.0</td>\n",
|
| 805 |
-
" </tr>\n",
|
| 806 |
-
" <tr>\n",
|
| 807 |
-
" <th>34</th>\n",
|
| 808 |
-
" <td>서울 중구</td>\n",
|
| 809 |
-
" <td>도시대기</td>\n",
|
| 810 |
-
" <td>111121</td>\n",
|
| 811 |
-
" <td>중구</td>\n",
|
| 812 |
-
" <td>2023-07-02 11:00:00</td>\n",
|
| 813 |
-
" <td>0.0381</td>\n",
|
| 814 |
-
" <td>0.0108</td>\n",
|
| 815 |
-
" <td>13.0</td>\n",
|
| 816 |
-
" <td>13.0</td>\n",
|
| 817 |
-
" <td>서울 중구 덕수궁길 15</td>\n",
|
| 818 |
-
" <td>2023.0</td>\n",
|
| 819 |
-
" <td>7.0</td>\n",
|
| 820 |
-
" <td>2.0</td>\n",
|
| 821 |
-
" <td>11.0</td>\n",
|
| 822 |
-
" </tr>\n",
|
| 823 |
-
" <tr>\n",
|
| 824 |
-
" <th>35</th>\n",
|
| 825 |
-
" <td>서울 중구</td>\n",
|
| 826 |
-
" <td>도시대기</td>\n",
|
| 827 |
-
" <td>111121</td>\n",
|
| 828 |
-
" <td>중구</td>\n",
|
| 829 |
-
" <td>2023-07-02 12:00:00</td>\n",
|
| 830 |
-
" <td>0.0441</td>\n",
|
| 831 |
-
" <td>0.0079</td>\n",
|
| 832 |
-
" <td>13.0</td>\n",
|
| 833 |
-
" <td>12.0</td>\n",
|
| 834 |
-
" <td>서울 중구 덕수궁길 15</td>\n",
|
| 835 |
-
" <td>2023.0</td>\n",
|
| 836 |
-
" <td>7.0</td>\n",
|
| 837 |
-
" <td>2.0</td>\n",
|
| 838 |
-
" <td>12.0</td>\n",
|
| 839 |
-
" </tr>\n",
|
| 840 |
-
" <tr>\n",
|
| 841 |
-
" <th>36</th>\n",
|
| 842 |
-
" <td>서울 중구</td>\n",
|
| 843 |
-
" <td>도시대기</td>\n",
|
| 844 |
-
" <td>111121</td>\n",
|
| 845 |
-
" <td>중구</td>\n",
|
| 846 |
-
" <td>2023-07-02 13:00:00</td>\n",
|
| 847 |
-
" <td>0.0489</td>\n",
|
| 848 |
-
" <td>0.0067</td>\n",
|
| 849 |
-
" <td>8.0</td>\n",
|
| 850 |
-
" <td>10.0</td>\n",
|
| 851 |
-
" <td>서울 중구 덕수궁길 15</td>\n",
|
| 852 |
-
" <td>2023.0</td>\n",
|
| 853 |
-
" <td>7.0</td>\n",
|
| 854 |
-
" <td>2.0</td>\n",
|
| 855 |
-
" <td>13.0</td>\n",
|
| 856 |
-
" </tr>\n",
|
| 857 |
-
" <tr>\n",
|
| 858 |
-
" <th>37</th>\n",
|
| 859 |
-
" <td>서울 중구</td>\n",
|
| 860 |
-
" <td>도시대기</td>\n",
|
| 861 |
-
" <td>111121</td>\n",
|
| 862 |
-
" <td>중구</td>\n",
|
| 863 |
-
" <td>2023-07-02 14:00:00</td>\n",
|
| 864 |
-
" <td>0.0498</td>\n",
|
| 865 |
-
" <td>0.0072</td>\n",
|
| 866 |
-
" <td>11.0</td>\n",
|
| 867 |
-
" <td>10.0</td>\n",
|
| 868 |
-
" <td>서울 중구 덕수궁길 15</td>\n",
|
| 869 |
-
" <td>2023.0</td>\n",
|
| 870 |
-
" <td>7.0</td>\n",
|
| 871 |
-
" <td>2.0</td>\n",
|
| 872 |
-
" <td>14.0</td>\n",
|
| 873 |
-
" </tr>\n",
|
| 874 |
-
" <tr>\n",
|
| 875 |
-
" <th>38</th>\n",
|
| 876 |
-
" <td>서울 중구</td>\n",
|
| 877 |
-
" <td>도시대기</td>\n",
|
| 878 |
-
" <td>111121</td>\n",
|
| 879 |
-
" <td>중구</td>\n",
|
| 880 |
-
" <td>2023-07-02 15:00:00</td>\n",
|
| 881 |
-
" <td>0.0459</td>\n",
|
| 882 |
-
" <td>0.0073</td>\n",
|
| 883 |
-
" <td>14.0</td>\n",
|
| 884 |
-
" <td>12.0</td>\n",
|
| 885 |
-
" <td>서울 중구 덕수궁길 15</td>\n",
|
| 886 |
-
" <td>2023.0</td>\n",
|
| 887 |
-
" <td>7.0</td>\n",
|
| 888 |
-
" <td>2.0</td>\n",
|
| 889 |
-
" <td>15.0</td>\n",
|
| 890 |
-
" </tr>\n",
|
| 891 |
-
" <tr>\n",
|
| 892 |
-
" <th>39</th>\n",
|
| 893 |
-
" <td>서울 중구</td>\n",
|
| 894 |
-
" <td>도시대기</td>\n",
|
| 895 |
-
" <td>111121</td>\n",
|
| 896 |
-
" <td>중구</td>\n",
|
| 897 |
-
" <td>2023-07-02 16:00:00</td>\n",
|
| 898 |
-
" <td>0.0474</td>\n",
|
| 899 |
-
" <td>0.0079</td>\n",
|
| 900 |
-
" <td>12.0</td>\n",
|
| 901 |
-
" <td>11.0</td>\n",
|
| 902 |
-
" <td>서울 중구 덕수궁길 15</td>\n",
|
| 903 |
-
" <td>2023.0</td>\n",
|
| 904 |
-
" <td>7.0</td>\n",
|
| 905 |
-
" <td>2.0</td>\n",
|
| 906 |
-
" <td>16.0</td>\n",
|
| 907 |
-
" </tr>\n",
|
| 908 |
-
" </tbody>\n",
|
| 909 |
-
"</table>\n",
|
| 910 |
-
"</div>"
|
| 911 |
-
],
|
| 912 |
-
"text/plain": [
|
| 913 |
-
" 지역 망 측정소코드 측정소명 측정일시 O3 NO2 PM10 PM25 \\\n",
|
| 914 |
-
"0 서울 중구 도시대기 111121 중구 2023-07-01 01:00:00 0.0249 0.0188 21.0 19.0 \n",
|
| 915 |
-
"1 서울 중구 도시대기 111121 중구 2023-07-01 02:00:00 0.0263 0.0163 18.0 15.0 \n",
|
| 916 |
-
"2 서울 중구 도시대기 111121 중구 2023-07-01 03:00:00 0.0218 0.0192 24.0 21.0 \n",
|
| 917 |
-
"3 서울 중구 도시대기 111121 중구 2023-07-01 04:00:00 0.0131 0.0214 25.0 19.0 \n",
|
| 918 |
-
"4 서울 중구 도시대기 111121 중구 2023-07-01 05:00:00 0.0131 0.0160 25.0 21.0 \n",
|
| 919 |
-
"5 서울 중구 도시대기 111121 중구 2023-07-01 06:00:00 0.0115 0.0196 23.0 18.0 \n",
|
| 920 |
-
"6 서울 중구 도시대기 111121 중구 2023-07-01 07:00:00 0.0094 0.0230 26.0 21.0 \n",
|
| 921 |
-
"7 서울 중구 도시대기 111121 중구 2023-07-01 08:00:00 0.0222 0.0175 26.0 20.0 \n",
|
| 922 |
-
"8 서울 중구 도시대기 111121 중구 2023-07-01 09:00:00 0.0396 0.0153 27.0 20.0 \n",
|
| 923 |
-
"9 서울 중구 도시대기 111121 중구 2023-07-01 10:00:00 0.0530 0.0105 19.0 16.0 \n",
|
| 924 |
-
"10 서울 중구 도시대기 111121 중구 2023-07-01 11:00:00 0.0607 0.0090 20.0 20.0 \n",
|
| 925 |
-
"11 서울 중구 도시대기 111121 중구 2023-07-01 12:00:00 0.0688 0.0114 20.0 17.0 \n",
|
| 926 |
-
"12 서울 중구 도시대기 111121 중구 2023-07-01 13:00:00 0.0758 0.0101 23.0 17.0 \n",
|
| 927 |
-
"13 서울 중구 도시대기 111121 중구 2023-07-01 14:00:00 0.0743 0.0093 20.0 17.0 \n",
|
| 928 |
-
"14 서울 중구 도시대기 111121 중구 2023-07-01 15:00:00 0.0749 0.0100 19.0 11.0 \n",
|
| 929 |
-
"15 서울 중구 도시대기 111121 중구 2023-07-01 16:00:00 0.0716 0.0092 19.0 15.0 \n",
|
| 930 |
-
"16 서울 중구 도시대기 111121 중구 2023-07-01 17:00:00 0.0613 0.0099 18.0 15.0 \n",
|
| 931 |
-
"17 서울 중구 도시대기 111121 중구 2023-07-01 18:00:00 0.0496 0.0098 18.0 14.0 \n",
|
| 932 |
-
"18 서울 중구 도시대기 111121 중구 2023-07-01 19:00:00 0.0473 0.0124 17.0 17.0 \n",
|
| 933 |
-
"19 서울 중구 도시대기 111121 중구 2023-07-01 20:00:00 0.0498 0.0170 17.0 15.0 \n",
|
| 934 |
-
"20 서울 중구 도시대기 111121 중구 2023-07-01 21:00:00 0.0616 0.0134 23.0 20.0 \n",
|
| 935 |
-
"21 서울 중구 도시대기 111121 중구 2023-07-01 22:00:00 0.0543 0.0109 18.0 16.0 \n",
|
| 936 |
-
"22 서울 중구 도시대기 111121 중구 2023-07-01 23:00:00 0.0507 0.0113 17.0 16.0 \n",
|
| 937 |
-
"23 서울 중구 도시대기 111121 중구 NaT 0.0427 0.0125 17.0 16.0 \n",
|
| 938 |
-
"24 서울 중구 도시대기 111121 중구 2023-07-02 01:00:00 0.0334 0.0148 21.0 20.0 \n",
|
| 939 |
-
"25 서울 중구 도시대기 111121 중구 2023-07-02 02:00:00 0.0337 0.0133 22.0 18.0 \n",
|
| 940 |
-
"26 서울 중구 도시대기 111121 중구 2023-07-02 03:00:00 0.0260 0.0162 25.0 20.0 \n",
|
| 941 |
-
"27 서울 중구 도시대기 111121 중구 2023-07-02 04:00:00 0.0195 0.0179 22.0 18.0 \n",
|
| 942 |
-
"28 서울 중구 도시대기 111121 중구 2023-07-02 05:00:00 0.0171 0.0170 19.0 17.0 \n",
|
| 943 |
-
"29 서울 중구 도시대기 111121 중구 2023-07-02 06:00:00 0.0181 0.0145 14.0 10.0 \n",
|
| 944 |
-
"30 서울 중구 도시대기 111121 중구 2023-07-02 07:00:00 0.0174 0.0156 11.0 10.0 \n",
|
| 945 |
-
"31 서울 중구 도시대기 111121 중구 2023-07-02 08:00:00 0.0213 0.0147 12.0 9.0 \n",
|
| 946 |
-
"32 서울 중구 도시대기 111121 중구 2023-07-02 09:00:00 0.0267 0.0143 11.0 10.0 \n",
|
| 947 |
-
"33 서울 중구 도시대기 111121 중구 2023-07-02 10:00:00 0.0289 0.0155 12.0 9.0 \n",
|
| 948 |
-
"34 서울 중구 도시대기 111121 중구 2023-07-02 11:00:00 0.0381 0.0108 13.0 13.0 \n",
|
| 949 |
-
"35 서울 중구 도시대기 111121 중구 2023-07-02 12:00:00 0.0441 0.0079 13.0 12.0 \n",
|
| 950 |
-
"36 서울 중구 도시대기 111121 중구 2023-07-02 13:00:00 0.0489 0.0067 8.0 10.0 \n",
|
| 951 |
-
"37 서울 중구 도시대기 111121 중구 2023-07-02 14:00:00 0.0498 0.0072 11.0 10.0 \n",
|
| 952 |
-
"38 서울 중구 도시대기 111121 중구 2023-07-02 15:00:00 0.0459 0.0073 14.0 12.0 \n",
|
| 953 |
-
"39 서울 중구 도시대기 111121 중구 2023-07-02 16:00:00 0.0474 0.0079 12.0 11.0 \n",
|
| 954 |
-
"\n",
|
| 955 |
-
" 주소 year month day hour \n",
|
| 956 |
-
"0 서울 중구 덕수궁길 15 2023.0 7.0 1.0 1.0 \n",
|
| 957 |
-
"1 서울 중구 덕수궁길 15 2023.0 7.0 1.0 2.0 \n",
|
| 958 |
-
"2 서울 중구 덕수궁길 15 2023.0 7.0 1.0 3.0 \n",
|
| 959 |
-
"3 서울 중구 덕수궁길 15 2023.0 7.0 1.0 4.0 \n",
|
| 960 |
-
"4 서울 중구 덕수궁길 15 2023.0 7.0 1.0 5.0 \n",
|
| 961 |
-
"5 서울 중구 덕수궁길 15 2023.0 7.0 1.0 6.0 \n",
|
| 962 |
-
"6 서울 중구 덕수궁길 15 2023.0 7.0 1.0 7.0 \n",
|
| 963 |
-
"7 서울 중구 덕수궁길 15 2023.0 7.0 1.0 8.0 \n",
|
| 964 |
-
"8 서울 중구 덕수궁길 15 2023.0 7.0 1.0 9.0 \n",
|
| 965 |
-
"9 서울 중구 덕수궁길 15 2023.0 7.0 1.0 10.0 \n",
|
| 966 |
-
"10 서울 중구 덕수궁길 15 2023.0 7.0 1.0 11.0 \n",
|
| 967 |
-
"11 서울 중구 덕수궁길 15 2023.0 7.0 1.0 12.0 \n",
|
| 968 |
-
"12 서울 중구 덕수궁길 15 2023.0 7.0 1.0 13.0 \n",
|
| 969 |
-
"13 서울 중구 덕수궁길 15 2023.0 7.0 1.0 14.0 \n",
|
| 970 |
-
"14 서울 중구 덕수궁길 15 2023.0 7.0 1.0 15.0 \n",
|
| 971 |
-
"15 서울 중구 덕수궁길 15 2023.0 7.0 1.0 16.0 \n",
|
| 972 |
-
"16 서울 중구 덕수궁길 15 2023.0 7.0 1.0 17.0 \n",
|
| 973 |
-
"17 서울 중구 덕수궁길 15 2023.0 7.0 1.0 18.0 \n",
|
| 974 |
-
"18 서울 중구 덕수궁길 15 2023.0 7.0 1.0 19.0 \n",
|
| 975 |
-
"19 서울 중구 덕수궁길 15 2023.0 7.0 1.0 20.0 \n",
|
| 976 |
-
"20 서울 중구 덕수궁길 15 2023.0 7.0 1.0 21.0 \n",
|
| 977 |
-
"21 서울 중구 덕수궁길 15 2023.0 7.0 1.0 22.0 \n",
|
| 978 |
-
"22 서울 중구 덕수궁길 15 2023.0 7.0 1.0 23.0 \n",
|
| 979 |
-
"23 서울 중구 덕수궁길 15 NaN NaN NaN NaN \n",
|
| 980 |
-
"24 서울 중구 덕수궁길 15 2023.0 7.0 2.0 1.0 \n",
|
| 981 |
-
"25 서울 중구 덕수궁길 15 2023.0 7.0 2.0 2.0 \n",
|
| 982 |
-
"26 서울 중구 덕수궁길 15 2023.0 7.0 2.0 3.0 \n",
|
| 983 |
-
"27 서울 중구 덕수궁길 15 2023.0 7.0 2.0 4.0 \n",
|
| 984 |
-
"28 서울 중구 덕수궁길 15 2023.0 7.0 2.0 5.0 \n",
|
| 985 |
-
"29 서울 중구 덕수궁길 15 2023.0 7.0 2.0 6.0 \n",
|
| 986 |
-
"30 서울 중구 덕수궁길 15 2023.0 7.0 2.0 7.0 \n",
|
| 987 |
-
"31 서울 중구 덕수궁길 15 2023.0 7.0 2.0 8.0 \n",
|
| 988 |
-
"32 서울 중구 덕수궁길 15 2023.0 7.0 2.0 9.0 \n",
|
| 989 |
-
"33 서울 중구 덕수궁길 15 2023.0 7.0 2.0 10.0 \n",
|
| 990 |
-
"34 서울 중구 덕수궁길 15 2023.0 7.0 2.0 11.0 \n",
|
| 991 |
-
"35 서울 중구 덕수궁길 15 2023.0 7.0 2.0 12.0 \n",
|
| 992 |
-
"36 서울 중구 덕수궁길 15 2023.0 7.0 2.0 13.0 \n",
|
| 993 |
-
"37 서울 중구 덕수궁길 15 2023.0 7.0 2.0 14.0 \n",
|
| 994 |
-
"38 서울 중구 덕수궁길 15 2023.0 7.0 2.0 15.0 \n",
|
| 995 |
-
"39 서울 중구 덕수궁길 15 2023.0 7.0 2.0 16.0 "
|
| 996 |
-
]
|
| 997 |
-
},
|
| 998 |
-
"execution_count": 6,
|
| 999 |
-
"metadata": {},
|
| 1000 |
-
"output_type": "execute_result"
|
| 1001 |
-
}
|
| 1002 |
-
],
|
| 1003 |
-
"source": [
|
| 1004 |
-
"data.head(40)"
|
| 1005 |
-
]
|
| 1006 |
-
}
|
| 1007 |
-
],
|
| 1008 |
-
"metadata": {
|
| 1009 |
-
"kernelspec": {
|
| 1010 |
-
"display_name": "Python 3 (ipykernel)",
|
| 1011 |
-
"language": "python",
|
| 1012 |
-
"name": "python3"
|
| 1013 |
-
},
|
| 1014 |
-
"language_info": {
|
| 1015 |
-
"codemirror_mode": {
|
| 1016 |
-
"name": "ipython",
|
| 1017 |
-
"version": 3
|
| 1018 |
-
},
|
| 1019 |
-
"file_extension": ".py",
|
| 1020 |
-
"mimetype": "text/x-python",
|
| 1021 |
-
"name": "python",
|
| 1022 |
-
"nbconvert_exporter": "python",
|
| 1023 |
-
"pygments_lexer": "ipython3",
|
| 1024 |
-
"version": "3.8.13"
|
| 1025 |
-
}
|
| 1026 |
-
},
|
| 1027 |
-
"nbformat": 4,
|
| 1028 |
-
"nbformat_minor": 4
|
| 1029 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Analysis_code/1.data_merge.ipynb
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Analysis_code/2.eda_preproccesing.ipynb
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:f8fe49fd26e48bcada89076a0b3c8ffe45e3d2e8407fe953b1558df9bfcfcddb
|
| 3 |
-
size 41518580
|
|
|
|
|
|
|
|
|
|
|
|
Analysis_code/3.oversampling.ipynb
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Analysis_code/__pycache__/code.cpython-312.pyc
DELETED
|
Binary file (13.8 kB)
|
|
|
Analysis_code/__pycache__/deepgbm.cpython-312.pyc
DELETED
|
Binary file (2.67 kB)
|
|
|
Analysis_code/__pycache__/deepgbm.cpython-38.pyc
DELETED
|
Binary file (1.88 kB)
|
|
|
Analysis_code/__pycache__/deepgbm.cpython-39.pyc
DELETED
|
Binary file (1.82 kB)
|
|
|
Analysis_code/__pycache__/ft_transformer.cpython-312.pyc
DELETED
|
Binary file (2.71 kB)
|
|
|
Analysis_code/__pycache__/ft_transformer.cpython-38.pyc
DELETED
|
Binary file (1.99 kB)
|
|
|
Analysis_code/__pycache__/ft_transformer.cpython-39.pyc
DELETED
|
Binary file (1.93 kB)
|
|
|
Analysis_code/__pycache__/resnet_like.cpython-312.pyc
DELETED
|
Binary file (2.26 kB)
|
|
|
Analysis_code/__pycache__/resnet_like.cpython-38.pyc
DELETED
|
Binary file (1.54 kB)
|
|
|
Analysis_code/__pycache__/resnet_like.cpython-39.pyc
DELETED
|
Binary file (1.48 kB)
|
|
|
Analysis_code/best_deepgbm_model.pth
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:471ce03d818bdefc9631121537afd4b771d85d872f72d74634e5cee824de2b62
|
| 3 |
-
size 988522
|
|
|
|
|
|
|
|
|
|
|
|
Analysis_code/best_model_f1.pth
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:12c6e86e039528b07c4cc6e90e3033da7b02d67c33539cbb81d280da281f46c3
|
| 3 |
-
size 8999933
|
|
|
|
|
|
|
|
|
|
|
|
Analysis_code/deepgbm.py
DELETED
|
@@ -1,47 +0,0 @@
|
|
| 1 |
-
import torch
|
| 2 |
-
import torch.nn as nn
|
| 3 |
-
import torch.nn.functional as F
|
| 4 |
-
|
| 5 |
-
class DeepGBM(nn.Module):
    """Residual MLP over the sum of a numeric projection and categorical embeddings.

    Numeric inputs are projected to ``d_main`` with a single linear layer, each
    categorical column is embedded into ``d_main``, and the embeddings are
    summed and added to the numeric projection. The result passes through
    ``n_blocks`` residual blocks and a final linear output layer.

    Args:
        num_features: Size of the last dimension of ``x_num``.
        cat_features: Iterable of vocabulary sizes, one per categorical column.
            May be empty, in which case no categorical branch is built.
        num_classes: Number of target classes. ``2`` yields a single output
            unit; values above ``2`` yield ``num_classes`` output units.
        d_main: Width of the residual stream.
        d_hidden: Hidden width inside each residual block.
        n_blocks: Number of residual blocks.
        dropout: Dropout probability used inside each residual block.

    Raises:
        ValueError: If ``num_classes`` is smaller than 2.
    """

    def __init__(self, num_features, cat_features, num_classes, d_main=128, d_hidden=64, n_blocks=4, dropout=0.2):
        super().__init__()

        # Fail early: without this check no output layer would be created and
        # the error would only surface later as an AttributeError in forward().
        if num_classes < 2:
            raise ValueError(f"num_classes must be >= 2, got {num_classes}")

        self.num_classes = num_classes

        # Continuous features: a single linear projection into the residual stream.
        self.num_linear = nn.Linear(num_features, d_main)

        # Categorical features: one embedding table per column.
        self.cat_embedding = nn.ModuleList([
            nn.Embedding(cat_size, d_main) for cat_size in cat_features
        ])

        # ResNet-like blocks; the skip connection itself is applied in forward().
        self.blocks = nn.ModuleList([
            nn.Sequential(
                nn.Linear(d_main, d_hidden),
                nn.ReLU(),
                nn.Dropout(dropout),
                nn.Linear(d_hidden, d_main),
                nn.ReLU(),
            ) for _ in range(n_blocks)
        ])

        if num_classes == 2:
            # Binary classification: one output unit.
            self.output_layer = nn.Linear(d_main, 1)
        else:
            # Multi-class classification: one output unit per class.
            self.output_layer = nn.Linear(d_main, num_classes)

    def forward(self, x_num, x_cat=None):
        """Compute the raw (un-activated) output scores.

        Args:
            x_num: Numeric features; the last dimension must be ``num_features``.
            x_cat: Integer category indices, indexed as ``x_cat[:, i]`` for the
                i-th categorical column. Ignored (and may be ``None``) when the
                model was built with no categorical features.

        Returns:
            The output of the final linear layer; no sigmoid/softmax is applied.
        """
        x = self.num_linear(x_num)

        # torch.stack() rejects an empty list, so only run the categorical
        # branch when at least one embedding table exists.
        if len(self.cat_embedding) > 0:
            embedded = [embed(x_cat[:, i]) for i, embed in enumerate(self.cat_embedding)]
            # Sum the per-column embeddings and merge with the numeric projection.
            x = x + torch.stack(embedded, dim=1).sum(dim=1)

        for block in self.blocks:
            x = x + block(x)  # Residual connection

        return self.output_layer(x)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Analysis_code/deeplearning_model_binary.ipynb
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Analysis_code/deeplearning_model_multi.ipynb
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Analysis_code/final_test/final.ipynb
DELETED
|
@@ -1,1143 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"cells": [
|
| 3 |
-
{
|
| 4 |
-
"cell_type": "code",
|
| 5 |
-
"execution_count": 1,
|
| 6 |
-
"metadata": {},
|
| 7 |
-
"outputs": [],
|
| 8 |
-
"source": [
|
| 9 |
-
"import pandas as pd\n",
|
| 10 |
-
"import numpy as np\n",
|
| 11 |
-
"from sklearn.preprocessing import StandardScaler, LabelEncoder\n",
|
| 12 |
-
"import torch\n",
|
| 13 |
-
"from torch.utils.data import DataLoader, TensorDataset\n",
|
| 14 |
-
"import random\n",
|
| 15 |
-
"from collections import Counter\n",
|
| 16 |
-
"import sys\n",
|
| 17 |
-
"sys.path.append('../../../../../../../../mnt/workspace/LightGBM/python-package')\n",
|
| 18 |
-
"from lightgbm import LGBMClassifier\n",
|
| 19 |
-
"import numpy as np\n",
|
| 20 |
-
"from sklearn.model_selection import train_test_split\n",
|
| 21 |
-
"from sklearn.inspection import permutation_importance\n",
|
| 22 |
-
"from sklearn.metrics import confusion_matrix,accuracy_score, recall_score, precision_score\n",
|
| 23 |
-
"from sklearn.model_selection import StratifiedKFold\n",
|
| 24 |
-
"from xgboost import XGBClassifier\n",
|
| 25 |
-
"from warnings import filterwarnings\n",
|
| 26 |
-
"filterwarnings('ignore')\n",
|
| 27 |
-
"import sys\n",
|
| 28 |
-
"sys.path.append('../')\n",
|
| 29 |
-
"import torch\n",
|
| 30 |
-
"import torch.nn as nn\n",
|
| 31 |
-
"import torch.optim as optim\n",
|
| 32 |
-
"import optuna\n",
|
| 33 |
-
"import pandas as pd\n",
|
| 34 |
-
"import numpy as np\n",
|
| 35 |
-
"import random\n",
|
| 36 |
-
"from ft_transformer import FTTransformer\n",
|
| 37 |
-
"from resnet_like import ResNetLike\n",
|
| 38 |
-
"from deepgbm import DeepGBM\n",
|
| 39 |
-
"from pytorch_tabnet.tab_model import TabNetClassifier\n",
|
| 40 |
-
"from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, confusion_matrix"
|
| 41 |
-
]
|
| 42 |
-
},
|
| 43 |
-
{
|
| 44 |
-
"cell_type": "code",
|
| 45 |
-
"execution_count": 2,
|
| 46 |
-
"metadata": {},
|
| 47 |
-
"outputs": [],
|
| 48 |
-
"source": [
|
| 49 |
-
"# Python 및 Numpy 시드 고정\n",
|
| 50 |
-
"seed = 42\n",
|
| 51 |
-
"random.seed(seed)\n",
|
| 52 |
-
"np.random.seed(seed)\n",
|
| 53 |
-
"\n",
|
| 54 |
-
"# PyTorch 시드 고정\n",
|
| 55 |
-
"torch.manual_seed(seed)\n",
|
| 56 |
-
"torch.cuda.manual_seed(seed)\n",
|
| 57 |
-
"torch.cuda.manual_seed_all(seed) # Multi-GPU 환경에서 동일한 시드 적용\n",
|
| 58 |
-
"\n",
|
| 59 |
-
"# PyTorch 연산의 결정적 모드 설정\n",
|
| 60 |
-
"torch.backends.cudnn.deterministic = True # 실행마다 동일한 결과를 보장\n",
|
| 61 |
-
"torch.backends.cudnn.benchmark = True # 성능 최적화를 활성화 (가능한 한 빠른 연산 수행)\n",
|
| 62 |
-
"\n",
|
| 63 |
-
"# 전처리 함수\n",
|
| 64 |
-
"def preprocessing(df):\n",
|
| 65 |
-
" df = df[df.columns].copy()\n",
|
| 66 |
-
" df.loc[df['wind_dir']=='정온', 'wind_dir'] = \"0\"\n",
|
| 67 |
-
" df['wind_dir'] = df['wind_dir'].astype('int')\n",
|
| 68 |
-
" df['lm_cloudcover'] = df['lm_cloudcover'].astype('int')\n",
|
| 69 |
-
" df['cloudcover'] = df['cloudcover'].astype('int')\n",
|
| 70 |
-
" return df\n",
|
| 71 |
-
"\n",
|
| 72 |
-
"# 데이터셋 준비 함수\n",
|
| 73 |
-
"def prepare_dataset(region, data_sample='pure', target='multi', fold=3):\n",
|
| 74 |
-
"\n",
|
| 75 |
-
" # 데이터 경로 지정\n",
|
| 76 |
-
" dat_path = f\"../../data/data_for_modeling/{region}_train.csv\"\n",
|
| 77 |
-
" if data_sample == 'pure':\n",
|
| 78 |
-
" train_path = dat_path\n",
|
| 79 |
-
" else:\n",
|
| 80 |
-
" train_path = f'../../data/data_oversampled/{data_sample}/{data_sample}_{fold}_{region}.csv'\n",
|
| 81 |
-
" train_path = f'../../data/data_oversampled/{data_sample}/{data_sample}_{fold}_{region}.csv'\n",
|
| 82 |
-
" test_path = f\"../../data/data_for_modeling/{region}_test.csv\"\n",
|
| 83 |
-
" drop_col = ['binary_class','multi_class','visi','year']\n",
|
| 84 |
-
" target_col = f'{target}_class'\n",
|
| 85 |
-
" \n",
|
| 86 |
-
" # 데이터 로드\n",
|
| 87 |
-
" region_dat = preprocessing(pd.read_csv(dat_path, index_col=0))\n",
|
| 88 |
-
" if data_sample == 'pure':\n",
|
| 89 |
-
" region_train = region_dat.loc[~region_dat['year'].isin([2021-fold]), :]\n",
|
| 90 |
-
" else:\n",
|
| 91 |
-
" region_train = preprocessing(pd.read_csv(train_path))\n",
|
| 92 |
-
" region_val = region_dat.loc[region_dat['year'].isin([2021-fold]), :]\n",
|
| 93 |
-
" region_test = preprocessing(pd.read_csv(test_path))\n",
|
| 94 |
-
"\n",
|
| 95 |
-
" # 컬럼 정렬 (일관성 유지)\n",
|
| 96 |
-
" common_columns = region_train.columns.to_list()\n",
|
| 97 |
-
" train_data = region_train[common_columns]\n",
|
| 98 |
-
" val_data = region_val[common_columns]\n",
|
| 99 |
-
" test_data = region_test[common_columns]\n",
|
| 100 |
-
"\n",
|
| 101 |
-
" # 설명변수 & 타겟 분리\n",
|
| 102 |
-
" X_train = train_data.drop(columns=drop_col)\n",
|
| 103 |
-
" y_train = train_data[target_col]\n",
|
| 104 |
-
" X_val = val_data.drop(columns=drop_col)\n",
|
| 105 |
-
" y_val = val_data[target_col]\n",
|
| 106 |
-
" X_test = test_data.drop(columns=drop_col)\n",
|
| 107 |
-
" y_test = test_data[target_col]\n",
|
| 108 |
-
"\n",
|
| 109 |
-
" # 범주형 & 연속형 변수 분리\n",
|
| 110 |
-
" categorical_cols = X_train.select_dtypes(include=['object', 'category', 'int64']).columns\n",
|
| 111 |
-
" numerical_cols = X_train.select_dtypes(include=['float64']).columns\n",
|
| 112 |
-
"\n",
|
| 113 |
-
" # 범주형 변수 Label Encoding\n",
|
| 114 |
-
" label_encoders = {}\n",
|
| 115 |
-
" for col in categorical_cols:\n",
|
| 116 |
-
" le = LabelEncoder()\n",
|
| 117 |
-
" le.fit(X_train[col]) # Train 데이터 기준으로 학���\n",
|
| 118 |
-
" label_encoders[col] = le\n",
|
| 119 |
-
"\n",
|
| 120 |
-
" # 변환 적용\n",
|
| 121 |
-
" for col in categorical_cols:\n",
|
| 122 |
-
" X_train[col] = label_encoders[col].transform(X_train[col])\n",
|
| 123 |
-
" X_val[col] = label_encoders[col].transform(X_val[col])\n",
|
| 124 |
-
" X_test[col] = label_encoders[col].transform(X_test[col])\n",
|
| 125 |
-
"\n",
|
| 126 |
-
" # 연속형 변수 Standard Scaling\n",
|
| 127 |
-
" scaler = StandardScaler()\n",
|
| 128 |
-
" scaler.fit(X_train[numerical_cols]) # Train 데이터 기준으로 학습\n",
|
| 129 |
-
"\n",
|
| 130 |
-
" # 변환 적용\n",
|
| 131 |
-
" X_train[numerical_cols] = scaler.transform(X_train[numerical_cols])\n",
|
| 132 |
-
" X_val[numerical_cols] = scaler.transform(X_val[numerical_cols])\n",
|
| 133 |
-
" X_test[numerical_cols] = scaler.transform(X_test[numerical_cols])\n",
|
| 134 |
-
"\n",
|
| 135 |
-
" return X_train, X_val, X_test, y_train, y_val, y_test, categorical_cols, numerical_cols\n",
|
| 136 |
-
"\n",
|
| 137 |
-
"# 데이터 변환 및 dataloader 생성 함수\n",
|
| 138 |
-
"def prepare_dataloader(region, data_sample='pure', target='multi', fold=3, random_state=None):\n",
|
| 139 |
-
"\n",
|
| 140 |
-
" # 데이터 경로 지정\n",
|
| 141 |
-
" dat_path = f\"../../data/data_for_modeling/{region}_train.csv\"\n",
|
| 142 |
-
" if data_sample == 'pure':\n",
|
| 143 |
-
" train_path = dat_path\n",
|
| 144 |
-
" else:\n",
|
| 145 |
-
" train_path = f'../../data/data_oversampled/{data_sample}/{data_sample}_{fold}_{region}.csv'\n",
|
| 146 |
-
" train_path = f'../../data/data_oversampled/{data_sample}/{data_sample}_{fold}_{region}.csv'\n",
|
| 147 |
-
" test_path = f\"../../data/data_for_modeling/{region}_test.csv\"\n",
|
| 148 |
-
" drop_col = ['binary_class','multi_class','visi','year']\n",
|
| 149 |
-
" target_col = f'{target}_class'\n",
|
| 150 |
-
" \n",
|
| 151 |
-
" # 데이터 로드\n",
|
| 152 |
-
" region_dat = preprocessing(pd.read_csv(dat_path, index_col=0))\n",
|
| 153 |
-
" if data_sample == 'pure':\n",
|
| 154 |
-
" region_train = region_dat.loc[~region_dat['year'].isin([2021-fold]), :]\n",
|
| 155 |
-
" else:\n",
|
| 156 |
-
" region_train = preprocessing(pd.read_csv(train_path))\n",
|
| 157 |
-
" region_val = region_dat.loc[region_dat['year'].isin([2021-fold]), :]\n",
|
| 158 |
-
" region_test = preprocessing(pd.read_csv(test_path))\n",
|
| 159 |
-
"\n",
|
| 160 |
-
" # 컬럼 정렬 (일관성 유지)\n",
|
| 161 |
-
" common_columns = region_train.columns.to_list()\n",
|
| 162 |
-
" train_data = region_train[common_columns]\n",
|
| 163 |
-
" val_data = region_val[common_columns]\n",
|
| 164 |
-
" test_data = region_test[common_columns]\n",
|
| 165 |
-
"\n",
|
| 166 |
-
" # 설명변수 & 타겟 분리\n",
|
| 167 |
-
" X_train = train_data.drop(columns=drop_col)\n",
|
| 168 |
-
" y_train = train_data[target_col]\n",
|
| 169 |
-
" X_val = val_data.drop(columns=drop_col)\n",
|
| 170 |
-
" y_val = val_data[target_col]\n",
|
| 171 |
-
" X_test = test_data.drop(columns=drop_col)\n",
|
| 172 |
-
" y_test = test_data[target_col]\n",
|
| 173 |
-
"\n",
|
| 174 |
-
" # 범주형 & 연속형 변수 분리\n",
|
| 175 |
-
" categorical_cols = X_train.select_dtypes(include=['object', 'category', 'int64']).columns\n",
|
| 176 |
-
" numerical_cols = X_train.select_dtypes(include=['float64']).columns\n",
|
| 177 |
-
"\n",
|
| 178 |
-
" # 범주형 변수 Label Encoding\n",
|
| 179 |
-
" label_encoders = {}\n",
|
| 180 |
-
" for col in categorical_cols:\n",
|
| 181 |
-
" le = LabelEncoder()\n",
|
| 182 |
-
" le.fit(X_train[col]) # Train 데이터 기준으로 학습\n",
|
| 183 |
-
" label_encoders[col] = le\n",
|
| 184 |
-
"\n",
|
| 185 |
-
" # 변환 적용\n",
|
| 186 |
-
" for col in categorical_cols:\n",
|
| 187 |
-
" X_train[col] = label_encoders[col].transform(X_train[col])\n",
|
| 188 |
-
" X_val[col] = label_encoders[col].transform(X_val[col])\n",
|
| 189 |
-
" X_test[col] = label_encoders[col].transform(X_test[col])\n",
|
| 190 |
-
"\n",
|
| 191 |
-
" # 연속형 변수 Standard Scaling\n",
|
| 192 |
-
" scaler = StandardScaler()\n",
|
| 193 |
-
" scaler.fit(X_train[numerical_cols]) # Train 데이터 기준으로 학습\n",
|
| 194 |
-
"\n",
|
| 195 |
-
" # 변환 적용\n",
|
| 196 |
-
" X_train[numerical_cols] = scaler.transform(X_train[numerical_cols])\n",
|
| 197 |
-
" X_val[numerical_cols] = scaler.transform(X_val[numerical_cols])\n",
|
| 198 |
-
" X_test[numerical_cols] = scaler.transform(X_test[numerical_cols])\n",
|
| 199 |
-
"\n",
|
| 200 |
-
" # 연속형 변수와 범주형 변수 분리\n",
|
| 201 |
-
" X_train_num = torch.tensor(X_train[numerical_cols].values, dtype=torch.float32)\n",
|
| 202 |
-
" X_train_cat = torch.tensor(X_train[categorical_cols].values, dtype=torch.long)\n",
|
| 203 |
-
"\n",
|
| 204 |
-
" X_val_num = torch.tensor(X_val[numerical_cols].values, dtype=torch.float32)\n",
|
| 205 |
-
" X_val_cat = torch.tensor(X_val[categorical_cols].values, dtype=torch.long)\n",
|
| 206 |
-
"\n",
|
| 207 |
-
" X_test_num = torch.tensor(X_test[numerical_cols].values, dtype=torch.float32)\n",
|
| 208 |
-
" X_test_cat = torch.tensor(X_test[categorical_cols].values, dtype=torch.long)\n",
|
| 209 |
-
"\n",
|
| 210 |
-
" # 레이블 변환\n",
|
| 211 |
-
" if target == \"binary\":\n",
|
| 212 |
-
" y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32) # 이진 분류 → float32\n",
|
| 213 |
-
" y_val_tensor = torch.tensor(y_val.values, dtype=torch.float32)\n",
|
| 214 |
-
" y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32)\n",
|
| 215 |
-
" elif target == \"multi\":\n",
|
| 216 |
-
" y_train_tensor = torch.tensor(y_train.values, dtype=torch.long) # 다중 분류 → long\n",
|
| 217 |
-
" y_val_tensor = torch.tensor(y_val.values, dtype=torch.long)\n",
|
| 218 |
-
" y_test_tensor = torch.tensor(y_test.values, dtype=torch.long)\n",
|
| 219 |
-
" else:\n",
|
| 220 |
-
" raise ValueError(\"target must be 'binary' or 'multi'\")\n",
|
| 221 |
-
"\n",
|
| 222 |
-
" # TensorDataset 생성\n",
|
| 223 |
-
" train_dataset = TensorDataset(X_train_num, X_train_cat, y_train_tensor)\n",
|
| 224 |
-
" val_dataset = TensorDataset(X_val_num, X_val_cat, y_val_tensor)\n",
|
| 225 |
-
" test_dataset = TensorDataset(X_test_num, X_test_cat, y_test_tensor)\n",
|
| 226 |
-
"\n",
|
| 227 |
-
" # DataLoader 생성\n",
|
| 228 |
-
" if random_state == None:\n",
|
| 229 |
-
" train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)\n",
|
| 230 |
-
" else:\n",
|
| 231 |
-
" train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, generator=torch.Generator().manual_seed(random_state))\n",
|
| 232 |
-
" val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)\n",
|
| 233 |
-
" test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)\n",
|
| 234 |
-
" \n",
|
| 235 |
-
" return X_train, categorical_cols, numerical_cols, train_loader, val_loader, test_loader"
|
| 236 |
-
]
|
| 237 |
-
},
|
| 238 |
-
{
|
| 239 |
-
"cell_type": "code",
|
| 240 |
-
"execution_count": 3,
|
| 241 |
-
"metadata": {},
|
| 242 |
-
"outputs": [],
|
| 243 |
-
"source": [
|
| 244 |
-
"import os\n",
|
| 245 |
-
"import torch\n",
|
| 246 |
-
"# 디바이스 설정 (CUDA 사용 가능하면 GPU로, 아니면 CPU로)\n",
|
| 247 |
-
"import glob\n",
|
| 248 |
-
"device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
|
| 249 |
-
"\n",
|
| 250 |
-
"def calculate_csi(Y_test, pred):\n",
|
| 251 |
-
"\n",
|
| 252 |
-
" cm = confusion_matrix(Y_test, pred) # 변수 이름을 cm으로 변경\n",
|
| 253 |
-
" # 혼동 행렬에서 H, F, M 추출\n",
|
| 254 |
-
" H = (cm[0, 0] + cm[1, 1])\n",
|
| 255 |
-
" \n",
|
| 256 |
-
" F = (cm[1, 0] + cm[2, 0] +\n",
|
| 257 |
-
" cm[0, 1] + cm[2, 1])\n",
|
| 258 |
-
"\n",
|
| 259 |
-
" M = (cm[0, 2] + cm[1, 2])\n",
|
| 260 |
-
" \n",
|
| 261 |
-
" # CSI 계산\n",
|
| 262 |
-
" CSI = H / (H + F + M + 1e-10)\n",
|
| 263 |
-
" return CSI\n",
|
| 264 |
-
"\n",
|
| 265 |
-
"def csi_metric(y_true, pred):\n",
|
| 266 |
-
" y_pred_binary = np.argmax(pred, axis=1)\n",
|
| 267 |
-
" score = calculate_csi(y_true, y_pred_binary)\n",
|
| 268 |
-
" return 'CSI', score, True # higher_better=True\n",
|
| 269 |
-
"\n",
|
| 270 |
-
"\n",
|
| 271 |
-
"def eval_metric_csi(y_true, pred_prob):\n",
|
| 272 |
-
"\n",
|
| 273 |
-
" pred = np.argmax(pred_prob, axis=1)\n",
|
| 274 |
-
" y_true = y_true\n",
|
| 275 |
-
" y_pred = pred\n",
|
| 276 |
-
" csi = calculate_csi(y_true, y_pred)\n",
|
| 277 |
-
" return -1*csi\n",
|
| 278 |
-
"\n",
|
| 279 |
-
"\n",
|
| 280 |
-
"from sklearn.metrics import matthews_corrcoef, accuracy_score\n",
|
| 281 |
-
"\n",
|
| 282 |
-
"def multiclass_mcc(y_val, y_pred):\n",
|
| 283 |
-
" \"\"\"\n",
|
| 284 |
-
" 다중 분류에서도 sklearn의 matthews_corrcoef를 그대로 사용할 수 있음.\n",
|
| 285 |
-
" \"\"\"\n",
|
| 286 |
-
" return matthews_corrcoef(y_val, y_pred)"
|
| 287 |
-
]
|
| 288 |
-
},
|
| 289 |
-
{
|
| 290 |
-
"cell_type": "code",
|
| 291 |
-
"execution_count": 4,
|
| 292 |
-
"metadata": {},
|
| 293 |
-
"outputs": [],
|
| 294 |
-
"source": [
|
| 295 |
-
"import os\n",
|
| 296 |
-
"import torch\n",
|
| 297 |
-
"# 디바이스 설정 (CUDA 사용 가능하면 GPU로, 아니면 CPU로)\n",
|
| 298 |
-
"import glob\n",
|
| 299 |
-
"device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
|
| 300 |
-
"import warnings\n",
|
| 301 |
-
"warnings.filterwarnings('ignore')\n",
|
| 302 |
-
"\n",
|
| 303 |
-
"def calculate_csi(Y_test, pred):\n",
|
| 304 |
-
"\n",
|
| 305 |
-
" cm = confusion_matrix(Y_test, pred) # 변수 이름을 cm으로 변경\n",
|
| 306 |
-
" # 혼동 행렬에서 H, F, M 추출\n",
|
| 307 |
-
" H = (cm[0, 0] + cm[1, 1])\n",
|
| 308 |
-
" \n",
|
| 309 |
-
" F = (cm[1, 0] + cm[2, 0] +\n",
|
| 310 |
-
" cm[0, 1] + cm[2, 1])\n",
|
| 311 |
-
"\n",
|
| 312 |
-
" M = (cm[0, 2] + cm[1, 2])\n",
|
| 313 |
-
" \n",
|
| 314 |
-
" # CSI 계산\n",
|
| 315 |
-
" CSI = H / (H + F + M + 1e-10)\n",
|
| 316 |
-
" return CSI\n",
|
| 317 |
-
"\n",
|
| 318 |
-
"# Soft Voting 앙상블\n",
|
| 319 |
-
"def get_proba(region, model_choose, data_sample, fold, target='multi'):\n",
|
| 320 |
-
" _, _, _, _,val_loader , test_loader = prepare_dataloader(region=region, data_sample=data_sample, target=target,fold=fold ,random_state=120)\n",
|
| 321 |
-
"\n",
|
| 322 |
-
" folder_path = f'../save_model/{model_choose}/{data_sample}'\n",
|
| 323 |
-
" model_paths = [path for path in glob.glob(f'{folder_path}/*.pth') if f'{region}' in path]\n",
|
| 324 |
-
"\n",
|
| 325 |
-
" model = torch.load(model_paths[fold-1], weights_only=False).to(device)\n",
|
| 326 |
-
" model.eval()\n",
|
| 327 |
-
"\n",
|
| 328 |
-
" test_preds = []\n",
|
| 329 |
-
"\n",
|
| 330 |
-
"\n",
|
| 331 |
-
" with torch.no_grad():\n",
|
| 332 |
-
" for x_num_batch, x_cat_batch, _ in test_loader:\n",
|
| 333 |
-
" output = model(x_num_batch.to(device), x_cat_batch.to(device))\n",
|
| 334 |
-
" output = torch.softmax(output, dim=1)\n",
|
| 335 |
-
" test_preds.extend(output.cpu().numpy())\n",
|
| 336 |
-
"\n",
|
| 337 |
-
"\n",
|
| 338 |
-
" return test_preds\n"
|
| 339 |
-
]
|
| 340 |
-
},
|
| 341 |
-
{
|
| 342 |
-
"cell_type": "code",
|
| 343 |
-
"execution_count": 5,
|
| 344 |
-
"metadata": {},
|
| 345 |
-
"outputs": [],
|
| 346 |
-
"source": [
|
| 347 |
-
"df_seoul = pd.read_csv(\"../../data/data_for_modeling/seoul_train.csv\")\n",
|
| 348 |
-
"df_seoul_test = pd.read_csv(\"../../data/data_for_modeling/seoul_test.csv\")\n",
|
| 349 |
-
"\n",
|
| 350 |
-
"df_busan = pd.read_csv(\"../../data/data_for_modeling/busan_train.csv\")\n",
|
| 351 |
-
"df_busan_test = pd.read_csv(\"../../data/data_for_modeling/busan_test.csv\")\n",
|
| 352 |
-
"\n",
|
| 353 |
-
"df_daegu = pd.read_csv(\"../../data/data_for_modeling/daegu_train.csv\")\n",
|
| 354 |
-
"df_daegu_test = pd.read_csv(\"../../data/data_for_modeling/daegu_test.csv\")\n",
|
| 355 |
-
"\n",
|
| 356 |
-
"df_daejeon = pd.read_csv(\"../../data/data_for_modeling/daejeon_train.csv\")\n",
|
| 357 |
-
"df_daejeon_test = pd.read_csv(\"../../data/data_for_modeling/daejeon_test.csv\")\n",
|
| 358 |
-
"\n",
|
| 359 |
-
"df_incheon = pd.read_csv(\"../../data/data_for_modeling/incheon_train.csv\")\n",
|
| 360 |
-
"df_incheon_test = pd.read_csv(\"../../data/data_for_modeling/incheon_test.csv\")\n",
|
| 361 |
-
"\n",
|
| 362 |
-
"df_gwangju = pd.read_csv(\"../../data/data_for_modeling/gwangju_train.csv\")\n",
|
| 363 |
-
"df_gwangju_test = pd.read_csv(\"../../data/data_for_modeling/gwangju_test.csv\")"
|
| 364 |
-
]
|
| 365 |
-
},
|
| 366 |
-
{
|
| 367 |
-
"cell_type": "code",
|
| 368 |
-
"execution_count": 6,
|
| 369 |
-
"metadata": {},
|
| 370 |
-
"outputs": [],
|
| 371 |
-
"source": [
|
| 372 |
-
"def preprocessing_df(df):\n",
|
| 373 |
-
" df = df[df.columns].copy()\n",
|
| 374 |
-
" df['year'] = df['year'].astype('int')\n",
|
| 375 |
-
" df['month'] = df['month'].astype('int')\n",
|
| 376 |
-
" df['hour'] = df['hour'].astype('int')\n",
|
| 377 |
-
" df['binary_class'] = df['binary_class'].astype('int')\n",
|
| 378 |
-
" df['multi_class'] = df['multi_class'].astype('int')\n",
|
| 379 |
-
"\n",
|
| 380 |
-
" df.loc[df['wind_dir']=='정온', 'wind_dir'] = \"0\"\n",
|
| 381 |
-
" df['wind_dir'] = df['wind_dir'].astype('int')\n",
|
| 382 |
-
" df= df[['temp_C', 'precip_mm', 'wind_speed', 'wind_dir', 'hm',\n",
|
| 383 |
-
" 'vap_pressure', 'dewpoint_C', 'loc_pressure', 'sea_pressure',\n",
|
| 384 |
-
" 'solarRad', 'snow_cm', 'cloudcover', 'lm_cloudcover', 'low_cloudbase',\n",
|
| 385 |
-
" 'groundtemp', 'O3', 'NO2', 'PM10', 'PM25', 'year',\n",
|
| 386 |
-
" 'month', 'hour', 'ground_temp - temp_C', 'hour_sin', 'hour_cos',\n",
|
| 387 |
-
" 'month_sin', 'month_cos','multi_class']]\n",
|
| 388 |
-
" return df\n",
|
| 389 |
-
"\n"
|
| 390 |
-
]
|
| 391 |
-
},
|
| 392 |
-
{
|
| 393 |
-
"cell_type": "code",
|
| 394 |
-
"execution_count": 7,
|
| 395 |
-
"metadata": {},
|
| 396 |
-
"outputs": [],
|
| 397 |
-
"source": [
|
| 398 |
-
"df_seoul_test= preprocessing_df(df_seoul_test).copy()\n",
|
| 399 |
-
"df_busan_test= preprocessing_df(df_busan_test).copy()\n",
|
| 400 |
-
"df_daegu_test= preprocessing_df(df_daegu_test).copy()\n",
|
| 401 |
-
"df_gwangju_test= preprocessing_df(df_gwangju_test).copy()\n",
|
| 402 |
-
"df_daejeon_test= preprocessing_df(df_daejeon_test).copy()\n",
|
| 403 |
-
"df_incheon_test= preprocessing_df(df_incheon_test).copy()\n",
|
| 404 |
-
"\n",
|
| 405 |
-
"df_seoul= preprocessing_df(df_seoul).copy()\n",
|
| 406 |
-
"df_busan= preprocessing_df(df_busan).copy()\n",
|
| 407 |
-
"df_daegu= preprocessing_df(df_daegu).copy()\n",
|
| 408 |
-
"df_gwangju= preprocessing_df(df_gwangju).copy()\n",
|
| 409 |
-
"df_daejeon= preprocessing_df(df_daejeon).copy()\n",
|
| 410 |
-
"df_incheon= preprocessing_df(df_incheon).copy()\n",
|
| 411 |
-
"\n",
|
| 412 |
-
"df_seoul_test.drop(columns=['year'], inplace=True)\n",
|
| 413 |
-
"df_busan_test.drop(columns=['year'], inplace=True)\n",
|
| 414 |
-
"df_daegu_test.drop(columns=['year'], inplace=True)\n",
|
| 415 |
-
"df_daejeon_test.drop(columns=['year'], inplace=True)\n",
|
| 416 |
-
"df_incheon_test.drop(columns=['year'], inplace=True)\n",
|
| 417 |
-
"df_gwangju_test.drop(columns=['year'], inplace=True)"
|
| 418 |
-
]
|
| 419 |
-
},
|
| 420 |
-
{
|
| 421 |
-
"cell_type": "code",
|
| 422 |
-
"execution_count": 8,
|
| 423 |
-
"metadata": {},
|
| 424 |
-
"outputs": [],
|
| 425 |
-
"source": [
|
| 426 |
-
"import joblib\n",
|
| 427 |
-
"\n",
|
| 428 |
-
"lgb_seoul= joblib.load('../save_model/LGB_optima/lgb_seoul_smote.pkl')\n",
|
| 429 |
-
"lgb_busan= joblib.load('../save_model/LGB_optima/lgb_busan_smote.pkl')\n",
|
| 430 |
-
"lgb_incheon= joblib.load('../save_model/LGB_optima/lgb_incheon_smote.pkl')\n",
|
| 431 |
-
"lgb_daegu= joblib.load('../save_model/LGB_optima/lgb_daegu_smote.pkl')\n",
|
| 432 |
-
"lgb_daejeon= joblib.load('../save_model/LGB_optima/lgb_daejeon_smote.pkl')\n",
|
| 433 |
-
"lgb_gwangju= joblib.load('../save_model/LGB_optima/lgb_gwangju_smote.pkl')\n",
|
| 434 |
-
"\n",
|
| 435 |
-
"xgb_seoul= joblib.load('../save_model/XGB_optima/xgb_seoul_smote.pkl')\n",
|
| 436 |
-
"xgb_busan= joblib.load('../save_model/XGB_optima/xgb_busan_ctgan20000.pkl')\n",
|
| 437 |
-
"xgb_incheon= joblib.load('../save_model/XGB_optima/xgb_incheon_smote.pkl')\n",
|
| 438 |
-
"xgb_daegu= joblib.load('../save_model/XGB_optima/xgb_daegu_smote.pkl')\n",
|
| 439 |
-
"xgb_daejeon= joblib.load('../save_model/XGB_optima/xgb_daejeon_smote.pkl')\n",
|
| 440 |
-
"xgb_gwangju= joblib.load('../save_model/XGB_optima/xgb_gwangju_smote.pkl')\n",
|
| 441 |
-
"\n",
|
| 442 |
-
"lgb_seoul_1= lgb_seoul[0]\n",
|
| 443 |
-
"lgb_seoul_2= lgb_seoul[1]\n",
|
| 444 |
-
"lgb_seoul_3= lgb_seoul[2]\n",
|
| 445 |
-
"\n",
|
| 446 |
-
"lgb_busan_1= lgb_busan[0]\n",
|
| 447 |
-
"lgb_busan_2= lgb_busan[1]\n",
|
| 448 |
-
"lgb_busan_3= lgb_busan[2]\n",
|
| 449 |
-
"\n",
|
| 450 |
-
"lgb_incheon_1= lgb_incheon[0]\n",
|
| 451 |
-
"lgb_incheon_2= lgb_incheon[1]\n",
|
| 452 |
-
"lgb_incheon_3= lgb_incheon[2]\n",
|
| 453 |
-
"\n",
|
| 454 |
-
"lgb_daegu_1= lgb_daegu[0]\n",
|
| 455 |
-
"lgb_daegu_2= lgb_daegu[1]\n",
|
| 456 |
-
"lgb_daegu_3= lgb_daegu[2]\n",
|
| 457 |
-
"\n",
|
| 458 |
-
"lgb_daejeon_1= lgb_daejeon[0]\n",
|
| 459 |
-
"lgb_daejeon_2= lgb_daejeon[1]\n",
|
| 460 |
-
"lgb_daejeon_3= lgb_daejeon[2]\n",
|
| 461 |
-
"\n",
|
| 462 |
-
"lgb_gwangju_1= lgb_gwangju[0]\n",
|
| 463 |
-
"lgb_gwangju_2= lgb_gwangju[1]\n",
|
| 464 |
-
"lgb_gwangju_3= lgb_gwangju[2]\n",
|
| 465 |
-
"\n",
|
| 466 |
-
"\n",
|
| 467 |
-
"xgb_seoul_1= xgb_seoul[0]\n",
|
| 468 |
-
"xgb_seoul_2= xgb_seoul[1]\n",
|
| 469 |
-
"xgb_seoul_3= xgb_seoul[2]\n",
|
| 470 |
-
"\n",
|
| 471 |
-
"xgb_busan_1= xgb_busan[0]\n",
|
| 472 |
-
"xgb_busan_2= xgb_busan[1]\n",
|
| 473 |
-
"xgb_busan_3= xgb_busan[2]\n",
|
| 474 |
-
"\n",
|
| 475 |
-
"xgb_incheon_1= xgb_incheon[0]\n",
|
| 476 |
-
"xgb_incheon_2= xgb_incheon[1]\n",
|
| 477 |
-
"xgb_incheon_3= xgb_incheon[2]\n",
|
| 478 |
-
"\n",
|
| 479 |
-
"xgb_daegu_1= xgb_daegu[0]\n",
|
| 480 |
-
"xgb_daegu_2= xgb_daegu[1]\n",
|
| 481 |
-
"xgb_daegu_3= xgb_daegu[2]\n",
|
| 482 |
-
"\n",
|
| 483 |
-
"xgb_daejeon_1= xgb_daejeon[0]\n",
|
| 484 |
-
"xgb_daejeon_2= xgb_daejeon[1]\n",
|
| 485 |
-
"xgb_daejeon_3= xgb_daejeon[2]\n",
|
| 486 |
-
"\n",
|
| 487 |
-
"xgb_gwangju_1= xgb_gwangju[0]\n",
|
| 488 |
-
"xgb_gwangju_2= xgb_gwangju[1]\n",
|
| 489 |
-
"xgb_gwangju_3= xgb_gwangju[2]\n"
|
| 490 |
-
]
|
| 491 |
-
},
|
| 492 |
-
{
|
| 493 |
-
"cell_type": "markdown",
|
| 494 |
-
"metadata": {},
|
| 495 |
-
"source": [
|
| 496 |
-
"## **Soft Voting**"
|
| 497 |
-
]
|
| 498 |
-
},
|
| 499 |
-
{
|
| 500 |
-
"cell_type": "markdown",
|
| 501 |
-
"metadata": {},
|
| 502 |
-
"source": [
|
| 503 |
-
"## **서울**"
|
| 504 |
-
]
|
| 505 |
-
},
|
| 506 |
-
{
|
| 507 |
-
"cell_type": "code",
|
| 508 |
-
"execution_count": 9,
|
| 509 |
-
"metadata": {},
|
| 510 |
-
"outputs": [],
|
| 511 |
-
"source": [
|
| 512 |
-
"voting = []\n",
|
| 513 |
-
"mcc = []\n",
|
| 514 |
-
"accuracy = []\n"
|
| 515 |
-
]
|
| 516 |
-
},
|
| 517 |
-
{
|
| 518 |
-
"cell_type": "code",
|
| 519 |
-
"execution_count": 10,
|
| 520 |
-
"metadata": {},
|
| 521 |
-
"outputs": [
|
| 522 |
-
{
|
| 523 |
-
"data": {
|
| 524 |
-
"text/plain": [
|
| 525 |
-
"Index(['temp_C', 'precip_mm', 'wind_speed', 'wind_dir', 'hm', 'vap_pressure',\n",
|
| 526 |
-
" 'dewpoint_C', 'loc_pressure', 'sea_pressure', 'solarRad', 'snow_cm',\n",
|
| 527 |
-
" 'cloudcover', 'lm_cloudcover', 'low_cloudbase', 'groundtemp', 'O3',\n",
|
| 528 |
-
" 'NO2', 'PM10', 'PM25', 'month', 'hour', 'ground_temp - temp_C',\n",
|
| 529 |
-
" 'hour_sin', 'hour_cos', 'month_sin', 'month_cos', 'multi_class'],\n",
|
| 530 |
-
" dtype='object')"
|
| 531 |
-
]
|
| 532 |
-
},
|
| 533 |
-
"execution_count": 10,
|
| 534 |
-
"metadata": {},
|
| 535 |
-
"output_type": "execute_result"
|
| 536 |
-
}
|
| 537 |
-
],
|
| 538 |
-
"source": [
|
| 539 |
-
"df_seoul_test.columns"
|
| 540 |
-
]
|
| 541 |
-
},
|
| 542 |
-
{
|
| 543 |
-
"cell_type": "code",
|
| 544 |
-
"execution_count": 11,
|
| 545 |
-
"metadata": {},
|
| 546 |
-
"outputs": [
|
| 547 |
-
{
|
| 548 |
-
"name": "stdout",
|
| 549 |
-
"output_type": "stream",
|
| 550 |
-
"text": [
|
| 551 |
-
"CSI score of soft(test) : 0.3248062015503624\n"
|
| 552 |
-
]
|
| 553 |
-
}
|
| 554 |
-
],
|
| 555 |
-
"source": [
|
| 556 |
-
"from scipy.stats import mode # 최다 득표수 계산을 위한 mode 함수\n",
|
| 557 |
-
"\n",
|
| 558 |
-
"probas = []\n",
|
| 559 |
-
"\n",
|
| 560 |
-
"# 1 Fold\n",
|
| 561 |
-
"test_preds = get_proba('seoul', 'deepgbm', 'ctgan20000', 1)\n",
|
| 562 |
-
"probas.append(test_preds)\n",
|
| 563 |
-
"test_preds = get_proba('seoul', 'resnet_like', 'smote', 1)\n",
|
| 564 |
-
"probas.append(test_preds)\n",
|
| 565 |
-
"# probas.append(xgb_seoul_1.predict_proba(df_seoul_test.iloc[:,:-1]))\n",
|
| 566 |
-
"\n",
|
| 567 |
-
"# 2 Fold\n",
|
| 568 |
-
"test_preds = get_proba('seoul', 'deepgbm', 'ctgan20000', 2)\n",
|
| 569 |
-
"probas.append(test_preds)\n",
|
| 570 |
-
"test_preds = get_proba('seoul', 'resnet_like', 'smote', 2)\n",
|
| 571 |
-
"probas.append(test_preds)\n",
|
| 572 |
-
"# probas.append(xgb_seoul_2.predict_proba(df_seoul_test.iloc[:,:-1]))\n",
|
| 573 |
-
"\n",
|
| 574 |
-
"# 3 Fold\n",
|
| 575 |
-
"test_preds = get_proba('seoul', 'deepgbm', 'ctgan20000', 3)\n",
|
| 576 |
-
"probas.append(test_preds)\n",
|
| 577 |
-
"test_preds = get_proba('seoul', 'resnet_like', 'smote', 3)\n",
|
| 578 |
-
"probas.append(test_preds)\n",
|
| 579 |
-
"# probas.append(xgb_seoul_3.predict_proba(df_seoul_test.iloc[:,:-1]))\n",
|
| 580 |
-
"\n",
|
| 581 |
-
"voting.append(calculate_csi(df_seoul_test.iloc[:,-1], np.argmax(np.mean(probas, axis=0), axis=1)))\n",
|
| 582 |
-
"mcc.append(multiclass_mcc(df_seoul_test.iloc[:,-1], np.argmax(np.mean(probas, axis=0), axis=1)))\n",
|
| 583 |
-
"accuracy.append(accuracy_score(df_seoul_test.iloc[:,-1], np.argmax(np.mean(probas, axis=0), axis=1)))\n",
|
| 584 |
-
"\n",
|
| 585 |
-
"\n",
|
| 586 |
-
"print(\"CSI score of soft(test) :\", calculate_csi(df_seoul_test.iloc[:,-1], np.argmax(np.mean(probas, axis=0), axis=1)))\n"
|
| 587 |
-
]
|
| 588 |
-
},
|
| 589 |
-
{
|
| 590 |
-
"cell_type": "code",
|
| 591 |
-
"execution_count": 12,
|
| 592 |
-
"metadata": {},
|
| 593 |
-
"outputs": [
|
| 594 |
-
{
|
| 595 |
-
"data": {
|
| 596 |
-
"text/plain": [
|
| 597 |
-
"array([[ 2, 11, 0],\n",
|
| 598 |
-
" [ 6, 417, 58],\n",
|
| 599 |
-
" [ 7, 789, 7470]])"
|
| 600 |
-
]
|
| 601 |
-
},
|
| 602 |
-
"execution_count": 12,
|
| 603 |
-
"metadata": {},
|
| 604 |
-
"output_type": "execute_result"
|
| 605 |
-
}
|
| 606 |
-
],
|
| 607 |
-
"source": [
|
| 608 |
-
"confusion_matrix(df_seoul_test.iloc[:,-1], np.argmax(np.mean(probas, axis=0), axis=1))"
|
| 609 |
-
]
|
| 610 |
-
},
|
| 611 |
-
{
|
| 612 |
-
"cell_type": "markdown",
|
| 613 |
-
"metadata": {},
|
| 614 |
-
"source": [
|
| 615 |
-
"## **부산**"
|
| 616 |
-
]
|
| 617 |
-
},
|
| 618 |
-
{
|
| 619 |
-
"cell_type": "code",
|
| 620 |
-
"execution_count": 13,
|
| 621 |
-
"metadata": {},
|
| 622 |
-
"outputs": [
|
| 623 |
-
{
|
| 624 |
-
"name": "stdout",
|
| 625 |
-
"output_type": "stream",
|
| 626 |
-
"text": [
|
| 627 |
-
"CSI score of soft(test) : 0.46608315098458075\n"
|
| 628 |
-
]
|
| 629 |
-
}
|
| 630 |
-
],
|
| 631 |
-
"source": [
|
| 632 |
-
"from scipy.stats import mode # 최다 득표수 계산을 위한 mode 함수\n",
|
| 633 |
-
"\n",
|
| 634 |
-
"probas = []\n",
|
| 635 |
-
"\n",
|
| 636 |
-
"# 1 Fold\n",
|
| 637 |
-
"test_preds = get_proba('busan', 'deepgbm', 'pure', 1)\n",
|
| 638 |
-
"probas.append(test_preds)\n",
|
| 639 |
-
"test_preds = get_proba('busan', 'resnet_like', 'ctgan10000', 1)\n",
|
| 640 |
-
"probas.append(test_preds)\n",
|
| 641 |
-
"\n",
|
| 642 |
-
"\n",
|
| 643 |
-
"# 2 Fold\n",
|
| 644 |
-
"test_preds = get_proba('busan', 'deepgbm', 'pure', 2)\n",
|
| 645 |
-
"probas.append(test_preds)\n",
|
| 646 |
-
"test_preds = get_proba('busan', 'resnet_like', 'ctgan10000', 2)\n",
|
| 647 |
-
"probas.append(test_preds)\n",
|
| 648 |
-
"\n",
|
| 649 |
-
"\n",
|
| 650 |
-
"# 3 Fold\n",
|
| 651 |
-
"test_preds = get_proba('busan', 'deepgbm', 'pure', 3)\n",
|
| 652 |
-
"probas.append(test_preds)\n",
|
| 653 |
-
"test_preds = get_proba('busan', 'resnet_like', 'ctgan10000', 3)\n",
|
| 654 |
-
"probas.append(test_preds)\n",
|
| 655 |
-
"\n",
|
| 656 |
-
"\n",
|
| 657 |
-
"voting.append(calculate_csi(df_busan_test.iloc[:,-1], np.argmax(np.mean(probas, axis=0), axis=1)))\n",
|
| 658 |
-
"mcc.append(multiclass_mcc(df_busan_test.iloc[:,-1], np.argmax(np.mean(probas, axis=0), axis=1)))\n",
|
| 659 |
-
"accuracy.append(accuracy_score(df_busan_test.iloc[:,-1], np.argmax(np.mean(probas, axis=0), axis=1)))\n",
|
| 660 |
-
"\n",
|
| 661 |
-
"print(\"CSI score of soft(test) :\", calculate_csi(df_busan_test.iloc[:,-1], np.argmax(np.mean(probas, axis=0), axis=1)))\n"
|
| 662 |
-
]
|
| 663 |
-
},
|
| 664 |
-
{
|
| 665 |
-
"cell_type": "code",
|
| 666 |
-
"execution_count": 14,
|
| 667 |
-
"metadata": {},
|
| 668 |
-
"outputs": [
|
| 669 |
-
{
|
| 670 |
-
"data": {
|
| 671 |
-
"text/plain": [
|
| 672 |
-
"array([[ 11, 13, 0],\n",
|
| 673 |
-
" [ 11, 202, 68],\n",
|
| 674 |
-
" [ 2, 150, 8303]])"
|
| 675 |
-
]
|
| 676 |
-
},
|
| 677 |
-
"execution_count": 14,
|
| 678 |
-
"metadata": {},
|
| 679 |
-
"output_type": "execute_result"
|
| 680 |
-
}
|
| 681 |
-
],
|
| 682 |
-
"source": [
|
| 683 |
-
"confusion_matrix(df_busan_test.iloc[:,-1], np.argmax(np.mean(probas, axis=0), axis=1))"
|
| 684 |
-
]
|
| 685 |
-
},
|
| 686 |
-
{
|
| 687 |
-
"cell_type": "markdown",
|
| 688 |
-
"metadata": {},
|
| 689 |
-
"source": [
|
| 690 |
-
"## **인천**"
|
| 691 |
-
]
|
| 692 |
-
},
|
| 693 |
-
{
|
| 694 |
-
"cell_type": "code",
|
| 695 |
-
"execution_count": 15,
|
| 696 |
-
"metadata": {},
|
| 697 |
-
"outputs": [
|
| 698 |
-
{
|
| 699 |
-
"name": "stdout",
|
| 700 |
-
"output_type": "stream",
|
| 701 |
-
"text": [
|
| 702 |
-
"CSI score of hard(test) : 0.572269457161506\n"
|
| 703 |
-
]
|
| 704 |
-
}
|
| 705 |
-
],
|
| 706 |
-
"source": [
|
| 707 |
-
"from scipy.stats import mode # 최다 득표수 계산을 위한 mode 함수\n",
|
| 708 |
-
"\n",
|
| 709 |
-
"\n",
|
| 710 |
-
"# 1 Fold\n",
|
| 711 |
-
"probas = []\n",
|
| 712 |
-
"test_preds = get_proba('incheon', 'deepgbm', 'pure', 1)\n",
|
| 713 |
-
"probas.append(test_preds)\n",
|
| 714 |
-
"test_preds = get_proba('incheon', 'resnet_like', 'smote', 1)\n",
|
| 715 |
-
"probas.append(test_preds)\n",
|
| 716 |
-
"test_preds = get_proba('incheon', 'ft_transformer', 'pure', 1)\n",
|
| 717 |
-
"probas.append(test_preds)\n",
|
| 718 |
-
"\n",
|
| 719 |
-
"probas.append(lgb_incheon_1.predict_proba(df_incheon_test.iloc[:,:-1]))\n",
|
| 720 |
-
"probas.append(xgb_incheon_1.predict_proba(df_incheon_test.iloc[:,:-1]))\n",
|
| 721 |
-
"\n",
|
| 722 |
-
"# 2 Fold\n",
|
| 723 |
-
"test_preds = get_proba('incheon', 'deepgbm', 'pure', 2)\n",
|
| 724 |
-
"probas.append(test_preds)\n",
|
| 725 |
-
"test_preds = get_proba('incheon', 'resnet_like', 'smote', 2)\n",
|
| 726 |
-
"probas.append(test_preds)\n",
|
| 727 |
-
"test_preds = get_proba('incheon', 'ft_transformer', 'pure', 2)\n",
|
| 728 |
-
"probas.append(test_preds)\n",
|
| 729 |
-
"\n",
|
| 730 |
-
"probas.append(lgb_incheon_2.predict_proba(df_incheon_test.iloc[:,:-1]))\n",
|
| 731 |
-
"probas.append(xgb_incheon_2.predict_proba(df_incheon_test.iloc[:,:-1]))\n",
|
| 732 |
-
"\n",
|
| 733 |
-
"# 3 Fold\n",
|
| 734 |
-
"test_preds = get_proba('incheon', 'deepgbm', 'pure', 3)\n",
|
| 735 |
-
"probas.append(test_preds)\n",
|
| 736 |
-
"test_preds = get_proba('incheon', 'resnet_like', 'smote', 3)\n",
|
| 737 |
-
"probas.append(test_preds)\n",
|
| 738 |
-
"test_preds = get_proba('incheon', 'ft_transformer', 'pure', 3)\n",
|
| 739 |
-
"probas.append(test_preds)\n",
|
| 740 |
-
"\n",
|
| 741 |
-
"probas.append(lgb_incheon_3.predict_proba(df_incheon_test.iloc[:,:-1]))\n",
|
| 742 |
-
"probas.append(xgb_incheon_3.predict_proba(df_incheon_test.iloc[:,:-1]))\n",
|
| 743 |
-
"\n",
|
| 744 |
-
"\n",
|
| 745 |
-
"\n",
|
| 746 |
-
"voting.append(calculate_csi(df_incheon_test.iloc[:,-1], np.argmax(np.mean(probas, axis=0), axis=1)))\n",
|
| 747 |
-
"mcc.append(multiclass_mcc(df_incheon_test.iloc[:,-1], np.argmax(np.mean(probas, axis=0), axis=1)))\n",
|
| 748 |
-
"accuracy.append(accuracy_score(df_incheon_test.iloc[:,-1], np.argmax(np.mean(probas, axis=0), axis=1)))\n",
|
| 749 |
-
"\n",
|
| 750 |
-
"\n",
|
| 751 |
-
"print(\"CSI score of hard(test) :\", calculate_csi(df_incheon_test.iloc[:,-1],mode(np.argmax(probas, axis=2), axis=0).mode[0]))\n"
|
| 752 |
-
]
|
| 753 |
-
},
|
| 754 |
-
{
|
| 755 |
-
"cell_type": "code",
|
| 756 |
-
"execution_count": 16,
|
| 757 |
-
"metadata": {},
|
| 758 |
-
"outputs": [
|
| 759 |
-
{
|
| 760 |
-
"data": {
|
| 761 |
-
"text/plain": [
|
| 762 |
-
"array([[ 87, 74, 21],\n",
|
| 763 |
-
" [ 22, 788, 395],\n",
|
| 764 |
-
" [ 2, 140, 7231]])"
|
| 765 |
-
]
|
| 766 |
-
},
|
| 767 |
-
"execution_count": 16,
|
| 768 |
-
"metadata": {},
|
| 769 |
-
"output_type": "execute_result"
|
| 770 |
-
}
|
| 771 |
-
],
|
| 772 |
-
"source": [
|
| 773 |
-
"confusion_matrix(df_incheon_test.iloc[:,-1],mode(np.argmax(probas, axis=2), axis=0).mode[0])"
|
| 774 |
-
]
|
| 775 |
-
},
|
| 776 |
-
{
|
| 777 |
-
"cell_type": "markdown",
|
| 778 |
-
"metadata": {},
|
| 779 |
-
"source": [
|
| 780 |
-
"## **대구**"
|
| 781 |
-
]
|
| 782 |
-
},
|
| 783 |
-
{
|
| 784 |
-
"cell_type": "code",
|
| 785 |
-
"execution_count": 17,
|
| 786 |
-
"metadata": {},
|
| 787 |
-
"outputs": [
|
| 788 |
-
{
|
| 789 |
-
"name": "stdout",
|
| 790 |
-
"output_type": "stream",
|
| 791 |
-
"text": [
|
| 792 |
-
"CSI score of soft(test) : 0.2852112676055334\n"
|
| 793 |
-
]
|
| 794 |
-
}
|
| 795 |
-
],
|
| 796 |
-
"source": [
|
| 797 |
-
"from scipy.stats import mode # 최다 득표수 계산을 위한 mode 함수\n",
|
| 798 |
-
"\n",
|
| 799 |
-
"probas= []\n",
|
| 800 |
-
"\n",
|
| 801 |
-
"# 1 Fold\n",
|
| 802 |
-
"test_preds = get_proba('daegu', 'deepgbm', 'smote', 1)\n",
|
| 803 |
-
"probas.append(test_preds)\n",
|
| 804 |
-
"test_preds = get_proba('daegu', 'ft_transformer', 'pure', 1)\n",
|
| 805 |
-
"probas.append(test_preds)\n",
|
| 806 |
-
"\n",
|
| 807 |
-
"# 2 Fold\n",
|
| 808 |
-
"test_preds = get_proba('daegu', 'deepgbm', 'smote', 2)\n",
|
| 809 |
-
"probas.append(test_preds)\n",
|
| 810 |
-
"test_preds = get_proba('daegu', 'ft_transformer', 'pure', 2)\n",
|
| 811 |
-
"probas.append(test_preds)\n",
|
| 812 |
-
"\n",
|
| 813 |
-
"# 3 Fold\n",
|
| 814 |
-
"test_preds = get_proba('daegu', 'deepgbm', 'smote', 3)\n",
|
| 815 |
-
"probas.append(test_preds)\n",
|
| 816 |
-
"test_preds = get_proba('daegu', 'ft_transformer', 'pure', 3)\n",
|
| 817 |
-
"probas.append(test_preds)\n",
|
| 818 |
-
"\n",
|
| 819 |
-
"voting.append(calculate_csi(df_daegu_test.iloc[:,-1], np.argmax(np.mean(probas, axis=0), axis=1)))\n",
|
| 820 |
-
"mcc.append(multiclass_mcc(df_daegu_test.iloc[:,-1], np.argmax(np.mean(probas, axis=0), axis=1)))\n",
|
| 821 |
-
"accuracy.append(accuracy_score(df_daegu_test.iloc[:,-1], np.argmax(np.mean(probas, axis=0), axis=1)))\n",
|
| 822 |
-
"\n",
|
| 823 |
-
"print(\"CSI score of soft(test) :\", calculate_csi(df_daegu_test.iloc[:,-1], np.argmax(np.mean(probas, axis=0), axis=1)))\n"
|
| 824 |
-
]
|
| 825 |
-
},
|
| 826 |
-
{
|
| 827 |
-
"cell_type": "code",
|
| 828 |
-
"execution_count": 18,
|
| 829 |
-
"metadata": {},
|
| 830 |
-
"outputs": [
|
| 831 |
-
{
|
| 832 |
-
"data": {
|
| 833 |
-
"text/plain": [
|
| 834 |
-
"array([[ 1, 0, 0],\n",
|
| 835 |
-
" [ 1, 80, 47],\n",
|
| 836 |
-
" [ 2, 153, 8476]])"
|
| 837 |
-
]
|
| 838 |
-
},
|
| 839 |
-
"execution_count": 18,
|
| 840 |
-
"metadata": {},
|
| 841 |
-
"output_type": "execute_result"
|
| 842 |
-
}
|
| 843 |
-
],
|
| 844 |
-
"source": [
|
| 845 |
-
"confusion_matrix(df_daegu_test.iloc[:,-1], np.argmax(np.mean(probas, axis=0), axis=1))"
|
| 846 |
-
]
|
| 847 |
-
},
|
| 848 |
-
{
|
| 849 |
-
"cell_type": "markdown",
|
| 850 |
-
"metadata": {},
|
| 851 |
-
"source": [
|
| 852 |
-
"## **대전**"
|
| 853 |
-
]
|
| 854 |
-
},
|
| 855 |
-
{
|
| 856 |
-
"cell_type": "code",
|
| 857 |
-
"execution_count": 19,
|
| 858 |
-
"metadata": {},
|
| 859 |
-
"outputs": [
|
| 860 |
-
{
|
| 861 |
-
"name": "stdout",
|
| 862 |
-
"output_type": "stream",
|
| 863 |
-
"text": [
|
| 864 |
-
"CSI score of soft(test) : 0.31884057971011603\n"
|
| 865 |
-
]
|
| 866 |
-
}
|
| 867 |
-
],
|
| 868 |
-
"source": [
|
| 869 |
-
"from scipy.stats import mode # 최다 득표수 계산을 위한 mode 함수\n",
|
| 870 |
-
"\n",
|
| 871 |
-
"probas = []\n",
|
| 872 |
-
"\n",
|
| 873 |
-
"# 1 Fold\n",
|
| 874 |
-
"test_preds = get_proba('daejeon', 'deepgbm', 'pure', 1)\n",
|
| 875 |
-
"probas.append(test_preds)\n",
|
| 876 |
-
"test_preds = get_proba('daejeon', 'ft_transformer', 'pure', 1)\n",
|
| 877 |
-
"probas.append(test_preds)\n",
|
| 878 |
-
"\n",
|
| 879 |
-
"\n",
|
| 880 |
-
"# 2 Fold\n",
|
| 881 |
-
"test_preds = get_proba('daejeon', 'deepgbm', 'pure', 2)\n",
|
| 882 |
-
"probas.append(test_preds)\n",
|
| 883 |
-
"test_preds = get_proba('daejeon', 'ft_transformer', 'pure', 2)\n",
|
| 884 |
-
"probas.append(test_preds)\n",
|
| 885 |
-
"\n",
|
| 886 |
-
"\n",
|
| 887 |
-
"# 3 Fold\n",
|
| 888 |
-
"test_preds = get_proba('daejeon', 'deepgbm', 'pure', 3)\n",
|
| 889 |
-
"probas.append(test_preds)\n",
|
| 890 |
-
"test_preds = get_proba('daejeon', 'ft_transformer', 'pure', 3)\n",
|
| 891 |
-
"probas.append(test_preds)\n",
|
| 892 |
-
"\n",
|
| 893 |
-
"voting.append(calculate_csi(df_daejeon_test.iloc[:,-1], np.argmax(np.mean(probas, axis=0), axis=1)))\n",
|
| 894 |
-
"mcc.append(multiclass_mcc(df_daejeon_test.iloc[:,-1], np.argmax(np.mean(probas, axis=0), axis=1)))\n",
|
| 895 |
-
"accuracy.append(accuracy_score(df_daejeon_test.iloc[:,-1], np.argmax(np.mean(probas, axis=0), axis=1)))\n",
|
| 896 |
-
"\n",
|
| 897 |
-
"\n",
|
| 898 |
-
"print(\"CSI score of soft(test) :\", calculate_csi(df_daejeon_test.iloc[:,-1], np.argmax(np.mean(probas, axis=0), axis=1)))"
|
| 899 |
-
]
|
| 900 |
-
},
|
| 901 |
-
{
|
| 902 |
-
"cell_type": "code",
|
| 903 |
-
"execution_count": 20,
|
| 904 |
-
"metadata": {},
|
| 905 |
-
"outputs": [
|
| 906 |
-
{
|
| 907 |
-
"data": {
|
| 908 |
-
"text/plain": [
|
| 909 |
-
"array([[ 15, 23, 15],\n",
|
| 910 |
-
" [ 10, 337, 271],\n",
|
| 911 |
-
" [ 0, 433, 7656]])"
|
| 912 |
-
]
|
| 913 |
-
},
|
| 914 |
-
"execution_count": 20,
|
| 915 |
-
"metadata": {},
|
| 916 |
-
"output_type": "execute_result"
|
| 917 |
-
}
|
| 918 |
-
],
|
| 919 |
-
"source": [
|
| 920 |
-
"confusion_matrix(df_daejeon_test.iloc[:,-1], np.argmax(np.mean(probas, axis=0), axis=1))"
|
| 921 |
-
]
|
| 922 |
-
},
|
| 923 |
-
{
|
| 924 |
-
"cell_type": "markdown",
|
| 925 |
-
"metadata": {},
|
| 926 |
-
"source": [
|
| 927 |
-
"## **광주**"
|
| 928 |
-
]
|
| 929 |
-
},
|
| 930 |
-
{
|
| 931 |
-
"cell_type": "code",
|
| 932 |
-
"execution_count": 21,
|
| 933 |
-
"metadata": {},
|
| 934 |
-
"outputs": [
|
| 935 |
-
{
|
| 936 |
-
"name": "stdout",
|
| 937 |
-
"output_type": "stream",
|
| 938 |
-
"text": [
|
| 939 |
-
"CSI score of soft(test) : 0.4759725400457121\n"
|
| 940 |
-
]
|
| 941 |
-
}
|
| 942 |
-
],
|
| 943 |
-
"source": [
|
| 944 |
-
"from scipy.stats import mode # 최다 득표수 계산을 위한 mode 함수\n",
|
| 945 |
-
"\n",
|
| 946 |
-
"probas = []\n",
|
| 947 |
-
"\n",
|
| 948 |
-
"# 1 Fold\n",
|
| 949 |
-
"test_preds = get_proba('gwangju', 'deepgbm', 'pure', 1)\n",
|
| 950 |
-
"probas.append(test_preds)\n",
|
| 951 |
-
"test_preds = get_proba('gwangju', 'ft_transformer', 'pure', 1)\n",
|
| 952 |
-
"probas.append(test_preds)\n",
|
| 953 |
-
"\n",
|
| 954 |
-
"\n",
|
| 955 |
-
"# 2 Fold\n",
|
| 956 |
-
"test_preds = get_proba('gwangju', 'deepgbm', 'pure', 2)\n",
|
| 957 |
-
"probas.append(test_preds)\n",
|
| 958 |
-
"test_preds = get_proba('gwangju', 'ft_transformer', 'pure', 2)\n",
|
| 959 |
-
"probas.append(test_preds)\n",
|
| 960 |
-
"\n",
|
| 961 |
-
"\n",
|
| 962 |
-
"# 3 Fold\n",
|
| 963 |
-
"test_preds = get_proba('gwangju', 'deepgbm', 'pure', 3)\n",
|
| 964 |
-
"probas.append(test_preds)\n",
|
| 965 |
-
"test_preds = get_proba('gwangju', 'ft_transformer', 'pure', 3)\n",
|
| 966 |
-
"probas.append(test_preds)\n",
|
| 967 |
-
"\n",
|
| 968 |
-
"voting.append(calculate_csi(df_gwangju_test.iloc[:,-1], np.argmax(np.mean(probas, axis=0), axis=1)))\n",
|
| 969 |
-
"mcc.append(multiclass_mcc(df_gwangju_test.iloc[:,-1], np.argmax(np.mean(probas, axis=0), axis=1)))\n",
|
| 970 |
-
"accuracy.append(accuracy_score(df_gwangju_test.iloc[:,-1], np.argmax(np.mean(probas, axis=0), axis=1)))\n",
|
| 971 |
-
"\n",
|
| 972 |
-
"\n",
|
| 973 |
-
"print(\"CSI score of soft(test) :\", calculate_csi(df_gwangju_test.iloc[:,-1], np.argmax(np.mean(probas, axis=0), axis=1)))"
|
| 974 |
-
]
|
| 975 |
-
},
|
| 976 |
-
{
|
| 977 |
-
"cell_type": "code",
|
| 978 |
-
"execution_count": 22,
|
| 979 |
-
"metadata": {},
|
| 980 |
-
"outputs": [
|
| 981 |
-
{
|
| 982 |
-
"data": {
|
| 983 |
-
"text/plain": [
|
| 984 |
-
"array([[ 10, 12, 8],\n",
|
| 985 |
-
" [ 2, 406, 235],\n",
|
| 986 |
-
" [ 0, 201, 7886]])"
|
| 987 |
-
]
|
| 988 |
-
},
|
| 989 |
-
"execution_count": 22,
|
| 990 |
-
"metadata": {},
|
| 991 |
-
"output_type": "execute_result"
|
| 992 |
-
}
|
| 993 |
-
],
|
| 994 |
-
"source": [
|
| 995 |
-
"confusion_matrix(df_gwangju_test.iloc[:,-1], np.argmax(np.mean(probas, axis=0), axis=1))"
|
| 996 |
-
]
|
| 997 |
-
},
|
| 998 |
-
{
|
| 999 |
-
"cell_type": "code",
|
| 1000 |
-
"execution_count": 23,
|
| 1001 |
-
"metadata": {},
|
| 1002 |
-
"outputs": [
|
| 1003 |
-
{
|
| 1004 |
-
"data": {
|
| 1005 |
-
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA04AAAHDCAYAAAATEUquAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuNSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/xnp5ZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABDfUlEQVR4nO3deVwVdf///+cBZBfEDcgQUtw33EMvxQpDLcsrKysL5GvqpWJeF5VFlmiLWJbppaZZH9PcMs1WU0vUSrM0FZdCxd00cClFUKHg/fujH+fqCDhiynF53G+3c6t5z3tmXnPOeM55MjPvYzPGGAEAAAAASuXi7AIAAAAA4EpHcAIAAAAACwQnAAAAALBAcAIAAAAACwQnAAAAALBAcAIAAAAACwQnAAAAALBAcAIAAAAACwQnAAAAALBAcAIAXFPWr1+vdu3aycfHRzabTWlpac4uSSNHjpTNZnN2GeVq1apVstlsWrVqVZmXnTFjhmw2m/bt23fJ6wKAi0VwAoDLaPfu3RowYIBq1aolT09P+fn5qX379powYYLOnDlj75efn68JEyaoefPm8vPzU6VKldSoUSP1799f27dvt/cr+kL5ww8/WG573759io+PV+3ateXp6amgoCB17NhRycnJl2VfrwS///677rvvPv366696/fXXNWvWLIWGhhbr99hjj8lms2nXrl2lrmv48OGy2WzasmXLBW379OnTGjly5EUFhcupT58+stls8vPzczjmimRkZMhms8lms+nVV191QoUAcHUgOAHAZbJ48WI1adJE77//vrp3766JEycqJSVFNWvW1JNPPqmhQ4fa+/bs2VOPP/64GjdurDFjxmjUqFHq2LGjlixZou+++67M2961a5eaN2+uZcuW6cEHH9SkSZM0ePBgValSRS+//PKl3M0ryu7du7V//3498cQT6t+/vx5++GEFBAQU69e7d29J0ty5c0td17x589SkSRM1bdr0grZ9+vRpjRo1qsTg9Oyzz5YYWsqLm5ubTp8+rU8//bTYvDlz5sjT09MJVQHA1cXN2QUAwLVo7969euCBBxQaGqoVK1YoODjYPm/w4MHatWuXFi9eLOnPS8s+++wzvfTSS3rmmWcc1jNp0iSdOHGizNt//fXXlZOTo7S0tGJnXI4cOVL2HfobcnNz5ePjUy7bKtq3SpUqnbdf27ZtFR4ernnz5mnEiBHF5q9du1Z79+7VmDFjLkldbm5ucnNz3keuh4eH2rdvr3nz5un+++93mDd37lzdcccd+uCDD5xUHQBcHTjjBACXwSuvvKKcnBz93//9n0NoKhIeHm4/47R7925JUvv27Yv1c3V1VZUqVcq8/d27d+vGG28s8TK16tWrF2tbsmSJoqKiVLFiRfn5+al169bFzsYsWLBALVu2lJeXl6pWraqHH35Yhw4dcujTp08f+fr6avfu3erWrZsqVqxoP7tTWFio8ePHq1GjRvL09FRgYKAGDBig33777YL2acWKFerQoYN8fHxUqVIl3X333UpPT3fYdlRUlCTpvvvuk81mU6dOnUpdX+/evbV9+3Zt3Lix2Ly5c+fKZrPpwQcflPRnIOvbt68CAwPl6empZs2aaebMmfb++/btU7Vq1SRJo0aNsl/6NnLkSEkl3+Nks9mUkJCgjz76SI0bN5aHh4caNWqkpUuXFqtn1apVatWqlTw9PVW7dm29+eabZb5v6qGHHtKSJUscgvj69euVkZGhhx56qMRl9uzZo/vuu0+VK1eWt7e3br75Znvg/6uff/5ZPXr0kI+Pj6pXr67//Oc/ysvLK3Gd33//vbp06SJ/f395e3srKipKa9asueD9AABnITgBwGXw6aefqlatWmrXrp1l36JwM2fOHP3xxx+XZPuhoaE6ePCgVqxYYdl3xowZuuOOO/Trr78qKSlJY8aMUUREhMMX+BkzZuj++++Xq6urUlJS1K9fPy1atEj/+Mc/ip0R++OPPx
QTE6Pq1avr1VdfVc+ePSVJAwYM0JNPPmm/xys+Pl5z5sxRTEyMfv/99/PWuHz5csXExOjIkSMaOXKkEhMT9e2336p9+/b2AQQGDBhgP2P32GOPadasWRo+fHip6yztcr2CggK9//776tChg2rWrKkzZ86oU6dOmjVrlnr37q2xY8fK399fffr00YQJEyRJ1apV05QpUyRJ//znPzVr1izNmjVL99xzz3n3a/Xq1Ro0aJAeeOABvfLKKzp79qx69uyp48eP2/ts2rRJXbp00fHjxzVq1Cj17dtXzz//vD766KPzrvtc99xzj2w2mxYtWmRvmzt3rurXr68WLVoU65+VlaV27dpp2bJlGjRokF566SWdPXtWd911lz788EN7vzNnzui2227TsmXLlJCQoOHDh+ubb77RsGHDiq1zxYoV6tixo7Kzs5WcnKzRo0frxIkTuvXWW7Vu3boy7Q8AlDsDALikTp48aSSZu++++4L6FxYWmqioKCPJBAYGmgcffNBMnjzZ7N+/v1jfd955x0gy69evP+86t23bZry8vIwkExERYYYOHWo++ugjk5ub69DvxIkTpmLFiqZt27bmzJkzxeoyxpj8/HxTvXp107hxY4c+n332mZFkRowYYW+Li4szkszTTz/tsK5vvvnGSDJz5sxxaF+6dGmJ7eeKiIgw1atXN8ePH7e3bd682bi4uJjY2Fh728qVK40ks2DBgvOur0jr1q3NjTfeaAoKCorV9OabbxpjjBk/fryRZGbPnm3vk5+fbyIjI42vr6/Jzs42xhhz9OhRI8kkJycX205ycrI59yNXknF3dze7du1y2CdJZuLEifa27t27G29vb3Po0CF7W0ZGhnFzcyu2zpLExcUZHx8fY4wx9957r7ntttuMMcYUFBSYoKAgM2rUKLN3714jyYwdO9a+3L///W8jyXzzzTf2tlOnTpmbbrrJhIWF2Z+zoufn/ffft/fLzc014eHhRpJZuXKlMebP46lOnTomJibGfmwZY8zp06fNTTfdZDp37mxvKzrO9+7da7l/AFBeOOMEAJdYdna2JKlixYoX1N9ms2nZsmV68cUXFRAQoHnz5mnw4MEKDQ1Vr169Luoep0aNGiktLU0PP/yw9u3bpwkTJqhHjx4KDAzUW2+9Ze/35Zdf6tSpU3r66aeLDRBQdBnYDz/8oCNHjmjQoEEOfe644w7Vr1+/xEu3Bg4c6DC9YMEC+fv7q3Pnzjp27Jj90bJlS/n6+mrlypWl7ssvv/yitLQ09enTR5UrV7a3N23aVJ07d9bnn39etifnLx5++GH9/PPP+vrrr+1tc+fOlbu7u+677z5J0ueff66goCD7ZXuSVKFCBT322GPKycnRV199ddHbj46OVu3ate3TTZs2lZ+fn/bs2SPpz7Nfy5cvV48ePXTDDTfY+4WHh6tr165l3t5DDz2kVatWKTMzUytWrFBmZmapl+l9/vnnatOmjf7xj3/Y23x9fdW/f3/t27dPP/30k71fcHCw7r33Xns/b29v9e/f32F9aWlp9ssCjx8/bj8GcnNzddttt+nrr79WYWFhmfcJAMoLwQkALjE/Pz9J0qlTpy54GQ8PDw0fPlzp6ek6fPiw5s2bp5tvvlnvv/++EhISLqqOunXratasWTp27Ji2bNmi0aNHy83NTf3799fy5csl/e/+qsaNG5e6nv3790uS6tWrV2xe/fr17fOLuLm56cYbb3Roy8jI0MmTJ1W9enVVq1bN4ZGTk3PeASvOt/0GDRrYv3xfjAceeECurq72y/XOnj2rDz/8UF27drWPxrd//37VqVNHLi6OH5kNGjRwqO9i1KxZs1hbQECA/b6vI0eO6MyZMwoPDy/Wr6Q2K0X3nc2fP19z5sxR69atS13P/v37S33Oi+YX/Tc8PLzY/VbnLpuRkSFJiouLK3YMvP3228rLy9PJkyfLvE8AUF4YVQ8ALjE/Pz/dcMMN2rZt20UtHxwcrAceeEA9e/ZUo0
aN9P7772vGjBkXPSqbq6urmjRpoiZNmigyMlK33HKL5syZo+jo6ItanxUPD49iIaOwsFDVq1fXnDlzSlymaGCF8la9enV17txZH3zwgSZPnqxPP/1Up06dst//dLm5urqW2G6MuSzb8/Dw0D333KOZM2dqz5499sErykPR2aSxY8cqIiKixD6+vr7lVg8AlBXBCQAugzvvvFPTpk3T2rVrFRkZeVHrqFChgpo2baqMjAwdO3ZMQUFBf7uuVq1aSfrz8jdJ9svEtm3bVuqZh6LBK3bs2KFbb73VYd6OHTtKHLnvXLVr19by5cvVvn17eXl5lanmv27/XNu3b1fVqlX/1nDnvXv31tKlS7VkyRLNnTtXfn5+6t69u8P2t2zZosLCQodAWPTDxEX1lWWEuwtVvXp1eXp6lvhDvef78d7zeeihhzR9+nS5uLjogQceKLVfaGhoqc950fyi/27btk3GGIfn4Nxli441Pz+/yxbaAeBy4lI9ALgMhg0bJh8fHz366KPKysoqNn/37t32EdkyMjJ04MCBYn1OnDihtWvXKiAgoMxnZL755psSR6oruh+o6DKq22+/XRUrVlRKSorOnj3r0LforEerVq1UvXp1TZ061WGI6SVLlig9PV133HGHZT3333+/CgoK9MILLxSb98cff5z3Pq7g4GBFRERo5syZDv22bdumL774Qt26dbPc/vn06NFD3t7eeuONN7RkyRLdc889DvdydevWTZmZmZo/f75DzRMnTpSvr699CHRvb29Juqh70krj6uqq6OhoffTRRzp8+LC9fdeuXVqyZMlFrfOWW27RCy+8oEmTJp03jHfr1k3r1q3T2rVr7W25ubmaNm2awsLC1LBhQ3u/w4cPa+HChfZ+p0+f1rRp0xzW17JlS9WuXVuvvvqqcnJyim3v6NGjF7U/AFBeOOMEAJdB7dq1NXfuXPXq1UsNGjRQbGysGjdurPz8fH377bdasGCB+vTpI0navHmzHnroIXXt2lUdOnRQ5cqVdejQIc2cOVOHDx/W+PHjS72kqzQvv/yyNmzYoHvuuUdNmzaVJG3cuFHvvvuuKleurH//+9+S/vzr/+uvv65HH31UrVu31kMPPaSAgABt3rxZp0+f1syZM1WhQgW9/PLLio+PV1RUlB588EFlZWVpwoQJCgsL03/+8x/LeqKiojRgwAClpKQoLS1Nt99+uypUqKCMjAwtWLBAEyZMcBhc4Fxjx45V165dFRkZqb59++rMmTOaOHGi/P39//blZr6+vurRo4f9PqdzL9Pr37+/3nzzTfXp00cbNmxQWFiYFi5cqDVr1mj8+PH2QUC8vLzUsGFDzZ8/X3Xr1lXlypXVuHHj894/diFGjhypL774Qu3bt9fAgQNVUFCgSZMmqXHjxkpLSyvz+lxcXPTss89a9nv66ac1b948de3aVY899pgqV66smTNnau/evfrggw/sZ9/69eunSZMmKTY2Vhs2bFBwcLBmzZplD5J/3e7bb7+trl27qlGjRoqPj1eNGjV06NAhrVy5Un5+fvr000/LvD8AUG6cPKofAFzTdu7cafr162fCwsKMu7u7qVixomnfvr2ZOHGiOXv2rDHGmKysLDNmzBgTFRVlgoODjZubmwkICDC33nqrWbhwocP6LnQ48jVr1pjBgwebxo0bG39/f1OhQgVTs2ZN06dPH7N79+5i/T/55BPTrl074+XlZfz8/EybNm3MvHnzHPrMnz/fNG/e3Hh4eJjKlSub3r17m59//tmhz1+Hvi7JtGnTTMuWLY2Xl5epWLGiadKkiRk2bJg5fPjweffHGGOWL19u2rdvb6+xe/fu5qeffnLoU9bhyIssXrzYSDLBwcEOQ5MXycrKMvHx8aZq1arG3d3dNGnSxLzzzjvF+n377bemZcuWxt3d3WFo8tKGIx88eHCxdYSGhpq4uDiHttTUVNO8eXPj7u5uateubd5++23z+OOPG09PT8t9s3pNjDElDkdujD
G7d+829957r6lUqZLx9PQ0bdq0MZ999lmx5ffv32/uuusu4+3tbapWrWqGDh1qH9a9aDjyIps2bTL33HOPqVKlivHw8DChoaHm/vvvN6mpqfY+DEcO4EpkM+Yy3YEKAAAumx49eujHH3+0j1YHALi8uMcJAIAr3JkzZxymMzIy9Pnnn6tTp07OKQgArkOccQIA4AoXHBysPn36qFatWtq/f7+mTJmivLw8bdq0SXXq1HF2eQBwXWBwCAAArnBdunTRvHnzlJmZKQ8PD0VGRmr06NGEJgAoR5xxAgAAAAAL3OMEAAAAABYITgAAAABgwen3OE2ePFljx45VZmammjVrpokTJ6pNmzal9j9x4oSGDx+uRYsW6ddff1VoaKjGjx9/wb8cX1hYqMOHD6tixYqy2WyXajcAAAAAXGWMMTp16pRuuOEG+w97l8apwWn+/PlKTEzU1KlT1bZtW40fP14xMTHasWOHqlevXqx/fn6+OnfurOrVq2vhwoWqUaOG9u/fr0qVKl3wNg8fPqyQkJBLuBcAAAAArmYHDx7UjTfeeN4+Th0com3btmrdurUmTZok6c+zQSEhIRoyZIiefvrpYv2nTp2qsWPHavv27apQocJFbfPkyZOqVKmSDh48KD8/v79VPwAAAICrV3Z2tkJCQnTixAn5+/uft6/Tzjjl5+drw4YNSkpKsre5uLgoOjpaa9euLXGZTz75RJGRkRo8eLA+/vhjVatWTQ899JCeeuopubq6XtB2iy7P8/PzIzgBAAAAuKBbeJwWnI4dO6aCggIFBgY6tAcGBmr79u0lLrNnzx6tWLFCvXv31ueff65du3Zp0KBB+v3335WcnFziMnl5ecrLy7NPZ2dnX7qdAAAAAHBduKpG1SssLFT16tU1bdo0tWzZUr169dLw4cM1derUUpdJSUmRv7+//cH9TQAAAADKymnBqWrVqnJ1dVVWVpZDe1ZWloKCgkpcJjg4WHXr1nW4LK9BgwbKzMxUfn5+icskJSXp5MmT9sfBgwcv3U4AAAAAuC44LTi5u7urZcuWSk1NtbcVFhYqNTVVkZGRJS7Tvn177dq1S4WFhfa2nTt3Kjg4WO7u7iUu4+HhYb+fifuaAAAAAFwMp16ql5iYqLfeekszZ85Uenq6Bg4cqNzcXMXHx0uSYmNjHQaPGDhwoH799VcNHTpUO3fu1OLFizV69GgNHjzYWbsAAAAA4Drg1N9x6tWrl44ePaoRI0YoMzNTERERWrp0qX3AiAMHDjj8EFVISIiWLVum//znP2ratKlq1KihoUOH6qmnnnLWLgAAAAC4Djj1d5ycITs7W/7+/jp58iSX7QEAAADXsbJkg6tqVD0AAAAAcAaCEwAAAABYIDgBAAAAgAWCEwAAAABYIDgBAAAAgAWCEwAAAABYIDgBAAAAgAWCEwAAAABYIDgBAAAAgAU3ZxcAAFeT9hPbO7sElGLNkDXOLgEAcA3jjBMAAAAAWCA4AQAAAIAFghMAAAAAWCA4AQAAAIAFghMAAAAAWCA4AQAAAIAFghMAAAAAWCA4AQAAAIAFghMAAAAAWCA4AQAAAIAFghMAAAAAWCA4AQAAAIAFghMAAAAAWCA4AQAAAIAFghMAAAAAWCA4AQAAAIAFghMAAAAAWCA4AQAAAIAFghMAAAAAWCA4AQAAAIAFghMAAAAAWCA4AQAAAIAFghMAAAAAWCA4AQAAAIAFghMAAAAAWCA4AQAAAIAFghMAAAAAWCA4AQAAAIAFghMAAAAAWCA4AQAAAIAFghMAAAAAWCA4AQAAAIAFghMAAAAAWCA4AQAAAIAFghMAAAAAWCA4AQAAAIAFghMAAAAAWCA4AQAAAIAFghMAAAAAWCA4AQAAAIAFghMAAAAAWCA4AQAAAIAFghMAAAAAWLgigtPkyZMVFhYmT09PtW3bVuvWrSu174wZM2Sz2Rwenp6e5VgtAAAAgOuN04PT/PnzlZiYqOTkZG3cuFHNmjVTTEyMjh
w5Uuoyfn5++uWXX+yP/fv3l2PFAAAAAK43Tg9O48aNU79+/RQfH6+GDRtq6tSp8vb21vTp00tdxmazKSgoyP4IDAwsx4oBAAAAXG+cGpzy8/O1YcMGRUdH29tcXFwUHR2ttWvXlrpcTk6OQkNDFRISorvvvls//vhjqX3z8vKUnZ3t8AAAAACAsnBqcDp27JgKCgqKnTEKDAxUZmZmicvUq1dP06dP18cff6zZs2ersLBQ7dq1088//1xi/5SUFPn7+9sfISEhl3w/AAAAAFzbnH6pXllFRkYqNjZWERERioqK0qJFi1StWjW9+eabJfZPSkrSyZMn7Y+DBw+Wc8UAAAAArnZuztx41apV5erqqqysLIf2rKwsBQUFXdA6KlSooObNm2vXrl0lzvfw8JCHh8ffrhUAAADA9cupZ5zc3d3VsmVLpaam2tsKCwuVmpqqyMjIC1pHQUGBtm7dquDg4MtVJgAAAIDrnFPPOElSYmKi4uLi1KpVK7Vp00bjx49Xbm6u4uPjJUmxsbGqUaOGUlJSJEnPP/+8br75ZoWHh+vEiRMaO3as9u/fr0cffdSZuwEAAADgGub04NSrVy8dPXpUI0aMUGZmpiIiIrR06VL7gBEHDhyQi8v/Toz99ttv6tevnzIzMxUQEKCWLVvq22+/VcOGDZ21CwAAAACucTZjjHF2EeUpOztb/v7+OnnypPz8/JxdDoCrTPuJ7Z1dAkqxZsgaZ5cAALjKlCUbXHWj6gEAAABAeSM4AQAAAIAFghMAAAAAWCA4AQAAAIAFghMAAAAAWCA4AQAAAIAFghMAAAAAWCA4AQAAAIAFN2cXAAAAAFxNXnr4XmeXgFIMn73wsq2bM04AAAAAYIHgBAAAAAAWCE4AAAAAYIHgBAAAAAAWCE4AAAAAYIHgBAAAAAAWGI4cuEAHnm/i7BJwHjVHbHV2CQAA4BrGGScAAAAAsEBwAgAAAAALBCcAAAAAsEBwAgAAAAALBCcAAAAAsEBwAgAAAAALBCcAAAAAsEBwAgAAAAALBCcAAAAAsEBwAgAAAAALBCcAAAAAsEBwAgAAAAALBCcAAAAAsEBwAgAAAAALBCcAAAAAsEBwAgAAAAALBCcAAAAAsEBwAgAAAAALBCcAAAAAsEBwAgAAAAALBCcAAAAAsEBwAgAAAAALBCcAAAAAsEBwAgAAAAALBCcAAAAAsEBwAgAAAAALBCcAAAAAsEBwAgAAAAALBCcAAAAAsEBwAgAAAAALBCcAAAAAsEBwAgAAAAALBCcAAAAAsEBwAgAAAAALBCcAAAAAsEBwAgAAAAALV0Rwmjx5ssLCwuTp6am2bdtq3bp1F7Tce++9J5vNph49elzeAgEAAABc15wenObPn6/ExEQlJydr48aNatasmWJiYnTkyJHzLrdv3z498cQT6tChQzlVCgAAAOB65fTgNG7cOPXr10/x8fFq2LChpk6dKm9vb02fPr3UZQoKCtS7d2+NGjVKtWrVKsdqAQAAAFyPnBqc8vPztWHDBkVHR9vbXFxcFB0drbVr15a63PPPP6/q1aurb9++5VEmAAAAgOucmzM3fuzYMRUUFCgwMNChPTAwUNu3by9xmdWrV+v//u//lJaWdkHbyMvLU15enn06Ozv7ousFAAAAcH1y+qV6ZXHq1Ck98sgjeuutt1S1atULWiYlJUX+/v72R0hIyGWuEgAAAMC1xqlnnKpWrSpXV1dlZWU5tGdlZSkoKKhY/927d2vfvn3q3r27va2wsFCS5Obmph07dqh27doOyyQlJSkxMdE+nZ2dTXgCAAAAUCZODU7u7u5q2bKlUlNT7UOKFxYWKjU1VQkJCcX6169fX1u3bnVoe/bZZ3Xq1ClNmDChxEDk4eEhDw+Py1I/AAAAgOuDU4OTJCUmJiouLk6tWrVSmzZtNH78eOXm5io+Pl6SFBsbqxo1aiglJUWenp5q3Lixw/KVKlWSpGLtAAAAAHCpOD049erVS0ePHtWIES
OUmZmpiIgILV261D5gxIEDB+TiclXdigUAAADgGuP04CRJCQkJJV6aJ0mrVq0677IzZsy49AUBAAAAwF9wKgcAAAAALBCcAAAAAMACwQkAAAAALBCcAAAAAMACwQkAAAAALBCcAAAAAMACwQkAAAAALBCcAAAAAMACwQkAAAAALBCcAAAAAMACwQkAAAAALBCcAAAAAMACwQkAAAAALBCcAAAAAMACwQkAAAAALBCcAAAAAMACwQkAAAAALBCcAAAAAMACwQkAAAAALBCcAAAAAMACwQkAAAAALBCcAAAAAMACwQkAAAAALBCcAAAAAMACwQkAAAAALBCcAAAAAMACwQkAAAAALBCcAAAAAMACwQkAAAAALBCcAAAAAMACwQkAAAAALBCcAAAAAMACwQkAAAAALBCcAAAAAMACwQkAAAAALBCcAAAAAMACwQkAAAAALBCcAAAAAMACwQkAAAAALBCcAAAAAMACwQkAAAAALBCcAAAAAMCCm7MLuFq0fPJdZ5eA89gwNtbZJQAAAOAaxhknAAAAALBAcAIAAAAAC1yqBwAAUAaTHv/U2SWgFAmvdXd2CbiGccYJAAAAACyU6YzTli1bLqhf06ZNL6oYAAAAALgSlSk4RUREyGazyRhTbF5Ru81mU0FBwSUrEAAAAACcrUzBae/evZerDgAAAAC4YpUpOIWGhl6uOgAAAADgilWmwSGOHTum/fv3O7T9+OOPio+P1/3336+5c+de0uIAAAAA4EpQpuA0ZMgQ/fe//7VPHzlyRB06dND69euVl5enPn36aNasWZe8SAAAAABwpjIFp++++0533XWXffrdd99V5cqVlZaWpo8//lijR4/W5MmTy1zE5MmTFRYWJk9PT7Vt21br1q0rte+iRYvUqlUrVapUST4+PoqIiCCsAQAAALisyhScMjMzFRYWZp9esWKF7rnnHrm5/Xmr1F133aWMjIwyFTB//nwlJiYqOTlZGzduVLNmzRQTE6MjR46U2L9y5coaPny41q5dqy1btig+Pl7x8fFatmxZmbYLAAAAABeqTMHJz89PJ06csE+vW7dObdu2tU/bbDbl5eWVqYBx48apX79+io+PV8OGDTV16lR5e3tr+vTpJfbv1KmT/vnPf6pBgwaqXbu2hg4dqqZNm2r16tVl2i4AAAAAXKgyBaebb75Z//3vf1VYWKiFCxfq1KlTuvXWW+3zd+7cqZCQkAteX35+vjZs2KDo6Oj/FeTioujoaK1du9ZyeWOMUlNTtWPHDnXs2LHEPnl5ecrOznZ4AAAAAEBZlCk4Pf/88/rkk0/k5eWlXr16adiwYQoICLDPf++99xQVFXXB6zt27JgKCgoUGBjo0B4YGKjMzMxSlzt58qR8fX3l7u6uO+64QxMnTlTnzp1L7JuSkiJ/f3/7oyzBDgAAAACkMv6OU7NmzZSenq41a9YoKCjI4TI9SXrwwQfVoEGDS1pgSSpWrKi0tDTl5OQoNTVViYmJqlWrljp16lSsb1JSkhITE+3T2dnZhCcAAAAAZVKm4LRixQolJCTou+++k5+fn8O8kydP6sknn9TUqVN10003XdD6qlatKldXV2VlZTm0Z2VlKSgoqNTlXFxcFB4eLkmKiIhQenq6UlJSSgxOHh4e8vDwuKB6AAAAAKAkZbpUb/z48erXr1+x0CRJ/v7+GjBggMaNG3fB63N3d1fLli2VmppqbyssLFRqaqoiIyMveD2FhYVlHpQCAAAAAC5UmYLT5s2b1aVLl1Ln33777dqwYUOZCkhMTNRbb72lmTNnKj09XQMHDlRubq7i4+MlSbGxsUpKSrL3T0lJ0Zdffqk9e/YoPT1dr732mmbNmqWHH364TNsFAAAAgAtVpkv1srKyVKFChdJX5uamo0ePlqmAXr166ejRoxoxYoQyMzMVERGhpUuX2geMOHDggFxc/pfvcnNzNWjQIP3888/y8vJS/fr1NXv2bPXq1atM2wUAAACAC1Wm4FSjRg
1t27bNfn/RubZs2aLg4OAyF5GQkKCEhIQS561atcph+sUXX9SLL75Y5m0AAAAAwMUq06V63bp103PPPaezZ88Wm3fmzBklJyfrzjvvvGTFAQAAAMCVoExnnJ599lktWrRIdevWVUJCgurVqydJ2r59uyZPnqyCggINHz78shQKAAAAAM5SpuAUGBiob7/9VgMHDlRSUpKMMZIkm82mmJgYTZ48udiP2QIAAADA1a5MwUmSQkND9fnnn+u3337Trl27ZIxRnTp1FBAQcDnqAwDgivJVxyhnl4BSRH39lbNLAHANK3NwKhIQEKDWrVtfyloAAAAA4IpUpsEhAAAAAOB6RHACAAAAAAsEJwAAAACwQHACAAAAAAsEJwAAAACwQHACAAAAAAsEJwAAAACwQHACAAAAAAsEJwAAAACwQHACAAAAAAsEJwAAAACwQHACAAAAAAsEJwAAAACwQHACAAAAAAsEJwAAAACwQHACAAAAAAsEJwAAAACwQHACAAAAAAsEJwAAAACwQHACAAAAAAsEJwAAAACwQHACAAAAAAsEJwAAAACwQHACAAAAAAsEJwAAAACwQHACAAAAAAsEJwAAAACwQHACAAAAAAsEJwAAAACwQHACAAAAAAsEJwAAAACwQHACAAAAAAsEJwAAAACwQHACAAAAAAsEJwAAAACwQHACAAAAAAsEJwAAAACwQHACAAAAAAsEJwAAAACwQHACAAAAAAsEJwAAAACwQHACAAAAAAsEJwAAAACwQHACAAAAAAsEJwAAAACwQHACAAAAAAsEJwAAAACwQHACAAAAAAtXRHCaPHmywsLC5OnpqbZt22rdunWl9n3rrbfUoUMHBQQEKCAgQNHR0eftDwAAAAB/l9OD0/z585WYmKjk5GRt3LhRzZo1U0xMjI4cOVJi/1WrVunBBx/UypUrtXbtWoWEhOj222/XoUOHyrlyAAAAANcLpwencePGqV+/foqPj1fDhg01depUeXt7a/r06SX2nzNnjgYNGqSIiAjVr19fb7/9tgoLC5WamlrOlQMAAAC4Xjg1OOXn52vDhg2Kjo62t7m4uCg6Olpr1669oHWcPn1av//+uypXrlzi/Ly8PGVnZzs8AAAAAKAsnBqcjh07poKCAgUGBjq0BwYGKjMz84LW8dRTT+mGG25wCF9/lZKSIn9/f/sjJCTkb9cNAAAA4Pri9Ev1/o4xY8bovffe04cffihPT88S+yQlJenkyZP2x8GDB8u5SgAAAABXOzdnbrxq1apydXVVVlaWQ3tWVpaCgoLOu+yrr76qMWPGaPny5WratGmp/Tw8POTh4XFJ6gUAAABwfXLqGSd3d3e1bNnSYWCHooEeIiMjS13ulVde0QsvvKClS5eqVatW5VEqAAAAgOuYU884SVJiYqLi4uLUqlUrtWnTRuPHj1dubq7i4+MlSbGxsapRo4ZSUlIkSS+//LJGjBihuXPnKiwszH4vlK+vr3x9fZ22HwAAAACuXU4PTr169dLRo0c1YsQIZWZmKiIiQkuXLrUPGHHgwAG5uPzvxNiUKVOUn5+ve++912E9ycnJGjlyZHmWDgAAAOA64fTgJEkJCQlKSEgocd6qVascpvft23f5CwIAAACAv7iqR9UDAAAAgPJAcAIAAAAACwQnAAAAALBAcAIAAAAACwQnAAAAALBAcAIAAAAACwQnAAAAALBAcAIAAAAACwQnAAAAALBAcAIAAAAACwQnAAAAALBAcAIAAAAACwQnAAAAALBAcAIAAAAACwQnAAAAALBAcAIAAAAACwQnAAAAALBAcAIAAAAACwQnAAAAALBAcAIAAAAACwQnAAAAALBAcAIAAAAACwQnAAAAALBAcAIAAAAACwQnAAAAALBAcAIAAAAACwQnAAAAALBAcAIAAAAACwQnAAAAALBAcAIAAAAACwQnAAAAALBAcAIAAAAACwQnAAAAALBAcAIAAAAACwQnAAAAALBAcAIAAAAACwQnAAAAAL
BAcAIAAAAACwQnAAAAALBAcAIAAAAACwQnAAAAALBAcAIAAAAACwQnAAAAALBAcAIAAAAACwQnAAAAALBAcAIAAAAACwQnAAAAALBAcAIAAAAACwQnAAAAALBAcAIAAAAACwQnAAAAALBAcAIAAAAAC04PTpMnT1ZYWJg8PT3Vtm1brVu3rtS+P/74o3r27KmwsDDZbDaNHz++/AoFAAAAcN1yanCaP3++EhMTlZycrI0bN6pZs2aKiYnRkSNHSux/+vRp1apVS2PGjFFQUFA5VwsAAADgeuXU4DRu3Dj169dP8fHxatiwoaZOnSpvb29Nnz69xP6tW7fW2LFj9cADD8jDw6OcqwUAAABwvXJacMrPz9eGDRsUHR39v2JcXBQdHa21a9c6qywAAAAAKMbNWRs+duyYCgoKFBgY6NAeGBio7du3X7Lt5OXlKS8vzz6dnZ19ydYNAAAA4Prg9MEhLreUlBT5+/vbHyEhIc4uCQAAAMBVxmnBqWrVqnJ1dVVWVpZDe1ZW1iUd+CEpKUknT560Pw4ePHjJ1g0AAADg+uC04OTu7q6WLVsqNTXV3lZYWKjU1FRFRkZesu14eHjIz8/P4QEAAAAAZeG0e5wkKTExUXFxcWrVqpXatGmj8ePHKzc3V/Hx8ZKk2NhY1ahRQykpKZL+HFDip59+sv//oUOHlJaWJl9fX4WHhzttPwAAAABc25wanHr16qWjR49qxIgRyszMVEREhJYuXWofMOLAgQNycfnfSbHDhw+refPm9ulXX31Vr776qqKiorRq1aryLh8AAADAdcKpwUmSEhISlJCQUOK8c8NQWFiYjDHlUBUAAAAA/M81P6oeAAAAAPxdBCcAAAAAsEBwAgAAAAALBCcAAAAAsEBwAgAAAAALBCcAAAAAsEBwAgAAAAALBCcAAAAAsEBwAgAAAAALBCcAAAAAsEBwAgAAAAALBCcAAAAAsEBwAgAAAAALBCcAAAAAsEBwAgAAAAALBCcAAAAAsEBwAgAAAAALBCcAAAAAsEBwAgAAAAALBCcAAAAAsEBwAgAAAAALBCcAAAAAsEBwAgAAAAALBCcAAAAAsEBwAgAAAAALBCcAAAAAsEBwAgAAAAALBCcAAAAAsEBwAgAAAAALBCcAAAAAsEBwAgAAAAALBCcAAAAAsEBwAgAAAAALBCcAAAAAsEBwAgAAAAALBCcAAAAAsEBwAgAAAAALBCcAAAAAsEBwAgAAAAALBCcAAAAAsEBwAgAAAAALBCcAAAAAsEBwAgAAAAALBCcAAAAAsEBwAgAAAAALBCcAAAAAsEBwAgAAAAALBCcAAAAAsEBwAgAAAAALBCcAAAAAsEBwAgAAAAALBCcAAAAAsEBwAgAAAAALV0Rwmjx5ssLCwuTp6am2bdtq3bp15+2/YMEC1a9fX56enmrSpIk+//zzcqoUAAAAwPXI6cFp/vz5SkxMVHJysjZu3KhmzZopJiZGR44cKbH/t99+qwcffFB9+/bVpk2b1KNHD/Xo0UPbtm0r58oBAAAAXC+cHpzGjRunfv36KT4+Xg0bNtTUqVPl7e2t6dOnl9h/woQJ6tKli5588kk1aNBAL7zwglq0aKFJkyaVc+UAAAAArhduztx4fn6+NmzYoKSkJHubi4uLoqOjtXbt2hKXWbt2rRITEx3aYmJi9NFHH5XYPy8vT3l5efbpkydPSpKys7PLVGtB3pky9Uf5KuvreTFOnS247NvAxSuPY0CS/jjzR7lsB2VXXsdA7h8cA1eq8joGzuSdLpftoOzK6xg4+/vv5bIdlF1Zj4Gi/sYYy75ODU7Hjh1TQUGBAgMDHdoDAwO1ffv2EpfJzMwssX9mZmaJ/VNSUjRq1Khi7SEhIRdZNa5E/hP/5ewS4Gwp/s6uAE7m/xTHwHXPn2PgejdssrMrgLO9+P7FvQ+cOnVK/hbvIU4NTuUhKSnJ4QxVYWGhfv31V1WpUkU2m82JlTlPdna2QkJCdPDgQfn5+Tm7HD
gBxwA4BsAxAInjABwDxhidOnVKN9xwg2VfpwanqlWrytXVVVlZWQ7tWVlZCgoKKnGZoKCgMvX38PCQh4eHQ1ulSpUuvuhriJ+f33X5DwT/wzEAjgFwDEDiOMD1fQxYnWkq4tTBIdzd3dWyZUulpqba2woLC5WamqrIyMgSl4mMjHToL0lffvllqf0BAAAA4O9y+qV6iYmJiouLU6tWrdSmTRuNHz9eubm5io+PlyTFxsaqRo0aSklJkSQNHTpUUVFReu2113THHXfovffe0w8//KBp06Y5czcAAAAAXMOcHpx69eqlo0ePasSIEcrMzFRERISWLl1qHwDiwIEDcnH534mxdu3aae7cuXr22Wf1zDPPqE6dOvroo4/UuHFjZ+3CVcfDw0PJycnFLmHE9YNjABwD4BiAxHEAjoGysJkLGXsPAAAAAK5jTv8BXAAAAAC40hGcAAAAAMACwQkAAAAALBCccFFWrVolm82mEydOOLsUAOcxY8YMfrsOQKk6deqkf//7384uA9eIa/37IcHpKnT06FENHDhQNWvWlIeHh4KCghQTE6M1a9Y4uzRcRn369JHNZrM/qlSpoi5dumjLli3OLg2XWJ8+fdSjRw9nl4ErzF/fAypUqKDAwEB17txZ06dPV2FhobPLwyVUnq/1okWL9MILL1zSdeLvy8zM1NChQxUeHi5PT08FBgaqffv2mjJlik6fPu3s8krVrl07/fLLLxf8g7JXG4LTVahnz57atGmTZs6cqZ07d+qTTz5Rp06ddPz4cWeXhsusS5cu+uWXX/TLL78oNTVVbm5uuvPOO51dFoByUvQesG/fPi1ZskS33HKLhg4dqjvvvFN//PGHs8vDJVRer3XlypVVsWLFS7Y+/H179uxR8+bN9cUXX2j06NHatGmT1q5dq2HDhumzzz7T8uXLnV1iqdzd3RUUFCSbzebsUi4LgtNV5sSJE/rmm2/08ssv65ZbblFoaKjatGmjpKQk3XXXXfY+jz76qKpVqyY/Pz/deuut2rx5s8N6pkyZotq1a8vd3V316tXTrFmz7PP27dsnm82mtLQ0h+3abDatWrWqPHYTpSg6wxgUFKSIiAg9/fTTOnjwoI4ePVri6fG0tDTZbDbt27dPkrR//351795dAQEB8vHxUaNGjfT5559LkgoKCtS3b1/ddNNN8vLyUr169TRhwgSH7RedCXn11VcVHBysKlWqaPDgwfr999/L6ym47nTq1EmPPfaYhg0bpsqVKysoKEgjR4506HPixAkNGDBAgYGB8vT0VOPGjfXZZ5859Fm2bJkaNGggX19f+xeyv3r77bfVoEEDeXp6qn79+nrjjTcc5m/dulW33nqrvLy8VKVKFfXv3185OTn2+Rwb5aPoPaBGjRpq0aKFnnnmGX388cdasmSJZsyYIUkaN26cmjRpIh8fH4WEhGjQoEEOr5UkrV69Wh06dJCXl5dCQkL02GOPKTc31z7/l19+0R133CEvLy/ddNNNmjt3rsLCwjR+/HhJfE6Uh/J6rc+9VC8vL09PPPGEatSoIR8fH7Vt27bYa/rBBx+oUaNG8vDwUFhYmF577TWH+WFhYRo9erT+3//7f6pYsaJq1qypadOmXdLn51o2aNAgubm56YcfftD999+vBg0aqFatWrr77ru1ePFide/eXU888YTDH07Hjx8vm82mpUuX2tvCw8P19ttvS5LWr1+vzp07q2rVqvL391dUVJQ2btzosF2bzaa3335b//znP+Xt7a06derok08+cejzySefqE6dOvL09NQtt9yimTNnOnz3OPe7yMiRIxUREeGwjvHjxyssLOzSPFnljOB0lfH19ZWvr68++ugj5eXlldjnvvvu05EjR7RkyRJt2LBBLVq00G233aZff/1VkvThhx9q6NChevzxx7Vt2zYNGDBA8fHxWrlyZXnuCv6mnJwczZ49W+Hh4apSpcoFLTN48GDl5eXp66+/1tatW/Xyyy/L19dXklRYWK
gbb7xRCxYs0E8//aQRI0bomWee0fvvv++wjpUrV2r37t1auXKlZs6cqRkzZtg/xHF5zJw5Uz4+Pvr+++/1yiuv6Pnnn9eXX34p6c/XrWvXrlqzZo1mz56tn376SWPGjJGrq6t9+dOnT+vVV1/VrFmz9PXXX+vAgQN64okn7PPnzJmjESNG6KWXXlJ6erpGjx6t5557TjNnzpQk5ebmKiYmRgEBAVq/fr0WLFig5cuXKyEhwaFOjg3nuPXWW9WsWTMtWrRIkuTi4qL//ve/+vHHHzVz5kytWLFCw4YNs/ffvXu3unTpop49e2rLli2aP3++Vq9e7fB6xsbG6vDhw1q1apU++OADTZs2TUeOHCn3fYOjy/FanyshIUFr167Ve++9py1btui+++5Tly5dlJGRIUnasGGD7r//fj3wwAPaunWrRo4cqeeee67Yv/XXXntNrVq10qZNmzRo0CANHDhQO3bsuPRPyjXm+PHj+uKLLzR48GD5+PiU2MdmsykqKkqrV69WQUGBJOmrr75S1apV7SH30KFD2r17tzp16iRJOnXqlOLi4rR69Wp99913qlOnjrp166ZTp045rHvUqFG6//77tWXLFnXr1k29e/e2f3/cu3ev7r33XvXo0UObN2/WgAEDNHz48MvzRFypDK46CxcuNAEBAcbT09O0a9fOJCUlmc2bNxtjjPnmm2+Mn5+fOXv2rMMytWvXNm+++aYxxph27dqZfv36Ocy/7777TLdu3Ywxxuzdu9dIMps2bbLP/+2334wks3LlSmOMMStXrjSSzG+//XZ5dhLFxMXFGVdXV+Pj42N8fHyMJBMcHGw2bNhgjCn5Ndm0aZORZPbu3WuMMaZJkyZm5MiRF7zNwYMHm549ezrUEBoaav744w9723333Wd69er193YODuLi4szdd99tjDEmKirK/OMf/3CY37p1a/PUU08ZY4xZtmyZcXFxMTt27ChxXe+8846RZHbt2mVvmzx5sgkMDLRP165d28ydO9dhuRdeeMFERkYaY4yZNm2aCQgIMDk5Ofb5ixcvNi4uLiYzM9NeM8fG5fXX4+JcvXr1Mg0aNChx3oIFC0yVKlXs03379jX9+/d36PPNN98YFxcXc+bMGZOenm4kmfXr19vnZ2RkGEnm9ddfN8Zc2OcELl55vdbG/PkeM3ToUGOMMfv37zeurq7m0KFDDsvcdtttJikpyRhjzEMPPWQ6d+7sMP/JJ580DRs2tE+Hhoaahx9+2D5dWFhoqlevbqZMmXKevYYxxnz33XdGklm0aJFDe5UqVeyf/8OGDTO//fabcXFxMevXrzeFhYWmcuXKJiUlxbRt29YYY8zs2bNNjRo1St1OQUGBqVixovn000/tbZLMs88+a5/OyckxksySJUuMMcY89dRTpnHjxg7rGT58uMN3j3O/iyQnJ5tmzZo5LPP666+b0NDQsjwtVwzOOF2FevbsqcOHD+uTTz5Rly5dtGrVKrVo0UIzZszQ5s2blZOToypVqtjPTvn6+mrv3r3avXu3JCk9PV3t27d3WGf79u2Vnp7ujN1BGdxyyy1KS0tTWlqa1q1bp5iYGHXt2lX79++/oOUfe+wxvfjii2rfvr2Sk5OLDSwxefJktWzZUtWqVZOvr6+mTZumAwcOOPRp1KiRw9mM4OBg/hJ9mTVt2tRh+q/PeVpamm688UbVrVu31OW9vb1Vu3btEpfPzc3V7t271bdvX4f3jBdffNHhPaNZs2YOf/1s3769CgsLHf6CzLHhPMYY+z0Fy5cv12233aYaNWqoYsWKeuSRR3T8+HH7DeWbN2/WjBkzHF7vmJgYFRYWau/evdqxY4fc3NzUokUL+/rDw8MVEBDglH2Do0v5Wp9r69atKigoUN26dR2W+eqrryy/Q2RkZNjPfkiO71s2m01BQUG8H/wN69atU1pamho1aqS8vDxVqlRJzZo106pVq7R161a5u7urf//+2rRpk3JycvTVV18pKirKvnxWVp
b69eunOnXqyN/fX35+fsrJySn2Gf/X183Hx0d+fn72123Hjh1q3bq1Q/82bdpcxr2+8rg5uwBcHE9PT3Xu3FmdO3fWc889p0cffVTJyckaNGiQgoODS7zG/EKHJHZx+TNPG2PsbdyncGXw8fFReHi4ffrtt9+Wv7+/3nrrLd1+++2Szv+6Pfroo4qJidHixYv1xRdfKCUlRa+99pqGDBmi9957T0888YRee+01RUZGqmLFiho7dqy+//57h3VUqFDBYdpmszGi12V2vufcy8vropYvOk6K7od466231LZtW4d+fw1Bf7dOXF7p6em66aabtG/fPt15550aOHCgXnrpJVWuXFmrV69W3759lZ+fL29vb+Xk5GjAgAF67LHHiq2nZs2a2rlzp+X2+Jxwnkv5Wp8rJydHrq6u2rBhQ7F//0WXdV8o3g8uTnh4uGw2W7HLGmvVqiXJ8T2/U6dOWrVqlTw8PBQVFaXKlSurQYMGWr16tb766is9/vjj9r5xcXE6fvy4JkyYoNDQUHl4eCgyMlL5+fkO27nUr5uLi4vD+4R0db9XEJyuEQ0bNtRHH32kFi1aKDMzU25ubqXeeNegQQOtWbNGcXFx9rY1a9aoYcOGkqRq1apJ+vPm4ObNm0uSww3AuHLYbDa5uLjozJkzDq9b0V+GS3rdQkJC9K9//Uv/+te/lJSUpLfeektDhgzRmjVr1K5dOw0aNMjet+gvjLhyNW3aVD///LN27tx53rNOpQkMDNQNN9ygPXv2qHfv3iX2adCggWbMmKHc3Fz7Wac1a9bIxcVF9erV+1v14+9bsWKFtm7dqv/85z/asGGDCgsL9dprr9nDzbn3KbZo0UI//fSTwx9h/qpevXr6448/tGnTJrVs2VKStGvXLv3222/2PnxOOMelfq3P1bx5cxUUFOjIkSPq0KFDiX2KvkP81Zo1a1S3bt0y/7EFxVWpUkWdO3fWpEmTNGTIkFLvc5KkqKgoTZ8+XW5uburSpYukP8PUvHnztHPnTvv9TdKfr9Ebb7yhbt26SZIOHjyoY8eOlam2evXq2QeUKrJ+/frzLlOtWjVlZmY6nCm9mt8ruFTvKnP8+HHdeuutmj17trZs2aK9e/dqwYIFeuWVV3T33XcrOjpakZGR6tGjh7744gvt27dP3377rYYPH64ffvhBkvTkk09qxowZmjJlijIyMjRu3DgtWrTIfrO4l5eXbr75Zo0ZM0bp6en66quv9Oyzzzpzt/H/y8vLU2ZmpjIzM5Wenq4hQ4YoJydH3bt3V3h4uEJCQjRy5EhlZGRo8eLFxUY6+ve//61ly5Zp79692rhxo1auXKkGDRpIkurUqaMffvhBy5Yt086dO/Xcc89ZviHC+aKiotSxY0f17NlTX375pfbu3aslS5Y4jKxkZdSoUUpJSdF///tf7dy5U1u3btU777yjcePGSZJ69+4tT09PxcXFadu2bVq5cqWGDBmiRx55RIGBgZdr11CCoveAQ4cOaePGjRo9erTuvvtu3XnnnYqNjVV4eLh+//13TZw4UXv27NGsWbM0depUh3U89dRT+vbbb5WQkKC0tDRlZGTo448/tg8YUL9+fUVHR6t///5at26dNm3apP79+8vLy8v+xYfPicuvPF7rc9WtW1e9e/dWbGysFi1apL1792rdunVKSUnR4sWLJUmPP/64UlNT9cILL2jnzp2aOXOmJk2a5DDgDP6eN954Q3/88YdatWql+fPnKz09XTt27NDs2bO1fft2e0Dt2LGjTp06pc8++8wekjp16qQ5c+YoODjY4Y9pderU0axZs5Senq7vv/9evXv3vqArFv5qwIAB2r59u5566int3LlT77//vn1QkNKGH+/UqZOOHj2qV155Rbt379bkyZO1ZMmSsj8pVwrn3V6Fi3H27Fnz9NNPmxYtWhh/f3/j7e1t6tWrZ5599llz+vRpY4wx2dnZZsiQIeaGG24wFSpUMCEhIaZ3797mwIED9vW88c
YbplatWqZChQqmbt265t1333XYzk8//WQiIyONl5eXiYiIMF988QWDQzhZXFyckWR/VKxY0bRu3dosXLjQ3mf16tWmSZMmxtPT03To0MEsWLDAYXCIhIQEU7t2bePh4WGqVatmHnnkEXPs2DFjzJ/HVp8+fYy/v7+pVKmSGThwoHn66acdbuos6YbloUOHmqioqMu899eXcweHKLpxu8jdd99t4uLi7NPHjx838fHxpkqVKsbT09M0btzYfPbZZ8aYPweH8Pf3d1j+ww8/NOe+/c+ZM8dEREQYd3d3ExAQYDp27Ohwc/KWLVvMLbfcYjw9PU3lypVNv379zKlTp0qsuQjHxqX11/cANzc3U61aNRMdHW2mT59uCgoK7P3GjRtngoODjZeXl4mJiTHvvvtusffrdevWmc6dOxtfX1/j4+NjmjZtal566SX7/MOHD5uuXbsaDw8PExoaaubOnWuqV69upk6dau9j9TmBi1eer/W57zH5+flmxIgRJiwszFSoUMEEBwebf/7zn2bLli32PgsXLjQNGzY0FSpUMDVr1jRjx451qD80NNQ+kEiRZs2ameTk5Evy/FwPDh8+bBISEsxNN91kKlSoYHx9fU2bNm3M2LFjTW5urr1fs2bNTFBQkH36+PHjxmazmQceeMBhfRs3bjStWrUynp6epk6dOmbBggXFXidJ5sMPP3RYzt/f37zzzjv26Y8//tiEh4cbDw8P06lTJzNlyhQjyT7YSEnfD6dMmWJCQkKMj4+PiY2NNS+99NJVOziEzZhzLjwEAAD4i59//lkhISH2wQhw7YiMjNRtt92mF1980dml4Cr00ksvaerUqTp48KCkP38zsGvXrjp79qzc3d2dXN2lxz1OAADAwYoVK5STk6MmTZrol19+0bBhwxQWFqaOHTs6uzRcInl5edq6dat+/PHHEgePAEryxhtvqHXr1qpSpYrWrFmjsWPH2i/9zMrK0scff6w6depck6FJIjgBAIBz/P7773rmmWe0Z88eVaxYUe3atdOcOXOKjbiFq9eSJUsUGxuru+66S/fee6+zy8FVIiMjQy+++KJ+/fVX1axZU48//riSkpIkyf6Dum+88YaTq7x8uFQPAAAAACwwqh4AAAAAWCA4AQAAAIAFghMAAAAAWCA4AQAAAIAFghMAAAAAWCA4AQAAAIAFghMAAAAAWCA4AQAAAIAFghMAAAAAWPj/AJ3jGjFFTxgpAAAAAElFTkSuQmCC",
|
| 1006 |
-
"text/plain": [
|
| 1007 |
-
"<Figure size 1000x500 with 1 Axes>"
|
| 1008 |
-
]
|
| 1009 |
-
},
|
| 1010 |
-
"metadata": {},
|
| 1011 |
-
"output_type": "display_data"
|
| 1012 |
-
}
|
| 1013 |
-
],
|
| 1014 |
-
"source": [
|
| 1015 |
-
"import matplotlib.pyplot as plt\n",
|
| 1016 |
-
"import seaborn as sns\n",
|
| 1017 |
-
"\n",
|
| 1018 |
-
"plt.figure(figsize=(10,5))\n",
|
| 1019 |
-
"sns.barplot(x=['Seoul', 'Busan', 'Incheon', 'Daegu', 'Daejeon', 'Gwangju'], y=voting)\n",
|
| 1020 |
-
"plt.title('CSI Score of Voting Model')\n",
|
| 1021 |
-
"plt.ylabel('CSI')\n",
|
| 1022 |
-
"\n",
|
| 1023 |
-
"plt.show()"
|
| 1024 |
-
]
|
| 1025 |
-
},
|
| 1026 |
-
{
|
| 1027 |
-
"cell_type": "code",
|
| 1028 |
-
"execution_count": 24,
|
| 1029 |
-
"metadata": {},
|
| 1030 |
-
"outputs": [
|
| 1031 |
-
{
|
| 1032 |
-
"data": {
|
| 1033 |
-
"text/plain": [
|
| 1034 |
-
"0.4078715882283252"
|
| 1035 |
-
]
|
| 1036 |
-
},
|
| 1037 |
-
"execution_count": 24,
|
| 1038 |
-
"metadata": {},
|
| 1039 |
-
"output_type": "execute_result"
|
| 1040 |
-
}
|
| 1041 |
-
],
|
| 1042 |
-
"source": [
|
| 1043 |
-
"np.mean(voting)"
|
| 1044 |
-
]
|
| 1045 |
-
},
|
| 1046 |
-
{
|
| 1047 |
-
"cell_type": "code",
|
| 1048 |
-
"execution_count": 25,
|
| 1049 |
-
"metadata": {},
|
| 1050 |
-
"outputs": [
|
| 1051 |
-
{
|
| 1052 |
-
"data": {
|
| 1053 |
-
"text/plain": [
|
| 1054 |
-
"[0.3248062015503624,\n",
|
| 1055 |
-
" 0.46608315098458075,\n",
|
| 1056 |
-
" 0.5763157894736463,\n",
|
| 1057 |
-
" 0.2852112676055334,\n",
|
| 1058 |
-
" 0.31884057971011603,\n",
|
| 1059 |
-
" 0.4759725400457121]"
|
| 1060 |
-
]
|
| 1061 |
-
},
|
| 1062 |
-
"execution_count": 25,
|
| 1063 |
-
"metadata": {},
|
| 1064 |
-
"output_type": "execute_result"
|
| 1065 |
-
}
|
| 1066 |
-
],
|
| 1067 |
-
"source": [
|
| 1068 |
-
"voting"
|
| 1069 |
-
]
|
| 1070 |
-
},
|
| 1071 |
-
{
|
| 1072 |
-
"cell_type": "code",
|
| 1073 |
-
"execution_count": 26,
|
| 1074 |
-
"metadata": {},
|
| 1075 |
-
"outputs": [
|
| 1076 |
-
{
|
| 1077 |
-
"data": {
|
| 1078 |
-
"text/plain": [
|
| 1079 |
-
"[0.5106142349456536,\n",
|
| 1080 |
-
" 0.640202275543952,\n",
|
| 1081 |
-
" 0.709448778435959,\n",
|
| 1082 |
-
" 0.45579515959653394,\n",
|
| 1083 |
-
" 0.453960121993875,\n",
|
| 1084 |
-
" 0.6218724605270242]"
|
| 1085 |
-
]
|
| 1086 |
-
},
|
| 1087 |
-
"execution_count": 26,
|
| 1088 |
-
"metadata": {},
|
| 1089 |
-
"output_type": "execute_result"
|
| 1090 |
-
}
|
| 1091 |
-
],
|
| 1092 |
-
"source": [
|
| 1093 |
-
"mcc"
|
| 1094 |
-
]
|
| 1095 |
-
},
|
| 1096 |
-
{
|
| 1097 |
-
"cell_type": "code",
|
| 1098 |
-
"execution_count": 27,
|
| 1099 |
-
"metadata": {},
|
| 1100 |
-
"outputs": [
|
| 1101 |
-
{
|
| 1102 |
-
"data": {
|
| 1103 |
-
"text/plain": [
|
| 1104 |
-
"[0.9005707762557078,\n",
|
| 1105 |
-
" 0.9721461187214612,\n",
|
| 1106 |
-
" 0.9264840182648402,\n",
|
| 1107 |
-
" 0.9768264840182649,\n",
|
| 1108 |
-
" 0.9141552511415525,\n",
|
| 1109 |
-
" 0.9477168949771689]"
|
| 1110 |
-
]
|
| 1111 |
-
},
|
| 1112 |
-
"execution_count": 27,
|
| 1113 |
-
"metadata": {},
|
| 1114 |
-
"output_type": "execute_result"
|
| 1115 |
-
}
|
| 1116 |
-
],
|
| 1117 |
-
"source": [
|
| 1118 |
-
"accuracy"
|
| 1119 |
-
]
|
| 1120 |
-
}
|
| 1121 |
-
],
|
| 1122 |
-
"metadata": {
|
| 1123 |
-
"kernelspec": {
|
| 1124 |
-
"display_name": "Python 3",
|
| 1125 |
-
"language": "python",
|
| 1126 |
-
"name": "python3"
|
| 1127 |
-
},
|
| 1128 |
-
"language_info": {
|
| 1129 |
-
"codemirror_mode": {
|
| 1130 |
-
"name": "ipython",
|
| 1131 |
-
"version": 3
|
| 1132 |
-
},
|
| 1133 |
-
"file_extension": ".py",
|
| 1134 |
-
"mimetype": "text/x-python",
|
| 1135 |
-
"name": "python",
|
| 1136 |
-
"nbconvert_exporter": "python",
|
| 1137 |
-
"pygments_lexer": "ipython3",
|
| 1138 |
-
"version": "3.8.10"
|
| 1139 |
-
}
|
| 1140 |
-
},
|
| 1141 |
-
"nbformat": 4,
|
| 1142 |
-
"nbformat_minor": 2
|
| 1143 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Analysis_code/find_reason/ busan_trend.ipynb
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Analysis_code/find_reason/ daegu_trend.ipynb
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Analysis_code/find_reason/ gwangju_trend.ipynb
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Analysis_code/find_reason/ incheon_trend.ipynb
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Analysis_code/find_reason/ seoul_trend.ipynb
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Analysis_code/find_reason/daejeon_trend.ipynb
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Analysis_code/find_reason/make_trend_plot.ipynb
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Analysis_code/find_reason/wasserstein_distance.ipynb
DELETED
|
@@ -1,541 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"cells": [
|
| 3 |
-
{
|
| 4 |
-
"cell_type": "code",
|
| 5 |
-
"execution_count": 6,
|
| 6 |
-
"metadata": {},
|
| 7 |
-
"outputs": [],
|
| 8 |
-
"source": [
|
| 9 |
-
"# 분석에 필요한 라이브러리 임포트\n",
|
| 10 |
-
"import warnings\n",
|
| 11 |
-
"warnings.filterwarnings('ignore')\n",
|
| 12 |
-
"import pandas as pd\n",
|
| 13 |
-
"import numpy as np\n",
|
| 14 |
-
"import matplotlib.pyplot as plt\n",
|
| 15 |
-
"import seaborn as sns\n",
|
| 16 |
-
"from scipy import stats\n",
|
| 17 |
-
"from scipy.spatial import distance\n",
|
| 18 |
-
"from scipy.stats import wasserstein_distance, entropy, ks_2samp\n",
|
| 19 |
-
"from sklearn.manifold import TSNE\n",
|
| 20 |
-
"from sklearn.preprocessing import StandardScaler\n",
|
| 21 |
-
"from sklearn.ensemble import RandomForestRegressor\n",
|
| 22 |
-
"from sklearn.ensemble import RandomForestClassifier # Added\n",
|
| 23 |
-
"from sklearn.model_selection import train_test_split # Added\n",
|
| 24 |
-
"from sklearn.metrics import roc_auc_score # Added\n",
|
| 25 |
-
"from statsmodels.distributions.empirical_distribution import ECDF # Added\n",
|
| 26 |
-
"import ot\n",
|
| 27 |
-
"\n",
|
| 28 |
-
"\n",
|
| 29 |
-
"# 한글 폰트 설정\n",
|
| 30 |
-
"plt.rcParams['font.family'] = 'NanumGothic'\n",
|
| 31 |
-
"plt.rcParams['axes.unicode_minus'] = False"
|
| 32 |
-
]
|
| 33 |
-
},
|
| 34 |
-
{
|
| 35 |
-
"cell_type": "code",
|
| 36 |
-
"execution_count": 7,
|
| 37 |
-
"metadata": {},
|
| 38 |
-
"outputs": [],
|
| 39 |
-
"source": [
|
| 40 |
-
"seoul = pd.read_feather(\"../../data/data_for_modeling/df_seoul.feather\")\n",
|
| 41 |
-
"seoul= seoul[['datetime','hm','PM10','PM25','year','month','hour','multi_class']]\n",
|
| 42 |
-
"\n",
|
| 43 |
-
"busan = pd.read_feather(\"../../data/data_for_modeling/df_busan.feather\")\n",
|
| 44 |
-
"busan= busan[['datetime','hm','PM10','PM25','year','month','hour','multi_class']]\n",
|
| 45 |
-
"\n",
|
| 46 |
-
"incheon = pd.read_feather(\"../../data/data_for_modeling/df_incheon.feather\")\n",
|
| 47 |
-
"incheon= incheon[['datetime','hm','PM10','PM25','year','month','hour','multi_class']]\n",
|
| 48 |
-
"\n",
|
| 49 |
-
"daegu = pd.read_feather(\"../../data/data_for_modeling/df_daegu.feather\")\n",
|
| 50 |
-
"daegu= daegu[['datetime','hm','PM10','PM25','year','month','hour','multi_class']]\n",
|
| 51 |
-
"\n",
|
| 52 |
-
"daejeon = pd.read_feather(\"../../data/data_for_modeling/df_daejeon.feather\")\n",
|
| 53 |
-
"daejeon= daejeon[['datetime','hm','PM10','PM25','year','month','hour','multi_class']]\n",
|
| 54 |
-
"\n",
|
| 55 |
-
"gwangju = pd.read_feather(\"../../data/data_for_modeling/df_gwangju.feather\")\n",
|
| 56 |
-
"gwangju= gwangju[['datetime','hm','PM10','PM25','year','month','hour','multi_class']]"
|
| 57 |
-
]
|
| 58 |
-
},
|
| 59 |
-
{
|
| 60 |
-
"cell_type": "code",
|
| 61 |
-
"execution_count": 8,
|
| 62 |
-
"metadata": {},
|
| 63 |
-
"outputs": [
|
| 64 |
-
{
|
| 65 |
-
"name": "stdout",
|
| 66 |
-
"output_type": "stream",
|
| 67 |
-
"text": [
|
| 68 |
-
"[0.5920662 0.92351786]\n",
|
| 69 |
-
"[0.60414398 0.9190468 ]\n",
|
| 70 |
-
"[0.60250035 0.9391276 ]\n",
|
| 71 |
-
"[0.60112832 0.92493121]\n",
|
| 72 |
-
"[0.58469137 0.90476229]\n",
|
| 73 |
-
"[0.617718 0.93503164]\n"
|
| 74 |
-
]
|
| 75 |
-
}
|
| 76 |
-
],
|
| 77 |
-
"source": [
|
| 78 |
-
"from sklearn.decomposition import PCA\n",
|
| 79 |
-
"\n",
|
| 80 |
-
"# 특성 선택 (예: PM10, PM25, hm 등)\n",
|
| 81 |
-
"features = ['PM10','PM25', 'hm']\n",
|
| 82 |
-
"# 스케일링\n",
|
| 83 |
-
"scaler = StandardScaler()\n",
|
| 84 |
-
"scaled_features = scaler.fit_transform(seoul[features])\n",
|
| 85 |
-
"pca = PCA(n_components=2)\n",
|
| 86 |
-
"pca.fit(scaled_features)\n",
|
| 87 |
-
"print(pca.explained_variance_ratio_.cumsum())\n",
|
| 88 |
-
"seoul_pca = pca.transform(scaled_features)\n",
|
| 89 |
-
"seoul.drop(columns=['PM25', 'hm'], inplace=True)\n",
|
| 90 |
-
"seoul[['pca_x', 'pca_y']] = seoul_pca\n",
|
| 91 |
-
"\n",
|
| 92 |
-
"\n",
|
| 93 |
-
"scaled_features = scaler.fit_transform(busan[features])\n",
|
| 94 |
-
"pca = PCA(n_components=2)\n",
|
| 95 |
-
"pca.fit(scaled_features)\n",
|
| 96 |
-
"print(pca.explained_variance_ratio_.cumsum())\n",
|
| 97 |
-
"busan_pca = pca.transform(scaled_features)\n",
|
| 98 |
-
"busan.drop(columns=['PM25', 'hm'], inplace=True)\n",
|
| 99 |
-
"busan[['pca_x', 'pca_y']] = busan_pca\n",
|
| 100 |
-
"\n",
|
| 101 |
-
"scaled_features = scaler.fit_transform(incheon[features]) \n",
|
| 102 |
-
"pca = PCA(n_components=2)\n",
|
| 103 |
-
"pca.fit(scaled_features)\n",
|
| 104 |
-
"print(pca.explained_variance_ratio_.cumsum())\n",
|
| 105 |
-
"incheon_pca = pca.transform(scaled_features)\n",
|
| 106 |
-
"incheon.drop(columns=['PM25', 'hm'], inplace=True)\n",
|
| 107 |
-
"incheon[['pca_x', 'pca_y']] = incheon_pca\n",
|
| 108 |
-
"\n",
|
| 109 |
-
"scaled_features = scaler.fit_transform(daegu[features])\n",
|
| 110 |
-
"pca = PCA(n_components=2)\n",
|
| 111 |
-
"pca.fit(scaled_features)\n",
|
| 112 |
-
"print(pca.explained_variance_ratio_.cumsum())\n",
|
| 113 |
-
"daegu_pca = pca.transform(scaled_features)\n",
|
| 114 |
-
"daegu.drop(columns=['PM25', 'hm'], inplace=True)\n",
|
| 115 |
-
"daegu[['pca_x', 'pca_y']] = daegu_pca\n",
|
| 116 |
-
"\n",
|
| 117 |
-
"scaled_features = scaler.fit_transform(daejeon[features])\n",
|
| 118 |
-
"pca = PCA(n_components=2)\n",
|
| 119 |
-
"pca.fit(scaled_features)\n",
|
| 120 |
-
"print(pca.explained_variance_ratio_.cumsum())\n",
|
| 121 |
-
"daejeon_pca = pca.transform(scaled_features)\n",
|
| 122 |
-
"daejeon.drop(columns=['PM25', 'hm'], inplace=True)\n",
|
| 123 |
-
"daejeon[['pca_x', 'pca_y']] = daejeon_pca\n",
|
| 124 |
-
"\n",
|
| 125 |
-
"scaled_features = scaler.fit_transform(gwangju[features])\n",
|
| 126 |
-
"pca = PCA(n_components=2)\n",
|
| 127 |
-
"pca.fit(scaled_features)\n",
|
| 128 |
-
"print(pca.explained_variance_ratio_.cumsum())\n",
|
| 129 |
-
"gwangju_pca = pca.transform(scaled_features)\n",
|
| 130 |
-
"gwangju.drop(columns=['PM25', 'hm'], inplace=True)\n",
|
| 131 |
-
"gwangju[['pca_x', 'pca_y']] = gwangju_pca\n"
|
| 132 |
-
]
|
| 133 |
-
},
|
| 134 |
-
{
|
| 135 |
-
"cell_type": "code",
|
| 136 |
-
"execution_count": 31,
|
| 137 |
-
"metadata": {},
|
| 138 |
-
"outputs": [],
|
| 139 |
-
"source": [
|
| 140 |
-
"seoul_2018 = seoul[seoul['year'] == 2018]\n",
|
| 141 |
-
"seoul_2019 = seoul[seoul['year'] == 2019]\n",
|
| 142 |
-
"seoul_2020 = seoul[seoul['year'] == 2020]\n",
|
| 143 |
-
"seoul_2021 = seoul[seoul['year'] == 2021]\n",
|
| 144 |
-
"years = [2018, 2019, 2020, 2021]\n",
|
| 145 |
-
"\n",
|
| 146 |
-
"\n",
|
| 147 |
-
"busan_2018 = busan[busan['year'] == 2018]\n",
|
| 148 |
-
"busan_2019 = busan[busan['year'] == 2019]\n",
|
| 149 |
-
"busan_2020 = busan[busan['year'] == 2020]\n",
|
| 150 |
-
"busan_2021 = busan[busan['year'] == 2021]\n",
|
| 151 |
-
"years = [2018, 2019, 2020, 2021]\n",
|
| 152 |
-
"\n",
|
| 153 |
-
"\n",
|
| 154 |
-
"incheon_2018 = incheon[incheon['year'] == 2018]\n",
|
| 155 |
-
"incheon_2019 = incheon[incheon['year'] == 2019]\n",
|
| 156 |
-
"incheon_2020 = incheon[incheon['year'] == 2020]\n",
|
| 157 |
-
"incheon_2021 = incheon[incheon['year'] == 2021]\n",
|
| 158 |
-
"years = [2018, 2019, 2020, 2021]\n",
|
| 159 |
-
"\n",
|
| 160 |
-
"\n",
|
| 161 |
-
"daegu_2018 = daegu[daegu['year'] == 2018]\n",
|
| 162 |
-
"daegu_2019 = daegu[daegu['year'] == 2019]\n",
|
| 163 |
-
"daegu_2020 = daegu[daegu['year'] == 2020]\n",
|
| 164 |
-
"daegu_2021 = daegu[daegu['year'] == 2021]\n",
|
| 165 |
-
"years = [2018, 2019, 2020, 2021]\n",
|
| 166 |
-
"\n",
|
| 167 |
-
"\n",
|
| 168 |
-
"daejeon_2018 = daejeon[daejeon['year'] == 2018]\n",
|
| 169 |
-
"daejeon_2019 = daejeon[daejeon['year'] == 2019]\n",
|
| 170 |
-
"daejeon_2020 = daejeon[daejeon['year'] == 2020]\n",
|
| 171 |
-
"daejeon_2021 = daejeon[daejeon['year'] == 2021]\n",
|
| 172 |
-
"years = [2018, 2019, 2020, 2021]\n",
|
| 173 |
-
"\n",
|
| 174 |
-
"\n",
|
| 175 |
-
"gwangju_2018 = gwangju[gwangju['year'] == 2018]\n",
|
| 176 |
-
"gwangju_2019 = gwangju[gwangju['year'] == 2019]\n",
|
| 177 |
-
"gwangju_2020 = gwangju[gwangju['year'] == 2020]\n",
|
| 178 |
-
"gwangju_2021 = gwangju[gwangju['year'] == 2021]\n",
|
| 179 |
-
"years = [2018, 2019, 2020, 2021]\n",
|
| 180 |
-
"\n",
|
| 181 |
-
"\n",
|
| 182 |
-
"\n"
|
| 183 |
-
]
|
| 184 |
-
},
|
| 185 |
-
{
|
| 186 |
-
"cell_type": "code",
|
| 187 |
-
"execution_count": 33,
|
| 188 |
-
"metadata": {},
|
| 189 |
-
"outputs": [
|
| 190 |
-
{
|
| 191 |
-
"name": "stdout",
|
| 192 |
-
"output_type": "stream",
|
| 193 |
-
"text": [
|
| 194 |
-
" 2018 2019 2020 2021\n",
|
| 195 |
-
"2018 0.0 0.130217 0.063132 1.081307\n",
|
| 196 |
-
"2019 0.130217 0.0 0.059051 0.830648\n",
|
| 197 |
-
"2020 0.063132 0.059051 0.0 0.039927\n",
|
| 198 |
-
"2021 1.081307 0.830648 0.039927 0.0\n"
|
| 199 |
-
]
|
| 200 |
-
}
|
| 201 |
-
],
|
| 202 |
-
"source": [
|
| 203 |
-
"# 연도별 데이터 준비\n",
|
| 204 |
-
"years = [2018, 2019, 2020, 2021]\n",
|
| 205 |
-
"data_dict = {\n",
|
| 206 |
-
" 2018: seoul_2018[['pca_x', 'pca_y']].values,\n",
|
| 207 |
-
" 2019: seoul_2019[['pca_x', 'pca_y']].values,\n",
|
| 208 |
-
" 2020: seoul_2020[['pca_x', 'pca_y']].values,\n",
|
| 209 |
-
" 2021: seoul_2021[['pca_x', 'pca_y']].values\n",
|
| 210 |
-
"}\n",
|
| 211 |
-
"\n",
|
| 212 |
-
"\n",
|
| 213 |
-
"# 결과를 저장할 데이터프레임 생성\n",
|
| 214 |
-
"result_df = pd.DataFrame(index=years, columns=years)\n",
|
| 215 |
-
"\n",
|
| 216 |
-
"for i, year1 in enumerate(years):\n",
|
| 217 |
-
" for j, year2 in enumerate(years):\n",
|
| 218 |
-
" if year1 == year2:\n",
|
| 219 |
-
" result_df.iloc[i, j] = 0.0\n",
|
| 220 |
-
" if j < i:\n",
|
| 221 |
-
" # 이미 계산된 값 사용\n",
|
| 222 |
-
" result_df.iloc[i, j] = result_df.iloc[j, i]\n",
|
| 223 |
-
" else:\n",
|
| 224 |
-
" X = data_dict[year1]\n",
|
| 225 |
-
" Y = data_dict[year2]\n",
|
| 226 |
-
" a = np.ones(len(X)) / len(X)\n",
|
| 227 |
-
" b = np.ones(len(Y)) / len(Y)\n",
|
| 228 |
-
" W = ot.emd2(a, b, ot.dist(X, Y))\n",
|
| 229 |
-
" result_df.iloc[i, j] = W\n",
|
| 230 |
-
" result_df.iloc[j, i] = W # 대칭 위치에 동일 값 저장\n",
|
| 231 |
-
"\n",
|
| 232 |
-
"# 결과 출력\n",
|
| 233 |
-
"print(result_df)"
|
| 234 |
-
]
|
| 235 |
-
},
|
| 236 |
-
{
|
| 237 |
-
"cell_type": "code",
|
| 238 |
-
"execution_count": 23,
|
| 239 |
-
"metadata": {},
|
| 240 |
-
"outputs": [
|
| 241 |
-
{
|
| 242 |
-
"name": "stdout",
|
| 243 |
-
"output_type": "stream",
|
| 244 |
-
"text": [
|
| 245 |
-
" 2018 2019 2020 2021\n",
|
| 246 |
-
"2018 0.0 0.116261 0.10445 1.424479\n",
|
| 247 |
-
"2019 0.116261 0.0 0.09933 1.164067\n",
|
| 248 |
-
"2020 0.10445 0.09933 0.0 1.075336\n",
|
| 249 |
-
"2021 1.424479 1.164067 1.075336 0.0\n"
|
| 250 |
-
]
|
| 251 |
-
}
|
| 252 |
-
],
|
| 253 |
-
"source": [
|
| 254 |
-
"# 연도별 데이터 준비\n",
|
| 255 |
-
"years = [2018, 2019, 2020, 2021]\n",
|
| 256 |
-
"data_dict = {\n",
|
| 257 |
-
" 2018: busan_2018[['pca_x', 'pca_y']].values,\n",
|
| 258 |
-
" 2019: busan_2019[['pca_x', 'pca_y']].values,\n",
|
| 259 |
-
" 2020: busan_2020[['pca_x', 'pca_y']].values,\n",
|
| 260 |
-
" 2021: busan_2021[['pca_x', 'pca_y']].values\n",
|
| 261 |
-
"}\n",
|
| 262 |
-
"\n",
|
| 263 |
-
"\n",
|
| 264 |
-
"# 결과를 저장할 데이터프레임 생성\n",
|
| 265 |
-
"result_df = pd.DataFrame(index=years, columns=years)\n",
|
| 266 |
-
"\n",
|
| 267 |
-
"for i, year1 in enumerate(years):\n",
|
| 268 |
-
" for j, year2 in enumerate(years):\n",
|
| 269 |
-
" if year1 == year2:\n",
|
| 270 |
-
" result_df.iloc[i, j] = 0.0\n",
|
| 271 |
-
" if j < i:\n",
|
| 272 |
-
" # 이미 계산된 값 사용\n",
|
| 273 |
-
" result_df.iloc[i, j] = result_df.iloc[j, i]\n",
|
| 274 |
-
" else:\n",
|
| 275 |
-
" X = data_dict[year1]\n",
|
| 276 |
-
" Y = data_dict[year2]\n",
|
| 277 |
-
" a = np.ones(len(X)) / len(X)\n",
|
| 278 |
-
" b = np.ones(len(Y)) / len(Y)\n",
|
| 279 |
-
" W = ot.emd2(a, b, ot.dist(X, Y))\n",
|
| 280 |
-
" result_df.iloc[i, j] = W\n",
|
| 281 |
-
" result_df.iloc[j, i] = W # 대칭 위치에 동일 값 저장\n",
|
| 282 |
-
"\n",
|
| 283 |
-
"# 결과 출력\n",
|
| 284 |
-
"print(result_df)"
|
| 285 |
-
]
|
| 286 |
-
},
|
| 287 |
-
{
|
| 288 |
-
"cell_type": "code",
|
| 289 |
-
"execution_count": 24,
|
| 290 |
-
"metadata": {},
|
| 291 |
-
"outputs": [
|
| 292 |
-
{
|
| 293 |
-
"name": "stdout",
|
| 294 |
-
"output_type": "stream",
|
| 295 |
-
"text": [
|
| 296 |
-
" 2018 2019 2020 2021\n",
|
| 297 |
-
"2018 0.0 0.080291 0.074071 0.449094\n",
|
| 298 |
-
"2019 0.080291 0.0 0.060171 0.384189\n",
|
| 299 |
-
"2020 0.074071 0.060171 0.0 0.04047\n",
|
| 300 |
-
"2021 0.449094 0.384189 0.04047 0.0\n"
|
| 301 |
-
]
|
| 302 |
-
}
|
| 303 |
-
],
|
| 304 |
-
"source": [
|
| 305 |
-
"# 연도별 데이터 준비\n",
|
| 306 |
-
"years = [2018, 2019, 2020, 2021]\n",
|
| 307 |
-
"data_dict = {\n",
|
| 308 |
-
" 2018: incheon_2018[['pca_x', 'pca_y']].values,\n",
|
| 309 |
-
" 2019: incheon_2019[['pca_x', 'pca_y']].values,\n",
|
| 310 |
-
" 2020: incheon_2020[['pca_x', 'pca_y']].values,\n",
|
| 311 |
-
" 2021: incheon_2021[['pca_x', 'pca_y']].values\n",
|
| 312 |
-
"}\n",
|
| 313 |
-
"\n",
|
| 314 |
-
"\n",
|
| 315 |
-
"# 결과를 저장할 데이터프레임 생성\n",
|
| 316 |
-
"result_df = pd.DataFrame(index=years, columns=years)\n",
|
| 317 |
-
"\n",
|
| 318 |
-
"for i, year1 in enumerate(years):\n",
|
| 319 |
-
" for j, year2 in enumerate(years):\n",
|
| 320 |
-
" if year1 == year2:\n",
|
| 321 |
-
" result_df.iloc[i, j] = 0.0\n",
|
| 322 |
-
" if j < i:\n",
|
| 323 |
-
" # 이미 계산된 값 사용\n",
|
| 324 |
-
" result_df.iloc[i, j] = result_df.iloc[j, i]\n",
|
| 325 |
-
" else:\n",
|
| 326 |
-
" X = data_dict[year1]\n",
|
| 327 |
-
" Y = data_dict[year2]\n",
|
| 328 |
-
" a = np.ones(len(X)) / len(X)\n",
|
| 329 |
-
" b = np.ones(len(Y)) / len(Y)\n",
|
| 330 |
-
" W = ot.emd2(a, b, ot.dist(X, Y))\n",
|
| 331 |
-
" result_df.iloc[i, j] = W\n",
|
| 332 |
-
" result_df.iloc[j, i] = W # 대칭 위치에 동일 값 저장\n",
|
| 333 |
-
"\n",
|
| 334 |
-
"# 결과 출력\n",
|
| 335 |
-
"print(result_df)"
|
| 336 |
-
]
|
| 337 |
-
},
|
| 338 |
-
{
|
| 339 |
-
"cell_type": "code",
|
| 340 |
-
"execution_count": 25,
|
| 341 |
-
"metadata": {},
|
| 342 |
-
"outputs": [
|
| 343 |
-
{
|
| 344 |
-
"name": "stdout",
|
| 345 |
-
"output_type": "stream",
|
| 346 |
-
"text": [
|
| 347 |
-
" 2018 2019 2020 2021\n",
|
| 348 |
-
"2018 0.0 0.127512 0.112157 0.731476\n",
|
| 349 |
-
"2019 0.127512 0.0 0.094651 0.647071\n",
|
| 350 |
-
"2020 0.112157 0.094651 0.0 0.041217\n",
|
| 351 |
-
"2021 0.731476 0.647071 0.041217 0.0\n"
|
| 352 |
-
]
|
| 353 |
-
}
|
| 354 |
-
],
|
| 355 |
-
"source": [
|
| 356 |
-
"# 연도별 데이터 준비\n",
|
| 357 |
-
"years = [2018, 2019, 2020, 2021]\n",
|
| 358 |
-
"data_dict = {\n",
|
| 359 |
-
" 2018: daegu_2018[['pca_x', 'pca_y']].values,\n",
|
| 360 |
-
" 2019: daegu_2019[['pca_x', 'pca_y']].values,\n",
|
| 361 |
-
" 2020: daegu_2020[['pca_x', 'pca_y']].values,\n",
|
| 362 |
-
" 2021: daegu_2021[['pca_x', 'pca_y']].values\n",
|
| 363 |
-
"}\n",
|
| 364 |
-
"\n",
|
| 365 |
-
"\n",
|
| 366 |
-
"# 결과를 저장할 데이터프레임 생성\n",
|
| 367 |
-
"result_df = pd.DataFrame(index=years, columns=years)\n",
|
| 368 |
-
"\n",
|
| 369 |
-
"for i, year1 in enumerate(years):\n",
|
| 370 |
-
" for j, year2 in enumerate(years):\n",
|
| 371 |
-
" if year1 == year2:\n",
|
| 372 |
-
" result_df.iloc[i, j] = 0.0\n",
|
| 373 |
-
" if j < i:\n",
|
| 374 |
-
" # 이미 계산된 값 사용\n",
|
| 375 |
-
" result_df.iloc[i, j] = result_df.iloc[j, i]\n",
|
| 376 |
-
" else:\n",
|
| 377 |
-
" X = data_dict[year1]\n",
|
| 378 |
-
" Y = data_dict[year2]\n",
|
| 379 |
-
" a = np.ones(len(X)) / len(X)\n",
|
| 380 |
-
" b = np.ones(len(Y)) / len(Y)\n",
|
| 381 |
-
" W = ot.emd2(a, b, ot.dist(X, Y))\n",
|
| 382 |
-
" result_df.iloc[i, j] = W\n",
|
| 383 |
-
" result_df.iloc[j, i] = W # 대칭 위치에 동일 값 저장\n",
|
| 384 |
-
"\n",
|
| 385 |
-
"# 결과 출력\n",
|
| 386 |
-
"print(result_df)"
|
| 387 |
-
]
|
| 388 |
-
},
|
| 389 |
-
{
|
| 390 |
-
"cell_type": "code",
|
| 391 |
-
"execution_count": 26,
|
| 392 |
-
"metadata": {},
|
| 393 |
-
"outputs": [
|
| 394 |
-
{
|
| 395 |
-
"name": "stdout",
|
| 396 |
-
"output_type": "stream",
|
| 397 |
-
"text": [
|
| 398 |
-
" 2018 2019 2020 2021\n",
|
| 399 |
-
"2018 0.0 0.273013 0.053969 0.877338\n",
|
| 400 |
-
"2019 0.273013 0.0 0.137817 0.780071\n",
|
| 401 |
-
"2020 0.053969 0.137817 0.0 0.042294\n",
|
| 402 |
-
"2021 0.877338 0.780071 0.042294 0.0\n"
|
| 403 |
-
]
|
| 404 |
-
}
|
| 405 |
-
],
|
| 406 |
-
"source": [
|
| 407 |
-
"# 연도별 데이터 준비\n",
|
| 408 |
-
"years = [2018, 2019, 2020, 2021]\n",
|
| 409 |
-
"data_dict = {\n",
|
| 410 |
-
" 2018: daejeon_2018[['pca_x', 'pca_y']].values,\n",
|
| 411 |
-
" 2019: daejeon_2019[['pca_x', 'pca_y']].values,\n",
|
| 412 |
-
" 2020: daejeon_2020[['pca_x', 'pca_y']].values,\n",
|
| 413 |
-
" 2021: daejeon_2021[['pca_x', 'pca_y']].values\n",
|
| 414 |
-
"}\n",
|
| 415 |
-
"\n",
|
| 416 |
-
"\n",
|
| 417 |
-
"# 결과를 저장할 데이터프레임 생성\n",
|
| 418 |
-
"result_df = pd.DataFrame(index=years, columns=years)\n",
|
| 419 |
-
"\n",
|
| 420 |
-
"for i, year1 in enumerate(years):\n",
|
| 421 |
-
" for j, year2 in enumerate(years):\n",
|
| 422 |
-
" if year1 == year2:\n",
|
| 423 |
-
" result_df.iloc[i, j] = 0.0\n",
|
| 424 |
-
" if j < i:\n",
|
| 425 |
-
" # 이미 계산된 값 사용\n",
|
| 426 |
-
" result_df.iloc[i, j] = result_df.iloc[j, i]\n",
|
| 427 |
-
" else:\n",
|
| 428 |
-
" X = data_dict[year1]\n",
|
| 429 |
-
" Y = data_dict[year2]\n",
|
| 430 |
-
" a = np.ones(len(X)) / len(X)\n",
|
| 431 |
-
" b = np.ones(len(Y)) / len(Y)\n",
|
| 432 |
-
" W = ot.emd2(a, b, ot.dist(X, Y))\n",
|
| 433 |
-
" result_df.iloc[i, j] = W\n",
|
| 434 |
-
" result_df.iloc[j, i] = W # 대칭 위치에 동일 값 저장\n",
|
| 435 |
-
"\n",
|
| 436 |
-
"# 결과 출력\n",
|
| 437 |
-
"print(result_df)"
|
| 438 |
-
]
|
| 439 |
-
},
|
| 440 |
-
{
|
| 441 |
-
"cell_type": "code",
|
| 442 |
-
"execution_count": 27,
|
| 443 |
-
"metadata": {},
|
| 444 |
-
"outputs": [
|
| 445 |
-
{
|
| 446 |
-
"name": "stdout",
|
| 447 |
-
"output_type": "stream",
|
| 448 |
-
"text": [
|
| 449 |
-
" 2018 2019 2020 2021\n",
|
| 450 |
-
"2018 0.0 0.105633 0.08202 1.00155\n",
|
| 451 |
-
"2019 0.105633 0.0 0.069322 0.892938\n",
|
| 452 |
-
"2020 0.08202 0.069322 0.0 0.480667\n",
|
| 453 |
-
"2021 1.00155 0.892938 0.480667 0.0\n"
|
| 454 |
-
]
|
| 455 |
-
}
|
| 456 |
-
],
|
| 457 |
-
"source": [
|
| 458 |
-
"# 연도별 데이터 준비\n",
|
| 459 |
-
"years = [2018, 2019, 2020, 2021]\n",
|
| 460 |
-
"data_dict = {\n",
|
| 461 |
-
" 2018: gwangju_2018[['pca_x', 'pca_y']].values,\n",
|
| 462 |
-
" 2019: gwangju_2019[['pca_x', 'pca_y']].values,\n",
|
| 463 |
-
" 2020: gwangju_2020[['pca_x', 'pca_y']].values,\n",
|
| 464 |
-
" 2021: gwangju_2021[['pca_x', 'pca_y']].values\n",
|
| 465 |
-
"}\n",
|
| 466 |
-
"\n",
|
| 467 |
-
"\n",
|
| 468 |
-
"# 결과를 저장할 데이터프레임 생성\n",
|
| 469 |
-
"result_df = pd.DataFrame(index=years, columns=years)\n",
|
| 470 |
-
"\n",
|
| 471 |
-
"for i, year1 in enumerate(years):\n",
|
| 472 |
-
" for j, year2 in enumerate(years):\n",
|
| 473 |
-
" if year1 == year2:\n",
|
| 474 |
-
" result_df.iloc[i, j] = 0.0\n",
|
| 475 |
-
" if j < i:\n",
|
| 476 |
-
" # 이미 계산된 값 사용\n",
|
| 477 |
-
" result_df.iloc[i, j] = result_df.iloc[j, i]\n",
|
| 478 |
-
" else:\n",
|
| 479 |
-
" X = data_dict[year1]\n",
|
| 480 |
-
" Y = data_dict[year2]\n",
|
| 481 |
-
" a = np.ones(len(X)) / len(X)\n",
|
| 482 |
-
" b = np.ones(len(Y)) / len(Y)\n",
|
| 483 |
-
" W = ot.emd2(a, b, ot.dist(X, Y))\n",
|
| 484 |
-
" result_df.iloc[i, j] = W\n",
|
| 485 |
-
" result_df.iloc[j, i] = W # 대칭 위치에 동일 값 저장\n",
|
| 486 |
-
"\n",
|
| 487 |
-
"# 결과 출력\n",
|
| 488 |
-
"print(result_df)"
|
| 489 |
-
]
|
| 490 |
-
},
|
| 491 |
-
{
|
| 492 |
-
"cell_type": "code",
|
| 493 |
-
"execution_count": null,
|
| 494 |
-
"metadata": {},
|
| 495 |
-
"outputs": [],
|
| 496 |
-
"source": []
|
| 497 |
-
},
|
| 498 |
-
{
|
| 499 |
-
"cell_type": "code",
|
| 500 |
-
"execution_count": null,
|
| 501 |
-
"metadata": {},
|
| 502 |
-
"outputs": [],
|
| 503 |
-
"source": []
|
| 504 |
-
},
|
| 505 |
-
{
|
| 506 |
-
"cell_type": "code",
|
| 507 |
-
"execution_count": null,
|
| 508 |
-
"metadata": {},
|
| 509 |
-
"outputs": [],
|
| 510 |
-
"source": []
|
| 511 |
-
},
|
| 512 |
-
{
|
| 513 |
-
"cell_type": "code",
|
| 514 |
-
"execution_count": null,
|
| 515 |
-
"metadata": {},
|
| 516 |
-
"outputs": [],
|
| 517 |
-
"source": []
|
| 518 |
-
}
|
| 519 |
-
],
|
| 520 |
-
"metadata": {
|
| 521 |
-
"kernelspec": {
|
| 522 |
-
"display_name": "py39",
|
| 523 |
-
"language": "python",
|
| 524 |
-
"name": "python3"
|
| 525 |
-
},
|
| 526 |
-
"language_info": {
|
| 527 |
-
"codemirror_mode": {
|
| 528 |
-
"name": "ipython",
|
| 529 |
-
"version": 3
|
| 530 |
-
},
|
| 531 |
-
"file_extension": ".py",
|
| 532 |
-
"mimetype": "text/x-python",
|
| 533 |
-
"name": "python",
|
| 534 |
-
"nbconvert_exporter": "python",
|
| 535 |
-
"pygments_lexer": "ipython3",
|
| 536 |
-
"version": "3.9.18"
|
| 537 |
-
}
|
| 538 |
-
},
|
| 539 |
-
"nbformat": 4,
|
| 540 |
-
"nbformat_minor": 4
|
| 541 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Analysis_code/ft_transformer.py
DELETED
|
@@ -1,56 +0,0 @@
|
|
| 1 |
-
import torch
|
| 2 |
-
import torch.nn as nn
|
| 3 |
-
|
| 4 |
-
# FT-Transformer Implementation
|
| 5 |
-
class FTTransformer(nn.Module):
    """Transformer-based classifier for mixed numerical/categorical tabular data.

    Every categorical column gets its own embedding table; the numerical
    columns are projected jointly to a single ``d_token`` vector which is
    added to each categorical token. The resulting token sequence is passed
    through ``n_blocks`` transformer encoder layers, mean-pooled over the
    token axis and mapped to logits.

    Args:
        num_features: Number of numerical input columns.
        cat_cardinalities: Iterable with the number of categories of each
            categorical column (one embedding table per entry).
        num_classes: Number of target classes; must be >= 2. For 2 classes
            the model emits a single logit, otherwise ``num_classes`` logits.
        d_token: Token/embedding width (must be divisible by the 8 heads).
        n_blocks: Number of transformer encoder layers.
        attention_dropout: Dropout rate passed to every encoder layer.
        ffn_dropout: Dropout rate applied to the pooled representation.

    Raises:
        ValueError: If ``num_classes`` is smaller than 2.
    """

    def __init__(self, num_features, cat_cardinalities, num_classes, d_token=192, n_blocks=6, attention_dropout=0.2, ffn_dropout=0.2):
        super().__init__()

        # Fail early: previously num_classes < 2 left `output_layer` undefined
        # and only surfaced as an AttributeError inside forward().
        if num_classes < 2:
            raise ValueError(f"num_classes must be >= 2, got {num_classes}")

        self.num_classes = num_classes  # kept so callers can inspect the head type

        # One embedding table per categorical feature.
        self.cat_embeddings = nn.ModuleList([
            nn.Embedding(num_categories, d_token) for num_categories in cat_cardinalities
        ])

        # Joint linear projection of all numerical features.
        self.num_linear = nn.Linear(num_features, d_token)

        # Transformer blocks. batch_first=True is required because forward()
        # feeds (batch, tokens, d_token); with the default (False) the layer
        # would treat the batch axis as the sequence axis and attend across
        # samples instead of across feature tokens. Needs PyTorch >= 1.9.
        # Parameter names/shapes are unaffected by this flag.
        self.transformer_blocks = nn.ModuleList([
            nn.TransformerEncoderLayer(
                d_model=d_token,
                nhead=8,
                dim_feedforward=4 * d_token,
                dropout=attention_dropout,
                activation='gelu',
                batch_first=True,
            ) for _ in range(n_blocks)
        ])

        self.ffn_dropout = nn.Dropout(ffn_dropout)
        # Binary classification -> single logit; multi-class -> one logit per class.
        out_features = 1 if num_classes == 2 else num_classes
        self.output_layer = nn.Linear(d_token, out_features)

    def forward(self, x_num, x_cat):
        """Compute logits for a batch.

        Args:
            x_num: Float tensor whose last dimension is ``num_features``
                (used here as ``(batch, num_features)``).
            x_cat: Integer tensor indexed as ``x_cat[:, i]`` for the i-th
                categorical column, i.e. ``(batch, n_categorical)``.

        Returns:
            Tensor of shape ``(batch, 1)`` for binary classification or
            ``(batch, num_classes)`` otherwise (raw logits, no activation).
        """
        # Numerical feature embedding -> one vector per sample.
        num_token = self.num_linear(x_num).unsqueeze(1)

        if len(self.cat_embeddings) > 0:
            # Categorical feature embedding -> one token per column.
            cat_tokens = torch.stack(
                [embed(x_cat[:, i]) for i, embed in enumerate(self.cat_embeddings)],
                dim=1,
            )
            # The numerical vector is broadcast-added onto every categorical token.
            x = num_token + cat_tokens
        else:
            # No categorical columns: torch.stack([]) would raise, so fall
            # back to a single numerical token.
            x = num_token

        # Pass through transformer blocks.
        for block in self.transformer_blocks:
            x = block(x)

        # Mean-pool over tokens, then dropout and the classification head.
        x = x.mean(dim=1)
        x = self.ffn_dropout(x)
        x = self.output_layer(x)

        return x
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Analysis_code/make_oversample_data/gan_sample_10000_1.py
DELETED
|
@@ -1,181 +0,0 @@
|
|
| 1 |
-
import pandas as pd
|
| 2 |
-
import numpy as np
|
| 3 |
-
|
| 4 |
-
from imblearn.over_sampling import SMOTENC
|
| 5 |
-
import optuna
|
| 6 |
-
from ctgan import CTGAN
|
| 7 |
-
import torch
|
| 8 |
-
import warnings
|
| 9 |
-
|
| 10 |
-
# Regions to process, with one training-CSV input and one oversampled-CSV
# output path per region (same order as `regions`).
regions = ['incheon', 'seoul', 'busan', 'daegu', 'daejeon', 'gwangju']
file_paths = [
    f'../../data/data_for_modeling/{region}_train.csv'
    for region in regions
]
output_paths = [
    f'../../data/data_oversampled/ctgan10000/ctgan10000_1_{region}.csv'
    for region in regions
]

# Pick the CUDA device when torch can see one, otherwise the CPU, and report it.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

# Silence UserWarnings originating from the optuna.distributions module.
warnings.filterwarnings(
    "ignore",
    category=UserWarning,
    module="optuna.distributions",
)
|
| 21 |
-
|
| 22 |
-
# Per-region pipeline: SMOTENC pre-balancing, then Optuna-tuned CTGAN
# oversampling of classes 0 and 1, then write the combined CSV.

# Optuna search spaces for the per-class CTGAN tuning.
# `embedding_dim` / `discriminator_steps` are inclusive (low, high) int ranges.
_CLASS0_SPACE = {
    "embedding_dim": (64, 128),
    "layer_dims": [(64, 64), (128, 128)],
    "batch_size": [64, 128, 256],
    "discriminator_steps": (1, 3),
}
_CLASS1_SPACE = {
    "embedding_dim": (128, 512),
    "layer_dims": [(128, 128), (256, 256)],
    "batch_size": [256, 512, 1024],
    "discriminator_steps": (1, 5),
}


def _build_ctgan(params):
    """Create a CTGAN from a hyperparameter dict (trial suggestions or best_params)."""
    return CTGAN(
        embedding_dim=params["embedding_dim"],
        generator_dim=params["generator_dim"],
        discriminator_dim=params["discriminator_dim"],
        batch_size=params["batch_size"],
        discriminator_steps=params["discriminator_steps"],
        pac=params["pac"],
    )


def _visi_moment_gap(real_visi, generated_visi):
    """Return squared mean gap plus squared std gap between two 'visi' series."""
    mean_gap = real_visi.mean() - generated_visi.mean()
    std_gap = real_visi.std() - generated_visi.std()
    return mean_gap ** 2 + std_gap ** 2


def _make_objective(class_data, discrete_columns, space):
    """Build an Optuna objective that scores a CTGAN fitted on `class_data`."""
    def objective(trial):
        # Suggest order and parameter names match the original script.
        params = {
            "embedding_dim": trial.suggest_int("embedding_dim", *space["embedding_dim"]),
            "generator_dim": trial.suggest_categorical("generator_dim", space["layer_dims"]),
            "discriminator_dim": trial.suggest_categorical("discriminator_dim", space["layer_dims"]),
            "pac": trial.suggest_categorical("pac", [4, 8]),
            "batch_size": trial.suggest_categorical("batch_size", space["batch_size"]),
            "discriminator_steps": trial.suggest_int("discriminator_steps", *space["discriminator_steps"]),
        }
        ctgan = _build_ctgan(params)
        ctgan.fit(class_data, discrete_columns=discrete_columns)
        generated = ctgan.sample(len(class_data) * 2)
        # The study maximizes, so return the negated distribution gap.
        return -_visi_moment_gap(class_data['visi'], generated['visi'])

    return objective


def _tune_and_sample(class_data, discrete_columns, space, n_trials, n_samples):
    """Tune CTGAN on `class_data`, refit with the best params, return `n_samples` rows."""
    study = optuna.create_study(direction="maximize")
    study.optimize(_make_objective(class_data, discrete_columns, space), n_trials=n_trials)
    ctgan = _build_ctgan(study.best_params)
    ctgan.fit(class_data, discrete_columns=discrete_columns)
    return ctgan.sample(n_samples)


for file_path, output_path in zip(file_paths, output_paths):
    # Load the data and keep only the 2018 and 2019 rows.
    data = pd.read_csv(file_path, index_col=0)
    # .copy() makes the subset an independent frame so the column
    # assignments below cannot hit pandas' chained-assignment ambiguity.
    data = data.loc[data['year'].isin([2018, 2019]), :].copy()
    data['cloudcover'] = data['cloudcover'].astype('int')
    data['lm_cloudcover'] = data['lm_cloudcover'].astype('int')
    X = data.drop(columns=['multi_class', 'binary_class'])
    y = data['multi_class']

    # Drop derived columns; they are recomputed after generation (see below).
    X.drop(columns=['ground_temp - temp_C', 'hour_sin', 'hour_cos', 'month_sin', 'month_cos'], inplace=True)

    # SMOTENC takes positional indices: every non-float64 column is categorical.
    categorical_features_indices = [i for i, dtype in enumerate(X.dtypes) if dtype != 'float64']

    # Per-class target counts for SMOTENC.
    count_class_0 = (y == 0).sum()
    count_class_1 = (y == 1).sum()
    count_class_2 = (y == 2).sum()
    target_class_1 = int(np.ceil(count_class_1 / 100) * 100)  # round up to the next hundred
    sampling_strategy = {
        0: 500 if count_class_0 <= 500 else 1000,
        1: target_class_1,
        2: count_class_2,
    }

    # Apply SMOTENC.
    smotenc = SMOTENC(categorical_features=categorical_features_indices, sampling_strategy=sampling_strategy, random_state=42)
    X_resampled, y_resampled = smotenc.fit_resample(X, y)

    # Resampled frame including the label column.
    lerp_data = X_resampled.copy()
    lerp_data['multi_class'] = y_resampled

    # CTGAN takes column names: every non-float64 column is discrete.
    categorical_features = [
        col for col, dtype in zip(lerp_data.columns, lerp_data.dtypes) if dtype != 'float64'
    ]

    # Class 0: tune (50 trials), refit on all class-0 rows, draw 10000 samples.
    generated_0 = _tune_and_sample(
        lerp_data[lerp_data['multi_class'] == 0],
        categorical_features,
        _CLASS0_SPACE,
        n_trials=50,
        n_samples=10000,
    )

    # Class 1: tune (30 trials), refit, draw 10000 minus the SMOTENC target.
    generated_1 = _tune_and_sample(
        lerp_data[lerp_data['multi_class'] == 1],
        categorical_features,
        _CLASS1_SPACE,
        n_trials=30,
        n_samples=10000 - target_class_1,
    )

    # Keep only generated rows whose 'visi' falls in the range used per class
    # here: [0, 100) for class 0 and [100, 500) for class 1.
    well_generated0 = generated_0[(generated_0['visi'] >= 0) & (generated_0['visi'] < 100)]
    well_generated1 = generated_1[(generated_1['visi'] >= 100) & (generated_1['visi'] < 500)]
    smote_gan_data = pd.concat([lerp_data, well_generated0, well_generated1], axis=0)

    # Restore the columns that were dropped before resampling.
    smote_gan_data['binary_class'] = smote_gan_data['multi_class'].apply(lambda x: 0 if x == 2 else 1)
    smote_gan_data['hour_sin'] = np.sin(2 * np.pi * smote_gan_data['hour'] / 24)
    smote_gan_data['hour_cos'] = np.cos(2 * np.pi * smote_gan_data['hour'] / 24)
    smote_gan_data['month_sin'] = np.sin(2 * np.pi * smote_gan_data['month'] / 12)
    smote_gan_data['month_cos'] = np.cos(2 * np.pi * smote_gan_data['month'] / 12)
    smote_gan_data['ground_temp - temp_C'] = smote_gan_data['groundtemp'] - smote_gan_data['temp_C']

    # Class 2 rows are taken from the original (filtered) data, not the resampled frame.
    filtered_data = smote_gan_data[smote_gan_data['multi_class'] != 2]
    original_class2 = data[data['multi_class'] == 2]
    final_data = pd.concat([filtered_data, original_class2], axis=0)
    final_data.reset_index(drop=True, inplace=True)

    # Save the result and print the per-class row counts.
    final_data.to_csv(output_path, index = False)
    print(len(final_data[final_data['multi_class']==0]),'|',len(final_data[final_data['multi_class']==1]),'|',len(final_data[final_data['multi_class']==2]))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Analysis_code/make_oversample_data/gan_sample_10000_2.py
DELETED
|
@@ -1,182 +0,0 @@
|
|
| 1 |
-
import pandas as pd
|
| 2 |
-
import numpy as np
|
| 3 |
-
import os
|
| 4 |
-
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
|
| 5 |
-
from imblearn.over_sampling import SMOTENC
|
| 6 |
-
import optuna
|
| 7 |
-
from ctgan import CTGAN
|
| 8 |
-
import torch
|
| 9 |
-
import warnings
|
| 10 |
-
|
| 11 |
-
# Regions to process, with one training-CSV input and one oversampled-CSV
# output path per region (same order as `regions`).
regions = ['incheon', 'seoul', 'busan', 'daegu', 'daejeon', 'gwangju']
file_paths = [
    f'../../data/data_for_modeling/{region}_train.csv'
    for region in regions
]
output_paths = [
    f'../../data/data_oversampled/ctgan10000/ctgan10000_2_{region}.csv'
    for region in regions
]

# Pick the CUDA device when torch can see one, otherwise the CPU, and report it.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

# Silence UserWarnings originating from the optuna.distributions module.
warnings.filterwarnings(
    "ignore",
    category=UserWarning,
    module="optuna.distributions",
)
|
| 22 |
-
|
| 23 |
-
# Per-region pipeline: SMOTENC pre-balancing, then Optuna-tuned CTGAN
# oversampling of classes 0 and 1, then write the combined CSV.

# Optuna search spaces for the per-class CTGAN tuning.
# `embedding_dim` / `discriminator_steps` are inclusive (low, high) int ranges.
_CLASS0_SPACE = {
    "embedding_dim": (64, 128),
    "layer_dims": [(64, 64), (128, 128)],
    "batch_size": [64, 128, 256],
    "discriminator_steps": (1, 3),
}
_CLASS1_SPACE = {
    "embedding_dim": (128, 512),
    "layer_dims": [(128, 128), (256, 256)],
    "batch_size": [256, 512, 1024],
    "discriminator_steps": (1, 5),
}


def _build_ctgan(params):
    """Create a CTGAN from a hyperparameter dict (trial suggestions or best_params)."""
    return CTGAN(
        embedding_dim=params["embedding_dim"],
        generator_dim=params["generator_dim"],
        discriminator_dim=params["discriminator_dim"],
        batch_size=params["batch_size"],
        discriminator_steps=params["discriminator_steps"],
        pac=params["pac"],
    )


def _visi_moment_gap(real_visi, generated_visi):
    """Return squared mean gap plus squared std gap between two 'visi' series."""
    mean_gap = real_visi.mean() - generated_visi.mean()
    std_gap = real_visi.std() - generated_visi.std()
    return mean_gap ** 2 + std_gap ** 2


def _make_objective(class_data, discrete_columns, space):
    """Build an Optuna objective that scores a CTGAN fitted on `class_data`."""
    def objective(trial):
        # Suggest order and parameter names match the original script.
        params = {
            "embedding_dim": trial.suggest_int("embedding_dim", *space["embedding_dim"]),
            "generator_dim": trial.suggest_categorical("generator_dim", space["layer_dims"]),
            "discriminator_dim": trial.suggest_categorical("discriminator_dim", space["layer_dims"]),
            "pac": trial.suggest_categorical("pac", [4, 8]),
            "batch_size": trial.suggest_categorical("batch_size", space["batch_size"]),
            "discriminator_steps": trial.suggest_int("discriminator_steps", *space["discriminator_steps"]),
        }
        ctgan = _build_ctgan(params)
        ctgan.fit(class_data, discrete_columns=discrete_columns)
        generated = ctgan.sample(len(class_data) * 2)
        # The study maximizes, so return the negated distribution gap.
        return -_visi_moment_gap(class_data['visi'], generated['visi'])

    return objective


def _tune_and_sample(class_data, discrete_columns, space, n_trials, n_samples):
    """Tune CTGAN on `class_data`, refit with the best params, return `n_samples` rows."""
    study = optuna.create_study(direction="maximize")
    study.optimize(_make_objective(class_data, discrete_columns, space), n_trials=n_trials)
    ctgan = _build_ctgan(study.best_params)
    ctgan.fit(class_data, discrete_columns=discrete_columns)
    return ctgan.sample(n_samples)


for file_path, output_path in zip(file_paths, output_paths):
    # Load the data and keep only the 2018 and 2020 rows.
    data = pd.read_csv(file_path, index_col=0)
    # .copy() makes the subset an independent frame so the column
    # assignments below cannot hit pandas' chained-assignment ambiguity.
    data = data.loc[data['year'].isin([2018, 2020]), :].copy()
    data['cloudcover'] = data['cloudcover'].astype('int')
    data['lm_cloudcover'] = data['lm_cloudcover'].astype('int')
    X = data.drop(columns=['multi_class', 'binary_class'])
    y = data['multi_class']

    # Drop derived columns; they are recomputed after generation (see below).
    X.drop(columns=['ground_temp - temp_C', 'hour_sin', 'hour_cos', 'month_sin', 'month_cos'], inplace=True)

    # SMOTENC takes positional indices: every non-float64 column is categorical.
    categorical_features_indices = [i for i, dtype in enumerate(X.dtypes) if dtype != 'float64']

    # Per-class target counts for SMOTENC.
    count_class_0 = (y == 0).sum()
    count_class_1 = (y == 1).sum()
    count_class_2 = (y == 2).sum()
    target_class_1 = int(np.ceil(count_class_1 / 100) * 100)  # round up to the next hundred
    sampling_strategy = {
        0: 500 if count_class_0 <= 500 else 1000,
        1: target_class_1,
        2: count_class_2,
    }

    # Apply SMOTENC.
    smotenc = SMOTENC(categorical_features=categorical_features_indices, sampling_strategy=sampling_strategy, random_state=42)
    X_resampled, y_resampled = smotenc.fit_resample(X, y)

    # Resampled frame including the label column.
    lerp_data = X_resampled.copy()
    lerp_data['multi_class'] = y_resampled

    # CTGAN takes column names: every non-float64 column is discrete.
    categorical_features = [
        col for col, dtype in zip(lerp_data.columns, lerp_data.dtypes) if dtype != 'float64'
    ]

    # Class 0: tune (50 trials), refit on all class-0 rows, draw 10000 samples.
    generated_0 = _tune_and_sample(
        lerp_data[lerp_data['multi_class'] == 0],
        categorical_features,
        _CLASS0_SPACE,
        n_trials=50,
        n_samples=10000,
    )

    # Class 1: tune (30 trials), refit, draw 10000 minus the SMOTENC target.
    generated_1 = _tune_and_sample(
        lerp_data[lerp_data['multi_class'] == 1],
        categorical_features,
        _CLASS1_SPACE,
        n_trials=30,
        n_samples=10000 - target_class_1,
    )

    # Keep only generated rows whose 'visi' falls in the range used per class
    # here: [0, 100) for class 0 and [100, 500) for class 1.
    well_generated0 = generated_0[(generated_0['visi'] >= 0) & (generated_0['visi'] < 100)]
    well_generated1 = generated_1[(generated_1['visi'] >= 100) & (generated_1['visi'] < 500)]
    smote_gan_data = pd.concat([lerp_data, well_generated0, well_generated1], axis=0)

    # Restore the columns that were dropped before resampling.
    smote_gan_data['binary_class'] = smote_gan_data['multi_class'].apply(lambda x: 0 if x == 2 else 1)
    smote_gan_data['hour_sin'] = np.sin(2 * np.pi * smote_gan_data['hour'] / 24)
    smote_gan_data['hour_cos'] = np.cos(2 * np.pi * smote_gan_data['hour'] / 24)
    smote_gan_data['month_sin'] = np.sin(2 * np.pi * smote_gan_data['month'] / 12)
    smote_gan_data['month_cos'] = np.cos(2 * np.pi * smote_gan_data['month'] / 12)
    smote_gan_data['ground_temp - temp_C'] = smote_gan_data['groundtemp'] - smote_gan_data['temp_C']

    # Class 2 rows are taken from the original (filtered) data, not the resampled frame.
    filtered_data = smote_gan_data[smote_gan_data['multi_class'] != 2]
    original_class2 = data[data['multi_class'] == 2]
    final_data = pd.concat([filtered_data, original_class2], axis=0)
    final_data.reset_index(drop=True, inplace=True)

    # Save the result and print the per-class row counts.
    final_data.to_csv(output_path, index = False)
    print(len(final_data[final_data['multi_class']==0]),'|',len(final_data[final_data['multi_class']==1]),'|',len(final_data[final_data['multi_class']==2]))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Analysis_code/make_oversample_data/gan_sample_10000_3.py
DELETED
|
@@ -1,182 +0,0 @@
|
|
| 1 |
-
import pandas as pd
|
| 2 |
-
import numpy as np
|
| 3 |
-
import os
|
| 4 |
-
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
|
| 5 |
-
from imblearn.over_sampling import SMOTENC
|
| 6 |
-
import optuna
|
| 7 |
-
from ctgan import CTGAN
|
| 8 |
-
import torch
|
| 9 |
-
import warnings
|
| 10 |
-
|
| 11 |
-
# Regions to process, with one training-CSV input and one oversampled-CSV
# output path per region (same order as `regions`).
regions = ['incheon', 'seoul', 'busan', 'daegu', 'daejeon', 'gwangju']
file_paths = [
    f'../../data/data_for_modeling/{region}_train.csv'
    for region in regions
]
output_paths = [
    f'../../data/data_oversampled/ctgan10000/ctgan10000_3_{region}.csv'
    for region in regions
]

# Pick the CUDA device when torch can see one, otherwise the CPU, and report it.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

# Silence UserWarnings originating from the optuna.distributions module.
warnings.filterwarnings(
    "ignore",
    category=UserWarning,
    module="optuna.distributions",
)
|
| 22 |
-
|
| 23 |
-
# 지역별 처리
|
| 24 |
-
for file_path, output_path in zip(file_paths, output_paths):
|
| 25 |
-
# 데이터 로드
|
| 26 |
-
data = pd.read_csv(file_path, index_col=0)
|
| 27 |
-
data= data.loc[data['year'].isin([2019,2020]),:]
|
| 28 |
-
data['cloudcover'] = data['cloudcover'].astype('int')
|
| 29 |
-
data['lm_cloudcover'] = data['lm_cloudcover'].astype('int')
|
| 30 |
-
X = data.drop(columns=['multi_class', 'binary_class'])
|
| 31 |
-
y = data['multi_class']
|
| 32 |
-
|
| 33 |
-
# 불필요한 열 제거
|
| 34 |
-
X.drop(columns=['ground_temp - temp_C', 'hour_sin', 'hour_cos', 'month_sin', 'month_cos'], inplace=True)
|
| 35 |
-
|
| 36 |
-
# SMOTENC에서 사용할 범주형 변수 열 번호 설정
|
| 37 |
-
categorical_features_indices = [i for i, dtype in enumerate(X.dtypes) if dtype != 'float64']
|
| 38 |
-
|
| 39 |
-
# sampling_strategy 설정
|
| 40 |
-
count_class_0 = (y == 0).sum()
|
| 41 |
-
count_class_1 = (y == 1).sum()
|
| 42 |
-
count_class_2 = (y == 2).sum()
|
| 43 |
-
sampling_strategy = {
|
| 44 |
-
0: 500 if count_class_0 <= 500 else 1000,
|
| 45 |
-
1: int(np.ceil(count_class_1 / 100) * 100), # 백의 자리로 올림
|
| 46 |
-
2: count_class_2
|
| 47 |
-
}
|
| 48 |
-
|
| 49 |
-
# SMOTENC 적용
|
| 50 |
-
smotenc = SMOTENC(categorical_features=categorical_features_indices, sampling_strategy=sampling_strategy, random_state=42)
|
| 51 |
-
X_resampled, y_resampled = smotenc.fit_resample(X, y)
|
| 52 |
-
|
| 53 |
-
# Resampled 데이터 생성
|
| 54 |
-
lerp_data = X_resampled.copy()
|
| 55 |
-
lerp_data['multi_class'] = y_resampled
|
| 56 |
-
|
| 57 |
-
# CTGAN에서 사용할 범주형 변수 열 이름 설정
|
| 58 |
-
categorical_features = [
|
| 59 |
-
col for col, dtype in zip(lerp_data.columns, lerp_data.dtypes) if dtype != 'float64'
|
| 60 |
-
]
|
| 61 |
-
|
| 62 |
-
# Optuna 목적 함수 정의
|
| 63 |
-
def objective(trial):
|
| 64 |
-
# 하이퍼파라미터 탐색 범위 설정
|
| 65 |
-
embedding_dim = trial.suggest_int("embedding_dim", 64, 128)
|
| 66 |
-
generator_dim = trial.suggest_categorical("generator_dim", [(64, 64), (128, 128)])
|
| 67 |
-
discriminator_dim = trial.suggest_categorical("discriminator_dim", [(64, 64), (128, 128)])
|
| 68 |
-
pac = trial.suggest_categorical("pac", [4, 8])
|
| 69 |
-
batch_size = trial.suggest_categorical("batch_size", [64, 128, 256])
|
| 70 |
-
discriminator_steps = trial.suggest_int("discriminator_steps", 1, 3)
|
| 71 |
-
|
| 72 |
-
# CTGAN 모델 생성
|
| 73 |
-
ctgan = CTGAN(
|
| 74 |
-
embedding_dim=embedding_dim,
|
| 75 |
-
generator_dim=generator_dim,
|
| 76 |
-
discriminator_dim=discriminator_dim,
|
| 77 |
-
batch_size=batch_size,
|
| 78 |
-
discriminator_steps=discriminator_steps,
|
| 79 |
-
pac=pac
|
| 80 |
-
)
|
| 81 |
-
|
| 82 |
-
# 범주 0 데이터 필터링
|
| 83 |
-
data_0 = lerp_data[lerp_data['multi_class'] == 0]
|
| 84 |
-
|
| 85 |
-
# 모델 학습
|
| 86 |
-
ctgan.fit(data_0, discrete_columns=categorical_features)
|
| 87 |
-
|
| 88 |
-
# 샘플 생성
|
| 89 |
-
generated_data = ctgan.sample(len(data_0) * 2)
|
| 90 |
-
|
| 91 |
-
# 평가: 샘플의 연속형 변수 분포 비교
|
| 92 |
-
real_visi = data_0['visi']
|
| 93 |
-
generated_visi = generated_data['visi']
|
| 94 |
-
|
| 95 |
-
# 분포 간 차이(MSE) 계산
|
| 96 |
-
mse = ((real_visi.mean() - generated_visi.mean())**2 + (real_visi.std() - generated_visi.std())**2)
|
| 97 |
-
return -mse
|
| 98 |
-
|
| 99 |
-
# Optuna로 최적화 수행
|
| 100 |
-
study = optuna.create_study(direction="maximize")
|
| 101 |
-
study.optimize(objective, n_trials=50)
|
| 102 |
-
|
| 103 |
-
# 최적 하이퍼파라미터 출력
|
| 104 |
-
best_params = study.best_params
|
| 105 |
-
|
| 106 |
-
# 최적 하이퍼파라미터로 CTGAN 학습 및 샘플 생성
|
| 107 |
-
ctgan = CTGAN(
|
| 108 |
-
embedding_dim=best_params["embedding_dim"],
|
| 109 |
-
generator_dim=best_params["generator_dim"],
|
| 110 |
-
discriminator_dim=best_params["discriminator_dim"],
|
| 111 |
-
batch_size=best_params["batch_size"],
|
| 112 |
-
discriminator_steps=best_params["discriminator_steps"],
|
| 113 |
-
pac=best_params["pac"]
|
| 114 |
-
)
|
| 115 |
-
|
| 116 |
-
# 범주 0 데이터로 최종 학습
|
| 117 |
-
ctgan.fit(lerp_data[lerp_data['multi_class'] == 0], discrete_columns=categorical_features)
|
| 118 |
-
generated_0 = ctgan.sample(10000)
|
| 119 |
-
|
| 120 |
-
# 범주 1 데이터 최적화 및 생성
|
| 121 |
-
def objective_class1(trial):
    """Optuna objective scoring one CTGAN configuration on the class-1 rows.

    Draws a hyperparameter set from ``trial``, trains a CTGAN on the rows of
    ``lerp_data`` whose ``multi_class`` equals 1, samples twice as many
    synthetic rows as there are real ones, and compares the ``visi`` column
    of both sets.

    Returns the negated sum of the squared mean difference and the squared
    standard-deviation difference, so a larger value means a closer match.
    """
    hidden_layouts = [(128, 128), (256, 256)]
    hyperparams = {
        "embedding_dim": trial.suggest_int("embedding_dim", 128, 512),
        "generator_dim": trial.suggest_categorical("generator_dim", hidden_layouts),
        "discriminator_dim": trial.suggest_categorical("discriminator_dim", hidden_layouts),
        "pac": trial.suggest_categorical("pac", [4, 8]),
        "batch_size": trial.suggest_categorical("batch_size", [256, 512, 1024]),
        "discriminator_steps": trial.suggest_int("discriminator_steps", 1, 5),
    }
    synthesizer = CTGAN(**hyperparams)

    # Train on the real class-1 rows and draw twice as many synthetic ones.
    real_rows = lerp_data[lerp_data['multi_class'] == 1]
    synthesizer.fit(real_rows, discrete_columns=categorical_features)
    synthetic_rows = synthesizer.sample(2 * len(real_rows))

    # Only the first two moments of 'visi' are compared.
    real_col = real_rows['visi']
    synth_col = synthetic_rows['visi']
    mean_gap = real_col.mean() - synth_col.mean()
    std_gap = real_col.std() - synth_col.std()
    return -(mean_gap ** 2 + std_gap ** 2)
|
| 146 |
-
|
| 147 |
-
study_class1 = optuna.create_study(direction="maximize")
|
| 148 |
-
study_class1.optimize(objective_class1, n_trials=30)
|
| 149 |
-
|
| 150 |
-
best_params_class1 = study_class1.best_params
|
| 151 |
-
ctgan = CTGAN(
|
| 152 |
-
embedding_dim=best_params_class1["embedding_dim"],
|
| 153 |
-
generator_dim=best_params_class1["generator_dim"],
|
| 154 |
-
discriminator_dim=best_params_class1["discriminator_dim"],
|
| 155 |
-
batch_size=best_params_class1["batch_size"],
|
| 156 |
-
discriminator_steps=best_params_class1["discriminator_steps"],
|
| 157 |
-
pac=best_params_class1["pac"]
|
| 158 |
-
)
|
| 159 |
-
|
| 160 |
-
ctgan.fit(lerp_data[lerp_data['multi_class'] == 1], discrete_columns=categorical_features)
|
| 161 |
-
generated_1 = ctgan.sample(10000 - int(np.ceil(count_class_1 / 100) * 100))
|
| 162 |
-
|
| 163 |
-
# 데이터 병합 및 저장
|
| 164 |
-
well_generated0 = generated_0[(generated_0['visi'] >= 0) & (generated_0['visi'] < 100)]
|
| 165 |
-
well_generated1 = generated_1[(generated_1['visi'] >= 100) & (generated_1['visi'] < 500)]
|
| 166 |
-
smote_gan_data = pd.concat([lerp_data, well_generated0, well_generated1], axis=0)
|
| 167 |
-
# 제거변수 복구
|
| 168 |
-
smote_gan_data['binary_class'] = smote_gan_data['multi_class'].apply(lambda x: 0 if x == 2 else 1)
|
| 169 |
-
smote_gan_data['hour_sin'] = np.sin(2 * np.pi * smote_gan_data['hour'] / 24)
|
| 170 |
-
smote_gan_data['hour_cos'] = np.cos(2 * np.pi * smote_gan_data['hour'] / 24)
|
| 171 |
-
smote_gan_data['month_sin'] = np.sin(2 * np.pi * smote_gan_data['month'] / 12)
|
| 172 |
-
smote_gan_data['month_cos'] = np.cos(2 * np.pi * smote_gan_data['month'] / 12)
|
| 173 |
-
smote_gan_data['ground_temp - temp_C'] = smote_gan_data['groundtemp'] - smote_gan_data['temp_C']
|
| 174 |
-
|
| 175 |
-
filtered_data = smote_gan_data[smote_gan_data['multi_class'] != 2]
|
| 176 |
-
original_class2 = data[data['multi_class'] == 2]
|
| 177 |
-
final_data = pd.concat([filtered_data, original_class2], axis=0)
|
| 178 |
-
final_data.reset_index(drop=True, inplace=True)
|
| 179 |
-
|
| 180 |
-
# 결과 저장
|
| 181 |
-
final_data.to_csv(output_path, index = False)
|
| 182 |
-
print(len(final_data[final_data['multi_class']==0]),'|',len(final_data[final_data['multi_class']==1]),'|',len(final_data[final_data['multi_class']==2]))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Analysis_code/make_oversample_data/gan_sample_20000_1.py
DELETED
|
@@ -1,183 +0,0 @@
|
|
| 1 |
-
import pandas as pd
|
| 2 |
-
import numpy as np
|
| 3 |
-
# import os
|
| 4 |
-
# os.environ["CUDA_VISIBLE_DEVICES"] = "1"
|
| 5 |
-
from imblearn.over_sampling import SMOTENC
|
| 6 |
-
import optuna
|
| 7 |
-
from ctgan import CTGAN
|
| 8 |
-
import torch
|
| 9 |
-
import warnings
|
| 10 |
-
|
| 11 |
-
# Input and output CSV locations, one pair per region, in processing order.
regions = [
    'incheon',
    'seoul',
    'busan',
    'daegu',
    'daejeon',
    'gwangju',
]
file_paths = [f'../../data/data_for_modeling/{name}_train.csv' for name in regions]
output_paths = [
    f'../../data/data_oversampled/ctgan20000/ctgan20000_1_{name}.csv' for name in regions
]

# Report whether a CUDA device is available. The resulting `device` object is
# only printed here.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

# Silence UserWarning messages coming from optuna.distributions
# (presumably raised for the tuple-valued categorical choices -- TODO confirm).
warnings.filterwarnings(
    "ignore",
    module="optuna.distributions",
    category=UserWarning,
)
|
| 23 |
-
|
| 24 |
-
# 지역별 처리
|
| 25 |
-
# Number of synthetic rows requested from the class-0 CTGAN; the class-1 CTGAN
# is asked for this many minus the class-1 SMOTENC target.
_SYNTHETIC_ROWS = 20000
# Years of the training data kept by this script variant.
_TRAIN_YEARS = [2018, 2019]
# Engineered columns dropped before resampling and recomputed afterwards.
_DERIVED_COLUMNS = ['ground_temp - temp_C', 'hour_sin', 'hour_cos', 'month_sin', 'month_cos']
# Optuna search spaces for the two minority classes.
_CLASS0_SEARCH = {
    "embedding_dim": (64, 128),
    "layer_dims": [(64, 64), (128, 128)],
    "batch_size": [64, 128, 256],
    "max_discriminator_steps": 3,
}
_CLASS1_SEARCH = {
    "embedding_dim": (128, 512),
    "layer_dims": [(128, 128), (256, 256)],
    "batch_size": [256, 512, 1024],
    "max_discriminator_steps": 5,
}


def _suggest_ctgan_params(trial, space):
    """Draw one set of CTGAN constructor arguments from ``space`` via ``trial``."""
    low, high = space["embedding_dim"]
    return {
        "embedding_dim": trial.suggest_int("embedding_dim", low, high),
        "generator_dim": trial.suggest_categorical("generator_dim", space["layer_dims"]),
        "discriminator_dim": trial.suggest_categorical("discriminator_dim", space["layer_dims"]),
        "pac": trial.suggest_categorical("pac", [4, 8]),
        "batch_size": trial.suggest_categorical("batch_size", space["batch_size"]),
        "discriminator_steps": trial.suggest_int(
            "discriminator_steps", 1, space["max_discriminator_steps"]
        ),
    }


def _tune_and_sample(class_rows, discrete_columns, space, n_trials, n_samples):
    """Tune a CTGAN on ``class_rows`` with Optuna, refit the best one, and sample.

    Each trial trains a CTGAN, samples ``2 * len(class_rows)`` rows and is
    scored by the negated sum of squared differences between the real and
    synthetic mean and standard deviation of ``visi``.

    Returns a DataFrame of ``n_samples`` synthetic rows, or an empty frame
    with the same columns when ``n_samples`` is not positive.
    """
    if n_samples <= 0:
        # Nothing to generate: skip the tuning entirely.
        return class_rows.iloc[0:0]

    def objective(trial):
        model = CTGAN(**_suggest_ctgan_params(trial, space))
        model.fit(class_rows, discrete_columns=discrete_columns)
        synthetic = model.sample(len(class_rows) * 2)
        real_visi, synthetic_visi = class_rows['visi'], synthetic['visi']
        gap = ((real_visi.mean() - synthetic_visi.mean()) ** 2
               + (real_visi.std() - synthetic_visi.std()) ** 2)
        return -gap

    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=n_trials)

    # The keys of best_params are exactly the CTGAN keyword arguments suggested above.
    best_model = CTGAN(**study.best_params)
    best_model.fit(class_rows, discrete_columns=discrete_columns)
    return best_model.sample(n_samples)


def _restore_derived_features(frame):
    """Recompute, in place, the columns that were dropped before resampling."""
    frame['binary_class'] = frame['multi_class'].apply(lambda x: 0 if x == 2 else 1)
    frame['hour_sin'] = np.sin(2 * np.pi * frame['hour'] / 24)
    frame['hour_cos'] = np.cos(2 * np.pi * frame['hour'] / 24)
    frame['month_sin'] = np.sin(2 * np.pi * frame['month'] / 12)
    frame['month_cos'] = np.cos(2 * np.pi * frame['month'] / 12)
    frame['ground_temp - temp_C'] = frame['groundtemp'] - frame['temp_C']
    return frame


for file_path, output_path in zip(file_paths, output_paths):
    # Load one region and keep only the selected years.
    data = pd.read_csv(file_path, index_col=0)
    # .copy() gives an independent frame, so the casts below do not write into
    # a slice of the loaded frame (avoids pandas chained-assignment warnings).
    data = data.loc[data['year'].isin(_TRAIN_YEARS), :].copy()
    data['cloudcover'] = data['cloudcover'].astype('int')
    data['lm_cloudcover'] = data['lm_cloudcover'].astype('int')

    y = data['multi_class']
    X = data.drop(columns=['multi_class', 'binary_class'] + _DERIVED_COLUMNS)

    # Every non-float64 column is treated as categorical by SMOTENC.
    categorical_features_indices = [i for i, dtype in enumerate(X.dtypes) if dtype != 'float64']

    count_class_0 = (y == 0).sum()
    count_class_1 = (y == 1).sum()
    count_class_2 = (y == 2).sum()
    # Class 1 is rounded up to the next multiple of 100.
    class1_target = int(np.ceil(count_class_1 / 100) * 100)
    sampling_strategy = {
        # Never ask for fewer rows than already exist: an over-sampler rejects
        # a target below the original class count.
        0: max(int(count_class_0), 500 if count_class_0 <= 500 else 1000),
        1: class1_target,
        2: count_class_2,
    }

    smotenc = SMOTENC(
        categorical_features=categorical_features_indices,
        sampling_strategy=sampling_strategy,
        random_state=42,
    )
    X_resampled, y_resampled = smotenc.fit_resample(X, y)

    # SMOTENC output with the label re-attached.
    lerp_data = X_resampled.copy()
    lerp_data['multi_class'] = y_resampled

    # CTGAN takes discrete columns by name (this now includes 'multi_class').
    categorical_features = [
        col for col, dtype in lerp_data.dtypes.items() if dtype != 'float64'
    ]

    generated_0 = _tune_and_sample(
        lerp_data[lerp_data['multi_class'] == 0],
        categorical_features,
        _CLASS0_SEARCH,
        n_trials=50,
        n_samples=_SYNTHETIC_ROWS,
    )
    generated_1 = _tune_and_sample(
        lerp_data[lerp_data['multi_class'] == 1],
        categorical_features,
        _CLASS1_SEARCH,
        n_trials=30,
        n_samples=_SYNTHETIC_ROWS - class1_target,
    )

    # Keep only synthetic rows whose 'visi' lies in the range used for the
    # class: [0, 100) for class 0 and [100, 500) for class 1.
    well_generated0 = generated_0[(generated_0['visi'] >= 0) & (generated_0['visi'] < 100)]
    well_generated1 = generated_1[(generated_1['visi'] >= 100) & (generated_1['visi'] < 500)]
    smote_gan_data = pd.concat([lerp_data, well_generated0, well_generated1], axis=0)
    smote_gan_data = _restore_derived_features(smote_gan_data)

    # Class 2 is taken from the original rows rather than the resampled frame.
    filtered_data = smote_gan_data[smote_gan_data['multi_class'] != 2]
    original_class2 = data[data['multi_class'] == 2]
    final_data = pd.concat([filtered_data, original_class2], axis=0)
    final_data.reset_index(drop=True, inplace=True)

    # Save the result and report the per-class row counts.
    final_data.to_csv(output_path, index=False)
    class_sizes = [len(final_data[final_data['multi_class'] == label]) for label in (0, 1, 2)]
    print(class_sizes[0], '|', class_sizes[1], '|', class_sizes[2])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Analysis_code/make_oversample_data/gan_sample_20000_2.py
DELETED
|
@@ -1,183 +0,0 @@
|
|
| 1 |
-
import pandas as pd
|
| 2 |
-
import numpy as np
|
| 3 |
-
import os
|
| 4 |
-
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
|
| 5 |
-
from imblearn.over_sampling import SMOTENC
|
| 6 |
-
import optuna
|
| 7 |
-
from ctgan import CTGAN
|
| 8 |
-
import torch
|
| 9 |
-
import warnings
|
| 10 |
-
|
| 11 |
-
# Input and output CSV locations, one pair per region, in processing order.
regions = [
    'incheon',
    'seoul',
    'busan',
    'daegu',
    'daejeon',
    'gwangju',
]
file_paths = [f'../../data/data_for_modeling/{name}_train.csv' for name in regions]
output_paths = [
    f'../../data/data_oversampled/ctgan20000/ctgan20000_2_{name}.csv' for name in regions
]

# Report whether a CUDA device is available. The resulting `device` object is
# only printed here.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

# Silence UserWarning messages coming from optuna.distributions
# (presumably raised for the tuple-valued categorical choices -- TODO confirm).
warnings.filterwarnings(
    "ignore",
    module="optuna.distributions",
    category=UserWarning,
)
|
| 23 |
-
|
| 24 |
-
# 지역별 처리
|
| 25 |
-
# Number of synthetic rows requested from the class-0 CTGAN; the class-1 CTGAN
# is asked for this many minus the class-1 SMOTENC target.
_SYNTHETIC_ROWS = 20000
# Years of the training data kept by this script variant.
_TRAIN_YEARS = [2018, 2020]
# Engineered columns dropped before resampling and recomputed afterwards.
_DERIVED_COLUMNS = ['ground_temp - temp_C', 'hour_sin', 'hour_cos', 'month_sin', 'month_cos']
# Optuna search spaces for the two minority classes.
_CLASS0_SEARCH = {
    "embedding_dim": (64, 128),
    "layer_dims": [(64, 64), (128, 128)],
    "batch_size": [64, 128, 256],
    "max_discriminator_steps": 3,
}
_CLASS1_SEARCH = {
    "embedding_dim": (128, 512),
    "layer_dims": [(128, 128), (256, 256)],
    "batch_size": [256, 512, 1024],
    "max_discriminator_steps": 5,
}


def _suggest_ctgan_params(trial, space):
    """Draw one set of CTGAN constructor arguments from ``space`` via ``trial``."""
    low, high = space["embedding_dim"]
    return {
        "embedding_dim": trial.suggest_int("embedding_dim", low, high),
        "generator_dim": trial.suggest_categorical("generator_dim", space["layer_dims"]),
        "discriminator_dim": trial.suggest_categorical("discriminator_dim", space["layer_dims"]),
        "pac": trial.suggest_categorical("pac", [4, 8]),
        "batch_size": trial.suggest_categorical("batch_size", space["batch_size"]),
        "discriminator_steps": trial.suggest_int(
            "discriminator_steps", 1, space["max_discriminator_steps"]
        ),
    }


def _tune_and_sample(class_rows, discrete_columns, space, n_trials, n_samples):
    """Tune a CTGAN on ``class_rows`` with Optuna, refit the best one, and sample.

    Each trial trains a CTGAN, samples ``2 * len(class_rows)`` rows and is
    scored by the negated sum of squared differences between the real and
    synthetic mean and standard deviation of ``visi``.

    Returns a DataFrame of ``n_samples`` synthetic rows, or an empty frame
    with the same columns when ``n_samples`` is not positive.
    """
    if n_samples <= 0:
        # Nothing to generate: skip the tuning entirely.
        return class_rows.iloc[0:0]

    def objective(trial):
        model = CTGAN(**_suggest_ctgan_params(trial, space))
        model.fit(class_rows, discrete_columns=discrete_columns)
        synthetic = model.sample(len(class_rows) * 2)
        real_visi, synthetic_visi = class_rows['visi'], synthetic['visi']
        gap = ((real_visi.mean() - synthetic_visi.mean()) ** 2
               + (real_visi.std() - synthetic_visi.std()) ** 2)
        return -gap

    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=n_trials)

    # The keys of best_params are exactly the CTGAN keyword arguments suggested above.
    best_model = CTGAN(**study.best_params)
    best_model.fit(class_rows, discrete_columns=discrete_columns)
    return best_model.sample(n_samples)


def _restore_derived_features(frame):
    """Recompute, in place, the columns that were dropped before resampling."""
    frame['binary_class'] = frame['multi_class'].apply(lambda x: 0 if x == 2 else 1)
    frame['hour_sin'] = np.sin(2 * np.pi * frame['hour'] / 24)
    frame['hour_cos'] = np.cos(2 * np.pi * frame['hour'] / 24)
    frame['month_sin'] = np.sin(2 * np.pi * frame['month'] / 12)
    frame['month_cos'] = np.cos(2 * np.pi * frame['month'] / 12)
    frame['ground_temp - temp_C'] = frame['groundtemp'] - frame['temp_C']
    return frame


for file_path, output_path in zip(file_paths, output_paths):
    # Load one region and keep only the selected years.
    data = pd.read_csv(file_path, index_col=0)
    # .copy() gives an independent frame, so the casts below do not write into
    # a slice of the loaded frame (avoids pandas chained-assignment warnings).
    data = data.loc[data['year'].isin(_TRAIN_YEARS), :].copy()
    data['cloudcover'] = data['cloudcover'].astype('int')
    data['lm_cloudcover'] = data['lm_cloudcover'].astype('int')

    y = data['multi_class']
    X = data.drop(columns=['multi_class', 'binary_class'] + _DERIVED_COLUMNS)

    # Every non-float64 column is treated as categorical by SMOTENC.
    categorical_features_indices = [i for i, dtype in enumerate(X.dtypes) if dtype != 'float64']

    count_class_0 = (y == 0).sum()
    count_class_1 = (y == 1).sum()
    count_class_2 = (y == 2).sum()
    # Class 1 is rounded up to the next multiple of 100.
    class1_target = int(np.ceil(count_class_1 / 100) * 100)
    sampling_strategy = {
        # Never ask for fewer rows than already exist: an over-sampler rejects
        # a target below the original class count.
        0: max(int(count_class_0), 500 if count_class_0 <= 500 else 1000),
        1: class1_target,
        2: count_class_2,
    }

    smotenc = SMOTENC(
        categorical_features=categorical_features_indices,
        sampling_strategy=sampling_strategy,
        random_state=42,
    )
    X_resampled, y_resampled = smotenc.fit_resample(X, y)

    # SMOTENC output with the label re-attached.
    lerp_data = X_resampled.copy()
    lerp_data['multi_class'] = y_resampled

    # CTGAN takes discrete columns by name (this now includes 'multi_class').
    categorical_features = [
        col for col, dtype in lerp_data.dtypes.items() if dtype != 'float64'
    ]

    generated_0 = _tune_and_sample(
        lerp_data[lerp_data['multi_class'] == 0],
        categorical_features,
        _CLASS0_SEARCH,
        n_trials=50,
        n_samples=_SYNTHETIC_ROWS,
    )
    generated_1 = _tune_and_sample(
        lerp_data[lerp_data['multi_class'] == 1],
        categorical_features,
        _CLASS1_SEARCH,
        n_trials=30,
        n_samples=_SYNTHETIC_ROWS - class1_target,
    )

    # Keep only synthetic rows whose 'visi' lies in the range used for the
    # class: [0, 100) for class 0 and [100, 500) for class 1.
    well_generated0 = generated_0[(generated_0['visi'] >= 0) & (generated_0['visi'] < 100)]
    well_generated1 = generated_1[(generated_1['visi'] >= 100) & (generated_1['visi'] < 500)]
    smote_gan_data = pd.concat([lerp_data, well_generated0, well_generated1], axis=0)
    smote_gan_data = _restore_derived_features(smote_gan_data)

    # Class 2 is taken from the original rows rather than the resampled frame.
    filtered_data = smote_gan_data[smote_gan_data['multi_class'] != 2]
    original_class2 = data[data['multi_class'] == 2]
    final_data = pd.concat([filtered_data, original_class2], axis=0)
    final_data.reset_index(drop=True, inplace=True)

    # Save the result and report the per-class row counts.
    final_data.to_csv(output_path, index=False)
    class_sizes = [len(final_data[final_data['multi_class'] == label]) for label in (0, 1, 2)]
    print(class_sizes[0], '|', class_sizes[1], '|', class_sizes[2])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Analysis_code/make_oversample_data/gan_sample_20000_3.py
DELETED
|
@@ -1,183 +0,0 @@
|
|
| 1 |
-
import pandas as pd
|
| 2 |
-
import numpy as np
|
| 3 |
-
import os
|
| 4 |
-
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
|
| 5 |
-
from imblearn.over_sampling import SMOTENC
|
| 6 |
-
import optuna
|
| 7 |
-
from ctgan import CTGAN
|
| 8 |
-
import torch
|
| 9 |
-
import warnings
|
| 10 |
-
|
| 11 |
-
# Input and output CSV locations, one pair per region, in processing order.
regions = [
    'incheon',
    'seoul',
    'busan',
    'daegu',
    'daejeon',
    'gwangju',
]
file_paths = [f'../../data/data_for_modeling/{name}_train.csv' for name in regions]
output_paths = [
    f'../../data/data_oversampled/ctgan20000/ctgan20000_3_{name}.csv' for name in regions
]

# Report whether a CUDA device is available. The resulting `device` object is
# only printed here.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

# Silence UserWarning messages coming from optuna.distributions
# (presumably raised for the tuple-valued categorical choices -- TODO confirm).
warnings.filterwarnings(
    "ignore",
    module="optuna.distributions",
    category=UserWarning,
)
|
| 23 |
-
|
| 24 |
-
# 지역별 처리
|
| 25 |
-
for file_path, output_path in zip(file_paths, output_paths):
|
| 26 |
-
# 데이터 로드
|
| 27 |
-
data = pd.read_csv(file_path, index_col=0)
|
| 28 |
-
data= data.loc[data['year'].isin([2019,2020]),:]
|
| 29 |
-
data['cloudcover'] = data['cloudcover'].astype('int')
|
| 30 |
-
data['lm_cloudcover'] = data['lm_cloudcover'].astype('int')
|
| 31 |
-
X = data.drop(columns=['multi_class', 'binary_class'])
|
| 32 |
-
y = data['multi_class']
|
| 33 |
-
|
| 34 |
-
# 불필요한 열 제거
|
| 35 |
-
X.drop(columns=['ground_temp - temp_C', 'hour_sin', 'hour_cos', 'month_sin', 'month_cos'], inplace=True)
|
| 36 |
-
|
| 37 |
-
# SMOTENC에서 사용할 범주형 변수 열 번호 설정
|
| 38 |
-
categorical_features_indices = [i for i, dtype in enumerate(X.dtypes) if dtype != 'float64']
|
| 39 |
-
|
| 40 |
-
# sampling_strategy 설정
|
| 41 |
-
count_class_0 = (y == 0).sum()
|
| 42 |
-
count_class_1 = (y == 1).sum()
|
| 43 |
-
count_class_2 = (y == 2).sum()
|
| 44 |
-
sampling_strategy = {
|
| 45 |
-
0: 500 if count_class_0 <= 500 else 1000,
|
| 46 |
-
1: int(np.ceil(count_class_1 / 100) * 100), # 백의 자리로 올림
|
| 47 |
-
2: count_class_2
|
| 48 |
-
}
|
| 49 |
-
|
| 50 |
-
# SMOTENC 적용
|
| 51 |
-
smotenc = SMOTENC(categorical_features=categorical_features_indices, sampling_strategy=sampling_strategy, random_state=42)
|
| 52 |
-
X_resampled, y_resampled = smotenc.fit_resample(X, y)
|
| 53 |
-
|
| 54 |
-
# Resampled 데이터 생성
|
| 55 |
-
lerp_data = X_resampled.copy()
|
| 56 |
-
lerp_data['multi_class'] = y_resampled
|
| 57 |
-
|
| 58 |
-
# CTGAN에서 사용할 범주형 변수 열 이름 설정
|
| 59 |
-
categorical_features = [
|
| 60 |
-
col for col, dtype in zip(lerp_data.columns, lerp_data.dtypes) if dtype != 'float64'
|
| 61 |
-
]
|
| 62 |
-
|
| 63 |
-
# Optuna 목적 함수 정의
|
| 64 |
-
def objective(trial):
    """Optuna objective scoring one CTGAN configuration on the class-0 rows.

    Draws a hyperparameter set from ``trial``, trains a CTGAN on the rows of
    ``lerp_data`` whose ``multi_class`` equals 0, samples twice as many
    synthetic rows as there are real ones, and compares the ``visi`` column
    of both sets.

    Returns the negated sum of the squared mean difference and the squared
    standard-deviation difference, so a larger value means a closer match.
    """
    # Hyperparameter search ranges.
    hidden_layouts = [(64, 64), (128, 128)]
    hyperparams = {
        "embedding_dim": trial.suggest_int("embedding_dim", 64, 128),
        "generator_dim": trial.suggest_categorical("generator_dim", hidden_layouts),
        "discriminator_dim": trial.suggest_categorical("discriminator_dim", hidden_layouts),
        "pac": trial.suggest_categorical("pac", [4, 8]),
        "batch_size": trial.suggest_categorical("batch_size", [64, 128, 256]),
        "discriminator_steps": trial.suggest_int("discriminator_steps", 1, 3),
    }
    synthesizer = CTGAN(**hyperparams)

    # Train on the real class-0 rows and draw twice as many synthetic ones.
    real_rows = lerp_data[lerp_data['multi_class'] == 0]
    synthesizer.fit(real_rows, discrete_columns=categorical_features)
    synthetic_rows = synthesizer.sample(2 * len(real_rows))

    # Only the first two moments of 'visi' are compared.
    real_col = real_rows['visi']
    synth_col = synthetic_rows['visi']
    mean_gap = real_col.mean() - synth_col.mean()
    std_gap = real_col.std() - synth_col.std()
    return -(mean_gap ** 2 + std_gap ** 2)
|
| 99 |
-
|
| 100 |
-
# Optuna로 최적화 수행
|
| 101 |
-
study = optuna.create_study(direction="maximize")
|
| 102 |
-
study.optimize(objective, n_trials=50)
|
| 103 |
-
|
| 104 |
-
# 최적 하이퍼파라미터 출력
|
| 105 |
-
best_params = study.best_params
|
| 106 |
-
|
| 107 |
-
# 최적 하이퍼파라미터로 CTGAN 학습 및 샘플 생성
|
| 108 |
-
ctgan = CTGAN(
|
| 109 |
-
embedding_dim=best_params["embedding_dim"],
|
| 110 |
-
generator_dim=best_params["generator_dim"],
|
| 111 |
-
discriminator_dim=best_params["discriminator_dim"],
|
| 112 |
-
batch_size=best_params["batch_size"],
|
| 113 |
-
discriminator_steps=best_params["discriminator_steps"],
|
| 114 |
-
pac=best_params["pac"]
|
| 115 |
-
)
|
| 116 |
-
|
| 117 |
-
# 범주 0 데이터로 최종 학습
|
| 118 |
-
ctgan.fit(lerp_data[lerp_data['multi_class'] == 0], discrete_columns=categorical_features)
|
| 119 |
-
generated_0 = ctgan.sample(20000)
|
| 120 |
-
|
| 121 |
-
# 범주 1 데이터 최적화 및 생성
|
| 122 |
-
def objective_class1(trial):
    """Optuna objective for the class-1 CTGAN: score one sampled configuration.

    Trains a CTGAN with the trial's hyperparameters on the ``multi_class == 1``
    rows of ``lerp_data`` (taken from the enclosing scope), draws twice as many
    synthetic rows, and returns the negated sum of the squared gaps between
    the real and synthetic ``visi`` mean and standard deviation, so that a
    larger value means a closer match.
    """
    # The suggestion names double as CTGAN keyword names and as the keys that
    # are later read back from the study's best parameters.
    hyperparams = {
        "embedding_dim": trial.suggest_int("embedding_dim", 128, 512),
        "generator_dim": trial.suggest_categorical("generator_dim", [(128, 128), (256, 256)]),
        "discriminator_dim": trial.suggest_categorical("discriminator_dim", [(128, 128), (256, 256)]),
        "pac": trial.suggest_categorical("pac", [4, 8]),
        "batch_size": trial.suggest_categorical("batch_size", [256, 512, 1024]),
        "discriminator_steps": trial.suggest_int("discriminator_steps", 1, 5),
    }
    model = CTGAN(**hyperparams)

    class1_rows = lerp_data[lerp_data['multi_class'] == 1]
    model.fit(class1_rows, discrete_columns=categorical_features)
    synthetic = model.sample(len(class1_rows) * 2)

    observed_visi = class1_rows['visi']
    sampled_visi = synthetic['visi']
    mean_gap = observed_visi.mean() - sampled_visi.mean()
    std_gap = observed_visi.std() - sampled_visi.std()
    return -(mean_gap**2 + std_gap**2)
|
| 147 |
-
|
| 148 |
-
study_class1 = optuna.create_study(direction="maximize")
|
| 149 |
-
study_class1.optimize(objective_class1, n_trials=30)
|
| 150 |
-
|
| 151 |
-
best_params_class1 = study_class1.best_params
|
| 152 |
-
ctgan = CTGAN(
|
| 153 |
-
embedding_dim=best_params_class1["embedding_dim"],
|
| 154 |
-
generator_dim=best_params_class1["generator_dim"],
|
| 155 |
-
discriminator_dim=best_params_class1["discriminator_dim"],
|
| 156 |
-
batch_size=best_params_class1["batch_size"],
|
| 157 |
-
discriminator_steps=best_params_class1["discriminator_steps"],
|
| 158 |
-
pac=best_params_class1["pac"]
|
| 159 |
-
)
|
| 160 |
-
|
| 161 |
-
ctgan.fit(lerp_data[lerp_data['multi_class'] == 1], discrete_columns=categorical_features)
|
| 162 |
-
generated_1 = ctgan.sample(20000 - int(np.ceil(count_class_1 / 100) * 100))
|
| 163 |
-
|
| 164 |
-
# 데이터 병합 및 저장
|
| 165 |
-
well_generated0 = generated_0[(generated_0['visi'] >= 0) & (generated_0['visi'] < 100)]
|
| 166 |
-
well_generated1 = generated_1[(generated_1['visi'] >= 100) & (generated_1['visi'] < 500)]
|
| 167 |
-
smote_gan_data = pd.concat([lerp_data, well_generated0, well_generated1], axis=0)
|
| 168 |
-
# 제거변수 복구
|
| 169 |
-
smote_gan_data['binary_class'] = smote_gan_data['multi_class'].apply(lambda x: 0 if x == 2 else 1)
|
| 170 |
-
smote_gan_data['hour_sin'] = np.sin(2 * np.pi * smote_gan_data['hour'] / 24)
|
| 171 |
-
smote_gan_data['hour_cos'] = np.cos(2 * np.pi * smote_gan_data['hour'] / 24)
|
| 172 |
-
smote_gan_data['month_sin'] = np.sin(2 * np.pi * smote_gan_data['month'] / 12)
|
| 173 |
-
smote_gan_data['month_cos'] = np.cos(2 * np.pi * smote_gan_data['month'] / 12)
|
| 174 |
-
smote_gan_data['ground_temp - temp_C'] = smote_gan_data['groundtemp'] - smote_gan_data['temp_C']
|
| 175 |
-
|
| 176 |
-
filtered_data = smote_gan_data[smote_gan_data['multi_class'] != 2]
|
| 177 |
-
original_class2 = data[data['multi_class'] == 2]
|
| 178 |
-
final_data = pd.concat([filtered_data, original_class2], axis=0)
|
| 179 |
-
final_data.reset_index(drop=True, inplace=True)
|
| 180 |
-
|
| 181 |
-
# 결과 저장
|
| 182 |
-
final_data.to_csv(output_path, index = False)
|
| 183 |
-
print(len(final_data[final_data['multi_class']==0]),'|',len(final_data[final_data['multi_class']==1]),'|',len(final_data[final_data['multi_class']==2]))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Analysis_code/make_oversample_data/gan_sample_7000_1.py
DELETED
|
@@ -1,180 +0,0 @@
|
|
| 1 |
-
import pandas as pd
|
| 2 |
-
import numpy as np
|
| 3 |
-
from imblearn.over_sampling import SMOTENC
|
| 4 |
-
import optuna
|
| 5 |
-
from ctgan import CTGAN
|
| 6 |
-
import torch
|
| 7 |
-
import warnings
|
| 8 |
-
|
| 9 |
-
# Per-region input (training CSV) and output (oversampled CSV) file paths.
regions = [
    'incheon',
    'seoul',
    'busan',
    'daegu',
    'daejeon',
    'gwangju',
]
file_paths = [
    f'../../data/data_for_modeling/{region}_train.csv'
    for region in regions
]
output_paths = [
    f'../../data/data_oversampled/ctgan7000/ctgan7000_1_{region}.csv'
    for region in regions
]

# Report whether a CUDA device is visible to torch.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

# Silence the UserWarnings raised from optuna.distributions.
warnings.filterwarnings(
    action="ignore",
    category=UserWarning,
    module="optuna.distributions",
)
|
| 20 |
-
|
| 21 |
-
# Per-region processing: SMOTENC interpolation followed by class-wise CTGAN
# sampling for the two minority classes (multi_class 0 and 1).

_TRAIN_YEARS = [2018, 2019]  # years of the training CSV used by this script
_TARGET_ROWS = 7000          # synthetic rows requested per minority class

# Optuna search spaces for the class-0 and class-1 generators.
_SEARCH_SPACE_CLASS0 = {
    "embedding_dim": (64, 128),
    "layer_dims": [(64, 64), (128, 128)],
    "batch_size": [64, 128, 256],
    "discriminator_steps": (1, 3),
}
_SEARCH_SPACE_CLASS1 = {
    "embedding_dim": (128, 512),
    "layer_dims": [(128, 128), (256, 256)],
    "batch_size": [256, 512, 1024],
    "discriminator_steps": (1, 5),
}


def _suggest_ctgan_params(trial, space):
    """Draw one set of CTGAN keyword arguments from *space* for an Optuna trial."""
    return {
        "embedding_dim": trial.suggest_int("embedding_dim", *space["embedding_dim"]),
        "generator_dim": trial.suggest_categorical("generator_dim", space["layer_dims"]),
        "discriminator_dim": trial.suggest_categorical("discriminator_dim", space["layer_dims"]),
        "pac": trial.suggest_categorical("pac", [4, 8]),
        "batch_size": trial.suggest_categorical("batch_size", space["batch_size"]),
        "discriminator_steps": trial.suggest_int("discriminator_steps", *space["discriminator_steps"]),
    }


def _fit_tuned_ctgan(class_rows, discrete_columns, space, n_trials):
    """Tune CTGAN on *class_rows* with Optuna and return the refit best model.

    Each trial trains a CTGAN, samples twice as many rows as *class_rows*, and
    is scored by the negated sum of the squared gaps between the real and
    synthetic ``visi`` mean and standard deviation. The best configuration is
    then trained once more on the same rows.
    """
    real_visi = class_rows['visi']

    def objective(trial):
        model = CTGAN(**_suggest_ctgan_params(trial, space))
        model.fit(class_rows, discrete_columns=discrete_columns)
        synthetic_visi = model.sample(len(class_rows) * 2)['visi']
        gap = ((real_visi.mean() - synthetic_visi.mean())**2
               + (real_visi.std() - synthetic_visi.std())**2)
        return -gap

    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=n_trials)

    # The suggestion names are exactly the CTGAN keyword names.
    best_model = CTGAN(**study.best_params)
    best_model.fit(class_rows, discrete_columns=discrete_columns)
    return best_model


for file_path, output_path in zip(file_paths, output_paths):
    # Load the region's data and keep only the selected years. The .copy()
    # makes the filtered frame independent, so the dtype casts below are not
    # chained assignments on a slice of the frame returned by read_csv.
    data = pd.read_csv(file_path, index_col=0)
    data = data.loc[data['year'].isin(_TRAIN_YEARS), :].copy()
    data['cloudcover'] = data['cloudcover'].astype('int')
    data['lm_cloudcover'] = data['lm_cloudcover'].astype('int')
    y = data['multi_class']

    # Drop the labels and the derived features; the derived features are
    # recomputed from the raw columns after sampling.
    X = data.drop(columns=[
        'multi_class', 'binary_class',
        'ground_temp - temp_C', 'hour_sin', 'hour_cos', 'month_sin', 'month_cos',
    ])

    # Column positions SMOTENC should treat as categorical: every column
    # whose dtype is not float64.
    categorical_features_indices = [i for i, dtype in enumerate(X.dtypes) if dtype != 'float64']

    count_class_0 = (y == 0).sum()
    count_class_1 = (y == 1).sum()
    count_class_2 = (y == 2).sum()
    target_class_1 = int(np.ceil(count_class_1 / 100) * 100)  # round up to the next hundred
    sampling_strategy = {
        # An over-sampling target below the current class size is rejected,
        # so never request fewer class-0 rows than already exist.
        0: max(int(count_class_0), 500 if count_class_0 <= 500 else 1000),
        1: target_class_1,
        2: count_class_2,
    }

    smotenc = SMOTENC(
        categorical_features=categorical_features_indices,
        sampling_strategy=sampling_strategy,
        random_state=42,
    )
    X_resampled, y_resampled = smotenc.fit_resample(X, y)

    # SMOTENC-interpolated data with the label column re-attached.
    lerp_data = X_resampled.copy()
    lerp_data['multi_class'] = y_resampled

    # Column names CTGAN should treat as discrete (same non-float64 rule).
    categorical_features = [
        col for col, dtype in zip(lerp_data.columns, lerp_data.dtypes) if dtype != 'float64'
    ]

    # Class 0: tune, refit, and sample.
    class0_rows = lerp_data[lerp_data['multi_class'] == 0]
    ctgan_class0 = _fit_tuned_ctgan(class0_rows, categorical_features, _SEARCH_SPACE_CLASS0, n_trials=50)
    generated_0 = ctgan_class0.sample(_TARGET_ROWS)

    # Class 1: only top up to the target; skip training when nothing is
    # needed, since a negative sample count is not a valid request.
    rows_needed_class1 = _TARGET_ROWS - target_class_1
    if rows_needed_class1 > 0:
        class1_rows = lerp_data[lerp_data['multi_class'] == 1]
        ctgan_class1 = _fit_tuned_ctgan(class1_rows, categorical_features, _SEARCH_SPACE_CLASS1, n_trials=30)
        generated_1 = ctgan_class1.sample(rows_needed_class1)
    else:
        generated_1 = lerp_data.iloc[0:0]

    # Keep only synthetic rows whose visi lies in the range used for each class.
    well_generated0 = generated_0[(generated_0['visi'] >= 0) & (generated_0['visi'] < 100)]
    well_generated1 = generated_1[(generated_1['visi'] >= 100) & (generated_1['visi'] < 500)]
    smote_gan_data = pd.concat([lerp_data, well_generated0, well_generated1], axis=0)

    # Restore the columns that were dropped before sampling.
    smote_gan_data['binary_class'] = smote_gan_data['multi_class'].apply(lambda x: 0 if x == 2 else 1)
    smote_gan_data['hour_sin'] = np.sin(2 * np.pi * smote_gan_data['hour'] / 24)
    smote_gan_data['hour_cos'] = np.cos(2 * np.pi * smote_gan_data['hour'] / 24)
    smote_gan_data['month_sin'] = np.sin(2 * np.pi * smote_gan_data['month'] / 12)
    smote_gan_data['month_cos'] = np.cos(2 * np.pi * smote_gan_data['month'] / 12)
    smote_gan_data['ground_temp - temp_C'] = smote_gan_data['groundtemp'] - smote_gan_data['temp_C']

    # Class 2 is taken from the original frame rather than the resampled one.
    filtered_data = smote_gan_data[smote_gan_data['multi_class'] != 2]
    original_class2 = data[data['multi_class'] == 2]
    final_data = pd.concat([filtered_data, original_class2], axis=0)
    final_data.reset_index(drop=True, inplace=True)

    # Save the result and report the final class sizes.
    final_data.to_csv(output_path, index = False)
    class_sizes = [len(final_data[final_data['multi_class'] == label]) for label in (0, 1, 2)]
    print(class_sizes[0], '|', class_sizes[1], '|', class_sizes[2])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Analysis_code/make_oversample_data/gan_sample_7000_2.py
DELETED
|
@@ -1,182 +0,0 @@
|
|
| 1 |
-
import pandas as pd
|
| 2 |
-
import numpy as np
|
| 3 |
-
import os
|
| 4 |
-
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
|
| 5 |
-
from imblearn.over_sampling import SMOTENC
|
| 6 |
-
import optuna
|
| 7 |
-
from ctgan import CTGAN
|
| 8 |
-
import torch
|
| 9 |
-
import warnings
|
| 10 |
-
|
| 11 |
-
# Per-region input (training CSV) and output (oversampled CSV) file paths.
regions = [
    'incheon',
    'seoul',
    'busan',
    'daegu',
    'daejeon',
    'gwangju',
]
file_paths = [
    f'../../data/data_for_modeling/{region}_train.csv'
    for region in regions
]
output_paths = [
    f'../../data/data_oversampled/ctgan7000/ctgan7000_2_{region}.csv'
    for region in regions
]

# Report whether a CUDA device is visible to torch.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

# Silence the UserWarnings raised from optuna.distributions.
warnings.filterwarnings(
    action="ignore",
    category=UserWarning,
    module="optuna.distributions",
)
|
| 22 |
-
|
| 23 |
-
# Per-region processing: SMOTENC interpolation followed by class-wise CTGAN
# sampling for the two minority classes (multi_class 0 and 1).

_TRAIN_YEARS = [2018, 2020]  # years of the training CSV used by this script
_TARGET_ROWS = 7000          # synthetic rows requested per minority class

# Optuna search spaces for the class-0 and class-1 generators.
_SEARCH_SPACE_CLASS0 = {
    "embedding_dim": (64, 128),
    "layer_dims": [(64, 64), (128, 128)],
    "batch_size": [64, 128, 256],
    "discriminator_steps": (1, 3),
}
_SEARCH_SPACE_CLASS1 = {
    "embedding_dim": (128, 512),
    "layer_dims": [(128, 128), (256, 256)],
    "batch_size": [256, 512, 1024],
    "discriminator_steps": (1, 5),
}


def _suggest_ctgan_params(trial, space):
    """Draw one set of CTGAN keyword arguments from *space* for an Optuna trial."""
    return {
        "embedding_dim": trial.suggest_int("embedding_dim", *space["embedding_dim"]),
        "generator_dim": trial.suggest_categorical("generator_dim", space["layer_dims"]),
        "discriminator_dim": trial.suggest_categorical("discriminator_dim", space["layer_dims"]),
        "pac": trial.suggest_categorical("pac", [4, 8]),
        "batch_size": trial.suggest_categorical("batch_size", space["batch_size"]),
        "discriminator_steps": trial.suggest_int("discriminator_steps", *space["discriminator_steps"]),
    }


def _fit_tuned_ctgan(class_rows, discrete_columns, space, n_trials):
    """Tune CTGAN on *class_rows* with Optuna and return the refit best model.

    Each trial trains a CTGAN, samples twice as many rows as *class_rows*, and
    is scored by the negated sum of the squared gaps between the real and
    synthetic ``visi`` mean and standard deviation. The best configuration is
    then trained once more on the same rows.
    """
    real_visi = class_rows['visi']

    def objective(trial):
        model = CTGAN(**_suggest_ctgan_params(trial, space))
        model.fit(class_rows, discrete_columns=discrete_columns)
        synthetic_visi = model.sample(len(class_rows) * 2)['visi']
        gap = ((real_visi.mean() - synthetic_visi.mean())**2
               + (real_visi.std() - synthetic_visi.std())**2)
        return -gap

    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=n_trials)

    # The suggestion names are exactly the CTGAN keyword names.
    best_model = CTGAN(**study.best_params)
    best_model.fit(class_rows, discrete_columns=discrete_columns)
    return best_model


for file_path, output_path in zip(file_paths, output_paths):
    # Load the region's data and keep only the selected years. The .copy()
    # makes the filtered frame independent, so the dtype casts below are not
    # chained assignments on a slice of the frame returned by read_csv.
    data = pd.read_csv(file_path, index_col=0)
    data = data.loc[data['year'].isin(_TRAIN_YEARS), :].copy()
    data['cloudcover'] = data['cloudcover'].astype('int')
    data['lm_cloudcover'] = data['lm_cloudcover'].astype('int')
    y = data['multi_class']

    # Drop the labels and the derived features; the derived features are
    # recomputed from the raw columns after sampling.
    X = data.drop(columns=[
        'multi_class', 'binary_class',
        'ground_temp - temp_C', 'hour_sin', 'hour_cos', 'month_sin', 'month_cos',
    ])

    # Column positions SMOTENC should treat as categorical: every column
    # whose dtype is not float64.
    categorical_features_indices = [i for i, dtype in enumerate(X.dtypes) if dtype != 'float64']

    count_class_0 = (y == 0).sum()
    count_class_1 = (y == 1).sum()
    count_class_2 = (y == 2).sum()
    target_class_1 = int(np.ceil(count_class_1 / 100) * 100)  # round up to the next hundred
    sampling_strategy = {
        # An over-sampling target below the current class size is rejected,
        # so never request fewer class-0 rows than already exist.
        0: max(int(count_class_0), 500 if count_class_0 <= 500 else 1000),
        1: target_class_1,
        2: count_class_2,
    }

    smotenc = SMOTENC(
        categorical_features=categorical_features_indices,
        sampling_strategy=sampling_strategy,
        random_state=42,
    )
    X_resampled, y_resampled = smotenc.fit_resample(X, y)

    # SMOTENC-interpolated data with the label column re-attached.
    lerp_data = X_resampled.copy()
    lerp_data['multi_class'] = y_resampled

    # Column names CTGAN should treat as discrete (same non-float64 rule).
    categorical_features = [
        col for col, dtype in zip(lerp_data.columns, lerp_data.dtypes) if dtype != 'float64'
    ]

    # Class 0: tune, refit, and sample.
    class0_rows = lerp_data[lerp_data['multi_class'] == 0]
    ctgan_class0 = _fit_tuned_ctgan(class0_rows, categorical_features, _SEARCH_SPACE_CLASS0, n_trials=50)
    generated_0 = ctgan_class0.sample(_TARGET_ROWS)

    # Class 1: only top up to the target; skip training when nothing is
    # needed, since a negative sample count is not a valid request.
    rows_needed_class1 = _TARGET_ROWS - target_class_1
    if rows_needed_class1 > 0:
        class1_rows = lerp_data[lerp_data['multi_class'] == 1]
        ctgan_class1 = _fit_tuned_ctgan(class1_rows, categorical_features, _SEARCH_SPACE_CLASS1, n_trials=30)
        generated_1 = ctgan_class1.sample(rows_needed_class1)
    else:
        generated_1 = lerp_data.iloc[0:0]

    # Keep only synthetic rows whose visi lies in the range used for each class.
    well_generated0 = generated_0[(generated_0['visi'] >= 0) & (generated_0['visi'] < 100)]
    well_generated1 = generated_1[(generated_1['visi'] >= 100) & (generated_1['visi'] < 500)]
    smote_gan_data = pd.concat([lerp_data, well_generated0, well_generated1], axis=0)

    # Restore the columns that were dropped before sampling.
    smote_gan_data['binary_class'] = smote_gan_data['multi_class'].apply(lambda x: 0 if x == 2 else 1)
    smote_gan_data['hour_sin'] = np.sin(2 * np.pi * smote_gan_data['hour'] / 24)
    smote_gan_data['hour_cos'] = np.cos(2 * np.pi * smote_gan_data['hour'] / 24)
    smote_gan_data['month_sin'] = np.sin(2 * np.pi * smote_gan_data['month'] / 12)
    smote_gan_data['month_cos'] = np.cos(2 * np.pi * smote_gan_data['month'] / 12)
    smote_gan_data['ground_temp - temp_C'] = smote_gan_data['groundtemp'] - smote_gan_data['temp_C']

    # Class 2 is taken from the original frame rather than the resampled one.
    filtered_data = smote_gan_data[smote_gan_data['multi_class'] != 2]
    original_class2 = data[data['multi_class'] == 2]
    final_data = pd.concat([filtered_data, original_class2], axis=0)
    final_data.reset_index(drop=True, inplace=True)

    # Save the result and report the final class sizes.
    final_data.to_csv(output_path, index = False)
    class_sizes = [len(final_data[final_data['multi_class'] == label]) for label in (0, 1, 2)]
    print(class_sizes[0], '|', class_sizes[1], '|', class_sizes[2])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Analysis_code/make_oversample_data/gan_sample_7000_3.py
DELETED
|
@@ -1,182 +0,0 @@
|
|
| 1 |
-
import pandas as pd
|
| 2 |
-
import numpy as np
|
| 3 |
-
import os
|
| 4 |
-
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
|
| 5 |
-
from imblearn.over_sampling import SMOTENC
|
| 6 |
-
import optuna
|
| 7 |
-
from ctgan import CTGAN
|
| 8 |
-
import torch
|
| 9 |
-
import warnings
|
| 10 |
-
|
| 11 |
-
# Per-region input (training CSV) and output (oversampled CSV) file paths.
regions = [
    'incheon',
    'seoul',
    'busan',
    'daegu',
    'daejeon',
    'gwangju',
]
file_paths = [
    f'../../data/data_for_modeling/{region}_train.csv'
    for region in regions
]
output_paths = [
    f'../../data/data_oversampled/ctgan7000/ctgan7000_3_{region}.csv'
    for region in regions
]

# Report whether a CUDA device is visible to torch.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

# Silence the UserWarnings raised from optuna.distributions.
warnings.filterwarnings(
    action="ignore",
    category=UserWarning,
    module="optuna.distributions",
)
|
| 22 |
-
|
| 23 |
-
# 지역별 처리
|
| 24 |
-
for file_path, output_path in zip(file_paths, output_paths):
|
| 25 |
-
# 데이터 로드
|
| 26 |
-
data = pd.read_csv(file_path, index_col=0)
|
| 27 |
-
data= data.loc[data['year'].isin([2019,2020]),:]
|
| 28 |
-
data['cloudcover'] = data['cloudcover'].astype('int')
|
| 29 |
-
data['lm_cloudcover'] = data['lm_cloudcover'].astype('int')
|
| 30 |
-
X = data.drop(columns=['multi_class', 'binary_class'])
|
| 31 |
-
y = data['multi_class']
|
| 32 |
-
|
| 33 |
-
# 불필요한 열 제거
|
| 34 |
-
X.drop(columns=['ground_temp - temp_C', 'hour_sin', 'hour_cos', 'month_sin', 'month_cos'], inplace=True)
|
| 35 |
-
|
| 36 |
-
# SMOTENC에서 사용할 범주형 변수 열 번호 설정
|
| 37 |
-
categorical_features_indices = [i for i, dtype in enumerate(X.dtypes) if dtype != 'float64']
|
| 38 |
-
|
| 39 |
-
# sampling_strategy 설정
|
| 40 |
-
count_class_0 = (y == 0).sum()
|
| 41 |
-
count_class_1 = (y == 1).sum()
|
| 42 |
-
count_class_2 = (y == 2).sum()
|
| 43 |
-
sampling_strategy = {
|
| 44 |
-
0: 500 if count_class_0 <= 500 else 1000,
|
| 45 |
-
1: int(np.ceil(count_class_1 / 100) * 100), # 백의 자리로 올림
|
| 46 |
-
2: count_class_2
|
| 47 |
-
}
|
| 48 |
-
|
| 49 |
-
# SMOTENC 적용
|
| 50 |
-
smotenc = SMOTENC(categorical_features=categorical_features_indices, sampling_strategy=sampling_strategy, random_state=42)
|
| 51 |
-
X_resampled, y_resampled = smotenc.fit_resample(X, y)
|
| 52 |
-
|
| 53 |
-
# Resampled 데이터 생성
|
| 54 |
-
lerp_data = X_resampled.copy()
|
| 55 |
-
lerp_data['multi_class'] = y_resampled
|
| 56 |
-
|
| 57 |
-
# CTGAN에서 사용할 범주형 변수 열 이름 설정
|
| 58 |
-
categorical_features = [
|
| 59 |
-
col for col, dtype in zip(lerp_data.columns, lerp_data.dtypes) if dtype != 'float64'
|
| 60 |
-
]
|
| 61 |
-
|
| 62 |
-
# Optuna 목적 함수 정의
|
| 63 |
-
def objective(trial):
    """Optuna objective for the class-0 CTGAN: score one sampled configuration.

    Trains a CTGAN with the trial's hyperparameters on the ``multi_class == 0``
    rows of ``lerp_data`` (taken from the enclosing scope), draws twice as many
    synthetic rows, and returns the negated sum of the squared gaps between
    the real and synthetic ``visi`` mean and standard deviation, so that a
    larger value means a closer match.
    """
    # Hyperparameter search ranges. The suggestion names double as CTGAN
    # keyword names and as the keys later read from the best parameters.
    hyperparams = {
        "embedding_dim": trial.suggest_int("embedding_dim", 64, 128),
        "generator_dim": trial.suggest_categorical("generator_dim", [(64, 64), (128, 128)]),
        "discriminator_dim": trial.suggest_categorical("discriminator_dim", [(64, 64), (128, 128)]),
        "pac": trial.suggest_categorical("pac", [4, 8]),
        "batch_size": trial.suggest_categorical("batch_size", [64, 128, 256]),
        "discriminator_steps": trial.suggest_int("discriminator_steps", 1, 3),
    }

    # Build the CTGAN model for this trial.
    model = CTGAN(**hyperparams)

    # Train on the class-0 rows only, then sample twice that many rows.
    class0_rows = lerp_data[lerp_data['multi_class'] == 0]
    model.fit(class0_rows, discrete_columns=categorical_features)
    synthetic = model.sample(len(class0_rows) * 2)

    # Compare the real and synthetic distribution of the continuous ``visi``
    # column through its first two moments.
    observed_visi = class0_rows['visi']
    sampled_visi = synthetic['visi']
    mean_gap = observed_visi.mean() - sampled_visi.mean()
    std_gap = observed_visi.std() - sampled_visi.std()
    return -(mean_gap**2 + std_gap**2)
|
| 98 |
-
|
| 99 |
-
# Optuna로 최적화 수행
|
| 100 |
-
study = optuna.create_study(direction="maximize")
|
| 101 |
-
study.optimize(objective, n_trials=50)
|
| 102 |
-
|
| 103 |
-
# 최적 하이퍼파라미터 출력
|
| 104 |
-
best_params = study.best_params
|
| 105 |
-
|
| 106 |
-
# 최적 하이퍼파라미터로 CTGAN 학습 및 샘플 생성
|
| 107 |
-
ctgan = CTGAN(
|
| 108 |
-
embedding_dim=best_params["embedding_dim"],
|
| 109 |
-
generator_dim=best_params["generator_dim"],
|
| 110 |
-
discriminator_dim=best_params["discriminator_dim"],
|
| 111 |
-
batch_size=best_params["batch_size"],
|
| 112 |
-
discriminator_steps=best_params["discriminator_steps"],
|
| 113 |
-
pac=best_params["pac"]
|
| 114 |
-
)
|
| 115 |
-
|
| 116 |
-
# 범주 0 데이터로 최종 학습
|
| 117 |
-
ctgan.fit(lerp_data[lerp_data['multi_class'] == 0], discrete_columns=categorical_features)
|
| 118 |
-
generated_0 = ctgan.sample(7000)
|
| 119 |
-
|
| 120 |
-
# 범주 1 데이터 최적화 및 생성
|
| 121 |
-
def objective_class1(trial):
    """Optuna objective for the CTGAN that models the class-1 rows.

    Samples one hyperparameter configuration from ``trial``, fits a CTGAN on
    the rows of ``lerp_data`` whose ``multi_class`` is 1, draws twice as many
    synthetic rows as there are real ones, and scores the configuration by
    how closely the synthetic ``visi`` column reproduces the real mean and
    standard deviation.

    Returns the negated sum of the squared mean gap and the squared std gap,
    so a study that maximizes this value minimizes the gap.
    """
    # The suggest_* calls are evaluated top to bottom, in the same order as
    # the parameters are named, and the keys match CTGAN's keyword arguments.
    params = {
        "embedding_dim": trial.suggest_int("embedding_dim", 128, 512),
        "generator_dim": trial.suggest_categorical("generator_dim", [(128, 128), (256, 256)]),
        "discriminator_dim": trial.suggest_categorical("discriminator_dim", [(128, 128), (256, 256)]),
        "pac": trial.suggest_categorical("pac", [4, 8]),
        "batch_size": trial.suggest_categorical("batch_size", [256, 512, 1024]),
        "discriminator_steps": trial.suggest_int("discriminator_steps", 1, 5),
    }
    model = CTGAN(**params)

    # Train on the class-1 subset only and over-generate by a factor of two.
    class1_rows = lerp_data[lerp_data['multi_class'] == 1]
    model.fit(class1_rows, discrete_columns=categorical_features)
    synthetic = model.sample(len(class1_rows) * 2)

    # Compare the first two moments of the real and synthetic 'visi' values.
    mean_gap = class1_rows['visi'].mean() - synthetic['visi'].mean()
    std_gap = class1_rows['visi'].std() - synthetic['visi'].std()
    return -(mean_gap**2 + std_gap**2)
|
| 146 |
-
|
| 147 |
-
study_class1 = optuna.create_study(direction="maximize")
|
| 148 |
-
study_class1.optimize(objective_class1, n_trials=30)
|
| 149 |
-
|
| 150 |
-
best_params_class1 = study_class1.best_params
|
| 151 |
-
ctgan = CTGAN(
|
| 152 |
-
embedding_dim=best_params_class1["embedding_dim"],
|
| 153 |
-
generator_dim=best_params_class1["generator_dim"],
|
| 154 |
-
discriminator_dim=best_params_class1["discriminator_dim"],
|
| 155 |
-
batch_size=best_params_class1["batch_size"],
|
| 156 |
-
discriminator_steps=best_params_class1["discriminator_steps"],
|
| 157 |
-
pac=best_params_class1["pac"]
|
| 158 |
-
)
|
| 159 |
-
|
| 160 |
-
ctgan.fit(lerp_data[lerp_data['multi_class'] == 1], discrete_columns=categorical_features)
|
| 161 |
-
generated_1 = ctgan.sample(7000 - int(np.ceil(count_class_1 / 100) * 100))
|
| 162 |
-
|
| 163 |
-
# 데이터 병합 및 저장
|
| 164 |
-
well_generated0 = generated_0[(generated_0['visi'] >= 0) & (generated_0['visi'] < 100)]
|
| 165 |
-
well_generated1 = generated_1[(generated_1['visi'] >= 100) & (generated_1['visi'] < 500)]
|
| 166 |
-
smote_gan_data = pd.concat([lerp_data, well_generated0, well_generated1], axis=0)
|
| 167 |
-
# 제거변수 복구
|
| 168 |
-
smote_gan_data['binary_class'] = smote_gan_data['multi_class'].apply(lambda x: 0 if x == 2 else 1)
|
| 169 |
-
smote_gan_data['hour_sin'] = np.sin(2 * np.pi * smote_gan_data['hour'] / 24)
|
| 170 |
-
smote_gan_data['hour_cos'] = np.cos(2 * np.pi * smote_gan_data['hour'] / 24)
|
| 171 |
-
smote_gan_data['month_sin'] = np.sin(2 * np.pi * smote_gan_data['month'] / 12)
|
| 172 |
-
smote_gan_data['month_cos'] = np.cos(2 * np.pi * smote_gan_data['month'] / 12)
|
| 173 |
-
smote_gan_data['ground_temp - temp_C'] = smote_gan_data['groundtemp'] - smote_gan_data['temp_C']
|
| 174 |
-
|
| 175 |
-
filtered_data = smote_gan_data[smote_gan_data['multi_class'] != 2]
|
| 176 |
-
original_class2 = data[data['multi_class'] == 2]
|
| 177 |
-
final_data = pd.concat([filtered_data, original_class2], axis=0)
|
| 178 |
-
final_data.reset_index(drop=True, inplace=True)
|
| 179 |
-
|
| 180 |
-
# 결과 저장
|
| 181 |
-
final_data.to_csv(output_path, index = False)
|
| 182 |
-
print(len(final_data[final_data['multi_class']==0]),'|',len(final_data[final_data['multi_class']==1]),'|',len(final_data[final_data['multi_class']==2]))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Analysis_code/make_oversample_data/oversampling_code.py
DELETED
|
@@ -1,355 +0,0 @@
|
|
| 1 |
-
import pandas as pd
|
| 2 |
-
import numpy as np
|
| 3 |
-
from imblearn.over_sampling import SMOTENC
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
# Oversampling with SMOTE and CTGAN.
# This first section only reports the class balance of each training split.

# Region names and the training CSV that belongs to each of them.
regions = ['busan', 'daegu', 'daejeon', 'incheon', 'seoul','gwangju']
input_paths = [f'../data/data_for_modeling/{region}_train.csv' for region in regions]

# Process the regions one after another.
for region, input_path in zip(regions, input_paths):
    # The first CSV column is used as the index; the 'Unnamed: 0' column
    # is removed as well.
    data = pd.read_csv(input_path, index_col=0)
    data = data.drop(columns=['Unnamed: 0'])

    print("\n######",region,"#######")
    # Row counts for multi_class 0, 1 and 2, separated by ' | '.
    print(*[int((data['multi_class'] == label).sum()) for label in (0, 1, 2)], sep=' | ')
    # Number of columns in the frame.
    print(data.shape[1])
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
import pandas as pd
|
| 29 |
-
import numpy as np
|
| 30 |
-
from imblearn.over_sampling import SMOTENC
|
| 31 |
-
|
| 32 |
-
# Region names with their input (training split) and output (SMOTE) files.
regions = ['busan', 'daegu', 'daejeon', 'incheon', 'seoul','gwangju']
input_paths = [f'../data/data_for_modeling/{region}_train.csv' for region in regions]
output_paths = [f'../data/data_oversampled/smote_{region}.csv' for region in regions]

# Oversample classes 0 and 1 of every region with SMOTE-NC.
for region, input_path, output_path in zip(regions, input_paths, output_paths):
    # Load the training split.
    data = pd.read_csv(input_path, index_col=0)
    data = data.drop(columns=['Unnamed: 0'])

    # Target is multi_class. The features exclude both label columns and the
    # derived columns, which are recomputed after resampling.
    y = data['multi_class']
    X = data.drop(columns=[
        'multi_class', 'binary_class',
        'ground_temp - temp_C', 'hour_sin', 'hour_cos', 'month_sin', 'month_cos',
    ])

    # Every column that is not float64 is handed to SMOTE-NC as categorical,
    # identified by its position.
    categorical_features = []
    for position, column_dtype in enumerate(X.dtypes):
        if column_dtype != 'float64':
            categorical_features.append(position)

    # Class 2 keeps its current size; classes 0 and 1 are raised to 10000.
    count_class_2 = (y == 2).sum()
    smotenc = SMOTENC(
        categorical_features=categorical_features,
        sampling_strategy={0: 10000, 1: 10000, 2: count_class_2},
        random_state=42
    )
    X_resampled, y_resampled = smotenc.fit_resample(X, y)

    # Re-attach the labels and rebuild the derived columns.
    X_resampled['multi_class'] = y_resampled
    # binary_class is 0 for multi_class 2 and 1 for everything else.
    X_resampled['binary_class'] = X_resampled['multi_class'].ne(2).astype('int64')
    hour_angle = 2 * np.pi * X_resampled['hour'] / 24
    X_resampled['hour_sin'] = np.sin(hour_angle)
    X_resampled['hour_cos'] = np.cos(hour_angle)
    month_angle = 2 * np.pi * X_resampled['month'] / 12
    X_resampled['month_sin'] = np.sin(month_angle)
    X_resampled['month_cos'] = np.cos(month_angle)
    X_resampled['ground_temp - temp_C'] = X_resampled['groundtemp'] - X_resampled['temp_C']

    # Save the result (the index is written as the first CSV column).
    X_resampled.to_csv(output_path)
    print(f"Processed and saved: {region} -> {output_path}")
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
# Spot check: summary statistics of 'visi' for the oversampled classes of
# the Seoul SMOTE file.
smote_seoul = pd.read_csv('../data/data_oversampled/smote_seoul.csv')
for label in (0, 1):
    print(smote_seoul.loc[smote_seoul['multi_class'] == label, 'visi'].describe())
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
import pandas as pd
|
| 84 |
-
import numpy as np
|
| 85 |
-
from imblearn.over_sampling import SMOTENC
|
| 86 |
-
|
| 87 |
-
# Region names and the SMOTE-oversampled CSV that belongs to each of them.
regions = ['busan', 'daegu', 'daejeon', 'incheon', 'seoul','gwangju']
input_paths = [f'../data/data_oversampled/smote_{region}.csv' for region in regions]

# Report the class balance of every oversampled file.
for region, input_path in zip(regions, input_paths):
    # The first CSV column is the index written by to_csv().
    data = pd.read_csv(input_path, index_col=0)
    # The SMOTE files are produced after 'Unnamed: 0' has already been removed
    # from the features, so the column is normally absent here. An
    # unconditional drop raises KeyError; drop it only if it is present.
    data = data.drop(columns=['Unnamed: 0'], errors='ignore')

    print("\n######",region,"#######")
    # Row counts for multi_class 0, 1 and 2.
    print(len(data[data['multi_class']==0]),'|',len(data[data['multi_class']==1]),'|',len(data[data['multi_class']==2]))
    # Number of columns in the frame.
    print(len(data.columns))
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
import pandas as pd
|
| 103 |
-
import numpy as np
|
| 104 |
-
from imblearn.over_sampling import SMOTENC
|
| 105 |
-
|
| 106 |
-
# Region names and the training CSV that belongs to each of them.
regions = ['busan', 'daegu', 'daejeon', 'incheon', 'seoul','gwangju']
input_paths = [f'../data/data_for_modeling/{region}_train.csv' for region in regions]

# Print the class balance of each region's training split.
for region, input_path in zip(regions, input_paths):
    # Load the split; the first column becomes the index and 'Unnamed: 0'
    # is removed as well.
    data = pd.read_csv(input_path, index_col=0)
    data = data.drop(columns=['Unnamed: 0'])

    print("\n######",region,"#######")
    # Row counts for multi_class 0, 1 and 2, separated by ' | '.
    print(*[int((data['multi_class'] == label).sum()) for label in (0, 1, 2)], sep=' | ')
    # Number of columns in the frame.
    print(data.shape[1])
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
import pandas as pd
|
| 121 |
-
import numpy as np
|
| 122 |
-
from imblearn.over_sampling import SMOTENC
|
| 123 |
-
import optuna
|
| 124 |
-
from ctgan import CTGAN
|
| 125 |
-
import torch
|
| 126 |
-
import warnings
|
| 127 |
-
|
| 128 |
-
# Per-region input files (feather) and CTGAN-augmented output files.
# NOTE(review): this section reads df_{region}.feather while the other
# sections read {region}_train.csv -- confirm the feather files contain
# training rows only.
regions = ['busan', 'daegu', 'daejeon', 'incheon', 'seoul','gwangju']
file_paths = [f'../data/data_for_modeling/df_{region}.feather' for region in regions]
output_paths = [f'../data/data_oversampled/ctgan_{region}.csv' for region in regions]

# Report which device torch can see. The value is only printed here; it is
# not passed to CTGAN anywhere in this section.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Silence the UserWarning emitted by optuna.distributions.
warnings.filterwarnings("ignore", category=UserWarning, module="optuna.distributions")


def _tune_and_fit_ctgan(class_data, discrete_columns, *, embedding_range,
                        layer_choices, batch_choices, max_discriminator_steps,
                        n_trials):
    """Tune a CTGAN on ``class_data`` with Optuna and refit the best one.

    Each trial fits a CTGAN, samples twice as many rows as ``class_data``
    has, and is scored by the negated sum of the squared mean gap and the
    squared std gap between the real and the generated ``visi`` column.

    Args:
        class_data: rows of a single class used for fitting and scoring.
        discrete_columns: column names passed to ``CTGAN.fit``.
        embedding_range: ``(low, high)`` bounds for ``embedding_dim``.
        layer_choices: candidates for ``generator_dim``/``discriminator_dim``.
        batch_choices: candidates for ``batch_size``.
        max_discriminator_steps: upper bound for ``discriminator_steps``.
        n_trials: number of Optuna trials.

    Returns:
        ``(model, best_params)``: a CTGAN refitted on ``class_data`` with the
        best parameters, and those parameters.
    """
    real_visi = class_data['visi']

    def objective(trial):
        # The parameter names equal CTGAN's keyword arguments, so the dict
        # (and later study.best_params) can be passed straight through.
        params = {
            "embedding_dim": trial.suggest_int("embedding_dim", *embedding_range),
            "generator_dim": trial.suggest_categorical("generator_dim", layer_choices),
            "discriminator_dim": trial.suggest_categorical("discriminator_dim", layer_choices),
            "pac": trial.suggest_categorical("pac", [4, 8]),
            "batch_size": trial.suggest_categorical("batch_size", batch_choices),
            "discriminator_steps": trial.suggest_int("discriminator_steps", 1, max_discriminator_steps),
        }
        candidate = CTGAN(**params)
        candidate.fit(class_data, discrete_columns=discrete_columns)
        generated_visi = candidate.sample(len(class_data) * 2)['visi']

        # Gap between the first two moments; negated because the study
        # maximizes its objective.
        gap = ((real_visi.mean() - generated_visi.mean())**2
               + (real_visi.std() - generated_visi.std())**2)
        return -gap

    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=n_trials)

    tuned_params = study.best_params
    model = CTGAN(**tuned_params)
    model.fit(class_data, discrete_columns=discrete_columns)
    return model, tuned_params


# Process every region. ``region`` is part of the loop so that the final log
# line names the region that was actually processed.
for region, file_path, output_path in zip(regions, file_paths, output_paths):
    # Load the data and split it into features and target.
    data = pd.read_feather(file_path)
    data.drop(['Unnamed: 0'], axis=1, inplace=True)
    X = data.drop(columns=['multi_class', 'binary_class'])
    y = data['multi_class']

    # Remove the derived columns; they are rebuilt after augmentation.
    X.drop(columns=['ground_temp - temp_C', 'hour_sin', 'hour_cos', 'month_sin', 'month_cos'], inplace=True)

    # Positions of the categorical (non-float64) columns for SMOTE-NC.
    categorical_features_indices = [i for i, dtype in enumerate(X.dtypes) if dtype != 'float64']

    # SMOTE-NC targets: class 0 goes to 500 or 1000 rows, class 1 is rounded
    # up to the next multiple of 100, class 2 keeps its size.
    count_class_0 = (y == 0).sum()
    count_class_1 = (y == 1).sum()
    count_class_2 = (y == 2).sum()
    rounded_class_1 = int(np.ceil(count_class_1 / 100) * 100)
    sampling_strategy = {
        0: 500 if count_class_0 <= 500 else 1000,
        1: rounded_class_1,
        2: count_class_2
    }

    # Apply SMOTE-NC.
    smotenc = SMOTENC(categorical_features=categorical_features_indices, sampling_strategy=sampling_strategy, random_state=42)
    X_resampled, y_resampled = smotenc.fit_resample(X, y)

    # Resampled frame including the label.
    lerp_data = X_resampled.copy()
    lerp_data['multi_class'] = y_resampled

    # Names of the categorical (non-float64) columns for CTGAN.
    categorical_features = [
        col for col, dtype in zip(lerp_data.columns, lerp_data.dtypes) if dtype != 'float64'
    ]

    # Class 0: tune, refit and generate. The sample size complements the
    # SMOTE-NC target for class 0 (500 + 19500 or 1000 + 19000).
    ctgan, best_params = _tune_and_fit_ctgan(
        lerp_data[lerp_data['multi_class'] == 0],
        categorical_features,
        embedding_range=(64, 128),
        layer_choices=[(64, 64), (128, 128)],
        batch_choices=[64, 128, 256],
        max_discriminator_steps=3,
        n_trials=50,
    )
    generated_0 = ctgan.sample(19500 if count_class_0 <= 500 else 19000)

    # Class 1: tune, refit and generate with a larger search space.
    ctgan, best_params_class1 = _tune_and_fit_ctgan(
        lerp_data[lerp_data['multi_class'] == 1],
        categorical_features,
        embedding_range=(128, 512),
        layer_choices=[(128, 128), (256, 256)],
        batch_choices=[256, 512, 1024],
        max_discriminator_steps=5,
        n_trials=30,
    )
    generated_1 = ctgan.sample(20000 - rounded_class_1)

    # Keep only generated rows whose 'visi' lies in the range used for the
    # class (0 <= visi < 100 for class 0, 100 <= visi < 500 for class 1),
    # then merge them with the SMOTE-NC data.
    well_generated0 = generated_0[(generated_0['visi'] >= 0) & (generated_0['visi'] < 100)]
    well_generated1 = generated_1[(generated_1['visi'] >= 100) & (generated_1['visi'] < 500)]
    smote_gan_data = pd.concat([lerp_data, well_generated0, well_generated1], axis=0)

    # Restore the columns that were removed before resampling.
    smote_gan_data['binary_class'] = smote_gan_data['multi_class'].apply(lambda x: 0 if x == 2 else 1)
    smote_gan_data['hour_sin'] = np.sin(2 * np.pi * smote_gan_data['hour'] / 24)
    smote_gan_data['hour_cos'] = np.cos(2 * np.pi * smote_gan_data['hour'] / 24)
    smote_gan_data['month_sin'] = np.sin(2 * np.pi * smote_gan_data['month'] / 12)
    smote_gan_data['month_cos'] = np.cos(2 * np.pi * smote_gan_data['month'] / 12)
    smote_gan_data['ground_temp - temp_C'] = smote_gan_data['groundtemp'] - smote_gan_data['temp_C']

    # Save the result.
    smote_gan_data.to_csv(output_path, index = False)
    print(f"Processed and saved: {region} -> {output_path}")
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
import pandas as pd
|
| 297 |
-
import numpy as np
|
| 298 |
-
from imblearn.over_sampling import SMOTENC
|
| 299 |
-
|
| 300 |
-
# Region names and the CTGAN-augmented CSV that belongs to each of them.
regions = ['busan', 'daegu', 'daejeon', 'incheon', 'seoul','gwangju']
input_paths = [f'../data/data_oversampled/ctgan_{region}.csv' for region in regions]

# Print the class balance of every augmented file.
for region, input_path in zip(regions, input_paths):
    # These files are read without an index column.
    data = pd.read_csv(input_path)

    print("\n######",region,"#######")
    # Row counts for multi_class 0, 1 and 2, separated by ' | '.
    print(*[int((data['multi_class'] == label).sum()) for label in (0, 1, 2)], sep=' | ')
    # Number of columns in the frame.
    print(data.shape[1])
|
| 311 |
-
|
| 312 |
-
|
| 313 |
-
|
| 314 |
-
|
| 315 |
-
# Spot check: summary statistics of 'visi' for each class of the Busan
# CTGAN-augmented file.
busan_check = pd.read_csv('../data/data_oversampled/ctgan_busan.csv')
for label in (0, 1, 2):
    print(busan_check.loc[busan_check['multi_class'] == label, 'visi'].describe())
|
| 319 |
-
|
| 320 |
-
|
| 321 |
-
|
| 322 |
-
|
| 323 |
-
import pandas as pd
|
| 324 |
-
import numpy as np
|
| 325 |
-
from imblearn.over_sampling import SMOTENC
|
| 326 |
-
|
| 327 |
-
# Region names, the original training CSVs and the CTGAN-augmented CSVs.
regions = ['busan', 'daegu', 'daejeon', 'incheon', 'seoul','gwangju']
origin_paths = [f'../data/data_for_modeling/{region}_train.csv' for region in regions]
augment_paths = [f'../data/data_oversampled/ctgan_{region}.csv' for region in regions]

# For every region, replace the class-2 rows of the augmented file with the
# class-2 rows of the original training split. The augmented file is
# overwritten in place.
for region, origin_path, augment_path in zip(regions, origin_paths, augment_paths):
    # Load both files.
    origin = pd.read_csv(origin_path, index_col=0)
    # The other sections that load this CSV remove 'Unnamed: 0'. Do the same
    # here (if the column is present) so the concat below does not add a
    # column that is empty for every augmented row.
    origin = origin.drop(columns=['Unnamed: 0'], errors='ignore')
    augment = pd.read_csv(augment_path)

    # Drop the class-2 rows from the augmented data.
    filtered_data = augment[augment['multi_class'] != 2]

    # Take the class-2 rows from the original data.
    original_class2 = origin[origin['multi_class'] == 2]

    # Append them and renumber the index from zero.
    final_data = pd.concat([filtered_data, original_class2], axis=0, ignore_index=True)

    # Save the result over the augmented file.
    final_data.to_csv(augment_path, index = False)

    print("\n######",region,"#######")
    print(len(final_data[final_data['multi_class']==0]),'|',len(final_data[final_data['multi_class']==1]),'|',len(final_data[final_data['multi_class']==2]))
    # Column count of the frame that was just written (not of a frame left
    # over from an earlier section).
    print(len(final_data.columns))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Analysis_code/make_oversample_data/smote_sample_1.py
DELETED
|
@@ -1,53 +0,0 @@
|
|
| 1 |
-
import pandas as pd
|
| 2 |
-
import numpy as np
|
| 3 |
-
|
| 4 |
-
from imblearn.over_sampling import SMOTENC
|
| 5 |
-
|
| 6 |
-
# Per-region training CSVs and the SMOTE output files of this variant.
regions = ['incheon', 'seoul','busan', 'daegu', 'daejeon', 'gwangju']
file_paths = [f'../../data/data_for_modeling/{region}_train.csv' for region in regions]
output_paths = [f'../../data/data_oversampled/smote/smote_1_{region}.csv' for region in regions]

# Process every region.
for file_path, output_path in zip(file_paths, output_paths):
    # Load the data and keep only the rows of 2018 and 2019. The explicit
    # copy makes the column assignments below operate on an independent
    # frame instead of a filtered selection (avoids SettingWithCopyWarning).
    data = pd.read_csv(file_path, index_col=0)
    data = data.loc[data['year'].isin([2018,2019]),:].copy()
    # Cast the cloud-cover columns to int so that they are not float64 and
    # are therefore picked up as categorical below.
    data['cloudcover'] = data['cloudcover'].astype('int')
    data['lm_cloudcover'] = data['lm_cloudcover'].astype('int')
    X = data.drop(columns=['multi_class', 'binary_class'])
    y = data['multi_class']

    # Remove the derived columns; they are rebuilt after resampling.
    X.drop(columns=['ground_temp - temp_C', 'hour_sin', 'hour_cos', 'month_sin', 'month_cos'], inplace=True)

    # Positions of the categorical (non-float64) columns for SMOTE-NC.
    categorical_features_indices = [i for i, dtype in enumerate(X.dtypes) if dtype != 'float64']

    # Classes 0 and 1 are each raised to 500 rows per started thousand of
    # class-2 rows; class 2 keeps its size.
    count_class_2 = (y == 2).sum()
    minority_target = int(np.ceil(count_class_2 / 1000) * 500)
    sampling_strategy = {
        0: minority_target,
        1: minority_target,
        2: count_class_2
    }

    # Apply SMOTE-NC.
    smotenc = SMOTENC(categorical_features=categorical_features_indices, sampling_strategy=sampling_strategy, random_state=42)
    X_resampled, y_resampled = smotenc.fit_resample(X, y)

    # Resampled frame including the label.
    lerp_data = X_resampled.copy()
    lerp_data['multi_class'] = y_resampled

    # Restore the columns that were removed before resampling.
    lerp_data['binary_class'] = lerp_data['multi_class'].apply(lambda x: 0 if x == 2 else 1)
    lerp_data['hour_sin'] = np.sin(2 * np.pi * lerp_data['hour'] / 24)
    lerp_data['hour_cos'] = np.cos(2 * np.pi * lerp_data['hour'] / 24)
    lerp_data['month_sin'] = np.sin(2 * np.pi * lerp_data['month'] / 12)
    lerp_data['month_cos'] = np.cos(2 * np.pi * lerp_data['month'] / 12)
    lerp_data['ground_temp - temp_C'] = lerp_data['groundtemp'] - lerp_data['temp_C']

    # Save the result and print the row counts of classes 0, 1 and 2.
    lerp_data.to_csv(output_path, index = False)
    print(len(lerp_data[lerp_data['multi_class']==0]),'|',len(lerp_data[lerp_data['multi_class']==1]),'|',len(lerp_data[lerp_data['multi_class']==2]))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Analysis_code/make_oversample_data/smote_sample_2.py
DELETED
|
@@ -1,53 +0,0 @@
|
|
| 1 |
-
import pandas as pd
|
| 2 |
-
import numpy as np
|
| 3 |
-
|
| 4 |
-
from imblearn.over_sampling import SMOTENC
|
| 5 |
-
|
| 6 |
-
# Per-region training CSVs and the SMOTE output files of this variant.
regions = ['incheon', 'seoul','busan', 'daegu', 'daejeon', 'gwangju']
file_paths = [f'../../data/data_for_modeling/{region}_train.csv' for region in regions]
output_paths = [f'../../data/data_oversampled/smote/smote_2_{region}.csv' for region in regions]

# Process every region.
for file_path, output_path in zip(file_paths, output_paths):
    # Load the data and keep only the rows of 2018 and 2020. The explicit
    # copy makes the column assignments below operate on an independent
    # frame instead of a filtered selection (avoids SettingWithCopyWarning).
    data = pd.read_csv(file_path, index_col=0)
    data = data.loc[data['year'].isin([2018,2020]),:].copy()
    # Cast the cloud-cover columns to int so that they are not float64 and
    # are therefore picked up as categorical below.
    data['cloudcover'] = data['cloudcover'].astype('int')
    data['lm_cloudcover'] = data['lm_cloudcover'].astype('int')
    X = data.drop(columns=['multi_class', 'binary_class'])
    y = data['multi_class']

    # Remove the derived columns; they are rebuilt after resampling.
    X.drop(columns=['ground_temp - temp_C', 'hour_sin', 'hour_cos', 'month_sin', 'month_cos'], inplace=True)

    # Positions of the categorical (non-float64) columns for SMOTE-NC.
    categorical_features_indices = [i for i, dtype in enumerate(X.dtypes) if dtype != 'float64']

    # Classes 0 and 1 are each raised to 500 rows per started thousand of
    # class-2 rows; class 2 keeps its size.
    count_class_2 = (y == 2).sum()
    minority_target = int(np.ceil(count_class_2 / 1000) * 500)
    sampling_strategy = {
        0: minority_target,
        1: minority_target,
        2: count_class_2
    }

    # Apply SMOTE-NC.
    smotenc = SMOTENC(categorical_features=categorical_features_indices, sampling_strategy=sampling_strategy, random_state=42)
    X_resampled, y_resampled = smotenc.fit_resample(X, y)

    # Resampled frame including the label.
    lerp_data = X_resampled.copy()
    lerp_data['multi_class'] = y_resampled

    # Restore the columns that were removed before resampling.
    lerp_data['binary_class'] = lerp_data['multi_class'].apply(lambda x: 0 if x == 2 else 1)
    lerp_data['hour_sin'] = np.sin(2 * np.pi * lerp_data['hour'] / 24)
    lerp_data['hour_cos'] = np.cos(2 * np.pi * lerp_data['hour'] / 24)
    lerp_data['month_sin'] = np.sin(2 * np.pi * lerp_data['month'] / 12)
    lerp_data['month_cos'] = np.cos(2 * np.pi * lerp_data['month'] / 12)
    lerp_data['ground_temp - temp_C'] = lerp_data['groundtemp'] - lerp_data['temp_C']

    # Save the result and print the row counts of classes 0, 1 and 2.
    lerp_data.to_csv(output_path, index = False)
    print(len(lerp_data[lerp_data['multi_class']==0]),'|',len(lerp_data[lerp_data['multi_class']==1]),'|',len(lerp_data[lerp_data['multi_class']==2]))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Analysis_code/make_oversample_data/smote_sample_3.py
DELETED
|
@@ -1,53 +0,0 @@
|
|
| 1 |
-
import pandas as pd
|
| 2 |
-
import numpy as np
|
| 3 |
-
|
| 4 |
-
from imblearn.over_sampling import SMOTENC
|
| 5 |
-
|
| 6 |
-
# Per-region training CSVs and the SMOTE output files of this variant.
regions = ['incheon', 'seoul','busan', 'daegu', 'daejeon', 'gwangju']
file_paths = [f'../../data/data_for_modeling/{region}_train.csv' for region in regions]
output_paths = [f'../../data/data_oversampled/smote/smote_3_{region}.csv' for region in regions]

# Process every region.
for file_path, output_path in zip(file_paths, output_paths):
    # Load the data and keep only the rows of 2019 and 2020. The explicit
    # copy makes the column assignments below operate on an independent
    # frame instead of a filtered selection (avoids SettingWithCopyWarning).
    data = pd.read_csv(file_path, index_col=0)
    data = data.loc[data['year'].isin([2019,2020]),:].copy()
    # Cast the cloud-cover columns to int so that they are not float64 and
    # are therefore picked up as categorical below.
    data['cloudcover'] = data['cloudcover'].astype('int')
    data['lm_cloudcover'] = data['lm_cloudcover'].astype('int')
    X = data.drop(columns=['multi_class', 'binary_class'])
    y = data['multi_class']

    # Remove the derived columns; they are rebuilt after resampling.
    X.drop(columns=['ground_temp - temp_C', 'hour_sin', 'hour_cos', 'month_sin', 'month_cos'], inplace=True)

    # Positions of the categorical (non-float64) columns for SMOTE-NC.
    categorical_features_indices = [i for i, dtype in enumerate(X.dtypes) if dtype != 'float64']

    # Classes 0 and 1 are each raised to 500 rows per started thousand of
    # class-2 rows; class 2 keeps its size.
    count_class_2 = (y == 2).sum()
    minority_target = int(np.ceil(count_class_2 / 1000) * 500)
    sampling_strategy = {
        0: minority_target,
        1: minority_target,
        2: count_class_2
    }

    # Apply SMOTE-NC.
    smotenc = SMOTENC(categorical_features=categorical_features_indices, sampling_strategy=sampling_strategy, random_state=42)
    X_resampled, y_resampled = smotenc.fit_resample(X, y)

    # Resampled frame including the label.
    lerp_data = X_resampled.copy()
    lerp_data['multi_class'] = y_resampled

    # Restore the columns that were removed before resampling.
    lerp_data['binary_class'] = lerp_data['multi_class'].apply(lambda x: 0 if x == 2 else 1)
    lerp_data['hour_sin'] = np.sin(2 * np.pi * lerp_data['hour'] / 24)
    lerp_data['hour_cos'] = np.cos(2 * np.pi * lerp_data['hour'] / 24)
    lerp_data['month_sin'] = np.sin(2 * np.pi * lerp_data['month'] / 12)
    lerp_data['month_cos'] = np.cos(2 * np.pi * lerp_data['month'] / 12)
    lerp_data['ground_temp - temp_C'] = lerp_data['groundtemp'] - lerp_data['temp_C']

    # Save the result and print the row counts of classes 0, 1 and 2.
    lerp_data.to_csv(output_path, index = False)
    print(len(lerp_data[lerp_data['multi_class']==0]),'|',len(lerp_data[lerp_data['multi_class']==1]),'|',len(lerp_data[lerp_data['multi_class']==2]))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Analysis_code/make_train_test.ipynb
DELETED
|
@@ -1,1099 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"cells": [
|
| 3 |
-
{
|
| 4 |
-
"cell_type": "code",
|
| 5 |
-
"execution_count": 1,
|
| 6 |
-
"metadata": {},
|
| 7 |
-
"outputs": [],
|
| 8 |
-
"source": [
|
| 9 |
-
"import pandas as pd\n",
|
| 10 |
-
"import numpy as np\n",
|
| 11 |
-
"import matplotlib.pyplot as plt\n",
|
| 12 |
-
"import seaborn as sns\n",
|
| 13 |
-
"from sklearn.model_selection import train_test_split\n",
|
| 14 |
-
"from collections import Counter"
|
| 15 |
-
]
|
| 16 |
-
},
|
| 17 |
-
{
|
| 18 |
-
"cell_type": "code",
|
| 19 |
-
"execution_count": 2,
|
| 20 |
-
"metadata": {},
|
| 21 |
-
"outputs": [],
|
| 22 |
-
"source": [
|
| 23 |
-
"df_seoul = pd.read_feather(\"../data/data_for_modeling/df_seoul.feather\")\n",
|
| 24 |
-
"df_busan = pd.read_feather(\"../data/data_for_modeling/df_busan.feather\")\n",
|
| 25 |
-
"df_incheon = pd.read_feather(\"../data/data_for_modeling/df_incheon.feather\")\n",
|
| 26 |
-
"df_daegu = pd.read_feather(\"../data/data_for_modeling/df_daegu.feather\")\n",
|
| 27 |
-
"df_daejeon = pd.read_feather(\"../data/data_for_modeling/df_daejeon.feather\")\n",
|
| 28 |
-
"df_gwangju = pd.read_feather(\"../data/data_for_modeling/df_gwangju.feather\")"
|
| 29 |
-
]
|
| 30 |
-
},
|
| 31 |
-
{
|
| 32 |
-
"cell_type": "code",
|
| 33 |
-
"execution_count": 3,
|
| 34 |
-
"metadata": {},
|
| 35 |
-
"outputs": [
|
| 36 |
-
{
|
| 37 |
-
"data": {
|
| 38 |
-
"text/plain": [
|
| 39 |
-
"Counter({2: 48534, 1: 3941, 0: 109})"
|
| 40 |
-
]
|
| 41 |
-
},
|
| 42 |
-
"execution_count": 3,
|
| 43 |
-
"metadata": {},
|
| 44 |
-
"output_type": "execute_result"
|
| 45 |
-
}
|
| 46 |
-
],
|
| 47 |
-
"source": [
|
| 48 |
-
"Counter(df_seoul['multi_class'])"
|
| 49 |
-
]
|
| 50 |
-
},
|
| 51 |
-
{
|
| 52 |
-
"cell_type": "code",
|
| 53 |
-
"execution_count": 4,
|
| 54 |
-
"metadata": {},
|
| 55 |
-
"outputs": [
|
| 56 |
-
{
|
| 57 |
-
"data": {
|
| 58 |
-
"text/plain": [
|
| 59 |
-
"Counter({2: 50069, 1: 2350, 0: 165})"
|
| 60 |
-
]
|
| 61 |
-
},
|
| 62 |
-
"execution_count": 4,
|
| 63 |
-
"metadata": {},
|
| 64 |
-
"output_type": "execute_result"
|
| 65 |
-
}
|
| 66 |
-
],
|
| 67 |
-
"source": [
|
| 68 |
-
"Counter(df_busan['multi_class'])"
|
| 69 |
-
]
|
| 70 |
-
},
|
| 71 |
-
{
|
| 72 |
-
"cell_type": "code",
|
| 73 |
-
"execution_count": 5,
|
| 74 |
-
"metadata": {},
|
| 75 |
-
"outputs": [
|
| 76 |
-
{
|
| 77 |
-
"data": {
|
| 78 |
-
"text/plain": [
|
| 79 |
-
"Counter({2: 44944, 1: 6658, 0: 982})"
|
| 80 |
-
]
|
| 81 |
-
},
|
| 82 |
-
"execution_count": 5,
|
| 83 |
-
"metadata": {},
|
| 84 |
-
"output_type": "execute_result"
|
| 85 |
-
}
|
| 86 |
-
],
|
| 87 |
-
"source": [
|
| 88 |
-
"Counter(df_incheon['multi_class'])"
|
| 89 |
-
]
|
| 90 |
-
},
|
| 91 |
-
{
|
| 92 |
-
"cell_type": "code",
|
| 93 |
-
"execution_count": 6,
|
| 94 |
-
"metadata": {},
|
| 95 |
-
"outputs": [
|
| 96 |
-
{
|
| 97 |
-
"data": {
|
| 98 |
-
"text/plain": [
|
| 99 |
-
"Counter({2: 50919, 1: 1610, 0: 55})"
|
| 100 |
-
]
|
| 101 |
-
},
|
| 102 |
-
"execution_count": 6,
|
| 103 |
-
"metadata": {},
|
| 104 |
-
"output_type": "execute_result"
|
| 105 |
-
}
|
| 106 |
-
],
|
| 107 |
-
"source": [
|
| 108 |
-
"Counter(df_daegu['multi_class'])"
|
| 109 |
-
]
|
| 110 |
-
},
|
| 111 |
-
{
|
| 112 |
-
"cell_type": "code",
|
| 113 |
-
"execution_count": 7,
|
| 114 |
-
"metadata": {},
|
| 115 |
-
"outputs": [
|
| 116 |
-
{
|
| 117 |
-
"data": {
|
| 118 |
-
"text/plain": [
|
| 119 |
-
"Counter({2: 48047, 1: 4227, 0: 310})"
|
| 120 |
-
]
|
| 121 |
-
},
|
| 122 |
-
"execution_count": 7,
|
| 123 |
-
"metadata": {},
|
| 124 |
-
"output_type": "execute_result"
|
| 125 |
-
}
|
| 126 |
-
],
|
| 127 |
-
"source": [
|
| 128 |
-
"Counter(df_daejeon['multi_class'])"
|
| 129 |
-
]
|
| 130 |
-
},
|
| 131 |
-
{
|
| 132 |
-
"cell_type": "code",
|
| 133 |
-
"execution_count": 8,
|
| 134 |
-
"metadata": {},
|
| 135 |
-
"outputs": [
|
| 136 |
-
{
|
| 137 |
-
"data": {
|
| 138 |
-
"text/plain": [
|
| 139 |
-
"Counter({2: 48405, 1: 4015, 0: 164})"
|
| 140 |
-
]
|
| 141 |
-
},
|
| 142 |
-
"execution_count": 8,
|
| 143 |
-
"metadata": {},
|
| 144 |
-
"output_type": "execute_result"
|
| 145 |
-
}
|
| 146 |
-
],
|
| 147 |
-
"source": [
|
| 148 |
-
"Counter(df_gwangju['multi_class'])"
|
| 149 |
-
]
|
| 150 |
-
},
|
| 151 |
-
{
|
| 152 |
-
"cell_type": "code",
|
| 153 |
-
"execution_count": 9,
|
| 154 |
-
"metadata": {},
|
| 155 |
-
"outputs": [
|
| 156 |
-
{
|
| 157 |
-
"data": {
|
| 158 |
-
"text/plain": [
|
| 159 |
-
"(52584, 30)"
|
| 160 |
-
]
|
| 161 |
-
},
|
| 162 |
-
"execution_count": 9,
|
| 163 |
-
"metadata": {},
|
| 164 |
-
"output_type": "execute_result"
|
| 165 |
-
}
|
| 166 |
-
],
|
| 167 |
-
"source": [
|
| 168 |
-
"df_seoul.shape"
|
| 169 |
-
]
|
| 170 |
-
},
|
| 171 |
-
{
|
| 172 |
-
"cell_type": "code",
|
| 173 |
-
"execution_count": 10,
|
| 174 |
-
"metadata": {},
|
| 175 |
-
"outputs": [],
|
| 176 |
-
"source": [
|
| 177 |
-
"df_seoul = df_seoul.loc[df_seoul['year'].isin([2018, 2019, 2020, 2021]),:].copy()\n",
|
| 178 |
-
"df_busan = df_busan.loc[df_busan['year'].isin([2018, 2019, 2020, 2021]),:].copy()\n",
|
| 179 |
-
"df_incheon = df_incheon.loc[df_incheon['year'].isin([2018, 2019, 2020, 2021]),:].copy()\n",
|
| 180 |
-
"df_daegu = df_daegu.loc[df_daegu['year'].isin([2018, 2019, 2020, 2021]),:].copy()\n",
|
| 181 |
-
"df_daejeon = df_daejeon.loc[df_daejeon['year'].isin([2018, 2019, 2020, 2021]),:].copy()\n",
|
| 182 |
-
"df_gwangju = df_gwangju.loc[df_gwangju['year'].isin([2018, 2019, 2020, 2021]),:].copy()"
|
| 183 |
-
]
|
| 184 |
-
},
|
| 185 |
-
{
|
| 186 |
-
"cell_type": "code",
|
| 187 |
-
"execution_count": 11,
|
| 188 |
-
"metadata": {},
|
| 189 |
-
"outputs": [],
|
| 190 |
-
"source": [
|
| 191 |
-
"cols = [col for col in df_seoul.columns if col != \"multi_class\"] + [\"multi_class\"]"
|
| 192 |
-
]
|
| 193 |
-
},
|
| 194 |
-
{
|
| 195 |
-
"cell_type": "code",
|
| 196 |
-
"execution_count": 12,
|
| 197 |
-
"metadata": {},
|
| 198 |
-
"outputs": [],
|
| 199 |
-
"source": [
|
| 200 |
-
"df_seoul = df_seoul[cols]\n",
|
| 201 |
-
"df_busan = df_busan[cols]\n",
|
| 202 |
-
"df_incheon = df_incheon[cols]\n",
|
| 203 |
-
"df_daegu = df_daegu[cols]\n",
|
| 204 |
-
"df_daejeon = df_daejeon[cols]\n",
|
| 205 |
-
"df_gwangju = df_gwangju[cols]"
|
| 206 |
-
]
|
| 207 |
-
},
|
| 208 |
-
{
|
| 209 |
-
"cell_type": "code",
|
| 210 |
-
"execution_count": 13,
|
| 211 |
-
"metadata": {},
|
| 212 |
-
"outputs": [],
|
| 213 |
-
"source": [
|
| 214 |
-
"df_seoul_train = df_seoul.loc[df_seoul['year'].isin([2018, 2019, 2020]),:].copy()\n",
|
| 215 |
-
"df_seoul_test = df_seoul.loc[df_seoul['year'].isin([2021]),:].copy()\n",
|
| 216 |
-
"\n",
|
| 217 |
-
"df_busan_train = df_busan.loc[df_busan['year'].isin([2018, 2019, 2020]),:].copy()\n",
|
| 218 |
-
"df_busan_test = df_busan.loc[df_busan['year'].isin([2021]),:].copy()\n",
|
| 219 |
-
"\n",
|
| 220 |
-
"df_incheon_train = df_incheon.loc[df_incheon['year'].isin([2018, 2019, 2020]),:].copy()\n",
|
| 221 |
-
"df_incheon_test = df_incheon.loc[df_incheon['year'].isin([2021]),:].copy()\n",
|
| 222 |
-
"\n",
|
| 223 |
-
"df_daegu_train = df_daegu.loc[df_daegu['year'].isin([2018, 2019, 2020]),:].copy()\n",
|
| 224 |
-
"df_daegu_test = df_daegu.loc[df_daegu['year'].isin([2021]),:].copy()\n",
|
| 225 |
-
"\n",
|
| 226 |
-
"df_daejeon_train = df_daejeon.loc[df_daejeon['year'].isin([2018, 2019, 2020]),:].copy()\n",
|
| 227 |
-
"df_daejeon_test = df_daejeon.loc[df_daejeon['year'].isin([2021]),:].copy()\n",
|
| 228 |
-
"\n",
|
| 229 |
-
"df_gwangju_train = df_gwangju.loc[df_gwangju['year'].isin([2018, 2019, 2020]),:].copy()\n",
|
| 230 |
-
"df_gwangju_test = df_gwangju.loc[df_gwangju['year'].isin([2021]),:].copy()"
|
| 231 |
-
]
|
| 232 |
-
},
|
| 233 |
-
{
|
| 234 |
-
"cell_type": "code",
|
| 235 |
-
"execution_count": 14,
|
| 236 |
-
"metadata": {},
|
| 237 |
-
"outputs": [
|
| 238 |
-
{
|
| 239 |
-
"data": {
|
| 240 |
-
"text/html": [
|
| 241 |
-
"<div>\n",
|
| 242 |
-
"<style scoped>\n",
|
| 243 |
-
" .dataframe tbody tr th:only-of-type {\n",
|
| 244 |
-
" vertical-align: middle;\n",
|
| 245 |
-
" }\n",
|
| 246 |
-
"\n",
|
| 247 |
-
" .dataframe tbody tr th {\n",
|
| 248 |
-
" vertical-align: top;\n",
|
| 249 |
-
" }\n",
|
| 250 |
-
"\n",
|
| 251 |
-
" .dataframe thead th {\n",
|
| 252 |
-
" text-align: right;\n",
|
| 253 |
-
" }\n",
|
| 254 |
-
"</style>\n",
|
| 255 |
-
"<table border=\"1\" class=\"dataframe\">\n",
|
| 256 |
-
" <thead>\n",
|
| 257 |
-
" <tr style=\"text-align: right;\">\n",
|
| 258 |
-
" <th></th>\n",
|
| 259 |
-
" <th>temp_C</th>\n",
|
| 260 |
-
" <th>precip_mm</th>\n",
|
| 261 |
-
" <th>wind_speed</th>\n",
|
| 262 |
-
" <th>wind_dir</th>\n",
|
| 263 |
-
" <th>hm</th>\n",
|
| 264 |
-
" <th>vap_pressure</th>\n",
|
| 265 |
-
" <th>dewpoint_C</th>\n",
|
| 266 |
-
" <th>loc_pressure</th>\n",
|
| 267 |
-
" <th>sea_pressure</th>\n",
|
| 268 |
-
" <th>solarRad</th>\n",
|
| 269 |
-
" <th>...</th>\n",
|
| 270 |
-
" <th>year</th>\n",
|
| 271 |
-
" <th>month</th>\n",
|
| 272 |
-
" <th>hour</th>\n",
|
| 273 |
-
" <th>ground_temp - temp_C</th>\n",
|
| 274 |
-
" <th>hour_sin</th>\n",
|
| 275 |
-
" <th>hour_cos</th>\n",
|
| 276 |
-
" <th>month_sin</th>\n",
|
| 277 |
-
" <th>month_cos</th>\n",
|
| 278 |
-
" <th>visi</th>\n",
|
| 279 |
-
" <th>multi_class</th>\n",
|
| 280 |
-
" </tr>\n",
|
| 281 |
-
" </thead>\n",
|
| 282 |
-
" <tbody>\n",
|
| 283 |
-
" <tr>\n",
|
| 284 |
-
" <th>0</th>\n",
|
| 285 |
-
" <td>1.2</td>\n",
|
| 286 |
-
" <td>0.0</td>\n",
|
| 287 |
-
" <td>1.6</td>\n",
|
| 288 |
-
" <td>360</td>\n",
|
| 289 |
-
" <td>35.0</td>\n",
|
| 290 |
-
" <td>2.3</td>\n",
|
| 291 |
-
" <td>-12.6</td>\n",
|
| 292 |
-
" <td>1015.8</td>\n",
|
| 293 |
-
" <td>1024.6</td>\n",
|
| 294 |
-
" <td>0.00</td>\n",
|
| 295 |
-
" <td>...</td>\n",
|
| 296 |
-
" <td>2018</td>\n",
|
| 297 |
-
" <td>1</td>\n",
|
| 298 |
-
" <td>0</td>\n",
|
| 299 |
-
" <td>-5.4</td>\n",
|
| 300 |
-
" <td>0.000000</td>\n",
|
| 301 |
-
" <td>1.000000e+00</td>\n",
|
| 302 |
-
" <td>0.5</td>\n",
|
| 303 |
-
" <td>0.866025</td>\n",
|
| 304 |
-
" <td>2000.0</td>\n",
|
| 305 |
-
" <td>2</td>\n",
|
| 306 |
-
" </tr>\n",
|
| 307 |
-
" <tr>\n",
|
| 308 |
-
" <th>1</th>\n",
|
| 309 |
-
" <td>0.5</td>\n",
|
| 310 |
-
" <td>0.0</td>\n",
|
| 311 |
-
" <td>1.3</td>\n",
|
| 312 |
-
" <td>360</td>\n",
|
| 313 |
-
" <td>33.0</td>\n",
|
| 314 |
-
" <td>2.1</td>\n",
|
| 315 |
-
" <td>-13.9</td>\n",
|
| 316 |
-
" <td>1015.5</td>\n",
|
| 317 |
-
" <td>1024.3</td>\n",
|
| 318 |
-
" <td>0.00</td>\n",
|
| 319 |
-
" <td>...</td>\n",
|
| 320 |
-
" <td>2018</td>\n",
|
| 321 |
-
" <td>1</td>\n",
|
| 322 |
-
" <td>1</td>\n",
|
| 323 |
-
" <td>-5.4</td>\n",
|
| 324 |
-
" <td>0.258819</td>\n",
|
| 325 |
-
" <td>9.659258e-01</td>\n",
|
| 326 |
-
" <td>0.5</td>\n",
|
| 327 |
-
" <td>0.866025</td>\n",
|
| 328 |
-
" <td>2000.0</td>\n",
|
| 329 |
-
" <td>2</td>\n",
|
| 330 |
-
" </tr>\n",
|
| 331 |
-
" <tr>\n",
|
| 332 |
-
" <th>2</th>\n",
|
| 333 |
-
" <td>0.1</td>\n",
|
| 334 |
-
" <td>0.0</td>\n",
|
| 335 |
-
" <td>1.5</td>\n",
|
| 336 |
-
" <td>20</td>\n",
|
| 337 |
-
" <td>34.0</td>\n",
|
| 338 |
-
" <td>2.1</td>\n",
|
| 339 |
-
" <td>-13.9</td>\n",
|
| 340 |
-
" <td>1015.7</td>\n",
|
| 341 |
-
" <td>1024.5</td>\n",
|
| 342 |
-
" <td>0.00</td>\n",
|
| 343 |
-
" <td>...</td>\n",
|
| 344 |
-
" <td>2018</td>\n",
|
| 345 |
-
" <td>1</td>\n",
|
| 346 |
-
" <td>2</td>\n",
|
| 347 |
-
" <td>-5.4</td>\n",
|
| 348 |
-
" <td>0.500000</td>\n",
|
| 349 |
-
" <td>8.660254e-01</td>\n",
|
| 350 |
-
" <td>0.5</td>\n",
|
| 351 |
-
" <td>0.866025</td>\n",
|
| 352 |
-
" <td>2000.0</td>\n",
|
| 353 |
-
" <td>2</td>\n",
|
| 354 |
-
" </tr>\n",
|
| 355 |
-
" <tr>\n",
|
| 356 |
-
" <th>3</th>\n",
|
| 357 |
-
" <td>0.0</td>\n",
|
| 358 |
-
" <td>0.0</td>\n",
|
| 359 |
-
" <td>2.1</td>\n",
|
| 360 |
-
" <td>320</td>\n",
|
| 361 |
-
" <td>37.0</td>\n",
|
| 362 |
-
" <td>2.3</td>\n",
|
| 363 |
-
" <td>-12.9</td>\n",
|
| 364 |
-
" <td>1015.9</td>\n",
|
| 365 |
-
" <td>1024.7</td>\n",
|
| 366 |
-
" <td>0.00</td>\n",
|
| 367 |
-
" <td>...</td>\n",
|
| 368 |
-
" <td>2018</td>\n",
|
| 369 |
-
" <td>1</td>\n",
|
| 370 |
-
" <td>3</td>\n",
|
| 371 |
-
" <td>-5.0</td>\n",
|
| 372 |
-
" <td>0.707107</td>\n",
|
| 373 |
-
" <td>7.071068e-01</td>\n",
|
| 374 |
-
" <td>0.5</td>\n",
|
| 375 |
-
" <td>0.866025</td>\n",
|
| 376 |
-
" <td>2000.0</td>\n",
|
| 377 |
-
" <td>2</td>\n",
|
| 378 |
-
" </tr>\n",
|
| 379 |
-
" <tr>\n",
|
| 380 |
-
" <th>4</th>\n",
|
| 381 |
-
" <td>-0.1</td>\n",
|
| 382 |
-
" <td>0.0</td>\n",
|
| 383 |
-
" <td>2.3</td>\n",
|
| 384 |
-
" <td>340</td>\n",
|
| 385 |
-
" <td>42.0</td>\n",
|
| 386 |
-
" <td>2.5</td>\n",
|
| 387 |
-
" <td>-11.5</td>\n",
|
| 388 |
-
" <td>1016.0</td>\n",
|
| 389 |
-
" <td>1024.9</td>\n",
|
| 390 |
-
" <td>0.00</td>\n",
|
| 391 |
-
" <td>...</td>\n",
|
| 392 |
-
" <td>2018</td>\n",
|
| 393 |
-
" <td>1</td>\n",
|
| 394 |
-
" <td>4</td>\n",
|
| 395 |
-
" <td>-4.3</td>\n",
|
| 396 |
-
" <td>0.866025</td>\n",
|
| 397 |
-
" <td>5.000000e-01</td>\n",
|
| 398 |
-
" <td>0.5</td>\n",
|
| 399 |
-
" <td>0.866025</td>\n",
|
| 400 |
-
" <td>2000.0</td>\n",
|
| 401 |
-
" <td>2</td>\n",
|
| 402 |
-
" </tr>\n",
|
| 403 |
-
" <tr>\n",
|
| 404 |
-
" <th>5</th>\n",
|
| 405 |
-
" <td>-0.1</td>\n",
|
| 406 |
-
" <td>0.0</td>\n",
|
| 407 |
-
" <td>2.8</td>\n",
|
| 408 |
-
" <td>50</td>\n",
|
| 409 |
-
" <td>43.0</td>\n",
|
| 410 |
-
" <td>2.6</td>\n",
|
| 411 |
-
" <td>-11.2</td>\n",
|
| 412 |
-
" <td>1016.0</td>\n",
|
| 413 |
-
" <td>1024.9</td>\n",
|
| 414 |
-
" <td>0.00</td>\n",
|
| 415 |
-
" <td>...</td>\n",
|
| 416 |
-
" <td>2018</td>\n",
|
| 417 |
-
" <td>1</td>\n",
|
| 418 |
-
" <td>5</td>\n",
|
| 419 |
-
" <td>-4.0</td>\n",
|
| 420 |
-
" <td>0.965926</td>\n",
|
| 421 |
-
" <td>2.588190e-01</td>\n",
|
| 422 |
-
" <td>0.5</td>\n",
|
| 423 |
-
" <td>0.866025</td>\n",
|
| 424 |
-
" <td>2000.0</td>\n",
|
| 425 |
-
" <td>2</td>\n",
|
| 426 |
-
" </tr>\n",
|
| 427 |
-
" <tr>\n",
|
| 428 |
-
" <th>6</th>\n",
|
| 429 |
-
" <td>-0.5</td>\n",
|
| 430 |
-
" <td>0.0</td>\n",
|
| 431 |
-
" <td>2.1</td>\n",
|
| 432 |
-
" <td>20</td>\n",
|
| 433 |
-
" <td>45.0</td>\n",
|
| 434 |
-
" <td>2.6</td>\n",
|
| 435 |
-
" <td>-11.0</td>\n",
|
| 436 |
-
" <td>1016.5</td>\n",
|
| 437 |
-
" <td>1025.4</td>\n",
|
| 438 |
-
" <td>0.00</td>\n",
|
| 439 |
-
" <td>...</td>\n",
|
| 440 |
-
" <td>2018</td>\n",
|
| 441 |
-
" <td>1</td>\n",
|
| 442 |
-
" <td>6</td>\n",
|
| 443 |
-
" <td>-4.1</td>\n",
|
| 444 |
-
" <td>1.000000</td>\n",
|
| 445 |
-
" <td>6.123234e-17</td>\n",
|
| 446 |
-
" <td>0.5</td>\n",
|
| 447 |
-
" <td>0.866025</td>\n",
|
| 448 |
-
" <td>2000.0</td>\n",
|
| 449 |
-
" <td>2</td>\n",
|
| 450 |
-
" </tr>\n",
|
| 451 |
-
" <tr>\n",
|
| 452 |
-
" <th>7</th>\n",
|
| 453 |
-
" <td>-0.8</td>\n",
|
| 454 |
-
" <td>0.0</td>\n",
|
| 455 |
-
" <td>2.5</td>\n",
|
| 456 |
-
" <td>340</td>\n",
|
| 457 |
-
" <td>45.0</td>\n",
|
| 458 |
-
" <td>2.6</td>\n",
|
| 459 |
-
" <td>-11.2</td>\n",
|
| 460 |
-
" <td>1017.1</td>\n",
|
| 461 |
-
" <td>1026.0</td>\n",
|
| 462 |
-
" <td>0.00</td>\n",
|
| 463 |
-
" <td>...</td>\n",
|
| 464 |
-
" <td>2018</td>\n",
|
| 465 |
-
" <td>1</td>\n",
|
| 466 |
-
" <td>7</td>\n",
|
| 467 |
-
" <td>-4.5</td>\n",
|
| 468 |
-
" <td>0.965926</td>\n",
|
| 469 |
-
" <td>-2.588190e-01</td>\n",
|
| 470 |
-
" <td>0.5</td>\n",
|
| 471 |
-
" <td>0.866025</td>\n",
|
| 472 |
-
" <td>2000.0</td>\n",
|
| 473 |
-
" <td>2</td>\n",
|
| 474 |
-
" </tr>\n",
|
| 475 |
-
" <tr>\n",
|
| 476 |
-
" <th>8</th>\n",
|
| 477 |
-
" <td>-0.5</td>\n",
|
| 478 |
-
" <td>0.0</td>\n",
|
| 479 |
-
" <td>1.2</td>\n",
|
| 480 |
-
" <td>360</td>\n",
|
| 481 |
-
" <td>43.0</td>\n",
|
| 482 |
-
" <td>2.5</td>\n",
|
| 483 |
-
" <td>-11.5</td>\n",
|
| 484 |
-
" <td>1017.4</td>\n",
|
| 485 |
-
" <td>1026.3</td>\n",
|
| 486 |
-
" <td>0.03</td>\n",
|
| 487 |
-
" <td>...</td>\n",
|
| 488 |
-
" <td>2018</td>\n",
|
| 489 |
-
" <td>1</td>\n",
|
| 490 |
-
" <td>8</td>\n",
|
| 491 |
-
" <td>-4.0</td>\n",
|
| 492 |
-
" <td>0.866025</td>\n",
|
| 493 |
-
" <td>-5.000000e-01</td>\n",
|
| 494 |
-
" <td>0.5</td>\n",
|
| 495 |
-
" <td>0.866025</td>\n",
|
| 496 |
-
" <td>2000.0</td>\n",
|
| 497 |
-
" <td>2</td>\n",
|
| 498 |
-
" </tr>\n",
|
| 499 |
-
" <tr>\n",
|
| 500 |
-
" <th>9</th>\n",
|
| 501 |
-
" <td>1.7</td>\n",
|
| 502 |
-
" <td>0.0</td>\n",
|
| 503 |
-
" <td>2.1</td>\n",
|
| 504 |
-
" <td>20</td>\n",
|
| 505 |
-
" <td>39.0</td>\n",
|
| 506 |
-
" <td>2.7</td>\n",
|
| 507 |
-
" <td>-10.8</td>\n",
|
| 508 |
-
" <td>1018.1</td>\n",
|
| 509 |
-
" <td>1026.9</td>\n",
|
| 510 |
-
" <td>0.46</td>\n",
|
| 511 |
-
" <td>...</td>\n",
|
| 512 |
-
" <td>2018</td>\n",
|
| 513 |
-
" <td>1</td>\n",
|
| 514 |
-
" <td>9</td>\n",
|
| 515 |
-
" <td>2.8</td>\n",
|
| 516 |
-
" <td>0.707107</td>\n",
|
| 517 |
-
" <td>-7.071068e-01</td>\n",
|
| 518 |
-
" <td>0.5</td>\n",
|
| 519 |
-
" <td>0.866025</td>\n",
|
| 520 |
-
" <td>1953.0</td>\n",
|
| 521 |
-
" <td>2</td>\n",
|
| 522 |
-
" </tr>\n",
|
| 523 |
-
" </tbody>\n",
|
| 524 |
-
"</table>\n",
|
| 525 |
-
"<p>10 rows × 30 columns</p>\n",
|
| 526 |
-
"</div>"
|
| 527 |
-
],
|
| 528 |
-
"text/plain": [
|
| 529 |
-
" temp_C precip_mm wind_speed wind_dir hm vap_pressure dewpoint_C \\\n",
|
| 530 |
-
"0 1.2 0.0 1.6 360 35.0 2.3 -12.6 \n",
|
| 531 |
-
"1 0.5 0.0 1.3 360 33.0 2.1 -13.9 \n",
|
| 532 |
-
"2 0.1 0.0 1.5 20 34.0 2.1 -13.9 \n",
|
| 533 |
-
"3 0.0 0.0 2.1 320 37.0 2.3 -12.9 \n",
|
| 534 |
-
"4 -0.1 0.0 2.3 340 42.0 2.5 -11.5 \n",
|
| 535 |
-
"5 -0.1 0.0 2.8 50 43.0 2.6 -11.2 \n",
|
| 536 |
-
"6 -0.5 0.0 2.1 20 45.0 2.6 -11.0 \n",
|
| 537 |
-
"7 -0.8 0.0 2.5 340 45.0 2.6 -11.2 \n",
|
| 538 |
-
"8 -0.5 0.0 1.2 360 43.0 2.5 -11.5 \n",
|
| 539 |
-
"9 1.7 0.0 2.1 20 39.0 2.7 -10.8 \n",
|
| 540 |
-
"\n",
|
| 541 |
-
" loc_pressure sea_pressure solarRad ... year month hour \\\n",
|
| 542 |
-
"0 1015.8 1024.6 0.00 ... 2018 1 0 \n",
|
| 543 |
-
"1 1015.5 1024.3 0.00 ... 2018 1 1 \n",
|
| 544 |
-
"2 1015.7 1024.5 0.00 ... 2018 1 2 \n",
|
| 545 |
-
"3 1015.9 1024.7 0.00 ... 2018 1 3 \n",
|
| 546 |
-
"4 1016.0 1024.9 0.00 ... 2018 1 4 \n",
|
| 547 |
-
"5 1016.0 1024.9 0.00 ... 2018 1 5 \n",
|
| 548 |
-
"6 1016.5 1025.4 0.00 ... 2018 1 6 \n",
|
| 549 |
-
"7 1017.1 1026.0 0.00 ... 2018 1 7 \n",
|
| 550 |
-
"8 1017.4 1026.3 0.03 ... 2018 1 8 \n",
|
| 551 |
-
"9 1018.1 1026.9 0.46 ... 2018 1 9 \n",
|
| 552 |
-
"\n",
|
| 553 |
-
" ground_temp - temp_C hour_sin hour_cos month_sin month_cos visi \\\n",
|
| 554 |
-
"0 -5.4 0.000000 1.000000e+00 0.5 0.866025 2000.0 \n",
|
| 555 |
-
"1 -5.4 0.258819 9.659258e-01 0.5 0.866025 2000.0 \n",
|
| 556 |
-
"2 -5.4 0.500000 8.660254e-01 0.5 0.866025 2000.0 \n",
|
| 557 |
-
"3 -5.0 0.707107 7.071068e-01 0.5 0.866025 2000.0 \n",
|
| 558 |
-
"4 -4.3 0.866025 5.000000e-01 0.5 0.866025 2000.0 \n",
|
| 559 |
-
"5 -4.0 0.965926 2.588190e-01 0.5 0.866025 2000.0 \n",
|
| 560 |
-
"6 -4.1 1.000000 6.123234e-17 0.5 0.866025 2000.0 \n",
|
| 561 |
-
"7 -4.5 0.965926 -2.588190e-01 0.5 0.866025 2000.0 \n",
|
| 562 |
-
"8 -4.0 0.866025 -5.000000e-01 0.5 0.866025 2000.0 \n",
|
| 563 |
-
"9 2.8 0.707107 -7.071068e-01 0.5 0.866025 1953.0 \n",
|
| 564 |
-
"\n",
|
| 565 |
-
" multi_class \n",
|
| 566 |
-
"0 2 \n",
|
| 567 |
-
"1 2 \n",
|
| 568 |
-
"2 2 \n",
|
| 569 |
-
"3 2 \n",
|
| 570 |
-
"4 2 \n",
|
| 571 |
-
"5 2 \n",
|
| 572 |
-
"6 2 \n",
|
| 573 |
-
"7 2 \n",
|
| 574 |
-
"8 2 \n",
|
| 575 |
-
"9 2 \n",
|
| 576 |
-
"\n",
|
| 577 |
-
"[10 rows x 30 columns]"
|
| 578 |
-
]
|
| 579 |
-
},
|
| 580 |
-
"execution_count": 14,
|
| 581 |
-
"metadata": {},
|
| 582 |
-
"output_type": "execute_result"
|
| 583 |
-
}
|
| 584 |
-
],
|
| 585 |
-
"source": [
|
| 586 |
-
"df_busan_train.head(10)"
|
| 587 |
-
]
|
| 588 |
-
},
|
| 589 |
-
{
|
| 590 |
-
"cell_type": "code",
|
| 591 |
-
"execution_count": 15,
|
| 592 |
-
"metadata": {},
|
| 593 |
-
"outputs": [
|
| 594 |
-
{
|
| 595 |
-
"data": {
|
| 596 |
-
"text/html": [
|
| 597 |
-
"<div>\n",
|
| 598 |
-
"<style scoped>\n",
|
| 599 |
-
" .dataframe tbody tr th:only-of-type {\n",
|
| 600 |
-
" vertical-align: middle;\n",
|
| 601 |
-
" }\n",
|
| 602 |
-
"\n",
|
| 603 |
-
" .dataframe tbody tr th {\n",
|
| 604 |
-
" vertical-align: top;\n",
|
| 605 |
-
" }\n",
|
| 606 |
-
"\n",
|
| 607 |
-
" .dataframe thead th {\n",
|
| 608 |
-
" text-align: right;\n",
|
| 609 |
-
" }\n",
|
| 610 |
-
"</style>\n",
|
| 611 |
-
"<table border=\"1\" class=\"dataframe\">\n",
|
| 612 |
-
" <thead>\n",
|
| 613 |
-
" <tr style=\"text-align: right;\">\n",
|
| 614 |
-
" <th></th>\n",
|
| 615 |
-
" <th>temp_C</th>\n",
|
| 616 |
-
" <th>precip_mm</th>\n",
|
| 617 |
-
" <th>wind_speed</th>\n",
|
| 618 |
-
" <th>wind_dir</th>\n",
|
| 619 |
-
" <th>hm</th>\n",
|
| 620 |
-
" <th>vap_pressure</th>\n",
|
| 621 |
-
" <th>dewpoint_C</th>\n",
|
| 622 |
-
" <th>loc_pressure</th>\n",
|
| 623 |
-
" <th>sea_pressure</th>\n",
|
| 624 |
-
" <th>solarRad</th>\n",
|
| 625 |
-
" <th>...</th>\n",
|
| 626 |
-
" <th>year</th>\n",
|
| 627 |
-
" <th>month</th>\n",
|
| 628 |
-
" <th>hour</th>\n",
|
| 629 |
-
" <th>ground_temp - temp_C</th>\n",
|
| 630 |
-
" <th>hour_sin</th>\n",
|
| 631 |
-
" <th>hour_cos</th>\n",
|
| 632 |
-
" <th>month_sin</th>\n",
|
| 633 |
-
" <th>month_cos</th>\n",
|
| 634 |
-
" <th>visi</th>\n",
|
| 635 |
-
" <th>multi_class</th>\n",
|
| 636 |
-
" </tr>\n",
|
| 637 |
-
" </thead>\n",
|
| 638 |
-
" <tbody>\n",
|
| 639 |
-
" <tr>\n",
|
| 640 |
-
" <th>26294</th>\n",
|
| 641 |
-
" <td>0.1</td>\n",
|
| 642 |
-
" <td>0.0</td>\n",
|
| 643 |
-
" <td>6.3</td>\n",
|
| 644 |
-
" <td>270</td>\n",
|
| 645 |
-
" <td>37.0</td>\n",
|
| 646 |
-
" <td>2.3</td>\n",
|
| 647 |
-
" <td>-12.9</td>\n",
|
| 648 |
-
" <td>1013.3</td>\n",
|
| 649 |
-
" <td>1022.1</td>\n",
|
| 650 |
-
" <td>2.07</td>\n",
|
| 651 |
-
" <td>...</td>\n",
|
| 652 |
-
" <td>2020</td>\n",
|
| 653 |
-
" <td>12</td>\n",
|
| 654 |
-
" <td>14</td>\n",
|
| 655 |
-
" <td>5.8</td>\n",
|
| 656 |
-
" <td>-0.500000</td>\n",
|
| 657 |
-
" <td>-8.660254e-01</td>\n",
|
| 658 |
-
" <td>-2.449294e-16</td>\n",
|
| 659 |
-
" <td>1.0</td>\n",
|
| 660 |
-
" <td>5000.0</td>\n",
|
| 661 |
-
" <td>2</td>\n",
|
| 662 |
-
" </tr>\n",
|
| 663 |
-
" <tr>\n",
|
| 664 |
-
" <th>26295</th>\n",
|
| 665 |
-
" <td>1.2</td>\n",
|
| 666 |
-
" <td>0.0</td>\n",
|
| 667 |
-
" <td>5.9</td>\n",
|
| 668 |
-
" <td>270</td>\n",
|
| 669 |
-
" <td>35.0</td>\n",
|
| 670 |
-
" <td>2.3</td>\n",
|
| 671 |
-
" <td>-12.6</td>\n",
|
| 672 |
-
" <td>1013.2</td>\n",
|
| 673 |
-
" <td>1022.0</td>\n",
|
| 674 |
-
" <td>1.71</td>\n",
|
| 675 |
-
" <td>...</td>\n",
|
| 676 |
-
" <td>2020</td>\n",
|
| 677 |
-
" <td>12</td>\n",
|
| 678 |
-
" <td>15</td>\n",
|
| 679 |
-
" <td>5.6</td>\n",
|
| 680 |
-
" <td>-0.707107</td>\n",
|
| 681 |
-
" <td>-7.071068e-01</td>\n",
|
| 682 |
-
" <td>-2.449294e-16</td>\n",
|
| 683 |
-
" <td>1.0</td>\n",
|
| 684 |
-
" <td>5000.0</td>\n",
|
| 685 |
-
" <td>2</td>\n",
|
| 686 |
-
" </tr>\n",
|
| 687 |
-
" <tr>\n",
|
| 688 |
-
" <th>26296</th>\n",
|
| 689 |
-
" <td>1.6</td>\n",
|
| 690 |
-
" <td>0.0</td>\n",
|
| 691 |
-
" <td>3.6</td>\n",
|
| 692 |
-
" <td>290</td>\n",
|
| 693 |
-
" <td>34.0</td>\n",
|
| 694 |
-
" <td>2.3</td>\n",
|
| 695 |
-
" <td>-12.6</td>\n",
|
| 696 |
-
" <td>1012.8</td>\n",
|
| 697 |
-
" <td>1021.6</td>\n",
|
| 698 |
-
" <td>1.14</td>\n",
|
| 699 |
-
" <td>...</td>\n",
|
| 700 |
-
" <td>2020</td>\n",
|
| 701 |
-
" <td>12</td>\n",
|
| 702 |
-
" <td>16</td>\n",
|
| 703 |
-
" <td>1.4</td>\n",
|
| 704 |
-
" <td>-0.866025</td>\n",
|
| 705 |
-
" <td>-5.000000e-01</td>\n",
|
| 706 |
-
" <td>-2.449294e-16</td>\n",
|
| 707 |
-
" <td>1.0</td>\n",
|
| 708 |
-
" <td>5000.0</td>\n",
|
| 709 |
-
" <td>2</td>\n",
|
| 710 |
-
" </tr>\n",
|
| 711 |
-
" <tr>\n",
|
| 712 |
-
" <th>26297</th>\n",
|
| 713 |
-
" <td>1.2</td>\n",
|
| 714 |
-
" <td>0.0</td>\n",
|
| 715 |
-
" <td>3.8</td>\n",
|
| 716 |
-
" <td>250</td>\n",
|
| 717 |
-
" <td>38.0</td>\n",
|
| 718 |
-
" <td>2.5</td>\n",
|
| 719 |
-
" <td>-11.5</td>\n",
|
| 720 |
-
" <td>1012.8</td>\n",
|
| 721 |
-
" <td>1021.6</td>\n",
|
| 722 |
-
" <td>0.48</td>\n",
|
| 723 |
-
" <td>...</td>\n",
|
| 724 |
-
" <td>2020</td>\n",
|
| 725 |
-
" <td>12</td>\n",
|
| 726 |
-
" <td>17</td>\n",
|
| 727 |
-
" <td>-0.4</td>\n",
|
| 728 |
-
" <td>-0.965926</td>\n",
|
| 729 |
-
" <td>-2.588190e-01</td>\n",
|
| 730 |
-
" <td>-2.449294e-16</td>\n",
|
| 731 |
-
" <td>1.0</td>\n",
|
| 732 |
-
" <td>5000.0</td>\n",
|
| 733 |
-
" <td>2</td>\n",
|
| 734 |
-
" </tr>\n",
|
| 735 |
-
" <tr>\n",
|
| 736 |
-
" <th>26298</th>\n",
|
| 737 |
-
" <td>0.9</td>\n",
|
| 738 |
-
" <td>0.0</td>\n",
|
| 739 |
-
" <td>3.8</td>\n",
|
| 740 |
-
" <td>270</td>\n",
|
| 741 |
-
" <td>40.0</td>\n",
|
| 742 |
-
" <td>2.6</td>\n",
|
| 743 |
-
" <td>-11.2</td>\n",
|
| 744 |
-
" <td>1013.1</td>\n",
|
| 745 |
-
" <td>1021.9</td>\n",
|
| 746 |
-
" <td>0.02</td>\n",
|
| 747 |
-
" <td>...</td>\n",
|
| 748 |
-
" <td>2020</td>\n",
|
| 749 |
-
" <td>12</td>\n",
|
| 750 |
-
" <td>18</td>\n",
|
| 751 |
-
" <td>-0.8</td>\n",
|
| 752 |
-
" <td>-1.000000</td>\n",
|
| 753 |
-
" <td>-1.836970e-16</td>\n",
|
| 754 |
-
" <td>-2.449294e-16</td>\n",
|
| 755 |
-
" <td>1.0</td>\n",
|
| 756 |
-
" <td>5000.0</td>\n",
|
| 757 |
-
" <td>2</td>\n",
|
| 758 |
-
" </tr>\n",
|
| 759 |
-
" <tr>\n",
|
| 760 |
-
" <th>26299</th>\n",
|
| 761 |
-
" <td>0.6</td>\n",
|
| 762 |
-
" <td>0.0</td>\n",
|
| 763 |
-
" <td>6.2</td>\n",
|
| 764 |
-
" <td>270</td>\n",
|
| 765 |
-
" <td>41.0</td>\n",
|
| 766 |
-
" <td>2.6</td>\n",
|
| 767 |
-
" <td>-11.1</td>\n",
|
| 768 |
-
" <td>1014.0</td>\n",
|
| 769 |
-
" <td>1022.8</td>\n",
|
| 770 |
-
" <td>0.00</td>\n",
|
| 771 |
-
" <td>...</td>\n",
|
| 772 |
-
" <td>2020</td>\n",
|
| 773 |
-
" <td>12</td>\n",
|
| 774 |
-
" <td>19</td>\n",
|
| 775 |
-
" <td>-1.1</td>\n",
|
| 776 |
-
" <td>-0.965926</td>\n",
|
| 777 |
-
" <td>2.588190e-01</td>\n",
|
| 778 |
-
" <td>-2.449294e-16</td>\n",
|
| 779 |
-
" <td>1.0</td>\n",
|
| 780 |
-
" <td>5000.0</td>\n",
|
| 781 |
-
" <td>2</td>\n",
|
| 782 |
-
" </tr>\n",
|
| 783 |
-
" <tr>\n",
|
| 784 |
-
" <th>26300</th>\n",
|
| 785 |
-
" <td>0.1</td>\n",
|
| 786 |
-
" <td>0.0</td>\n",
|
| 787 |
-
" <td>6.0</td>\n",
|
| 788 |
-
" <td>270</td>\n",
|
| 789 |
-
" <td>44.0</td>\n",
|
| 790 |
-
" <td>2.7</td>\n",
|
| 791 |
-
" <td>-10.7</td>\n",
|
| 792 |
-
" <td>1014.8</td>\n",
|
| 793 |
-
" <td>1023.6</td>\n",
|
| 794 |
-
" <td>0.00</td>\n",
|
| 795 |
-
" <td>...</td>\n",
|
| 796 |
-
" <td>2020</td>\n",
|
| 797 |
-
" <td>12</td>\n",
|
| 798 |
-
" <td>20</td>\n",
|
| 799 |
-
" <td>-0.9</td>\n",
|
| 800 |
-
" <td>-0.866025</td>\n",
|
| 801 |
-
" <td>5.000000e-01</td>\n",
|
| 802 |
-
" <td>-2.449294e-16</td>\n",
|
| 803 |
-
" <td>1.0</td>\n",
|
| 804 |
-
" <td>5000.0</td>\n",
|
| 805 |
-
" <td>2</td>\n",
|
| 806 |
-
" </tr>\n",
|
| 807 |
-
" <tr>\n",
|
| 808 |
-
" <th>26301</th>\n",
|
| 809 |
-
" <td>-0.2</td>\n",
|
| 810 |
-
" <td>0.0</td>\n",
|
| 811 |
-
" <td>5.0</td>\n",
|
| 812 |
-
" <td>290</td>\n",
|
| 813 |
-
" <td>48.0</td>\n",
|
| 814 |
-
" <td>2.9</td>\n",
|
| 815 |
-
" <td>-9.9</td>\n",
|
| 816 |
-
" <td>1014.6</td>\n",
|
| 817 |
-
" <td>1023.4</td>\n",
|
| 818 |
-
" <td>0.00</td>\n",
|
| 819 |
-
" <td>...</td>\n",
|
| 820 |
-
" <td>2020</td>\n",
|
| 821 |
-
" <td>12</td>\n",
|
| 822 |
-
" <td>21</td>\n",
|
| 823 |
-
" <td>-0.8</td>\n",
|
| 824 |
-
" <td>-0.707107</td>\n",
|
| 825 |
-
" <td>7.071068e-01</td>\n",
|
| 826 |
-
" <td>-2.449294e-16</td>\n",
|
| 827 |
-
" <td>1.0</td>\n",
|
| 828 |
-
" <td>5000.0</td>\n",
|
| 829 |
-
" <td>2</td>\n",
|
| 830 |
-
" </tr>\n",
|
| 831 |
-
" <tr>\n",
|
| 832 |
-
" <th>26302</th>\n",
|
| 833 |
-
" <td>-0.7</td>\n",
|
| 834 |
-
" <td>0.0</td>\n",
|
| 835 |
-
" <td>2.7</td>\n",
|
| 836 |
-
" <td>270</td>\n",
|
| 837 |
-
" <td>51.0</td>\n",
|
| 838 |
-
" <td>3.0</td>\n",
|
| 839 |
-
" <td>-9.6</td>\n",
|
| 840 |
-
" <td>1014.8</td>\n",
|
| 841 |
-
" <td>1023.6</td>\n",
|
| 842 |
-
" <td>0.00</td>\n",
|
| 843 |
-
" <td>...</td>\n",
|
| 844 |
-
" <td>2020</td>\n",
|
| 845 |
-
" <td>12</td>\n",
|
| 846 |
-
" <td>22</td>\n",
|
| 847 |
-
" <td>-0.6</td>\n",
|
| 848 |
-
" <td>-0.500000</td>\n",
|
| 849 |
-
" <td>8.660254e-01</td>\n",
|
| 850 |
-
" <td>-2.449294e-16</td>\n",
|
| 851 |
-
" <td>1.0</td>\n",
|
| 852 |
-
" <td>5000.0</td>\n",
|
| 853 |
-
" <td>2</td>\n",
|
| 854 |
-
" </tr>\n",
|
| 855 |
-
" <tr>\n",
|
| 856 |
-
" <th>26303</th>\n",
|
| 857 |
-
" <td>-0.7</td>\n",
|
| 858 |
-
" <td>0.0</td>\n",
|
| 859 |
-
" <td>3.8</td>\n",
|
| 860 |
-
" <td>250</td>\n",
|
| 861 |
-
" <td>55.0</td>\n",
|
| 862 |
-
" <td>3.2</td>\n",
|
| 863 |
-
" <td>-8.6</td>\n",
|
| 864 |
-
" <td>1015.1</td>\n",
|
| 865 |
-
" <td>1024.0</td>\n",
|
| 866 |
-
" <td>0.00</td>\n",
|
| 867 |
-
" <td>...</td>\n",
|
| 868 |
-
" <td>2020</td>\n",
|
| 869 |
-
" <td>12</td>\n",
|
| 870 |
-
" <td>23</td>\n",
|
| 871 |
-
" <td>-0.6</td>\n",
|
| 872 |
-
" <td>-0.258819</td>\n",
|
| 873 |
-
" <td>9.659258e-01</td>\n",
|
| 874 |
-
" <td>-2.449294e-16</td>\n",
|
| 875 |
-
" <td>1.0</td>\n",
|
| 876 |
-
" <td>5000.0</td>\n",
|
| 877 |
-
" <td>2</td>\n",
|
| 878 |
-
" </tr>\n",
|
| 879 |
-
" </tbody>\n",
|
| 880 |
-
"</table>\n",
|
| 881 |
-
"<p>10 rows × 30 columns</p>\n",
|
| 882 |
-
"</div>"
|
| 883 |
-
],
|
| 884 |
-
"text/plain": [
|
| 885 |
-
" temp_C precip_mm wind_speed wind_dir hm vap_pressure dewpoint_C \\\n",
|
| 886 |
-
"26294 0.1 0.0 6.3 270 37.0 2.3 -12.9 \n",
|
| 887 |
-
"26295 1.2 0.0 5.9 270 35.0 2.3 -12.6 \n",
|
| 888 |
-
"26296 1.6 0.0 3.6 290 34.0 2.3 -12.6 \n",
|
| 889 |
-
"26297 1.2 0.0 3.8 250 38.0 2.5 -11.5 \n",
|
| 890 |
-
"26298 0.9 0.0 3.8 270 40.0 2.6 -11.2 \n",
|
| 891 |
-
"26299 0.6 0.0 6.2 270 41.0 2.6 -11.1 \n",
|
| 892 |
-
"26300 0.1 0.0 6.0 270 44.0 2.7 -10.7 \n",
|
| 893 |
-
"26301 -0.2 0.0 5.0 290 48.0 2.9 -9.9 \n",
|
| 894 |
-
"26302 -0.7 0.0 2.7 270 51.0 3.0 -9.6 \n",
|
| 895 |
-
"26303 -0.7 0.0 3.8 250 55.0 3.2 -8.6 \n",
|
| 896 |
-
"\n",
|
| 897 |
-
" loc_pressure sea_pressure solarRad ... year month hour \\\n",
|
| 898 |
-
"26294 1013.3 1022.1 2.07 ... 2020 12 14 \n",
|
| 899 |
-
"26295 1013.2 1022.0 1.71 ... 2020 12 15 \n",
|
| 900 |
-
"26296 1012.8 1021.6 1.14 ... 2020 12 16 \n",
|
| 901 |
-
"26297 1012.8 1021.6 0.48 ... 2020 12 17 \n",
|
| 902 |
-
"26298 1013.1 1021.9 0.02 ... 2020 12 18 \n",
|
| 903 |
-
"26299 1014.0 1022.8 0.00 ... 2020 12 19 \n",
|
| 904 |
-
"26300 1014.8 1023.6 0.00 ... 2020 12 20 \n",
|
| 905 |
-
"26301 1014.6 1023.4 0.00 ... 2020 12 21 \n",
|
| 906 |
-
"26302 1014.8 1023.6 0.00 ... 2020 12 22 \n",
|
| 907 |
-
"26303 1015.1 1024.0 0.00 ... 2020 12 23 \n",
|
| 908 |
-
"\n",
|
| 909 |
-
" ground_temp - temp_C hour_sin hour_cos month_sin month_cos \\\n",
|
| 910 |
-
"26294 5.8 -0.500000 -8.660254e-01 -2.449294e-16 1.0 \n",
|
| 911 |
-
"26295 5.6 -0.707107 -7.071068e-01 -2.449294e-16 1.0 \n",
|
| 912 |
-
"26296 1.4 -0.866025 -5.000000e-01 -2.449294e-16 1.0 \n",
|
| 913 |
-
"26297 -0.4 -0.965926 -2.588190e-01 -2.449294e-16 1.0 \n",
|
| 914 |
-
"26298 -0.8 -1.000000 -1.836970e-16 -2.449294e-16 1.0 \n",
|
| 915 |
-
"26299 -1.1 -0.965926 2.588190e-01 -2.449294e-16 1.0 \n",
|
| 916 |
-
"26300 -0.9 -0.866025 5.000000e-01 -2.449294e-16 1.0 \n",
|
| 917 |
-
"26301 -0.8 -0.707107 7.071068e-01 -2.449294e-16 1.0 \n",
|
| 918 |
-
"26302 -0.6 -0.500000 8.660254e-01 -2.449294e-16 1.0 \n",
|
| 919 |
-
"26303 -0.6 -0.258819 9.659258e-01 -2.449294e-16 1.0 \n",
|
| 920 |
-
"\n",
|
| 921 |
-
" visi multi_class \n",
|
| 922 |
-
"26294 5000.0 2 \n",
|
| 923 |
-
"26295 5000.0 2 \n",
|
| 924 |
-
"26296 5000.0 2 \n",
|
| 925 |
-
"26297 5000.0 2 \n",
|
| 926 |
-
"26298 5000.0 2 \n",
|
| 927 |
-
"26299 5000.0 2 \n",
|
| 928 |
-
"26300 5000.0 2 \n",
|
| 929 |
-
"26301 5000.0 2 \n",
|
| 930 |
-
"26302 5000.0 2 \n",
|
| 931 |
-
"26303 5000.0 2 \n",
|
| 932 |
-
"\n",
|
| 933 |
-
"[10 rows x 30 columns]"
|
| 934 |
-
]
|
| 935 |
-
},
|
| 936 |
-
"execution_count": 15,
|
| 937 |
-
"metadata": {},
|
| 938 |
-
"output_type": "execute_result"
|
| 939 |
-
}
|
| 940 |
-
],
|
| 941 |
-
"source": [
|
| 942 |
-
"df_busan_train.tail(10)"
|
| 943 |
-
]
|
| 944 |
-
},
|
| 945 |
-
{
|
| 946 |
-
"cell_type": "code",
|
| 947 |
-
"execution_count": 16,
|
| 948 |
-
"metadata": {},
|
| 949 |
-
"outputs": [
|
| 950 |
-
{
|
| 951 |
-
"name": "stdout",
|
| 952 |
-
"output_type": "stream",
|
| 953 |
-
"text": [
|
| 954 |
-
"<class 'pandas.core.frame.DataFrame'>\n",
|
| 955 |
-
"Index: 26304 entries, 0 to 26303\n",
|
| 956 |
-
"Data columns (total 30 columns):\n",
|
| 957 |
-
" # Column Non-Null Count Dtype \n",
|
| 958 |
-
"--- ------ -------------- ----- \n",
|
| 959 |
-
" 0 temp_C 26304 non-null float64 \n",
|
| 960 |
-
" 1 precip_mm 26304 non-null float64 \n",
|
| 961 |
-
" 2 wind_speed 26304 non-null float64 \n",
|
| 962 |
-
" 3 wind_dir 26304 non-null category\n",
|
| 963 |
-
" 4 hm 26304 non-null float64 \n",
|
| 964 |
-
" 5 vap_pressure 26304 non-null float64 \n",
|
| 965 |
-
" 6 dewpoint_C 26304 non-null float64 \n",
|
| 966 |
-
" 7 loc_pressure 26304 non-null float64 \n",
|
| 967 |
-
" 8 sea_pressure 26304 non-null float64 \n",
|
| 968 |
-
" 9 solarRad 26304 non-null float64 \n",
|
| 969 |
-
" 10 snow_cm 26304 non-null float64 \n",
|
| 970 |
-
" 11 cloudcover 26304 non-null category\n",
|
| 971 |
-
" 12 lm_cloudcover 26304 non-null category\n",
|
| 972 |
-
" 13 low_cloudbase 26304 non-null float64 \n",
|
| 973 |
-
" 14 groundtemp 26304 non-null float64 \n",
|
| 974 |
-
" 15 O3 26304 non-null float64 \n",
|
| 975 |
-
" 16 NO2 26304 non-null float64 \n",
|
| 976 |
-
" 17 PM10 26304 non-null float64 \n",
|
| 977 |
-
" 18 PM25 26304 non-null float64 \n",
|
| 978 |
-
" 19 binary_class 26304 non-null int64 \n",
|
| 979 |
-
" 20 year 26304 non-null int64 \n",
|
| 980 |
-
" 21 month 26304 non-null int64 \n",
|
| 981 |
-
" 22 hour 26304 non-null int64 \n",
|
| 982 |
-
" 23 ground_temp - temp_C 26304 non-null float64 \n",
|
| 983 |
-
" 24 hour_sin 26304 non-null float64 \n",
|
| 984 |
-
" 25 hour_cos 26304 non-null float64 \n",
|
| 985 |
-
" 26 month_sin 26304 non-null float64 \n",
|
| 986 |
-
" 27 month_cos 26304 non-null float64 \n",
|
| 987 |
-
" 28 visi 26304 non-null float64 \n",
|
| 988 |
-
" 29 multi_class 26304 non-null int64 \n",
|
| 989 |
-
"dtypes: category(3), float64(22), int64(5)\n",
|
| 990 |
-
"memory usage: 5.7 MB\n"
|
| 991 |
-
]
|
| 992 |
-
}
|
| 993 |
-
],
|
| 994 |
-
"source": [
|
| 995 |
-
"df_busan_train.info()"
|
| 996 |
-
]
|
| 997 |
-
},
|
| 998 |
-
{
|
| 999 |
-
"cell_type": "code",
|
| 1000 |
-
"execution_count": 17,
|
| 1001 |
-
"metadata": {},
|
| 1002 |
-
"outputs": [],
|
| 1003 |
-
"source": [
|
| 1004 |
-
"df_seoul_train.to_csv(\"../data/data_for_modeling/seoul_train.csv\")\n",
|
| 1005 |
-
"df_seoul_test.to_csv(\"../data/data_for_modeling/seoul_test.csv\")\n",
|
| 1006 |
-
"\n",
|
| 1007 |
-
"df_busan_train.to_csv(\"../data/data_for_modeling/busan_train.csv\")\n",
|
| 1008 |
-
"df_busan_test.to_csv(\"../data/data_for_modeling/busan_test.csv\")\n",
|
| 1009 |
-
"\n",
|
| 1010 |
-
"df_incheon_train.to_csv(\"../data/data_for_modeling/incheon_train.csv\")\n",
|
| 1011 |
-
"df_incheon_test.to_csv(\"../data/data_for_modeling/incheon_test.csv\")\n",
|
| 1012 |
-
"\n",
|
| 1013 |
-
"df_daegu_train.to_csv(\"../data/data_for_modeling/daegu_train.csv\")\n",
|
| 1014 |
-
"df_daegu_test.to_csv(\"../data/data_for_modeling/daegu_test.csv\")\n",
|
| 1015 |
-
"\n",
|
| 1016 |
-
"df_daejeon_train.to_csv(\"../data/data_for_modeling/daejeon_train.csv\")\n",
|
| 1017 |
-
"df_daejeon_test.to_csv(\"../data/data_for_modeling/daejeon_test.csv\")\n",
|
| 1018 |
-
"\n",
|
| 1019 |
-
"df_gwangju_train.to_csv(\"../data/data_for_modeling/gwangju_train.csv\")\n",
|
| 1020 |
-
"df_gwangju_test.to_csv(\"../data/data_for_modeling/gwangju_test.csv\")\n",
|
| 1021 |
-
"\n",
|
| 1022 |
-
"df_seoul_train = pd.read_csv(\"../data/data_for_modeling/seoul_train.csv\")\n",
|
| 1023 |
-
"df_seoul_test = pd.read_csv(\"../data/data_for_modeling/seoul_test.csv\")\n"
|
| 1024 |
-
]
|
| 1025 |
-
},
|
| 1026 |
-
{
|
| 1027 |
-
"cell_type": "code",
|
| 1028 |
-
"execution_count": 18,
|
| 1029 |
-
"metadata": {},
|
| 1030 |
-
"outputs": [
|
| 1031 |
-
{
|
| 1032 |
-
"name": "stdout",
|
| 1033 |
-
"output_type": "stream",
|
| 1034 |
-
"text": [
|
| 1035 |
-
"Counter({2: 8266, 1: 481, 0: 13})\n",
|
| 1036 |
-
"Counter({2: 23686, 1: 2579, 0: 39})\n",
|
| 1037 |
-
"Counter({2: 8455, 1: 281, 0: 24})\n",
|
| 1038 |
-
"Counter({2: 24694, 1: 1516, 0: 94})\n",
|
| 1039 |
-
"Counter({2: 7373, 1: 1205, 0: 182})\n",
|
| 1040 |
-
"Counter({2: 21893, 1: 3892, 0: 519})\n",
|
| 1041 |
-
"Counter({2: 8631, 1: 128, 0: 1})\n",
|
| 1042 |
-
"Counter({2: 25149, 1: 1107, 0: 48})\n",
|
| 1043 |
-
"Counter({2: 8089, 1: 618, 0: 53})\n",
|
| 1044 |
-
"Counter({2: 23471, 1: 2660, 0: 173})\n",
|
| 1045 |
-
"Counter({2: 8087, 1: 643, 0: 30})\n",
|
| 1046 |
-
"Counter({2: 23798, 1: 2411, 0: 95})\n"
|
| 1047 |
-
]
|
| 1048 |
-
}
|
| 1049 |
-
],
|
| 1050 |
-
"source": [
|
| 1051 |
-
"print(Counter(df_seoul_test['multi_class']))\n",
|
| 1052 |
-
"print(Counter(df_seoul_train['multi_class']))\n",
|
| 1053 |
-
"\n",
|
| 1054 |
-
"print(Counter(df_busan_test['multi_class']))\n",
|
| 1055 |
-
"print(Counter(df_busan_train['multi_class']))\n",
|
| 1056 |
-
"\n",
|
| 1057 |
-
"print(Counter(df_incheon_test['multi_class']))\n",
|
| 1058 |
-
"print(Counter(df_incheon_train['multi_class']))\n",
|
| 1059 |
-
"\n",
|
| 1060 |
-
"print(Counter(df_daegu_test['multi_class']))\n",
|
| 1061 |
-
"print(Counter(df_daegu_train['multi_class']))\n",
|
| 1062 |
-
"\n",
|
| 1063 |
-
"print(Counter(df_daejeon_test['multi_class']))\n",
|
| 1064 |
-
"print(Counter(df_daejeon_train['multi_class']))\n",
|
| 1065 |
-
"\n",
|
| 1066 |
-
"print(Counter(df_gwangju_test['multi_class']))\n",
|
| 1067 |
-
"print(Counter(df_gwangju_train['multi_class']))"
|
| 1068 |
-
]
|
| 1069 |
-
},
|
| 1070 |
-
{
|
| 1071 |
-
"cell_type": "code",
|
| 1072 |
-
"execution_count": null,
|
| 1073 |
-
"metadata": {},
|
| 1074 |
-
"outputs": [],
|
| 1075 |
-
"source": []
|
| 1076 |
-
}
|
| 1077 |
-
],
|
| 1078 |
-
"metadata": {
|
| 1079 |
-
"kernelspec": {
|
| 1080 |
-
"display_name": "Python 3",
|
| 1081 |
-
"language": "python",
|
| 1082 |
-
"name": "python3"
|
| 1083 |
-
},
|
| 1084 |
-
"language_info": {
|
| 1085 |
-
"codemirror_mode": {
|
| 1086 |
-
"name": "ipython",
|
| 1087 |
-
"version": 3
|
| 1088 |
-
},
|
| 1089 |
-
"file_extension": ".py",
|
| 1090 |
-
"mimetype": "text/x-python",
|
| 1091 |
-
"name": "python",
|
| 1092 |
-
"nbconvert_exporter": "python",
|
| 1093 |
-
"pygments_lexer": "ipython3",
|
| 1094 |
-
"version": "3.8.10"
|
| 1095 |
-
}
|
| 1096 |
-
},
|
| 1097 |
-
"nbformat": 4,
|
| 1098 |
-
"nbformat_minor": 2
|
| 1099 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Analysis_code/model_result/best_sample/ensemble_best_sample.csv
DELETED
|
@@ -1,157 +0,0 @@
|
|
| 1 |
-
model,CSI,MCC,Accuracy,region,data_sample
|
| 2 |
-
deepgbm+ft_transformer,0.6992424754332419,0.8049778134563997,0.9617050719032529,seoul,best
|
| 3 |
-
deepgbm+resnet_like,0.7090721222546449,0.8134996041947694,0.9639575067994112,seoul,best
|
| 4 |
-
deepgbm+XGBoost,0.6692268948642789,0.7828377071996622,0.9579950262411524,seoul,best
|
| 5 |
-
deepgbm+LightGBM,0.6935149238495999,0.801572479004747,0.9620935948299524,seoul,best
|
| 6 |
-
ft_transformer+resnet_like,0.6252731559795573,0.7509360530096033,0.9488043848924154,seoul,best
|
| 7 |
-
ft_transformer+XGBoost,0.6142839547044981,0.7403116223696319,0.9470938235563208,seoul,best
|
| 8 |
-
ft_transformer+LightGBM,0.6085686211212852,0.7359331482423171,0.9466367866856302,seoul,best
|
| 9 |
-
resnet_like+XGBoost,0.6194270544060083,0.7446567256109252,0.9478222130731675,seoul,best
|
| 10 |
-
resnet_like+LightGBM,0.6136847718241535,0.741095701007121,0.9457727208457053,seoul,best
|
| 11 |
-
XGBoost+LightGBM,0.5861106806341969,0.7149062718037028,0.9421973118413721,seoul,best
|
| 12 |
-
deepgbm+ft_transformer+resnet_like,0.6821685259093041,0.7946632359980877,0.9596256373148356,seoul,best
|
| 13 |
-
deepgbm+ft_transformer+XGBoost,0.6730333196442752,0.7867573605932577,0.9584481123836616,seoul,best
|
| 14 |
-
deepgbm+ft_transformer+LightGBM,0.6764764169794865,0.7902017631994056,0.9590548610591277,seoul,best
|
| 15 |
-
deepgbm+resnet_like+XGBoost,0.6784375221650579,0.7915054245115618,0.9591382422170655,seoul,best
|
| 16 |
-
deepgbm+resnet_like+LightGBM,0.6775625748710835,0.7913206958685782,0.9590235671332685,seoul,best
|
| 17 |
-
deepgbm+XGBoost+LightGBM,0.6325378315490732,0.7547274504003308,0.9511999817018905,seoul,best
|
| 18 |
-
ft_transformer+resnet_like+XGBoost,0.6332314687362993,0.7561343704178952,0.9500612362868145,seoul,best
|
| 19 |
-
ft_transformer+resnet_like+LightGBM,0.6306548804840734,0.7545886604473321,0.9496810306826026,seoul,best
|
| 20 |
-
ft_transformer+XGBoost+LightGBM,0.6052879958628536,0.7319675532436248,0.9455772637672482,seoul,best
|
| 21 |
-
resnet_like+XGBoost+LightGBM,0.6103490502864374,0.7369455626993492,0.9460374196338716,seoul,best
|
| 22 |
-
deepgbm+ft_transformer+resnet_like+XGBoost,0.6729967545904317,0.7875845379790564,0.9581108449567916,seoul,best
|
| 23 |
-
deepgbm+ft_transformer+resnet_like+LightGBM,0.6719561859986568,0.7873723840805852,0.9580337017907196,seoul,best
|
| 24 |
-
deepgbm+ft_transformer+XGBoost+LightGBM,0.6531335492225346,0.7716405213564462,0.9549966314843926,seoul,best
|
| 25 |
-
deepgbm+resnet_like+XGBoost+LightGBM,0.6495283344540792,0.769203226760809,0.9539740166845488,seoul,best
|
| 26 |
-
ft_transformer+resnet_like+XGBoost+LightGBM,0.6233313312481723,0.7475113353931637,0.9482768587136429,seoul,best
|
| 27 |
-
deepgbm+ft_transformer+resnet_like+XGBoost+LightGBM,0.6569118107203898,0.7752226196801869,0.955377564854322,seoul,best
|
| 28 |
-
deepgbm+ft_transformer,0.6231286091598959,0.762319461740938,0.9692817368232819,busan,best
|
| 29 |
-
deepgbm+resnet_like,0.693965736363066,0.8126142282014271,0.9774987316083207,busan,best
|
| 30 |
-
deepgbm+XGBoost,0.5949465703841524,0.7372757652859034,0.9695935324500337,busan,best
|
| 31 |
-
deepgbm+LightGBM,0.6134672160989406,0.7540785415035627,0.969397659505452,busan,best
|
| 32 |
-
ft_transformer+resnet_like,0.5993289393140034,0.7415869283802814,0.9687921584283586,busan,best
|
| 33 |
-
ft_transformer+XGBoost,0.5275703861594696,0.6836131837976293,0.9627103242924039,busan,best
|
| 34 |
-
ft_transformer+LightGBM,0.5340117437566735,0.690570910687501,0.9611092397135513,busan,best
|
| 35 |
-
resnet_like+XGBoost,0.5476348496496576,0.6975679320922632,0.9653371426670326,busan,best
|
| 36 |
-
resnet_like+LightGBM,0.5541563454462936,0.7052881357398278,0.9637749415708096,busan,best
|
| 37 |
-
XGBoost+LightGBM,0.4789290487253062,0.6395338303931094,0.9572717310843294,busan,best
|
| 38 |
-
deepgbm+ft_transformer+resnet_like,0.6446887106206897,0.777619884223958,0.9728593291247681,busan,best
|
| 39 |
-
deepgbm+ft_transformer+XGBoost,0.6102029002552042,0.7511249566662123,0.9699326712744633,busan,best
|
| 40 |
-
deepgbm+ft_transformer+LightGBM,0.6162985577163776,0.7570233197218496,0.9692066729878318,busan,best
|
| 41 |
-
deepgbm+resnet_like+XGBoost,0.6261344637257955,0.7612451856135231,0.9726364248821019,busan,best
|
| 42 |
-
deepgbm+resnet_like+LightGBM,0.6357139958459742,0.770523881133966,0.9720613859986192,busan,best
|
| 43 |
-
deepgbm+XGBoost+LightGBM,0.550954042564896,0.7024853673988001,0.9645722608977718,busan,best
|
| 44 |
-
ft_transformer+resnet_like+XGBoost,0.5708235070191732,0.7183051671189992,0.9673502466086118,busan,best
|
| 45 |
-
ft_transformer+resnet_like+LightGBM,0.5747801644059322,0.723481589964096,0.9662444585838926,busan,best
|
| 46 |
-
ft_transformer+XGBoost+LightGBM,0.5188547215134367,0.6758426217880089,0.9612265139606259,busan,best
|
| 47 |
-
resnet_like+XGBoost+LightGBM,0.5283032797443571,0.6828500944597963,0.9625962730077933,busan,best
|
| 48 |
-
deepgbm+ft_transformer+resnet_like+XGBoost,0.62252465197324,0.7595480882291313,0.9716073641573305,busan,best
|
| 49 |
-
deepgbm+ft_transformer+resnet_like+LightGBM,0.6273921955959187,0.7650978772099609,0.9711861957398673,busan,best
|
| 50 |
-
deepgbm+ft_transformer+XGBoost+LightGBM,0.5803171388664561,0.7273427892946641,0.9671579085260872,busan,best
|
| 51 |
-
deepgbm+resnet_like+XGBoost+LightGBM,0.5883015315947713,0.7322310098800981,0.9685279794728481,busan,best
|
| 52 |
-
ft_transformer+resnet_like+XGBoost+LightGBM,0.5527408442450511,0.7040063830266932,0.9648394548826841,busan,best
|
| 53 |
-
deepgbm+ft_transformer+resnet_like+XGBoost+LightGBM,0.5949050119452819,0.7385090163673939,0.9689076652444045,busan,best
|
| 54 |
-
deepgbm+ft_transformer,0.5873884001633557,0.7086005084355499,0.9163964576523526,incheon,best
|
| 55 |
-
deepgbm+resnet_like,0.5938343436639008,0.7129218344958757,0.9139223661119011,incheon,best
|
| 56 |
-
deepgbm+XGBoost,0.5919031180535835,0.7111840783628853,0.9141871688665986,incheon,best
|
| 57 |
-
deepgbm+LightGBM,0.5936054700869063,0.71280494431202,0.9151763064434298,incheon,best
|
| 58 |
-
ft_transformer+resnet_like,0.5967079690105518,0.7167701525416347,0.9161293676339713,incheon,best
|
| 59 |
-
ft_transformer+XGBoost,0.5958609493419124,0.7170391565071776,0.9169653625105005,incheon,best
|
| 60 |
-
ft_transformer+LightGBM,0.5970916463252486,0.7186247751024871,0.9177277490830152,incheon,best
|
| 61 |
-
resnet_like+XGBoost,0.6048691059122082,0.7230658352305586,0.9150990593108267,incheon,best
|
| 62 |
-
resnet_like+LightGBM,0.6006642978391444,0.7204725929547785,0.913995974415916,incheon,best
|
| 63 |
-
XGBoost+LightGBM,0.5923037998052801,0.7130252479770401,0.9127417221847942,incheon,best
|
| 64 |
-
deepgbm+ft_transformer+resnet_like,0.5991788818331828,0.7177110572369007,0.9171559331619964,incheon,best
|
| 65 |
-
deepgbm+ft_transformer+XGBoost,0.5957796816002722,0.7161817507572641,0.9171555172958721,incheon,best
|
| 66 |
-
deepgbm+ft_transformer+LightGBM,0.5956266359998414,0.7160886468480842,0.9173455681147126,incheon,best
|
| 67 |
-
deepgbm+resnet_like+XGBoost,0.606730149793507,0.7239728449518917,0.9167729204614451,incheon,best
|
| 68 |
-
deepgbm+resnet_like+LightGBM,0.6063493089884,0.7235778913430906,0.9167717768296031,incheon,best
|
| 69 |
-
deepgbm+XGBoost+LightGBM,0.6018765631426876,0.7202419676609231,0.9160501451372774,incheon,best
|
| 70 |
-
ft_transformer+resnet_like+XGBoost,0.6044191487414033,0.7234603140558615,0.9175339554690555,incheon,best
|
| 71 |
-
ft_transformer+resnet_like+LightGBM,0.6061367487233379,0.7248883683993704,0.9179906804401528,incheon,best
|
| 72 |
-
ft_transformer+XGBoost+LightGBM,0.6013258447351273,0.721556226775587,0.9174587876670742,incheon,best
|
| 73 |
-
resnet_like+XGBoost+LightGBM,0.6028836532105156,0.7216380012607432,0.915023475642721,incheon,best
|
| 74 |
-
deepgbm+ft_transformer+resnet_like+XGBoost,0.6058688939038325,0.7235044499077539,0.9180666799743826,incheon,best
|
| 75 |
-
deepgbm+ft_transformer+resnet_like+LightGBM,0.6053266964324343,0.7232391120300768,0.9180279004582844,incheon,best
|
| 76 |
-
deepgbm+ft_transformer+XGBoost+LightGBM,0.6030917448851759,0.7220222485555309,0.9181051475908876,incheon,best
|
| 77 |
-
deepgbm+resnet_like+XGBoost+LightGBM,0.6054312862809185,0.7231150429381622,0.9164303507414893,incheon,best
|
| 78 |
-
ft_transformer+resnet_like+XGBoost+LightGBM,0.6046920079336132,0.7233826726564859,0.9171152822483387,incheon,best
|
| 79 |
-
deepgbm+ft_transformer+resnet_like+XGBoost+LightGBM,0.6109420059115421,0.72786127635734,0.9189396869359815,incheon,best
|
| 80 |
-
deepgbm+ft_transformer,0.6446005403603131,0.7684243711794304,0.9801833553742378,daegu,best
|
| 81 |
-
deepgbm+resnet_like,0.6137498386585009,0.7496378336201855,0.9772962048057489,daegu,best
|
| 82 |
-
deepgbm+XGBoost,0.5974990513708366,0.7398559419115336,0.9765007568763463,daegu,best
|
| 83 |
-
deepgbm+LightGBM,0.6115123902876293,0.7497436182930696,0.9776771381756785,daegu,best
|
| 84 |
-
ft_transformer+resnet_like,0.5469112144188149,0.6944690161411611,0.9743305595062838,daegu,best
|
| 85 |
-
ft_transformer+XGBoost,0.5161941376039343,0.6722802482596641,0.9719755096439354,daegu,best
|
| 86 |
-
ft_transformer+LightGBM,0.5073005220098658,0.6635248688295593,0.9720125217290049,daegu,best
|
| 87 |
-
resnet_like+XGBoost,0.5029516970497304,0.6645749569024574,0.9683687027472115,daegu,best
|
| 88 |
-
resnet_like+LightGBM,0.4951131290074195,0.6589965524803283,0.968139976378804,daegu,best
|
| 89 |
-
XGBoost+LightGBM,0.4549973810817563,0.6238297325596118,0.9647562816578087,daegu,best
|
| 90 |
-
deepgbm+ft_transformer+resnet_like,0.6183060943020874,0.7512947555661068,0.978549833237684,daegu,best
|
| 91 |
-
deepgbm+ft_transformer+XGBoost,0.6017176745611856,0.7392748101693137,0.9775999950096065,daegu,best
|
| 92 |
-
deepgbm+ft_transformer+LightGBM,0.6004996462619208,0.7388281525299276,0.9778661493292079,daegu,best
|
| 93 |
-
deepgbm+resnet_like+XGBoost,0.5739664154870325,0.7226992953385828,0.9747144039390839,daegu,best
|
| 94 |
-
deepgbm+resnet_like+LightGBM,0.5760590538958189,0.7240729842389874,0.975132349394083,daegu,best
|
| 95 |
-
deepgbm+XGBoost+LightGBM,0.5273980786589871,0.6866104535247874,0.9714085801498781,daegu,best
|
| 96 |
-
ft_transformer+resnet_like+XGBoost,0.5287822236272242,0.6819314770838893,0.9723570668130517,daegu,best
|
| 97 |
-
ft_transformer+resnet_like+LightGBM,0.520853018041326,0.675931210971238,0.972014705026158,daegu,best
|
| 98 |
-
ft_transformer+XGBoost+LightGBM,0.48437190164347993,0.6467498445011408,0.9690512430238457,daegu,best
|
| 99 |
-
resnet_like+XGBoost+LightGBM,0.4803327637605557,0.6466009408894583,0.9668476723973018,daegu,best
|
| 100 |
-
deepgbm+ft_transformer+resnet_like+XGBoost,0.5787636584539054,0.7225361676622583,0.9760432001730003,daegu,best
|
| 101 |
-
deepgbm+ft_transformer+resnet_like+LightGBM,0.5827514275296815,0.7252407488954487,0.9763842103949898,daegu,best
|
| 102 |
-
deepgbm+ft_transformer+XGBoost+LightGBM,0.5563231124331217,0.7066219754268251,0.9745609493391888,daegu,best
|
| 103 |
-
deepgbm+resnet_like+XGBoost+LightGBM,0.5497033929377105,0.7036672133164642,0.9729662067187331,daegu,best
|
| 104 |
-
ft_transformer+resnet_like+XGBoost+LightGBM,0.5057939271151667,0.6644447331953721,0.9703813908226664,daegu,best
|
| 105 |
-
deepgbm+ft_transformer+resnet_like+XGBoost+LightGBM,0.5538086053234044,0.7036101925279703,0.9742201470502616,daegu,best
|
| 106 |
-
deepgbm+ft_transformer,0.6798477338109503,0.7890627213525137,0.9581849730934616,daejeon,best
|
| 107 |
-
deepgbm+resnet_like,0.665680366173557,0.7799054500657331,0.9548432808510284,daejeon,best
|
| 108 |
-
deepgbm+XGBoost,0.6574065703012586,0.7732790182043394,0.9541580374445858,daejeon,best
|
| 109 |
-
deepgbm+LightGBM,0.6615432493953055,0.7763462848232718,0.9551088113714433,daejeon,best
|
| 110 |
-
ft_transformer+resnet_like,0.5874777029243061,0.7124933721407519,0.9448812078415716,daejeon,best
|
| 111 |
-
ft_transformer+XGBoost,0.5819133878684225,0.7079795420327318,0.9433591378263508,daejeon,best
|
| 112 |
-
ft_transformer+LightGBM,0.5805334443959108,0.7070987268766625,0.9437394473970939,daejeon,best
|
| 113 |
-
resnet_like+XGBoost,0.5621088257073251,0.6925993749297947,0.9389891126248638,daejeon,best
|
| 114 |
-
resnet_like+LightGBM,0.5552708987061287,0.6862528661139199,0.9386090109871829,daejeon,best
|
| 115 |
-
XGBoost+LightGBM,0.5288440675809284,0.6634738950206458,0.9327136928080112,daejeon,best
|
| 116 |
-
deepgbm+ft_transformer+resnet_like,0.6605998838783642,0.7743701608802702,0.9553335870116691,daejeon,best
|
| 117 |
-
deepgbm+ft_transformer+XGBoost,0.6495239084733958,0.7651993867637442,0.9536631567565769,daejeon,best
|
| 118 |
-
deepgbm+ft_transformer+LightGBM,0.6556854096497969,0.7699591127202252,0.9550316682053713,daejeon,best
|
| 119 |
-
deepgbm+resnet_like+XGBoost,0.646377396487214,0.7644362437422076,0.9526370070946761,daejeon,best
|
| 120 |
-
deepgbm+resnet_like+LightGBM,0.6441717942778256,0.7623630196443353,0.9527517861450042,daejeon,best
|
| 121 |
-
deepgbm+XGBoost+LightGBM,0.6104936580055047,0.7355161749128437,0.9470099225657277,daejeon,best
|
| 122 |
-
ft_transformer+resnet_like+XGBoost,0.5926724219238438,0.7169506517948077,0.9453380367792,daejeon,best
|
| 123 |
-
ft_transformer+resnet_like+LightGBM,0.5868504053718506,0.7120674745305696,0.9448050003742795,daejeon,best
|
| 124 |
-
ft_transformer+XGBoost+LightGBM,0.5697749480416849,0.6978540261423789,0.9408133093794446,daejeon,best
|
| 125 |
-
resnet_like+XGBoost+LightGBM,0.5568421888456356,0.6879639430352676,0.9383421289018639,daejeon,best
|
| 126 |
-
deepgbm+ft_transformer+resnet_like+XGBoost,0.6464484806055241,0.7628617034260374,0.953282847185834,daejeon,best
|
| 127 |
-
deepgbm+ft_transformer+resnet_like+LightGBM,0.6432761028976647,0.7603027833589299,0.9530166928662326,daejeon,best
|
| 128 |
-
deepgbm+ft_transformer+XGBoost+LightGBM,0.6268590959555621,0.7470252829879999,0.9501275669336527,daejeon,best
|
| 129 |
-
deepgbm+resnet_like+XGBoost+LightGBM,0.617990326312608,0.7412687534166494,0.9484927971987257,daejeon,best
|
| 130 |
-
ft_transformer+resnet_like+XGBoost+LightGBM,0.5778772266115083,0.7046854387067157,0.9426776372150277,daejeon,best
|
| 131 |
-
deepgbm+ft_transformer+resnet_like+XGBoost+LightGBM,0.6251709503819354,0.7454103687111592,0.9500901389824588,daejeon,best
|
| 132 |
-
deepgbm+ft_transformer,0.6798477338109503,0.7672365450318365,0.9541375560379602,gwangju,best
|
| 133 |
-
deepgbm+resnet_like,0.665680366173557,0.7714595133764687,0.9543727976423164,gwangju,best
|
| 134 |
-
deepgbm+XGBoost,0.6574065703012586,0.7719143895834364,0.9543191075928837,gwangju,best
|
| 135 |
-
deepgbm+LightGBM,0.6615432493953055,0.7728007686314036,0.9544770483485955,gwangju,best
|
| 136 |
-
ft_transformer+resnet_like,0.5874777029243061,0.7627495358829616,0.9528777415974249,gwangju,best
|
| 137 |
-
ft_transformer+XGBoost,0.5819133878684225,0.7549252510472145,0.9515179410587,gwangju,best
|
| 138 |
-
ft_transformer+LightGBM,0.5805334443959108,0.7489469355258954,0.9505456293509993,gwangju,best
|
| 139 |
-
resnet_like+XGBoost,0.5621088257073251,0.742686095459662,0.9492615719369842,gwangju,best
|
| 140 |
-
resnet_like+LightGBM,0.5552708987061287,0.7370427725250878,0.9481963158420041,gwangju,best
|
| 141 |
-
XGBoost+LightGBM,0.5288440675809284,0.7303546927519567,0.9467888046570956,gwangju,best
|
| 142 |
-
deepgbm+ft_transformer+resnet_like,0.6605998838783642,0.7743701608802702,0.9553335870116691,gwangju,best
|
| 143 |
-
deepgbm+ft_transformer+XGBoost,0.6495239084733958,0.7651993867637442,0.9536631567565769,gwangju,best
|
| 144 |
-
deepgbm+ft_transformer+LightGBM,0.6556854096497969,0.7699591127202252,0.9550316682053713,gwangju,best
|
| 145 |
-
deepgbm+resnet_like+XGBoost,0.646377396487214,0.7644362437422076,0.9526370070946761,gwangju,best
|
| 146 |
-
deepgbm+resnet_like+LightGBM,0.6441717942778256,0.7623630196443353,0.9527517861450042,gwangju,best
|
| 147 |
-
deepgbm+XGBoost+LightGBM,0.6104936580055047,0.7355161749128437,0.9470099225657277,gwangju,best
|
| 148 |
-
ft_transformer+resnet_like+XGBoost,0.5926724219238438,0.7169506517948077,0.9453380367792,gwangju,best
|
| 149 |
-
ft_transformer+resnet_like+LightGBM,0.5868504053718506,0.7120674745305696,0.9448050003742795,gwangju,best
|
| 150 |
-
ft_transformer+XGBoost+LightGBM,0.5697749480416849,0.6978540261423789,0.9408133093794446,gwangju,best
|
| 151 |
-
resnet_like+XGBoost+LightGBM,0.5568421888456356,0.6879639430352676,0.9383421289018639,gwangju,best
|
| 152 |
-
deepgbm+ft_transformer+resnet_like+XGBoost,0.6464484806055241,0.7628617034260374,0.953282847185834,gwangju,best
|
| 153 |
-
deepgbm+ft_transformer+resnet_like+LightGBM,0.6432761028976647,0.7603027833589299,0.9530166928662326,gwangju,best
|
| 154 |
-
deepgbm+ft_transformer+XGBoost+LightGBM,0.6268590959555621,0.7470252829879999,0.9501275669336527,gwangju,best
|
| 155 |
-
deepgbm+resnet_like+XGBoost+LightGBM,0.617990326312608,0.7412687534166494,0.9484927971987257,gwangju,best
|
| 156 |
-
ft_transformer+resnet_like+XGBoost+LightGBM,0.5778772266115083,0.7046854387067157,0.9426776372150277,gwangju,best
|
| 157 |
-
deepgbm+ft_transformer+resnet_like+XGBoost+LightGBM,0.6251709503819354,0.7454103687111592,0.9500901389824588,gwangju,best
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Analysis_code/model_result/deepgbm_sampled_data_test.csv
DELETED
|
@@ -1,31 +0,0 @@
|
|
| 1 |
-
region,model,data_sample,CSI,MCC,Accuracy
|
| 2 |
-
seoul,deepgbm,pure,0.6442509203188513,0.7660970126461858,0.9575257213197927
|
| 3 |
-
busan,deepgbm,pure,0.6818642050717673,0.8049988566475562,0.9749078856534504
|
| 4 |
-
incheon,deepgbm,pure,0.5671286321477068,0.6887072986816571,0.9090567324566875
|
| 5 |
-
daegu,deepgbm,pure,0.5768403304029271,0.7158561147888857,0.9752356921259908
|
| 6 |
-
daejeon,deepgbm,pure,0.6879666308365847,0.797628286801526,0.9573174763580109
|
| 7 |
-
gwangju,deepgbm,pure,0.6169039999207967,0.7404995782864335,0.9513605060259002
|
| 8 |
-
seoul,deepgbm,smote,0.6178386591753761,0.7437507767516012,0.9473890885046287
|
| 9 |
-
busan,deepgbm,smote,0.5771081499427251,0.7328290895553372,0.9590890660478578
|
| 10 |
-
incheon,deepgbm,smote,0.559840116480077,0.6829120896700739,0.9007683126647871
|
| 11 |
-
daegu,deepgbm,smote,0.6425770288404345,0.7686582033950807,0.9787012085069575
|
| 12 |
-
daejeon,deepgbm,smote,0.5569398315371836,0.6906810572279379,0.9299289492726501
|
| 13 |
-
gwangju,deepgbm,smote,0.5526168770566259,0.6910361569617187,0.9341257662333341
|
| 14 |
-
seoul,deepgbm,ctgan20000,0.7095252362606882,0.8058537207489471,0.9619258968152972
|
| 15 |
-
busan,deepgbm,ctgan20000,0.5799669524449778,0.7334430424969706,0.9640024203408438
|
| 16 |
-
incheon,deepgbm,ctgan20000,0.5289538894298621,0.6580659191964314,0.8935004283421081
|
| 17 |
-
daegu,deepgbm,ctgan20000,0.44609227542135127,0.616365605274888,0.9682904159492977
|
| 18 |
-
daejeon,deepgbm,ctgan20000,0.5716353901847903,0.7095774395951602,0.9385713751029269
|
| 19 |
-
gwangju,deepgbm,ctgan20000,0.4571493095973662,0.620850435638757,0.9200951709625637
|
| 20 |
-
seoul,deepgbm,ctgan10000,0.5616336216829804,0.700657840332931,0.9383062604486363
|
| 21 |
-
busan,deepgbm,ctgan10000,0.5879352056252677,0.7351657312694786,0.9672767422711281
|
| 22 |
-
incheon,deepgbm,ctgan10000,0.5290269463089048,0.6582877751557777,0.8928201753291581
|
| 23 |
-
daegu,deepgbm,ctgan10000,0.5557972102197738,0.7032214449405951,0.9747075421480318
|
| 24 |
-
daejeon,deepgbm,ctgan10000,0.5413249801262358,0.6761149432485588,0.9286541116683718
|
| 25 |
-
gwangju,deepgbm,ctgan10000,0.41816087724732665,0.5735307327535618,0.9039835816054095
|
| 26 |
-
seoul,deepgbm,ctgan7000,0.5813133937275518,0.7156872321691327,0.9403897497317663
|
| 27 |
-
busan,deepgbm,ctgan7000,0.597435747844303,0.7454746315945583,0.9670328367891807
|
| 28 |
-
incheon,deepgbm,ctgan7000,0.5123361043389418,0.646967905975133,0.8907633014779882
|
| 29 |
-
daegu,deepgbm,ctgan7000,0.6146700981464019,0.7399778687018386,0.9766398640949504
|
| 30 |
-
daejeon,deepgbm,ctgan7000,0.5581763081057168,0.6896808823936648,0.9308861691244354
|
| 31 |
-
gwangju,deepgbm,ctgan7000,0.5798647223742114,0.7012366964380711,0.9421613394216134
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Analysis_code/model_result/ft_transformer_sampled_data_test.csv
DELETED
|
@@ -1,31 +0,0 @@
|
|
| 1 |
-
region,model,data_sample,CSI,MCC,Accuracy
|
| 2 |
-
seoul,ft_transformer,pure,0.5998006360910436,0.7351618125239984,0.9481156066239157
|
| 3 |
-
busan,ft_transformer,pure,0.525891811597548,0.6847515269647338,0.9593201836464805
|
| 4 |
-
incheon,ft_transformer,pure,0.574439584484584,0.6997555499382345,0.9142701341584117
|
| 5 |
-
daegu,ft_transformer,pure,0.5325909397972257,0.6846558598973905,0.9751660345501576
|
| 6 |
-
daejeon,ft_transformer,pure,0.5841417763225328,0.710099560862753,0.9446515457743843
|
| 7 |
-
gwangju,ft_transformer,pure,0.5123621312403577,0.6524953764059983,0.9400000831732248
|
| 8 |
-
seoul,ft_transformer,smote,0.6027209460369666,0.7326561727884195,0.9459808618409561
|
| 9 |
-
busan,ft_transformer,smote,0.49442102850607217,0.6656582087002536,0.9448999218171686
|
| 10 |
-
incheon,ft_transformer,smote,0.5695903712638072,0.6933882645176865,0.9045313812577455
|
| 11 |
-
daegu,ft_transformer,smote,0.5152208156718004,0.680387175933169,0.9707628440252515
|
| 12 |
-
daejeon,ft_transformer,smote,0.5604907640952831,0.6919263799254157,0.9305719822674684
|
| 13 |
-
gwangju,ft_transformer,smote,0.4852746797180565,0.6329231459699102,0.9290042709450974
|
| 14 |
-
seoul,ft_transformer,ctgan20000,0.5803899707808177,0.7130066787294806,0.9414579018722292
|
| 15 |
-
busan,ft_transformer,ctgan20000,0.4639353486936595,0.6467071571334145,0.9446345992298159
|
| 16 |
-
incheon,ft_transformer,ctgan20000,0.5640202503368519,0.68857629970368,0.9072701715863629
|
| 17 |
-
daegu,ft_transformer,ctgan20000,0.3506001798809408,0.5079853139279686,0.9495035598140248
|
| 18 |
-
daejeon,ft_transformer,ctgan20000,0.5239938645095054,0.669429458663152,0.9276992830468016
|
| 19 |
-
gwangju,ft_transformer,ctgan20000,0.4145097694671273,0.5668829126361233,0.9129590122347814
|
| 20 |
-
seoul,ft_transformer,ctgan10000,0.5205630914565772,0.6720401513203828,0.9380401061290349
|
| 21 |
-
busan,ft_transformer,ctgan10000,0.4161261582289995,0.5928041147082687,0.9366729462451447
|
| 22 |
-
incheon,ft_transformer,ctgan10000,0.5623027751442908,0.6844425259764174,0.9051445758581398
|
| 23 |
-
daegu,ft_transformer,ctgan10000,0.4662085962650442,0.6230709704268124,0.9675943600236212
|
| 24 |
-
daejeon,ft_transformer,ctgan10000,0.5229201335739658,0.6589209425719041,0.9264041719689597
|
| 25 |
-
gwangju,ft_transformer,ctgan10000,0.4575447167412667,0.6033826231809732,0.9180866415483528
|
| 26 |
-
seoul,ft_transformer,ctgan7000,0.5679676743815468,0.7057250825135037,0.9415375402350475
|
| 27 |
-
busan,ft_transformer,ctgan7000,0.4934746936691438,0.6650891221099499,0.9561240445475793
|
| 28 |
-
incheon,ft_transformer,ctgan7000,0.5469994030503834,0.6742953373159715,0.9032061198858864
|
| 29 |
-
daegu,ft_transformer,ctgan7000,0.37617290206033815,0.5389485667787934,0.9553074914123645
|
| 30 |
-
daejeon,ft_transformer,ctgan7000,0.5437623015445493,0.6785419094519339,0.9309658074872537
|
| 31 |
-
gwangju,ft_transformer,ctgan7000,0.46978186086856627,0.6149128334271473,0.9244421155941479
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Analysis_code/model_result/lightgbm_sampled_data_test.csv
DELETED
|
@@ -1,31 +0,0 @@
|
|
| 1 |
-
region,model,data_sample,CSI,MCC,Accuracy
|
| 2 |
-
seoul,LightGBM,pure,0.4830701326492519,0.6294046921291281,0.9297619790237127
|
| 3 |
-
busan,LightGBM,pure,0.43506984201427806,0.603839321192473,0.9573520972128652
|
| 4 |
-
incheon,LightGBM,pure,0.5522436617260514,0.6857225827228159,0.9117255533098785
|
| 5 |
-
daegu,LightGBM,pure,0.2974860165425261,0.4862244089903067,0.9538572622701301
|
| 6 |
-
daejeon,LightGBM,pure,0.48570589458995483,0.6313963695110991,0.9337746712578286
|
| 7 |
-
gwangju,LightGBM,pure,0.4841000529921969,0.6363547628117262,0.9430086666500319
|
| 8 |
-
seoul,LightGBM,smote,0.5811441298770988,0.7097862880128495,0.9400710923139624
|
| 9 |
-
busan,LightGBM,smote,0.468977364244475,0.6345551645621824,0.9506557169116118
|
| 10 |
-
incheon,LightGBM,smote,0.5845373158941275,0.7079881355705151,0.9115271851685506
|
| 11 |
-
daegu,LightGBM,smote,0.45130042516888275,0.6228321490115053,0.9638063394632
|
| 12 |
-
daejeon,LightGBM,smote,0.5294747877274647,0.6632804903422373,0.9322567599038517
|
| 13 |
-
gwangju,LightGBM,smote,0.5204263336367001,0.658370785349228,0.9371142841696402
|
| 14 |
-
seoul,LightGBM,ctgan20000,0.5278642102510234,0.6732186063208289,0.9409731059377364
|
| 15 |
-
busan,LightGBM,ctgan20000,0.4608874696247123,0.6276000553143347,0.958718945197162
|
| 16 |
-
incheon,LightGBM,ctgan20000,0.5536461635852592,0.6798438480715302,0.8955189385433041
|
| 17 |
-
daegu,LightGBM,ctgan20000,0.4051438319036602,0.5791827944687878,0.965851153196763
|
| 18 |
-
daejeon,LightGBM,ctgan20000,0.4682791445662134,0.6207261950649575,0.9317582403872545
|
| 19 |
-
gwangju,LightGBM,ctgan20000,0.4817029529408228,0.6339890466307277,0.9420201528723874
|
| 20 |
-
seoul,LightGBM,ctgan10000,0.5442974261785484,0.6853617099956927,0.9422277740349827
|
| 21 |
-
busan,LightGBM,ctgan10000,0.4518917046571256,0.6185787696267423,0.9571202518485249
|
| 22 |
-
incheon,LightGBM,ctgan10000,0.5472322273711279,0.6822157410012658,0.9102826018248206
|
| 23 |
-
daegu,LightGBM,ctgan10000,0.42595146846779247,0.5975633254437179,0.9694990268732688
|
| 24 |
-
daejeon,LightGBM,ctgan10000,0.48065206351943335,0.6295068759845875,0.9330893238848551
|
| 25 |
-
gwangju,LightGBM,ctgan10000,0.48182318531993423,0.6331992271211336,0.9421339962239356
|
| 26 |
-
seoul,LightGBM,ctgan7000,0.5395567725547653,0.6810123059399511,0.9410132370187391
|
| 27 |
-
busan,LightGBM,ctgan7000,0.4584344058363401,0.6253420379252826,0.9581093894253563
|
| 28 |
-
incheon,LightGBM,ctgan7000,0.5498019261194692,0.6849206337149152,0.911005065249395
|
| 29 |
-
daegu,LightGBM,ctgan7000,0.39073560033282706,0.5690411701305705,0.9644477089935207
|
| 30 |
-
daejeon,LightGBM,ctgan7000,0.4739750751092528,0.6246889480882462,0.9325196912609893
|
| 31 |
-
gwangju,LightGBM,ctgan7000,0.48000404186608364,0.6311702463153694,0.9417549342515658
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Analysis_code/model_result/resnet_like_sampled_data_test.csv
DELETED
|
@@ -1,31 +0,0 @@
|
|
| 1 |
-
region,model,data_sample,CSI,MCC,Accuracy
|
| 2 |
-
seoul,resnet_like,pure,0.5556513128454227,0.6944843892225018,0.939922004308373
|
| 3 |
-
busan,resnet_like,pure,0.5246252526595989,0.6874494677398264,0.9591643378163702
|
| 4 |
-
incheon,resnet_like,pure,0.5799566950805518,0.7030239128662202,0.9153711397227005
|
| 5 |
-
daegu,resnet_like,pure,0.4394531403147615,0.6143074280063717,0.9659717543728323
|
| 6 |
-
daejeon,resnet_like,pure,0.5430465060488938,0.6757025454804458,0.9349603055784282
|
| 7 |
-
gwangju,resnet_like,pure,0.5577057247702664,0.689775566381261,0.9437623200339346
|
| 8 |
-
seoul,resnet_like,smote,0.6137456175954706,0.7422130871955783,0.9452800234548494
|
| 9 |
-
busan,resnet_like,smote,0.5023305919728182,0.6726508273995124,0.9473319069125267
|
| 10 |
-
incheon,resnet_like,smote,0.5860766625532056,0.7078408929922378,0.9089022381914814
|
| 11 |
-
daegu,resnet_like,smote,0.4901581106489206,0.6522246131616541,0.9662389483577446
|
| 12 |
-
daejeon,resnet_like,smote,0.5108150204429799,0.6519519041478157,0.9180820670209847
|
| 13 |
-
gwangju,resnet_like,smote,0.50385899296261,0.6510080124725456,0.9282384534770567
|
| 14 |
-
seoul,resnet_like,ctgan20000,0.5072327540718837,0.6620612708395112,0.9275254510068119
|
| 15 |
-
busan,resnet_like,ctgan20000,0.38986584909236405,0.5796375719673601,0.9392384243664279
|
| 16 |
-
incheon,resnet_like,ctgan20000,0.5432016410802436,0.6676977064251691,0.8980009315401186
|
| 17 |
-
daegu,resnet_like,ctgan20000,0.2688077976760475,0.440646061562418,0.9229636075554558
|
| 18 |
-
daejeon,resnet_like,ctgan20000,0.5006204822135542,0.6492987602101151,0.9202535119894204
|
| 19 |
-
gwangju,resnet_like,ctgan20000,0.444672867325423,0.5955785036774024,0.9136339629546457
|
| 20 |
-
seoul,resnet_like,ctgan10000,0.5304050574661009,0.6815565974252659,0.9339477755321007
|
| 21 |
-
busan,resnet_like,ctgan10000,0.5675115951993178,0.7176500871011381,0.965488621902837
|
| 22 |
-
incheon,resnet_like,ctgan10000,0.5608852333036517,0.6835219927895569,0.9037794953048714
|
| 23 |
-
daegu,resnet_like,ctgan10000,0.3723832547505761,0.5464472897265699,0.9578420914739127
|
| 24 |
-
daejeon,resnet_like,ctgan10000,0.5181167801633592,0.6610880657167911,0.9187646072976188
|
| 25 |
-
gwangju,resnet_like,ctgan10000,0.40351880029410764,0.5649610782208994,0.9066332726168792
|
| 26 |
-
seoul,resnet_like,ctgan7000,0.5731369888329633,0.7091329811132822,0.9410729138075871
|
| 27 |
-
busan,resnet_like,ctgan7000,0.4605071450423024,0.638239831036732,0.9479047624988564
|
| 28 |
-
incheon,resnet_like,ctgan7000,0.5343315289869124,0.6644525961066555,0.8994391005647463
|
| 29 |
-
daegu,resnet_like,ctgan7000,0.44530071901663565,0.6174979682782439,0.9673712478478929
|
| 30 |
-
daejeon,resnet_like,ctgan7000,0.5092618652909245,0.6496900098750791,0.9181710623716013
|
| 31 |
-
gwangju,resnet_like,ctgan7000,0.49270227739086486,0.6375371144090152,0.9272533705949381
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Analysis_code/model_result/xgboost_sampled_data_test.csv
DELETED
|
@@ -1,31 +0,0 @@
|
|
| 1 |
-
region,model,data_sample,CSI,MCC,Accuracy
|
| 2 |
-
seoul,XGBoost,pure,0.565011886957529,0.7004827765196581,0.9455723773402865
|
| 3 |
-
busan,XGBoost,pure,0.4602444563246328,0.6260853753404706,0.9597446789929386
|
| 4 |
-
incheon,XGBoost,pure,0.5581443524210103,0.6892177198119791,0.9130578844058522
|
| 5 |
-
daegu,XGBoost,pure,0.4166514505499186,0.600956509945712,0.9683242050719033
|
| 6 |
-
daejeon,XGBoost,pure,0.5044504047922097,0.6471930212882061,0.9357524265788357
|
| 7 |
-
gwangju,XGBoost,pure,0.49003643817143744,0.6379812479286601,0.943428795402184
|
| 8 |
-
seoul,XGBoost,smote,0.5833152545530768,0.7120196607674459,0.9418898828089262
|
| 9 |
-
busan,XGBoost,smote,0.46473886869778475,0.6265333263141976,0.9518734768903195
|
| 10 |
-
incheon,XGBoost,smote,0.5891561059817683,0.7103919570983672,0.9131620588700086
|
| 11 |
-
daegu,XGBoost,smote,0.4611201906216012,0.6275336823062336,0.9654021217489666
|
| 12 |
-
daejeon,XGBoost,smote,0.5188768068576874,0.6552123484066442,0.9321021616721145
|
| 13 |
-
gwangju,XGBoost,smote,0.5099103046808192,0.6499937333212472,0.9358984995550234
|
| 14 |
-
seoul,XGBoost,ctgan20000,0.5570923548170014,0.6942604158597989,0.9440149587044938
|
| 15 |
-
busan,XGBoost,ctgan20000,0.46709831119818074,0.6290561623391081,0.9589464239671965
|
| 16 |
-
incheon,XGBoost,ctgan20000,0.5523124093005438,0.6854497320743039,0.9114981785063753
|
| 17 |
-
daegu,XGBoost,ctgan20000,0.4182253863349025,0.5952665980456898,0.9664244246492171
|
| 18 |
-
daejeon,XGBoost,ctgan20000,0.4918910489160739,0.6390630711637499,0.933889866174281
|
| 19 |
-
gwangju,XGBoost,ctgan20000,0.5013450661414979,0.6465339172273498,0.9428181999650672
|
| 20 |
-
seoul,XGBoost,ctgan10000,0.5651854445647394,0.7027118144017459,0.9450405885337393
|
| 21 |
-
busan,XGBoost,ctgan10000,0.46692094075656104,0.6325842231097966,0.958222712944249
|
| 22 |
-
incheon,XGBoost,ctgan10000,0.5578011155842045,0.6901038363927982,0.9120290316141428
|
| 23 |
-
daegu,XGBoost,ctgan10000,0.424541930975018,0.6005603470129561,0.966994369172676
|
| 24 |
-
daejeon,XGBoost,ctgan10000,0.4845439338249185,0.63402545260005,0.9338891384085635
|
| 25 |
-
gwangju,XGBoost,ctgan10000,0.490651260954804,0.6368315107576099,0.9422102036912277
|
| 26 |
-
seoul,XGBoost,ctgan7000,0.5637353305761614,0.7003517728134977,0.9446616305279004
|
| 27 |
-
busan,XGBoost,ctgan7000,0.4616459581003174,0.6279091576031567,0.9586027106153988
|
| 28 |
-
incheon,XGBoost,ctgan7000,0.5600003713221271,0.6911778795112914,0.9122589016143924
|
| 29 |
-
daegu,XGBoost,ctgan7000,0.41141211423242163,0.5889669181000196,0.9661207384118904
|
| 30 |
-
daejeon,XGBoost,ctgan7000,0.4880797047418282,0.6352680209224976,0.9336605160066872
|
| 31 |
-
gwangju,XGBoost,ctgan7000,0.49570507941388103,0.6441170200639816,0.9430463025342881
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Analysis_code/model_visualize.ipynb
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|