| |
| import numpy as np |
| import seaborn as sns |
| import matplotlib.pyplot as plt |
| import os |
# Matplotlib 3.6 renamed the bundled seaborn style sheets to
# 'seaborn-v0_8-<name>' and later releases dropped the old names, so try the
# current name first and fall back to the legacy one on older installations.
try:
    plt.style.use('seaborn-v0_8-colorblind')
except OSError:
    plt.style.use('seaborn-colorblind')
|
|
| |
|
|
|
|
def get_dtypes(data, drop_col=None):
    """Split the columns of a pandas DataFrame into string and numeric ones.

    A column counts as numeric when its dtype kind is signed integer,
    unsigned integer or float (any width). Every other column (object,
    bool, datetime, ...) is reported as a string variable.

    Parameters
    ----------
    data : pandas Dataframe

    drop_col : columns to omit in a list (default: nothing is omitted)

    Returns
    -------
    str_var_list, num_var_list, all_var_list
        Three lists of column names, each in column order.
        all_var_list is str_var_list followed by num_var_list.

    """
    # Classify through the dtype's ``kind`` code rather than comparing with
    # aliases such as np.int / np.float: those aliases were removed in
    # NumPy 1.24, and the fixed list also missed widths like int8 or float16.
    numeric_kinds = ('i', 'u', 'f')

    # A list (not a set) keeps the original membership semantics and does not
    # require the column labels to be hashable.
    dropped = list(drop_col) if drop_col is not None else []

    str_var_list = []
    num_var_list = []
    for name, dtype in data.dtypes.items():
        if name in dropped:
            continue
        # Dtype objects without a ``kind`` attribute are treated as 'O'.
        if getattr(dtype, 'kind', 'O') in numeric_kinds:
            num_var_list.append(name)
        else:
            str_var_list.append(name)

    all_var_list = str_var_list + num_var_list
    return str_var_list, num_var_list, all_var_list
|
|
|
|
def describe(data, output_path=None):
    """Compute the general description of a pandas Dataframe.

    Parameters
    ----------
    data : pandas Dataframe

    output_path : directory in which 'describe.csv' is written;
        nothing is written when it is None

    Returns
    -------
    the result of data.describe(include='all')

    """
    summary = data.describe(include='all')
    if output_path is None:
        return summary

    csv_file = os.path.join(output_path, 'describe.csv')
    summary.to_csv(csv_file)
    print('result saved at:', csv_file)
    return summary
| |
| |
def discrete_var_barplot(x, y, data, output_path=None):
    """Draw a seaborn barplot of a discrete variable x against y (target).

    Parameters
    ----------
    x, y : forwarded unchanged to sns.barplot as its x / y arguments

    data : forwarded unchanged to sns.barplot as its data argument

    output_path : directory in which the PNG is saved;
        nothing is saved when it is None

    Returns
    -------
    None. The figure is saved as 'Barplot_<x>_<y>.png' when output_path
    is given.
    """
    plt.figure(figsize=(15, 10))
    sns.barplot(data=data, x=x, y=y)
    if output_path is None:
        return

    file_name = ''.join(['Barplot_', str(x), '_', str(y), '.png'])
    target = os.path.join(output_path, file_name)
    plt.savefig(target)
    print('Image saved at', target)
| |
| |
def discrete_var_countplot(x, data, output_path=None):
    """Draw a seaborn countplot of a discrete variable x.

    Parameters
    ----------
    x : forwarded unchanged to sns.countplot as its x argument

    data : forwarded unchanged to sns.countplot as its data argument

    output_path : directory in which the PNG is saved;
        nothing is saved when it is None

    Returns
    -------
    None. The figure is saved as 'Countplot_<x>.png' when output_path
    is given.
    """
    plt.figure(figsize=(15, 10))
    sns.countplot(data=data, x=x)
    if output_path is None:
        return

    file_name = ''.join(['Countplot_', str(x), '.png'])
    target = os.path.join(output_path, file_name)
    plt.savefig(target)
    print('Image saved at', target)
|
|
|
|
def discrete_var_boxplot(x, y, data, output_path=None):
    """Draw a seaborn boxplot of a discrete variable x against y.

    Parameters
    ----------
    x, y : forwarded unchanged to sns.boxplot as its x / y arguments

    data : forwarded unchanged to sns.boxplot as its data argument

    output_path : directory in which the PNG is saved;
        nothing is saved when it is None

    Returns
    -------
    None. The figure is saved as 'Boxplot_<x>_<y>.png' when output_path
    is given.
    """
    plt.figure(figsize=(15, 10))
    sns.boxplot(data=data, x=x, y=y)
    if output_path is None:
        return

    file_name = ''.join(['Boxplot_', str(x), '_', str(y), '.png'])
    target = os.path.join(output_path, file_name)
    plt.savefig(target)
    print('Image saved at', target)
|
|
|
|
def continuous_var_distplot(x, output_path=None, bins=None):
    """Draw the distplot (histogram, no KDE curve) of a continuous variable x.

    Parameters
    ----------
    x : the observations, forwarded to sns.distplot as its ``a`` argument.
        When it has a ``name`` attribute (e.g. a pandas Series) that name is
        used in the output file name.

    output_path : directory in which the PNG is saved;
        nothing is saved when it is None

    bins : forwarded unchanged to sns.distplot

    Returns
    -------
    None. The figure is saved as 'Distplot_<name>.png' when output_path
    is given.
    """
    plt.figure(figsize=(15, 10))
    # NOTE(review): sns.distplot is deprecated in recent seaborn releases;
    # kept as-is because sns.histplot handles ``bins=None`` differently.
    sns.distplot(a=x, kde=False, bins=bins)
    if output_path is not None:
        # Plain arrays / lists carry no ``name``; fall back to None instead of
        # raising AttributeError (same label an unnamed Series would produce).
        label = getattr(x, 'name', None)
        output = os.path.join(output_path, 'Distplot_' + str(label) + '.png')
        plt.savefig(output)
        print('Image saved at', str(output))
| |
| |
| |
|
|
def scatter_plot(x, y, data, output_path=None):
    """Draw the scatter-plot of two variables.

    Parameters
    ----------
    x, y : forwarded unchanged to sns.scatterplot as its x / y arguments.
        Either column names of ``data`` or objects with a ``name``
        attribute (e.g. pandas Series).

    data : forwarded unchanged to sns.scatterplot as its data argument

    output_path : directory in which the PNG is saved;
        nothing is saved when it is None

    Returns
    -------
    None. The figure is saved as 'Scatter_plot_<x>_<y>.png' when
    output_path is given.
    """
    plt.figure(figsize=(15, 10))
    sns.scatterplot(x=x, y=y, data=data)
    if output_path is not None:
        # x / y are usually plain column names, which have no ``.name``;
        # use the value itself in that case instead of raising AttributeError.
        x_label = getattr(x, 'name', x)
        y_label = getattr(y, 'name', y)
        output = os.path.join(
            output_path,
            'Scatter_plot_' + str(x_label) + '_' + str(y_label) + '.png',
        )
        plt.savefig(output)
        print('Image saved at', str(output))
| |
| |
def correlation_plot(data, output_path=None):
    """Draw the correlation plot between the numeric variables of data.

    Parameters
    ----------
    data : pandas Dataframe whose pairwise column correlations are plotted

    output_path : directory in which the PNG is saved;
        nothing is saved when it is None

    Returns
    -------
    None. The annotated heatmap is saved as 'Corr_plot.png' when
    output_path is given.
    """
    # pandas >= 2.0 raises on non-numeric columns unless numeric_only=True is
    # passed; older releases do not know the keyword (TypeError) and already
    # ignore non-numeric columns on their own.
    try:
        corrmat = data.corr(numeric_only=True)
    except TypeError:
        corrmat = data.corr()

    fig, _ = plt.subplots()
    fig.set_size_inches(11, 11)
    sns.heatmap(corrmat, cmap="YlGnBu", linewidths=.5, annot=True)
    if output_path is not None:
        output = os.path.join(output_path, 'Corr_plot' + '.png')
        plt.savefig(output)
        print('Image saved at', str(output))
| |
| |
def heatmap(data, output_path=None, fmt='d'):
    """Draw an annotated seaborn heatmap of data.

    Parameters
    ----------
    data : forwarded unchanged to sns.heatmap

    output_path : directory in which the PNG is saved;
        nothing is saved when it is None

    fmt : format code for the cell annotations, forwarded to sns.heatmap

    Returns
    -------
    None. The figure is saved as 'Heatmap.png' when output_path is given.
    """
    figure, _axes = plt.subplots()
    figure.set_size_inches(11, 11)
    sns.heatmap(data, annot=True, fmt=fmt, linewidths=.5, cmap="YlGnBu")
    if output_path is None:
        return

    target = os.path.join(output_path, 'Heatmap.png')
    plt.savefig(target)
    print('Image saved at', target)