| import numpy as np |
| import _pickle |
| import pandas as pd |
| |
| |
| |
| |
|
|
| from utils.ndgan import DCGAN |
|
|
| np.random.seed(4269) |
|
|
|
|
|
|
class dataset:
    """Build an augmented dataset from a CSV source with a DCGAN.

    The source table is min-max normalised, a ``DCGAN`` is trained on it and
    the samples it generates are appended to the normalised rows.
    """

    def __init__(self, number_samples: int, name: str, source: str,
                 boundary_conditions: list = None):
        """Store the configuration; no file access or training happens here.

        Args:
            number_samples (int): number of samples to be generated
            name (str): name of the dataset; also the file name under
                ``./data/`` that the result is pickled to
            source (str): CSV file name, resolved relative to ``./data/``
            boundary_conditions (list): ``[y1, y2, x1, x2]`` bounds used to
                filter the generated samples; ``None`` or an empty list
                disables the filter
        """
        self.sample_size = number_samples
        self.name = name
        self.samples = []
        self.encoding_dim = 8  # not read anywhere inside this class
        self.latent_dim = 16  # handed to DCGAN as its first argument
        self.source = source
        self.boundary_conditions = boundary_conditions

    def generate(self):
        """Train on the source data, sample from the model and save the result.

        Reads ``./data/<source>``, normalises it, trains a ``DCGAN`` on the
        normalised table, draws ``sample_size`` samples, optionally filters
        them by ``boundary_conditions`` and pickles the combined table to
        ``./data/<name>``. The samples and the combined table are printed.

        Returns:
            pandas.DataFrame: the normalised source rows followed by the
            generated rows. Generated rows carry no ``Name`` value, so that
            column is NaN for them.
        """
        source_table = pd.read_csv(f"./data/{self.source}")
        normalised = self._normalise(source_table)

        # The model is a DCGAN even though the attribute is called ``vae``;
        # the attribute name is kept because it is publicly visible.
        self.vae = DCGAN(self.latent_dim, normalised)
        self.vae.start_training()
        self.samples = self.vae.predict(self.sample_size)

        if self.boundary_conditions:
            self.samples = self._within_bounds(
                self.samples, self.boundary_conditions
            )

        print("Samples:", self.samples)
        # Samples only contain the feature columns, i.e. everything after
        # the leading ``Name`` column of the normalised table.
        generated = pd.DataFrame(self.samples, columns=normalised.columns[1:])
        dataframe = pd.concat([normalised, generated])
        dataframe.to_pickle(f"./data/{self.name}")
        print(dataframe)
        return dataframe

    @staticmethod
    def _normalise(table):
        """Min-max scale every column except ``Name``, keeping ``Name`` first."""
        features = table.drop("Name", axis=1)
        lowest = features.min()
        value_range = features.max() - lowest
        # A constant column has a zero range and would turn into NaN (0/0);
        # dividing by 1 instead maps such a column to 0.
        value_range = value_range.replace(0, 1)
        scaled = (features - lowest) / value_range
        # ``axis`` has to be a keyword: pandas 2.0 made every ``concat``
        # argument except ``objs`` keyword-only.
        return pd.concat([table["Name"], scaled], axis=1)

    @staticmethod
    def _within_bounds(samples, bounds):
        """Keep the rows whose first column lies strictly inside both ranges."""
        first_column = samples[:, 0]
        # NOTE(review): the bounds are documented as y1, y2, x1, x2, yet both
        # pairs are tested against column 0. This looks like a copy-paste
        # slip, but which column holds y is not visible here, so the existing
        # behaviour is preserved -- confirm and adjust the column index.
        inside_x = (first_column > bounds[2]) & (first_column < bounds[-1])
        inside_y = (first_column > bounds[0]) & (first_column < bounds[1])
        return samples[inside_x & inside_y]
| |
| |