Spaces:
Runtime error
Runtime error
| # preprocess data | |
| import numpy as np | |
| import re | |
| def get_most_common_features(target, all_features, max = 3, min = 3): | |
| res = [] | |
| main_keys = target.split('_') | |
| for feature in all_features: | |
| if target == feature: | |
| continue | |
| f_keys = feature.split('_') | |
| common_key_num = len(list(set(f_keys) & set(main_keys))) | |
| if common_key_num >= min and common_key_num <= max: | |
| res.append(feature) | |
| return res | |
| def build_net(target, all_features): | |
| # get edge_indexes, and index_feature_map | |
| main_keys = target.split('_') | |
| edge_indexes = [ | |
| [], | |
| [] | |
| ] | |
| index_feature_map = [target] | |
| # find closest features(nodes): | |
| parent_list = [target] | |
| graph_map = {} | |
| depth = 2 | |
| for i in range(depth): | |
| for feature in parent_list: | |
| children = get_most_common_features(feature, all_features) | |
| if feature not in graph_map: | |
| graph_map[feature] = [] | |
| # exclude parent | |
| pure_children = [] | |
| for child in children: | |
| if child not in graph_map: | |
| pure_children.append(child) | |
| graph_map[feature] = pure_children | |
| if feature not in index_feature_map: | |
| index_feature_map.append(feature) | |
| p_index = index_feature_map.index(feature) | |
| for child in pure_children: | |
| if child not in index_feature_map: | |
| index_feature_map.append(child) | |
| c_index = index_feature_map.index(child) | |
| edge_indexes[1].append(p_index) | |
| edge_indexes[0].append(c_index) | |
| parent_list = pure_children | |
| return edge_indexes, index_feature_map | |
| def construct_data(data, feature_map, labels=0): | |
| res = [] | |
| for feature in feature_map: | |
| if feature in data.columns: | |
| res.append(data.loc[:, feature].values.tolist()) | |
| else: | |
| print(feature, 'not exist in data') | |
| # append labels as last | |
| sample_n = len(res[0]) | |
| if type(labels) == int: | |
| res.append([labels]*sample_n) | |
| elif len(labels) == sample_n: | |
| res.append(labels) | |
| return res | |
| def build_loc_net(struc, all_features, feature_map=[]): | |
| index_feature_map = feature_map | |
| edge_indexes = [ | |
| [], | |
| [] | |
| ] | |
| for node_name, node_list in struc.items(): | |
| if node_name not in all_features: | |
| continue | |
| if node_name not in index_feature_map: | |
| index_feature_map.append(node_name) | |
| p_index = index_feature_map.index(node_name) | |
| for child in node_list: | |
| if child not in all_features: | |
| continue | |
| if child not in index_feature_map: | |
| print(f'error: {child} not in index_feature_map') | |
| # index_feature_map.append(child) | |
| c_index = index_feature_map.index(child) | |
| # edge_indexes[0].append(p_index) | |
| # edge_indexes[1].append(c_index) | |
| edge_indexes[0].append(c_index) | |
| edge_indexes[1].append(p_index) | |
| return edge_indexes |