| import pandas as pd
|
|
|
|
|
def cal_f1(df, standard=False):
    """Print and return macro-averaged recall, precision and F1.

    Args:
        df: DataFrame with a ``label`` column (``;``-separated label names)
            and a ``pred`` column. Helper columns ``label_list`` and
            ``pred_list`` (plus ``pred_list_prob`` when ``standard`` is
            false) are written back onto ``df``.
        standard: When true, each ``pred`` cell is the text of a single
            Python list literal. Otherwise each cell is a ``;``-separated
            sequence of literals. In both cases the predicted label is taken
            from index 0 of every item (presumably ``(label, prob)`` pairs;
            verify against the files being scored).

    Returns:
        Tuple ``(recall, precision, f1)``. Recall is the per-label recall
        summed over the distinct true labels and divided by their count.
        Precision is the per-label precision summed over the same labels and
        divided by the number of distinct predicted labels. Either average is
        ``0.0`` when its denominator is zero.
    """
    # True labels are stripped and lower-cased; predicted labels are used
    # exactly as they appear in the ``pred`` column.
    df['label_list'] = df['label'].apply(
        lambda cell: [part.strip().lower() for part in cell.split(';')]
    )

    # NOTE(review): eval() executes whatever text is stored in ``pred``.
    # Only run this on trusted files; ast.literal_eval would be the safer
    # choice if every cell is a plain Python literal.
    if standard:
        df['pred_list'] = df['pred'].apply(
            lambda cell: [item[0] for item in eval(str(cell))]
        )
    else:
        df['pred_list_prob'] = df['pred'].apply(
            lambda cell: [eval(part.strip()) for part in str(cell).split(';')]
        )
        df['pred_list'] = df['pred_list_prob'].apply(
            lambda items: [item[0] for item in items]
        )

    label_set = set()
    for row_labels in df['label_list']:
        label_set.update(row_labels)
    total = len(label_set)

    # Every true label starts at zero so the per-label lookups below never
    # miss; fp_dict additionally collects predicted labels outside label_set.
    tp_dict = dict.fromkeys(label_set, 0)
    fp_dict = dict.fromkeys(label_set, 0)
    fn_dict = dict.fromkeys(label_set, 0)
    predicted = set()

    for preds, row_labels in zip(df['pred_list'], df['label_list']):
        for truth in row_labels:
            if truth in preds:
                tp_dict[truth] += 1
            else:
                fn_dict[truth] += 1
        for guess in preds:
            if guess not in row_labels:
                fp_dict[guess] = fp_dict.get(guess, 0) + 1
        predicted.update(preds)
    p_total = len(predicted)

    recall, pr = 0., 0.
    for x in label_set:
        # 1e-8 keeps a label with no hits/misses from dividing by zero.
        recall += tp_dict[x] / (1.0 * (tp_dict[x] + fn_dict[x] + 1e-8))
        pr += tp_dict[x] / (1.0 * (tp_dict[x] + fp_dict[x] + 1e-8))

    # Guard the averages: an empty frame has no labels, and a frame whose
    # prediction lists are all empty has no predicted labels.
    r = recall / total if total else 0.0
    p = pr / p_total if p_total else 0.0
    f1 = 2 * p * r / (p + r + 1e-8)

    # fp_dict holds every true label plus each wrongly predicted label, so
    # the surplus over ``total`` counts predictions that are never a label.
    print("preds not in labels: {}".format(len(fp_dict) - total))
    # The spelling "percision" is left as-is to keep the printed output
    # identical to previous runs.
    print("recall:{}; percision:{}; f1 score: {}".format(r, p, f1))
    return r, p, f1
|
|
|
|
|
# Prediction dumps to score, one per experiment. Each is read as a
# '|'-separated file with three fields: name, pred and label.
names = [
    'output_test_mf_exp_493552.txt',
    'output_test_mf_exp_445772_pre.txt',
    'output_test_mf_exp_445772.txt',
    'output_test_mf_exp_486524.txt',
    'output_test_mf_493552_standard.csv',
    'output_test_mf_445772_standard.csv',
    'output_test_mf_exp_445772_withprompt.txt',
    'output_test_mf_exp_506753.txt',
]

for name in names:
    print(name)
    df = pd.read_csv(
        '/cluster/home/wenkai/LAVIS/output/mf_bp_cc/{}'.format(name),
        sep='|',
        header=None,
    )
    # Files are read without a header; when the first cell is the literal
    # 'name', the first row is a header line and is dropped.
    if df.iloc[0, 0] == 'name':
        df = df[1:]

    df.columns = ['name', 'pred', 'label']
    # Files with 'standard' in their name are scored with standard=True;
    # all others use the default.
    cal_f1(df, standard='standard' in name)
|
|
|
|
|
|
|
|
|