Udbhav01 committed on
Commit
a1588af
·
verified ·
1 Parent(s): 83f399c

Upload EDL.py

Browse files
Files changed (1) hide show
  1. EDL.py +907 -0
EDL.py ADDED
@@ -0,0 +1,907 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import keras
2
+ from keras.datasets import mnist, cifar10, cifar100
3
+ from keras import layers
4
+ from keras.models import Sequential
5
+ from keras.layers import Dense, Dropout, Flatten
6
+ from keras.layers import Conv2D, MaxPooling2D
7
+ from keras import backend as K
8
+ import cv2
9
+ import tensorflow as tf
10
+ #import GPy
11
+ #import gpflow, gpflux
12
+ import time
13
+ from tensorflow.keras.applications import VGG16,ResNet50
14
+ from keras import regularizers
15
+
16
+ import numpy as np
17
+
18
+ import sklearn
19
+ from sklearn.metrics import classification_report
20
+ from sklearn.metrics import accuracy_score
21
+ import sklearn.gaussian_process as gp
22
+ from sklearn.gaussian_process import GaussianProcessClassifier
23
+ from sklearn.gaussian_process.kernels import RBF, WhiteKernel
24
+ import matplotlib.pyplot as plt
25
+ # import official.nlp.modeling.layers as nlp_layers
26
+ # from official.nlp.modeling.layers import SpectralNormalization
27
+ import gp_layer
28
+ from sklearn.metrics import roc_auc_score
29
+ #%matplotlib inline
30
+ import os
31
+
32
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Fetch CIFAR-10 and rescale the pixel values by 1/255 as float32.
(X_train, y_train), (X_test, y_test) = cifar10.load_data()
X_train = X_train.astype('float32') / 255
X_test = X_test.astype('float32') / 255

# One-hot encoded copies of the integer labels.
num_classes = 10
y_train_one_hot, y_test_one_hot = (
    keras.utils.to_categorical(labels, num_classes)
    for labels in (y_train, y_test)
)

print('x_train shape:', X_train.shape)
for split, tag in ((X_train, 'train samples'), (X_test, 'test samples')):
    print(split.shape[0], tag)
50
+
51
+
52
+ # kernel = gpflow.kernels.SquaredExponential()
53
+
54
+ # inducing_variable = gpflow.inducing_variables.InducingPoints(
55
+ # np.linspace(0, 1, 128*100).reshape(-1, 128)
56
+ # )
57
+
58
+ # mean = gpflow.mean_functions.Zero()
59
+
60
+ # invlink = gpflow.likelihoods.RobustMax(10)
61
+ # likelihood = gpflow.likelihoods.MultiClass(10, invlink=invlink)
62
+
63
+ # likelihood_container = gpflux.layers.TrackableLayer()
64
+
65
+ # likelihood_container.likelihood = likelihood
66
+
67
+ # loss = gpflux.losses.LikelihoodLoss(likelihood)
68
+
69
+
70
# NOTE(review): this rebinds the name of the imported `gp_layer` module to the
# layer instance, so the module is no longer reachable under that name.
gp_layer = gp_layer.RandomFeatureGaussianProcess(
    **{
        "units": 10,
        "num_inducing": 2048,
        "normalize_input": True,
        "scale_random_features": False,
        "gp_cov_momentum": -1,
        "return_gp_cov": True,
    }
)
76
+
77
def feature_extractor(inputs):
    """Apply an ImageNet-initialised ResNet50 (no top layers) to `inputs`.

    The backbone is built for 224x224x3 inputs; the symbolic output of
    calling it on `inputs` is returned.
    """
    backbone = tf.keras.applications.resnet.ResNet50(
        input_shape=(224, 224, 3),
        include_top=False,
        weights='imagenet',
    )
    return backbone(inputs)
83
+
84
def classifier(inputs):
    """Map backbone feature maps to 10 linear (un-normalised) outputs.

    Global average pooling and a flatten are followed by Dense layers of
    256 (ReLU), 128 (ReLU) and 10 (linear) units.
    """
    head = tf.keras.layers.GlobalAveragePooling2D()(inputs)
    head = tf.keras.layers.Flatten()(head)
    for units, activation in ((256, 'relu'), (128, 'relu'), (10, 'linear')):
        head = tf.keras.layers.Dense(units, activation=activation)(head)
    return head
104
+
105
+
106
def final_model(inputs):
    """Chain up-sampling, the ResNet50 backbone and the dense head.

    `inputs` is up-sampled by a factor of 7 in both spatial dimensions
    before being handed to `feature_extractor`, whose output feeds
    `classifier`.
    """
    upscaled = tf.keras.layers.UpSampling2D(size=(7, 7))(inputs)
    return classifier(feature_extractor(upscaled))
115
+
116
+
117
+ # lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
118
+ # 0.001,
119
+ # decay_steps=20*50,
120
+ # decay_rate=1,
121
+ # staircase=False)
122
+
123
+
124
+ # def get_optimizer():
125
+ # return tf.keras.optimizers.Adam(lr_schedule)
126
+
127
+
128
def define_compile_model():
    """Build the Keras model for 32x32x3 inputs.

    Despite the name, the model is returned uncompiled; the caller is
    responsible for attaching an optimizer.
    """
    image_input = tf.keras.layers.Input(shape=(32, 32, 3))
    return tf.keras.Model(inputs=image_input, outputs=final_model(image_input))
138
+
139
+ # inputs = tf.keras.Input(shape=(28, 28, 1))
140
+
141
+ # x = tf.keras.layers.Conv2D(64, (3, 3), activation='relu', padding='same')(inputs)
142
+ # x = tf.keras.layers.MaxPooling2D((1, 1))(x)
143
+ # x = tf.keras.layers.Conv2D(128, (3, 3), activation='relu', padding='same')(x)
144
+ # x = tf.keras.layers.MaxPooling2D((2, 2))(x)
145
+ # x = tf.keras.layers.Conv2D(256, (3, 3), activation='relu', padding='same')(x)
146
+ # x = tf.keras.layers.MaxPooling2D((2, 2))(x)
147
+ # x = tf.keras.layers.Conv2D(512, (3, 3), activation='relu', padding='same')(x)
148
+ # x = tf.keras.layers.MaxPooling2D((2, 2))(x)
149
+ # x = tf.keras.layers.Conv2D(256, (3, 3), activation='relu', padding='same')(x)
150
+ # x = tf.keras.layers.MaxPooling2D((2, 2))(x)
151
+ # x = tf.keras.layers.Flatten()(x)
152
+ # #x = tf.keras.layers.Dropout(0.5)(x)
153
+ # x = tf.keras.layers.Dense(256, activation='linear')(x)
154
+ # #x = tf.keras.layers.Dense(128, activation='linear')(x)
155
+ # #l = tf.keras.layers.Dense(10, activation='linear')(x)
156
+ # gp_output, gp_std= gp_layer(x)
157
+
158
+ # model = tf.keras.Model(inputs=inputs, outputs=gp_output)
159
+
160
# Instantiate the network once at module level and print its layer table.
model = define_compile_model()
model.summary()
163
+
164
+ # t = tf.expand_dims(X_train[0], axis=0)
165
+
166
+ # model(t)[0]
167
+
168
+
169
+ # from tensorflow.keras.callbacks import ReduceLROnPlateau
170
+
171
+ # lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
172
+ # 0.001,
173
+ # decay_steps=20*50,
174
+ # decay_rate=1,
175
+ # staircase=False)
176
+
177
+ # def get_optimizer():
178
+ # return tf.keras.optimizers.Adam(lr_schedule)
179
+
180
+
181
+ # #Compiling the model
182
+ # model.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), optimizer = get_optimizer(), metrics=['accuracy'])
183
+ # # early_stop = EarlyStopping(monitor='val_loss',patience=5)
184
+ # # checkpoint = ModelCheckpoint("./Best_model/",save_best_only=True,)
185
+ # rlrp = ReduceLROnPlateau(monitor='loss', factor=0.4, verbose=0, patience=2, min_lr=0.0000001)
186
+
187
+
188
+ # # # # Train the model
189
+ # model.fit(X_train, y_train, batch_size=32, epochs=20, validation_data=(X_test, y_test), callbacks=[rlrp])
190
+
191
+ # predictions = np.argmax(model.predict(X_test), axis=1)
192
+
193
+ # print(classification_report(y_test, predictions))
194
+
195
+ # print(model(X_train[0].reshape(1,32,32,3)))
196
+
197
+ #t = X_train[0].reshape(1,32,32,3)
198
+
199
+ #model.predict(t)
200
+
201
+
202
def relu_evidence(logits):
    """Turn logits into non-negative evidence by clamping negatives to zero."""
    evidence = tf.nn.relu(logits)
    return evidence
204
+
205
def exp_evidence(logits):
    """Turn logits into positive evidence via exp, clipping logits to [-10, 10] first."""
    bounded = tf.clip_by_value(logits, -10, 10)
    return tf.exp(bounded)
207
+
208
+
209
def softplus_evidence(logits):
    """Turn logits into positive evidence as softplus((logits + 1)^2 / 2)."""
    half_square = ((logits + 1) ** 2) / 2
    return tf.nn.softplus(half_square)
211
+
212
+ # # # def log_marginal_likelihood_gp_layer(model, X_train, y_train):
213
+ # # # """Compute the log marginal likelihood for a GP layer within the model."""
214
+ # # # gp_layer = model.layers[-1]
215
+
216
+
217
+ # # # kernel = gp_layer.kernel
218
+ # # # inducing_points = gp_layer.inducing_variable.Z.numpy()
219
+ # # # mean = gp_layer.mean_function
220
+
221
+
222
+ # # # y_train_subset = y_train[:inducing_points.shape[0]].astype(np.float64) # Ensure float64 dtype
223
+
224
+
225
+ # # # K = kernel.K(inducing_points)
226
+ # # # K += np.eye(inducing_points.shape[0]) * 1e-6
227
+
228
+
229
+ # # # L = tf.linalg.cholesky(K)
230
+
231
+
232
+ # # # alpha = tf.linalg.cholesky_solve(L, y_train_subset)
233
+
234
+
235
+ # # # log_likelihood = -0.5 * tf.reduce_sum(tf.matmul(tf.transpose(y_train_subset), alpha)) - tf.reduce_sum(tf.math.log(tf.linalg.diag_part(L))) - 0.5 * inducing_points.shape[0] * np.log(2 * np.pi)
236
+
237
+ # # # return tf.squeeze(log_likelihood)
238
+
239
+
240
+
241
def kl_divergence(alpha):
    """KL divergence from Dirichlet(alpha) to the all-ones Dirichlet.

    Reductions run over axis 1 with `keepdims=True`, so `alpha` must be at
    least 2-D and the result keeps a trailing axis of size one.
    """
    uniform = tf.ones_like(alpha)
    alpha_total = tf.reduce_sum(alpha, axis=1, keepdims=True)
    uniform_total = tf.reduce_sum(uniform, axis=1, keepdims=True)

    # Log-normalisers of the two Dirichlet densities.
    log_norm_alpha = tf.math.lgamma(alpha_total) - tf.reduce_sum(
        tf.math.lgamma(alpha), axis=1, keepdims=True
    )
    log_norm_uniform = tf.reduce_sum(
        tf.math.lgamma(uniform), axis=1, keepdims=True
    ) - tf.math.lgamma(uniform_total)

    digamma_gap = tf.math.digamma(alpha) - tf.math.digamma(alpha_total)
    expectation_term = tf.reduce_sum(
        (alpha - uniform) * digamma_gap, axis=1, keepdims=True
    )
    return expectation_term + log_norm_alpha + log_norm_uniform
255
+
256
+
257
+
258
def loglikelihood_loss(y, alpha):
    """Per-sample squared-error term plus Dirichlet variance term.

    Both `y` and `alpha` are cast to float32; sums run over axis 1 with
    `keepdims=True`.
    """
    # The strength is summed in alpha's own dtype and only then cast.
    strength = tf.cast(tf.reduce_sum(alpha, axis=1, keepdims=True), tf.float32)
    targets = tf.cast(y, tf.float32)
    conc = tf.cast(alpha, tf.float32)

    squared_error = tf.reduce_sum(
        tf.square(targets - (conc / strength)), axis=1, keepdims=True
    )
    variance = tf.reduce_sum(
        conc * (strength - conc) / (strength * strength * (strength + 1)),
        axis=1,
        keepdims=True,
    )
    return squared_error + variance
267
+
268
+
269
def mse_loss(y, alpha, epoch_num, num_classes=10, annealing_step=10):
    """Evidential MSE loss with an annealed KL regulariser.

    Returns a pair `(loss, vacuity)`: `loss` is the per-sample
    `loglikelihood_loss` plus `min(1, epoch_num / annealing_step)` times the
    KL term, and `vacuity` is `num_classes / sum(alpha)` with the gradient
    through the sum stopped.
    """
    data_term = loglikelihood_loss(y, alpha)

    ramp = tf.cast(epoch_num / annealing_step, dtype=tf.float32)
    annealing_coef = tf.minimum(tf.constant(1.0, dtype=tf.float32), ramp)

    # Reset the target-class concentration to 1 so that only the
    # non-target concentrations enter the KL term.
    non_target_alpha = (alpha - 1) * (1 - y) + 1
    regulariser = annealing_coef * kl_divergence(non_target_alpha)

    strength = tf.reduce_sum(alpha, axis=1, keepdims=True)
    vacuity = tf.identity(num_classes / tf.stop_gradient(strength), name="vacuity")

    return data_term + regulariser, vacuity
296
+
297
+
298
+ # # # def edl_loss(func, y, alpha, epoch_num, num_classes, annealing_step, device=None):
299
+ # # # y = tf.convert_to_tensor(y, dtype=tf.float32)
300
+ # # # alpha = tf.convert_to_tensor(alpha, dtype=tf.float32)
301
+ # # # S = tf.reduce_sum(alpha, axis=1, keepdims=True)
302
+
303
+ # # # A = tf.reduce_sum(y * (func(S) - func(alpha)), axis=1, keepdims=True)
304
+
305
+ # # # annealing_coef = tf.minimum(
306
+ # # # tf.constant(1.0, dtype=tf.float32),
307
+ # # # tf.constant(epoch_num / annealing_step, dtype=tf.float32),
308
+ # # # )
309
+
310
+ # # # kl_alpha = (alpha - 1) * (1 - y) + 1
311
+ # # # kl_div = annealing_coef * kl_divergence(kl_alpha)
312
+
313
+ # # # S = tf.reduce_sum(alpha, axis=1, keepdims=True)
314
+ # # # with tf.GradientTape() as tape:
315
+ # # # vacuity = num_classes / tf.stop_gradient(S)
316
+
317
+ # # # return A + kl_div, vacuity
318
+
319
+
320
def compute_metrics(logits, Y, epoch, global_step, annealing_step, lmb=0.0005):
    """Compute the evidential training loss, uncertainty and class probabilities.

    Args:
        logits: Raw network outputs, either (batch, classes) or a single
            1-D vector of class scores (promoted to a batch of one).
        Y: Integer class labels; any shape that flattens to one label per
            row of `logits` (e.g. (batch,) or (batch, 1)).
        epoch: Current epoch, forwarded to `mse_loss` for KL annealing.
        global_step: Unused; kept so existing callers keep working.
        annealing_step: Number of epochs over which the KL weight ramps to 1.
        lmb: Unused; kept so existing callers keep working.

    Returns:
        (loss, u, prob) where `loss` is a scalar, `u` is
        `classes / sum(alpha)` with shape (batch, 1), and `prob` is
        `alpha / sum(alpha)` with shape (batch, classes).
    """
    logits = tf.cast(logits, tf.float32)
    if len(logits.shape) == 1:
        # Every reduction below (and in mse_loss / kl_divergence) runs over
        # axis 1, so a lone sample has to become a batch of one.
        logits = tf.expand_dims(logits, axis=0)

    # Take the class count from the logits instead of hard-coding 10 in
    # several places; fall back to 10 when the static shape is unknown.
    static_classes = logits.shape[-1]
    n_classes = static_classes if static_classes is not None else 10

    alpha = tf.cast(exp_evidence(logits) + 1, tf.float32)

    # Flatten labels so (batch, 1) inputs do not one-hot to a rank-3 tensor.
    labels = tf.reshape(Y, [-1])
    Y_onehot = tf.one_hot(labels, depth=n_classes)

    strength = tf.reduce_sum(alpha, axis=1, keepdims=True)
    u = n_classes / strength
    prob = alpha / strength

    mse_loss_val, vacuity = mse_loss(Y_onehot, alpha, epoch, n_classes, annealing_step)
    loss = tf.reduce_mean(mse_loss_val)

    # Subtract the vacuity-weighted true-class logit, averaged over the
    # batch, from the loss.
    output_correct = logits * Y_onehot
    batch = tf.cast(tf.shape(output_correct)[0], tf.float32)
    loss -= tf.reduce_sum(vacuity * output_correct) / batch

    return loss, u, prob
348
+
349
+
350
# np.asarray avoids duplicating the whole training set in memory; these
# names are only read afterwards.
x_train = np.asarray(X_train)
y_train = np.asarray(y_train)

optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)
model.compile(optimizer=optimizer)
num_epochs = 15
batch_size = 32

train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))
train_dataset = train_dataset.shuffle(buffer_size=len(X_train)).batch(batch_size)

# The evaluation data is deliberately NOT shuffled: per-sample outputs
# collected from this dataset are later indexed against `y_test`, which only
# lines up when the original order is preserved.
test_dataset = tf.data.Dataset.from_tensor_slices((X_test, y_test)).batch(batch_size)
364
+
365
+ # # # def get_multiple_samples(model, inputs, num_samples=5):
366
+ # # # samples = [model(inputs, training=True) for _ in range(num_samples)]
367
+ # # # mean_output = tf.reduce_mean(samples, axis=0)
368
+ # # # return mean_output
369
+
370
# Custom training loop: one optimizer step per batch using the evidential
# loss from compute_metrics, with running loss/accuracy reported per epoch.
for epoch in range(num_epochs):
    total_loss = 0.0
    correct = 0
    total = 0
    num_batches = 0

    for inputs, labels in train_dataset:
        # reshape (not squeeze) keeps a one-sample batch 1-D instead of
        # collapsing it to a scalar.
        labels = tf.reshape(labels, [-1])

        with tf.GradientTape() as tape:
            outputs = model(inputs, training=True)
            loss, _, _ = compute_metrics(
                outputs, labels, epoch, global_step=epoch, annealing_step=10
            )

        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))

        total_loss += loss.numpy()
        num_batches += 1

        predicted = tf.cast(tf.argmax(outputs, axis=1), tf.int32)
        total += labels.shape[0]
        correct += tf.reduce_sum(
            tf.cast(predicted == tf.cast(labels, tf.int32), tf.float32)
        ).numpy()

    # Average over the batches and samples actually seen; the previous
    # `len(x_train) // batch_size` denominator dropped the final partial batch.
    avg_loss = total_loss / max(num_batches, 1)
    accuracy = 100 * correct / max(total, 1)

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}, Accuracy: {accuracy:.2f}%')

    # NOTE(review): compute_metrics subtracts a logit-dependent term from the
    # loss, so this threshold may be crossed by a negative loss -- confirm the
    # stopping rule is intended.
    if avg_loss < 0.05:
        print(f'Stopping training. Loss ({avg_loss:.4f}) is below threshold ({0.05}).')
        break

predictions = np.argmax(model.predict(X_test), axis=1)

print(classification_report(y_test, predictions))
441
+
442
+ # # # #model.save('test_sngp.keras')
443
+
444
def test(model, test_dataset):
    """Evaluate `model` on `test_dataset` and collect per-sample uncertainty.

    Args:
        model: Callable Keras model returning (batch, classes) logits.
        test_dataset: Iterable of `(inputs, labels)` batches.

    Returns:
        (accuracy, predictions, uncertainties): accuracy in percent, the
        concatenated arg-max predictions, and the concatenated `u` values
        returned by `compute_metrics`, both in dataset iteration order.

    Raises:
        ValueError: If `test_dataset` yields no batches.

    Side effects:
        Writes `predictions.npy` and `uncertainties.npy` to the working
        directory.
    """
    correct = 0
    total = 0
    all_predictions = []
    all_uncertainties = []

    for inputs, labels in test_dataset:
        # reshape (not squeeze) so a final batch holding a single sample
        # stays 1-D and `labels.shape[0]` remains valid.
        labels = tf.reshape(labels, [-1])
        outputs = model(inputs, training=False)
        predicted = tf.cast(tf.argmax(outputs, axis=1), tf.int32)

        # Only the uncertainty is needed here; the loss is discarded.
        _, u, _ = compute_metrics(outputs, labels, epoch=0, global_step=0, annealing_step=10)

        all_predictions.append(predicted.numpy())
        all_uncertainties.append(u.numpy())

        total += labels.shape[0]
        correct += tf.reduce_sum(
            tf.cast(predicted == tf.cast(labels, tf.int32), tf.float32)
        ).numpy()

    if total == 0:
        raise ValueError("test_dataset yielded no samples")

    accuracy = 100 * correct / total
    all_predictions = np.concatenate(all_predictions)
    all_uncertainties = np.concatenate(all_uncertainties)

    print(f'Test Accuracy: {accuracy:.2f}%')
    print(f'Shape of predictions array: {all_predictions.shape}')
    print(f'Shape of uncertainties array: {all_uncertainties.shape}')

    np.save('predictions.npy', all_predictions)
    np.save('uncertainties.npy', all_uncertainties)

    return accuracy, all_predictions, all_uncertainties
477
+
478
+
479
+ # def add_gaussian_noise_to_image(image, noise_stddev=0.3):
480
+ # noise = tf.random.normal(shape=tf.shape(image), mean=0.0, stddev=noise_stddev)
481
+ # corrupted_image = tf.clip_by_value(image + noise, 0.0, 1.0) # Clip values to [0, 1]
482
+ # return corrupted_image
483
+
484
+ # # Corrupt the test dataset images with Gaussian noise
485
+ # corrupted_test_dataset = test_dataset.map(lambda x, y: (add_gaussian_noise_to_image(x), y))
486
+
487
+ # X, y = corrupted_test_dataset
488
+
489
+ # predictions = np.argmax(model.predict(X), axis=1)
490
+
491
+ # print(classification_report(y, predictions))
492
+
493
+
494
+ # _,u,_ = compute_metrics(predictions, y_test, 1, global_step=1, annealing_step=10)
495
def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or NaN when the denominator is zero."""
    return numerator / denominator if denominator else float('nan')


# Evaluate on an explicitly unshuffled pass over the test set so that the
# i-th prediction/uncertainty corresponds to y_test[i].
ordered_test_dataset = tf.data.Dataset.from_tensor_slices((X_test, y_test)).batch(batch_size)
test_accuracy, predictions_1, uncertainties = test(model, ordered_test_dataset)

# Flatten everything to 1-D so comparisons are element-wise.
flat_labels = np.asarray(y_test).reshape(-1)
flat_predictions = np.asarray(predictions_1).reshape(-1)
flat_uncertainties = np.asarray(uncertainties).reshape(-1)

is_correct = flat_predictions == flat_labels
is_certain = flat_uncertainties < 0.3

TC_indices = np.flatnonzero(is_correct & is_certain).tolist()    # True Certainty: correct and certain
FU_indices = np.flatnonzero(is_correct & ~is_certain).tolist()   # False Uncertainty: correct but uncertain
FC_indices = np.flatnonzero(~is_correct & is_certain).tolist()   # False Certainty: incorrect but certain
TU_indices = np.flatnonzero(~is_correct & ~is_certain).tolist()  # True Uncertainty: incorrect and uncertain

print('USen:', _safe_ratio(len(TU_indices), len(TU_indices) + len(FC_indices)))

print('USpe:', _safe_ratio(len(TC_indices), len(TC_indices) + len(FU_indices)))

print('UPre:', _safe_ratio(len(TU_indices), len(TU_indices) + len(FU_indices)))

print('UAcc:', _safe_ratio(
    len(TU_indices) + len(TC_indices),
    len(TU_indices) + len(TC_indices) + len(FU_indices) + len(FC_indices),
))
531
+
532
+
533
+
534
+ # def combine_images_with_padding(img_index_1, img_index_2, padding_type="top_bottom"):
535
+ # """
536
+ # Combines two CIFAR-10 images with padding and normalization.
537
+
538
+ # Args:
539
+ # img_index_1: Index of the first image in the dataset.
540
+ # img_index_2: Index of the second image in the dataset.
541
+ # padding_type: Type of padding to use ("top_bottom" or "left_right").
542
+
543
+ # Returns:
544
+ # A combined image tensor.
545
+ # """
546
+
547
+ # def combine_images_with_padding(img_index_1, img_index_2, padding_type):
548
+
549
+ # (train_images, train_labels), (test_images, test_labels) = cifar10.load_data()
550
+
551
+
552
+ # img_1 = tf.convert_to_tensor(test_images[img_index_1], dtype=tf.float32) / 255.0
553
+ # img_2 = tf.convert_to_tensor(test_images[img_index_2], dtype=tf.float32) / 255.0
554
+
555
+
556
+ # if padding_type == "top_bottom":
557
+ # padding_amount = (img_2.shape[0] - img_1.shape[0]) // 2
558
+ # top_bottom_padding = tf.zeros((padding_amount, img_1.shape[1], 3))
559
+ # padded_img_1 = tf.concat([top_bottom_padding, img_1, top_bottom_padding], axis=0)
560
+ # padded_img_2 = img_2
561
+ # elif padding_type == "left_right":
562
+ # padding_amount = (img_2.shape[1] - img_1.shape[1]) // 2
563
+ # left_right_padding = tf.zeros((img_1.shape[0], padding_amount, 3))
564
+ # padded_img_1 = tf.concat([left_right_padding, img_1, left_right_padding], axis=1)
565
+ # padded_img_2 = img_2
566
+ # else:
567
+ # raise ValueError("Invalid padding type. Choose 'top_bottom' or 'left_right'.")
568
+
569
+
570
+ # combined_img = tf.concat([padded_img_1, padded_img_2], axis=0)
571
+
572
+
573
+ # combined_img_resized = tf.image.resize(combined_img, [32, 32])
574
+
575
+ # return combined_img_resized
576
+
577
+
578
+
579
+ # img_index_1 = 50
580
+ # img_index_2 = 100
581
+ # padding_type = "top_bottom"
582
+
583
+ # combined_img = combine_images_with_padding(img_index_1, img_index_2, padding_type)
584
+
585
+ # combined_img = np.expand_dims(combined_img, axis=0)
586
+
587
+ # image1_index = 10
588
+ # image2_index = 21
589
+
590
+
591
+ # combined_img = np.zeros((32, 32))
592
+ # combined_img[:, :-6] += x_train[image1_index][:, 6:]
593
+ # combined_img[:, 14:] += x_train[image2_index][:, 5:19]
594
+ # combined_img /= combined_img.max()
595
+
596
+ # combined_img = combined_img.reshape(1, 32, 32, 3)
597
+
598
+
599
# Out-of-distribution probe: compare the model's uncertainty on a CIFAR-10
# test image with its uncertainty on a random MNIST digit.
(train_images, _), (_, _) = mnist.load_data()

mnist_image = train_images[np.random.randint(0, train_images.shape[0])]

# Bring the 28x28 grayscale digit to the 32x32x3 layout the model expects.
rescaled_image = cv2.resize(mnist_image, (32, 32))
rgb_image = cv2.cvtColor(rescaled_image, cv2.COLOR_GRAY2RGB)

# Apply the same preprocessing as the training data (float32 scaled by
# 1/255); feeding raw 0-255 uint8 pixels would confound the comparison.
rgb_image = np.expand_dims(rgb_image, axis=0).astype('float32') / 255

pred = model(X_test[0].reshape(1, 32, 32, 3), training=False)
pred_rgb = model(rgb_image, training=False)

# y_test[0] is passed for both calls only because compute_metrics requires a
# label; it has no meaning for the MNIST image, so l_rgb should be ignored.
l, u, p = compute_metrics(pred, y_test[0], 0, global_step=0, annealing_step=10)
l_rgb, u_rgb, p_rgb = compute_metrics(pred_rgb, y_test[0], 0, global_step=0, annealing_step=10)

print('u:', u)
print('p:', p)
print('pred:', pred)

print('u_rgb:', u_rgb)
print('p_rgb:', p_rgb)
print('preds:', pred_rgb)
635
+ #print('sd_rgb:', var_rgb)
636
+
637
+ #----------------------------------------------------------------------------------------------------
638
+ #Variance based EDL
639
+
640
def uncertainty(alpha, reduce=True):
    """Split the predictive variance of a Dirichlet into two parts.

    With p = alpha / S and S = sum(alpha) over axis 1, the total is
    p - p^2, the epistemic part is p * (1 - p) / (S + 1), and the aleatoric
    part is their difference. When `reduce` is true each part is summed over
    all elements and divided by `alpha.shape[0]`.

    Returns:
        (AU, EU): aleatoric and epistemic uncertainty.
    """
    strength = tf.reduce_sum(alpha, axis=1, keepdims=True)
    mean_prob = alpha / strength

    total = mean_prob - tf.square(mean_prob)
    epistemic = mean_prob * (1 - mean_prob) / (strength + 1)
    aleatoric = total - epistemic

    if not reduce:
        return aleatoric, epistemic

    batch = alpha.shape[0]
    return tf.reduce_sum(aleatoric) / batch, tf.reduce_sum(epistemic) / batch
650
+
651
# `uncertainty` expects Dirichlet concentrations alpha = evidence + 1 (the
# same convention compute_metrics uses), not the bare evidence.
pred_var = model(rgb_image)
pred_var = exp_evidence(pred_var) + 1

unc_ale, unc_eps = uncertainty(pred_var)
print('u_ale:', unc_ale)
print('p_eps:', unc_eps)

# The network emits linear logits, so turn them into expected class
# probabilities alpha / sum(alpha) before treating them as confidences.
test_logits = model.predict(X_test)
test_alpha = exp_evidence(test_logits).numpy() + 1
y_pred_probs = test_alpha / test_alpha.sum(axis=1, keepdims=True)
y_pred = np.argmax(y_pred_probs, axis=1)
660
+
661
+ #-----------------------------------------------------------------------------------------------------
662
+
663
+
664
+ #-----------------------------------------------------------------------------------------------------
665
+ #Different Variance based unc
666
+
667
+ # def total_uncertainty_variance(probs):
668
+ # if isinstance(probs, tf.Tensor):
669
+ # mean = tf.reduce_mean(probs, axis=2)
670
+ # t_u = tf.reduce_sum(mean * (1 - mean), axis=1)
671
+ # else:
672
+ # probs = tf.convert_to_tensor(probs, dtype=tf.float32)
673
+ # mean = tf.reduce_mean(probs, axis=2)
674
+ # t_u = tf.reduce_sum(mean * (1 - mean), axis=1)
675
+ # return t_u
676
+
677
+ # def aleatoric_uncertainty_variance(probs):
678
+ # if isinstance(probs, tf.Tensor):
679
+ # a_u = tf.reduce_mean(tf.reduce_sum(probs * (1 - probs), axis=1), axis=1)
680
+ # else:
681
+ # probs = tf.convert_to_tensor(probs, dtype=tf.float32)
682
+ # a_u = tf.reduce_mean(tf.reduce_sum(probs * (1 - probs), axis=1), axis=1)
683
+ # return a_u
684
+
685
+ # def epistemic_uncertainty_variance(probs):
686
+ # if isinstance(probs, tf.Tensor):
687
+ # mean = tf.reduce_mean(probs, axis=2, keepdims=True)
688
+ # e_u = tf.reduce_mean(tf.reduce_sum(probs * (probs - mean), axis=1), axis=1)
689
+ # else:
690
+ # probs = tf.convert_to_tensor(probs, dtype=tf.float32)
691
+ # mean = tf.reduce_mean(probs, axis=2, keepdims=True)
692
+ # e_u = tf.reduce_mean(tf.reduce_sum(probs * (probs - mean), axis=1), axis=1)
693
+ # return e_u
694
+
695
+ # eu = epistemic_uncertainty_variance(pred_rgb)
696
+ # au = aleatoric_uncertainty_variance(pred_rgb)
697
+
698
+ # print('eu:', eu)
699
+ # print('au:', au)
700
+
701
+
702
+ #------------------------------------------------------------------------------------------------------
703
+
704
def softmax(vector, axis=None):
    """Return the softmax of `vector`, computed in a numerically stable way.

    Args:
        vector: Array-like of scores.
        axis: Axis to normalise over. The default `None` normalises over all
            elements, matching the original behaviour for a 1-D vector;
            pass `axis=-1` for row-wise softmax on a batch.

    Returns:
        An array of the same shape as `vector` whose entries sum to 1 along
        `axis`. Empty input yields an empty array.
    """
    scores = np.asarray(vector)
    if scores.size == 0:
        return np.exp(scores)
    # Subtracting the maximum leaves the result unchanged mathematically but
    # keeps np.exp from overflowing on large scores.
    shifted = scores - np.max(scores, axis=axis, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=axis, keepdims=True)
707
+
708
def expected_calibration_error(samples, true_labels, M=5):
    """Expected calibration error with M equal-width confidence bins.

    Args:
        samples: Array of shape (n, classes) holding per-class confidences
            in [0, 1]; values outside (0, 1] fall in no bin.
        true_labels: Integer class labels, one per row of `samples`. Any
            shape that flattens to length n is accepted (e.g. (n,) or (n, 1)).
        M: Number of bins over (0, 1].

    Returns:
        A NumPy array of shape (1,) holding
        sum_m |conf(B_m) - acc(B_m)| * |B_m| / n.

    Raises:
        ValueError: If `samples` is not 2-D or the label count does not
            match the number of rows.
    """
    samples = np.asarray(samples)
    # Flatten labels: comparing (n,) predictions with (n, 1) labels would
    # broadcast to an (n, n) matrix and corrupt the per-bin accuracy.
    labels = np.asarray(true_labels).reshape(-1)

    if samples.ndim != 2:
        raise ValueError("samples must be a 2-D array of shape (n, classes)")
    if samples.shape[0] != labels.shape[0]:
        raise ValueError("samples and true_labels must describe the same number of items")

    ece = np.zeros(1)
    if samples.shape[0] == 0:
        return ece

    bin_boundaries = np.linspace(0, 1, M + 1)

    confidences = np.max(samples, axis=1)
    predicted_label = np.argmax(samples, axis=1)
    accuracies = predicted_label == labels

    for bin_lower, bin_upper in zip(bin_boundaries[:-1], bin_boundaries[1:]):
        # Bins are half-open: (lower, upper].
        in_bin = (confidences > bin_lower) & (confidences <= bin_upper)
        prob_in_bin = in_bin.mean()
        if prob_in_bin > 0:
            accuracy_in_bin = accuracies[in_bin].mean()
            avg_confidence_in_bin = confidences[in_bin].mean()
            ece += np.abs(avg_confidence_in_bin - accuracy_in_bin) * prob_in_bin
    return ece
739
+
740
# y_test is stored as a column; flatten it so labels are compared with the
# predictions element by element rather than broadcast against each other.
ece = expected_calibration_error(y_pred_probs, np.ravel(y_test))
print("Expected Calibration Error:", ece)
742
+
743
+ # xtest = X_test[0]
744
+
745
+ # xtest = tf.convert_to_tensor([xtest])
746
+
747
+ # # Define the FGSM attack function
748
+ # def fgsm_attack(image, label, epsilon):
749
+ # with tf.GradientTape() as tape:
750
+ # tape.watch(image)
751
+ # prediction = model(image)
752
+ # prediction = exp_evidence(prediction) + 1
753
+ # loss,_ = mse_loss(label, prediction, epoch_num=1, num_classes=10, annealing_step=10)
754
+ # #loss = tf.keras.losses.sparse_categorical_crossentropy(label, prediction)
755
+ # gradient = tape.gradient(loss, image)
756
+ # signed_grad = tf.sign(gradient)
757
+ # adversarial_image = image + epsilon * signed_grad
758
+ # adversarial_image = tf.clip_by_value(adversarial_image, -1, 1)
759
+ # return adversarial_image
760
+
761
+ # # Create the adversarial image
762
+ # epsilon = 0.5
763
+ # label = tf.convert_to_tensor([y_test[0]], dtype=tf.int64)
764
+ # adversarial_image = fgsm_attack(xtest, label, epsilon)
765
+
766
+
767
+ # # Get the model predictions for both images
768
+ # original_pred = model(xtest)
769
+ # adversarial_pred = model(adversarial_image)
770
+
771
+ # l1, u1, p1 = compute_metrics(adversarial_pred, y_test[0], 0, global_step=0, annealing_step=10)
772
+
773
+ # print('u_rgb:', u1)
774
+ # print('p_rgb:', p1)
775
+ #print('preds:', pred_rgb)
776
+
777
+ # # # def plot_reliability_diagram(confidences, true_labels, M=5):
778
+ # # # """Plots the reliability diagram for the given data."""
779
+ # # # bin_boundaries = np.linspace(0, 1, M + 1)
780
+ # # # bin_centers = (bin_boundaries[:-1] + bin_boundaries[1:]) / 2
781
+
782
+ # # # # Get binned accuracy (average accuracy for each confidence bin)
783
+ # # # binned_accuracy = np.zeros(M)
784
+ # # # for i, bin_lower in enumerate(bin_boundaries[:-1]):
785
+ # # # bin_upper = bin_boundaries[i + 1]
786
+ # # # in_bin = np.logical_and(confidences >= bin_lower, confidences < bin_upper)
787
+ # # # if in_bin.sum() > 0:
788
+ # # # binned_accuracy[i] = true_labels[in_bin].mean()
789
+
790
+ # # # # Perfect calibration line (y = x)
791
+ # # # perfect_calibration = np.linspace(0, 1, M)
792
+
793
+ # # # plt.plot(bin_centers, binned_accuracy, 'o', label='Binned Accuracy')
794
+ # # # plt.plot(perfect_calibration, perfect_calibration, '-', label='Perfect Calibration')
795
+ # # # plt.xlabel('Predicted Probability')
796
+ # # # plt.ylabel('Observed Accuracy')
797
+ # # # plt.title('Reliability Diagram')
798
+ # # # plt.legend()
799
+ # # # plt.grid(True)
800
+ # # # plt.show()
801
+
802
+
803
+ # # #plot_reliability_diagram(y_pred_probs, y_test)
804
+
805
+
806
+
807
+ # # # def fgsm_attack(image, epsilon, data_grad):
808
+ # # # # Collect the element-wise sign of the data gradient
809
+ # # # sign_data_grad = tf.sign(data_grad)
810
+ # # # # Create the perturbed image by adjusting each pixel of the input image
811
+ # # # perturbed_image = image + epsilon * sign_data_grad
812
+ # # # # Adding clipping to maintain [0,1] range
813
+ # # # perturbed_image = tf.clip_by_value(perturbed_image, 0, 1)
814
+ # # # # Return the perturbed image
815
+ # # # return perturbed_image
816
+
817
+ # # # # Restores the tensors to their original scale
818
+ # # # def denorm(batch, mean=[0.1307], std=[0.3081]):
819
+ # # # mean = tf.convert_to_tensor(mean)
820
+ # # # std = tf.convert_to_tensor(std)
821
+
822
+ # # # return batch * std + mean
823
+
824
+
825
+ # # # def test(model, test_dataset, epsilon):
826
+
827
+ # # # # Accuracy counter
828
+ # # # correct = 0
829
+ # # # adv_examples = []
830
+
831
+ # # # # Loop over all examples in test set
832
+ # # # for data, target in test_dataset:
833
+
834
+ # # # # Send the data and label to the device
835
+ # # # data, target = data.numpy(), target.numpy()
836
+
837
+ # # # # Set requires_grad attribute of tensor. Important for Attack
838
+ # # # data = tf.convert_to_tensor(data, dtype=tf.float32)
839
+ # # # with tf.GradientTape() as tape:
840
+ # # # tape.watch(data)
841
+ # # # # Forward pass the data through the model
842
+ # # # output = model(data)
843
+ # # # init_pred = tf.argmax(output, axis=1, output_type=tf.int32)
844
+
845
+ # # # # If the initial prediction is wrong, don't bother attacking, just move on
846
+ # # # if not np.array_equal(init_pred.numpy(), target):
847
+ # # # continue
848
+
849
+ # # # # Calculate the loss
850
+ # # # loss, _, _ = compute_metrics(outputs, target, epoch=1, global_step=0, annealing_step=10)
851
+
852
+ # # # # Calculate gradients of model in backward pass
853
+ # # # data_grad = tape.gradient(loss, data)
854
+
855
+ # # # # Call FGSM Attack
856
+ # # # perturbed_data = fgsm_attack(data, epsilon, data_grad)
857
+
858
+ # # # # Re-classify the perturbed image
859
+ # # # output = model(perturbed_data)
860
+
861
+ # # # # Check for success
862
+ # # # final_pred = tf.argmax(output, axis=1, output_type=tf.int32)
863
+ # # # if np.array_equal(final_pred.numpy(), target):
864
+ # # # correct += 1
865
+ # # # # Special case for saving 0 epsilon examples
866
+ # # # if epsilon == 0 and len(adv_examples) < 5:
867
+ # # # adv_examples.append((init_pred.numpy()[0], final_pred.numpy()[0], perturbed_data.numpy()))
868
+ # # # else:
869
+ # # # # Save some adv examples for visualization later
870
+ # # # if len(adv_examples) < 5:
871
+ # # # adv_examples.append((init_pred.numpy()[0], final_pred.numpy()[0], perturbed_data.numpy()))
872
+
873
+ # # # # Calculate final accuracy for this epsilon
874
+ # # # final_acc = correct / float(len(test_dataset))
875
+ # # # print(f"Epsilon: {epsilon}\tTest Accuracy = {correct} / {len(test_dataset)} = {final_acc}")
876
+
877
+ # # # # Return the accuracy and adversarial examples
878
+ # # # return final_acc, adv_examples
879
+
880
+
881
+ # # # accuracies = []
882
+ # # # examples = []
883
+ # # # epsilons = [0,0.05, 0.1, 0.15,0.2,0.25,0.3]
884
+
885
+ # # # # Run test for each epsilon
886
+ # # # for eps in epsilons:
887
+ # # # acc, ex = test(model, test_dataset, eps)
888
+ # # # accuracies.append(acc)
889
+ # # # examples.append(ex)
890
+
891
+
892
+ # # # import matplotlib.pyplot as plt
893
+
894
+ # # # # Plot accuracy vs epsilon
895
+ # # # plt.figure(figsize=(5,5))
896
+ # # # plt.plot(epsilons, accuracies, "*-")
897
+ # # # plt.yticks(np.arange(0, 1.1, step=0.1))
898
+ # # # plt.xticks(np.arange(0, .35, step=0.05))
899
+ # # # plt.title("Accuracy vs Epsilon")
900
+ # # # plt.xlabel("Epsilon")
901
+ # # # plt.ylabel("Accuracy")
902
+ # # # plt.grid(True)
903
+ # # # plt.show()
904
+
905
+ # # # # Save the plot as a PNG file
906
+ # # # plt.savefig('accuracy_vs_epsilon.png')
907
+