|
|
import os

import gymnasium as gym
import matplotlib.pyplot as plt
import numpy
import numpy as np

from Qlearning_pole import Qlearning
|
|
| |
| |
|
|
|
|
# --- Training phase ---
# Train a Q-learning agent (CartPole, per Qlearning_pole) and record
# the per-episode reward sums in Q1.sumRewardsEpisode.
Q1 = Qlearning()
Q1.train()

# Evaluate the learned (greedy) policy once and release the environment.
(obtainedRewardsOptimal, env1) = Q1.simulateLearnedStrategy()
env1.close()
# Report the total reward instead of discarding it (the original
# np.sum(...) expression statement had no effect).
print("Total reward (learned strategy):", np.sum(obtainedRewardsOptimal))

# Persist the learned Q-table for later reuse (np alias for consistency
# with the rest of the file).
np.save("Qmatrix_new.npy", Q1.Q)

# Convergence plot. Title and labels must be set BEFORE savefig —
# the original called plt.title after plt.savefig, so the saved
# convergence.png had no title.
plt.figure(figsize=(12, 5))
plt.plot(Q1.sumRewardsEpisode, color='blue', linewidth=1)
plt.xlabel('Episode')
plt.ylabel('Reward')
plt.yscale('log')
plt.title("Convergence of rewards")
plt.savefig('convergence.png')
plt.show()
|
|
|
|
| |
| env1.close() |
| |
| np.sum(obtainedRewardsOptimal) |
|
|
| |
| obtainedRewardsRandom = [] |
| for i in range(50): |
| (rewardsRandom, env2) = Q1.simulateRandomStrategy() |
| obtainedRewardsRandom.append(rewardsRandom) |
| plt.title("Rewards with random strategy") |
| plt.hist(obtainedRewardsRandom) |
| plt.xlabel('Sum of rewards') |
| plt.ylabel('Percentage') |
| plt.savefig('histogram.png') |
| plt.show() |
|
|
| |
| (obtainedRewardsOptimal, env1) = Q1.simulateLearnedStrategy() |