|
|
import os

import gymnasium as gym
import matplotlib.pyplot as plt
import numpy
import numpy as np

from Qlearning_pole import Qlearning
|
|
| |
| |
|
|
|
|
# --- Training phase ---
# Train a Q-learning agent (CartPole, per Qlearning_pole) and record
# the per-episode reward sums in Q1.sumRewardsEpisode.
Q1 = Qlearning()
Q1.train()

# Evaluate the learned (greedy) policy once and release the environment.
(obtainedRewardsOptimal, env1) = Q1.simulateLearnedStrategy()
env1.close()
# Report the total reward instead of discarding it (the original
# np.sum(...) expression statement had no effect).
print("Total reward (learned strategy):", np.sum(obtainedRewardsOptimal))

# Persist the learned Q-table for later reuse (np alias for consistency
# with the rest of the file).
np.save("Qmatrix_new.npy", Q1.Q)

# Convergence plot. Title and labels must be set BEFORE savefig —
# the original called plt.title after plt.savefig, so the saved
# convergence.png had no title.
plt.figure(figsize=(12, 5))
plt.plot(Q1.sumRewardsEpisode, color='blue', linewidth=1)
plt.xlabel('Episode')
plt.ylabel('Reward')
plt.yscale('log')
plt.title("Convergence of rewards")
plt.savefig('convergence.png')
plt.show()
|
|
|
|
| |
| env1.close() |
| |
| np.sum(obtainedRewardsOptimal) |
|
|
| |
| obtainedRewardsRandom = [] |
| for i in range(50): |
| (rewardsRandom, env2) = Q1.simulateRandomStrategy() |
| obtainedRewardsRandom.append(rewardsRandom) |
| plt.title("Rewards with random strategy") |
| plt.hist(obtainedRewardsRandom) |
| plt.xlabel('Sum of rewards') |
| plt.ylabel('Percentage') |
| plt.savefig('histogram.png') |
| plt.show() |
|
|
| |
| (obtainedRewardsOptimal, env1) = Q1.simulateLearnedStrategy() |