| """ |
| An example of how one might use PyRep to create their RL environments. |
| In this case, the Franka Panda must reach a randomly placed target. |
| This script contains examples of: |
| - RL environment example. |
| - Scene manipulation. |
| - Environment resets. |
| - Setting joint properties (control loop disabled, motor locked at 0 vel) |
| """ |
from os.path import dirname, join, abspath

from pyrep import PyRep
from pyrep.robots.arms.panda import Panda
from pyrep.objects.shape import Shape
import numpy as np

SCENE_FILE = join(dirname(abspath(__file__)),
                  'scene_reinforcement_learning_env.ttt')
# Bounds of the box in which the target is randomly placed, in metres.
POS_MIN, POS_MAX = [0.8, -0.2, 1.0], [1.0, 0.2, 1.4]
EPISODES = 5
EPISODE_LENGTH = 200


class ReacherEnv(object):

    def __init__(self):
        self.pr = PyRep()
        # Launch the scene; set headless=True to run without a GUI.
        self.pr.launch(SCENE_FILE, headless=False)
        self.pr.start()
        self.agent = Panda()
        # Disable the arm's built-in control loop and lock each motor when its
        # target velocity is zero, so the arm is driven purely by the joint
        # target velocities set in step().
        self.agent.set_control_loop_enabled(False)
        self.agent.set_motor_locked_at_zero_velocity(True)
        self.target = Shape('target')
        self.agent_ee_tip = self.agent.get_tip()
        self.initial_joint_positions = self.agent.get_joint_positions()

    def _get_state(self):
        # State: arm joint angles and velocities, plus the target position.
        return np.concatenate([self.agent.get_joint_positions(),
                               self.agent.get_joint_velocities(),
                               self.target.get_position()])

    def reset(self):
        # Move the target to a random position within the cuboid and return
        # the arm to its starting joint configuration.
        pos = list(np.random.uniform(POS_MIN, POS_MAX))
        self.target.set_position(pos)
        self.agent.set_joint_positions(self.initial_joint_positions)
        return self._get_state()

    def step(self, action):
        self.agent.set_joint_target_velocities(action)  # Execute action on arm
        self.pr.step()  # Step the physics simulation
        ax, ay, az = self.agent_ee_tip.get_position()
        tx, ty, tz = self.target.get_position()
        # Reward is the negative distance from the gripper tip to the target.
        reward = -np.sqrt((ax - tx) ** 2 + (ay - ty) ** 2 + (az - tz) ** 2)
        return reward, self._get_state()

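    # Note that step() returns only (reward, next_state): there is no done
    # flag, so episodes simply run for a fixed EPISODE_LENGTH (see the
    # training loop at the bottom of this script).
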
    def shutdown(self):
        # Stop the simulation and shut down CoppeliaSim.
        self.pr.stop()
        self.pr.shutdown()


class Agent(object):

    def act(self, state):
        # A stand-in policy: ignore the state and return a random target
        # velocity for each of the Panda's 7 joints.
        del state
        return list(np.random.uniform(-1.0, 1.0, size=(7,)))

    def learn(self, replay_buffer):
        # A real agent would update its policy from the replay buffer here.
        del replay_buffer


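# Any agent exposing act(state) -> 7 joint velocities and
# learn(replay_buffer) can be dropped in place of the random Agent above.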
env = ReacherEnv()
agent = Agent()
replay_buffer = []

for e in range(EPISODES):

    print('Starting episode %d' % e)
    state = env.reset()
    for i in range(EPISODE_LENGTH):
        action = agent.act(state)
        reward, next_state = env.step(action)
        # Store the transition and let the agent learn from the buffer.
        replay_buffer.append((state, action, reward, next_state))
        state = next_state
        agent.learn(replay_buffer)

print('Done!')
env.shutdown()