| """ |
| An example of how one might use PyRep to create their RL environments. |
| In this case, the Franka Panda must reach a randomly placed target. |
| This script contains examples of: |
| - RL environment example. |
| - Scene manipulation. |
| - Environment resets. |
| - Setting joint properties (control loop disabled, motor locked at 0 vel) |
| """ |
from os.path import dirname, join, abspath

from pyrep import PyRep
from pyrep.robots.arms.panda import Panda
from pyrep.objects.shape import Shape
import numpy as np

SCENE_FILE = join(dirname(abspath(__file__)),
                  'scene_reinforcement_learning_env.ttt')
# Bounds of the box in which the target is randomly placed, in metres.
POS_MIN, POS_MAX = [0.8, -0.2, 1.0], [1.0, 0.2, 1.4]
EPISODES = 5
EPISODE_LENGTH = 200


class ReacherEnv(object):

    def __init__(self):
        self.pr = PyRep()
        # Launch the scene; set headless=True to run without a GUI.
        self.pr.launch(SCENE_FILE, headless=False)
        self.pr.start()
        self.agent = Panda()
        # Disable the arm's built-in control loop and lock each motor when its
        # target velocity is zero, so the arm is driven purely by the joint
        # target velocities set in step().
        self.agent.set_control_loop_enabled(False)
        self.agent.set_motor_locked_at_zero_velocity(True)
        self.target = Shape('target')
        self.agent_ee_tip = self.agent.get_tip()
        self.initial_joint_positions = self.agent.get_joint_positions()

    def _get_state(self):
        # State: arm joint angles and velocities, plus the target position.
        return np.concatenate([self.agent.get_joint_positions(),
                               self.agent.get_joint_velocities(),
                               self.target.get_position()])

    def reset(self):
        # Move the target to a random position within the cuboid and return
        # the arm to its starting joint configuration.
        pos = list(np.random.uniform(POS_MIN, POS_MAX))
        self.target.set_position(pos)
        self.agent.set_joint_positions(self.initial_joint_positions)
        return self._get_state()

    def step(self, action):
        self.agent.set_joint_target_velocities(action)  # Execute action on arm
        self.pr.step()  # Step the physics simulation
        ax, ay, az = self.agent_ee_tip.get_position()
        tx, ty, tz = self.target.get_position()
        # Reward is the negative distance from the gripper tip to the target.
        reward = -np.sqrt((ax - tx) ** 2 + (ay - ty) ** 2 + (az - tz) ** 2)
        return reward, self._get_state()

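    # Note that step() returns only (reward, next_state): there is no done
    # flag, so episodes simply run for a fixed EPISODE_LENGTH (see the
    # training loop at the bottom of this script).
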
    def shutdown(self):
        # Stop the simulation and shut down CoppeliaSim.
        self.pr.stop()
        self.pr.shutdown()


class Agent(object):

    def act(self, state):
        # A stand-in policy: ignore the state and return a random target
        # velocity for each of the Panda's 7 joints.
        del state
        return list(np.random.uniform(-1.0, 1.0, size=(7,)))

    def learn(self, replay_buffer):
        # A real agent would update its policy from the replay buffer here.
        del replay_buffer


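# Any agent exposing act(state) -> 7 joint velocities and
# learn(replay_buffer) can be dropped in place of the random Agent above.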
env = ReacherEnv()
agent = Agent()
replay_buffer = []

for e in range(EPISODES):

    print('Starting episode %d' % e)
    state = env.reset()
    for i in range(EPISODE_LENGTH):
        action = agent.act(state)
        reward, next_state = env.step(action)
        # Store the transition and let the agent learn from the buffer.
        replay_buffer.append((state, action, reward, next_state))
        state = next_state
        agent.learn(replay_buffer)

print('Done!')
env.shutdown()