| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| |
|
|
| import argparse |
| import time |
| import os |
| import numpy as np |
|
|
| import simulate as sm |
| import os |
| from pathlib import Path |
| from agent import DuelingDQNAgent, MetricLogger |
| from params import hyperparams |
|
|
| |
|
|
| |
| |
# Pixel-to-world scale: all geometry below is authored in pixels and divided
# by SCALE to get world units.
SCALE = 30.0


# NOTE(review): not used anywhere in this file's visible code — presumably the
# initial random impulse magnitude from the classic Gym LunarLander; confirm.
INITIAL_RANDOM = 1000.0


# Lander hull outline as (x, y, z) points in pixels; [::-1] reverses the
# winding order before scaling down to world units.
LANDER_POLY = np.array([(-17, -10, 0), (-17, 0, 0), (-14, 17, 0), (14, 17, 0), (17, 0, 0), (17, -10, 0)])[::-1] / SCALE
LEG_AWAY = 20  # horizontal offset of the leg attachment from hull center (pixels)
LEG_DOWN = -7  # vertical offset of the leg attachment (pixels)
LEG_ANGLE = 0.25  # leg tilt from vertical, in radians
LEG_W, LEG_H = 2, 8  # leg width and length (pixels)
|
|
# Right-leg quad: start at the attachment point, extend LEG_H along the tilted
# leg axis, then offset by LEG_W perpendicular to that axis; scaled to world
# units by SCALE.
LEG_RIGHT_POLY = (
    np.array(
        [
            (LEG_AWAY, LEG_DOWN, 0),
            (LEG_AWAY + LEG_H * np.sin(LEG_ANGLE), LEG_DOWN - LEG_H * np.cos(LEG_ANGLE), 0),
            (
                LEG_AWAY + LEG_H * np.sin(LEG_ANGLE) + LEG_W * np.sin(np.pi / 2 - LEG_ANGLE),
                LEG_DOWN - LEG_H * np.cos(LEG_ANGLE) + LEG_W * np.cos(np.pi / 2 - LEG_ANGLE),
                0,
            ),
            (LEG_AWAY + LEG_W * np.sin(np.pi / 2 - LEG_ANGLE), LEG_DOWN + LEG_W * np.cos(np.pi / 2 - LEG_ANGLE), 0),
        ]
    )
    / SCALE
)


# Left leg is the right leg mirrored across the y-axis; reversing the point
# order keeps the polygon winding consistent after the mirror.
LEG_LEFT_POLY = [[-x, y, z] for x, y, z in LEG_RIGHT_POLY][::-1]
LANDER_COLOR = [128 / 255, 102 / 255, 230 / 255]  # RGB components in [0, 1]
|
|
| |
# Viewport size in pixels; W/H below are the matching world-unit extents.
VIEWPORT_W = 600
VIEWPORT_H = 400


W = VIEWPORT_W / SCALE
H = VIEWPORT_H / SCALE


# Random terrain: CHUNKS surface points with random heights, flattened around
# the middle to form the helipad. NOTE: this runs at import time with an
# unseeded RNG, so every import of this module draws a fresh terrain.
CHUNKS = 11
HEIGHTS = np.random.uniform(0, H / 2, size=(CHUNKS + 1,))
CHUNK_X = [W / (CHUNKS - 1) * i for i in range(CHUNKS)]
HELIPAD_x1 = CHUNK_X[CHUNKS // 2 - 1]  # left edge of the helipad
HELIPAD_x2 = CHUNK_X[CHUNKS // 2 + 1]  # right edge of the helipad
HELIPAD_y = H / 4
# Pin the five chunks around the center to the helipad height so the 3-point
# smoothing below leaves the pad perfectly flat.
HEIGHTS[CHUNKS // 2 - 2] = HELIPAD_y
HEIGHTS[CHUNKS // 2 - 1] = HELIPAD_y
HEIGHTS[CHUNKS // 2 + 0] = HELIPAD_y
HEIGHTS[CHUNKS // 2 + 1] = HELIPAD_y
HEIGHTS[CHUNKS // 2 + 2] = HELIPAD_y
# 3-point moving average of the heights; at i == 0 the HEIGHTS[i - 1] term
# wraps to the last element via Python's negative indexing.
SMOOTH_Y = [0.33 * (HEIGHTS[i - 1] + HEIGHTS[i + 0] + HEIGHTS[i + 1]) for i in range(CHUNKS)]


# Engine/leg physics constants (values from the classic LunarLander).
MAIN_ENGINE_POWER = 13.0
SIDE_ENGINE_POWER = 0.6
LEG_SPRING_TORQUE = 40
SIDE_ENGINE_HEIGHT = 14.0
SIDE_ENGINE_AWAY = 12.0


# Closed ground polygon: the smoothed surface points plus two base corners
# 3 units below the first surface height (flat bottom edge).
LAND_POLY = (
    [[CHUNK_X[0], SMOOTH_Y[0] - 3, 0]]
    + [[x, y, 0] for x, y in zip(CHUNK_X, SMOOTH_Y)]
    + [[CHUNK_X[-1], SMOOTH_Y[0] - 3, 0]]
)
|
|
|
|
def make_lander(engine="unity", engine_exe=""):
    """Assemble the lunar-lander scene.

    Builds the lander actor (extruded hull, two legs, invisible collider
    boxes, and a 3-action force actuator), the terrain with one collider per
    surface segment, a cone marking the landing target, a state sensor
    relating lander and target, and a dense distance-based reward.

    Args:
        engine: simulate backend to use ("unity" by default).
        engine_exe: path to a pre-built Unity executable; "" lets the
            backend pick its default.

    Returns:
        sm.Scene: the fully assembled scene.
    """
    sc = sm.Scene(engine=engine, engine_exe=engine_exe)

    # Random spawn above the terrain; z is zeroed because motion is planar.
    lander_init_pos = (10, 15, 0) + np.random.uniform(2, 4, 3)
    lander_init_pos[2] = 0.0

    lander_material = sm.Material(base_color=LANDER_COLOR)

    # Hull: a 2D polygon actor with gravity, constrained to the x/y plane
    # (no z translation, no x/y rotation).
    lander = sm.Polygon(
        points=LANDER_POLY,
        material=lander_material,
        position=lander_init_pos,
        name="lunar_lander",
        is_actor=True,
        physics_component=sm.RigidBodyComponent(
            use_gravity=True,
            constraints=["freeze_rotation_x", "freeze_rotation_y", "freeze_position_z"],
            mass=1,
        ),
    )

    # Give the flat polygon depth so the physics body is a solid.
    lander.mesh.extrude((0, 0, -1), capping=True, inplace=True)
    # Discrete actions: 0 = push +x, 1 = push -x, 2 = push +y (thrust).
    lander.actuator = sm.Actuator(
        mapping=[
            sm.ActionMapping("add_force", axis=[1, 0, 0], amplitude=5),
            sm.ActionMapping("add_force", axis=[1, 0, 0], amplitude=-5),
            sm.ActionMapping("add_force", axis=[0, 1, 0], amplitude=2.5),
        ],
        n=3,
    )

    # Transparent collider boxes approximating the hull: one across the
    # bottom (at the legs' lowest y) and one along each slanted side.
    lander += sm.Box(
        position=[0, np.min(LEG_RIGHT_POLY, axis=0)[1], -0.5],
        bounds=[0.1, 2 * np.max(LEG_RIGHT_POLY, axis=0)[0], 1],
        material=sm.Material.TRANSPARENT,
        rotation=[0, 0, 90],
        with_collider=True,
        name="lander_collider_box_bottom",
    )
    lander += sm.Box(
        position=[-0.6, 0, -0.5],
        bounds=[0.1, 26 / SCALE, 1],
        material=sm.Material.TRANSPARENT,
        rotation=[0, 0, -15],
        with_collider=True,
        name="lander_collider_box_right",
    )
    lander += sm.Box(
        position=[0.6, 0, -0.5],
        bounds=[0.1, 26 / SCALE, 1],
        material=sm.Material.TRANSPARENT,
        rotation=[0, 0, 15],
        with_collider=True,
        name="lander_collider_box_left",
    )

    # Legs: visual children of the hull (no physics component of their own).
    r_leg = sm.Polygon(
        points=LEG_RIGHT_POLY,
        material=lander_material,
        parent=lander,
        name="lander_r_leg",
    )
    r_leg.mesh.extrude((0, 0, -1), capping=True, inplace=True)

    l_leg = sm.Polygon(
        points=LEG_LEFT_POLY,
        material=lander_material,
        parent=lander,
        name="lander_l_leg",
    )
    l_leg.mesh.extrude((0, 0, -1), capping=True, inplace=True)

    # Terrain mesh; reverse the polygon winding so the visible face points
    # toward the camera.
    land = sm.Polygon(
        points=LAND_POLY[::-1],
        material=sm.Material.GRAY,
        name="Moon",
    )
    land.mesh.extrude((0, 0, -1), capping=True, inplace=True)

    # One thin collider box per surface segment, centered on the segment and
    # rotated to lie along it (+90 because the box's long axis is y).
    for i in range(len(CHUNK_X) - 1):
        x1, x2 = CHUNK_X[i], CHUNK_X[i + 1]
        y1, y2 = SMOOTH_Y[i], SMOOTH_Y[i + 1]

        # Segment slope angle. Equivalent to the original
        # arctan2(y2 - (y1 + y2) / 2, (x2 - x1) / 2): both arguments were
        # simply halved, and arctan2 is invariant under positive scaling.
        rotation = [0, 0, 90 + np.degrees(np.arctan2(y2 - y1, x2 - x1))]
        block_i = sm.Box(
            position=[(x1 + x2) / 2, (y1 + y2) / 2, -0.5],
            # 1.025 slightly over-lengthens each box so neighbors overlap
            # and nothing slips between segments.
            bounds=[0.2, 1.025 * np.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2), 1],
            material=sm.Material.GRAY,
            rotation=rotation,
            with_collider=True,
            name="land_collider_" + str(i),
        )
        sc += block_i

    # Yellow cone marking the center of the helipad.
    sc += sm.Cone(
        position=[(HELIPAD_x1 + HELIPAD_x2) / 2, HELIPAD_y, -0.5],
        height=10 / SCALE,
        radius=10 / SCALE,
        material=sm.Material.YELLOW,
        name="target",
    )

    # Observation: target position/rotation/distance relative to the lander.
    sc += sm.StateSensor(
        target_entity=sc.target,
        reference_entity=lander,
        properties=["position", "rotation", "distance"],
        name="goal_sense",
    )

    # Dense reward: negative of the lander-target distance (a cost).
    cost = sm.RewardFunction(
        type="dense", entity_a=lander, entity_b=sc.target, scalar=-1
    )
    lander += cost

    sc += lander
    sc += land

    return sc
|
|
|
|
def get_values(state):
    """Pull the observation vector out of an environment state dict.

    Returns ``None`` when the key is absent.

    NOTE(review): the sensor built in ``make_lander`` is named "goal_sense",
    yet the lookup key here is "StateSensor" — presumably the backend keys
    observations by sensor class rather than by sensor name; confirm.
    """
    sensor_key = "StateSensor"
    return state.get(sensor_key)
|
|
def train(agent, env, logger, episodes=20000, max_steps=100):
    """Run the DQN training loop.

    Args:
        agent: agent exposing ``act``, ``cache``, ``learn``,
            ``exploration_rate`` and ``curr_step``.
        env: RL environment with ``reset()`` and ``step(action)`` returning
            ``(next_state, reward, done, info)``.
        logger: MetricLogger-style object with ``log_step``, ``log_episode``
            and ``record``.
        episodes: number of training episodes (default keeps the original
            hard-coded 20000).
        max_steps: per-episode step cap (default keeps the original
            hard-coded 100).
    """
    for e in range(episodes):
        state = env.reset()

        for _ in range(max_steps):
            # Epsilon-greedy action from the current observation.
            action = agent.act(get_values(state))

            next_state, reward, done, info = env.step(action)

            # Store the transition, then take one optimization step.
            agent.cache(get_values(state), get_values(next_state), action, reward, done)
            q, loss = agent.learn()

            logger.log_step(reward, loss, q)

            state = next_state

            if done:
                break

        logger.log_episode(e)

        # Periodic progress report.
        if e % 20 == 0:
            logger.record(episode=e, epsilon=agent.exploration_rate, step=agent.curr_step)
|
|
|
|
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--build_exe", default="", type=str, required=False, help="Pre-built unity app for simulate")
    parser.add_argument(
        "--num_steps", default=100, type=int, required=False, help="number of steps to run the simulator"
    )
    args = parser.parse_args()

    # Build the scene and wrap it as a gym-style RL environment.
    sc = make_lander(engine="unity", engine_exe=args.build_exe)
    sc += sm.LightSun()

    env = sm.RLEnv(sc, frame_skip=1)
    env.reset()

    # Set to a checkpoint path to resume training from a saved model.
    checkpoint = None

    # Directory for model checkpoints and metric logs; created if missing
    # (exist_ok avoids the check-then-create race of os.path.exists).
    save_dir = Path("checkpoints/lunar-lander-dueling-dqn-rc")
    save_dir.mkdir(parents=True, exist_ok=True)

    logger = MetricLogger(save_dir)

    print("Training Dueling DQN Agent with step decay!")
    agent = DuelingDQNAgent(
        state_dim=7,
        action_dim=env.action_space.n,
        save_dir=save_dir,
        checkpoint=checkpoint,
        **hyperparams
    )

    # NOTE(review): args.num_steps is parsed but never used — presumably
    # intended as the per-episode step cap inside train(); confirm and wire
    # it through.
    train(agent, env, logger)
|
|