| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| |
|
|
| import argparse |
| import time |
| import os |
| import numpy as np |
|
|
| import simulate as sm |
| import os |
| from pathlib import Path |
| from agent import DuelingDQNAgent, MetricLogger |
| from params import hyperparams |
|
|
| |
|
|
| |
| |
# Pixel-to-world scale: all geometry below is authored in pixels and divided
# by SCALE to get world units.
SCALE = 30.0


# NOTE(review): not used anywhere in this file's visible code — presumably the
# initial random impulse magnitude from the classic Gym LunarLander; confirm.
INITIAL_RANDOM = 1000.0


# Lander hull outline as (x, y, z) points in pixels; [::-1] reverses the
# winding order before scaling down to world units.
LANDER_POLY = np.array([(-17, -10, 0), (-17, 0, 0), (-14, 17, 0), (14, 17, 0), (17, 0, 0), (17, -10, 0)])[::-1] / SCALE
LEG_AWAY = 20  # horizontal offset of the leg attachment from hull center (pixels)
LEG_DOWN = -7  # vertical offset of the leg attachment (pixels)
LEG_ANGLE = 0.25  # leg tilt from vertical, in radians
LEG_W, LEG_H = 2, 8  # leg width and length (pixels)
|
|
# Right-leg quad: start at the attachment point, extend LEG_H along the tilted
# leg axis, then offset by LEG_W perpendicular to that axis; scaled to world
# units by SCALE.
LEG_RIGHT_POLY = (
    np.array(
        [
            (LEG_AWAY, LEG_DOWN, 0),
            (LEG_AWAY + LEG_H * np.sin(LEG_ANGLE), LEG_DOWN - LEG_H * np.cos(LEG_ANGLE), 0),
            (
                LEG_AWAY + LEG_H * np.sin(LEG_ANGLE) + LEG_W * np.sin(np.pi / 2 - LEG_ANGLE),
                LEG_DOWN - LEG_H * np.cos(LEG_ANGLE) + LEG_W * np.cos(np.pi / 2 - LEG_ANGLE),
                0,
            ),
            (LEG_AWAY + LEG_W * np.sin(np.pi / 2 - LEG_ANGLE), LEG_DOWN + LEG_W * np.cos(np.pi / 2 - LEG_ANGLE), 0),
        ]
    )
    / SCALE
)


# Left leg is the right leg mirrored across the y-axis; reversing the point
# order keeps the polygon winding consistent after the mirror.
LEG_LEFT_POLY = [[-x, y, z] for x, y, z in LEG_RIGHT_POLY][::-1]
LANDER_COLOR = [128 / 255, 102 / 255, 230 / 255]  # RGB components in [0, 1]
|
|
| |
# Viewport size in pixels; W/H below are the matching world-unit extents.
VIEWPORT_W = 600
VIEWPORT_H = 400


W = VIEWPORT_W / SCALE
H = VIEWPORT_H / SCALE


# Random terrain: CHUNKS surface points with random heights, flattened around
# the middle to form the helipad. NOTE: this runs at import time with an
# unseeded RNG, so every import of this module draws a fresh terrain.
CHUNKS = 11
HEIGHTS = np.random.uniform(0, H / 2, size=(CHUNKS + 1,))
CHUNK_X = [W / (CHUNKS - 1) * i for i in range(CHUNKS)]
HELIPAD_x1 = CHUNK_X[CHUNKS // 2 - 1]  # left edge of the helipad
HELIPAD_x2 = CHUNK_X[CHUNKS // 2 + 1]  # right edge of the helipad
HELIPAD_y = H / 4
# Pin the five chunks around the center to the helipad height so the 3-point
# smoothing below leaves the pad perfectly flat.
HEIGHTS[CHUNKS // 2 - 2] = HELIPAD_y
HEIGHTS[CHUNKS // 2 - 1] = HELIPAD_y
HEIGHTS[CHUNKS // 2 + 0] = HELIPAD_y
HEIGHTS[CHUNKS // 2 + 1] = HELIPAD_y
HEIGHTS[CHUNKS // 2 + 2] = HELIPAD_y
# 3-point moving average of the heights; at i == 0 the HEIGHTS[i - 1] term
# wraps to the last element via Python's negative indexing.
SMOOTH_Y = [0.33 * (HEIGHTS[i - 1] + HEIGHTS[i + 0] + HEIGHTS[i + 1]) for i in range(CHUNKS)]


# Engine/leg physics constants (values from the classic LunarLander).
MAIN_ENGINE_POWER = 13.0
SIDE_ENGINE_POWER = 0.6
LEG_SPRING_TORQUE = 40
SIDE_ENGINE_HEIGHT = 14.0
SIDE_ENGINE_AWAY = 12.0


# Closed ground polygon: the smoothed surface points plus two base corners
# 3 units below the first surface height (flat bottom edge).
LAND_POLY = (
    [[CHUNK_X[0], SMOOTH_Y[0] - 3, 0]]
    + [[x, y, 0] for x, y in zip(CHUNK_X, SMOOTH_Y)]
    + [[CHUNK_X[-1], SMOOTH_Y[0] - 3, 0]]
)
|
|
|
|
def make_lander(engine="unity", engine_exe=""):
    """Assemble the lunar-lander scene.

    Builds the lander actor (extruded hull, two legs, invisible collider
    boxes, and a 3-action force actuator), the terrain with one collider per
    surface segment, a cone marking the landing target, a state sensor
    relating lander and target, and a dense distance-based reward.

    Args:
        engine: simulate backend to use ("unity" by default).
        engine_exe: path to a pre-built Unity executable; "" lets the
            backend pick its default.

    Returns:
        sm.Scene: the fully assembled scene.
    """
    sc = sm.Scene(engine=engine, engine_exe=engine_exe)

    # Random spawn above the terrain; z is zeroed because motion is planar.
    lander_init_pos = (10, 15, 0) + np.random.uniform(2, 4, 3)
    lander_init_pos[2] = 0.0

    lander_material = sm.Material(base_color=LANDER_COLOR)

    # Hull: a 2D polygon actor with gravity, constrained to the x/y plane
    # (no z translation, no x/y rotation).
    lander = sm.Polygon(
        points=LANDER_POLY,
        material=lander_material,
        position=lander_init_pos,
        name="lunar_lander",
        is_actor=True,
        physics_component=sm.RigidBodyComponent(
            use_gravity=True,
            constraints=["freeze_rotation_x", "freeze_rotation_y", "freeze_position_z"],
            mass=1,
        ),
    )

    # Give the flat polygon depth so the physics body is a solid.
    lander.mesh.extrude((0, 0, -1), capping=True, inplace=True)
    # Discrete actions: 0 = push +x, 1 = push -x, 2 = push +y (thrust).
    lander.actuator = sm.Actuator(
        mapping=[
            sm.ActionMapping("add_force", axis=[1, 0, 0], amplitude=5),
            sm.ActionMapping("add_force", axis=[1, 0, 0], amplitude=-5),
            sm.ActionMapping("add_force", axis=[0, 1, 0], amplitude=2.5),
        ],
        n=3,
    )

    # Transparent collider boxes approximating the hull: one across the
    # bottom (at the legs' lowest y) and one along each slanted side.
    lander += sm.Box(
        position=[0, np.min(LEG_RIGHT_POLY, axis=0)[1], -0.5],
        bounds=[0.1, 2 * np.max(LEG_RIGHT_POLY, axis=0)[0], 1],
        material=sm.Material.TRANSPARENT,
        rotation=[0, 0, 90],
        with_collider=True,
        name="lander_collider_box_bottom",
    )
    lander += sm.Box(
        position=[-0.6, 0, -0.5],
        bounds=[0.1, 26 / SCALE, 1],
        material=sm.Material.TRANSPARENT,
        rotation=[0, 0, -15],
        with_collider=True,
        name="lander_collider_box_right",
    )
    lander += sm.Box(
        position=[0.6, 0, -0.5],
        bounds=[0.1, 26 / SCALE, 1],
        material=sm.Material.TRANSPARENT,
        rotation=[0, 0, 15],
        with_collider=True,
        name="lander_collider_box_left",
    )

    # Legs: visual children of the hull (no physics component of their own).
    r_leg = sm.Polygon(
        points=LEG_RIGHT_POLY,
        material=lander_material,
        parent=lander,
        name="lander_r_leg",
    )
    r_leg.mesh.extrude((0, 0, -1), capping=True, inplace=True)

    l_leg = sm.Polygon(
        points=LEG_LEFT_POLY,
        material=lander_material,
        parent=lander,
        name="lander_l_leg",
    )
    l_leg.mesh.extrude((0, 0, -1), capping=True, inplace=True)

    # Terrain mesh; reverse the polygon winding so the visible face points
    # toward the camera.
    land = sm.Polygon(
        points=LAND_POLY[::-1],
        material=sm.Material.GRAY,
        name="Moon",
    )
    land.mesh.extrude((0, 0, -1), capping=True, inplace=True)

    # One thin collider box per surface segment, centered on the segment and
    # rotated to lie along it (+90 because the box's long axis is y).
    for i in range(len(CHUNK_X) - 1):
        x1, x2 = CHUNK_X[i], CHUNK_X[i + 1]
        y1, y2 = SMOOTH_Y[i], SMOOTH_Y[i + 1]

        # Segment slope angle. Equivalent to the original
        # arctan2(y2 - (y1 + y2) / 2, (x2 - x1) / 2): both arguments were
        # simply halved, and arctan2 is invariant under positive scaling.
        rotation = [0, 0, 90 + np.degrees(np.arctan2(y2 - y1, x2 - x1))]
        block_i = sm.Box(
            position=[(x1 + x2) / 2, (y1 + y2) / 2, -0.5],
            # 1.025 slightly over-lengthens each box so neighbors overlap
            # and nothing slips between segments.
            bounds=[0.2, 1.025 * np.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2), 1],
            material=sm.Material.GRAY,
            rotation=rotation,
            with_collider=True,
            name="land_collider_" + str(i),
        )
        sc += block_i

    # Yellow cone marking the center of the helipad.
    sc += sm.Cone(
        position=[(HELIPAD_x1 + HELIPAD_x2) / 2, HELIPAD_y, -0.5],
        height=10 / SCALE,
        radius=10 / SCALE,
        material=sm.Material.YELLOW,
        name="target",
    )

    # Observation: target position/rotation/distance relative to the lander.
    sc += sm.StateSensor(
        target_entity=sc.target,
        reference_entity=lander,
        properties=["position", "rotation", "distance"],
        name="goal_sense",
    )

    # Dense reward: negative of the lander-target distance (a cost).
    cost = sm.RewardFunction(
        type="dense", entity_a=lander, entity_b=sc.target, scalar=-1
    )
    lander += cost

    sc += lander
    sc += land

    return sc
|
|
|
|
def get_values(state):
    """Pull the observation vector out of an environment state dict.

    Returns ``None`` when the key is absent.

    NOTE(review): the sensor built in ``make_lander`` is named "goal_sense",
    yet the lookup key here is "StateSensor" — presumably the backend keys
    observations by sensor class rather than by sensor name; confirm.
    """
    sensor_key = "StateSensor"
    return state.get(sensor_key)
|
|
def train(agent, env, logger, episodes=20000, max_steps=100):
    """Run the DQN training loop.

    Args:
        agent: agent exposing ``act``, ``cache``, ``learn``,
            ``exploration_rate`` and ``curr_step``.
        env: RL environment with ``reset()`` and ``step(action)`` returning
            ``(next_state, reward, done, info)``.
        logger: MetricLogger-style object with ``log_step``, ``log_episode``
            and ``record``.
        episodes: number of training episodes (default keeps the original
            hard-coded 20000).
        max_steps: per-episode step cap (default keeps the original
            hard-coded 100).
    """
    for e in range(episodes):
        state = env.reset()

        for _ in range(max_steps):
            # Epsilon-greedy action from the current observation.
            action = agent.act(get_values(state))

            next_state, reward, done, info = env.step(action)

            # Store the transition, then take one optimization step.
            agent.cache(get_values(state), get_values(next_state), action, reward, done)
            q, loss = agent.learn()

            logger.log_step(reward, loss, q)

            state = next_state

            if done:
                break

        logger.log_episode(e)

        # Periodic progress report.
        if e % 20 == 0:
            logger.record(episode=e, epsilon=agent.exploration_rate, step=agent.curr_step)
|
|
|
|
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--build_exe", default="", type=str, required=False, help="Pre-built unity app for simulate")
    parser.add_argument(
        "--num_steps", default=100, type=int, required=False, help="number of steps to run the simulator"
    )
    args = parser.parse_args()

    # Build the scene and wrap it as a gym-style RL environment.
    sc = make_lander(engine="unity", engine_exe=args.build_exe)
    sc += sm.LightSun()

    env = sm.RLEnv(sc, frame_skip=1)
    env.reset()

    # Set to a checkpoint path to resume training from a saved model.
    checkpoint = None

    # Directory for model checkpoints and metric logs; created if missing
    # (exist_ok avoids the check-then-create race of os.path.exists).
    save_dir = Path("checkpoints/lunar-lander-dueling-dqn-rc")
    save_dir.mkdir(parents=True, exist_ok=True)

    logger = MetricLogger(save_dir)

    print("Training Dueling DQN Agent with step decay!")
    agent = DuelingDQNAgent(
        state_dim=7,
        action_dim=env.action_space.n,
        save_dir=save_dir,
        checkpoint=checkpoint,
        **hyperparams
    )

    # NOTE(review): args.num_steps is parsed but never used — presumably
    # intended as the per-episode step cap inside train(); confirm and wire
    # it through.
    train(agent, env, logger)
|
|