Akchunks committed on
Commit
ba8901f
·
verified ·
1 Parent(s): 3b9975b

Upload PPO agent

Browse files
README.md ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - LunarLander-v3
4
+ - ppo
5
+ - reinforcement-learning
6
+ - cleanrl
7
+ - colab
8
+ model-index:
9
+ - name: PPO
10
+ results:
11
+ - task:
12
+ type: reinforcement-learning
13
+ name: reinforcement-learning
14
+ dataset:
15
+ name: LunarLander-v3
16
+ type: LunarLander-v3
17
+ metrics:
18
+ - type: mean_reward
19
+ value: -200.82 +/- 95.02
20
+ name: mean_reward
21
+ verified: false
22
+ ---
23
+ # PPO Agent for LunarLander-v3
24
+
25
+ Mean reward: -200.82 ± 95.02
26
+
27
+ ```json
28
+ {
29
+ "exp_name": "ppo_colab",
30
+ "seed": 1,
31
+ "torch_deterministic": true,
32
+ "cuda": true,
33
+ "track": false,
34
+ "wandb_project_name": "cleanRL",
35
+ "wandb_entity": null,
36
+ "capture_video": true,
37
+ "env_id": "LunarLander-v3",
38
+ "total_timesteps": 10,
39
+ "learning_rate": 0.00025,
40
+ "num_envs": 1,
41
+ "num_steps": 128,
42
+ "anneal_lr": true,
43
+ "gae": true,
44
+ "gamma": 0.99,
45
+ "gae_lambda": 0.95,
46
+ "num_minibatches": 1,
47
+ "update_epochs": 4,
48
+ "norm_adv": true,
49
+ "clip_coef": 0.2,
50
+ "clip_vloss": true,
51
+ "ent_coef": 0.01,
52
+ "vf_coef": 0.5,
53
+ "max_grad_norm": 0.5,
54
+ "target_kl": null,
55
+ "repo_id": "Akchunks/LunarLander-v2",
56
+ "batch_size": 128,
57
+ "minibatch_size": 128
58
+ }
59
+ ```
logs/events.out.tfevents.1748689997.13543feeb83d.188.3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ac50032600feacaf912cefbbe03936d25ed8cec930556df5116de92f680399a
3
+ size 88
model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:701d954461323cfbc3d99b1b5933e746fe6c44ac2196732d35fbc3e401205b23
3
+ size 42898
replay.mp4 ADDED
Binary file (23.5 kB). View file
 
results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"env_id": "LunarLander-v3", "mean_reward": -200.8202600548159, "std_reward": 95.0173797879124, "n_eval_episodes": 10, "eval_datetime": "2025-05-31T11:15:14.484207"}