SafeSpaceAI / src /training /train_rl.py
Shreya Pal
Make API Key private
5c5b473
raw
history blame contribute delete
715 Bytes
def train(env, agent, episodes=50, batch_size=32):
    """Run the agent/environment interaction loop and learn after every step.

    For each episode the environment is reset, then the agent repeatedly
    picks an action, the environment is stepped, the transition is stored
    in the agent's memory and the agent is asked to learn from a batch.
    A one-line progress summary is printed at the end of every episode.

    Args:
        env: Object providing ``reset()`` (returns the initial state) and
            ``step(action)`` (returns a ``(next_state, reward, done)``
            3-tuple).
        agent: Object providing ``choose_action(state)``,
            ``remember(state, action, reward, next_state, done)``,
            ``learn(batch_size)`` and an ``epsilon`` attribute that is
            formattable as a float.
        episodes: Number of episodes to run.
        batch_size: Value forwarded to ``agent.learn`` on every step.

    Returns:
        A list with the accumulated reward of each episode, in episode
        order (empty when ``episodes`` is 0).
    """
    # Collected so callers can inspect or plot progress instead of having
    # to scrape the printed output.
    episode_rewards = []

    for ep in range(episodes):
        state = env.reset()
        total_reward = 0
        done = False

        while not done:
            # Choose an action for the current state.
            action = agent.choose_action(state)

            # Advance the environment by one step.
            next_state, reward, done = env.step(action)

            # Store the transition, then learn from memory.
            agent.remember(state, action, reward, next_state, done)
            agent.learn(batch_size)

            # Move forward.
            state = next_state
            total_reward += reward

        print(f"Episode {ep+1}, Reward: {total_reward:.2f}, Epsilon: {agent.epsilon:.3f}")
        episode_rewards.append(total_reward)

    return episode_rewards