From 008f0753b7d9b8619ba7e9b71405d6e9ca30a370 Mon Sep 17 00:00:00 2001
From: durffy
Date: Fri, 7 Jul 2023 07:02:24 -0700
Subject: [PATCH 1/2] modified to work with new gym framework

---
 naive_deep_q_learning/cartpole_naive_dqn.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/naive_deep_q_learning/cartpole_naive_dqn.py b/naive_deep_q_learning/cartpole_naive_dqn.py
index 21c33ae..997dbd2 100644
--- a/naive_deep_q_learning/cartpole_naive_dqn.py
+++ b/naive_deep_q_learning/cartpole_naive_dqn.py
@@ -83,7 +83,7 @@ def learn(self, state, action, reward, state_):
     for i in range(n_games):
         score = 0
         done = False
-        obs = env.reset()
+        obs = env.reset()[0]
 
         while not done:
             action = agent.choose_action(obs)

From 911d4d7100067d376e4306781b6b240799dc9f9b Mon Sep 17 00:00:00 2001
From: durffy
Date: Fri, 7 Jul 2023 07:10:46 -0700
Subject: [PATCH 2/2] modified to work with the new openai framework. from Gym
 API "Accepts an action and returns a tuple (observation, reward, terminated,
 truncated, info)"

---
 naive_deep_q_learning/cartpole_naive_dqn.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/naive_deep_q_learning/cartpole_naive_dqn.py b/naive_deep_q_learning/cartpole_naive_dqn.py
index 997dbd2..e433783 100644
--- a/naive_deep_q_learning/cartpole_naive_dqn.py
+++ b/naive_deep_q_learning/cartpole_naive_dqn.py
@@ -87,7 +87,7 @@ def learn(self, state, action, reward, state_):
 
         while not done:
             action = agent.choose_action(obs)
-            obs_, reward, done, info = env.step(action)
+            obs_, reward, done, _, info = env.step(action)
             score += reward
             agent.learn(obs, action, reward, obs_)
             obs = obs_
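
Note: below is a minimal sketch of how an episode loop consumes the post-0.26 Gym API these two patches target. It is illustrative only: it assumes gym >= 0.26 (or gymnasium) is installed, and the random policy is a placeholder for the repo's DQN agent, not the actual code in cartpole_naive_dqn.py. One caveat: patch 2 discards the truncated flag, so an episode that hits CartPole's time limit would never set done; folding both flags into done, as shown here, is the usual way to handle that.

    import gym

    # Assumes gym >= 0.26 (or gymnasium); CartPole-v1 matches the script's task.
    env = gym.make("CartPole-v1")

    # reset() now returns (observation, info), hence the [0] in patch 1.
    obs, info = env.reset()
    done = False
    score = 0
    while not done:
        action = env.action_space.sample()  # placeholder for agent.choose_action(obs)
        # step() now returns (observation, reward, terminated, truncated, info).
        obs_, reward, terminated, truncated, info = env.step(action)
        done = terminated or truncated  # patch 2 drops truncated; this covers both
        score += reward
        obs = obs_
    print(f"episode return: {score}")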