import matplotlib.pyplot as plt
import torch
try:
import gymnasium as gym
except ImportError:
%pip install gymnasium[classic-control] -q
import gymnasium as gym
import numpy as np
%matplotlib inline
%config InlineBackend.figure_format='retina'
Reinforcement Learning 1 Gym Environments
ML
Reference
Basic Imports
https://www.gymlibrary.dev/environments/classic_control/mountain_car/
# List of environments
list(gym.envs.registry.keys())
['CartPole-v0',
'CartPole-v1',
'MountainCar-v0',
'MountainCarContinuous-v0',
'Pendulum-v1',
'Acrobot-v1',
'phys2d/CartPole-v0',
'phys2d/CartPole-v1',
'phys2d/Pendulum-v0',
'LunarLander-v2',
'LunarLanderContinuous-v2',
'BipedalWalker-v3',
'BipedalWalkerHardcore-v3',
'CarRacing-v2',
'Blackjack-v1',
'FrozenLake-v1',
'FrozenLake8x8-v1',
'CliffWalking-v0',
'Taxi-v3',
'tabular/Blackjack-v0',
'tabular/CliffWalking-v0',
'Reacher-v2',
'Reacher-v4',
'Pusher-v2',
'Pusher-v4',
'InvertedPendulum-v2',
'InvertedPendulum-v4',
'InvertedDoublePendulum-v2',
'InvertedDoublePendulum-v4',
'HalfCheetah-v2',
'HalfCheetah-v3',
'HalfCheetah-v4',
'Hopper-v2',
'Hopper-v3',
'Hopper-v4',
'Swimmer-v2',
'Swimmer-v3',
'Swimmer-v4',
'Walker2d-v2',
'Walker2d-v3',
'Walker2d-v4',
'Ant-v2',
'Ant-v3',
'Ant-v4',
'Humanoid-v2',
'Humanoid-v3',
'Humanoid-v4',
'HumanoidStandup-v2',
'HumanoidStandup-v4',
'GymV21Environment-v0',
'GymV26Environment-v0']
# Create the MountainCar environment with an on-screen window and reset it
# with a fixed seed so the initial observation is reproducible.
env = gym.make('MountainCar-v0', render_mode='human')
observation, info = env.reset(seed=42)
observation
array([-0.4452088, 0. ], dtype=float32)
info
{}
env
<TimeLimit<OrderEnforcing<PassiveEnvChecker<MountainCarEnv<MountainCar-v0>>>>>
env.action_space
Discrete(3)
env.action_space.n
3
env.reward_range
(-inf, inf)
# Draw a few random actions to see what the discrete action space produces.
for i in range(10):
    print(i, env.action_space.sample())
0 1
1 0
2 0
3 0
4 2
5 0
6 2
7 2
8 2
9 2
env.observation_space
Box([-1.2 -0.07], [0.6 0.07], (2,), float32)
env.observation_space.high
array([0.6 , 0.07], dtype=float32)
env.observation_space.sample()
array([-0.2961844, -0.034966 ], dtype=float32)
# Roll out a random policy for at most 100 steps from a seeded start state.
env.reset(seed=42)
for i in range(100):
    env.render()
    action = env.action_space.sample()
    observation, reward, terminated, truncated, info = env.step(action)
    print(i, observation, reward, terminated, truncated, info)
    # Stop when the episode ends, whether by reaching a terminal state or by
    # being cut off (truncated); stepping past either requires a reset first.
    if terminated or truncated:
        break
0 [-0.4457913 -0.00058252] -1.0 False False {}
1 [-0.4469521 -0.00116079] -1.0 False False {}
2 [-0.4486827 -0.00173059] -1.0 False False {}
3 [-0.45097044 -0.00228774] -1.0 False False {}
4 [-0.45479858 -0.00382815] -1.0 False False {}
5 [-0.4601391 -0.0053405] -1.0 False False {}
6 [-0.46495268 -0.00481358] -1.0 False False {}
7 [-0.47120383 -0.00625116] -1.0 False False {}
8 [-0.47784632 -0.0066425 ] -1.0 False False {}
9 [-0.4858309 -0.00798457] -1.0 False False {}
10 [-0.4950981 -0.00926722] -1.0 False False {}
11 [-0.5045788 -0.00948072] -1.0 False False {}
12 [-0.5142021 -0.00962329] -1.0 False False {}
13 [-0.5228959 -0.00869376] -1.0 False False {}
14 [-0.5325949 -0.00969903] -1.0 False False {}
15 [-0.5432265 -0.01063157] -1.0 False False {}
16 [-0.55371094 -0.01048444] -1.0 False False {}
17 [-0.56296986 -0.00925891] -1.0 False False {}
18 [-0.57293415 -0.00996431] -1.0 False False {}
19 [-0.5825298 -0.00959565] -1.0 False False {}
20 [-0.5916858 -0.00915596] -1.0 False False {}
21 [-0.6003346 -0.00864885] -1.0 False False {}
22 [-0.60841304 -0.0080784 ] -1.0 False False {}
23 [-0.6168622 -0.00844914] -1.0 False False {}
24 [-0.6236209 -0.00675875] -1.0 False False {}
25 [-0.6296407 -0.00601979] -1.0 False False {}
26 [-0.6338785 -0.00423783] -1.0 False False {}
27 [-0.63630426 -0.00242574] -1.0 False False {}
28 [-6.3690072e-01 -5.9645844e-04] -1.0 False False {}
29 [-6.3666368e-01 2.3703734e-04] -1.0 False False {}
30 [-0.63559484 0.00106886] -1.0 False False {}
31 [-0.63370174 0.00189311] -1.0 False False {}
32 [-0.6309978 0.00270395] -1.0 False False {}
33 [-0.6285022 0.00249558] -1.0 False False {}
34 [-0.62523276 0.00326943] -1.0 False False {}
35 [-0.62021285 0.00501993] -1.0 False False {}
36 [-0.6134784 0.00673443] -1.0 False False {}
37 [-0.606078 0.00740039] -1.0 False False {}
38 [-0.5990653 0.00701269] -1.0 False False {}
39 [-0.59249145 0.00657387] -1.0 False False {}
40 [-0.58640456 0.00608689] -1.0 False False {}
41 [-0.5788494 0.00755515] -1.0 False False {}
42 [-0.5708818 0.00796764] -1.0 False False {}
43 [-0.5615607 0.00932107] -1.0 False False {}
44 [-0.5529555 0.00860517] -1.0 False False {}
45 [-0.54513043 0.00782506] -1.0 False False {}
46 [-0.538144 0.00698644] -1.0 False False {}
47 [-0.5300485 0.00809549] -1.0 False False {}
48 [-0.52190465 0.00814386] -1.0 False False {}
49 [-0.5147735 0.00713116] -1.0 False False {}
50 [-0.50770855 0.00706498] -1.0 False False {}
51 [-0.5007627 0.00694584] -1.0 False False {}
52 [-0.49398798 0.00677471] -1.0 False False {}
53 [-0.48743504 0.00655292] -1.0 False False {}
54 [-0.48115283 0.00628222] -1.0 False False {}
55 [-0.4751881 0.00596474] -1.0 False False {}
56 [-0.47058517 0.00460293] -1.0 False False {}
57 [-0.46637815 0.004207 ] -1.0 False False {}
58 [-0.46359822 0.00277995] -1.0 False False {}
59 [-0.46126583 0.00233238] -1.0 False False {}
60 [-0.45839822 0.0028676 ] -1.0 False False {}
61 [-0.45501652 0.00338171] -1.0 False False {}
62 [-0.45214558 0.00287096] -1.0 False False {}
63 [-0.44980642 0.00233916] -1.0 False False {}
64 [-0.4480162 0.00179022] -1.0 False False {}
65 [-4.4778800e-01 2.2819887e-04] -1.0 False False {}
66 [-0.44912347 -0.00133549] -1.0 False False {}
67 [-0.4520129 -0.00288942] -1.0 False False {}
68 [-0.4554351 -0.0034222] -1.0 False False {}
69 [-0.45836496 -0.00292987] -1.0 False False {}
70 [-0.46278098 -0.00441601] -1.0 False False {}
71 [-0.4666506 -0.00386961] -1.0 False False {}
72 [-0.46994525 -0.00329465] -1.0 False False {}
73 [-0.47264057 -0.00269532] -1.0 False False {}
74 [-0.4757166 -0.00307602] -1.0 False False {}
75 [-0.4781505 -0.0024339] -1.0 False False {}
76 [-0.4809242 -0.00277371] -1.0 False False {}
77 [-0.4850171 -0.00409289] -1.0 False False {}
78 [-0.4893987 -0.00438161] -1.0 False False {}
79 [-0.49403635 -0.00463766] -1.0 False False {}
80 [-0.49989542 -0.00585909] -1.0 False False {}
81 [-0.50593215 -0.00603671] -1.0 False False {}
82 [-0.5131013 -0.00716915] -1.0 False False {}
83 [-0.5213492 -0.00824787] -1.0 False False {}
84 [-0.5306139 -0.00926474] -1.0 False False {}
85 [-0.53982604 -0.00921213] -1.0 False False {}
86 [-0.5499165 -0.01009047] -1.0 False False {}
87 [-0.5608098 -0.0108933] -1.0 False False {}
88 [-0.5704246 -0.00961479] -1.0 False False {}
89 [-0.57868934 -0.00826475] -1.0 False False {}
90 [-0.5865428 -0.00785345] -1.0 False False {}
91 [-0.59392697 -0.00738417] -1.0 False False {}
92 [-0.6007876 -0.00686062] -1.0 False False {}
93 [-0.60607445 -0.00528686] -1.0 False False {}
94 [-0.61174905 -0.00567458] -1.0 False False {}
95 [-0.6177702 -0.00602114] -1.0 False False {}
96 [-0.62409437 -0.00632421] -1.0 False False {}
97 [-0.6296762 -0.00558186] -1.0 False False {}
98 [-0.6344759 -0.00479964] -1.0 False False {}
99 [-0.6384592 -0.00398331] -1.0 False False {}
# Does it help to always go right?
# Roll out the constant policy "always take action 2" for at most 100 steps
# from the same seeded start state.
env.reset(seed=42)
for i in range(100):
    env.render()
    action = 2
    observation, reward, terminated, truncated, info = env.step(action)
    print(i, observation, reward, terminated, truncated, info)
    # Stop when the episode ends, whether by reaching a terminal state or by
    # being cut off (truncated); stepping past either requires a reset first.
    if terminated or truncated:
        break
0 [-4.4479132e-01 4.1747934e-04] -1.0 False False {}
1 [-0.4439594 0.00083191] -1.0 False False {}
2 [-0.4427191 0.00124029] -1.0 False False {}
3 [-0.4410795 0.00163962] -1.0 False False {}
4 [-0.43905246 0.00202703] -1.0 False False {}
5 [-0.43665275 0.00239971] -1.0 False False {}
6 [-0.43389776 0.00275498] -1.0 False False {}
7 [-0.43080744 0.00309032] -1.0 False False {}
8 [-0.4274041 0.00340333] -1.0 False False {}
9 [-0.42371225 0.00369185] -1.0 False False {}
10 [-0.4197584 0.00395386] -1.0 False False {}
11 [-0.41557083 0.00418759] -1.0 False False {}
12 [-0.41117933 0.00439149] -1.0 False False {}
13 [-0.40661508 0.00456424] -1.0 False False {}
14 [-0.40191033 0.00470476] -1.0 False False {}
15 [-0.3970981 0.00481224] -1.0 False False {}
16 [-0.392212 0.00488609] -1.0 False False {}
17 [-0.38728598 0.00492601] -1.0 False False {}
18 [-0.38235408 0.00493192] -1.0 False False {}
19 [-0.37745008 0.004904 ] -1.0 False False {}
20 [-0.3726074 0.00484267] -1.0 False False {}
21 [-0.36785883 0.00474856] -1.0 False False {}
22 [-0.36323628 0.00462255] -1.0 False False {}
23 [-0.3587706 0.00446569] -1.0 False False {}
24 [-0.35449135 0.00427925] -1.0 False False {}
25 [-0.3504267 0.00406465] -1.0 False False {}
26 [-0.3466032 0.0038235] -1.0 False False {}
27 [-0.34304565 0.00355754] -1.0 False False {}
28 [-0.33977702 0.00326864] -1.0 False False {}
29 [-0.33681822 0.0029588 ] -1.0 False False {}
30 [-0.3341881 0.00263011] -1.0 False False {}
31 [-0.33190334 0.00228476] -1.0 False False {}
32 [-0.32997838 0.00192499] -1.0 False False {}
33 [-0.32842523 0.00155313] -1.0 False False {}
34 [-0.3272537 0.00117154] -1.0 False False {}
35 [-0.32647103 0.00078265] -1.0 False False {}
36 [-0.32608217 0.00038887] -1.0 False False {}
37 [-3.2608950e-01 -7.3227225e-06] -1.0 False False {}
38 [-0.32649297 -0.00040347] -1.0 False False {}
39 [-0.3272901 -0.00079711] -1.0 False False {}
40 [-0.32847586 -0.00118578] -1.0 False False {}
41 [-0.3300429 -0.00156705] -1.0 False False {}
42 [-0.33198142 -0.0019385 ] -1.0 False False {}
43 [-0.33427918 -0.00229778] -1.0 False False {}
44 [-0.33692175 -0.00264256] -1.0 False False {}
45 [-0.33989236 -0.00297059] -1.0 False False {}
46 [-0.34317204 -0.0032797 ] -1.0 False False {}
47 [-0.34673983 -0.00356778] -1.0 False False {}
48 [-0.35057268 -0.00383286] -1.0 False False {}
49 [-0.35464573 -0.00407306] -1.0 False False {}
50 [-0.35893238 -0.00428664] -1.0 False False {}
51 [-0.3634044 -0.00447202] -1.0 False False {}
52 [-0.36803216 -0.00462776] -1.0 False False {}
53 [-0.37278476 -0.00475261] -1.0 False False {}
54 [-0.3776303 -0.00484552] -1.0 False False {}
55 [-0.3825359 -0.00490563] -1.0 False False {}
56 [-0.38746822 -0.0049323 ] -1.0 False False {}
57 [-0.39239335 -0.00492514] -1.0 False False {}
58 [-0.39727733 -0.00488397] -1.0 False False {}
59 [-0.40208617 -0.00480886] -1.0 False False {}
60 [-0.40678635 -0.00470015] -1.0 False False {}
61 [-0.41134477 -0.00455843] -1.0 False False {}
62 [-0.41572928 -0.00438451] -1.0 False False {}
63 [-0.41990876 -0.00417948] -1.0 False False {}
64 [-0.42385343 -0.00394468] -1.0 False False {}
65 [-0.4275351 -0.00368165] -1.0 False False {}
66 [-0.43092728 -0.0033922 ] -1.0 False False {}
67 [-0.4340056 -0.00307832] -1.0 False False {}
68 [-0.4367478 -0.0027422] -1.0 False False {}
69 [-0.43913403 -0.00238624] -1.0 False False {}
70 [-0.441147 -0.00201297] -1.0 False False {}
71 [-0.4427721 -0.00162507] -1.0 False False {}
72 [-0.4439974 -0.00122535] -1.0 False False {}
73 [-0.44481412 -0.0008167 ] -1.0 False False {}
74 [-4.452162e-01 -4.020977e-04] -1.0 False False {}
75 [-4.4520080e-01 1.5435844e-05] -1.0 False False {}
76 [-4.4476792e-01 4.3285682e-04] -1.0 False False {}
77 [-0.44392082 0.00084712] -1.0 False False {}
78 [-0.4426656 0.00125521] -1.0 False False {}
79 [-0.44101143 0.00165416] -1.0 False False {}
80 [-0.43897036 0.00204107] -1.0 False False {}
81 [-0.4365572 0.00241315] -1.0 False False {}
82 [-0.4337895 0.00276774] -1.0 False False {}
83 [-0.4306872 0.00310229] -1.0 False False {}
84 [-0.42727277 0.00341444] -1.0 False False {}
85 [-0.42357075 0.00370201] -1.0 False False {}
86 [-0.41960773 0.003963 ] -1.0 False False {}
87 [-0.41541207 0.00419566] -1.0 False False {}
88 [-0.41101363 0.00439843] -1.0 False False {}
89 [-0.40644366 0.00457001] -1.0 False False {}
90 [-0.40173432 0.00470932] -1.0 False False {}
91 [-0.39691874 0.00481556] -1.0 False False {}
92 [-0.3920306 0.00488817] -1.0 False False {}
93 [-0.38710377 0.00492683] -1.0 False False {}
94 [-0.38217226 0.00493149] -1.0 False False {}
95 [-0.37726995 0.00490233] -1.0 False False {}
96 [-0.37243018 0.00483977] -1.0 False False {}
97 [-0.3676857 0.00474447] -1.0 False False {}
98 [-0.3630684 0.0046173] -1.0 False False {}
99 [-0.35860908 0.00445932] -1.0 False False {}
# Go left first K iterations and then go right N - K iterations
# Two-phase policy: take action 0 for the first K steps, then action 2 for
# the remaining N - K steps, starting from the same seeded state.
env.reset(seed=42)
K = 50
N = 250
for i in range(N):
    env.render()
    action = 0 if i < K else 2
    observation, reward, terminated, truncated, info = env.step(action)
    print(i, observation, reward, terminated, truncated, info)
    # N may exceed the environment's time limit, so stop on truncation as
    # well as on termination; stepping past either requires a reset first.
    if terminated or truncated:
        break
0 [-0.44679132 -0.00158252] -1.0 False False {}
1 [-0.4499448 -0.00315349] -1.0 False False {}
2 [-0.45464623 -0.00470141] -1.0 False False {}
3 [-0.4608611 -0.00621488] -1.0 False False {}
4 [-0.46854374 -0.00768264] -1.0 False False {}
5 [-0.4776374 -0.00909367] -1.0 False False {}
6 [-0.4880747 -0.01043729] -1.0 False False {}
7 [-0.4997779 -0.01170322] -1.0 False False {}
8 [-0.51265967 -0.01288173] -1.0 False False {}
9 [-0.52662337 -0.01396375] -1.0 False False {}
10 [-0.54156446 -0.01494107] -1.0 False False {}
11 [-0.55737084 -0.01580639] -1.0 False False {}
12 [-0.5739244 -0.01655353] -1.0 False False {}
13 [-0.59110194 -0.01717752] -1.0 False False {}
14 [-0.6087766 -0.0176747] -1.0 False False {}
15 [-0.62681943 -0.0180428 ] -1.0 False False {}
16 [-0.64510036 -0.01828096] -1.0 False False {}
17 [-0.6634901 -0.01838974] -1.0 False False {}
18 [-0.6818612 -0.01837108] -1.0 False False {}
19 [-0.7000894 -0.01822821] -1.0 False False {}
20 [-0.71805495 -0.01796552] -1.0 False False {}
21 [-0.7356433 -0.01758842] -1.0 False False {}
22 [-0.7527465 -0.0171032] -1.0 False False {}
23 [-0.7692633 -0.01651679] -1.0 False False {}
24 [-0.7851 -0.01583663] -1.0 False False {}
25 [-0.8001704 -0.01507044] -1.0 False False {}
26 [-0.8143965 -0.01422609] -1.0 False False {}
27 [-0.82770795 -0.01331142] -1.0 False False {}
28 [-0.840042 -0.01233409] -1.0 False False {}
29 [-0.8513436 -0.01130153] -1.0 False False {}
30 [-0.86156434 -0.01022079] -1.0 False False {}
31 [-0.87066287 -0.00909855] -1.0 False False {}
32 [-0.87860394 -0.00794103] -1.0 False False {}
33 [-0.885358 -0.00675404] -1.0 False False {}
34 [-0.8909009 -0.00554296] -1.0 False False {}
35 [-0.8952137 -0.00431278] -1.0 False False {}
36 [-0.8982819 -0.00306818] -1.0 False False {}
37 [-0.9000954 -0.00181353] -1.0 False False {}
38 [-9.006485e-01 -5.530463e-04] -1.0 False False {}
39 [-8.9993924e-01 7.0920831e-04] -1.0 False False {}
40 [-0.8979701 0.00196919] -1.0 False False {}
41 [-0.8947472 0.00322283] -1.0 False False {}
42 [-0.8902813 0.00446589] -1.0 False False {}
43 [-0.88458735 0.00569396] -1.0 False False {}
44 [-0.877685 0.00690234] -1.0 False False {}
45 [-0.86959904 0.00808598] -1.0 False False {}
46 [-0.8603596 0.00923946] -1.0 False False {}
47 [-0.85000265 0.01035692] -1.0 False False {}
48 [-0.8385706 0.01143206] -1.0 False False {}
49 [-0.82611245 0.01245818] -1.0 False False {}
50 [-0.81068426 0.01542816] -1.0 False False {}
51 [-0.7923595 0.01832481] -1.0 False False {}
52 [-0.77123034 0.02112911] -1.0 False False {}
53 [-0.7474102 0.02382017] -1.0 False False {}
54 [-0.72103477 0.02637544] -1.0 False False {}
55 [-0.6922636 0.02877113] -1.0 False False {}
56 [-0.66128075 0.03098283] -1.0 False False {}
57 [-0.62829447 0.03298633] -1.0 False False {}
58 [-0.5935357 0.0347587] -1.0 False False {}
59 [-0.55725634 0.03627939] -1.0 False False {}
60 [-0.51972497 0.03753139] -1.0 False False {}
61 [-0.48122263 0.03850234] -1.0 False False {}
62 [-0.44203725 0.03918537] -1.0 False False {}
63 [-0.4024575 0.03957975] -1.0 False False {}
64 [-0.36276644 0.03969106] -1.0 False False {}
65 [-0.32323536 0.03953107] -1.0 False False {}
66 [-0.28411815 0.03911722] -1.0 False False {}
67 [-0.24564646 0.03847169] -1.0 False False {}
68 [-0.2080261 0.03762037] -1.0 False False {}
69 [-0.17143448 0.03659161] -1.0 False False {}
70 [-0.13601945 0.03541502] -1.0 False False {}
71 [-0.10189916 0.03412029] -1.0 False False {}
72 [-0.06916296 0.0327362 ] -1.0 False False {}
73 [-0.03787315 0.03128982] -1.0 False False {}
74 [-0.00806721 0.02980594] -1.0 False False {}
75 [0.02023946 0.02830667] -1.0 False False {}
76 [0.04705074 0.02681128] -1.0 False False {}
77 [0.07238688 0.02533614] -1.0 False False {}
78 [0.09628174 0.02389486] -1.0 False False {}
79 [0.11878016 0.02249843] -1.0 False False {}
80 [0.13993563 0.02115547] -1.0 False False {}
81 [0.15980819 0.01987256] -1.0 False False {}
82 [0.1784626 0.0186544] -1.0 False False {}
83 [0.19596682 0.01750423] -1.0 False False {}
84 [0.21239078 0.01642396] -1.0 False False {}
85 [0.22780529 0.01541451] -1.0 False False {}
86 [0.24228124 0.01447596] -1.0 False False {}
87 [0.255889 0.01360777] -1.0 False False {}
88 [0.26869795 0.01280894] -1.0 False False {}
89 [0.28077608 0.01207813] -1.0 False False {}
90 [0.2921899 0.01141381] -1.0 False False {}
91 [0.30300424 0.01081433] -1.0 False False {}
92 [0.31328225 0.01027802] -1.0 False False {}
93 [0.3230855 0.00980324] -1.0 False False {}
94 [0.33247393 0.00938846] -1.0 False False {}
95 [0.34150624 0.00903228] -1.0 False False {}
96 [0.35023972 0.00873351] -1.0 False False {}
97 [0.35873088 0.00849114] -1.0 False False {}
98 [0.3670353 0.00830443] -1.0 False False {}
99 [0.37520823 0.00817291] -1.0 False False {}
100 [0.3833046 0.00809638] -1.0 False False {}
101 [0.39137957 0.00807496] -1.0 False False {}
102 [0.3994887 0.00810912] -1.0 False False {}
103 [0.40768832 0.00819965] -1.0 False False {}
104 [0.41603607 0.00834773] -1.0 False False {}
105 [0.424591 0.00855494] -1.0 False False {}
106 [0.43341425 0.00882325] -1.0 False False {}
107 [0.44256935 0.00915509] -1.0 False False {}
108 [0.4521227 0.00955334] -1.0 False False {}
109 [0.46214405 0.01002137] -1.0 False False {}
110 [0.47270712 0.01056306] -1.0 False False {}
111 [0.48388997 0.01118286] -1.0 False False {}
112 [0.49577573 0.01188574] -1.0 False False {}
113 [0.508453 0.01267731] -1.0 True False {}
env.close()
%pip install flappy-bird-gymnasium -q
Note: you may need to restart the kernel to use updated packages.
import flappy_bird_gymnasium
# Create the Flappy Bird environment with lidar observations disabled and
# take the initial observation (the info dict is discarded).
env = gym.make("FlappyBird-v0", render_mode="human", use_lidar=False)
obs, _ = env.reset()
obs
array([ 1. , 0.234375 , 0.4296875, 1. , 0. ,
1. , 1. , 0. , 1. , 0.4765625,
-0.9 , 0.5 ])
env.observation_space
Box(-1.0, 1.0, (12,), float64)
The Kernel crashed while executing code in the current cell or a previous cell. Please review the code in the cell(s) to identify a possible cause of the failure. Click <a href='https://aka.ms/vscodeJupyterKernelCrash'>here</a> for more info. View Jupyter <a href='command:jupyter.viewOutput'>log</a> for further details.
# Number of bins used per observation dimension when discretizing.
n_bins = 6
import pandas as pd
# Per-dimension lower and upper bounds of the observation space.
obs_low = env.observation_space.low
obs_high = env.observation_space.high
# Discretize the observation space
def discretize_observation(observation, low=None, high=None, bins=None):
    """Map a continuous observation to a tuple of per-dimension bin indices.

    Each dimension ``i`` is split into ``bins`` equal-width intervals between
    ``low[i]`` and ``high[i]``; the returned index for that dimension lies in
    ``0 .. bins - 1``, so the tuple can index an array with ``bins`` entries
    per axis. Values outside the bounds fall into the first or last bin.

    Args:
        observation: 1-D sequence of floats, one value per dimension.
        low: Per-dimension lower bounds. Defaults to the module-level
            ``obs_low``.
        high: Per-dimension upper bounds. Defaults to the module-level
            ``obs_high``.
        bins: Number of bins per dimension. Defaults to the module-level
            ``n_bins``.

    Returns:
        A tuple of Python ints, one bin index per observation dimension.

    Raises:
        ValueError: If ``bins`` is less than 1, or if ``observation``,
            ``low`` and ``high`` do not all have the same shape.
    """
    low = np.atleast_1d(np.asarray(obs_low if low is None else low, dtype=float))
    high = np.atleast_1d(np.asarray(obs_high if high is None else high, dtype=float))
    bins = n_bins if bins is None else bins
    observation = np.atleast_1d(np.asarray(observation, dtype=float))

    if bins < 1:
        raise ValueError(f"bins must be at least 1, got {bins}")
    if not (observation.shape == low.shape == high.shape):
        raise ValueError(
            "observation, low and high must have the same shape, got "
            f"{observation.shape}, {low.shape} and {high.shape}"
        )

    # np.digitize only accepts 1-D bin edges, so build one column of edges per
    # dimension and digitize each dimension separately. Only the interior
    # edges are kept: bins - 1 edges yield exactly `bins` possible indices.
    edges = np.linspace(low, high, bins + 1)[1:-1]
    return tuple(
        int(np.digitize(value, edges[:, dim]))
        for dim, value in enumerate(observation)
    )
# Define the variables for the MultiIndex
# Names for the observation components, in order.
# NOTE(review): presumably these match the 12 entries of the FlappyBird
# observation vector one-to-one — confirm against the environment docs.
variables = [
    "last_pipe_h_pos",
    "last_top_pipe_v_pos",
    "last_bottom_pipe_v_pos",
    "next_pipe_h_pos",
    "next_top_pipe_v_pos",
    "next_bottom_pipe_v_pos",
    "next_next_pipe_h_pos",
    "next_next_top_pipe_v_pos",
    "next_next_bottom_pipe_v_pos",
    "player_v_pos",
    "player_v_vel",
    "player_rotation",
]
# leave out the first three and last three variables
var_consider = variables[3:-3]
# Zero-initialized Q-table: one axis of size n_bins for every considered
# variable, plus a trailing axis with one entry per action.
q_table_np = np.zeros([n_bins] * len(var_consider) + [env.action_space.n])