GridMind
Contents:
gridmind
API Reference
sarsa
q_derived
q_learning
prediction
trajectory
Classes
Module Contents
Trajectory
base_soft_policy
episode_collector
simple_replay_buffer
state_value_fn_from_action_value_fn
stochastic_start_epsilon_greedy_policy
GridMind
API Reference
trajectory
View page source
trajectory
Classes
Trajectory
Module Contents
class trajectory.Trajectory [source]
state_actions = [] [source]
rewards = [] [source]
additional_info = [] [source]
update_step(state, action, reward, timestep: int | None = None, **kwargs) [source]
record_step(state, action, reward, **kwargs) [source]
get_step_with_info(timestep: int) [source]
get_step(timestep: int) [source]
get_state_action(timestep: int) [source]
get_state(timestep: int) [source]
get_reward(timestep: int) [source]
check_state_action_appearance_before_timestep(state_action, timestep) [source]
get_trajectory_length() [source]
clear() [source]