src.gridmind.algorithms.base_learning_algorithm
===============================================

.. py:module:: src.gridmind.algorithms.base_learning_algorithm


Attributes
----------

.. autoapisummary::

   src.gridmind.algorithms.base_learning_algorithm.SAVE_DATA_DIR


Classes
-------

.. autoapisummary::

   src.gridmind.algorithms.base_learning_algorithm.BaseLearningAlgorithm


Module Contents
---------------

.. py:data:: SAVE_DATA_DIR
   :value: None


.. py:class:: BaseLearningAlgorithm(name: str, env: Optional[gymnasium.Env] = None, summary_dir: Optional[str] = None, write_summary: bool = True)

   Bases: :py:obj:`abc.ABC`


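   Abstract base class for learning algorithms.

   Concrete subclasses must implement the abstract methods
   :py:meth:`_get_state_value_fn`, :py:meth:`_get_state_action_value_fn`,
   :py:meth:`_get_policy`, :py:meth:`set_policy`,
   :py:meth:`_train_episodes` and :py:meth:`_train_steps`.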


   .. py:attribute:: name


   .. py:attribute:: logger
      :value: None


   .. py:attribute:: env
      :value: None


   .. py:attribute:: epoch_eval_interval
      :value: None


   .. py:attribute:: perform_evaluation
      :value: False


   .. py:attribute:: monitor_divergence
      :value: False


   .. py:attribute:: stop_on_divergence
      :value: False


   .. py:attribute:: write_summary
      :value: True


   .. py:method:: _initialize_summary_writer(summary_dir, env_name, extra_info: str = '', use_async_writer: bool = False)


   .. py:method:: register_performance_evaluator(evaluator: gridmind.utils.performance_evaluation.base_performance_evaluator.BasePerformanceEvaluator)


   .. py:method:: register_divergence_detector(detector: gridmind.utils.divergence.base_divergence_detector.BaseDivergenceDetector)


   .. py:method:: report_policy()


   .. py:method:: report_state_values()


   .. py:method:: report_state_action_values()


   .. py:method:: _preprocess(observation)


   .. py:method:: speculate_divergence()


   .. py:method:: _get_state_value_fn(force_functional_interface: bool = True)
      :abstractmethod:


   .. py:method:: _get_state_action_value_fn(force_functional_interface: bool = True)
      :abstractmethod:


   .. py:method:: _get_policy()
      :abstractmethod:


   .. py:method:: get_state_value_fn(force_functional_interface: bool = True, autopreprocess: bool = False)


   .. py:method:: get_state_action_value_fn(force_functional_interface: bool = True, autopreprocess: bool = False)


   .. py:method:: get_policy(autopreprocess: bool = False)


   .. py:method:: set_policy(policy: gridmind.policies.base_policy.BasePolicy, **kwargs)
      :abstractmethod:


   .. py:method:: _train_episodes(num_episodes: int, prediction_only: bool, *args, **kwargs)
      :abstractmethod:


   .. py:method:: get_policy_cloned()


   .. py:method:: train(num_episodes: Optional[int] = None, num_steps: Optional[int] = None, prediction_only: bool = False, save_policy: bool = True, *args, **kwargs)


   .. py:method:: train_steps(num_steps: int, prediction_only: bool, save_policy: bool = True, *args, **kwargs)


   .. py:method:: _train_steps(num_steps: int, prediction_only: bool, *args, **kwargs)
      :abstractmethod:


   .. py:method:: train_episodes(num_episodes: int, prediction_only: bool, save_policy: bool = True, *args, **kwargs)


   .. py:method:: _training_wrapper(num_iter: int, prediction_only: bool, save_policy: bool, training_fn: Callable, *args, **kwargs)


   .. py:method:: _report_all_metrics()


   .. py:method:: evaluate_policy(num_episodes: int)


   .. py:method:: optimize_policy(num_episodes: int)


   .. py:method:: save_policy(path: str)


   .. py:method:: load_policy(saved_policy_path: str)
      :staticmethod: