Source code for state_value_fn_from_action_value_fn

from typing import Callable, List
from gridmind.policies.base_policy import BasePolicy


def get_state_value_fn(
    action_value_fn: Callable, policy: "BasePolicy", actions: List
) -> Callable:
    """Build a state-value function from an action-value function and a policy.

    The returned callable computes, for a given state, the sum over
    ``actions`` of ``action_value_fn(state, action)`` weighted by
    ``policy.get_action_prob(state=state, action=action)``.

    It is assumed that every action is possible in every state. If not, the
    policy should return 0 action probability for that action.

    Args:
        action_value_fn: Callable invoked as ``action_value_fn(state, action)``.
        policy: Object providing ``get_action_prob(state=..., action=...)``.
        actions: The actions to sum over. They are iterated again on every
            call of the returned function.

    Returns:
        A callable that takes a state and returns the probability-weighted
        sum of action values for that state (``0`` when ``actions`` is empty).
    """

    def state_value_fn(state):
        # A generator expression is enough here; sum() consumes it directly
        # without building an intermediate list.
        return sum(
            action_value_fn(state, action)
            * policy.get_action_prob(state=state, action=action)
            for action in actions
        )

    return state_value_fn