Source code for rl_agents.agents.mab.softmax

import numpy as np

from rl_agents.agents.mab.base import BaseMAB


class Softmax(BaseMAB):
    """Softmax (Boltzmann) action selection for a multi-armed bandit.

    The agent keeps a running mean reward for every arm and samples an arm
    with probability proportional to ``exp(mean / temperature)``.

    Parameters
    ----------
    n_arms : int
        Number of arms; sizes the per-arm arrays and the sampling range.
    temperature : float
        Divisor applied to the mean rewards before exponentiation. Must be
        non-zero. For positive values, smaller temperatures concentrate the
        probability mass on the arms with the highest mean reward, larger
        ones flatten the distribution towards uniform.

    Attributes
    ----------
    means : numpy.ndarray
        Running mean reward of each arm, initialised to zeros.
    p_arms : numpy.ndarray
        Selection probabilities computed by the most recent call to
        `predict` (all zeros until `predict` has been called).
    trials : numpy.ndarray
        Number of `learn` updates recorded for each arm.
    n_arms
    temperature

    """

    def __init__(self, n_arms, temperature):
        self.n_arms = n_arms
        self.means = np.zeros(self.n_arms)
        self.temperature = temperature
        self.p_arms = np.zeros(self.n_arms)
        self.trials = np.zeros(self.n_arms)

    def learn(self, a_idx, reward):
        """Fold one observed reward into the running mean of an arm.

        Parameters
        ----------
        a_idx : int
            Index of the arm that produced `reward`.
        reward : float
            Reward observed for that arm.

        Returns
        -------
        None
            `means` and `trials` are updated in place.

        """
        pulls = self.trials[a_idx]
        # Rebuild the reward total from the old mean, add the new sample and
        # divide by the new pull count.
        self.means[a_idx] = ((self.means[a_idx] * pulls) + reward) / (pulls + 1)
        self.trials[a_idx] += 1  # add trial

    def predict(self):
        """Sample an arm from the softmax distribution over mean rewards.

        The probabilities are also stored in `p_arms`.

        Returns
        -------
        int
            Index of the selected arm, drawn with `numpy.random.choice`.

        """
        scaled = self.means / self.temperature
        # Softmax is invariant to adding a constant to every input, so shift
        # by the maximum: the largest exponent becomes exp(0) == 1, which
        # prevents overflow to inf (and the resulting nan probabilities) and
        # guarantees a non-zero normaliser.
        weights = np.exp(scaled - scaled.max())
        self.p_arms = weights / weights.sum()
        return np.random.choice(self.n_arms, p=self.p_arms)