Source code for rl_agents.agents.mab.pursuit

import numpy as np

from rl_agents.agents.mab.base import BaseMAB


[docs]class Pursuit(BaseMAB): """Short summary. Parameters ---------- n_arms : type Description of parameter `n_arms`. beta : type Description of parameter `beta`. Attributes ---------- means : type Description of attribute `means`. p_arms : type Description of attribute `p_arms`. trials : type Description of attribute `trials`. n_arms beta """ def __init__(self, n_arms, beta): self.n_arms = n_arms self.means = np.zeros(self.n_arms) self.beta = beta self.p_arms = np.ones(self.n_arms) / self.n_arms self.trials = np.zeros(self.n_arms)
[docs] def learn(self, a_idx, reward): """Short summary. Parameters ---------- reward : type Description of parameter `reward`. a_idx : type Description of parameter `a_idx`. Returns ------- type Description of returned object. """ self.means[a_idx] = ( (self.means[a_idx] * self.trials[a_idx]) + reward ) / (self.trials[a_idx] + 1) self.trials[a_idx] += 1 # add trial ii = np.argmax(self.means) p_ii = self.p_arms[ii] self.p_arms = (1 - self.beta) * self.p_arms self.p_arms[ii] = p_ii + self.beta * (1 - p_ii)
[docs] def predict(self): """Short summary. Returns ------- type Description of returned object. """ return np.random.choice(range(self.n_arms), p=self.p_arms)