import numpy as np

class Adam:
    """
    Implements the Adam optimizer presented in the paper *Adam: A Method for
    Stochastic Optimization* by Kingma and Ba, ICLR 2015.

    The optimizer keeps a reference to ``param`` and updates it **in place**
    on every call to :meth:`step`, so the caller's array always holds the
    current parameter values.

    Attributes:
        param: The NumPy array being optimized (mutated in place).
        lr: Base step size.
        beta_1: Exponential decay rate of the first-moment estimate.
        beta_2: Exponential decay rate of the second-moment estimate.
        epsilon: Small constant added to the denominator for stability.
        t: Number of steps taken so far.
        state: Tuple ``(m, v)`` holding the running first- and second-moment
            estimates, each with the same shape as ``param``.
    """

    def __init__(self, param, lr, beta_1=0.9, beta_2=0.999, epsilon=1e-8):
        """
        Set up the optimizer for a single parameter array.

        Args:
            param: NumPy array of parameters of any shape; updated in place
                by :meth:`step`.
            lr: Base step size.
            beta_1: Decay rate of the first-moment (mean) estimate.
            beta_2: Decay rate of the second-moment (uncentered variance)
                estimate.
            epsilon: Small constant added to the denominator in the update.
        """
        self.lr = lr

        self.param = param
        self.beta_1 = beta_1
        self.beta_2 = beta_2
        self.epsilon = epsilon
        self.t = 0

        # Allocate the moment buffers with the full shape of ``param`` so
        # that parameters of any dimensionality (not only 1-D vectors) work.
        self.state = (np.zeros(param.shape), np.zeros(param.shape))

    def step(self, grads):
        """
        Take a gradient step, updating ``self.param`` in place.

        Args:
            grads: Gradient of the objective with respect to ``param``;
                anything ``np.asarray`` accepts that broadcasts against
                ``param``.
        """
        grads = np.asarray(grads)
        self.t += 1

        # Bias correction of both moment estimates is folded into the step
        # size instead of being applied to m_t and v_t separately.
        lr_t = (self.lr * np.sqrt(1 - self.beta_2 ** self.t)
                / (1 - self.beta_1 ** self.t))

        m_prev, v_prev = self.state
        m_t = self.beta_1 * m_prev + (1 - self.beta_1) * grads
        v_t = self.beta_2 * v_prev + (1 - self.beta_2) * np.square(grads)
        self.state = (m_t, v_t)

        # In-place subtraction keeps the caller's array in sync.
        self.param -= lr_t * m_t / (np.sqrt(v_t) + self.epsilon)