From 98b5f94a6203c8780ea3ee9075105f44a4f8168a Mon Sep 17 00:00:00 2001 From: guyko81 Date: Sun, 14 Jun 2020 18:18:29 +0100 Subject: [PATCH 1/4] adding BetaBernoulli distribution with LogScore --- ngboost/distns/__init__.py | 1 + ngboost/distns/betabernoulli.py | 125 ++++++++++++++++++++++++++++++++ 2 files changed, 126 insertions(+) create mode 100644 ngboost/distns/betabernoulli.py diff --git a/ngboost/distns/__init__.py b/ngboost/distns/__init__.py index 5308b667..1157775c 100644 --- a/ngboost/distns/__init__.py +++ b/ngboost/distns/__init__.py @@ -4,3 +4,4 @@ from .lognormal import LogNormal from .exponential import Exponential from .categorical import k_categorical, Bernoulli +from .betabernoulli import BetaBernoulli diff --git a/ngboost/distns/betabernoulli.py b/ngboost/distns/betabernoulli.py new file mode 100644 index 00000000..b1452d9d --- /dev/null +++ b/ngboost/distns/betabernoulli.py @@ -0,0 +1,125 @@ +from scipy.stats import betabinom as dist +from scipy.stats import beta as betadist +import numpy as np +from ngboost.distns.distn import RegressionDistn +from ngboost.scores import LogScore +from scipy.special import polygamma, gamma, digamma +from scipy.special import beta as betafunction +from fastbetabino import * +from array import array +import sys +class BetaBernoulliLogScore(LogScore): + + def score(self, Y): + return -self.dist.logpmf(Y) + + def d_score(self, Y): + D = np.zeros((len(Y), 2)) # first col is dS/d(log(α)), second col is dS/d(log(β)) + + D[:, 0] = -self.alpha * ( + digamma(self.alpha + self.beta) + + digamma(Y + self.alpha) - + digamma(self.alpha + self.beta + 1) - + digamma(self.alpha) + ) + D[:, 1] = -self.beta * ( + digamma(self.alpha + self.beta) + + digamma(-Y + self.beta + 1) - + digamma(self.alpha + self.beta + 1) - + digamma(self.beta) + ) + return D + + def metric(self): + FI = np.zeros((self.alpha.shape[0], 2, 2)) + FI[:, 0, 0] = ((self.alpha * ( + digamma(self.alpha + self.beta) + + digamma(0 + self.alpha) - + digamma(self.alpha + self.beta + 1) - + digamma(self.alpha) + ))**2 * self.dist.pmf(0) + + (self.alpha * ( + digamma(self.alpha + self.beta) + + digamma(1 + self.alpha) - + digamma(self.alpha + self.beta + 1) - + digamma(self.alpha) + ))**2 * self.dist.pmf(1)) + FI[:, 1, 1] = ((self.beta * ( + digamma(self.alpha + self.beta) + + digamma(-0 + self.beta + 1) - + digamma(self.alpha + self.beta + 1) - + digamma(self.beta) + ))**2 * self.dist.pmf(0) + + (self.beta * ( + digamma(self.alpha + self.beta) + + digamma(-1 + self.beta + 1) - + digamma(self.alpha + self.beta + 1) - + digamma(self.beta) + ))**2 * self.dist.pmf(1)) + return FI +class BetaBernoulli(RegressionDistn): + + n_params = 2 + scores = [BetaBernoulliLogScore] + + def __init__(self, params): + # save the parameters + self._params = params + + # create other objects that will be useful later + self.log_alpha = params[0] + self.log_beta = params[1] + self.alpha = np.exp(self.log_alpha) + self.beta = np.exp(self.log_beta) + self.dist = dist(n=1, a=self.alpha, b=self.beta) + + def sigmoid(self, x): + return 1/(1+np.exp(-x)) + + def fit(Y): + + def fit_alpha_beta_py(impressions, clicks, alpha0=1.5, beta0=5, niter=1000): + # based on https://github.com/lfiaschi/fastbetabino/blob/master/fastbetabino.pyx + + alpha_old=alpha0 + beta_old=beta0 + + for it in range(niter): + + alpha=alpha_old*\ + (sum(digamma(c + alpha_old) - digamma(alpha_old) for c,i in zip(clicks,impressions)))/\ + (sum(digamma(i + alpha_old+beta_old) - digamma(alpha_old+beta_old) for c,i in 
zip(clicks,impressions))) + + + beta=beta_old*\ + (sum(digamma(i-c + beta_old) - digamma(beta_old) for c,i in zip(clicks,impressions)))/\ + (sum(digamma(i + alpha_old+beta_old) - digamma(alpha_old+beta_old) for c,i in zip(clicks,impressions))) + + + #print('alpha {} | {} beta {} | {}'.format(alpha,alpha_old,beta,beta_old)) + sys.stdout.flush() + + if np.abs(alpha-alpha_old) and np.abs(beta-beta_old)<1e-10: + #print('early stop') + break + + alpha_old=alpha + beta_old=beta + + return alpha, beta + + imps = np.ones_like(Y) + alpha, beta = fit_alpha_beta_py(imps, Y) # use scipy's implementation + return np.array([np.log(alpha), np.log(beta)]) + + def sample(self, m): + return np.array([self.dist.rvs() for i in range(m)]) + + def __getattr__(self, name): # gives us access to Laplace.mean() required for RegressionDist.predict() + if name in dir(self.dist): + return getattr(self.dist, name) + return None + + @property + def params(self): + return {'alpha':self.alpha, 'beta':self.beta} \ No newline at end of file From c30c64e5e6962a6407856721fff4081746df7a2d Mon Sep 17 00:00:00 2001 From: guyko81 Date: Sat, 20 Jun 2020 00:05:00 +0100 Subject: [PATCH 2/4] fix import --- ngboost/distns/betabernoulli.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/ngboost/distns/betabernoulli.py b/ngboost/distns/betabernoulli.py index b1452d9d..7f0c6567 100644 --- a/ngboost/distns/betabernoulli.py +++ b/ngboost/distns/betabernoulli.py @@ -1,13 +1,10 @@ -from scipy.stats import betabinom as dist -from scipy.stats import beta as betadist +from scipy.stats import betabinom import numpy as np from ngboost.distns.distn import RegressionDistn from ngboost.scores import LogScore -from scipy.special import polygamma, gamma, digamma -from scipy.special import beta as betafunction -from fastbetabino import * +from scipy.special import digamma from array import array -import sys +import sys class BetaBernoulliLogScore(LogScore): def score(self, Y): From 98c950876366c2f9d19736b1671a3bc14f500942 Mon Sep 17 00:00:00 2001 From: guyko81 Date: Sat, 20 Jun 2020 00:06:11 +0100 Subject: [PATCH 3/4] fix import dist --- ngboost/distns/betabernoulli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ngboost/distns/betabernoulli.py b/ngboost/distns/betabernoulli.py index 7f0c6567..16d4d12e 100644 --- a/ngboost/distns/betabernoulli.py +++ b/ngboost/distns/betabernoulli.py @@ -1,4 +1,4 @@ -from scipy.stats import betabinom +from scipy.stats import betabinom as dist import numpy as np from ngboost.distns.distn import RegressionDistn from ngboost.scores import LogScore From ef7c60fcb0ea179815788ca2094a27ff5c5c80ef Mon Sep 17 00:00:00 2001 From: guyko81 Date: Sun, 21 Jun 2020 12:47:56 +0100 Subject: [PATCH 4/4] implementing different Fisher Information matrices --- ngboost/distns/betabernoulli.py | 202 ++++++++++++++++++++------------ 1 file changed, 130 insertions(+), 72 deletions(-) diff --git a/ngboost/distns/betabernoulli.py b/ngboost/distns/betabernoulli.py index 16d4d12e..c92dab9f 100644 --- a/ngboost/distns/betabernoulli.py +++ b/ngboost/distns/betabernoulli.py @@ -2,58 +2,93 @@ import numpy as np from ngboost.distns.distn import RegressionDistn from ngboost.scores import LogScore -from scipy.special import digamma -from array import array +from scipy.special import digamma, polygamma +from array import array import sys -class BetaBernoulliLogScore(LogScore): - + +class BetaBernoulliLogScore(LogScore): def score(self, Y): return -self.dist.logpmf(Y) + def d_alpha(self, Y): + return 
self.alpha * ( + digamma(self.alpha + self.beta) + + digamma(Y + self.alpha) + - digamma(self.alpha + self.beta + 1) + - digamma(self.alpha) + ) + + def d_beta(self, Y): + return self.beta * ( + digamma(self.alpha + self.beta) + + digamma(-Y + self.beta + 1) + - digamma(self.alpha + self.beta + 1) + - digamma(self.beta) + ) + + def d_alpha_alpha(self, Y): + return ( + (polygamma(1, Y + self.alpha) + + polygamma(1, self.alpha + self.beta) - + polygamma(1, self.alpha + self.beta + 1) - + polygamma(1, self.alpha) + ) * self.alpha**2 + self.d_alpha(Y) + ) + + def d_alpha_beta(self, Y): + return ( + self.alpha * self.beta * ( + polygamma(1, self.alpha + self.beta) - + polygamma(1, self.alpha + self.beta + 1) + ) + ) + + def d_beta_beta(self, Y): + return ( + (polygamma(1, -Y + self.beta + 1) + + polygamma(1, self.alpha + self.beta) - + polygamma(1, self.alpha + self.beta + 1) - + polygamma(1, self.beta) + ) * self.beta**2 + self.d_beta(Y) + ) def d_score(self, Y): - D = np.zeros((len(Y), 2)) # first col is dS/d(log(α)), second col is dS/d(log(β)) - - D[:, 0] = -self.alpha * ( - digamma(self.alpha + self.beta) + - digamma(Y + self.alpha) - - digamma(self.alpha + self.beta + 1) - - digamma(self.alpha) - ) - D[:, 1] = -self.beta * ( - digamma(self.alpha + self.beta) + - digamma(-Y + self.beta + 1) - - digamma(self.alpha + self.beta + 1) - - digamma(self.beta) - ) + D = np.zeros( + (len(Y), 2) + ) # first col is dS/d(log(α)), second col is dS/d(log(β)) + D[:, 0] = -self.d_alpha(Y) + D[:, 1] = -self.d_beta(Y) return D - + + # Variance def metric(self): FI = np.zeros((self.alpha.shape[0], 2, 2)) - FI[:, 0, 0] = ((self.alpha * ( - digamma(self.alpha + self.beta) + - digamma(0 + self.alpha) - - digamma(self.alpha + self.beta + 1) - - digamma(self.alpha) - ))**2 * self.dist.pmf(0) + - (self.alpha * ( - digamma(self.alpha + self.beta) + - digamma(1 + self.alpha) - - digamma(self.alpha + self.beta + 1) - - digamma(self.alpha) - ))**2 * self.dist.pmf(1)) - FI[:, 1, 1] = ((self.beta * ( - digamma(self.alpha + self.beta) + - digamma(-0 + self.beta + 1) - - digamma(self.alpha + self.beta + 1) - - digamma(self.beta) - ))**2 * self.dist.pmf(0) + - (self.beta * ( - digamma(self.alpha + self.beta) + - digamma(-1 + self.beta + 1) - - digamma(self.alpha + self.beta + 1) - - digamma(self.beta) - ))**2 * self.dist.pmf(1)) + FI[:, 0, 0] = ( + self.d_alpha(0)*self.d_alpha(0)*self.dist.pmf(0) + + self.d_alpha(1)*self.d_alpha(1)*self.dist.pmf(1) + ) + # FI[:, 1, 0] = ( + # self.d_alpha(0)*self.d_beta(0)*self.dist.pmf(0) + + # self.d_alpha(1)*self.d_beta(1)*self.dist.pmf(1) + # ) + # FI[:, 0, 1] = ( + # self.d_alpha(0)*self.d_beta(0)*self.dist.pmf(0) + + # self.d_alpha(1)*self.d_beta(1)*self.dist.pmf(1) + # ) + FI[:, 1, 1] = ( + self.d_beta(0)*self.d_beta(0)*self.dist.pmf(0) + + self.d_beta(1)*self.d_beta(1)*self.dist.pmf(1) + ) return FI + + # Hessian + # def metric(self): + # FI = np.zeros((self.alpha.shape[0], 2, 2)) + # FI[:, 0, 0] = self.d_alpha_alpha(0) * self.dist.pmf(0) + self.d_alpha_alpha(1) * self.dist.pmf(1) + # # FI[:, 1, 0] = self.d_alpha_beta(0) * self.dist.pmf(0) + self.d_alpha_beta(1) * self.dist.pmf(1) + # # FI[:, 0, 1] = self.d_alpha_beta(0) * self.dist.pmf(0) + self.d_alpha_beta(1) * self.dist.pmf(1) + # FI[:, 1, 1] = self.d_beta_beta(0) * self.dist.pmf(0) + self.d_beta_beta(1) * self.dist.pmf(1) + # return FI + class BetaBernoulli(RegressionDistn): n_params = 2 @@ -62,7 +97,7 @@ class BetaBernoulli(RegressionDistn): def __init__(self, params): # save the parameters self._params = params - + # create 
other objects that will be useful later
         self.log_alpha = params[0]
         self.log_beta = params[1]
@@ -70,53 +105,76 @@ def __init__(self, params):
         self.beta = np.exp(self.log_beta)
         self.dist = dist(n=1, a=self.alpha, b=self.beta)
 
-    def sigmoid(self, x):
-        return 1/(1+np.exp(-x))
-
     def fit(Y):
-
-        def fit_alpha_beta_py(impressions, clicks, alpha0=1.5, beta0=5, niter=1000):
+        def fit_alpha_beta_py(success, alpha0=1.5, beta0=5, niter=1000):
             # based on https://github.com/lfiaschi/fastbetabino/blob/master/fastbetabino.pyx
 
-            alpha_old=alpha0
-            beta_old=beta0
-
-            for it in range(niter):
-
-                alpha=alpha_old*\
-                (sum(digamma(c + alpha_old) - digamma(alpha_old) for c,i in zip(clicks,impressions)))/\
-                (sum(digamma(i + alpha_old+beta_old) - digamma(alpha_old+beta_old) for c,i in zip(clicks,impressions)))
+            # This optimisation works for the Beta-Binomial distribution in general.
+            # For Beta-Bernoulli it is simplified by fixing the number of trials to 1.
+            trials = np.ones_like(Y)
+
+            alpha_old = alpha0
+            beta_old = beta0
 
-                beta=beta_old*\
-                (sum(digamma(i-c + beta_old) - digamma(beta_old) for c,i in zip(clicks,impressions)))/\
-                (sum(digamma(i + alpha_old+beta_old) - digamma(alpha_old+beta_old) for c,i in zip(clicks,impressions)))
+            for it in range(niter):
+                alpha = (
+                    alpha_old
+                    * (
+                        sum(
+                            digamma(c + alpha_old) - digamma(alpha_old)
+                            for c, i in zip(success, trials)
+                        )
+                    )
+                    / (
+                        sum(
+                            digamma(i + alpha_old + beta_old)
+                            - digamma(alpha_old + beta_old)
+                            for c, i in zip(success, trials)
+                        )
+                    )
+                )
+
+                beta = (
+                    beta_old
+                    * (
+                        sum(
+                            digamma(i - c + beta_old) - digamma(beta_old)
+                            for c, i in zip(success, trials)
+                        )
+                    )
+                    / (
+                        sum(
+                            digamma(i + alpha_old + beta_old)
+                            - digamma(alpha_old + beta_old)
+                            for c, i in zip(success, trials)
+                        )
+                    )
+                )
 
-                #print('alpha {} | {} beta {} | {}'.format(alpha,alpha_old,beta,beta_old))
+                # print('alpha {} | {} beta {} | {}'.format(alpha,alpha_old,beta,beta_old))
                 sys.stdout.flush()
 
-                if np.abs(alpha-alpha_old) and np.abs(beta-beta_old)<1e-10:
-                    #print('early stop')
+                if np.abs(alpha - alpha_old) < 1e-10 and np.abs(beta - beta_old) < 1e-10:
+                    # print('early stop')
                    break
 
-                alpha_old=alpha
-                beta_old=beta
+                alpha_old = alpha
+                beta_old = beta
 
             return alpha, beta
-
-        imps = np.ones_like(Y)
-        alpha, beta = fit_alpha_beta_py(imps, Y) # use scipy's implementation
+
+        alpha, beta = fit_alpha_beta_py(Y)
         return np.array([np.log(alpha), np.log(beta)])
 
     def sample(self, m):
         return np.array([self.dist.rvs() for i in range(m)])
-
-    def __getattr__(self, name): # gives us access to Laplace.mean() required for RegressionDist.predict()
+
+    def __getattr__(self, name):  # forward attribute lookups (e.g. mean()) to the scipy betabinom object; RegressionDistn.predict() relies on mean()
         if name in dir(self.dist):
             return getattr(self.dist, name)
         return None
-
+
     @property
     def params(self):
-        return {'alpha':self.alpha, 'beta':self.beta}
\ No newline at end of file
+        return {"alpha": self.alpha, "beta": self.beta}
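
A note on the two metric() implementations in the last patch. For the log score, the Fisher information with respect to (log(alpha), log(beta)) can be computed either as the expected outer product of the gradient of the log-likelihood (the active "Variance" version) or as the negative expected Hessian (the commented-out "Hessian" version); the standard identity E[(d log p)(d log p)^T] = -E[d^2 log p] guarantees the two agree. As written, the commented-out variant sums the Hessian terms without the minus sign, so re-enabling it verbatim would return the negative of the variance version. The short standalone check below illustrates this for the (alpha, alpha) entry using the same digamma/polygamma expressions as the patch; it is not part of the patches and the parameter values are arbitrary.

import numpy as np
from scipy.special import digamma, polygamma
from scipy.stats import betabinom

alpha, beta = 2.3, 0.7
pmf = betabinom(n=1, a=alpha, b=beta).pmf  # Beta-Bernoulli = Beta-Binomial with n=1

def d_alpha(y):
    # d log p(y) / d log(alpha), same expression as BetaBernoulliLogScore.d_alpha
    return alpha * (digamma(alpha + beta) + digamma(y + alpha)
                    - digamma(alpha + beta + 1) - digamma(alpha))

def d_alpha_alpha(y):
    # d^2 log p(y) / d log(alpha)^2, same expression as the commented-out Hessian code
    return alpha ** 2 * (polygamma(1, y + alpha) + polygamma(1, alpha + beta)
                         - polygamma(1, alpha + beta + 1) - polygamma(1, alpha)) + d_alpha(y)

variance_version = sum(d_alpha(y) ** 2 * pmf(y) for y in (0, 1))
hessian_version = sum(d_alpha_alpha(y) * pmf(y) for y in (0, 1))

print(variance_version, hessian_version)  # same magnitude, opposite sign
assert np.isclose(variance_version, -hessian_version)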
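Finally, a minimal end-to-end sketch of how the new distribution would be used once this branch is installed. Because BetaBernoulli derives from RegressionDistn, the sketch wraps it in NGBRegressor and feeds it binary 0/1 targets; the synthetic data and hyperparameter choices are illustrative assumptions rather than part of the patches.

import numpy as np
from ngboost import NGBRegressor
from ngboost.distns import BetaBernoulli
from ngboost.scores import LogScore

rng = np.random.default_rng(0)
X = rng.normal(size=(500, 5))
# binary outcomes whose success probability depends on the first feature
y = (rng.random(500) < 1.0 / (1.0 + np.exp(-X[:, 0]))).astype(float)

ngb = NGBRegressor(Dist=BetaBernoulli, Score=LogScore, n_estimators=200, verbose=False)
ngb.fit(X, y)

pred = ngb.pred_dist(X[:3])
print(pred.params)         # per-observation alpha and beta
print(ngb.predict(X[:3]))  # predictive mean alpha / (alpha + beta), via the __getattr__ forwarding to betabinom.mean()

Whether the distribution should instead expose a classification interface (class probabilities through NGBClassifier) is a separate design question for this PR; as posted, the RegressionDistn base makes NGBRegressor the natural wrapper.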