From 98b5f94a6203c8780ea3ee9075105f44a4f8168a Mon Sep 17 00:00:00 2001 From: guyko81 Date: Sun, 14 Jun 2020 18:18:29 +0100 Subject: [PATCH 1/4] adding BetaBernoulli distribution with LogScore --- ngboost/distns/__init__.py | 1 + ngboost/distns/betabernoulli.py | 125 ++++++++++++++++++++++++++++++++ 2 files changed, 126 insertions(+) create mode 100644 ngboost/distns/betabernoulli.py diff --git a/ngboost/distns/__init__.py b/ngboost/distns/__init__.py index 5308b667..1157775c 100644 --- a/ngboost/distns/__init__.py +++ b/ngboost/distns/__init__.py @@ -4,3 +4,4 @@ from .lognormal import LogNormal from .exponential import Exponential from .categorical import k_categorical, Bernoulli +from .betabernoulli import BetaBernoulli diff --git a/ngboost/distns/betabernoulli.py b/ngboost/distns/betabernoulli.py new file mode 100644 index 00000000..b1452d9d --- /dev/null +++ b/ngboost/distns/betabernoulli.py @@ -0,0 +1,125 @@ +from scipy.stats import betabinom as dist +from scipy.stats import beta as betadist +import numpy as np +from ngboost.distns.distn import RegressionDistn +from ngboost.scores import LogScore +from scipy.special import polygamma, gamma, digamma +from scipy.special import beta as betafunction +from fastbetabino import * +from array import array +import sys +class BetaBernoulliLogScore(LogScore): + + def score(self, Y): + return -self.dist.logpmf(Y) + + def d_score(self, Y): + D = np.zeros((len(Y), 2)) # first col is dS/d(log(α)), second col is dS/d(log(β)) + + D[:, 0] = -self.alpha * ( + digamma(self.alpha + self.beta) + + digamma(Y + self.alpha) - + digamma(self.alpha + self.beta + 1) - + digamma(self.alpha) + ) + D[:, 1] = -self.beta * ( + digamma(self.alpha + self.beta) + + digamma(-Y + self.beta + 1) - + digamma(self.alpha + self.beta + 1) - + digamma(self.beta) + ) + return D + + def metric(self): + FI = np.zeros((self.alpha.shape[0], 2, 2)) + FI[:, 0, 0] = ((self.alpha * ( + digamma(self.alpha + self.beta) + + digamma(0 + self.alpha) - + digamma(self.alpha + self.beta + 1) - + digamma(self.alpha) + ))**2 * self.dist.pmf(0) + + (self.alpha * ( + digamma(self.alpha + self.beta) + + digamma(1 + self.alpha) - + digamma(self.alpha + self.beta + 1) - + digamma(self.alpha) + ))**2 * self.dist.pmf(1)) + FI[:, 1, 1] = ((self.beta * ( + digamma(self.alpha + self.beta) + + digamma(-0 + self.beta + 1) - + digamma(self.alpha + self.beta + 1) - + digamma(self.beta) + ))**2 * self.dist.pmf(0) + + (self.beta * ( + digamma(self.alpha + self.beta) + + digamma(-1 + self.beta + 1) - + digamma(self.alpha + self.beta + 1) - + digamma(self.beta) + ))**2 * self.dist.pmf(1)) + return FI +class BetaBernoulli(RegressionDistn): + + n_params = 2 + scores = [BetaBernoulliLogScore] + + def __init__(self, params): + # save the parameters + self._params = params + + # create other objects that will be useful later + self.log_alpha = params[0] + self.log_beta = params[1] + self.alpha = np.exp(self.log_alpha) + self.beta = np.exp(self.log_beta) + self.dist = dist(n=1, a=self.alpha, b=self.beta) + + def sigmoid(self, x): + return 1/(1+np.exp(-x)) + + def fit(Y): + + def fit_alpha_beta_py(impressions, clicks, alpha0=1.5, beta0=5, niter=1000): + # based on https://github.com/lfiaschi/fastbetabino/blob/master/fastbetabino.pyx + + alpha_old=alpha0 + beta_old=beta0 + + for it in range(niter): + + alpha=alpha_old*\ + (sum(digamma(c + alpha_old) - digamma(alpha_old) for c,i in zip(clicks,impressions)))/\ + (sum(digamma(i + alpha_old+beta_old) - digamma(alpha_old+beta_old) for c,i in 
zip(clicks,impressions))) + + + beta=beta_old*\ + (sum(digamma(i-c + beta_old) - digamma(beta_old) for c,i in zip(clicks,impressions)))/\ + (sum(digamma(i + alpha_old+beta_old) - digamma(alpha_old+beta_old) for c,i in zip(clicks,impressions))) + + + #print('alpha {} | {} beta {} | {}'.format(alpha,alpha_old,beta,beta_old)) + sys.stdout.flush() + + if np.abs(alpha-alpha_old) and np.abs(beta-beta_old)<1e-10: + #print('early stop') + break + + alpha_old=alpha + beta_old=beta + + return alpha, beta + + imps = np.ones_like(Y) + alpha, beta = fit_alpha_beta_py(imps, Y) # use scipy's implementation + return np.array([np.log(alpha), np.log(beta)]) + + def sample(self, m): + return np.array([self.dist.rvs() for i in range(m)]) + + def __getattr__(self, name): # gives us access to Laplace.mean() required for RegressionDist.predict() + if name in dir(self.dist): + return getattr(self.dist, name) + return None + + @property + def params(self): + return {'alpha':self.alpha, 'beta':self.beta} \ No newline at end of file From c30c64e5e6962a6407856721fff4081746df7a2d Mon Sep 17 00:00:00 2001 From: guyko81 Date: Sat, 20 Jun 2020 00:05:00 +0100 Subject: [PATCH 2/4] fix import --- ngboost/distns/betabernoulli.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/ngboost/distns/betabernoulli.py b/ngboost/distns/betabernoulli.py index b1452d9d..7f0c6567 100644 --- a/ngboost/distns/betabernoulli.py +++ b/ngboost/distns/betabernoulli.py @@ -1,13 +1,10 @@ -from scipy.stats import betabinom as dist -from scipy.stats import beta as betadist +from scipy.stats import betabinom import numpy as np from ngboost.distns.distn import RegressionDistn from ngboost.scores import LogScore -from scipy.special import polygamma, gamma, digamma -from scipy.special import beta as betafunction -from fastbetabino import * +from scipy.special import digamma from array import array -import sys +import sys class BetaBernoulliLogScore(LogScore): def score(self, Y): From 98c950876366c2f9d19736b1671a3bc14f500942 Mon Sep 17 00:00:00 2001 From: guyko81 Date: Sat, 20 Jun 2020 00:06:11 +0100 Subject: [PATCH 3/4] fix import dist --- ngboost/distns/betabernoulli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ngboost/distns/betabernoulli.py b/ngboost/distns/betabernoulli.py index 7f0c6567..16d4d12e 100644 --- a/ngboost/distns/betabernoulli.py +++ b/ngboost/distns/betabernoulli.py @@ -1,4 +1,4 @@ -from scipy.stats import betabinom +from scipy.stats import betabinom as dist import numpy as np from ngboost.distns.distn import RegressionDistn from ngboost.scores import LogScore From ef7c60fcb0ea179815788ca2094a27ff5c5c80ef Mon Sep 17 00:00:00 2001 From: guyko81 Date: Sun, 21 Jun 2020 12:47:56 +0100 Subject: [PATCH 4/4] implementing different Fisher Information matrices --- ngboost/distns/betabernoulli.py | 202 ++++++++++++++++++++------------ 1 file changed, 130 insertions(+), 72 deletions(-) diff --git a/ngboost/distns/betabernoulli.py b/ngboost/distns/betabernoulli.py index 16d4d12e..c92dab9f 100644 --- a/ngboost/distns/betabernoulli.py +++ b/ngboost/distns/betabernoulli.py @@ -2,58 +2,93 @@ import numpy as np from ngboost.distns.distn import RegressionDistn from ngboost.scores import LogScore -from scipy.special import digamma -from array import array +from scipy.special import digamma, polygamma +from array import array import sys -class BetaBernoulliLogScore(LogScore): - + +class BetaBernoulliLogScore(LogScore): def score(self, Y): return -self.dist.logpmf(Y) + def d_alpha(self, Y): + return 
self.alpha * ( + digamma(self.alpha + self.beta) + + digamma(Y + self.alpha) + - digamma(self.alpha + self.beta + 1) + - digamma(self.alpha) + ) + + def d_beta(self, Y): + return self.beta * ( + digamma(self.alpha + self.beta) + + digamma(-Y + self.beta + 1) + - digamma(self.alpha + self.beta + 1) + - digamma(self.beta) + ) + + def d_alpha_alpha(self, Y): + return ( + (polygamma(1, Y + self.alpha) + + polygamma(1, self.alpha + self.beta) - + polygamma(1, self.alpha + self.beta + 1) - + polygamma(1, self.alpha) + ) * self.alpha**2 + self.d_alpha(Y) + ) + + def d_alpha_beta(self, Y): + return ( + self.alpha * self.beta * ( + polygamma(1, self.alpha + self.beta) - + polygamma(1, self.alpha + self.beta + 1) + ) + ) + + def d_beta_beta(self, Y): + return ( + (polygamma(1, -Y + self.beta + 1) + + polygamma(1, self.alpha + self.beta) - + polygamma(1, self.alpha + self.beta + 1) - + polygamma(1, self.beta) + ) * self.beta**2 + self.d_beta(Y) + ) def d_score(self, Y): - D = np.zeros((len(Y), 2)) # first col is dS/d(log(α)), second col is dS/d(log(β)) - - D[:, 0] = -self.alpha * ( - digamma(self.alpha + self.beta) + - digamma(Y + self.alpha) - - digamma(self.alpha + self.beta + 1) - - digamma(self.alpha) - ) - D[:, 1] = -self.beta * ( - digamma(self.alpha + self.beta) + - digamma(-Y + self.beta + 1) - - digamma(self.alpha + self.beta + 1) - - digamma(self.beta) - ) + D = np.zeros( + (len(Y), 2) + ) # first col is dS/d(log(α)), second col is dS/d(log(β)) + D[:, 0] = -self.d_alpha(Y) + D[:, 1] = -self.d_beta(Y) return D - + + # Variance def metric(self): FI = np.zeros((self.alpha.shape[0], 2, 2)) - FI[:, 0, 0] = ((self.alpha * ( - digamma(self.alpha + self.beta) + - digamma(0 + self.alpha) - - digamma(self.alpha + self.beta + 1) - - digamma(self.alpha) - ))**2 * self.dist.pmf(0) + - (self.alpha * ( - digamma(self.alpha + self.beta) + - digamma(1 + self.alpha) - - digamma(self.alpha + self.beta + 1) - - digamma(self.alpha) - ))**2 * self.dist.pmf(1)) - FI[:, 1, 1] = ((self.beta * ( - digamma(self.alpha + self.beta) + - digamma(-0 + self.beta + 1) - - digamma(self.alpha + self.beta + 1) - - digamma(self.beta) - ))**2 * self.dist.pmf(0) + - (self.beta * ( - digamma(self.alpha + self.beta) + - digamma(-1 + self.beta + 1) - - digamma(self.alpha + self.beta + 1) - - digamma(self.beta) - ))**2 * self.dist.pmf(1)) + FI[:, 0, 0] = ( + self.d_alpha(0)*self.d_alpha(0)*self.dist.pmf(0) + + self.d_alpha(1)*self.d_alpha(1)*self.dist.pmf(1) + ) + # FI[:, 1, 0] = ( + # self.d_alpha(0)*self.d_beta(0)*self.dist.pmf(0) + + # self.d_alpha(1)*self.d_beta(1)*self.dist.pmf(1) + # ) + # FI[:, 0, 1] = ( + # self.d_alpha(0)*self.d_beta(0)*self.dist.pmf(0) + + # self.d_alpha(1)*self.d_beta(1)*self.dist.pmf(1) + # ) + FI[:, 1, 1] = ( + self.d_beta(0)*self.d_beta(0)*self.dist.pmf(0) + + self.d_beta(1)*self.d_beta(1)*self.dist.pmf(1) + ) return FI + + # Hessian + # def metric(self): + # FI = np.zeros((self.alpha.shape[0], 2, 2)) + # FI[:, 0, 0] = self.d_alpha_alpha(0) * self.dist.pmf(0) + self.d_alpha_alpha(1) * self.dist.pmf(1) + # # FI[:, 1, 0] = self.d_alpha_beta(0) * self.dist.pmf(0) + self.d_alpha_beta(1) * self.dist.pmf(1) + # # FI[:, 0, 1] = self.d_alpha_beta(0) * self.dist.pmf(0) + self.d_alpha_beta(1) * self.dist.pmf(1) + # FI[:, 1, 1] = self.d_beta_beta(0) * self.dist.pmf(0) + self.d_beta_beta(1) * self.dist.pmf(1) + # return FI + class BetaBernoulli(RegressionDistn): n_params = 2 @@ -62,7 +97,7 @@ class BetaBernoulli(RegressionDistn): def __init__(self, params): # save the parameters self._params = params - + # create 
other objects that will be useful later
         self.log_alpha = params[0]
         self.log_beta = params[1]
@@ -70,53 +105,76 @@ def __init__(self, params):
         self.beta = np.exp(self.log_beta)
         self.dist = dist(n=1, a=self.alpha, b=self.beta)
 
-    def sigmoid(self, x):
-        return 1/(1+np.exp(-x))
-
     def fit(Y):
-
-        def fit_alpha_beta_py(impressions, clicks, alpha0=1.5, beta0=5, niter=1000):
+        def fit_alpha_beta_py(success, alpha0=1.5, beta0=5, niter=1000):
             # based on https://github.com/lfiaschi/fastbetabino/blob/master/fastbetabino.pyx
 
-            alpha_old=alpha0
-            beta_old=beta0
-
-            for it in range(niter):
-
-                alpha=alpha_old*\
-                (sum(digamma(c + alpha_old) - digamma(alpha_old) for c,i in zip(clicks,impressions)))/\
-                (sum(digamma(i + alpha_old+beta_old) - digamma(alpha_old+beta_old) for c,i in zip(clicks,impressions)))
+            # This optimisation works for the Beta-Binomial distribution in general.
+            # For Beta-Bernoulli it is simplified by fixing the number of trials to 1.
+            trials = np.ones_like(Y)
+
+            alpha_old = alpha0
+            beta_old = beta0
 
-                beta=beta_old*\
-                (sum(digamma(i-c + beta_old) - digamma(beta_old) for c,i in zip(clicks,impressions)))/\
-                (sum(digamma(i + alpha_old+beta_old) - digamma(alpha_old+beta_old) for c,i in zip(clicks,impressions)))
+            for it in range(niter):
+                alpha = (
+                    alpha_old
+                    * (
+                        sum(
+                            digamma(c + alpha_old) - digamma(alpha_old)
+                            for c, i in zip(success, trials)
+                        )
+                    )
+                    / (
+                        sum(
+                            digamma(i + alpha_old + beta_old)
+                            - digamma(alpha_old + beta_old)
+                            for c, i in zip(success, trials)
+                        )
+                    )
+                )
+
+                beta = (
+                    beta_old
+                    * (
+                        sum(
+                            digamma(i - c + beta_old) - digamma(beta_old)
+                            for c, i in zip(success, trials)
+                        )
+                    )
+                    / (
+                        sum(
+                            digamma(i + alpha_old + beta_old)
+                            - digamma(alpha_old + beta_old)
+                            for c, i in zip(success, trials)
+                        )
+                    )
+                )
 
-                #print('alpha {} | {} beta {} | {}'.format(alpha,alpha_old,beta,beta_old))
+                # print('alpha {} | {} beta {} | {}'.format(alpha,alpha_old,beta,beta_old))
                 sys.stdout.flush()
 
-                if np.abs(alpha-alpha_old) and np.abs(beta-beta_old)<1e-10:
-                    #print('early stop')
+                if np.abs(alpha - alpha_old) < 1e-10 and np.abs(beta - beta_old) < 1e-10:
+                    # print('early stop')
                    break
 
-                alpha_old=alpha
-                beta_old=beta
+                alpha_old = alpha
+                beta_old = beta
 
             return alpha, beta
-
-        imps = np.ones_like(Y)
-        alpha, beta = fit_alpha_beta_py(imps, Y) # use scipy's implementation
+
+        alpha, beta = fit_alpha_beta_py(Y)
         return np.array([np.log(alpha), np.log(beta)])
 
     def sample(self, m):
         return np.array([self.dist.rvs() for i in range(m)])
-
-    def __getattr__(self, name): # gives us access to Laplace.mean() required for RegressionDist.predict()
+
+    def __getattr__(self, name):  # forward attribute lookups (e.g. mean()) to the scipy betabinom object; RegressionDistn.predict() relies on mean()
         if name in dir(self.dist):
             return getattr(self.dist, name)
         return None
-
+
     @property
     def params(self):
-        return {'alpha':self.alpha, 'beta':self.beta}
\ No newline at end of file
+        return {"alpha": self.alpha, "beta": self.beta}
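
A note on the two metric() implementations in the last patch. For the log score, the Fisher information with respect to (log(alpha), log(beta)) can be computed either as the expected outer product of the gradient of the log-likelihood (the active "Variance" version) or as the negative expected Hessian (the commented-out "Hessian" version); the standard identity E[(d log p)(d log p)^T] = -E[d^2 log p] guarantees the two agree. As written, the commented-out variant sums the Hessian terms without the minus sign, so re-enabling it verbatim would return the negative of the variance version. The short standalone check below illustrates this for the (alpha, alpha) entry using the same digamma/polygamma expressions as the patch; it is not part of the patches and the parameter values are arbitrary.

import numpy as np
from scipy.special import digamma, polygamma
from scipy.stats import betabinom

alpha, beta = 2.3, 0.7
pmf = betabinom(n=1, a=alpha, b=beta).pmf  # Beta-Bernoulli = Beta-Binomial with n=1

def d_alpha(y):
    # d log p(y) / d log(alpha), same expression as BetaBernoulliLogScore.d_alpha
    return alpha * (digamma(alpha + beta) + digamma(y + alpha)
                    - digamma(alpha + beta + 1) - digamma(alpha))

def d_alpha_alpha(y):
    # d^2 log p(y) / d log(alpha)^2, same expression as the commented-out Hessian code
    return alpha ** 2 * (polygamma(1, y + alpha) + polygamma(1, alpha + beta)
                         - polygamma(1, alpha + beta + 1) - polygamma(1, alpha)) + d_alpha(y)

variance_version = sum(d_alpha(y) ** 2 * pmf(y) for y in (0, 1))
hessian_version = sum(d_alpha_alpha(y) * pmf(y) for y in (0, 1))

print(variance_version, hessian_version)  # same magnitude, opposite sign
assert np.isclose(variance_version, -hessian_version)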
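Finally, a minimal end-to-end sketch of how the new distribution would be used once this branch is installed. Because BetaBernoulli derives from RegressionDistn, the sketch wraps it in NGBRegressor and feeds it binary 0/1 targets; the synthetic data and hyperparameter choices are illustrative assumptions rather than part of the patches.

import numpy as np
from ngboost import NGBRegressor
from ngboost.distns import BetaBernoulli
from ngboost.scores import LogScore

rng = np.random.default_rng(0)
X = rng.normal(size=(500, 5))
# binary outcomes whose success probability depends on the first feature
y = (rng.random(500) < 1.0 / (1.0 + np.exp(-X[:, 0]))).astype(float)

ngb = NGBRegressor(Dist=BetaBernoulli, Score=LogScore, n_estimators=200, verbose=False)
ngb.fit(X, y)

pred = ngb.pred_dist(X[:3])
print(pred.params)         # per-observation alpha and beta
print(ngb.predict(X[:3]))  # predictive mean alpha / (alpha + beta), via the __getattr__ forwarding to betabinom.mean()

Whether the distribution should instead expose a classification interface (class probabilities through NGBClassifier) is a separate design question for this PR; as posted, the RegressionDistn base makes NGBRegressor the natural wrapper.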