popt.loop.dist
1# External imports 2import numpy as np 3from scipy import stats 4from scipy.special import polygamma, digamma 5 6# Internal imports 7from popt.misc_tools import optim_tools as ot 8 9 10class GenOptDistribution: 11 ''' 12 Class that contains all the operations on the mutation distribution of GenOpt 13 ''' 14 def __init__(self, x, cov, theta0=[20.0, 20.0], func=None, ne=None): 15 ''' 16 Parameters 17 ---------- 18 x : array_like, shape (d,) 19 Initial control vector. Used initally to get the dimensionality of the problem. 20 21 cov : array_like, shape (d,d) 22 Initial covaraince matrix. Used to construct the correlation matrix and 23 epsilon parameter of GenOpt 24 25 theta0 : list, of length 2 ([alpha, beta]) 26 Initial alpha and beta parameter of the marginal Beta distributions. 27 28 func : callable (optional) 29 An objective function that can be used later for the gradeint. 30 Can also be passed directly to the gradeint fucntion. 31 32 ne : int 33 ''' 34 self.dim = x.size # dimension of state 35 self.corr = ot.cov2corr(cov) # initial correlation 36 self.var = np.diag(cov) # initial varaince 37 self.theta = np.tile(theta0, (self.dim,1)) # initial theta parameters, shape (dim, 2) 38 self.eps = var2eps(self.var, self.theta) # epsilon parameter(s). SET BY VARIANCE (NOT MANUALLY BU USER ANYMORE)! 39 self.func = func # an objective function (optional) 40 self.size = ne # ensemble size 41 42 def update_distribution(self, theta, corr): 43 ''' 44 Updates the parameters (theta and corr) of the distirbution. 45 46 Parameters 47 ---------- 48 theta : array_like, shape (d,2) 49 Contains the alpha (first column) and beta (second column) 50 of the marginal distirbutions. 51 52 corr : array_like, shape (d,d) 53 Correlation matrix 54 ''' 55 self.theta = theta 56 self.corr = corr 57 return 58 59 def get_theta(self): 60 return self.theta 61 62 def get_corr(self): 63 return self.corr 64 65 def get_cov(self): 66 67 std = np.zeros(self.dim) 68 for d in range(self.dim): 69 std[d] = stats.beta(*self.theta[d]).std() * 2 * self.eps[d] 70 71 return ot.corr2cov(self.corr, std=std) 72 73 def sample(self, size): 74 ''' 75 Samples the mutation distribution as described in the GenOpt paper (NOT PUBLISHED YET!) 76 77 Parameters 78 ---------- 79 size: int 80 Ensemble size (ne). Size of the sample to be drawn. 81 82 Returns 83 ------- 84 out: tuple, (enZ, enX) 85 86 enZ : array_like, shape (ne,d) 87 Zero-mean Gaussain ensemble, drawn with the correlation matrix, corr 88 89 enX : array_like, shape (ne,d) 90 The drawn ensemble matrix, X ~ p(x|θ,R) (GenOpt pdf) 91 ''' 92 # Sample normal distribution with correlation 93 enZ = np.random.multivariate_normal(mean=np.zeros(self.dim), 94 cov=self.corr, 95 size=size) 96 97 # Transform Z to a uniform variable U 98 enU = stats.norm.cdf(enZ) 99 100 # Initialize enX 101 enX = np.zeros_like(enZ) 102 103 # Loop over dim 104 for d in range(self.dim): 105 marginal = stats.beta(*self.theta[d]) # Make marginal dist. 106 enX[:,d] = marginal.ppf(enU[:,d]) # Transform U to marginal variables X 107 108 return enZ, enX 109 110 def epsilon_transformation(self, x, enX): 111 ''' 112 Performs the epsilon transformation, 113 X ∈ [0, 1] ---> Y ∈ [x-ε, x+ε] 114 115 Parameters 116 ---------- 117 x : array_like, shape (d,) 118 Current state vector. 119 120 enX : array_like, shape (ne,d) 121 Ensemble matrix X sampled from GenOpt distribution 122 123 Returns 124 ------- 125 out : array_like, shape (ne,d) 126 Epsilon transfromed ensemble matrix, Y 127 ''' 128 enY = np.zeros_like(enX) # tranfomred ensemble 129 130 # loop over dimenstion 131 for d, xd in enumerate(x): 132 eps = self.eps[d] 133 134 a = (xd-eps) - ( (xd-eps)*(xd-eps < 0) ) \ 135 - ( (xd+eps-1)*(xd+eps > 1) ) \ 136 + (xd+eps-1)*(xd-eps < 0)*(xd+eps > 1) #Lower bound of ensemble 137 138 b = (xd+eps) - ( (xd-eps)*(xd-eps < 0) ) \ 139 - ( (xd+eps-1)*(xd+eps > 1) ) \ 140 + (xd-eps)*(xd-eps < 0)*(xd+eps > 1) #Upper bound of ensemble 141 142 enY[:,d] = a + enX[:, d]*(b-a) #Component-wise trafo. 143 144 return enY 145 146 def gradient(self, x, *args, **kwargs): 147 ''' 148 Calcualtes the average gradient of func using Stein's Lemma. 149 Described in GenOpt paper. 150 151 Parameters 152 ---------- 153 x : array_like, shape (d,) 154 Current state vector. 155 156 args : (theta, corr) 157 theta (parameters of distribution), shape (d,2) 158 corr (correlation matrix), shape (d,d) 159 160 kwargs : 161 func : callable objectvie function 162 ne : ensemble size 163 164 Returns 165 ------- 166 out : array_like, shape (d,) 167 The average gradient. 168 ''' 169 # check for objective fucntion 170 if 'func' in kwargs: 171 func = kwargs.get('func') 172 elif self.func is None: 173 raise ValueError('No objectvie fucntion given. Please pass keyword argument: func=<your func>') 174 else: 175 func = self.func 176 177 # check for ensemble size 178 if 'ne' in kwargs: 179 ne = kwargs.get('ne') 180 elif self.size is None: 181 ne = max(int(0.25*self.dim), 5) 182 else: 183 ne = self.size 184 185 # update dist 186 if args: 187 self.update_distribution(*args) 188 189 # sample and eps-transfrom 190 self.enZ, self.enX = self.sample(size=ne) 191 self.enY = self.epsilon_transformation(x, self.enX) 192 self.enJ = func(self.enY.T) 193 194 # calculate the ensemble gradient 195 self.enJ = self.enJ[:,np.newaxis] # shape (ne,1) 196 self.enZ = self.enZ.T # shape (d,ne) 197 self.enX = self.enX.T # shape (d,ne) 198 alphas = (self.theta[:,0])[:,np.newaxis] # shape (d,1) 199 betas = (self.theta[:,1])[:,np.newaxis] # shape (d,1) 200 201 H = np.linalg.inv(self.corr) - np.identity(self.dim) # shape (d, d) 202 N = stats.norm.pdf(self.enZ) # shape (d, Ne) 203 M = np.zeros((self.dim, ne)) # shape (d, Ne) 204 205 for d in range(self.dim): 206 marginal = stats.beta(*self.theta[d]) 207 M[d,:] = marginal.pdf(self.enX[d]) 208 209 # get cov matrix 210 cov = self.get_cov() 211 212 # grad via Stein's lemma 213 ensembleX = (alphas-1)/self.enX - (betas-1)/(1-self.enX) - (H@self.enZ)*M/N 214 grad = -cov@ensembleX@self.enJ/(ne-1) 215 grad = 2*self.eps*np.squeeze(grad) # scale gradient. grad-shape (d,) 216 217 # mutation gradient 218 finv = lambda th: np.linalg.inv(self.fisher_matrix(*th)) 219 fishers_inv = np.apply_along_axis(finv, axis=1, arr=self.theta) #shape (d, 2, 2) 220 221 log_term = np.array([np.log(self.enX), np.log(1-self.enX)]) 222 psi_term = np.array([np.apply_along_axis(delA, axis=1, arr=self.theta), 223 np.apply_along_axis(delA, axis=1, arr=np.flip(self.theta, axis=1))]) 224 225 ensembleTheta = log_term-psi_term[:,:,None] 226 self.grad_theta = np.sum(ensembleTheta*self.enJ.T, axis=-1)/(ne-1) 227 self.grad_theta = np.einsum('kij,jk->ki', fishers_inv, self.grad_theta, optimize=True) 228 229 return grad 230 231 def mutation_gradient(self, x=None, *args, **kwargs): 232 ''' 233 Returns the mutation gradient of theta. It is actually calulated in 234 self.ensemble_gradient. 235 236 Parameters 237 ---------- 238 kwargs: 239 return_ensemble : bool 240 If True, all the ensemble matrices are also returned in a dictionary. 241 242 Returns 243 ------- 244 out : array_like, shape (d,2) 245 Mutation gradeint of theta 246 247 NB! If return_ensembles=True, the ensmebles are also returned! 248 ''' 249 if 'return_ensembles' in kwargs: 250 ensembles = {'gaussian' : self.enZ, 251 'vanilla' : self.enX, 252 'transformed': self.enY, 253 'objective' : self.enJ} 254 return self.grad_theta, ensembles 255 else: 256 return self.grad_theta 257 258 def corr_gradient(self): 259 ''' 260 Returns the mutation gradeint of the correlation matrix 261 ''' 262 enZ = self.enZ 263 enJ = self.enJ 264 ne = np.squeeze(enJ).size 265 grad_corr = np.zeros_like(self.corr) 266 267 for n in range(ne): 268 grad_corr += enJ[n]*(np.outer(enZ[:,n], enZ[:,n]) - self.corr) 269 270 np.fill_diagonal(grad_corr, 0) 271 corr_gradient = grad_corr/(ne-1) 272 273 return corr_gradient 274 275 def fisher_matrix(self, alpha, beta): 276 ''' 277 Calculates the Fisher matrix of a Beta distribution. 278 279 Parameters 280 ---------------------------------------------- 281 alpha : float 282 alpha parameter in Beta distribution 283 284 beta : float 285 beta parameter in Beta distribution 286 287 Returns 288 ---------------------------------------------- 289 out : array_like, of shape (2, 2) 290 Fisher matrix 291 ''' 292 a = alpha 293 b = beta 294 295 upper_row = [polygamma(1, a) - polygamma(1, a+b), -polygamma(1, a + b)] 296 lower_row = [-polygamma(1, a + b), polygamma(1, b) - polygamma(1, a+b)] 297 fisher_matrix = np.array([upper_row, 298 lower_row]) 299 return fisher_matrix 300 301 302# Some helping functions 303def var2eps(var, theta): 304 alphas = theta[:,0] 305 betas = theta[:,1] 306 frac = alphas*betas / ( (alphas+betas)**2 * (alphas+betas+1) ) 307 epsilon = np.sqrt(0.25*var/frac) 308 return epsilon 309 310def delA(theta): 311 ''' 312 Calculates the expression psi(a) - psi(a+b), 313 where psi() is the digamma function. 314 315 Parameters 316 -------------------------------------------- 317 a : float 318 b : float 319 320 Returns 321 -------------------------------------------- 322 out : float 323 ''' 324 a = theta[0] 325 b = theta[1] 326 return digamma(a)-digamma(a+b)
11class GenOptDistribution: 12 ''' 13 Class that contains all the operations on the mutation distribution of GenOpt 14 ''' 15 def __init__(self, x, cov, theta0=[20.0, 20.0], func=None, ne=None): 16 ''' 17 Parameters 18 ---------- 19 x : array_like, shape (d,) 20 Initial control vector. Used initally to get the dimensionality of the problem. 21 22 cov : array_like, shape (d,d) 23 Initial covaraince matrix. Used to construct the correlation matrix and 24 epsilon parameter of GenOpt 25 26 theta0 : list, of length 2 ([alpha, beta]) 27 Initial alpha and beta parameter of the marginal Beta distributions. 28 29 func : callable (optional) 30 An objective function that can be used later for the gradeint. 31 Can also be passed directly to the gradeint fucntion. 32 33 ne : int 34 ''' 35 self.dim = x.size # dimension of state 36 self.corr = ot.cov2corr(cov) # initial correlation 37 self.var = np.diag(cov) # initial varaince 38 self.theta = np.tile(theta0, (self.dim,1)) # initial theta parameters, shape (dim, 2) 39 self.eps = var2eps(self.var, self.theta) # epsilon parameter(s). SET BY VARIANCE (NOT MANUALLY BU USER ANYMORE)! 40 self.func = func # an objective function (optional) 41 self.size = ne # ensemble size 42 43 def update_distribution(self, theta, corr): 44 ''' 45 Updates the parameters (theta and corr) of the distirbution. 46 47 Parameters 48 ---------- 49 theta : array_like, shape (d,2) 50 Contains the alpha (first column) and beta (second column) 51 of the marginal distirbutions. 52 53 corr : array_like, shape (d,d) 54 Correlation matrix 55 ''' 56 self.theta = theta 57 self.corr = corr 58 return 59 60 def get_theta(self): 61 return self.theta 62 63 def get_corr(self): 64 return self.corr 65 66 def get_cov(self): 67 68 std = np.zeros(self.dim) 69 for d in range(self.dim): 70 std[d] = stats.beta(*self.theta[d]).std() * 2 * self.eps[d] 71 72 return ot.corr2cov(self.corr, std=std) 73 74 def sample(self, size): 75 ''' 76 Samples the mutation distribution as described in the GenOpt paper (NOT PUBLISHED YET!) 77 78 Parameters 79 ---------- 80 size: int 81 Ensemble size (ne). Size of the sample to be drawn. 82 83 Returns 84 ------- 85 out: tuple, (enZ, enX) 86 87 enZ : array_like, shape (ne,d) 88 Zero-mean Gaussain ensemble, drawn with the correlation matrix, corr 89 90 enX : array_like, shape (ne,d) 91 The drawn ensemble matrix, X ~ p(x|θ,R) (GenOpt pdf) 92 ''' 93 # Sample normal distribution with correlation 94 enZ = np.random.multivariate_normal(mean=np.zeros(self.dim), 95 cov=self.corr, 96 size=size) 97 98 # Transform Z to a uniform variable U 99 enU = stats.norm.cdf(enZ) 100 101 # Initialize enX 102 enX = np.zeros_like(enZ) 103 104 # Loop over dim 105 for d in range(self.dim): 106 marginal = stats.beta(*self.theta[d]) # Make marginal dist. 107 enX[:,d] = marginal.ppf(enU[:,d]) # Transform U to marginal variables X 108 109 return enZ, enX 110 111 def epsilon_transformation(self, x, enX): 112 ''' 113 Performs the epsilon transformation, 114 X ∈ [0, 1] ---> Y ∈ [x-ε, x+ε] 115 116 Parameters 117 ---------- 118 x : array_like, shape (d,) 119 Current state vector. 120 121 enX : array_like, shape (ne,d) 122 Ensemble matrix X sampled from GenOpt distribution 123 124 Returns 125 ------- 126 out : array_like, shape (ne,d) 127 Epsilon transfromed ensemble matrix, Y 128 ''' 129 enY = np.zeros_like(enX) # tranfomred ensemble 130 131 # loop over dimenstion 132 for d, xd in enumerate(x): 133 eps = self.eps[d] 134 135 a = (xd-eps) - ( (xd-eps)*(xd-eps < 0) ) \ 136 - ( (xd+eps-1)*(xd+eps > 1) ) \ 137 + (xd+eps-1)*(xd-eps < 0)*(xd+eps > 1) #Lower bound of ensemble 138 139 b = (xd+eps) - ( (xd-eps)*(xd-eps < 0) ) \ 140 - ( (xd+eps-1)*(xd+eps > 1) ) \ 141 + (xd-eps)*(xd-eps < 0)*(xd+eps > 1) #Upper bound of ensemble 142 143 enY[:,d] = a + enX[:, d]*(b-a) #Component-wise trafo. 144 145 return enY 146 147 def gradient(self, x, *args, **kwargs): 148 ''' 149 Calcualtes the average gradient of func using Stein's Lemma. 150 Described in GenOpt paper. 151 152 Parameters 153 ---------- 154 x : array_like, shape (d,) 155 Current state vector. 156 157 args : (theta, corr) 158 theta (parameters of distribution), shape (d,2) 159 corr (correlation matrix), shape (d,d) 160 161 kwargs : 162 func : callable objectvie function 163 ne : ensemble size 164 165 Returns 166 ------- 167 out : array_like, shape (d,) 168 The average gradient. 169 ''' 170 # check for objective fucntion 171 if 'func' in kwargs: 172 func = kwargs.get('func') 173 elif self.func is None: 174 raise ValueError('No objectvie fucntion given. Please pass keyword argument: func=<your func>') 175 else: 176 func = self.func 177 178 # check for ensemble size 179 if 'ne' in kwargs: 180 ne = kwargs.get('ne') 181 elif self.size is None: 182 ne = max(int(0.25*self.dim), 5) 183 else: 184 ne = self.size 185 186 # update dist 187 if args: 188 self.update_distribution(*args) 189 190 # sample and eps-transfrom 191 self.enZ, self.enX = self.sample(size=ne) 192 self.enY = self.epsilon_transformation(x, self.enX) 193 self.enJ = func(self.enY.T) 194 195 # calculate the ensemble gradient 196 self.enJ = self.enJ[:,np.newaxis] # shape (ne,1) 197 self.enZ = self.enZ.T # shape (d,ne) 198 self.enX = self.enX.T # shape (d,ne) 199 alphas = (self.theta[:,0])[:,np.newaxis] # shape (d,1) 200 betas = (self.theta[:,1])[:,np.newaxis] # shape (d,1) 201 202 H = np.linalg.inv(self.corr) - np.identity(self.dim) # shape (d, d) 203 N = stats.norm.pdf(self.enZ) # shape (d, Ne) 204 M = np.zeros((self.dim, ne)) # shape (d, Ne) 205 206 for d in range(self.dim): 207 marginal = stats.beta(*self.theta[d]) 208 M[d,:] = marginal.pdf(self.enX[d]) 209 210 # get cov matrix 211 cov = self.get_cov() 212 213 # grad via Stein's lemma 214 ensembleX = (alphas-1)/self.enX - (betas-1)/(1-self.enX) - (H@self.enZ)*M/N 215 grad = -cov@ensembleX@self.enJ/(ne-1) 216 grad = 2*self.eps*np.squeeze(grad) # scale gradient. grad-shape (d,) 217 218 # mutation gradient 219 finv = lambda th: np.linalg.inv(self.fisher_matrix(*th)) 220 fishers_inv = np.apply_along_axis(finv, axis=1, arr=self.theta) #shape (d, 2, 2) 221 222 log_term = np.array([np.log(self.enX), np.log(1-self.enX)]) 223 psi_term = np.array([np.apply_along_axis(delA, axis=1, arr=self.theta), 224 np.apply_along_axis(delA, axis=1, arr=np.flip(self.theta, axis=1))]) 225 226 ensembleTheta = log_term-psi_term[:,:,None] 227 self.grad_theta = np.sum(ensembleTheta*self.enJ.T, axis=-1)/(ne-1) 228 self.grad_theta = np.einsum('kij,jk->ki', fishers_inv, self.grad_theta, optimize=True) 229 230 return grad 231 232 def mutation_gradient(self, x=None, *args, **kwargs): 233 ''' 234 Returns the mutation gradient of theta. It is actually calulated in 235 self.ensemble_gradient. 236 237 Parameters 238 ---------- 239 kwargs: 240 return_ensemble : bool 241 If True, all the ensemble matrices are also returned in a dictionary. 242 243 Returns 244 ------- 245 out : array_like, shape (d,2) 246 Mutation gradeint of theta 247 248 NB! If return_ensembles=True, the ensmebles are also returned! 249 ''' 250 if 'return_ensembles' in kwargs: 251 ensembles = {'gaussian' : self.enZ, 252 'vanilla' : self.enX, 253 'transformed': self.enY, 254 'objective' : self.enJ} 255 return self.grad_theta, ensembles 256 else: 257 return self.grad_theta 258 259 def corr_gradient(self): 260 ''' 261 Returns the mutation gradeint of the correlation matrix 262 ''' 263 enZ = self.enZ 264 enJ = self.enJ 265 ne = np.squeeze(enJ).size 266 grad_corr = np.zeros_like(self.corr) 267 268 for n in range(ne): 269 grad_corr += enJ[n]*(np.outer(enZ[:,n], enZ[:,n]) - self.corr) 270 271 np.fill_diagonal(grad_corr, 0) 272 corr_gradient = grad_corr/(ne-1) 273 274 return corr_gradient 275 276 def fisher_matrix(self, alpha, beta): 277 ''' 278 Calculates the Fisher matrix of a Beta distribution. 279 280 Parameters 281 ---------------------------------------------- 282 alpha : float 283 alpha parameter in Beta distribution 284 285 beta : float 286 beta parameter in Beta distribution 287 288 Returns 289 ---------------------------------------------- 290 out : array_like, of shape (2, 2) 291 Fisher matrix 292 ''' 293 a = alpha 294 b = beta 295 296 upper_row = [polygamma(1, a) - polygamma(1, a+b), -polygamma(1, a + b)] 297 lower_row = [-polygamma(1, a + b), polygamma(1, b) - polygamma(1, a+b)] 298 fisher_matrix = np.array([upper_row, 299 lower_row]) 300 return fisher_matrix
Class that contains all the operations on the mutation distribution of GenOpt
15 def __init__(self, x, cov, theta0=[20.0, 20.0], func=None, ne=None): 16 ''' 17 Parameters 18 ---------- 19 x : array_like, shape (d,) 20 Initial control vector. Used initally to get the dimensionality of the problem. 21 22 cov : array_like, shape (d,d) 23 Initial covaraince matrix. Used to construct the correlation matrix and 24 epsilon parameter of GenOpt 25 26 theta0 : list, of length 2 ([alpha, beta]) 27 Initial alpha and beta parameter of the marginal Beta distributions. 28 29 func : callable (optional) 30 An objective function that can be used later for the gradeint. 31 Can also be passed directly to the gradeint fucntion. 32 33 ne : int 34 ''' 35 self.dim = x.size # dimension of state 36 self.corr = ot.cov2corr(cov) # initial correlation 37 self.var = np.diag(cov) # initial varaince 38 self.theta = np.tile(theta0, (self.dim,1)) # initial theta parameters, shape (dim, 2) 39 self.eps = var2eps(self.var, self.theta) # epsilon parameter(s). SET BY VARIANCE (NOT MANUALLY BU USER ANYMORE)! 40 self.func = func # an objective function (optional) 41 self.size = ne # ensemble size
Parameters
- x (array_like, shape (d,)): Initial control vector. Used initally to get the dimensionality of the problem.
- cov (array_like, shape (d,d)): Initial covaraince matrix. Used to construct the correlation matrix and epsilon parameter of GenOpt
- theta0 (list, of length 2 ([alpha, beta])): Initial alpha and beta parameter of the marginal Beta distributions.
- func (callable (optional)): An objective function that can be used later for the gradeint. Can also be passed directly to the gradeint fucntion.
- ne (int):
43 def update_distribution(self, theta, corr): 44 ''' 45 Updates the parameters (theta and corr) of the distirbution. 46 47 Parameters 48 ---------- 49 theta : array_like, shape (d,2) 50 Contains the alpha (first column) and beta (second column) 51 of the marginal distirbutions. 52 53 corr : array_like, shape (d,d) 54 Correlation matrix 55 ''' 56 self.theta = theta 57 self.corr = corr 58 return
Updates the parameters (theta and corr) of the distirbution.
Parameters
- theta (array_like, shape (d,2)): Contains the alpha (first column) and beta (second column) of the marginal distirbutions.
- corr (array_like, shape (d,d)): Correlation matrix
74 def sample(self, size): 75 ''' 76 Samples the mutation distribution as described in the GenOpt paper (NOT PUBLISHED YET!) 77 78 Parameters 79 ---------- 80 size: int 81 Ensemble size (ne). Size of the sample to be drawn. 82 83 Returns 84 ------- 85 out: tuple, (enZ, enX) 86 87 enZ : array_like, shape (ne,d) 88 Zero-mean Gaussain ensemble, drawn with the correlation matrix, corr 89 90 enX : array_like, shape (ne,d) 91 The drawn ensemble matrix, X ~ p(x|θ,R) (GenOpt pdf) 92 ''' 93 # Sample normal distribution with correlation 94 enZ = np.random.multivariate_normal(mean=np.zeros(self.dim), 95 cov=self.corr, 96 size=size) 97 98 # Transform Z to a uniform variable U 99 enU = stats.norm.cdf(enZ) 100 101 # Initialize enX 102 enX = np.zeros_like(enZ) 103 104 # Loop over dim 105 for d in range(self.dim): 106 marginal = stats.beta(*self.theta[d]) # Make marginal dist. 107 enX[:,d] = marginal.ppf(enU[:,d]) # Transform U to marginal variables X 108 109 return enZ, enX
Samples the mutation distribution as described in the GenOpt paper (NOT PUBLISHED YET!)
Parameters
- size (int): Ensemble size (ne). Size of the sample to be drawn.
Returns
out (tuple, (enZ, enX)): enZ : array_like, shape (ne,d) Zero-mean Gaussain ensemble, drawn with the correlation matrix, corr
enX : array_like, shape (ne,d) The drawn ensemble matrix, X ~ p(x|θ,R) (GenOpt pdf)
111 def epsilon_transformation(self, x, enX): 112 ''' 113 Performs the epsilon transformation, 114 X ∈ [0, 1] ---> Y ∈ [x-ε, x+ε] 115 116 Parameters 117 ---------- 118 x : array_like, shape (d,) 119 Current state vector. 120 121 enX : array_like, shape (ne,d) 122 Ensemble matrix X sampled from GenOpt distribution 123 124 Returns 125 ------- 126 out : array_like, shape (ne,d) 127 Epsilon transfromed ensemble matrix, Y 128 ''' 129 enY = np.zeros_like(enX) # tranfomred ensemble 130 131 # loop over dimenstion 132 for d, xd in enumerate(x): 133 eps = self.eps[d] 134 135 a = (xd-eps) - ( (xd-eps)*(xd-eps < 0) ) \ 136 - ( (xd+eps-1)*(xd+eps > 1) ) \ 137 + (xd+eps-1)*(xd-eps < 0)*(xd+eps > 1) #Lower bound of ensemble 138 139 b = (xd+eps) - ( (xd-eps)*(xd-eps < 0) ) \ 140 - ( (xd+eps-1)*(xd+eps > 1) ) \ 141 + (xd-eps)*(xd-eps < 0)*(xd+eps > 1) #Upper bound of ensemble 142 143 enY[:,d] = a + enX[:, d]*(b-a) #Component-wise trafo. 144 145 return enY
Performs the epsilon transformation, X ∈ [0, 1] ---> Y ∈ [x-ε, x+ε]
Parameters
- x (array_like, shape (d,)): Current state vector.
- enX (array_like, shape (ne,d)): Ensemble matrix X sampled from GenOpt distribution
Returns
- out (array_like, shape (ne,d)): Epsilon transfromed ensemble matrix, Y
147 def gradient(self, x, *args, **kwargs): 148 ''' 149 Calcualtes the average gradient of func using Stein's Lemma. 150 Described in GenOpt paper. 151 152 Parameters 153 ---------- 154 x : array_like, shape (d,) 155 Current state vector. 156 157 args : (theta, corr) 158 theta (parameters of distribution), shape (d,2) 159 corr (correlation matrix), shape (d,d) 160 161 kwargs : 162 func : callable objectvie function 163 ne : ensemble size 164 165 Returns 166 ------- 167 out : array_like, shape (d,) 168 The average gradient. 169 ''' 170 # check for objective fucntion 171 if 'func' in kwargs: 172 func = kwargs.get('func') 173 elif self.func is None: 174 raise ValueError('No objectvie fucntion given. Please pass keyword argument: func=<your func>') 175 else: 176 func = self.func 177 178 # check for ensemble size 179 if 'ne' in kwargs: 180 ne = kwargs.get('ne') 181 elif self.size is None: 182 ne = max(int(0.25*self.dim), 5) 183 else: 184 ne = self.size 185 186 # update dist 187 if args: 188 self.update_distribution(*args) 189 190 # sample and eps-transfrom 191 self.enZ, self.enX = self.sample(size=ne) 192 self.enY = self.epsilon_transformation(x, self.enX) 193 self.enJ = func(self.enY.T) 194 195 # calculate the ensemble gradient 196 self.enJ = self.enJ[:,np.newaxis] # shape (ne,1) 197 self.enZ = self.enZ.T # shape (d,ne) 198 self.enX = self.enX.T # shape (d,ne) 199 alphas = (self.theta[:,0])[:,np.newaxis] # shape (d,1) 200 betas = (self.theta[:,1])[:,np.newaxis] # shape (d,1) 201 202 H = np.linalg.inv(self.corr) - np.identity(self.dim) # shape (d, d) 203 N = stats.norm.pdf(self.enZ) # shape (d, Ne) 204 M = np.zeros((self.dim, ne)) # shape (d, Ne) 205 206 for d in range(self.dim): 207 marginal = stats.beta(*self.theta[d]) 208 M[d,:] = marginal.pdf(self.enX[d]) 209 210 # get cov matrix 211 cov = self.get_cov() 212 213 # grad via Stein's lemma 214 ensembleX = (alphas-1)/self.enX - (betas-1)/(1-self.enX) - (H@self.enZ)*M/N 215 grad = -cov@ensembleX@self.enJ/(ne-1) 216 grad = 2*self.eps*np.squeeze(grad) # scale gradient. grad-shape (d,) 217 218 # mutation gradient 219 finv = lambda th: np.linalg.inv(self.fisher_matrix(*th)) 220 fishers_inv = np.apply_along_axis(finv, axis=1, arr=self.theta) #shape (d, 2, 2) 221 222 log_term = np.array([np.log(self.enX), np.log(1-self.enX)]) 223 psi_term = np.array([np.apply_along_axis(delA, axis=1, arr=self.theta), 224 np.apply_along_axis(delA, axis=1, arr=np.flip(self.theta, axis=1))]) 225 226 ensembleTheta = log_term-psi_term[:,:,None] 227 self.grad_theta = np.sum(ensembleTheta*self.enJ.T, axis=-1)/(ne-1) 228 self.grad_theta = np.einsum('kij,jk->ki', fishers_inv, self.grad_theta, optimize=True) 229 230 return grad
Calcualtes the average gradient of func using Stein's Lemma. Described in GenOpt paper.
Parameters
- x (array_like, shape (d,)): Current state vector.
- args ((theta, corr)): theta (parameters of distribution), shape (d,2) corr (correlation matrix), shape (d,d)
- kwargs :: func : callable objectvie function ne : ensemble size
Returns
- out (array_like, shape (d,)): The average gradient.
232 def mutation_gradient(self, x=None, *args, **kwargs): 233 ''' 234 Returns the mutation gradient of theta. It is actually calulated in 235 self.ensemble_gradient. 236 237 Parameters 238 ---------- 239 kwargs: 240 return_ensemble : bool 241 If True, all the ensemble matrices are also returned in a dictionary. 242 243 Returns 244 ------- 245 out : array_like, shape (d,2) 246 Mutation gradeint of theta 247 248 NB! If return_ensembles=True, the ensmebles are also returned! 249 ''' 250 if 'return_ensembles' in kwargs: 251 ensembles = {'gaussian' : self.enZ, 252 'vanilla' : self.enX, 253 'transformed': self.enY, 254 'objective' : self.enJ} 255 return self.grad_theta, ensembles 256 else: 257 return self.grad_theta
Returns the mutation gradient of theta. It is actually calulated in self.ensemble_gradient.
Parameters
- kwargs:: return_ensemble : bool If True, all the ensemble matrices are also returned in a dictionary.
Returns
- out (array_like, shape (d,2)): Mutation gradeint of theta
- NB! If return_ensembles=True, the ensmebles are also returned!
259 def corr_gradient(self): 260 ''' 261 Returns the mutation gradeint of the correlation matrix 262 ''' 263 enZ = self.enZ 264 enJ = self.enJ 265 ne = np.squeeze(enJ).size 266 grad_corr = np.zeros_like(self.corr) 267 268 for n in range(ne): 269 grad_corr += enJ[n]*(np.outer(enZ[:,n], enZ[:,n]) - self.corr) 270 271 np.fill_diagonal(grad_corr, 0) 272 corr_gradient = grad_corr/(ne-1) 273 274 return corr_gradient
Returns the mutation gradeint of the correlation matrix
276 def fisher_matrix(self, alpha, beta): 277 ''' 278 Calculates the Fisher matrix of a Beta distribution. 279 280 Parameters 281 ---------------------------------------------- 282 alpha : float 283 alpha parameter in Beta distribution 284 285 beta : float 286 beta parameter in Beta distribution 287 288 Returns 289 ---------------------------------------------- 290 out : array_like, of shape (2, 2) 291 Fisher matrix 292 ''' 293 a = alpha 294 b = beta 295 296 upper_row = [polygamma(1, a) - polygamma(1, a+b), -polygamma(1, a + b)] 297 lower_row = [-polygamma(1, a + b), polygamma(1, b) - polygamma(1, a+b)] 298 fisher_matrix = np.array([upper_row, 299 lower_row]) 300 return fisher_matrix
Calculates the Fisher matrix of a Beta distribution.
Parameters
- alpha (float): alpha parameter in Beta distribution
- beta (float): beta parameter in Beta distribution
Returns
- out (array_like, of shape (2, 2)): Fisher matrix
311def delA(theta): 312 ''' 313 Calculates the expression psi(a) - psi(a+b), 314 where psi() is the digamma function. 315 316 Parameters 317 -------------------------------------------- 318 a : float 319 b : float 320 321 Returns 322 -------------------------------------------- 323 out : float 324 ''' 325 a = theta[0] 326 b = theta[1] 327 return digamma(a)-digamma(a+b)
Calculates the expression psi(a) - psi(a+b), where psi() is the digamma function.
Parameters
a (float):
b (float):
Returns
- out (float):