popt.loop.dist

  1# External imports 
  2import numpy as np
  3from scipy import stats
  4from scipy.special import polygamma, digamma
  5
  6# Internal imports
  7from popt.misc_tools import optim_tools as ot
  8
  9
 10class GenOptDistribution:
 11    '''
 12    Class that contains all the operations on the mutation distribution of GenOpt
 13    '''
 14    def __init__(self, x, cov, theta0=[20.0, 20.0], func=None, ne=None):
 15        '''
 16        Parameters
 17        ----------
 18        x : array_like, shape (d,)
 19            Initial control vector. Used initally to get the dimensionality of the problem.
 20        
 21        cov : array_like, shape (d,d)
 22            Initial covaraince matrix. Used to construct the correlation matrix and 
 23            epsilon parameter of GenOpt
 24        
 25        theta0 : list, of length 2 ([alpha, beta])
 26            Initial alpha and beta parameter of the marginal Beta distributions.
 27        
 28        func : callable (optional)
 29            An objective function that can be used later for the gradeint.
 30            Can also be passed directly to the gradeint fucntion.
 31        
 32        ne : int
 33        '''
 34        self.dim   = x.size                         # dimension of state                 
 35        self.corr  = ot.cov2corr(cov)               # initial correlation
 36        self.var   = np.diag(cov)                   # initial varaince
 37        self.theta = np.tile(theta0, (self.dim,1))  # initial theta parameters, shape (dim, 2)
 38        self.eps   = var2eps(self.var, self.theta)  # epsilon parameter(s). SET BY VARIANCE (NOT MANUALLY BU USER ANYMORE)!
 39        self.func  = func                           # an objective function (optional)
 40        self.size  = ne                             # ensemble size
 41
 42    def update_distribution(self, theta, corr):
 43        '''
 44        Updates the parameters (theta and corr) of the distirbution.
 45
 46        Parameters
 47        ----------
 48        theta : array_like, shape (d,2)
 49            Contains the alpha (first column) and beta (second column) 
 50            of the marginal distirbutions.
 51
 52        corr : array_like, shape (d,d)
 53            Correlation matrix 
 54        '''
 55        self.theta = theta
 56        self.corr  = corr
 57        return 
 58    
 59    def get_theta(self):
 60        return self.theta
 61    
 62    def get_corr(self):
 63        return self.corr
 64
 65    def get_cov(self):
 66
 67        std = np.zeros(self.dim)
 68        for d in range(self.dim):
 69            std[d] = stats.beta(*self.theta[d]).std() * 2 * self.eps[d]
 70        
 71        return ot.corr2cov(self.corr, std=std)
 72        
 73    def sample(self, size):
 74        '''
 75        Samples the mutation distribution as described in the GenOpt paper (NOT PUBLISHED YET!)
 76
 77        Parameters
 78        ----------
 79        size: int
 80            Ensemble size (ne). Size of the sample to be drawn.
 81        
 82        Returns
 83        -------
 84        out: tuple, (enZ, enX)
 85
 86            enZ : array_like, shape (ne,d)
 87                Zero-mean Gaussain ensemble, drawn with the correlation matrix, corr
 88            
 89            enX : array_like, shape (ne,d)
 90                The drawn ensemble matrix, X ~ p(x|θ,R) (GenOpt pdf)
 91        '''
 92        # Sample normal distribution with correlation
 93        enZ = np.random.multivariate_normal(mean=np.zeros(self.dim),
 94                                            cov=self.corr,
 95                                            size=size)
 96
 97        # Transform Z to a uniform variable U
 98        enU = stats.norm.cdf(enZ)
 99
100        # Initialize enX
101        enX = np.zeros_like(enZ)
102
103        # Loop over dim
104        for d in range(self.dim):
105            marginal = stats.beta(*self.theta[d]) # Make marginal dist.
106            enX[:,d] = marginal.ppf(enU[:,d])     # Transform U to marginal variables X
107
108        return enZ, enX
109
110    def epsilon_transformation(self, x, enX):
111        '''
112        Performs the epsilon transformation, 
113            X ∈ [0, 1] ---> Y ∈ [x-ε, x+ε]
114        
115        Parameters
116        ----------
117        x : array_like, shape (d,)
118            Current state vector.
119        
120        enX : array_like, shape (ne,d)
121            Ensemble matrix X sampled from GenOpt distribution
122        
123        Returns
124        -------
125        out : array_like, shape (ne,d)
126            Epsilon transfromed ensemble matrix, Y
127        '''
128        enY = np.zeros_like(enX) # tranfomred ensemble 
129
130        # loop over dimenstion   
131        for d, xd in enumerate(x):
132            eps = self.eps[d]
133
134            a = (xd-eps) - ( (xd-eps)*(xd-eps < 0) ) \
135                         - ( (xd+eps-1)*(xd+eps > 1) ) \
136                         + (xd+eps-1)*(xd-eps < 0)*(xd+eps > 1) #Lower bound of ensemble
137            
138            b = (xd+eps) - ( (xd-eps)*(xd-eps < 0) ) \
139                         - ( (xd+eps-1)*(xd+eps > 1) ) \
140                         + (xd-eps)*(xd-eps < 0)*(xd+eps > 1)   #Upper bound of ensemble
141    
142            enY[:,d] =  a + enX[:, d]*(b-a)                 #Component-wise trafo.
143        
144        return enY
145
146    def gradient(self, x, *args, **kwargs):
147        '''
148        Calcualtes the average gradient of func using Stein's Lemma.
149        Described in GenOpt paper.
150
151        Parameters
152        ----------
153        x : array_like, shape (d,)
154            Current state vector.
155        
156        args : (theta, corr)
157            theta (parameters of distribution), shape (d,2)
158            corr (correlation matrix), shape (d,d)
159        
160        kwargs :
161            func : callable objectvie function
162            ne : ensemble size
163    
164        Returns
165        -------
166        out : array_like, shape (d,)
167            The average gradient.
168        '''
169        # check for objective fucntion 
170        if 'func' in kwargs:
171            func = kwargs.get('func')
172        elif self.func is None:
173            raise ValueError('No objectvie fucntion given. Please pass keyword argument: func=<your func>')
174        else:
175            func = self.func
176        
177        # check for ensemble size
178        if 'ne' in kwargs:
179            ne = kwargs.get('ne')
180        elif self.size is None:
181            ne = max(int(0.25*self.dim), 5)
182        else:
183            ne = self.size
184
185        # update dist
186        if args:
187            self.update_distribution(*args)
188
189        # sample and eps-transfrom
190        self.enZ, self.enX = self.sample(size=ne)
191        self.enY = self.epsilon_transformation(x, self.enX)
192        self.enJ = func(self.enY.T)
193
194        # calculate the ensemble gradient
195        self.enJ    = self.enJ[:,np.newaxis]                    # shape (ne,1)
196        self.enZ    = self.enZ.T                                # shape (d,ne)
197        self.enX    = self.enX.T                                # shape (d,ne)
198        alphas      = (self.theta[:,0])[:,np.newaxis]           # shape (d,1)   
199        betas       = (self.theta[:,1])[:,np.newaxis]           # shape (d,1)
200
201        H = np.linalg.inv(self.corr) - np.identity(self.dim)    # shape (d, d)
202        N = stats.norm.pdf(self.enZ)                            # shape (d, Ne)
203        M = np.zeros((self.dim, ne))                            # shape (d, Ne)
204
205        for d in range(self.dim):
206            marginal = stats.beta(*self.theta[d])
207            M[d,:]   = marginal.pdf(self.enX[d])
208
209        # get cov matrix
210        cov = self.get_cov()
211
212        # grad via Stein's lemma
213        ensembleX = (alphas-1)/self.enX - (betas-1)/(1-self.enX) - (H@self.enZ)*M/N 
214        grad      = -cov@ensembleX@self.enJ/(ne-1)
215        grad      = 2*self.eps*np.squeeze(grad) # scale gradient. grad-shape (d,)
216
217        # mutation gradient
218        finv = lambda th: np.linalg.inv(self.fisher_matrix(*th))
219        fishers_inv = np.apply_along_axis(finv, axis=1, arr=self.theta) #shape (d, 2, 2)
220
221        log_term = np.array([np.log(self.enX), np.log(1-self.enX)])
222        psi_term = np.array([np.apply_along_axis(delA, axis=1, arr=self.theta),
223                             np.apply_along_axis(delA, axis=1, arr=np.flip(self.theta, axis=1))])
224        
225        ensembleTheta   = log_term-psi_term[:,:,None]
226        self.grad_theta = np.sum(ensembleTheta*self.enJ.T, axis=-1)/(ne-1)
227        self.grad_theta = np.einsum('kij,jk->ki', fishers_inv, self.grad_theta, optimize=True)
228
229        return grad
230    
231    def mutation_gradient(self, x=None, *args, **kwargs):
232        '''
233        Returns the mutation gradient of theta. It is actually calulated in 
234        self.ensemble_gradient.
235
236        Parameters
237        ----------
238        kwargs:
239            return_ensemble : bool
240                If True, all the ensemble matrices are also returned in a dictionary.
241        
242        Returns
243        -------
244        out : array_like, shape (d,2)
245            Mutation gradeint of theta
246        
247        NB! If return_ensembles=True, the ensmebles are also returned!
248        '''
249        if 'return_ensembles' in kwargs:
250            ensembles = {'gaussian'   : self.enZ,
251                         'vanilla'    : self.enX,
252                         'transformed': self.enY,
253                         'objective'  : self.enJ}
254            return self.grad_theta, ensembles
255        else:
256            return self.grad_theta
257    
258    def corr_gradient(self):
259        '''
260        Returns the mutation gradeint of the correlation matrix
261        '''
262        enZ = self.enZ
263        enJ = self.enJ
264        ne  = np.squeeze(enJ).size 
265        grad_corr = np.zeros_like(self.corr)
266
267        for n in range(ne):
268            grad_corr += enJ[n]*(np.outer(enZ[:,n], enZ[:,n]) - self.corr)
269        
270        np.fill_diagonal(grad_corr, 0)
271        corr_gradient = grad_corr/(ne-1)
272
273        return corr_gradient
274    
275    def fisher_matrix(self, alpha, beta):
276        '''
277        Calculates the Fisher matrix of a Beta distribution.
278        
279        Parameters
280        ----------------------------------------------
281        alpha : float
282            alpha parameter in Beta distribution 
283
284        beta : float
285            beta parameter in Beta distribution
286
287        Returns
288        ----------------------------------------------
289        out : array_like, of shape (2, 2)
290            Fisher matrix 
291        '''
292        a = alpha
293        b = beta
294
295        upper_row = [polygamma(1, a) - polygamma(1, a+b), -polygamma(1, a + b)]
296        lower_row = [-polygamma(1, a + b), polygamma(1, b) - polygamma(1, a+b)]
297        fisher_matrix = np.array([upper_row, 
298                                  lower_row])
299        return fisher_matrix
300
301
302# Some helping functions
303def var2eps(var, theta):
304    alphas  = theta[:,0]
305    betas   = theta[:,1]
306    frac    = alphas*betas / ( (alphas+betas)**2 * (alphas+betas+1) )
307    epsilon = np.sqrt(0.25*var/frac)
308    return epsilon
309
310def delA(theta):
311    '''
312    Calculates the expression psi(a) - psi(a+b),
313    where psi() is the digamma function.
314
315    Parameters
316    --------------------------------------------
317    a : float
318    b : float
319    
320    Returns
321    --------------------------------------------
322    out : float
323    '''
324    a = theta[0]
325    b = theta[1]
326    return digamma(a)-digamma(a+b)
class GenOptDistribution:
 11class GenOptDistribution:
 12    '''
 13    Class that contains all the operations on the mutation distribution of GenOpt
 14    '''
 15    def __init__(self, x, cov, theta0=[20.0, 20.0], func=None, ne=None):
 16        '''
 17        Parameters
 18        ----------
 19        x : array_like, shape (d,)
 20            Initial control vector. Used initally to get the dimensionality of the problem.
 21        
 22        cov : array_like, shape (d,d)
 23            Initial covaraince matrix. Used to construct the correlation matrix and 
 24            epsilon parameter of GenOpt
 25        
 26        theta0 : list, of length 2 ([alpha, beta])
 27            Initial alpha and beta parameter of the marginal Beta distributions.
 28        
 29        func : callable (optional)
 30            An objective function that can be used later for the gradeint.
 31            Can also be passed directly to the gradeint fucntion.
 32        
 33        ne : int
 34        '''
 35        self.dim   = x.size                         # dimension of state                 
 36        self.corr  = ot.cov2corr(cov)               # initial correlation
 37        self.var   = np.diag(cov)                   # initial varaince
 38        self.theta = np.tile(theta0, (self.dim,1))  # initial theta parameters, shape (dim, 2)
 39        self.eps   = var2eps(self.var, self.theta)  # epsilon parameter(s). SET BY VARIANCE (NOT MANUALLY BU USER ANYMORE)!
 40        self.func  = func                           # an objective function (optional)
 41        self.size  = ne                             # ensemble size
 42
 43    def update_distribution(self, theta, corr):
 44        '''
 45        Updates the parameters (theta and corr) of the distirbution.
 46
 47        Parameters
 48        ----------
 49        theta : array_like, shape (d,2)
 50            Contains the alpha (first column) and beta (second column) 
 51            of the marginal distirbutions.
 52
 53        corr : array_like, shape (d,d)
 54            Correlation matrix 
 55        '''
 56        self.theta = theta
 57        self.corr  = corr
 58        return 
 59    
 60    def get_theta(self):
 61        return self.theta
 62    
 63    def get_corr(self):
 64        return self.corr
 65
 66    def get_cov(self):
 67
 68        std = np.zeros(self.dim)
 69        for d in range(self.dim):
 70            std[d] = stats.beta(*self.theta[d]).std() * 2 * self.eps[d]
 71        
 72        return ot.corr2cov(self.corr, std=std)
 73        
 74    def sample(self, size):
 75        '''
 76        Samples the mutation distribution as described in the GenOpt paper (NOT PUBLISHED YET!)
 77
 78        Parameters
 79        ----------
 80        size: int
 81            Ensemble size (ne). Size of the sample to be drawn.
 82        
 83        Returns
 84        -------
 85        out: tuple, (enZ, enX)
 86
 87            enZ : array_like, shape (ne,d)
 88                Zero-mean Gaussain ensemble, drawn with the correlation matrix, corr
 89            
 90            enX : array_like, shape (ne,d)
 91                The drawn ensemble matrix, X ~ p(x|θ,R) (GenOpt pdf)
 92        '''
 93        # Sample normal distribution with correlation
 94        enZ = np.random.multivariate_normal(mean=np.zeros(self.dim),
 95                                            cov=self.corr,
 96                                            size=size)
 97
 98        # Transform Z to a uniform variable U
 99        enU = stats.norm.cdf(enZ)
100
101        # Initialize enX
102        enX = np.zeros_like(enZ)
103
104        # Loop over dim
105        for d in range(self.dim):
106            marginal = stats.beta(*self.theta[d]) # Make marginal dist.
107            enX[:,d] = marginal.ppf(enU[:,d])     # Transform U to marginal variables X
108
109        return enZ, enX
110
111    def epsilon_transformation(self, x, enX):
112        '''
113        Performs the epsilon transformation, 
114            X ∈ [0, 1] ---> Y ∈ [x-ε, x+ε]
115        
116        Parameters
117        ----------
118        x : array_like, shape (d,)
119            Current state vector.
120        
121        enX : array_like, shape (ne,d)
122            Ensemble matrix X sampled from GenOpt distribution
123        
124        Returns
125        -------
126        out : array_like, shape (ne,d)
127            Epsilon transfromed ensemble matrix, Y
128        '''
129        enY = np.zeros_like(enX) # tranfomred ensemble 
130
131        # loop over dimenstion   
132        for d, xd in enumerate(x):
133            eps = self.eps[d]
134
135            a = (xd-eps) - ( (xd-eps)*(xd-eps < 0) ) \
136                         - ( (xd+eps-1)*(xd+eps > 1) ) \
137                         + (xd+eps-1)*(xd-eps < 0)*(xd+eps > 1) #Lower bound of ensemble
138            
139            b = (xd+eps) - ( (xd-eps)*(xd-eps < 0) ) \
140                         - ( (xd+eps-1)*(xd+eps > 1) ) \
141                         + (xd-eps)*(xd-eps < 0)*(xd+eps > 1)   #Upper bound of ensemble
142    
143            enY[:,d] =  a + enX[:, d]*(b-a)                 #Component-wise trafo.
144        
145        return enY
146
147    def gradient(self, x, *args, **kwargs):
148        '''
149        Calcualtes the average gradient of func using Stein's Lemma.
150        Described in GenOpt paper.
151
152        Parameters
153        ----------
154        x : array_like, shape (d,)
155            Current state vector.
156        
157        args : (theta, corr)
158            theta (parameters of distribution), shape (d,2)
159            corr (correlation matrix), shape (d,d)
160        
161        kwargs :
162            func : callable objectvie function
163            ne : ensemble size
164    
165        Returns
166        -------
167        out : array_like, shape (d,)
168            The average gradient.
169        '''
170        # check for objective fucntion 
171        if 'func' in kwargs:
172            func = kwargs.get('func')
173        elif self.func is None:
174            raise ValueError('No objectvie fucntion given. Please pass keyword argument: func=<your func>')
175        else:
176            func = self.func
177        
178        # check for ensemble size
179        if 'ne' in kwargs:
180            ne = kwargs.get('ne')
181        elif self.size is None:
182            ne = max(int(0.25*self.dim), 5)
183        else:
184            ne = self.size
185
186        # update dist
187        if args:
188            self.update_distribution(*args)
189
190        # sample and eps-transfrom
191        self.enZ, self.enX = self.sample(size=ne)
192        self.enY = self.epsilon_transformation(x, self.enX)
193        self.enJ = func(self.enY.T)
194
195        # calculate the ensemble gradient
196        self.enJ    = self.enJ[:,np.newaxis]                    # shape (ne,1)
197        self.enZ    = self.enZ.T                                # shape (d,ne)
198        self.enX    = self.enX.T                                # shape (d,ne)
199        alphas      = (self.theta[:,0])[:,np.newaxis]           # shape (d,1)   
200        betas       = (self.theta[:,1])[:,np.newaxis]           # shape (d,1)
201
202        H = np.linalg.inv(self.corr) - np.identity(self.dim)    # shape (d, d)
203        N = stats.norm.pdf(self.enZ)                            # shape (d, Ne)
204        M = np.zeros((self.dim, ne))                            # shape (d, Ne)
205
206        for d in range(self.dim):
207            marginal = stats.beta(*self.theta[d])
208            M[d,:]   = marginal.pdf(self.enX[d])
209
210        # get cov matrix
211        cov = self.get_cov()
212
213        # grad via Stein's lemma
214        ensembleX = (alphas-1)/self.enX - (betas-1)/(1-self.enX) - (H@self.enZ)*M/N 
215        grad      = -cov@ensembleX@self.enJ/(ne-1)
216        grad      = 2*self.eps*np.squeeze(grad) # scale gradient. grad-shape (d,)
217
218        # mutation gradient
219        finv = lambda th: np.linalg.inv(self.fisher_matrix(*th))
220        fishers_inv = np.apply_along_axis(finv, axis=1, arr=self.theta) #shape (d, 2, 2)
221
222        log_term = np.array([np.log(self.enX), np.log(1-self.enX)])
223        psi_term = np.array([np.apply_along_axis(delA, axis=1, arr=self.theta),
224                             np.apply_along_axis(delA, axis=1, arr=np.flip(self.theta, axis=1))])
225        
226        ensembleTheta   = log_term-psi_term[:,:,None]
227        self.grad_theta = np.sum(ensembleTheta*self.enJ.T, axis=-1)/(ne-1)
228        self.grad_theta = np.einsum('kij,jk->ki', fishers_inv, self.grad_theta, optimize=True)
229
230        return grad
231    
232    def mutation_gradient(self, x=None, *args, **kwargs):
233        '''
234        Returns the mutation gradient of theta. It is actually calulated in 
235        self.ensemble_gradient.
236
237        Parameters
238        ----------
239        kwargs:
240            return_ensemble : bool
241                If True, all the ensemble matrices are also returned in a dictionary.
242        
243        Returns
244        -------
245        out : array_like, shape (d,2)
246            Mutation gradeint of theta
247        
248        NB! If return_ensembles=True, the ensmebles are also returned!
249        '''
250        if 'return_ensembles' in kwargs:
251            ensembles = {'gaussian'   : self.enZ,
252                         'vanilla'    : self.enX,
253                         'transformed': self.enY,
254                         'objective'  : self.enJ}
255            return self.grad_theta, ensembles
256        else:
257            return self.grad_theta
258    
259    def corr_gradient(self):
260        '''
261        Returns the mutation gradeint of the correlation matrix
262        '''
263        enZ = self.enZ
264        enJ = self.enJ
265        ne  = np.squeeze(enJ).size 
266        grad_corr = np.zeros_like(self.corr)
267
268        for n in range(ne):
269            grad_corr += enJ[n]*(np.outer(enZ[:,n], enZ[:,n]) - self.corr)
270        
271        np.fill_diagonal(grad_corr, 0)
272        corr_gradient = grad_corr/(ne-1)
273
274        return corr_gradient
275    
276    def fisher_matrix(self, alpha, beta):
277        '''
278        Calculates the Fisher matrix of a Beta distribution.
279        
280        Parameters
281        ----------------------------------------------
282        alpha : float
283            alpha parameter in Beta distribution 
284
285        beta : float
286            beta parameter in Beta distribution
287
288        Returns
289        ----------------------------------------------
290        out : array_like, of shape (2, 2)
291            Fisher matrix 
292        '''
293        a = alpha
294        b = beta
295
296        upper_row = [polygamma(1, a) - polygamma(1, a+b), -polygamma(1, a + b)]
297        lower_row = [-polygamma(1, a + b), polygamma(1, b) - polygamma(1, a+b)]
298        fisher_matrix = np.array([upper_row, 
299                                  lower_row])
300        return fisher_matrix

Class that contains all the operations on the mutation distribution of GenOpt

GenOptDistribution(x, cov, theta0=[20.0, 20.0], func=None, ne=None)
15    def __init__(self, x, cov, theta0=[20.0, 20.0], func=None, ne=None):
16        '''
17        Parameters
18        ----------
19        x : array_like, shape (d,)
20            Initial control vector. Used initally to get the dimensionality of the problem.
21        
22        cov : array_like, shape (d,d)
23            Initial covaraince matrix. Used to construct the correlation matrix and 
24            epsilon parameter of GenOpt
25        
26        theta0 : list, of length 2 ([alpha, beta])
27            Initial alpha and beta parameter of the marginal Beta distributions.
28        
29        func : callable (optional)
30            An objective function that can be used later for the gradeint.
31            Can also be passed directly to the gradeint fucntion.
32        
33        ne : int
34        '''
35        self.dim   = x.size                         # dimension of state                 
36        self.corr  = ot.cov2corr(cov)               # initial correlation
37        self.var   = np.diag(cov)                   # initial varaince
38        self.theta = np.tile(theta0, (self.dim,1))  # initial theta parameters, shape (dim, 2)
39        self.eps   = var2eps(self.var, self.theta)  # epsilon parameter(s). SET BY VARIANCE (NOT MANUALLY BU USER ANYMORE)!
40        self.func  = func                           # an objective function (optional)
41        self.size  = ne                             # ensemble size
Parameters
  • x (array_like, shape (d,)): Initial control vector. Used initally to get the dimensionality of the problem.
  • cov (array_like, shape (d,d)): Initial covaraince matrix. Used to construct the correlation matrix and epsilon parameter of GenOpt
  • theta0 (list, of length 2 ([alpha, beta])): Initial alpha and beta parameter of the marginal Beta distributions.
  • func (callable (optional)): An objective function that can be used later for the gradeint. Can also be passed directly to the gradeint fucntion.
  • ne (int):
dim
corr
var
theta
eps
func
size
def update_distribution(self, theta, corr):
43    def update_distribution(self, theta, corr):
44        '''
45        Updates the parameters (theta and corr) of the distirbution.
46
47        Parameters
48        ----------
49        theta : array_like, shape (d,2)
50            Contains the alpha (first column) and beta (second column) 
51            of the marginal distirbutions.
52
53        corr : array_like, shape (d,d)
54            Correlation matrix 
55        '''
56        self.theta = theta
57        self.corr  = corr
58        return 

Updates the parameters (theta and corr) of the distirbution.

Parameters
  • theta (array_like, shape (d,2)): Contains the alpha (first column) and beta (second column) of the marginal distirbutions.
  • corr (array_like, shape (d,d)): Correlation matrix
def get_theta(self):
60    def get_theta(self):
61        return self.theta
def get_corr(self):
63    def get_corr(self):
64        return self.corr
def get_cov(self):
66    def get_cov(self):
67
68        std = np.zeros(self.dim)
69        for d in range(self.dim):
70            std[d] = stats.beta(*self.theta[d]).std() * 2 * self.eps[d]
71        
72        return ot.corr2cov(self.corr, std=std)
def sample(self, size):
 74    def sample(self, size):
 75        '''
 76        Samples the mutation distribution as described in the GenOpt paper (NOT PUBLISHED YET!)
 77
 78        Parameters
 79        ----------
 80        size: int
 81            Ensemble size (ne). Size of the sample to be drawn.
 82        
 83        Returns
 84        -------
 85        out: tuple, (enZ, enX)
 86
 87            enZ : array_like, shape (ne,d)
 88                Zero-mean Gaussain ensemble, drawn with the correlation matrix, corr
 89            
 90            enX : array_like, shape (ne,d)
 91                The drawn ensemble matrix, X ~ p(x|θ,R) (GenOpt pdf)
 92        '''
 93        # Sample normal distribution with correlation
 94        enZ = np.random.multivariate_normal(mean=np.zeros(self.dim),
 95                                            cov=self.corr,
 96                                            size=size)
 97
 98        # Transform Z to a uniform variable U
 99        enU = stats.norm.cdf(enZ)
100
101        # Initialize enX
102        enX = np.zeros_like(enZ)
103
104        # Loop over dim
105        for d in range(self.dim):
106            marginal = stats.beta(*self.theta[d]) # Make marginal dist.
107            enX[:,d] = marginal.ppf(enU[:,d])     # Transform U to marginal variables X
108
109        return enZ, enX

Samples the mutation distribution as described in the GenOpt paper (NOT PUBLISHED YET!)

Parameters
  • size (int): Ensemble size (ne). Size of the sample to be drawn.
Returns
  • out (tuple, (enZ, enX)): enZ : array_like, shape (ne,d) Zero-mean Gaussain ensemble, drawn with the correlation matrix, corr

    enX : array_like, shape (ne,d) The drawn ensemble matrix, X ~ p(x|θ,R) (GenOpt pdf)

def epsilon_transformation(self, x, enX):
111    def epsilon_transformation(self, x, enX):
112        '''
113        Performs the epsilon transformation, 
114            X ∈ [0, 1] ---> Y ∈ [x-ε, x+ε]
115        
116        Parameters
117        ----------
118        x : array_like, shape (d,)
119            Current state vector.
120        
121        enX : array_like, shape (ne,d)
122            Ensemble matrix X sampled from GenOpt distribution
123        
124        Returns
125        -------
126        out : array_like, shape (ne,d)
127            Epsilon transfromed ensemble matrix, Y
128        '''
129        enY = np.zeros_like(enX) # tranfomred ensemble 
130
131        # loop over dimenstion   
132        for d, xd in enumerate(x):
133            eps = self.eps[d]
134
135            a = (xd-eps) - ( (xd-eps)*(xd-eps < 0) ) \
136                         - ( (xd+eps-1)*(xd+eps > 1) ) \
137                         + (xd+eps-1)*(xd-eps < 0)*(xd+eps > 1) #Lower bound of ensemble
138            
139            b = (xd+eps) - ( (xd-eps)*(xd-eps < 0) ) \
140                         - ( (xd+eps-1)*(xd+eps > 1) ) \
141                         + (xd-eps)*(xd-eps < 0)*(xd+eps > 1)   #Upper bound of ensemble
142    
143            enY[:,d] =  a + enX[:, d]*(b-a)                 #Component-wise trafo.
144        
145        return enY

Performs the epsilon transformation, X ∈ [0, 1] ---> Y ∈ [x-ε, x+ε]

Parameters
  • x (array_like, shape (d,)): Current state vector.
  • enX (array_like, shape (ne,d)): Ensemble matrix X sampled from GenOpt distribution
Returns
  • out (array_like, shape (ne,d)): Epsilon transfromed ensemble matrix, Y
def gradient(self, x, *args, **kwargs):
147    def gradient(self, x, *args, **kwargs):
148        '''
149        Calcualtes the average gradient of func using Stein's Lemma.
150        Described in GenOpt paper.
151
152        Parameters
153        ----------
154        x : array_like, shape (d,)
155            Current state vector.
156        
157        args : (theta, corr)
158            theta (parameters of distribution), shape (d,2)
159            corr (correlation matrix), shape (d,d)
160        
161        kwargs :
162            func : callable objectvie function
163            ne : ensemble size
164    
165        Returns
166        -------
167        out : array_like, shape (d,)
168            The average gradient.
169        '''
170        # check for objective fucntion 
171        if 'func' in kwargs:
172            func = kwargs.get('func')
173        elif self.func is None:
174            raise ValueError('No objectvie fucntion given. Please pass keyword argument: func=<your func>')
175        else:
176            func = self.func
177        
178        # check for ensemble size
179        if 'ne' in kwargs:
180            ne = kwargs.get('ne')
181        elif self.size is None:
182            ne = max(int(0.25*self.dim), 5)
183        else:
184            ne = self.size
185
186        # update dist
187        if args:
188            self.update_distribution(*args)
189
190        # sample and eps-transfrom
191        self.enZ, self.enX = self.sample(size=ne)
192        self.enY = self.epsilon_transformation(x, self.enX)
193        self.enJ = func(self.enY.T)
194
195        # calculate the ensemble gradient
196        self.enJ    = self.enJ[:,np.newaxis]                    # shape (ne,1)
197        self.enZ    = self.enZ.T                                # shape (d,ne)
198        self.enX    = self.enX.T                                # shape (d,ne)
199        alphas      = (self.theta[:,0])[:,np.newaxis]           # shape (d,1)   
200        betas       = (self.theta[:,1])[:,np.newaxis]           # shape (d,1)
201
202        H = np.linalg.inv(self.corr) - np.identity(self.dim)    # shape (d, d)
203        N = stats.norm.pdf(self.enZ)                            # shape (d, Ne)
204        M = np.zeros((self.dim, ne))                            # shape (d, Ne)
205
206        for d in range(self.dim):
207            marginal = stats.beta(*self.theta[d])
208            M[d,:]   = marginal.pdf(self.enX[d])
209
210        # get cov matrix
211        cov = self.get_cov()
212
213        # grad via Stein's lemma
214        ensembleX = (alphas-1)/self.enX - (betas-1)/(1-self.enX) - (H@self.enZ)*M/N 
215        grad      = -cov@ensembleX@self.enJ/(ne-1)
216        grad      = 2*self.eps*np.squeeze(grad) # scale gradient. grad-shape (d,)
217
218        # mutation gradient
219        finv = lambda th: np.linalg.inv(self.fisher_matrix(*th))
220        fishers_inv = np.apply_along_axis(finv, axis=1, arr=self.theta) #shape (d, 2, 2)
221
222        log_term = np.array([np.log(self.enX), np.log(1-self.enX)])
223        psi_term = np.array([np.apply_along_axis(delA, axis=1, arr=self.theta),
224                             np.apply_along_axis(delA, axis=1, arr=np.flip(self.theta, axis=1))])
225        
226        ensembleTheta   = log_term-psi_term[:,:,None]
227        self.grad_theta = np.sum(ensembleTheta*self.enJ.T, axis=-1)/(ne-1)
228        self.grad_theta = np.einsum('kij,jk->ki', fishers_inv, self.grad_theta, optimize=True)
229
230        return grad

Calcualtes the average gradient of func using Stein's Lemma. Described in GenOpt paper.

Parameters
  • x (array_like, shape (d,)): Current state vector.
  • args ((theta, corr)): theta (parameters of distribution), shape (d,2) corr (correlation matrix), shape (d,d)
  • kwargs :: func : callable objectvie function ne : ensemble size
Returns
  • out (array_like, shape (d,)): The average gradient.
def mutation_gradient(self, x=None, *args, **kwargs):
232    def mutation_gradient(self, x=None, *args, **kwargs):
233        '''
234        Returns the mutation gradient of theta. It is actually calulated in 
235        self.ensemble_gradient.
236
237        Parameters
238        ----------
239        kwargs:
240            return_ensemble : bool
241                If True, all the ensemble matrices are also returned in a dictionary.
242        
243        Returns
244        -------
245        out : array_like, shape (d,2)
246            Mutation gradeint of theta
247        
248        NB! If return_ensembles=True, the ensmebles are also returned!
249        '''
250        if 'return_ensembles' in kwargs:
251            ensembles = {'gaussian'   : self.enZ,
252                         'vanilla'    : self.enX,
253                         'transformed': self.enY,
254                         'objective'  : self.enJ}
255            return self.grad_theta, ensembles
256        else:
257            return self.grad_theta

Returns the mutation gradient of theta. It is actually calulated in self.ensemble_gradient.

Parameters
  • kwargs:: return_ensemble : bool If True, all the ensemble matrices are also returned in a dictionary.
Returns
  • out (array_like, shape (d,2)): Mutation gradeint of theta
  • NB! If return_ensembles=True, the ensmebles are also returned!
def corr_gradient(self):
259    def corr_gradient(self):
260        '''
261        Returns the mutation gradeint of the correlation matrix
262        '''
263        enZ = self.enZ
264        enJ = self.enJ
265        ne  = np.squeeze(enJ).size 
266        grad_corr = np.zeros_like(self.corr)
267
268        for n in range(ne):
269            grad_corr += enJ[n]*(np.outer(enZ[:,n], enZ[:,n]) - self.corr)
270        
271        np.fill_diagonal(grad_corr, 0)
272        corr_gradient = grad_corr/(ne-1)
273
274        return corr_gradient

Returns the mutation gradeint of the correlation matrix

def fisher_matrix(self, alpha, beta):
276    def fisher_matrix(self, alpha, beta):
277        '''
278        Calculates the Fisher matrix of a Beta distribution.
279        
280        Parameters
281        ----------------------------------------------
282        alpha : float
283            alpha parameter in Beta distribution 
284
285        beta : float
286            beta parameter in Beta distribution
287
288        Returns
289        ----------------------------------------------
290        out : array_like, of shape (2, 2)
291            Fisher matrix 
292        '''
293        a = alpha
294        b = beta
295
296        upper_row = [polygamma(1, a) - polygamma(1, a+b), -polygamma(1, a + b)]
297        lower_row = [-polygamma(1, a + b), polygamma(1, b) - polygamma(1, a+b)]
298        fisher_matrix = np.array([upper_row, 
299                                  lower_row])
300        return fisher_matrix

Calculates the Fisher matrix of a Beta distribution.

Parameters
  • alpha (float): alpha parameter in Beta distribution
  • beta (float): beta parameter in Beta distribution
Returns
  • out (array_like, of shape (2, 2)): Fisher matrix
def var2eps(var, theta):
304def var2eps(var, theta):
305    alphas  = theta[:,0]
306    betas   = theta[:,1]
307    frac    = alphas*betas / ( (alphas+betas)**2 * (alphas+betas+1) )
308    epsilon = np.sqrt(0.25*var/frac)
309    return epsilon
def delA(theta):
311def delA(theta):
312    '''
313    Calculates the expression psi(a) - psi(a+b),
314    where psi() is the digamma function.
315
316    Parameters
317    --------------------------------------------
318    a : float
319    b : float
320    
321    Returns
322    --------------------------------------------
323    out : float
324    '''
325    a = theta[0]
326    b = theta[1]
327    return digamma(a)-digamma(a+b)

Calculates the expression psi(a) - psi(a+b), where psi() is the digamma function.

Parameters
  • a (float):

  • b (float):

Returns
  • out (float):