PET docs: popt.loop.dist

GenOptDistribution(x, cov, theta0=[20.0, 20.0], func=None, ne=None) View Source

15    def __init__(self, x, cov, theta0=[20.0, 20.0], func=None, ne=None):
16        '''
17        Parameters
18        ----------
19        x : array_like, shape (d,)
20            Initial control vector. Used initally to get the dimensionality of the problem.
21        
22        cov : array_like, shape (d,d)
23            Initial covaraince matrix. Used to construct the correlation matrix and 
24            epsilon parameter of GenOpt
25        
26        theta0 : list, of length 2 ([alpha, beta])
27            Initial alpha and beta parameter of the marginal Beta distributions.
28        
29        func : callable (optional)
30            An objective function that can be used later for the gradeint.
31            Can also be passed directly to the gradeint fucntion.
32        
33        ne : int
34        '''
35        self.dim   = x.size                         # dimension of state                 
36        self.corr  = ot.cov2corr(cov)               # initial correlation
37        self.var   = np.diag(cov)                   # initial varaince
38        self.theta = np.tile(theta0, (self.dim,1))  # initial theta parameters, shape (dim, 2)
39        self.eps   = var2eps(self.var, self.theta)  # epsilon parameter(s). SET BY VARIANCE (NOT MANUALLY BU USER ANYMORE)!
40        self.func  = func                           # an objective function (optional)
41        self.size  = ne                             # ensemble size

Parameters

x (array_like, shape (d,)): Initial control vector. Used initally to get the dimensionality of the problem.
cov (array_like, shape (d,d)): Initial covaraince matrix. Used to construct the correlation matrix and epsilon parameter of GenOpt
theta0 (list, of length 2 ([alpha, beta])): Initial alpha and beta parameter of the marginal Beta distributions.
func (callable (optional)): An objective function that can be used later for the gradeint. Can also be passed directly to the gradeint fucntion.
ne (int):

dim

corr

var

theta

eps

func

size

def update_distribution(self, theta, corr): View Source

43    def update_distribution(self, theta, corr):
44        '''
45        Updates the parameters (theta and corr) of the distirbution.
46
47        Parameters
48        ----------
49        theta : array_like, shape (d,2)
50            Contains the alpha (first column) and beta (second column) 
51            of the marginal distirbutions.
52
53        corr : array_like, shape (d,d)
54            Correlation matrix 
55        '''
56        self.theta = theta
57        self.corr  = corr
58        return

Updates the parameters (theta and corr) of the distirbution.

Parameters

theta (array_like, shape (d,2)): Contains the alpha (first column) and beta (second column) of the marginal distirbutions.
corr (array_like, shape (d,d)): Correlation matrix

def get_theta(self): View Source

60    def get_theta(self):
61        return self.theta

def get_corr(self): View Source

63    def get_corr(self):
64        return self.corr

def get_cov(self): View Source

66    def get_cov(self):
67
68        std = np.zeros(self.dim)
69        for d in range(self.dim):
70            std[d] = stats.beta(*self.theta[d]).std() * 2 * self.eps[d]
71        
72        return ot.corr2cov(self.corr, std=std)

def sample(self, size): View Source

 74    def sample(self, size):
 75        '''
 76        Samples the mutation distribution as described in the GenOpt paper (NOT PUBLISHED YET!)
 77
 78        Parameters
 79        ----------
 80        size: int
 81            Ensemble size (ne). Size of the sample to be drawn.
 82        
 83        Returns
 84        -------
 85        out: tuple, (enZ, enX)
 86
 87            enZ : array_like, shape (ne,d)
 88                Zero-mean Gaussain ensemble, drawn with the correlation matrix, corr
 89            
 90            enX : array_like, shape (ne,d)
 91                The drawn ensemble matrix, X ~ p(x|θ,R) (GenOpt pdf)
 92        '''
 93        # Sample normal distribution with correlation
 94        enZ = np.random.multivariate_normal(mean=np.zeros(self.dim),
 95                                            cov=self.corr,
 96                                            size=size)
 97
 98        # Transform Z to a uniform variable U
 99        enU = stats.norm.cdf(enZ)
100
101        # Initialize enX
102        enX = np.zeros_like(enZ)
103
104        # Loop over dim
105        for d in range(self.dim):
106            marginal = stats.beta(*self.theta[d]) # Make marginal dist.
107            enX[:,d] = marginal.ppf(enU[:,d])     # Transform U to marginal variables X
108
109        return enZ, enX

Samples the mutation distribution as described in the GenOpt paper (NOT PUBLISHED YET!)

Parameters

size (int): Ensemble size (ne). Size of the sample to be drawn.

Returns

out (tuple, (enZ, enX)): enZ : array_like, shape (ne,d) Zero-mean Gaussain ensemble, drawn with the correlation matrix, corr

enX : array_like, shape (ne,d) The drawn ensemble matrix, X ~ p(x|θ,R) (GenOpt pdf)

def epsilon_transformation(self, x, enX): View Source

111    def epsilon_transformation(self, x, enX):
112        '''
113        Performs the epsilon transformation, 
114            X ∈ [0, 1] ---> Y ∈ [x-ε, x+ε]
115        
116        Parameters
117        ----------
118        x : array_like, shape (d,)
119            Current state vector.
120        
121        enX : array_like, shape (ne,d)
122            Ensemble matrix X sampled from GenOpt distribution
123        
124        Returns
125        -------
126        out : array_like, shape (ne,d)
127            Epsilon transfromed ensemble matrix, Y
128        '''
129        enY = np.zeros_like(enX) # tranfomred ensemble 
130
131        # loop over dimenstion   
132        for d, xd in enumerate(x):
133            eps = self.eps[d]
134
135            a = (xd-eps) - ( (xd-eps)*(xd-eps < 0) ) \
136                         - ( (xd+eps-1)*(xd+eps > 1) ) \
137                         + (xd+eps-1)*(xd-eps < 0)*(xd+eps > 1) #Lower bound of ensemble
138            
139            b = (xd+eps) - ( (xd-eps)*(xd-eps < 0) ) \
140                         - ( (xd+eps-1)*(xd+eps > 1) ) \
141                         + (xd-eps)*(xd-eps < 0)*(xd+eps > 1)   #Upper bound of ensemble
142    
143            enY[:,d] =  a + enX[:, d]*(b-a)                 #Component-wise trafo.
144        
145        return enY

Performs the epsilon transformation, X ∈ [0, 1] ---> Y ∈ [x-ε, x+ε]

Parameters

x (array_like, shape (d,)): Current state vector.
enX (array_like, shape (ne,d)): Ensemble matrix X sampled from GenOpt distribution

Returns

out (array_like, shape (ne,d)): Epsilon transfromed ensemble matrix, Y

def gradient(self, x, *args, **kwargs): View Source

147    def gradient(self, x, *args, **kwargs):
148        '''
149        Calcualtes the average gradient of func using Stein's Lemma.
150        Described in GenOpt paper.
151
152        Parameters
153        ----------
154        x : array_like, shape (d,)
155            Current state vector.
156        
157        args : (theta, corr)
158            theta (parameters of distribution), shape (d,2)
159            corr (correlation matrix), shape (d,d)
160        
161        kwargs :
162            func : callable objectvie function
163            ne : ensemble size
164    
165        Returns
166        -------
167        out : array_like, shape (d,)
168            The average gradient.
169        '''
170        # check for objective fucntion 
171        if 'func' in kwargs:
172            func = kwargs.get('func')
173        elif self.func is None:
174            raise ValueError('No objectvie fucntion given. Please pass keyword argument: func=<your func>')
175        else:
176            func = self.func
177        
178        # check for ensemble size
179        if 'ne' in kwargs:
180            ne = kwargs.get('ne')
181        elif self.size is None:
182            ne = max(int(0.25*self.dim), 5)
183        else:
184            ne = self.size
185
186        # update dist
187        if args:
188            self.update_distribution(*args)
189
190        # sample and eps-transfrom
191        self.enZ, self.enX = self.sample(size=ne)
192        self.enY = self.epsilon_transformation(x, self.enX)
193        self.enJ = func(self.enY.T)
194
195        # calculate the ensemble gradient
196        self.enJ    = self.enJ[:,np.newaxis]                    # shape (ne,1)
197        self.enZ    = self.enZ.T                                # shape (d,ne)
198        self.enX    = self.enX.T                                # shape (d,ne)
199        alphas      = (self.theta[:,0])[:,np.newaxis]           # shape (d,1)   
200        betas       = (self.theta[:,1])[:,np.newaxis]           # shape (d,1)
201
202        H = np.linalg.inv(self.corr) - np.identity(self.dim)    # shape (d, d)
203        N = stats.norm.pdf(self.enZ)                            # shape (d, Ne)
204        M = np.zeros((self.dim, ne))                            # shape (d, Ne)
205
206        for d in range(self.dim):
207            marginal = stats.beta(*self.theta[d])
208            M[d,:]   = marginal.pdf(self.enX[d])
209
210        # get cov matrix
211        cov = self.get_cov()
212
213        # grad via Stein's lemma
214        ensembleX = (alphas-1)/self.enX - (betas-1)/(1-self.enX) - (H@self.enZ)*M/N 
215        grad      = -cov@ensembleX@self.enJ/(ne-1)
216        grad      = 2*self.eps*np.squeeze(grad) # scale gradient. grad-shape (d,)
217
218        # mutation gradient
219        finv = lambda th: np.linalg.inv(self.fisher_matrix(*th))
220        fishers_inv = np.apply_along_axis(finv, axis=1, arr=self.theta) #shape (d, 2, 2)
221
222        log_term = np.array([np.log(self.enX), np.log(1-self.enX)])
223        psi_term = np.array([np.apply_along_axis(delA, axis=1, arr=self.theta),
224                             np.apply_along_axis(delA, axis=1, arr=np.flip(self.theta, axis=1))])
225        
226        ensembleTheta   = log_term-psi_term[:,:,None]
227        self.grad_theta = np.sum(ensembleTheta*self.enJ.T, axis=-1)/(ne-1)
228        self.grad_theta = np.einsum('kij,jk->ki', fishers_inv, self.grad_theta, optimize=True)
229
230        return grad

Calcualtes the average gradient of func using Stein's Lemma. Described in GenOpt paper.

Parameters

x (array_like, shape (d,)): Current state vector.
args ((theta, corr)): theta (parameters of distribution), shape (d,2) corr (correlation matrix), shape (d,d)
kwargs :: func : callable objectvie function ne : ensemble size

Returns

out (array_like, shape (d,)): The average gradient.

def mutation_gradient(self, x=None, *args, **kwargs): View Source

232    def mutation_gradient(self, x=None, *args, **kwargs):
233        '''
234        Returns the mutation gradient of theta. It is actually calulated in 
235        self.ensemble_gradient.
236
237        Parameters
238        ----------
239        kwargs:
240            return_ensemble : bool
241                If True, all the ensemble matrices are also returned in a dictionary.
242        
243        Returns
244        -------
245        out : array_like, shape (d,2)
246            Mutation gradeint of theta
247        
248        NB! If return_ensembles=True, the ensmebles are also returned!
249        '''
250        if 'return_ensembles' in kwargs:
251            ensembles = {'gaussian'   : self.enZ,
252                         'vanilla'    : self.enX,
253                         'transformed': self.enY,
254                         'objective'  : self.enJ}
255            return self.grad_theta, ensembles
256        else:
257            return self.grad_theta

Returns the mutation gradient of theta. It is actually calulated in self.ensemble_gradient.

Parameters

kwargs:: return_ensemble : bool If True, all the ensemble matrices are also returned in a dictionary.

Returns

out (array_like, shape (d,2)): Mutation gradeint of theta
NB! If return_ensembles=True, the ensmebles are also returned!

def corr_gradient(self): View Source

259    def corr_gradient(self):
260        '''
261        Returns the mutation gradeint of the correlation matrix
262        '''
263        enZ = self.enZ
264        enJ = self.enJ
265        ne  = np.squeeze(enJ).size 
266        grad_corr = np.zeros_like(self.corr)
267
268        for n in range(ne):
269            grad_corr += enJ[n]*(np.outer(enZ[:,n], enZ[:,n]) - self.corr)
270        
271        np.fill_diagonal(grad_corr, 0)
272        corr_gradient = grad_corr/(ne-1)
273
274        return corr_gradient

Returns the mutation gradeint of the correlation matrix

def fisher_matrix(self, alpha, beta): View Source

276    def fisher_matrix(self, alpha, beta):
277        '''
278        Calculates the Fisher matrix of a Beta distribution.
279        
280        Parameters
281        ----------------------------------------------
282        alpha : float
283            alpha parameter in Beta distribution 
284
285        beta : float
286            beta parameter in Beta distribution
287
288        Returns
289        ----------------------------------------------
290        out : array_like, of shape (2, 2)
291            Fisher matrix 
292        '''
293        a = alpha
294        b = beta
295
296        upper_row = [polygamma(1, a) - polygamma(1, a+b), -polygamma(1, a + b)]
297        lower_row = [-polygamma(1, a + b), polygamma(1, b) - polygamma(1, a+b)]
298        fisher_matrix = np.array([upper_row, 
299                                  lower_row])
300        return fisher_matrix

Calculates the Fisher matrix of a Beta distribution.

Parameters

alpha (float): alpha parameter in Beta distribution
beta (float): beta parameter in Beta distribution

Returns

out (array_like, of shape (2, 2)): Fisher matrix