pipt.update_schemes.esmda
ES-MDA type schemes
1""" 2ES-MDA type schemes 3""" 4 5# External imports 6import scipy.linalg as scilinalg 7from copy import deepcopy 8import numpy as np 9from geostat.decomp import Cholesky 10 11# Internal imports 12from pipt.loop.ensemble import Ensemble 13import pipt.misc_tools.analysis_tools as at 14 15# import update schemes 16from pipt.update_schemes.update_methods_ns.approx_update import approx_update 17from pipt.update_schemes.update_methods_ns.full_update import full_update 18from pipt.update_schemes.update_methods_ns.subspace_update import subspace_update 19 20 21class esmdaMixIn(Ensemble): 22 """ 23 This is the implementation of the ES-MDA algorithm given in [1]. This algorithm have been implemented mostly to 24 illustrate how a algorithm using the Mda loop can be implemented. 25 """ 26 27 def __init__(self, keys_da, keys_en, sim): 28 """ 29 The class is initialized by passing the keywords and simulator object upwards in the hierarchy. 30 31 Parameters 32 ---------- 33 keys_da['mda']: list 34 - tot_assim_steps: total number of iterations in MDA, e.g., 3 35 - inflation_param: covariance inflation factors, e.g., [2, 4, 4] 36 37 keys_en : dict 38 39 sim : callable 40 41 References 42 ---------- 43 [1] A. Emerick & A. Reynods, Computers & Geosciences, 55, p. 3-15, 2013 44 """ 45 # Pass the init_file upwards in the hierarchy 46 super().__init__(keys_da, keys_en, sim) 47 48 self.prev_data_misfit = None 49 50 if self.restart is False: 51 self.prior_state = deepcopy(self.state) 52 self.list_states = list(self.state.keys()) 53 # At the moment, the iterative loop is threated as an iterative smoother an thus we check if assim. indices 54 # are given as in the Simultaneous loop. 55 self.check_assimindex_simultaneous() 56 self.assim_index = [self.keys_da['obsname'], self.keys_da['assimindex'][0]] 57 self.list_datatypes, self.list_act_datatypes = at.get_list_data_types( 58 self.obs_data, self.assim_index) 59 60 # Extract no. assimilation steps from MDA keyword in DATAASSIM part of init. file and set this equal to 61 # the number of iterations pluss one. Need one additional because the iter=0 is the prior run. 62 self.max_iter = len(self._ext_assim_steps())+1 63 self.iteration = 0 64 self.lam = 0 # set LM lamda to zero as we are doing one full update. 65 if 'energy' in self.keys_da: 66 # initial energy (Remember to extract this) 67 self.trunc_energy = self.keys_da['energy'] 68 if self.trunc_energy > 1: # ensure that it is given as percentage 69 self.trunc_energy /= 100. 70 else: 71 self.trunc_energy = 0.98 72 # Get the perturbed observations and observation scaling 73 self._ext_obs() 74 self.real_obs_data_conv = deepcopy(self.real_obs_data) 75 # Get state scaling and svd of scaled prior 76 self._ext_state() 77 self.current_state = deepcopy(self.state) 78 # Extract the inflation parameter from MDA keyword 79 self.alpha = self._ext_inflation_param() 80 81 self.prev_data_misfit = None 82 83 def calc_analysis(self): 84 r""" 85 Analysis step of ES-MDA. The analysis algorithm is similar to EnKF analysis, only difference is that the data 86 covariance matrix is inflated with an inflation parameter alpha. The update is done as an iterative smoother 87 where all data is assimilated at once. 88 89 Parameters: 90 ------------------------------------------------------------- 91 assim_step: int 92 Current assimilation step 93 94 Notes: 95 ----- 96 ES-MDA is an iterative ensemble smoother with a predefined number of iterations, where the updates is done with 97 the EnKF update equations but where the data covariance matrix have been inflated: 98 99 .. math:: 100 d_{obs} = d_{true} + \sqrt{\alpha}C_d^{1/2}Z \\ 101 m = m_{prior} + C_{md}(C_g + \alpha C_d)^{-1}(g(m) - d_{obs}) 102 103 104 where $d_{true}$ is the true observed data, $\alpha$ is the inflation factor, $C_d$ is the data covariance 105 matrix, $Z$ is a standard normal random variable, $C_{md}$ and $C_{g}$ are sample covariance matrices, 106 $m$ is the model parameter, and $g(\)$ is the predicted data. Note that $\alpha$ can have a different 107 value in each assimilation step and must fulfill: 108 109 .. math:: 110 \sum_{i=1}^{N_a} \frac{1}{\alpha} = 1 111 112 113 where $N_a$ being the total number of assimilation steps. 114 """ 115 # Get assimilation order as a list 116 # reformat predicted data 117 _, self.aug_pred_data = at.aug_obs_pred_data(self.obs_data, self.pred_data, self.assim_index, 118 self.list_datatypes) 119 120 init_en = Cholesky() # Initialize GeoStat class for generating realizations 121 if self.iteration == 1: # first iteration 122 data_misfit = at.calc_objectivefun( 123 self.real_obs_data_conv, self.aug_pred_data, self.cov_data) 124 125 # Store the (mean) data misfit (also for conv. check) 126 self.data_misfit = np.mean(data_misfit) 127 self.prior_data_misfit = np.mean(data_misfit) 128 self.prior_data_misfit_std = np.std(data_misfit) 129 self.data_misfit = np.mean(data_misfit) 130 self.data_misfit_std = np.std(data_misfit) 131 132 self.logger.info( 133 f'Prior run complete with data misfit: {self.prior_data_misfit:0.1f}.') 134 self.data_random_state = deepcopy(np.random.get_state()) 135 self.real_obs_data, self.scale_data = init_en.gen_real(self.obs_data_vector, 136 self.alpha[self.iteration-1] * 137 self.cov_data, self.ne, 138 return_chol=True) 139 self.E = np.dot(self.real_obs_data, self.proj) 140 else: 141 self.data_random_state = deepcopy(np.random.get_state()) 142 self.obs_data_vector, _ = at.aug_obs_pred_data(self.obs_data, self.pred_data, self.assim_index, 143 self.list_datatypes) 144 self.real_obs_data, self.scale_data = init_en.gen_real(self.obs_data_vector, 145 self.alpha[self.iteration - 146 1] * self.cov_data, 147 self.ne, 148 return_chol=True) 149 self.E = np.dot(self.real_obs_data, self.proj) 150 151 if 'localanalysis' in self.keys_da: 152 self.local_analysis_update() 153 else: 154 if len(self.scale_data.shape) == 1: 155 self.pert_preddata = np.dot(np.expand_dims(self.scale_data ** (-1), axis=1), 156 np.ones((1, self.ne))) * np.dot(self.aug_pred_data, self.proj) 157 else: 158 self.pert_preddata = scilinalg.solve( 159 self.scale_data, np.dot(self.aug_pred_data, self.proj)) 160 161 aug_state = at.aug_state(self.current_state, self.list_states) 162 163 self.update() 164 if hasattr(self, 'step'): 165 aug_state_upd = aug_state + self.step 166 if hasattr(self, 'w_step'): 167 self.W = self.current_W + self.w_step 168 aug_prior_state = at.aug_state(self.prior_state, self.list_states) 169 aug_state_upd = np.dot(aug_prior_state, (np.eye( 170 self.ne) + self.W / np.sqrt(self.ne - 1))) 171 172 # Extract updated state variables from aug_update 173 self.state = at.update_state(aug_state_upd, self.state, self.list_states) 174 self.state = at.limits(self.state, self.prior_info) 175 176 def check_convergence(self): 177 """ 178 Check if LM-EnRML have converged based on evaluation of change sizes of objective function, state and damping 179 parameter. 180 181 Returns 182 ------- 183 bool 184 Logic variable telling if algorithm has converged 185 dict 186 Dict. with keys corresponding to conv. criteria, with logical variable telling which of them that has been 187 met 188 """ 189 190 self.prev_data_misfit = self.data_misfit 191 self.prev_data_misfit_std = self.data_misfit_std 192 193 # Prelude to calc. conv. check (everything done below is from calc_analysis) 194 obs_data_vector, pred_data = at.aug_obs_pred_data(self.obs_data, self.pred_data, self.assim_index, 195 self.list_datatypes) 196 197 data_misfit = at.calc_objectivefun( 198 self.real_obs_data_conv, pred_data, self.cov_data) 199 self.data_misfit = np.mean(data_misfit) 200 self.data_misfit_std = np.std(data_misfit) 201 202 # Logical variables for conv. criteria 203 why_stop = {'rel_data_misfit': 1 - (self.data_misfit / self.prev_data_misfit), 204 'data_misfit': self.data_misfit, 205 'prev_data_misfit': self.prev_data_misfit} 206 207 if self.data_misfit < self.prev_data_misfit: 208 self.logger.info( 209 f'MDA iteration number {self.iteration}! Objective function reduced from {self.prev_data_misfit:0.1f} to {self.data_misfit:0.1f}.') 210 else: 211 self.logger.info( 212 f'MDA iteration number {self.iteration}! Objective function increased from {self.prev_data_misfit:0.1f} to {self.data_misfit:0.1f}.') 213 # Return conv = False, why_stop var. 214 self.current_state = deepcopy(self.state) 215 if hasattr(self, 'W'): 216 self.current_W = deepcopy(self.W) 217 218 return False, True, why_stop 219 220 def _ext_inflation_param(self): 221 r""" 222 Extract the data covariance inflation parameter from the MDA keyword in DATAASSIM part. Also, we check that 223 the criterion: 224 225 .. math:: 226 \sum_{i=1}^{N_a} \frac{1}{\alpha} = 1 227 228 is fulfilled for the inflation factor, alpha. If the keyword for inflation parameter -- INFLATION_PARAM -- is 229 not provided, we set the default $\alpha_i = N_a$, where $N_a$ is the tot. no. of MDA assimilation steps (the 230 criterion is fulfilled with this value). 231 232 Returns 233 ------- 234 alpha: list 235 Data covariance inflation factor 236 """ 237 # Make sure MDA is a list 238 if not isinstance(self.keys_da['mda'][0], list): 239 mda_opts = [self.keys_da['mda']] 240 else: 241 mda_opts = self.keys_da['mda'] 242 243 # Check if INFLATION_PARAM has been provided, and if so, extract the value(s). If not, we set alpha to the 244 # default value equal to the tot. no. assim. steps 245 if 'inflation_param' in list(zip(*mda_opts))[0]: 246 # Extract value 247 alpha_tmp = [item[1] for item in mda_opts if item[0] == 'inflation_param'][0] 248 249 # If one value is given, we copy it to all assim. steps. If multiple values are given, we check the 250 # number of parameters corresponds to tot. no. assim. steps 251 if not isinstance(alpha_tmp, list): # Single input 252 alpha = [alpha_tmp] * len(self._ext_assim_steps()) # Copy value 253 254 else: 255 assert len(alpha_tmp) == len(self._ext_assim_steps()), 'Number of parameters given in INFLATION_PARAM in MDA does ' \ 256 'not match the total number of assimilation steps given by ' \ 257 'TOT_ASSIM_STEPS in same keyword!' 258 259 # Inflation parameters for each assimilation step given directly 260 alpha = alpha_tmp 261 262 else: # Give alpha by default value 263 alpha = [len(self._ext_assim_steps())] * len(self._ext_assim_steps()) 264 265 # Check if alpha fulfills the criterion to machine precision 266 assert 1 - np.finfo(float).eps <= sum([(1 / x) for x in alpha]) <= 1 + np.finfo(float).eps, \ 267 'The sum of the inverse of the inflation parameters given in INFLATION_PARAM does not add up to 1!' 268 269 # Return inflation parameter 270 return alpha 271 272 def _ext_assim_steps(self): 273 """ 274 Extract list of assimilation steps to perform in MDA loop from the MDA keyword (mandatory for 275 MDA class) in DATAASSIM part. (This method is similar to Iterative._ext_max_iter) 276 277 Parameters 278 ---------- 279 keys_da: dict 280 all keywords from DATAASSIM part 281 mda': info 282 for MDA methods 283 284 Returns 285 ------- 286 int 287 Total number of MDA assimilation steps 288 289 Changelog 290 --------- 291 - ST 7/6-16 292 - ST 1/3-17: Changed to output list of assim. steps instead of just tot. assim. steps 293 """ 294 # Make sure MDA is a list 295 if not isinstance(self.keys_da['mda'][0], list): 296 mda_opts = [self.keys_da['mda']] 297 else: 298 mda_opts = self.keys_da['mda'] 299 300 # Check if 'max_iter' has been given; if not, give error (mandatory in ITERATION) 301 assert 'tot_assim_steps' in list( 302 zip(*mda_opts))[0], 'TOT_ASSIM_STEPS has not been given in MDA!' 303 304 # Extract max. iter 305 tot_no_assim = int([item[1] 306 for item in mda_opts if item[0] == 'tot_assim_steps'][0]) 307 308 # Make a list of assim. steps 309 assim_steps = list(range(tot_no_assim)) 310 311 # If it is a restart run, we remove simulations already done 312 if self.restart is True: 313 # List simulations we already have done. Do this by checking pred_data. 314 # OBS: Minus 1 here do to the aborted simulation is also not None. 315 # TODO: Relying on loop_ind may not be the best strategy (?) 316 sim_done = list(range(self.loop_ind)) 317 318 # Update list of assim. steps by removing simulations we have done 319 assim_steps = [ind for ind in assim_steps if ind not in sim_done] 320 321 # Return list assim. steps 322 return assim_steps 323 324 325class esmda_approx(esmdaMixIn, approx_update): 326 pass 327 328 329class esmda_full(esmdaMixIn, full_update): 330 pass 331 332 333class esmda_subspace(esmdaMixIn, subspace_update): 334 pass 335 336 337class esmda_geo(esmda_approx): 338 """ 339 This is the implementation of the ES-MDA-GEO algorithm from [1]. The main analysis step in this algorithm is the 340 same as the standard ES-MDA algorithm (implemented in the `es_mda` class). The difference between this and the 341 standard algorithm is the calculation of the inflation factor. 342 """ 343 344 def __init__(self, keys_da): 345 """ 346 The class is initialized by passing the PIPT init. file upwards in the hierarchy to be read and parsed in 347 `pipt.input_output.pipt_init.ReadInitFile`. 348 349 Parameters 350 ---------- 351 init_file: str 352 PIPT init. file containing info. to run the inversion algorithm 353 354 References 355 ---------- 356 [1] J. Rafiee & A. Reynolds, Inverse Problems 33 (11), 2017 357 """ 358 # Pass the init_file upwards in the hierarchy 359 super().__init__(keys_da) 360 361 # Within 362 self.alpha = [None] * self.tot_assim 363 364 def _calc_inflation_factor(self, pert_preddata, cov_data, energy=99): 365 """ 366 We calculate the inflation factor, follow the procedure laid out in Algorithm 1 in [1]. 367 368 Parameters 369 ---------- 370 pert_preddata: ndarray 371 Predicted data (fwd. run) ensemble matrix perturbed with its mean 372 cov_data: ndarray 373 Data covariance matrix 374 energy: float, optional 375 Percentage of energy kept in (T)SVD decompostion of 'sensitivity' matrix (default is 99%) 376 377 Returns 378 ------- 379 alpha: float 380 Inflation factor 381 beta: float 382 Geometric factor 383 """ 384 # Need the square-root of the data covariance matrix 385 if np.count_nonzero(cov_data - np.diagonal(cov_data)) == 0: 386 l = np.sqrt(cov_data) # only variance (diagonal) term 387 else: 388 # Cholesky decomposition 389 l = scilinalg.cholesky(cov_data) # cov. matrix has off-diag. terms 390 391 # Calculate the 'sensitivity' matrix: 392 sens = (1 / np.sqrt(self.ne - 1)) * np.dot(l, pert_preddata) 393 394 # Perform SVD on sensitivtiy matrix 395 _, s_d, _ = np.linalg.svd(sens, full_matrices=False) 396 397 # If no. measurements is more than ne - 1, we only keep ne - 1 sing. val. 398 if sens.shape[0] >= self.ne: 399 s_d = s_d[:-1].copy() 400 401 # If energy is less than 100 we truncate the SVD matrices 402 if energy < 100: 403 ti = (np.cumsum(s_d) / sum(s_d)) * 100 <= energy 404 s_d = s_d[ti].copy() 405 406 # Calc average singular value 407 avg_s_d = s_d.mean() 408 409 # The inflation factor is chosen as the maximum of the average singular value (squared) and max. no. of 410 # iterations 411 alpha = np.max((avg_s_d ** 2, self.tot_assim)) 412 413 # We calculate the geometric (reduction) factor (called 'common ratio' in the article). The formula is given 414 # as (1 - beta**-n) / (1 - beta**-1) = alpha (it is actually incorrect in the article, and should be as 415 # written here), with n=tot. assim. steps. Rewritten: 416 # 417 # (1-alpha)*beta**n + alpha*beta**(n-1) - 1 = 0 418 # 419 # This is of course a nasty polynomial root problem, but we use Numpy.roots, extract the real 420 # root less than 1, and hope for the best :p 421 root_coeff = np.zeros(self.tot_assim + 1) 422 root_coeff[0] = 1 - alpha # first coeff. in polynomial 423 root_coeff[1] = alpha # sec. coeff in polynomial 424 root_coeff[-1] = -1 425 roots = np.roots(root_coeff) 426 427 # Most likely the first root will be 1, and the second one will be the one we want. Due to numerical 428 # imprecision, the first root will not be exactly one, so we us Numpy.min to get the second root. 429 beta = np.min([x.real for x in roots if x.imag == 0 and x.real < 1]) 430 431 # Return inflation and geometric factor 432 return alpha, beta
22class esmdaMixIn(Ensemble): 23 """ 24 This is the implementation of the ES-MDA algorithm given in [1]. This algorithm have been implemented mostly to 25 illustrate how a algorithm using the Mda loop can be implemented. 26 """ 27 28 def __init__(self, keys_da, keys_en, sim): 29 """ 30 The class is initialized by passing the keywords and simulator object upwards in the hierarchy. 31 32 Parameters 33 ---------- 34 keys_da['mda']: list 35 - tot_assim_steps: total number of iterations in MDA, e.g., 3 36 - inflation_param: covariance inflation factors, e.g., [2, 4, 4] 37 38 keys_en : dict 39 40 sim : callable 41 42 References 43 ---------- 44 [1] A. Emerick & A. Reynods, Computers & Geosciences, 55, p. 3-15, 2013 45 """ 46 # Pass the init_file upwards in the hierarchy 47 super().__init__(keys_da, keys_en, sim) 48 49 self.prev_data_misfit = None 50 51 if self.restart is False: 52 self.prior_state = deepcopy(self.state) 53 self.list_states = list(self.state.keys()) 54 # At the moment, the iterative loop is threated as an iterative smoother an thus we check if assim. indices 55 # are given as in the Simultaneous loop. 56 self.check_assimindex_simultaneous() 57 self.assim_index = [self.keys_da['obsname'], self.keys_da['assimindex'][0]] 58 self.list_datatypes, self.list_act_datatypes = at.get_list_data_types( 59 self.obs_data, self.assim_index) 60 61 # Extract no. assimilation steps from MDA keyword in DATAASSIM part of init. file and set this equal to 62 # the number of iterations pluss one. Need one additional because the iter=0 is the prior run. 63 self.max_iter = len(self._ext_assim_steps())+1 64 self.iteration = 0 65 self.lam = 0 # set LM lamda to zero as we are doing one full update. 66 if 'energy' in self.keys_da: 67 # initial energy (Remember to extract this) 68 self.trunc_energy = self.keys_da['energy'] 69 if self.trunc_energy > 1: # ensure that it is given as percentage 70 self.trunc_energy /= 100. 71 else: 72 self.trunc_energy = 0.98 73 # Get the perturbed observations and observation scaling 74 self._ext_obs() 75 self.real_obs_data_conv = deepcopy(self.real_obs_data) 76 # Get state scaling and svd of scaled prior 77 self._ext_state() 78 self.current_state = deepcopy(self.state) 79 # Extract the inflation parameter from MDA keyword 80 self.alpha = self._ext_inflation_param() 81 82 self.prev_data_misfit = None 83 84 def calc_analysis(self): 85 r""" 86 Analysis step of ES-MDA. The analysis algorithm is similar to EnKF analysis, only difference is that the data 87 covariance matrix is inflated with an inflation parameter alpha. The update is done as an iterative smoother 88 where all data is assimilated at once. 89 90 Parameters: 91 ------------------------------------------------------------- 92 assim_step: int 93 Current assimilation step 94 95 Notes: 96 ----- 97 ES-MDA is an iterative ensemble smoother with a predefined number of iterations, where the updates is done with 98 the EnKF update equations but where the data covariance matrix have been inflated: 99 100 .. math:: 101 d_{obs} = d_{true} + \sqrt{\alpha}C_d^{1/2}Z \\ 102 m = m_{prior} + C_{md}(C_g + \alpha C_d)^{-1}(g(m) - d_{obs}) 103 104 105 where $d_{true}$ is the true observed data, $\alpha$ is the inflation factor, $C_d$ is the data covariance 106 matrix, $Z$ is a standard normal random variable, $C_{md}$ and $C_{g}$ are sample covariance matrices, 107 $m$ is the model parameter, and $g(\)$ is the predicted data. Note that $\alpha$ can have a different 108 value in each assimilation step and must fulfill: 109 110 .. math:: 111 \sum_{i=1}^{N_a} \frac{1}{\alpha} = 1 112 113 114 where $N_a$ being the total number of assimilation steps. 115 """ 116 # Get assimilation order as a list 117 # reformat predicted data 118 _, self.aug_pred_data = at.aug_obs_pred_data(self.obs_data, self.pred_data, self.assim_index, 119 self.list_datatypes) 120 121 init_en = Cholesky() # Initialize GeoStat class for generating realizations 122 if self.iteration == 1: # first iteration 123 data_misfit = at.calc_objectivefun( 124 self.real_obs_data_conv, self.aug_pred_data, self.cov_data) 125 126 # Store the (mean) data misfit (also for conv. check) 127 self.data_misfit = np.mean(data_misfit) 128 self.prior_data_misfit = np.mean(data_misfit) 129 self.prior_data_misfit_std = np.std(data_misfit) 130 self.data_misfit = np.mean(data_misfit) 131 self.data_misfit_std = np.std(data_misfit) 132 133 self.logger.info( 134 f'Prior run complete with data misfit: {self.prior_data_misfit:0.1f}.') 135 self.data_random_state = deepcopy(np.random.get_state()) 136 self.real_obs_data, self.scale_data = init_en.gen_real(self.obs_data_vector, 137 self.alpha[self.iteration-1] * 138 self.cov_data, self.ne, 139 return_chol=True) 140 self.E = np.dot(self.real_obs_data, self.proj) 141 else: 142 self.data_random_state = deepcopy(np.random.get_state()) 143 self.obs_data_vector, _ = at.aug_obs_pred_data(self.obs_data, self.pred_data, self.assim_index, 144 self.list_datatypes) 145 self.real_obs_data, self.scale_data = init_en.gen_real(self.obs_data_vector, 146 self.alpha[self.iteration - 147 1] * self.cov_data, 148 self.ne, 149 return_chol=True) 150 self.E = np.dot(self.real_obs_data, self.proj) 151 152 if 'localanalysis' in self.keys_da: 153 self.local_analysis_update() 154 else: 155 if len(self.scale_data.shape) == 1: 156 self.pert_preddata = np.dot(np.expand_dims(self.scale_data ** (-1), axis=1), 157 np.ones((1, self.ne))) * np.dot(self.aug_pred_data, self.proj) 158 else: 159 self.pert_preddata = scilinalg.solve( 160 self.scale_data, np.dot(self.aug_pred_data, self.proj)) 161 162 aug_state = at.aug_state(self.current_state, self.list_states) 163 164 self.update() 165 if hasattr(self, 'step'): 166 aug_state_upd = aug_state + self.step 167 if hasattr(self, 'w_step'): 168 self.W = self.current_W + self.w_step 169 aug_prior_state = at.aug_state(self.prior_state, self.list_states) 170 aug_state_upd = np.dot(aug_prior_state, (np.eye( 171 self.ne) + self.W / np.sqrt(self.ne - 1))) 172 173 # Extract updated state variables from aug_update 174 self.state = at.update_state(aug_state_upd, self.state, self.list_states) 175 self.state = at.limits(self.state, self.prior_info) 176 177 def check_convergence(self): 178 """ 179 Check if LM-EnRML have converged based on evaluation of change sizes of objective function, state and damping 180 parameter. 181 182 Returns 183 ------- 184 bool 185 Logic variable telling if algorithm has converged 186 dict 187 Dict. with keys corresponding to conv. criteria, with logical variable telling which of them that has been 188 met 189 """ 190 191 self.prev_data_misfit = self.data_misfit 192 self.prev_data_misfit_std = self.data_misfit_std 193 194 # Prelude to calc. conv. check (everything done below is from calc_analysis) 195 obs_data_vector, pred_data = at.aug_obs_pred_data(self.obs_data, self.pred_data, self.assim_index, 196 self.list_datatypes) 197 198 data_misfit = at.calc_objectivefun( 199 self.real_obs_data_conv, pred_data, self.cov_data) 200 self.data_misfit = np.mean(data_misfit) 201 self.data_misfit_std = np.std(data_misfit) 202 203 # Logical variables for conv. criteria 204 why_stop = {'rel_data_misfit': 1 - (self.data_misfit / self.prev_data_misfit), 205 'data_misfit': self.data_misfit, 206 'prev_data_misfit': self.prev_data_misfit} 207 208 if self.data_misfit < self.prev_data_misfit: 209 self.logger.info( 210 f'MDA iteration number {self.iteration}! Objective function reduced from {self.prev_data_misfit:0.1f} to {self.data_misfit:0.1f}.') 211 else: 212 self.logger.info( 213 f'MDA iteration number {self.iteration}! Objective function increased from {self.prev_data_misfit:0.1f} to {self.data_misfit:0.1f}.') 214 # Return conv = False, why_stop var. 215 self.current_state = deepcopy(self.state) 216 if hasattr(self, 'W'): 217 self.current_W = deepcopy(self.W) 218 219 return False, True, why_stop 220 221 def _ext_inflation_param(self): 222 r""" 223 Extract the data covariance inflation parameter from the MDA keyword in DATAASSIM part. Also, we check that 224 the criterion: 225 226 .. math:: 227 \sum_{i=1}^{N_a} \frac{1}{\alpha} = 1 228 229 is fulfilled for the inflation factor, alpha. If the keyword for inflation parameter -- INFLATION_PARAM -- is 230 not provided, we set the default $\alpha_i = N_a$, where $N_a$ is the tot. no. of MDA assimilation steps (the 231 criterion is fulfilled with this value). 232 233 Returns 234 ------- 235 alpha: list 236 Data covariance inflation factor 237 """ 238 # Make sure MDA is a list 239 if not isinstance(self.keys_da['mda'][0], list): 240 mda_opts = [self.keys_da['mda']] 241 else: 242 mda_opts = self.keys_da['mda'] 243 244 # Check if INFLATION_PARAM has been provided, and if so, extract the value(s). If not, we set alpha to the 245 # default value equal to the tot. no. assim. steps 246 if 'inflation_param' in list(zip(*mda_opts))[0]: 247 # Extract value 248 alpha_tmp = [item[1] for item in mda_opts if item[0] == 'inflation_param'][0] 249 250 # If one value is given, we copy it to all assim. steps. If multiple values are given, we check the 251 # number of parameters corresponds to tot. no. assim. steps 252 if not isinstance(alpha_tmp, list): # Single input 253 alpha = [alpha_tmp] * len(self._ext_assim_steps()) # Copy value 254 255 else: 256 assert len(alpha_tmp) == len(self._ext_assim_steps()), 'Number of parameters given in INFLATION_PARAM in MDA does ' \ 257 'not match the total number of assimilation steps given by ' \ 258 'TOT_ASSIM_STEPS in same keyword!' 259 260 # Inflation parameters for each assimilation step given directly 261 alpha = alpha_tmp 262 263 else: # Give alpha by default value 264 alpha = [len(self._ext_assim_steps())] * len(self._ext_assim_steps()) 265 266 # Check if alpha fulfills the criterion to machine precision 267 assert 1 - np.finfo(float).eps <= sum([(1 / x) for x in alpha]) <= 1 + np.finfo(float).eps, \ 268 'The sum of the inverse of the inflation parameters given in INFLATION_PARAM does not add up to 1!' 269 270 # Return inflation parameter 271 return alpha 272 273 def _ext_assim_steps(self): 274 """ 275 Extract list of assimilation steps to perform in MDA loop from the MDA keyword (mandatory for 276 MDA class) in DATAASSIM part. (This method is similar to Iterative._ext_max_iter) 277 278 Parameters 279 ---------- 280 keys_da: dict 281 all keywords from DATAASSIM part 282 mda': info 283 for MDA methods 284 285 Returns 286 ------- 287 int 288 Total number of MDA assimilation steps 289 290 Changelog 291 --------- 292 - ST 7/6-16 293 - ST 1/3-17: Changed to output list of assim. steps instead of just tot. assim. steps 294 """ 295 # Make sure MDA is a list 296 if not isinstance(self.keys_da['mda'][0], list): 297 mda_opts = [self.keys_da['mda']] 298 else: 299 mda_opts = self.keys_da['mda'] 300 301 # Check if 'max_iter' has been given; if not, give error (mandatory in ITERATION) 302 assert 'tot_assim_steps' in list( 303 zip(*mda_opts))[0], 'TOT_ASSIM_STEPS has not been given in MDA!' 304 305 # Extract max. iter 306 tot_no_assim = int([item[1] 307 for item in mda_opts if item[0] == 'tot_assim_steps'][0]) 308 309 # Make a list of assim. steps 310 assim_steps = list(range(tot_no_assim)) 311 312 # If it is a restart run, we remove simulations already done 313 if self.restart is True: 314 # List simulations we already have done. Do this by checking pred_data. 315 # OBS: Minus 1 here do to the aborted simulation is also not None. 316 # TODO: Relying on loop_ind may not be the best strategy (?) 317 sim_done = list(range(self.loop_ind)) 318 319 # Update list of assim. steps by removing simulations we have done 320 assim_steps = [ind for ind in assim_steps if ind not in sim_done] 321 322 # Return list assim. steps 323 return assim_steps
This is the implementation of the ES-MDA algorithm given in [1]. This algorithm have been implemented mostly to illustrate how a algorithm using the Mda loop can be implemented.
28 def __init__(self, keys_da, keys_en, sim): 29 """ 30 The class is initialized by passing the keywords and simulator object upwards in the hierarchy. 31 32 Parameters 33 ---------- 34 keys_da['mda']: list 35 - tot_assim_steps: total number of iterations in MDA, e.g., 3 36 - inflation_param: covariance inflation factors, e.g., [2, 4, 4] 37 38 keys_en : dict 39 40 sim : callable 41 42 References 43 ---------- 44 [1] A. Emerick & A. Reynods, Computers & Geosciences, 55, p. 3-15, 2013 45 """ 46 # Pass the init_file upwards in the hierarchy 47 super().__init__(keys_da, keys_en, sim) 48 49 self.prev_data_misfit = None 50 51 if self.restart is False: 52 self.prior_state = deepcopy(self.state) 53 self.list_states = list(self.state.keys()) 54 # At the moment, the iterative loop is threated as an iterative smoother an thus we check if assim. indices 55 # are given as in the Simultaneous loop. 56 self.check_assimindex_simultaneous() 57 self.assim_index = [self.keys_da['obsname'], self.keys_da['assimindex'][0]] 58 self.list_datatypes, self.list_act_datatypes = at.get_list_data_types( 59 self.obs_data, self.assim_index) 60 61 # Extract no. assimilation steps from MDA keyword in DATAASSIM part of init. file and set this equal to 62 # the number of iterations pluss one. Need one additional because the iter=0 is the prior run. 63 self.max_iter = len(self._ext_assim_steps())+1 64 self.iteration = 0 65 self.lam = 0 # set LM lamda to zero as we are doing one full update. 66 if 'energy' in self.keys_da: 67 # initial energy (Remember to extract this) 68 self.trunc_energy = self.keys_da['energy'] 69 if self.trunc_energy > 1: # ensure that it is given as percentage 70 self.trunc_energy /= 100. 71 else: 72 self.trunc_energy = 0.98 73 # Get the perturbed observations and observation scaling 74 self._ext_obs() 75 self.real_obs_data_conv = deepcopy(self.real_obs_data) 76 # Get state scaling and svd of scaled prior 77 self._ext_state() 78 self.current_state = deepcopy(self.state) 79 # Extract the inflation parameter from MDA keyword 80 self.alpha = self._ext_inflation_param() 81 82 self.prev_data_misfit = None
The class is initialized by passing the keywords and simulator object upwards in the hierarchy.
Parameters
- keys_da['mda'] (list):
- tot_assim_steps: total number of iterations in MDA, e.g., 3
- inflation_param: covariance inflation factors, e.g., [2, 4, 4]
keys_en (dict):
sim (callable):
References
[1] A. Emerick & A. Reynods, Computers & Geosciences, 55, p. 3-15, 2013
84 def calc_analysis(self): 85 r""" 86 Analysis step of ES-MDA. The analysis algorithm is similar to EnKF analysis, only difference is that the data 87 covariance matrix is inflated with an inflation parameter alpha. The update is done as an iterative smoother 88 where all data is assimilated at once. 89 90 Parameters: 91 ------------------------------------------------------------- 92 assim_step: int 93 Current assimilation step 94 95 Notes: 96 ----- 97 ES-MDA is an iterative ensemble smoother with a predefined number of iterations, where the updates is done with 98 the EnKF update equations but where the data covariance matrix have been inflated: 99 100 .. math:: 101 d_{obs} = d_{true} + \sqrt{\alpha}C_d^{1/2}Z \\ 102 m = m_{prior} + C_{md}(C_g + \alpha C_d)^{-1}(g(m) - d_{obs}) 103 104 105 where $d_{true}$ is the true observed data, $\alpha$ is the inflation factor, $C_d$ is the data covariance 106 matrix, $Z$ is a standard normal random variable, $C_{md}$ and $C_{g}$ are sample covariance matrices, 107 $m$ is the model parameter, and $g(\)$ is the predicted data. Note that $\alpha$ can have a different 108 value in each assimilation step and must fulfill: 109 110 .. math:: 111 \sum_{i=1}^{N_a} \frac{1}{\alpha} = 1 112 113 114 where $N_a$ being the total number of assimilation steps. 115 """ 116 # Get assimilation order as a list 117 # reformat predicted data 118 _, self.aug_pred_data = at.aug_obs_pred_data(self.obs_data, self.pred_data, self.assim_index, 119 self.list_datatypes) 120 121 init_en = Cholesky() # Initialize GeoStat class for generating realizations 122 if self.iteration == 1: # first iteration 123 data_misfit = at.calc_objectivefun( 124 self.real_obs_data_conv, self.aug_pred_data, self.cov_data) 125 126 # Store the (mean) data misfit (also for conv. check) 127 self.data_misfit = np.mean(data_misfit) 128 self.prior_data_misfit = np.mean(data_misfit) 129 self.prior_data_misfit_std = np.std(data_misfit) 130 self.data_misfit = np.mean(data_misfit) 131 self.data_misfit_std = np.std(data_misfit) 132 133 self.logger.info( 134 f'Prior run complete with data misfit: {self.prior_data_misfit:0.1f}.') 135 self.data_random_state = deepcopy(np.random.get_state()) 136 self.real_obs_data, self.scale_data = init_en.gen_real(self.obs_data_vector, 137 self.alpha[self.iteration-1] * 138 self.cov_data, self.ne, 139 return_chol=True) 140 self.E = np.dot(self.real_obs_data, self.proj) 141 else: 142 self.data_random_state = deepcopy(np.random.get_state()) 143 self.obs_data_vector, _ = at.aug_obs_pred_data(self.obs_data, self.pred_data, self.assim_index, 144 self.list_datatypes) 145 self.real_obs_data, self.scale_data = init_en.gen_real(self.obs_data_vector, 146 self.alpha[self.iteration - 147 1] * self.cov_data, 148 self.ne, 149 return_chol=True) 150 self.E = np.dot(self.real_obs_data, self.proj) 151 152 if 'localanalysis' in self.keys_da: 153 self.local_analysis_update() 154 else: 155 if len(self.scale_data.shape) == 1: 156 self.pert_preddata = np.dot(np.expand_dims(self.scale_data ** (-1), axis=1), 157 np.ones((1, self.ne))) * np.dot(self.aug_pred_data, self.proj) 158 else: 159 self.pert_preddata = scilinalg.solve( 160 self.scale_data, np.dot(self.aug_pred_data, self.proj)) 161 162 aug_state = at.aug_state(self.current_state, self.list_states) 163 164 self.update() 165 if hasattr(self, 'step'): 166 aug_state_upd = aug_state + self.step 167 if hasattr(self, 'w_step'): 168 self.W = self.current_W + self.w_step 169 aug_prior_state = at.aug_state(self.prior_state, self.list_states) 170 aug_state_upd = np.dot(aug_prior_state, (np.eye( 171 self.ne) + self.W / np.sqrt(self.ne - 1))) 172 173 # Extract updated state variables from aug_update 174 self.state = at.update_state(aug_state_upd, self.state, self.list_states) 175 self.state = at.limits(self.state, self.prior_info)
Analysis step of ES-MDA. The analysis algorithm is similar to EnKF analysis, only difference is that the data covariance matrix is inflated with an inflation parameter alpha. The update is done as an iterative smoother where all data is assimilated at once.
Parameters:
assim_step: int
Current assimilation step
Notes:
ES-MDA is an iterative ensemble smoother with a predefined number of iterations, where the updates is done with the EnKF update equations but where the data covariance matrix have been inflated:
$$d_{obs} = d_{true} + \sqrt{\alpha}C_d^{1/2}Z \ m = m_{prior} + C_{md}(C_g + \alpha C_d)^{-1}(g(m) - d_{obs})$$
where $d_{true}$ is the true observed data, $\alpha$ is the inflation factor, $C_d$ is the data covariance matrix, $Z$ is a standard normal random variable, $C_{md}$ and $C_{g}$ are sample covariance matrices, $m$ is the model parameter, and $g()$ is the predicted data. Note that $\alpha$ can have a different value in each assimilation step and must fulfill:
$$\sum_{i=1}^{N_a} \frac{1}{\alpha} = 1$$
where $N_a$ being the total number of assimilation steps.
177 def check_convergence(self): 178 """ 179 Check if LM-EnRML have converged based on evaluation of change sizes of objective function, state and damping 180 parameter. 181 182 Returns 183 ------- 184 bool 185 Logic variable telling if algorithm has converged 186 dict 187 Dict. with keys corresponding to conv. criteria, with logical variable telling which of them that has been 188 met 189 """ 190 191 self.prev_data_misfit = self.data_misfit 192 self.prev_data_misfit_std = self.data_misfit_std 193 194 # Prelude to calc. conv. check (everything done below is from calc_analysis) 195 obs_data_vector, pred_data = at.aug_obs_pred_data(self.obs_data, self.pred_data, self.assim_index, 196 self.list_datatypes) 197 198 data_misfit = at.calc_objectivefun( 199 self.real_obs_data_conv, pred_data, self.cov_data) 200 self.data_misfit = np.mean(data_misfit) 201 self.data_misfit_std = np.std(data_misfit) 202 203 # Logical variables for conv. criteria 204 why_stop = {'rel_data_misfit': 1 - (self.data_misfit / self.prev_data_misfit), 205 'data_misfit': self.data_misfit, 206 'prev_data_misfit': self.prev_data_misfit} 207 208 if self.data_misfit < self.prev_data_misfit: 209 self.logger.info( 210 f'MDA iteration number {self.iteration}! Objective function reduced from {self.prev_data_misfit:0.1f} to {self.data_misfit:0.1f}.') 211 else: 212 self.logger.info( 213 f'MDA iteration number {self.iteration}! Objective function increased from {self.prev_data_misfit:0.1f} to {self.data_misfit:0.1f}.') 214 # Return conv = False, why_stop var. 215 self.current_state = deepcopy(self.state) 216 if hasattr(self, 'W'): 217 self.current_W = deepcopy(self.W) 218 219 return False, True, why_stop
Check if LM-EnRML have converged based on evaluation of change sizes of objective function, state and damping parameter.
Returns
- bool: Logic variable telling if algorithm has converged
- dict: Dict. with keys corresponding to conv. criteria, with logical variable telling which of them that has been met
Inherited Members
This is the implementation of the ES-MDA algorithm given in [1]. This algorithm have been implemented mostly to illustrate how a algorithm using the Mda loop can be implemented.
Inherited Members
- pipt.loop.ensemble.Ensemble
- logger
- check_assimindex_sequential
- check_assimindex_simultaneous
- save_temp_state_assim
- save_temp_state_iter
- save_temp_state_mda
- save_temp_state_ml
- compress
- local_analysis_update
- ensemble.ensemble.Ensemble
- keys_en
- sim
- pred_data
- aux_input
- pickle_restart_file
- restart
- gen_init_ensemble
- get_list_assim_steps
- calc_prediction
- save
- load
- calc_ml_prediction
- pipt.update_schemes.update_methods_ns.approx_update.approx_update
- update
This is the implementation of the ES-MDA algorithm given in [1]. This algorithm have been implemented mostly to illustrate how a algorithm using the Mda loop can be implemented.
Inherited Members
- pipt.loop.ensemble.Ensemble
- logger
- check_assimindex_sequential
- check_assimindex_simultaneous
- save_temp_state_assim
- save_temp_state_iter
- save_temp_state_mda
- save_temp_state_ml
- compress
- local_analysis_update
- ensemble.ensemble.Ensemble
- keys_en
- sim
- pred_data
- aux_input
- pickle_restart_file
- restart
- gen_init_ensemble
- get_list_assim_steps
- calc_prediction
- save
- load
- calc_ml_prediction
- pipt.update_schemes.update_methods_ns.full_update.full_update
- update
This is the implementation of the ES-MDA algorithm given in [1]. This algorithm have been implemented mostly to illustrate how a algorithm using the Mda loop can be implemented.
Inherited Members
- pipt.loop.ensemble.Ensemble
- logger
- check_assimindex_sequential
- check_assimindex_simultaneous
- save_temp_state_assim
- save_temp_state_iter
- save_temp_state_mda
- save_temp_state_ml
- compress
- local_analysis_update
- ensemble.ensemble.Ensemble
- keys_en
- sim
- pred_data
- aux_input
- pickle_restart_file
- restart
- gen_init_ensemble
- get_list_assim_steps
- calc_prediction
- save
- load
- calc_ml_prediction
- pipt.update_schemes.update_methods_ns.subspace_update.subspace_update
- update
338class esmda_geo(esmda_approx): 339 """ 340 This is the implementation of the ES-MDA-GEO algorithm from [1]. The main analysis step in this algorithm is the 341 same as the standard ES-MDA algorithm (implemented in the `es_mda` class). The difference between this and the 342 standard algorithm is the calculation of the inflation factor. 343 """ 344 345 def __init__(self, keys_da): 346 """ 347 The class is initialized by passing the PIPT init. file upwards in the hierarchy to be read and parsed in 348 `pipt.input_output.pipt_init.ReadInitFile`. 349 350 Parameters 351 ---------- 352 init_file: str 353 PIPT init. file containing info. to run the inversion algorithm 354 355 References 356 ---------- 357 [1] J. Rafiee & A. Reynolds, Inverse Problems 33 (11), 2017 358 """ 359 # Pass the init_file upwards in the hierarchy 360 super().__init__(keys_da) 361 362 # Within 363 self.alpha = [None] * self.tot_assim 364 365 def _calc_inflation_factor(self, pert_preddata, cov_data, energy=99): 366 """ 367 We calculate the inflation factor, follow the procedure laid out in Algorithm 1 in [1]. 368 369 Parameters 370 ---------- 371 pert_preddata: ndarray 372 Predicted data (fwd. run) ensemble matrix perturbed with its mean 373 cov_data: ndarray 374 Data covariance matrix 375 energy: float, optional 376 Percentage of energy kept in (T)SVD decompostion of 'sensitivity' matrix (default is 99%) 377 378 Returns 379 ------- 380 alpha: float 381 Inflation factor 382 beta: float 383 Geometric factor 384 """ 385 # Need the square-root of the data covariance matrix 386 if np.count_nonzero(cov_data - np.diagonal(cov_data)) == 0: 387 l = np.sqrt(cov_data) # only variance (diagonal) term 388 else: 389 # Cholesky decomposition 390 l = scilinalg.cholesky(cov_data) # cov. matrix has off-diag. terms 391 392 # Calculate the 'sensitivity' matrix: 393 sens = (1 / np.sqrt(self.ne - 1)) * np.dot(l, pert_preddata) 394 395 # Perform SVD on sensitivtiy matrix 396 _, s_d, _ = np.linalg.svd(sens, full_matrices=False) 397 398 # If no. measurements is more than ne - 1, we only keep ne - 1 sing. val. 399 if sens.shape[0] >= self.ne: 400 s_d = s_d[:-1].copy() 401 402 # If energy is less than 100 we truncate the SVD matrices 403 if energy < 100: 404 ti = (np.cumsum(s_d) / sum(s_d)) * 100 <= energy 405 s_d = s_d[ti].copy() 406 407 # Calc average singular value 408 avg_s_d = s_d.mean() 409 410 # The inflation factor is chosen as the maximum of the average singular value (squared) and max. no. of 411 # iterations 412 alpha = np.max((avg_s_d ** 2, self.tot_assim)) 413 414 # We calculate the geometric (reduction) factor (called 'common ratio' in the article). The formula is given 415 # as (1 - beta**-n) / (1 - beta**-1) = alpha (it is actually incorrect in the article, and should be as 416 # written here), with n=tot. assim. steps. Rewritten: 417 # 418 # (1-alpha)*beta**n + alpha*beta**(n-1) - 1 = 0 419 # 420 # This is of course a nasty polynomial root problem, but we use Numpy.roots, extract the real 421 # root less than 1, and hope for the best :p 422 root_coeff = np.zeros(self.tot_assim + 1) 423 root_coeff[0] = 1 - alpha # first coeff. in polynomial 424 root_coeff[1] = alpha # sec. coeff in polynomial 425 root_coeff[-1] = -1 426 roots = np.roots(root_coeff) 427 428 # Most likely the first root will be 1, and the second one will be the one we want. Due to numerical 429 # imprecision, the first root will not be exactly one, so we us Numpy.min to get the second root. 430 beta = np.min([x.real for x in roots if x.imag == 0 and x.real < 1]) 431 432 # Return inflation and geometric factor 433 return alpha, beta
This is the implementation of the ES-MDA-GEO algorithm from [1]. The main analysis step in this algorithm is the
same as the standard ES-MDA algorithm (implemented in the es_mda
class). The difference between this and the
standard algorithm is the calculation of the inflation factor.
345 def __init__(self, keys_da): 346 """ 347 The class is initialized by passing the PIPT init. file upwards in the hierarchy to be read and parsed in 348 `pipt.input_output.pipt_init.ReadInitFile`. 349 350 Parameters 351 ---------- 352 init_file: str 353 PIPT init. file containing info. to run the inversion algorithm 354 355 References 356 ---------- 357 [1] J. Rafiee & A. Reynolds, Inverse Problems 33 (11), 2017 358 """ 359 # Pass the init_file upwards in the hierarchy 360 super().__init__(keys_da) 361 362 # Within 363 self.alpha = [None] * self.tot_assim
The class is initialized by passing the PIPT init. file upwards in the hierarchy to be read and parsed in
pipt.input_output.pipt_init.ReadInitFile
.
Parameters
- init_file (str): PIPT init. file containing info. to run the inversion algorithm
References
[1] J. Rafiee & A. Reynolds, Inverse Problems 33 (11), 2017
Inherited Members
- pipt.loop.ensemble.Ensemble
- logger
- check_assimindex_sequential
- check_assimindex_simultaneous
- save_temp_state_assim
- save_temp_state_iter
- save_temp_state_mda
- save_temp_state_ml
- compress
- local_analysis_update
- ensemble.ensemble.Ensemble
- keys_en
- sim
- pred_data
- aux_input
- pickle_restart_file
- restart
- gen_init_ensemble
- get_list_assim_steps
- calc_prediction
- save
- load
- calc_ml_prediction
- pipt.update_schemes.update_methods_ns.approx_update.approx_update
- update