pipt.misc_tools.cov_regularization
Scripts used for localization in the fwd_sim step of Bayes.
Changelog
- 28/6-16: Initialise major reconstruction of the covariance regularization script.
Main outline is:
- make this a collection of support functions, not a class
- initialization will be performed at the initialization of the ensemble class, not at the analysis step. This will
return a dictionary of dictionaries, with a triple as key (data_type, assim_time, parameter). From this key the info
for a unique localization function can be found as a new dictionary with keys:
taper_func
,position
,anisotropi
,range
. This is, potentially, a substantial amount of data which should be imported as a npz file. For small cases, it can be defined in the init file in csv form:
LOCALIZATION
FIELD 10 10
fb 2 2 1 5 1 0 WBHP PRO-1 10 PERMX,fb 7 7 1 5 1 0 WBHP PRO-2 10 PERMX,fb 5 5 1 5 1 0 WBHP INJ-1 10 PERMX
(taper_func pos(x) pos(y) pos(z) range range(z) anisotropi(ratio) anisotropi(angle) data well assim_time parameter)
- Generate functions that return the correct localization function.
"""
Scripts used for localization in the fwd_sim step of Bayes.

Changelog
---------
- 28/6-16: Initialise major reconstruction of the covariance regularization script.

Main outline is:
- make this a collection of support functions, not a class
- initialization will be performed at the initialization of the ensemble class, not at the analysis step. This will
  return a dictionary of dictionaries, with a triple as key (data_type, assim_time, parameter). From this key the info
  for a unique localization function can be found as a new dictionary with keys:
  `taper_func`, `position`, `anisotropi`, `range`.
  This is, potentially, a substantial amount of data which should be imported
  as a npz file. For small cases, it can be defined in the init file in csv
  form:

    LOCALIZATION
    FIELD 10 10
    fb 2 2 1 5 1 0 WBHP PRO-1 10 PERMX,fb 7 7 1 5 1 0 WBHP PRO-2 10 PERMX,fb 5 5 1 5 1 0 WBHP INJ-1 10 PERMX
    (taper_func pos(x) pos(y) pos(z) range range(z) anisotropi(ratio) anisotropi(angle) data well assim_time parameter)

- Generate functions that return the correct localization function.
"""

__author__ = 'kfo005'


import numpy as np
import scipy.linalg as linalg
from scipy.special import expit
import os
import pickle
import csv
import datetime as dt
from shutil import rmtree
from scipy import sparse
from scipy.spatial import distance

# internal import
import pipt.misc_tools.analysis_tools as at


class localization():
    #####
    # TODO: Check field dimensions, should always ensure that we can provide i ,j ,k (x, y, z)
    ###

    def __init__(self, parsed_info, assimIndex, data_typ, free_parameter, ne):
        """
        Format the parsed info from the input file, and generate the unique localization masks.

        Parameters
        ----------
        parsed_info : list
            Parsed LOCALIZATION keyword. Element 0 is the FIELD entry
            (name, dimensions); element 1 selects the localization mode:
            'AUTOADALOC', 'LOCALANALYSIS', a pickle file, a .csv file, or an
            inline comma-separated string. Element 2 (optional) is either a
            'type' option or an ACTNUM .npz file, depending on the mode.
        assimIndex : iterable
            Assimilation times; second element of each (data, time, parameter) key.
        data_typ : iterable
            Data types; first element of each key.
        free_parameter : iterable
            Parameter names; third element of each key.
        ne : int
            Ensemble size (used by the Furrer-Bengtsson taper).
        """
        # if the next element is a .p file (pickle), assume that this has been correctly formated and can be
        # automatically imported. NB: it is important that we use the pickle format since we have a dictionary
        # containing dictionaries.

        # To make this as robust as possible, we always try to load the file; any failure (including a plain
        # csv string that is not a loadable file) falls through to the manual-parsing branch below.
        try:
            if parsed_info[1][0].upper() == 'AUTOADALOC':
                init_local = {}
                init_local['autoadaloc'] = True
                init_local['nstd'] = parsed_info[1][1]
                if parsed_info[2][0] == 'type':
                    init_local['type'] = parsed_info[2][1]
            elif parsed_info[1][0].upper() == 'LOCALANALYSIS':
                init_local = {}
                init_local['localanalysis'] = True
                for i, opt in enumerate(list(zip(*parsed_info))[0]):
                    if opt.lower() == 'type':
                        init_local['type'] = parsed_info[i][1]
                    if opt.lower() == 'range':
                        init_local['range'] = float(parsed_info[i][1])
            else:
                # NOTE(review): bare except below silently swallows any error raised here,
                # including a genuinely corrupt pickle file — consider narrowing.
                init_local = pickle.load(open(parsed_info[1][0], 'rb'))
        except:
            # no file could be loaded
            # initiallize the outer dictionary with one empty entry per
            # (data, time, parameter) triple
            init_local = {}
            for time in assimIndex:
                for datum in data_typ:
                    for parameter in free_parameter:
                        init_local[(datum, time, parameter)] = {
                            'taper_func': None,
                            'position': None,
                            'anisotropi': None,
                            'range': None
                        }
            # insert the values that are defined
            # check if data are provided through a .csv file
            if parsed_info[1][0].endswith('.csv'):
                with open(parsed_info[1][0]) as csv_file:
                    # get all lines
                    reader = csv.reader(csv_file)
                    info = [elem for elem in reader]
                # collapse the list of rows into a flat list of entries
                info = [item for sublist in info for item in sublist]
            else:
                # inline definition: comma-separated entries in a single string
                info = parsed_info[1][0].split(',')
            for elem in info:
                # If a predefined mask is to be imported the localization keyword must be
                # [import filename.npz]
                # where filename is the name of the .npz file to be uploaded.
                tmp_info = elem.split()

                # format the data and time elements
                if len(tmp_info) == 11:  # data has only one name
                    name = (tmp_info[8].lower(), float(tmp_info[9]), tmp_info[10].lower())
                else:
                    # two-part data name, e.g. 'wbhp pro-1'
                    name = (tmp_info[8].lower() + ' ' + tmp_info[9].lower(),
                            float(tmp_info[10]), tmp_info[11].lower())

                # assert if the data to be localized actually exists
                if name in init_local.keys():

                    # input the correct info into the localization dictionary
                    init_local[name]['taper_func'] = tmp_info[0]
                    if tmp_info[0] == 'import':
                        # if a predefined mask is to be imported, the name is the following element.
                        init_local[name]['file'] = tmp_info[1]
                    else:
                        # the position can span over multiple cells, e.g., 55:100. Hence keep this input as a string
                        init_local[name]['position'] = [
                            [int(float(tmp_info[1])), int(float(tmp_info[2])), int(float(tmp_info[3]))]]
                        init_local[name]['range'] = [int(tmp_info[4]), int(
                            tmp_info[5])]  # the range is always an integer
                        init_local[name]['anisotropi'] = [
                            float(tmp_info[6]), float(tmp_info[7])]

            # fist element of the parsed info is field size
            assert parsed_info[0][0].upper() == 'FIELD'
            init_local['field'] = [int(elem) for elem in parsed_info[0][1]]

            # check if final parsed info is the actnum (active-cell boolean map stored in a .npz)
            try:
                if parsed_info[2][0].upper() == 'ACTNUM':
                    assert parsed_info[2][1].endswith('.npz')  # this must be a .npz file!!
                    tmp_file = np.load(parsed_info[2][1])
                    init_local['actnum'] = tmp_file['actnum']
                else:
                    init_local['actnum'] = None
            except:
                init_local['actnum'] = None

            # generate the unique localization masks. Recall that the parameters: "taper_type", "anisotropi",
            # and "range" gives a unique mask.
            init_local['mask'] = {}
            # loop over all localization info to ensure that all the masks have been generated
            # Store masks with the key ('taper_function', 'anisotropi', 'range')
            # NOTE(review): triples that got no csv entry still have 'anisotropi' = None here, which would
            # raise on the [0] index below — presumably every triple is expected to be defined; verify.
            loc_mask_info = [(init_local[el]['taper_func'], init_local[el]['anisotropi'][0], init_local[el]
                              ['anisotropi'][1], init_local[el]['range']) for el in init_local.keys() if len(el) == 3]
            for test_key in loc_mask_info:
                if not len(init_local['mask']):
                    # first mask: no membership test needed
                    if test_key[0] == 'region':
                        # anisotropi carries no meaning for region masks; key on the range only
                        if isinstance(test_key[3], list):
                            new_key = ('region', test_key[3][0],
                                       test_key[3][1], test_key[3][2])
                        else:
                            new_key = ('region', test_key[3])
                        init_local['mask'][new_key] = self._gen_loc_mask(taper_function=test_key[0],
                                                                         anisotropi=[
                                                                             test_key[1], test_key[2]],
                                                                         loc_range=test_key[3],
                                                                         field_size=init_local['field'],
                                                                         ne=ne
                                                                         )
                    else:
                        if isinstance(test_key[3], list):
                            new_key = (test_key[0], test_key[1],
                                       test_key[2], test_key[3][0], test_key[3][1])
                        else:
                            new_key = test_key
                        # NOTE(review): loc_range indexes test_key[3][0] even in the non-list branch —
                        # a scalar range would fail here; csv parsing always stores a 2-element list.
                        init_local['mask'][new_key] = self._gen_loc_mask(taper_function=test_key[0],
                                                                         anisotropi=[
                                                                             test_key[1], test_key[2]],
                                                                         loc_range=test_key[3][0],
                                                                         field_size=init_local['field'],
                                                                         ne=ne
                                                                         )
                else:
                    # if loc = region, anisotropi has no meaning.
                    if test_key[0] == 'region':
                        # If region there are two options:
                        # 1: file. Unique parameters ('region', filename)
                        # 2: area. Unique parameters ('region', 'x', 'y','z')
                        if isinstance(test_key[3], list):
                            new_key = ('region', test_key[3][0],
                                       test_key[3][1], test_key[3][2])
                        else:
                            new_key = ('region', test_key[3])

                        if new_key not in init_local['mask']:
                            # generate this mask
                            init_local['mask'][new_key] = self._gen_loc_mask(taper_function=test_key[0],
                                                                             anisotropi=[
                                                                                 test_key[1], test_key[2]],
                                                                             loc_range=test_key[3],
                                                                             field_size=init_local['field'],
                                                                             ne=ne
                                                                             )
                    else:
                        if isinstance(test_key[3], list):
                            new_key = (test_key[0], test_key[1],
                                       test_key[2], test_key[3][0], test_key[3][1])
                        else:
                            new_key = test_key

                        if new_key not in init_local['mask']:
                            # generate this mask
                            init_local['mask'][new_key] = self._gen_loc_mask(taper_function=test_key[0],
                                                                             anisotropi=[
                                                                                 test_key[1], test_key[2]],
                                                                             loc_range=test_key[3][0],
                                                                             field_size=init_local['field'],
                                                                             ne=ne
                                                                             )
        self.loc_info = init_local

    def localize(self, curr_data, curr_time, curr_param, ne, prior_info, data_size):
        """
        Assemble the full localization matrix for the current update.

        Loops over curr_time, curr_data and curr_param and stacks the per-pair
        masks into a sparse matrix rho of shape n_m (total parameters) x n_d
        (total data); the stacking below builds the transpose and returns
        .transpose() at the end.
        """
        loc = []
        for time_count, time in enumerate(curr_time):
            for count, data in enumerate(curr_data):
                if data_size[time_count][count] > 0:
                    # one row-list per scalar datum of this data type at this time
                    tmp_loc = [[] for _ in range(data_size[time_count][count])]
                    for param in curr_param:
                        # Check if this parameter should be localized
                        if (data, time, param) in self.loc_info:
                            if not self.loc_info[(data, time, param)]['taper_func'] == 'region':
                                if isinstance(self.loc_info[(data, time, param)]['range'], list):
                                    # list range: key carries both lateral and vertical range
                                    key = (self.loc_info[(data, time, param)]['taper_func'],
                                           self.loc_info[(data, time, param)]['anisotropi'][0],
                                           self.loc_info[(data, time, param)]['anisotropi'][1],
                                           self.loc_info[(data, time, param)]['range'][0],
                                           self.loc_info[(data, time, param)]['range'][1])
                                    mask = self._repos_locmask(self.loc_info['mask'][key],
                                                               [[el[0], el[1], el[2]] for el in
                                                                self.loc_info[(data, time, param)]['position']],
                                                               z_range=self.loc_info[(data, time, param)]['range'][1])
                                else:
                                    key = (self.loc_info[(data, time, param)]['taper_func'],
                                           self.loc_info[(data, time, param)]['anisotropi'][0],
                                           self.loc_info[(data, time, param)]['anisotropi'][1],
                                           self.loc_info[(data, time, param)]['range'])
                                    mask = self._repos_locmask(self.loc_info['mask'][key],
                                                               [[el[0], el[1]] for el in
                                                                self.loc_info[(data, time, param)]['position']])
                                # same repositioned mask applies to every scalar datum of this type
                                for i in range(data_size[time_count][count]):
                                    tmp_loc[i].append(mask)
                            elif self.loc_info[(data, time, param)]['taper_func'] == 'region':
                                if isinstance(self.loc_info[(data, time, param)]['range'], list):
                                    key = (self.loc_info[(data, time, param)]['taper_func'],
                                           self.loc_info[(data, time, param)]['range'][0],
                                           self.loc_info[(data, time, param)]['range'][1],
                                           self.loc_info[(data, time, param)]['range'][2])
                                else:
                                    key = (self.loc_info[(data, time, param)]['taper_func'],
                                           self.loc_info[(data, time, param)]['range'])

                                mask = self.loc_info['mask'][key]
                                for i in range(data_size[time_count][count]):
                                    tmp_loc[i].append(mask)
                        else:
                            # if no localization has been defined, assume that we do not update
                            if data_size[time_count][count] > 1:
                                # must make a field mask of zeros
                                mask = np.zeros((data_size[time_count][count], prior_info[param]['nx'] *
                                                 prior_info[param]['ny'] *
                                                 prior_info[param]['nz']))
                                # set the localization mask to zeros for this parameter
                                for i in range(data_size[time_count][count]):
                                    if self.loc_info['actnum'] is not None:
                                        tmp_loc[i].append(
                                            mask[i, self.loc_info['actnum']])
                                    else:
                                        tmp_loc[i].append(mask[i, :])
                            else:
                                mask = np.zeros(prior_info[param]['nx'] *
                                                prior_info[param]['ny'] *
                                                prior_info[param]['nz'])
                                if self.loc_info['actnum'] is not None:
                                    tmp_loc[0].append(mask[self.loc_info['actnum']])
                                else:
                                    tmp_loc[0].append(mask)
                    for el in tmp_loc:
                        if len(el) > 1:
                            loc.append(sparse.hstack(el))
                        else:
                            loc.append(sparse.csc_matrix(el))
        return sparse.vstack(loc).transpose()

    def auto_ada_loc(self, pert_state, proj_pred_data, curr_param, **kwargs):
        """
        Automatic adaptive localization of the update step.

        Estimates spurious correlation levels by correlating a randomly
        shuffled ensemble with the predicted data, builds a tapering matrix
        per parameter block, and returns the tapered update
        (tapering * pert_state) @ proj_pred_data.
        """
        if 'prior_info' in kwargs:
            prior_info = kwargs['prior_info']
        else:
            prior_info = {key: None for key in curr_param}

        step = []  # NOTE(review): dead initialization; overwritten below

        ne = pert_state.shape[1]
        # shuffling the ensemble members destroys genuine state/data correlation,
        # leaving an estimate of the sampling-noise level
        rp_index = np.random.permutation(ne)
        shuffled_ensemble = pert_state[:, rp_index]
        corr_mtx = self.get_corr_mtx(pert_state, proj_pred_data)
        corr_mtx_shuffled = self.get_corr_mtx(shuffled_ensemble, proj_pred_data)

        tapering_matrix = np.ones(corr_mtx.shape)

        if self.loc_info['actnum'] is not None:
            num_active = np.sum(self.loc_info['actnum'])
        else:
            num_active = np.prod(self.loc_info['field'])
        count = 0
        # taper each parameter block separately; blocks are stacked along axis 0
        for param in curr_param:
            if param == 'NA':
                num_active = tapering_matrix.shape[0]
            else:
                if 'active' in prior_info[param]:  # if this is defined
                    num_active = int(prior_info[param]['active'])
            prop_index = np.arange(num_active) + count
            current_tapering = self.tapering_function(
                corr_mtx[prop_index, :], corr_mtx_shuffled[prop_index, :])
            tapering_matrix[prop_index, :] = current_tapering
            count += num_active
        step = np.dot(np.multiply(tapering_matrix, pert_state), proj_pred_data)

        return step

    def tapering_function(self, cf, cf_s):
        """
        Build a tapering matrix from a correlation matrix `cf` and its
        shuffled (noise-only) counterpart `cf_s`, column by column.

        The noise std is the median-absolute-deviation estimate
        (median(|cf_s|)/0.6745); the cutoff is nstd times that estimate.
        """
        nstd = 1
        if self.loc_info['nstd'] is not None:
            nstd = self.loc_info['nstd']

        tc = np.zeros(cf.shape)

        for i in range(cf.shape[1]):
            current_cf = cf[:, i]
            # robust noise-std estimate from the shuffled correlations (MAD/0.6745)
            est_noise_std = np.median(np.absolute(cf_s[:, i]), axis=0) / 0.6745
            # NOTE(review): the universal-threshold value below is immediately
            # overwritten by the nstd-based cutoff on the next line.
            cutoff_point = np.sqrt(2*np.log(np.prod(current_cf.shape))) * est_noise_std
            cutoff_point = nstd * est_noise_std
            if 'type' in self.loc_info and self.loc_info['type'] == 'soft':
                current_tc = self.rational_function(1-np.absolute(current_cf),
                                                    1 - cutoff_point)
            elif 'type' in self.loc_info and self.loc_info['type'] == 'sigm':
                current_tc = self.rational_function_sigmoid(np.absolute(current_cf),
                                                            nstd)
            else:  # default to hard thresholding
                set_upper = np.where(np.absolute(current_cf) > cutoff_point)
                current_tc = np.zeros(current_cf.shape)
                current_tc[set_upper] = 1  # this is hard thresholding
            tc[:, i] = current_tc.flatten()

        return tc

    def rational_function(self, dist, lc):
        """
        Gaspari-Cohn fifth-order piecewise rational taper evaluated at
        z = |dist|/lc: the inner polynomial for z <= 1, the outer one for
        1 < z <= 2, and zero beyond.
        """
        z = np.absolute(dist) / lc
        index_1 = np.where(z <= 1)
        index_2 = np.where(z <= 2)
        index_12 = np.setdiff1d(index_2, index_1)

        y = np.zeros(len(z))

        y[index_1] = 1 - (np.power(z[index_1], 5) / 4) \
            + (np.power(z[index_1], 4) / 2) \
            + (5*np.power(z[index_1], 3) / 8) \
            - (5*np.power(z[index_1], 2) / 3)

        y[index_12] = (np.power(z[index_12], 5) / 12) \
            - (np.power(z[index_12], 4) / 2) \
            + (5 * np.power(z[index_12], 3) / 8) \
            + (5 * np.power(z[index_12], 2) / 3) \
            - 5*z[index_12] \
            - np.divide(2, 3*z[index_12]) + 4

        return y

    def rational_function_sigmoid(self, dist, lc):
        """Smooth sigmoid taper centered at 1 - lc (logistic with fixed steepness)."""
        steepness = 50  # define how steep the transition is
        y = expit((dist-(1-lc))*steepness)

        return y

    def get_corr_mtx(self, pert_state, proj_pred_data):
        """
        Compute the (n_state x n_data) sample correlation matrix between the
        state ensemble and the predicted-data ensemble.

        Rows/columns with (near-)zero ensemble spread are zeroed out; the
        std floor of 1e-6 avoids division by zero in the normalization.
        """
        ne = pert_state.shape[1]

        std_model = np.std(pert_state, axis=1)
        std_model[std_model < 10 ** -6] = 10 ** -6
        std_data = np.std(proj_pred_data, axis=1)
        std_data[std_data < 10 ** -6] = 10 ** -6

        # normalize each row to zero mean and unit std
        C1 = np.mean(pert_state, axis=1)
        A1 = np.outer(C1, np.ones(ne))
        B1 = np.outer(std_model, np.ones(ne))
        normalized_ensemble = np.divide((pert_state - A1), B1)

        C2 = np.mean(proj_pred_data, axis=1)
        A2 = np.outer(C2, np.ones(ne))
        B2 = np.outer(std_data, np.ones(ne))
        normalized_simData = np.divide((proj_pred_data - A2), B2)

        corr_mtx = np.divide(
            np.dot(normalized_ensemble, np.transpose(normalized_simData)), ne)

        # remove entries where the spread was below the floor
        corr_mtx[std_model < 10 ** -6, :] = 0
        corr_mtx[:, std_data < 10 ** -6] = 0

        return corr_mtx

    def _gen_loc_mask(self, taper_function=None, anisotropi=None, loc_range=None, field_size=None, ne=None):
        """
        Generate a single 2D localization mask centered in an oversized grid.

        Parameters
        ----------
        taper_function : str
            'gc' (Gaspari-Cohn), 'fb' (Furrer-Bengtsson) or 'region'.
        anisotropi : list
            [ratio, angle(deg)] defining the anisotropic scaling/rotation.
        loc_range : int
            Lateral localization range (in grid cells).
        field_size : list
            Field dimensions; indices 1 and 2 are used for the 2D mask size.
            NOTE(review): the csv example 'FIELD 10 10' yields only two
            elements, which would make field_size[2] fail — confirm FIELD
            is given with three dimensions.
        ne : int
            Ensemble size (only used by the 'fb' taper).
        """
        if taper_function == 'gc':  # if the taper function is Gaspari-Kohn.

            # rotation matrix
            rotate = np.array([[np.cos((anisotropi[1] / 180) * np.pi), np.sin((anisotropi[1] / 180) * np.pi)],
                               [-np.sin((anisotropi[1] / 180) * np.pi), np.cos((anisotropi[1] / 180) * np.pi)]])
            # Scale matrix
            scale = np.array([[1 / anisotropi[0], 0], [0, 1]])

            tot_range = [int(el) for el in np.array([loc_range, loc_range])]

            # preallocate a mask sufficiantly large
            mask = np.zeros((2 * field_size[1], 2 * field_size[2]))  # 2D

            center = [int(mask.shape[0] / 2), int(mask.shape[1] / 2)]
            length = np.empty(2)
            for i in range(mask.shape[0]):
                for j in range(mask.shape[1]):
                    # subtract 1 and switch element to make python and ecl equivalent
                    length[0] = (center[0]) - i
                    length[1] = (center[1]) - j
                    # apply anisotropic scaling and rotation to the offset
                    lt = np.dot(np.dot(scale, rotate), length)

                    # Gaspari-Chon
                    ratio = np.sqrt((lt[0] / tot_range[0]) ** 2 +
                                    (lt[1] / tot_range[1]) ** 2)
                    h1 = ratio
                    h2 = np.sqrt((lt[0] / (2 * tot_range[0])) ** 2 +
                                 (lt[1] / (2 * tot_range[1])) ** 2)

                    if ((h1 <= 1) & (h2 <= 1)):  # inner GC polynomial
                        mask[i, j] = (-1 / 4) * ratio ** 5 + (1 / 2) * ratio ** 4 + (5 / 8) * ratio ** 3 - \
                            (5 / 3) * ratio ** 2 + 1
                    elif ((h1 > 1) & (h2 <= 1)):  # outer GC polynomial
                        mask[i, j] = (1 / 12) * ratio ** 5 - (1 / 2) * ratio ** 4 + (5 / 8) * ratio ** 3 + \
                            (5 / 3) * ratio ** 2 - 5 * \
                            ratio + 4 - (2 / 3) * ratio ** (-1)
                    elif (h1 > 1) & (h2 > 1):
                        mask[i, j] = 0
            # only return non-zero part
            return mask[mask.nonzero()[0].min():mask.nonzero()[0].max() + 1,
                        mask.nonzero()[1].min():mask.nonzero()[1].max() + 1]

        # Taper function based on a covariance structure, as defined by eq (23) in "R.Furrer and
        if taper_function == 'fb':
            # T.Bengtsson, Estimation of high-dimensional prior and posterior covariance matrices
            # in Kalman filter variants, Journal of Multivariate Analysis, 2007."

            # rotation matrix
            rotate = np.array([[np.cos((anisotropi[1] / 180) * np.pi), np.sin((anisotropi[1] / 180) * np.pi)],
                               [-np.sin((anisotropi[1] / 180) * np.pi), np.cos((anisotropi[1] / 180) * np.pi)]])
            # Scale matrix
            scale = np.array([[1 / anisotropi[0], 0], [0, 1]])

            # preallocate a mask sufficiantly large
            mask = np.zeros((2 * field_size[1], 2 * field_size[2]))  # 2D

            center = [int(mask.shape[0] / 2), int(mask.shape[1] / 2)]
            length = np.empty(2)

            # transform the position into values
            for i in range(mask.shape[0]):
                for j in range(mask.shape[1]):
                    length[0] = (center[0]) - i
                    length[1] = (center[1]) - j
                    lt = np.dot(np.dot(scale, rotate), length)
                    # Calc the distance
                    d = np.sqrt(np.sum(lt ** 2))
                    # The fb function is now dependent on finding the covariance function. We use the same
                    # variogram function as the prior, that is, a spherical model.
                    # Todo: Include different variogram models
                    tmp = 0
                    if (d < loc_range):
                        tmp = 1 - 1 * (1.5 * np.abs(d) / loc_range - .5 *
                                       (d / loc_range) ** 3)
                    # eq (23) of Furrer and Bengtsson
                    tmp_mask = (ne * tmp ** 2) / ((tmp ** 2) * (ne + 1) + 1 ** 2)
                    if mask[i, j] < tmp_mask:
                        mask[i, j] = tmp_mask

            return mask[mask.nonzero()[0].min():mask.nonzero()[0].max() + 1,
                        mask.nonzero()[1].min():mask.nonzero()[1].max() + 1]

        if taper_function == 'region':
            # since this matrix always is field size, store as sparse
            return np.ones(1)

    def _repos_locmask(self, mask, data_pos, z_range=None):
        """
        Reposition the centered template mask onto the data position(s).

        Parameters
        ----------
        mask : ndarray
            The default localization mask. This has dimensions equal to its
            range; all anisotropi is already taken care of during creation.
        data_pos : list
            List of (X, Y, Z) positions for the data. Multiple positions are
            combined with an element-wise maximum; an empty list means this
            data should not update this parameter (all-zero mask).
        z_range : optional
            Either ':' (keep all layers) or a layer index/slice applied to
            the first axis of the 3D mask and of actnum.
        """
        grid_dim = self.loc_info['field']
        if len(data_pos) > 1:  # if more than one position is defined for this data
            loc_mask = np.zeros(grid_dim)

            for data in data_pos:
                loc_mask = np.maximum(loc_mask, self._repos_mask(mask, data))
        elif len(data_pos) == 1:  # single position
            loc_mask = self._repos_mask(mask, data_pos[0])
        else:  # no data pos, i.e. this data should not update this parameter
            loc_mask = np.zeros(grid_dim)

        if self.loc_info['actnum'] is not None:
            if z_range == ':':
                return loc_mask.flatten()[self.loc_info['actnum']]
            else:
                new_loc_mask = loc_mask[z_range, :, :]
                new_actnum = self.loc_info['actnum'].reshape(grid_dim)[z_range, :, :]
                return new_loc_mask.flatten()[new_actnum.flatten()]
        else:
            if z_range == ':':
                return loc_mask.flatten()
            else:
                return loc_mask[z_range, :, :].flatten()

    def _repos_mask(self, mask, data_pos):
        """
        Place the centered 2D template mask at a single data position inside
        the field, padding with zeros and cropping where the mask sticks out
        over a field edge. Returns a 3D field-sized mask with the 2D result
        written into layer data_pos[2].

        NOTE(review): callers in `localize` pass 2-element positions when the
        range is not a list, so data_pos[2] at the bottom would fail for that
        path — confirm against actual input data.
        """
        grid_dim = self.loc_info['field'][1:]
        mask_dimX = mask.shape[0]
        mask_dimY = mask.shape[1]

        # If the mask is placed sufficiently inside the grid, it only requires padding
        if ((grid_dim[0] - (data_pos[1] + mask_dimX / 2) > 0) and data_pos[1] - mask_dimX / 2 > 0) \
                and (((grid_dim[1] - (data_pos[0] + mask_dimY / 2) > 0)) and (data_pos[0] - mask_dimY / 2 > 0)):
            # x padding
            pad_x_l = data_pos[1] - int(mask_dimX / 2)
            pad_x_r = grid_dim[0] - (data_pos[1] + int(np.ceil(mask_dimX / 2)))
            # y padding
            pad_y_d = data_pos[0] - int(mask_dimY / 2)
            pad_y_u = grid_dim[1] - (data_pos[0] + int(np.ceil(mask_dimY / 2)))

            loc_2d_mask = np.pad(
                mask, ((pad_x_l, pad_x_r), (pad_y_d, pad_y_u)), 'constant')

        # mask inside the grid in x, but crossing an edge in y: pad x, pad-and-crop y
        elif ((grid_dim[0] - (data_pos[1] + mask_dimX / 2) > 0) and data_pos[1] - mask_dimX / 2 > 0) \
                and not (((grid_dim[1] - (data_pos[0] + mask_dimY / 2) > 0)) and (data_pos[0] - mask_dimY / 2 > 0)):
            # x padding
            pad_x_l = data_pos[1] - int(mask_dimX / 2)
            pad_x_r = grid_dim[0] - (data_pos[1] + int(np.ceil(mask_dimX / 2)))

            pad_y_u = 0
            # y padding
            if data_pos[0] - int(mask_dimY / 2) <= 0:
                pad_y_d = 0
                pad_y_u = abs(data_pos[0] - int(mask_dimY / 2))
                pos_y1 = abs(data_pos[0] - int(mask_dimY / 2))
            else:
                pad_y_d = grid_dim[1] - int(np.ceil(mask_dimY / 2))
                pos_y1 = 0
            if grid_dim[1] - (data_pos[0] + int(np.ceil(mask_dimY / 2))) <= 0:
                pad_y_u = 0
                pad_y_d += (data_pos[0] - grid_dim[1]) + 1
                pos_y2 = grid_dim[1]
            else:
                pad_y_u += grid_dim[1] - (int(np.ceil(mask_dimY / 2)))
                pos_y2 = grid_dim[1] + abs(data_pos[0] - int(mask_dimY / 2))

            # check if negative padding, if true the mask is larger than the field. Need to update the
            # coordinates, and remove negative padding.
            if pad_y_d < 0:
                pad_y_d = 0
                pos_y2 += pos_y1
            if pos_y1 == pos_y2:
                pos_y2 += 1

            loc_2d_mask = np.pad(mask, ((pad_x_l, pad_x_r), (pad_y_d, pad_y_u)), 'constant')[
                :, pos_y1:pos_y2]

        # mask crossing an edge in x, inside the grid in y: pad-and-crop x, pad y
        elif not ((grid_dim[0] - (data_pos[1] + mask_dimX / 2) > 0) and data_pos[1] - mask_dimX / 2 > 0) \
                and (((grid_dim[1] - (data_pos[0] + mask_dimY / 2) > 0)) and (data_pos[0] - mask_dimY / 2 > 0)):
            # x padding
            pad_x_r = 0
            if data_pos[1] - int(mask_dimX / 2) <= 0:
                pad_x_l = 0
                pad_x_r = abs(data_pos[1] - int(mask_dimX / 2))
                pos_x1 = abs(data_pos[1] - int(mask_dimX / 2))
            else:
                pad_x_l = grid_dim[0] - int(np.ceil(mask_dimX / 2))
                pos_x1 = 0
            if grid_dim[0] - (data_pos[1] + int(np.ceil(mask_dimX / 2))) <= 0:
                pad_x_r = 0
                pad_x_l += (data_pos[1] - grid_dim[0]) + 1
                pos_x2 = grid_dim[0]
            else:
                pad_x_r += grid_dim[0] - (int(np.ceil(mask_dimX / 2)))
                pos_x2 = grid_dim[0] + abs(data_pos[1] - int(mask_dimX / 2))
            # y padding
            pad_y_d = data_pos[0] - int(mask_dimY / 2)
            pad_y_u = grid_dim[1] - (data_pos[0] + int(np.ceil(mask_dimY / 2)))

            # check if negative padding, if true the mask is larger than the field. Need to update the
            # coordinates, and remove negative padding.
            if pad_x_l < 0:
                pad_x_l = 0
                pos_x2 += pos_x1
            if pos_x1 == pos_x2:
                pos_x2 += 1

            loc_2d_mask = np.pad(mask, ((pad_x_l, pad_x_r), (pad_y_d, pad_y_u)), 'constant')[
                pos_x1:pos_x2, :]
        else:
            # mask crossing edges in both directions: pad-and-crop both axes
            pad_x_r = 0
            pad_y_u = 0

            if data_pos[1] - int(mask_dimX / 2) <= 0:
                pad_x_l = 0
                pad_x_r = abs(data_pos[1] - int(mask_dimX / 2))
                pos_x1 = abs(data_pos[1] - int(mask_dimX / 2))
            else:
                pad_x_l = grid_dim[0] - int(np.ceil(mask_dimX / 2))
                pos_x1 = 0
            if grid_dim[0] - (data_pos[1] + int(np.ceil(mask_dimX / 2))) <= 0:
                pad_x_r = 0
                pad_x_l += (data_pos[1] - grid_dim[0]) + 1
                pos_x2 = grid_dim[0]
            else:
                pad_x_r += grid_dim[0] - (int(np.ceil(mask_dimX / 2)))
                pos_x2 = grid_dim[0] + abs(data_pos[1] - int(mask_dimX / 2))

            # y padding
            if data_pos[0] - int(mask_dimY / 2) <= 0:
                pad_y_d = 0
                pad_y_u = abs(data_pos[0] - int(mask_dimY / 2))
                pos_y1 = abs(data_pos[0] - int(mask_dimY / 2))
            else:
                pad_y_d = grid_dim[1] - int(np.ceil(mask_dimY / 2))
                pos_y1 = 0
            if grid_dim[1] - (data_pos[0] + int(np.ceil(mask_dimY / 2))) <= 0:
                pad_y_u = 0
                pad_y_d += (data_pos[0] - grid_dim[1]) + 1
                pos_y2 = grid_dim[1]
            else:
                pad_y_u += grid_dim[1] - (int(np.ceil(mask_dimY / 2)))
                pos_y2 = grid_dim[1] + abs(data_pos[0] - int(mask_dimY / 2))

            # check if negative padding, if true the mask is larger than the field. Need to update the
            # coordinates, and remove negative padding.
            if pad_y_d < 0:
                pad_y_d = 0
                pos_y2 += pos_y1
            if pad_x_l < 0:
                pad_x_l = 0
                pos_x2 += pos_x1
            if pos_x1 == pos_x2:
                pos_x2 += 1
            if pos_y1 == pos_y2:
                pos_y2 += 1
            loc_2d_mask = np.pad(mask, ((pad_x_l, pad_x_r), (pad_y_d, pad_y_u)), 'constant')[
                pos_x1:pos_x2, pos_y1:pos_y2]

        loc_mask = np.zeros(self.loc_info['field'])
        loc_mask[data_pos[2], :, :] = loc_2d_mask
        return loc_mask


def _calc_distance(data_pos, index_unique, current_data_list, assim_index, obs_data, pred_data, param_pos):
    """
    Calculate the distance between data and parameters.

    Parameters
    ----------
    data_pos : dict
        Dictionary containing the position of the data.

    index_unique : bool
        Boolean that determines if the position is unique.

    current_data_list : list
        List containing the names of the data that should be evaluated.

    assim_index : int
        The index of the data to be evaluated.

    obs_data : list of dict
        List of dictionaries containing the data.

    pred_data : list of dict
        List of dictionaries containing the predictions.

    param_pos : list of tuple
        List of tuples representing the position of the parameters.

    Returns
    -------
    dist : list
        Euclidean distance between each data/parameter pair (shortest
        distance when a datum has several positions).
    """
    # distance to data if distance based localization
    if index_unique == False:
        dist = []
        for dat in current_data_list:
            for indx in assim_index[1]:
                indx_data_pos = data_pos[dat][indx]
                if obs_data[indx] is not None and obs_data[indx][dat] is not None:
                    # add shortest distance
                    dist.append(min(distance.cdist(indx_data_pos, param_pos).flatten()))
    else:
        dist = []
        for data in current_data_list:
            elem_data_pos = data_pos[data]
            # one distance entry per observed datum of this type
            obs, _ = at.aug_obs_pred_data(obs_data, pred_data, assim_index, [data])
            dist.extend(
                len(obs)*[min(distance.cdist(elem_data_pos, param_pos).flatten())])

    return dist


def _calc_loc(max_dist, distance, prior_info, loc_type, ne):
    """
    Calculate the localization mask for a list of data/parameter distances.

    Parameters
    ----------
    max_dist : float
        Localization range.
    distance : list
        Euclidean distances (note: shadows the imported scipy.spatial
        `distance` module inside this function).
    prior_info : dict
        Prior info for the parameter type; only 'variance' is used.
    loc_type : str
        'fb' (Furrer-Bengtsson) or 'gc' (Gaspari-Cohn).
    ne : int
        Ensemble size (used by the 'fb' taper).

    Returns
    -------
    ndarray
        Mask with shape (1, len(distance)).
    """
    variance = prior_info['variance'][0]
    mask = np.zeros(len(distance))
    if loc_type == 'fb':
        # assume that FB localization is utilized. Here vi can add all different localization functions
        for i in range(len(distance)):
            if distance[i] < max_dist:
                # spherical variogram within range
                tmp = variance - variance * (
                    1.5 * np.abs(distance[i]) / max_dist - .5 * (distance[i] / max_dist) ** 3)
            else:
                tmp = 0

            mask[i] = (ne * tmp ** 2) / ((tmp ** 2) * (ne + 1) + variance ** 2)
    elif loc_type == 'gc':
        for count, i in enumerate(np.abs(distance)):
            if (i <= max_dist):
                # inner Gaspari-Cohn polynomial
                tmp = -(1. / 4.) * (i / max_dist) ** 5 + (1. / 2.) * (i / max_dist) ** 4 + (5. / 8.) * (
                    i / max_dist) ** 3 - (5. / 3.) * (i / max_dist) ** 2 + 1
            elif (i <= 2 * max_dist):
                # outer Gaspari-Cohn polynomial
                tmp = (1. / 12.) * (i / max_dist) ** 5 - (1. / 2.) * (i / max_dist) ** 4 + (5. / 8.) * (
                    i / max_dist) ** 3 + (5. / 3.) * (i / max_dist) ** 2 - 5. * (i / max_dist) + 4. - (
                    2. / 3.) * (max_dist / i)
            else:
                tmp = 0.
            mask[count] = tmp

    return mask[np.newaxis, :]
class
localization:
class localization():
    """
    Localization (covariance regularization) support for the analysis step.

    The constructor parses the ``LOCALIZATION`` information from the input file
    (or loads a pre-built pickled dictionary) and pre-generates the unique 2D
    tapering masks.  ``localize`` assembles the full sparse localization matrix
    for the current selection of data/parameters, and ``auto_ada_loc``
    implements correlation-based (adaptive) localization.
    """
    #####
    # TODO: Check field dimensions, should always ensure that we can provide i ,j ,k (x, y, z)
    ###

    def __init__(self, parsed_info, assimIndex, data_typ, free_parameter, ne):
        """
        Format the parsed info from the input file, and generate the unique localization masks.

        Parameters
        ----------
        parsed_info : list
            Parsed LOCALIZATION keyword content. parsed_info[0] is
            ('FIELD', [dimensions]); parsed_info[1] selects the mode
            ('AUTOADALOC', 'LOCALANALYSIS', a pickle/.csv file name, or an
            inline csv string).
        assimIndex : list
            Assimilation (time) indices.
        data_typ : list
            Data types to be assimilated.
        free_parameter : list
            Names of the parameters that are updated.
        ne : int
            Ensemble size (used by the Furrer-Bengtsson taper).
        """
        # If the next element is a .p file (pickle), assume that this has been correctly formatted and can be
        # automatically imported. NB: it is important that we use the pickle format since we have a dictionary
        # containing dictionaries.

        # To make this as robust as possible, we always try to load the file; any failure falls through to the
        # csv/inline parsing in the except branch below.
        try:
            if parsed_info[1][0].upper() == 'AUTOADALOC':
                # correlation-based adaptive localization; nstd scales the noise-derived threshold
                init_local = {}
                init_local['autoadaloc'] = True
                init_local['nstd'] = parsed_info[1][1]
                if parsed_info[2][0] == 'type':
                    init_local['type'] = parsed_info[2][1]
            elif parsed_info[1][0].upper() == 'LOCALANALYSIS':
                init_local = {}
                init_local['localanalysis'] = True
                for i, opt in enumerate(list(zip(*parsed_info))[0]):
                    if opt.lower() == 'type':
                        init_local['type'] = parsed_info[i][1]
                    if opt.lower() == 'range':
                        init_local['range'] = float(parsed_info[i][1])
            else:
                # assume a pickled dictionary of dictionaries
                init_local = pickle.load(open(parsed_info[1][0], 'rb'))
        except:  # NOTE(review): bare except is deliberate best-effort fallback to csv parsing; it also hides I/O errors
            # no file could be loaded
            # initiallize the outer dictionary: one entry per (data, time, parameter) triple
            init_local = {}
            for time in assimIndex:
                for datum in data_typ:
                    for parameter in free_parameter:
                        init_local[(datum, time, parameter)] = {
                            'taper_func': None,
                            'position': None,
                            'anisotropi': None,
                            'range': None
                        }
            # insert the values that are defined
            # check if data are provided through a .csv file
            if parsed_info[1][0].endswith('.csv'):
                with open(parsed_info[1][0]) as csv_file:
                    # get all lines
                    reader = csv.reader(csv_file)
                    info = [elem for elem in reader]
                # collapse
                info = [item for sublist in info for item in sublist]
            else:
                # inline definition: comma-separated entries, each whitespace-separated
                info = parsed_info[1][0].split(',')
            for elem in info:
                #
                # If a predefined mask is to be imported the localization keyword must be
                # [import filename.npz]
                # where filename is the name of the .npz file to be uploaded.
                tmp_info = elem.split()

                # format the data and time elements; the key must match (data_typ, assimIndex, free_parameter)
                if len(tmp_info) == 11:  # data has only one name
                    name = (tmp_info[8].lower(), float(tmp_info[9]), tmp_info[10].lower())
                else:
                    # two-part data name, e.g. "WBHP PRO-1"
                    name = (tmp_info[8].lower() + ' ' + tmp_info[9].lower(),
                            float(tmp_info[10]), tmp_info[11].lower())

                # assert if the data to be localized actually exists; silently skip unknown entries
                if name in init_local.keys():

                    # input the correct info into the localization dictionary
                    init_local[name]['taper_func'] = tmp_info[0]
                    if tmp_info[0] == 'import':
                        # if a predefined mask is to be imported, the name is the following element.
                        init_local[name]['file'] = tmp_info[1]
                    else:
                        # the position can span over multiple cells, e.g., 55:100. Hence keep this input as a string
                        init_local[name]['position'] = [
                            [int(float(tmp_info[1])), int(float(tmp_info[2])), int(float(tmp_info[3]))]]
                        init_local[name]['range'] = [int(tmp_info[4]), int(
                            tmp_info[5])]  # the range is always an integer
                        init_local[name]['anisotropi'] = [
                            float(tmp_info[6]), float(tmp_info[7])]

        # first element of the parsed info is field size
        # NOTE(review): placement of this section relative to the except-block was reconstructed; it is kept at
        # method level so 'field'/'actnum' exist for all modes (auto_ada_loc reads both) — confirm against upstream.
        assert parsed_info[0][0].upper() == 'FIELD'
        init_local['field'] = [int(elem) for elem in parsed_info[0][1]]

        # check if final parsed info is the actnum
        try:
            if parsed_info[2][0].upper() == 'ACTNUM':
                assert parsed_info[2][1].endswith('.npz')  # this must be a .npz file!!
                tmp_file = np.load(parsed_info[2][1])
                init_local['actnum'] = tmp_file['actnum']
            else:
                init_local['actnum'] = None
        except:  # NOTE(review): bare except — any failure silently disables actnum masking
            init_local['actnum'] = None

        # generate the unique localization masks. Recall that the parameters: "taper_type", "anisotropi", and
        # "range" give a unique mask, so identical specs share one pre-computed mask.
        init_local['mask'] = {}
        # loop over all localization info to ensure that all the masks have been generated
        # Store masks with the key ('taper_function', 'anisotropi', 'range')
        # NOTE(review): assumes every (data, time, param) triple got 'anisotropi'/'range' filled; an entry left at
        # its None defaults (or an 'import' entry) would make this comprehension raise — confirm inputs cover all.
        loc_mask_info = [(init_local[el]['taper_func'], init_local[el]['anisotropi'][0],
                          init_local[el]['anisotropi'][1], init_local[el]['range'])
                         for el in init_local.keys() if len(el) == 3]
        for test_key in loc_mask_info:
            if not len(init_local['mask']):
                # first mask: no lookup needed, always generate
                if test_key[0] == 'region':
                    if isinstance(test_key[3], list):
                        new_key = ('region', test_key[3][0],
                                   test_key[3][1], test_key[3][2])
                    else:
                        new_key = ('region', test_key[3])
                    init_local['mask'][new_key] = self._gen_loc_mask(taper_function=test_key[0],
                                                                     anisotropi=[
                                                                         test_key[1], test_key[2]],
                                                                     loc_range=test_key[3],
                                                                     field_size=init_local['field'],
                                                                     ne=ne
                                                                     )
                else:
                    if isinstance(test_key[3], list):
                        new_key = (test_key[0], test_key[1],
                                   test_key[2], test_key[3][0], test_key[3][1])
                    else:
                        new_key = test_key
                    # NOTE(review): test_key[3][0] assumes a list-valued range even in the non-list branch — confirm
                    init_local['mask'][new_key] = self._gen_loc_mask(taper_function=test_key[0],
                                                                     anisotropi=[
                                                                         test_key[1], test_key[2]],
                                                                     loc_range=test_key[3][0],
                                                                     field_size=init_local['field'],
                                                                     ne=ne
                                                                     )
            else:
                # if loc = region, anisotropi has no meaning.
                if test_key[0] == 'region':
                    # If region there are two options:
                    # 1: file. Unique parameters ('region', filename)
                    # 2: area. Unique parameters ('region', 'x', 'y','z')
                    if isinstance(test_key[3], list):
                        new_key = ('region', test_key[3][0],
                                   test_key[3][1], test_key[3][2])
                    else:
                        new_key = ('region', test_key[3])

                    if new_key not in init_local['mask']:
                        # generate this mask
                        init_local['mask'][new_key] = self._gen_loc_mask(taper_function=test_key[0],
                                                                         anisotropi=[
                                                                             test_key[1], test_key[2]],
                                                                         loc_range=test_key[3],
                                                                         field_size=init_local['field'],
                                                                         ne=ne
                                                                         )

                else:
                    if isinstance(test_key[3], list):
                        new_key = (test_key[0], test_key[1],
                                   test_key[2], test_key[3][0], test_key[3][1])
                    else:
                        new_key = test_key

                    if new_key not in init_local['mask']:
                        # generate this mask
                        init_local['mask'][new_key] = self._gen_loc_mask(taper_function=test_key[0],
                                                                         anisotropi=[
                                                                             test_key[1], test_key[2]],
                                                                         loc_range=test_key[3][0],
                                                                         field_size=init_local['field'],
                                                                         ne=ne
                                                                         )
        self.loc_info = init_local

    def localize(self, curr_data, curr_time, curr_param, ne, prior_info, data_size):
        """
        Assemble the full localization matrix for the current update.

        Parameters
        ----------
        curr_data, curr_time, curr_param : list
            Data types, assimilation times, and parameters in the current update.
        ne : int
            Ensemble size (unused here; kept for interface compatibility).
        prior_info : dict
            Per-parameter prior info; 'nx'/'ny'/'nz' are used to size zero masks.
        data_size : list of list
            Number of data elements per (time, data) combination.

        Returns
        -------
        scipy.sparse matrix
            Localization matrix rho of shape (n_m, n_d).
        """
        # generate the full localization mask
        # potentially: current_time, curr_param, and curr_data are lists. Must loop over:
        # curr_time, curr_data and curr param to generate localization mask
        # rho = n_m (size of total parameters) x n_d (size of all data)

        loc = []
        for time_count, time in enumerate(curr_time):
            for count, data in enumerate(curr_data):
                if data_size[time_count][count] > 0:
                    # one row-list per scalar data element for this (time, data)
                    tmp_loc = [[] for _ in range(data_size[time_count][count])]
                    for param in curr_param:
                        # Check if this parameter should be localized
                        if (data, time, param) in self.loc_info:
                            if not self.loc_info[(data, time, param)]['taper_func'] == 'region':
                                # distance-based taper: look up the pre-computed mask and re-position it at the
                                # data location(s)
                                if isinstance(self.loc_info[(data, time, param)]['range'], list):
                                    key = (self.loc_info[(data, time, param)]['taper_func'],
                                           self.loc_info[(data, time, param)]['anisotropi'][0],
                                           self.loc_info[(data, time, param)]['anisotropi'][1],
                                           self.loc_info[(data, time, param)]['range'][0],
                                           self.loc_info[(data, time, param)]['range'][1])
                                    mask = self._repos_locmask(self.loc_info['mask'][key],
                                                               [[el[0], el[1], el[2]] for el in
                                                                self.loc_info[(data, time, param)]['position']],
                                                               z_range=self.loc_info[(data, time, param)]['range'][1])
                                else:
                                    key = (self.loc_info[(data, time, param)]['taper_func'],
                                           self.loc_info[(data, time, param)]['anisotropi'][0],
                                           self.loc_info[(data, time, param)]['anisotropi'][1],
                                           self.loc_info[(data, time, param)]['range'])
                                    mask = self._repos_locmask(self.loc_info['mask'][key],
                                                               [[el[0], el[1]] for el in
                                                                self.loc_info[(data, time, param)]['position']])
                                # if len(mask.shape) == 2:  # this is field data
                                #     # check that first axis is data, i.e., n_d X n_m
                                #     if mask.shape[0] == data_size[time_count][count]:
                                #         for i in range(data_size[time_count][count]):
                                #             tmp_loc[i].append(mask[i, :])
                                #         # tmp_loc = np.hstack((tmp_loc, mask)) if tmp_loc.size else mask # trick
                                #     else:
                                # the same (flattened) mask applies to every element of this data vector
                                for i in range(data_size[time_count][count]):
                                    tmp_loc[i].append(mask)
                                # tmp_loc = np.hstack((tmp_loc, mask.T)) if tmp_loc.size else mask.T
                                # else:
                                #     tmp_loc[0].append(mask)
                                # tmp_loc = np.append(tmp_loc, mask)
                                # np.savez('local_mask_upd/' + str(param) + ':' + str(time) + ':' + str(data).replace(' ', ':')
                                #          + '.npz', loc=mask)
                            elif self.loc_info[(data, time, param)]['taper_func'] == 'region':
                                # region masks are position-independent: use as stored
                                if isinstance(self.loc_info[(data, time, param)]['range'], list):
                                    key = (self.loc_info[(data, time, param)]['taper_func'],
                                           self.loc_info[(data, time, param)]['range'][0],
                                           self.loc_info[(data, time, param)]['range'][1],
                                           self.loc_info[(data, time, param)]['range'][2])
                                else:
                                    key = (self.loc_info[(data, time, param)]['taper_func'],
                                           self.loc_info[(data, time, param)]['range'])

                                mask = self.loc_info['mask'][key]
                                for i in range(data_size[time_count][count]):
                                    tmp_loc[i].append(mask)
                        else:
                            # if no localization has been defined, assume that we do not update (zero mask)
                            if data_size[time_count][count] > 1:
                                # must make a field mask of zeros
                                mask = np.zeros((data_size[time_count][count], prior_info[param]['nx'] *
                                                 prior_info[param]['ny'] *
                                                 prior_info[param]['nz']))
                                # set the localization mask to zeros for this parameter
                                for i in range(data_size[time_count][count]):
                                    if self.loc_info['actnum'] is not None:
                                        tmp_loc[i].append(
                                            mask[i, self.loc_info['actnum']])
                                    else:
                                        tmp_loc[i].append(mask[i, :])
                                # tmp_loc = np.hstack((tmp_loc, mask)) if tmp_loc.size else mask
                            else:
                                mask = np.zeros(prior_info[param]['nx'] *
                                                prior_info[param]['ny'] *
                                                prior_info[param]['nz'])
                                if self.loc_info['actnum'] is not None:
                                    tmp_loc[0].append(mask[self.loc_info['actnum']])
                                else:
                                    tmp_loc[0].append(mask)
                    # if data_size[count] == 1:
                    #     loc = np.append(loc, np.array([tmp_loc, ]).T, axis=1) if loc.size else np.array([tmp_loc, ]).T
                    # elif data_size[count] > 1:
                    #     loc = np.concatenate((loc, tmp_loc.T), axis=1) if loc.size else tmp_loc.T
                    # stack per-parameter masks horizontally into one sparse row per data element
                    for el in tmp_loc:
                        if len(el) > 1:
                            loc.append(sparse.hstack(el))
                        else:
                            loc.append(sparse.csc_matrix(el))
        # rows are data elements, columns parameters -> transpose to (n_m, n_d)
        return sparse.vstack(loc).transpose()
        # return np.array(loc).T

    def auto_ada_loc(self, pert_state, proj_pred_data, curr_param, **kwargs):
        """
        Correlation-based adaptive localization of the update direction.

        Builds a tapering matrix by comparing state/data correlations against
        the correlations of a randomly shuffled (decorrelated) ensemble, then
        applies it element-wise to the perturbed state before projecting.

        Parameters
        ----------
        pert_state : ndarray (n_m, ne)
            Perturbed (mean-removed) state ensemble.
        proj_pred_data : ndarray (n_d, ne)
            Projected predicted-data term.
        curr_param : list
            Parameter names; 'NA' means taper the whole state at once.
        kwargs : dict
            Optional 'prior_info' with per-parameter 'active' counts.

        Returns
        -------
        ndarray
            Tapered update step, (tapering * pert_state) @ proj_pred_data.
        """
        if 'prior_info' in kwargs:
            prior_info = kwargs['prior_info']
        else:
            # NOTE(review): with this default, the 'active' lookup below would hit None for params != 'NA' — confirm
            prior_info = {key: None for key in curr_param}

        step = []  # NOTE(review): dead initializer; overwritten unconditionally below

        ne = pert_state.shape[1]
        # shuffle members to estimate the spurious-correlation noise level
        rp_index = np.random.permutation(ne)
        shuffled_ensemble = pert_state[:, rp_index]
        corr_mtx = self.get_corr_mtx(pert_state, proj_pred_data)
        corr_mtx_shuffled = self.get_corr_mtx(shuffled_ensemble, proj_pred_data)

        tapering_matrix = np.ones(corr_mtx.shape)

        if self.loc_info['actnum'] is not None:
            num_active = np.sum(self.loc_info['actnum'])
        else:
            num_active = np.prod(self.loc_info['field'])
        count = 0
        # taper each parameter's slice of the state vector separately
        for param in curr_param:
            if param == 'NA':
                num_active = tapering_matrix.shape[0]
            else:
                if 'active' in prior_info[param]:  # if this is defined
                    num_active = int(prior_info[param]['active'])
            prop_index = np.arange(num_active) + count
            current_tapering = self.tapering_function(
                corr_mtx[prop_index, :], corr_mtx_shuffled[prop_index, :])
            tapering_matrix[prop_index, :] = current_tapering
            count += num_active
        step = np.dot(np.multiply(tapering_matrix, pert_state), proj_pred_data)

        return step

    def tapering_function(self, cf, cf_s):
        """
        Column-wise taper coefficients from true vs. shuffled correlations.

        Parameters
        ----------
        cf : ndarray (n_m, n_d)
            Correlation matrix of the real ensemble.
        cf_s : ndarray (n_m, n_d)
            Correlation matrix of the shuffled (decorrelated) ensemble; its
            median absolute value estimates the spurious-correlation noise.

        Returns
        -------
        ndarray
            Taper coefficients in [0, 1], same shape as cf.
        """
        nstd = 1
        if self.loc_info['nstd'] is not None:
            nstd = self.loc_info['nstd']

        tc = np.zeros(cf.shape)

        for i in range(cf.shape[1]):
            current_cf = cf[:, i]
            # robust noise level: median absolute deviation scaled to a std (1/0.6745)
            est_noise_std = np.median(np.absolute(cf_s[:, i]), axis=0) / 0.6745
            # NOTE(review): this universal-threshold value is immediately overwritten on the next line (dead code)
            cutoff_point = np.sqrt(2*np.log(np.prod(current_cf.shape))) * est_noise_std
            cutoff_point = nstd * est_noise_std
            if 'type' in self.loc_info and self.loc_info['type'] == 'soft':
                current_tc = self.rational_function(1-np.absolute(current_cf),
                                                    1 - cutoff_point)
            elif 'type' in self.loc_info and self.loc_info['type'] == 'sigm':
                current_tc = self.rational_function_sigmoid(np.absolute(current_cf),
                                                            nstd)
            else:  # default to hard thresholding
                set_upper = np.where(np.absolute(current_cf) > cutoff_point)
                current_tc = np.zeros(current_cf.shape)
                current_tc[set_upper] = 1  # this is hard thresholding
            tc[:, i] = current_tc.flatten()

        return tc

    def rational_function(self, dist, lc):
        """
        Gaspari-Cohn compactly supported rational taper.

        Parameters
        ----------
        dist : ndarray
            Distances (any sign; absolute value is used).
        lc : float
            Localization length scale; support ends at 2*lc.

        Returns
        -------
        ndarray
            Taper values (1 at zero distance, 0 beyond 2*lc).
        """
        z = np.absolute(dist) / lc
        index_1 = np.where(z <= 1)
        index_2 = np.where(z <= 2)
        index_12 = np.setdiff1d(index_2, index_1)  # 1 < z <= 2

        y = np.zeros(len(z))

        # inner piece, z <= 1
        y[index_1] = 1 - (np.power(z[index_1], 5) / 4) \
            + (np.power(z[index_1], 4) / 2) \
            + (5*np.power(z[index_1], 3) / 8) \
            - (5*np.power(z[index_1], 2) / 3)

        # outer piece, 1 < z <= 2
        y[index_12] = (np.power(z[index_12], 5) / 12) \
            - (np.power(z[index_12], 4) / 2) \
            + (5 * np.power(z[index_12], 3) / 8) \
            + (5 * np.power(z[index_12], 2) / 3) \
            - 5*z[index_12] \
            - np.divide(2, 3*z[index_12]) + 4

        return y

    def rational_function_sigmoid(self, dist, lc):
        """Smooth sigmoid taper centered at (1 - lc); steepness fixed at 50."""
        steepness = 50  # define how steep the transition is
        y = expit((dist-(1-lc))*steepness)

        return y

    def get_corr_mtx(self, pert_state, proj_pred_data):
        """
        Empirical correlation matrix between state and predicted data.

        Parameters
        ----------
        pert_state : ndarray (n_m, ne)
        proj_pred_data : ndarray (n_d, ne)

        Returns
        -------
        ndarray (n_m, n_d)
            Ensemble correlation coefficients.
        """
        # compute correlation matrix

        ne = pert_state.shape[1]

        # clamp tiny spreads to avoid division by ~0
        std_model = np.std(pert_state, axis=1)
        std_model[std_model < 10 ** -6] = 10 ** -6
        std_data = np.std(proj_pred_data, axis=1)
        std_data[std_data < 10 ** -6] = 10 ** -6
        # model_zero_spread_index = np.find(std_model<10**-6)
        # data_zero_spread_index = np.find(std_data<10**-6)

        # normalize: subtract mean, divide by std (broadcast via outer products)
        C1 = np.mean(pert_state, axis=1)
        A1 = np.outer(C1, np.ones(ne))
        B1 = np.outer(std_model, np.ones(ne))
        normalized_ensemble = np.divide((pert_state - A1), B1)

        C2 = np.mean(proj_pred_data, axis=1)
        A2 = np.outer(C2, np.ones(ne))
        B2 = np.outer(std_data, np.ones(ne))
        normalized_simData = np.divide((proj_pred_data - A2), B2)

        corr_mtx = np.divide(
            np.dot(normalized_ensemble, np.transpose(normalized_simData)), ne)

        # NOTE(review): std arrays were clamped to exactly 1e-6 above, so these strict '<' tests can never be
        # true here — zero-spread rows/columns are NOT actually zeroed. Confirm intended behavior.
        corr_mtx[std_model < 10 ** -6, :] = 0
        corr_mtx[:, std_data < 10 ** -6] = 0

        return corr_mtx

    def _gen_loc_mask(self, taper_function=None, anisotropi=None, loc_range=None, field_size=None, ne=None):
        """
        Generate one 2D tapering mask for a unique (taper, anisotropi, range) spec.

        Parameters
        ----------
        taper_function : str
            'gc' (Gaspari-Cohn), 'fb' (Furrer-Bengtsson), or 'region'.
        anisotropi : list [ratio, angle_deg]
            Anisotropy ratio and rotation angle (degrees).
        loc_range : int
            Localization range in grid cells.
        field_size : list
            Field dimensions; indices 1 and 2 size the 2D mask.
        ne : int
            Ensemble size (used by the 'fb' taper only).

        Returns
        -------
        ndarray
            The trimmed non-zero part of the mask ('region' returns np.ones(1)).
        """
        # redesign the old _gen_loc_mask

        if taper_function == 'gc':  # if the taper function is Gaspari-Kohn.

            # rotation matrix (angle given in degrees)
            rotate = np.array([[np.cos((anisotropi[1] / 180) * np.pi), np.sin((anisotropi[1] / 180) * np.pi)],
                               [-np.sin((anisotropi[1] / 180) * np.pi), np.cos((anisotropi[1] / 180) * np.pi)]])
            # Scale matrix
            scale = np.array([[1 / anisotropi[0], 0], [0, 1]])

            # tot_range = [int(el) for el in np.dot(np.dot(scale, rotate), np.array([loc_range, loc_range]))]
            tot_range = [int(el) for el in np.array([loc_range, loc_range])]

            # preallocate a mask sufficiently large
            mask = np.zeros((2 * field_size[1], 2 * field_size[2]))  # 2D

            center = [int(mask.shape[0] / 2), int(mask.shape[1] / 2)]
            length = np.empty(2)
            for i in range(mask.shape[0]):
                for j in range(mask.shape[1]):
                    # subtract 1 and switch element to make python and ecl equivalent
                    length[0] = (center[0]) - i
                    length[1] = (center[1]) - j
                    # scaled+rotated offset from the mask center
                    lt = np.dot(np.dot(scale, rotate), length)
                    # d = np.sqrt(np.sum(lt**2))

                    # Gaspari-Chon
                    ratio = np.sqrt((lt[0] / tot_range[0]) ** 2 +
                                    (lt[1] / tot_range[1]) ** 2)
                    h1 = ratio
                    h2 = np.sqrt((lt[0] / (2 * tot_range[0])) ** 2 +
                                 (lt[1] / (2 * tot_range[1])) ** 2)

                    if ((h1 <= 1) & (h2 <= 1)):  # check that this layer should be localized
                        mask[i, j] = (-1 / 4) * ratio ** 5 + (1 / 2) * ratio ** 4 + (5 / 8) * ratio ** 3 - \
                            (5 / 3) * ratio ** 2 + 1
                    elif ((h1 > 1) & (h2 <= 1)):  # check that this layer should be localized
                        mask[i, j] = (1 / 12) * ratio ** 5 - (1 / 2) * ratio ** 4 + (5 / 8) * ratio ** 3 + \
                            (5 / 3) * ratio ** 2 - 5 * \
                            ratio + 4 - (2 / 3) * ratio ** (-1)
                    elif (h1 > 1) & (h2 > 1):
                        mask[i, j] = 0
            # only return non-zero part
            return mask[mask.nonzero()[0].min():mask.nonzero()[0].max() + 1,
                        mask.nonzero()[1].min():mask.nonzero()[1].max() + 1]

        # Taper function based on a covariance structure, as defined by eq (23) in "R.Furrer and
        # T.Bengtsson, Estimation of high-dimensional prior and posterior covariance matrices
        # in Kalman filter variants, Journal of Multivariate Analysis, 2007."
        if taper_function == 'fb':

            # rotation matrix
            rotate = np.array([[np.cos((anisotropi[1] / 180) * np.pi), np.sin((anisotropi[1] / 180) * np.pi)],
                               [-np.sin((anisotropi[1] / 180) * np.pi), np.cos((anisotropi[1] / 180) * np.pi)]])
            # Scale matrix
            scale = np.array([[1 / anisotropi[0], 0], [0, 1]])

            # preallocate a mask sufficiently large
            mask = np.zeros((2 * field_size[1], 2 * field_size[2]))  # 2D

            center = [int(mask.shape[0] / 2), int(mask.shape[1] / 2)]
            length = np.empty(2)

            # transform the position into values
            for i in range(mask.shape[0]):
                for j in range(mask.shape[1]):
                    #
                    length[0] = (center[0]) - i
                    length[1] = (center[1]) - j
                    lt = np.dot(np.dot(scale, rotate), length)
                    # Calc the distance
                    d = np.sqrt(np.sum(lt ** 2))
                    # The fb function is now dependent on finding the covariance function. We use the same variogram
                    # function as the prior, that is, a spherical model.
                    # Todo: Include different variogram models
                    tmp = 0
                    if (d < loc_range):
                        # spherical variogram correlation
                        tmp = 1 - 1 * (1.5 * np.abs(d) / loc_range - .5 *
                                       (d / loc_range) ** 3)
                    # eq (23) of Furrer and Bengtsson
                    tmp_mask = (ne * tmp ** 2) / ((tmp ** 2) * (ne + 1) + 1 ** 2)
                    if mask[i, j] < tmp_mask:
                        mask[i, j] = tmp_mask

            return mask[mask.nonzero()[0].min():mask.nonzero()[0].max() + 1,
                        mask.nonzero()[1].min():mask.nonzero()[1].max() + 1]

        if taper_function == 'region':
            # since this matrix always is field size, store as sparse
            return np.ones(1)

    def _repos_locmask(self, mask, data_pos, z_range=None):
        """
        Place the pre-computed 2D mask at the data position(s) and flatten it.

        Parameters
        ----------
        mask : ndarray
            The default localization mask. This has dimensions equal to its range. Note that all anisotropi
            is already taken care of during the creation of the mask.
        data_pos : list of [x, y(, z)]
            Positions of the data; multiple positions are combined by element-wise maximum.
        z_range : int, ':' or None
            Layer selection. ':' keeps all layers.
            NOTE(review): when z_range is None (default), loc_mask[None, :, :] acts as np.newaxis rather than a
            layer index — flattening still returns the full mask; confirm this is intended.

        Returns
        -------
        ndarray
            Flattened mask, restricted to active cells when actnum is defined.
        """
        grid_dim = self.loc_info['field']
        if len(data_pos) > 1:  # if more than one position is defined for this data
            loc_mask = np.zeros(grid_dim)

            # combine overlapping masks by taking the pointwise maximum
            for data in data_pos:
                loc_mask = np.maximum(loc_mask, self._repos_mask(mask, data))
        elif len(data_pos) == 1:  # single position
            loc_mask = self._repos_mask(mask, data_pos[0])
        else:  # no data pos, i.e. this data should not update this parameter
            loc_mask = np.zeros(grid_dim)

        if self.loc_info['actnum'] is not None:
            if z_range == ':':
                return loc_mask.flatten()[self.loc_info['actnum']]
            else:
                new_loc_mask = loc_mask[z_range, :, :]
                new_actnum = self.loc_info['actnum'].reshape(grid_dim)[z_range, :, :]
                return new_loc_mask.flatten()[new_actnum.flatten()]
        else:
            if z_range == ':':
                return loc_mask.flatten()
            else:
                return loc_mask[z_range, :, :].flatten()

    def _repos_mask(self, mask, data_pos):
        """
        Embed the 2D mask into the full field, centered at data_pos.

        The mask is padded with zeros out to the areal grid dimensions; when the mask overhangs a grid edge it is
        cropped instead of padded. The resulting 2D layer is written into layer data_pos[2] of a zero 3D field.

        Parameters
        ----------
        mask : ndarray (mask_dimX, mask_dimY)
        data_pos : [x, y, z]
            Cell position of the data. NOTE(review): data_pos[1] indexes the mask's first axis and data_pos[0]
            the second ("switch element to make python and ecl equivalent") — confirm against the grid ordering.

        Returns
        -------
        ndarray
            3D field-shaped mask, zero outside layer data_pos[2].
        """
        grid_dim = self.loc_info['field'][1:]
        mask_dimX = mask.shape[0]
        mask_dimY = mask.shape[1]

        # If the mask is placed sufficiently inside the grid, it only requires padding

        if ((grid_dim[0] - (data_pos[1] + mask_dimX / 2) > 0) and data_pos[1] - mask_dimX / 2 > 0) \
                and (((grid_dim[1] - (data_pos[0] + mask_dimY / 2) > 0)) and (data_pos[0] - mask_dimY / 2 > 0)):
            # fully interior: pad on all four sides
            # x padding
            pad_x_l = data_pos[1] - int(mask_dimX / 2)
            pad_x_r = grid_dim[0] - (data_pos[1] + int(np.ceil(mask_dimX / 2)))
            # y padding
            pad_y_d = data_pos[0] - int(mask_dimY / 2)
            pad_y_u = grid_dim[1] - (data_pos[0] + int(np.ceil(mask_dimY / 2)))

            loc_2d_mask = np.pad(
                mask, ((pad_x_l, pad_x_r), (pad_y_d, pad_y_u)), 'constant')

        elif ((grid_dim[0] - (data_pos[1] + mask_dimX / 2) > 0) and data_pos[1] - mask_dimX / 2 > 0) \
                and not (((grid_dim[1] - (data_pos[0] + mask_dimY / 2) > 0)) and (data_pos[0] - mask_dimY / 2 > 0)):
            # interior in x, overhangs in y: pad x, pad+crop y
            # x padding
            pad_x_l = data_pos[1] - int(mask_dimX / 2)
            pad_x_r = grid_dim[0] - (data_pos[1] + int(np.ceil(mask_dimX / 2)))

            pad_y_u = 0
            # y padding
            if data_pos[0] - int(mask_dimY / 2) <= 0:
                pad_y_d = 0
                pad_y_u = abs(data_pos[0] - int(mask_dimY / 2))
                pos_y1 = abs(data_pos[0] - int(mask_dimY / 2))
            else:
                pad_y_d = grid_dim[1] - int(np.ceil(mask_dimY / 2))
                pos_y1 = 0
            if grid_dim[1] - (data_pos[0] + int(np.ceil(mask_dimY / 2))) <= 0:
                pad_y_u = 0
                pad_y_d += (data_pos[0] - grid_dim[1]) + 1
                pos_y2 = grid_dim[1]
            else:
                pad_y_u += grid_dim[1] - (int(np.ceil(mask_dimY / 2)))
                pos_y2 = grid_dim[1] + abs(data_pos[0] - int(mask_dimY / 2))

            # check if negative padding, if true the mask is larger than the field. Need to update the coordinates,
            # and remove negative padding.
            if pad_y_d < 0:
                pad_y_d = 0
                pos_y2 += pos_y1
            if pos_y1 == pos_y2:
                pos_y2 += 1

            loc_2d_mask = np.pad(mask, ((pad_x_l, pad_x_r), (pad_y_d, pad_y_u)), 'constant')[
                :, pos_y1:pos_y2]

        elif not ((grid_dim[0] - (data_pos[1] + mask_dimX / 2) > 0) and data_pos[1] - mask_dimX / 2 > 0) \
                and (((grid_dim[1] - (data_pos[0] + mask_dimY / 2) > 0)) and (data_pos[0] - mask_dimY / 2 > 0)):
            # interior in y, overhangs in x: pad+crop x, pad y
            # x padding
            pad_x_r = 0
            if data_pos[1] - int(mask_dimX / 2) <= 0:
                pad_x_l = 0
                pad_x_r = abs(data_pos[1] - int(mask_dimX / 2))
                pos_x1 = abs(data_pos[1] - int(mask_dimX / 2))
            else:
                pad_x_l = grid_dim[0] - int(np.ceil(mask_dimX / 2))
                pos_x1 = 0
            if grid_dim[0] - (data_pos[1] + int(np.ceil(mask_dimX / 2))) <= 0:
                pad_x_r = 0
                pad_x_l += (data_pos[1] - grid_dim[0]) + 1
                pos_x2 = grid_dim[0]
            else:
                pad_x_r += grid_dim[0] - (int(np.ceil(mask_dimX / 2)))
                pos_x2 = grid_dim[0] + abs(data_pos[1] - int(mask_dimX / 2))
            # y padding
            pad_y_d = data_pos[0] - int(mask_dimY / 2)
            pad_y_u = grid_dim[1] - (data_pos[0] + int(np.ceil(mask_dimY / 2)))

            # check if negative padding, if true the mask is larger than the field. Need to update the coordinates,
            # and remove negative padding.
            if pad_x_l < 0:
                pad_x_l = 0
                pos_x2 += pos_x1
            if pos_x1 == pos_x2:
                pos_x2 += 1

            loc_2d_mask = np.pad(mask, ((pad_x_l, pad_x_r), (pad_y_d, pad_y_u)), 'constant')[
                pos_x1:pos_x2, :]
        else:
            # overhangs in both directions: pad+crop both axes
            pad_x_r = 0
            pad_y_u = 0

            if data_pos[1] - int(mask_dimX / 2) <= 0:
                pad_x_l = 0
                pad_x_r = abs(data_pos[1] - int(mask_dimX / 2))
                pos_x1 = abs(data_pos[1] - int(mask_dimX / 2))
            else:
                pad_x_l = grid_dim[0] - int(np.ceil(mask_dimX / 2))
                pos_x1 = 0
            if grid_dim[0] - (data_pos[1] + int(np.ceil(mask_dimX / 2))) <= 0:
                pad_x_r = 0
                pad_x_l += (data_pos[1] - grid_dim[0]) + 1
                pos_x2 = grid_dim[0]
            else:
                pad_x_r += grid_dim[0] - (int(np.ceil(mask_dimX / 2)))
                pos_x2 = grid_dim[0] + abs(data_pos[1] - int(mask_dimX / 2))

            # y padding
            if data_pos[0] - int(mask_dimY / 2) <= 0:
                pad_y_d = 0
                pad_y_u = abs(data_pos[0] - int(mask_dimY / 2))
                pos_y1 = abs(data_pos[0] - int(mask_dimY / 2))
            else:
                pad_y_d = grid_dim[1] - int(np.ceil(mask_dimY / 2))
                pos_y1 = 0
            if grid_dim[1] - (data_pos[0] + int(np.ceil(mask_dimY / 2))) <= 0:
                pad_y_u = 0
                pad_y_d += (data_pos[0] - grid_dim[1]) + 1
                pos_y2 = grid_dim[1]
            else:
                pad_y_u += grid_dim[1] - (int(np.ceil(mask_dimY / 2)))
                pos_y2 = grid_dim[1] + abs(data_pos[0] - int(mask_dimY / 2))

            # check if negative padding, if true the mask is larger than the field. Need to update the coordinates,
            # and remove negative padding.
            if pad_y_d < 0:
                pad_y_d = 0
                pos_y2 += pos_y1
            if pad_x_l < 0:
                pad_x_l = 0
                pos_x2 += pos_x1
            if pos_x1 == pos_x2:
                pos_x2 += 1
            if pos_y1 == pos_y2:
                pos_y2 += 1
            loc_2d_mask = np.pad(mask, ((pad_x_l, pad_x_r), (pad_y_d, pad_y_u)), 'constant')[
                pos_x1:pos_x2, pos_y1:pos_y2]

        # write the 2D layer into the full 3D field at the data's z-layer
        loc_mask = np.zeros(self.loc_info['field'])
        loc_mask[data_pos[2], :, :] = loc_2d_mask
        return loc_mask
localization(parsed_info, assimIndex, data_typ, free_parameter, ne)
def __init__(self, parsed_info, assimIndex, data_typ, free_parameter, ne):
    """
    Format the parsed info from the input file, and generate the unique localization masks.

    Supports three modes selected by parsed_info[1]: 'AUTOADALOC' (adaptive localization),
    'LOCALANALYSIS', or a pickle/.csv/inline-csv specification of per-(data, time, parameter)
    taper definitions. parsed_info[0] must be ('FIELD', [dimensions]). The result is stored
    in self.loc_info, including pre-generated masks under the 'mask' key.
    """
    # If the next element is a .p file (pickle), assume that this has been correctly formatted and can be
    # automatically imported. NB: it is important that we use the pickle format since we have a dictionary
    # containing dictionaries.

    # To make this as robust as possible, we always try to load the file; any failure falls through to the
    # csv/inline parsing below.
    try:
        if parsed_info[1][0].upper() == 'AUTOADALOC':
            init_local = {}
            init_local['autoadaloc'] = True
            init_local['nstd'] = parsed_info[1][1]
            if parsed_info[2][0] == 'type':
                init_local['type'] = parsed_info[2][1]
        elif parsed_info[1][0].upper() == 'LOCALANALYSIS':
            init_local = {}
            init_local['localanalysis'] = True
            for i, opt in enumerate(list(zip(*parsed_info))[0]):
                if opt.lower() == 'type':
                    init_local['type'] = parsed_info[i][1]
                if opt.lower() == 'range':
                    init_local['range'] = float(parsed_info[i][1])
        else:
            init_local = pickle.load(open(parsed_info[1][0], 'rb'))
    except:  # NOTE(review): bare except is a deliberate best-effort fallback, but also hides real I/O errors
        # no file could be loaded
        # initiallize the outer dictionary with all-None entries per (data, time, parameter)
        init_local = {}
        for time in assimIndex:
            for datum in data_typ:
                for parameter in free_parameter:
                    init_local[(datum, time, parameter)] = {
                        'taper_func': None,
                        'position': None,
                        'anisotropi': None,
                        'range': None
                    }
        # insert the values that are defined
        # check if data are provided through a .csv file
        if parsed_info[1][0].endswith('.csv'):
            with open(parsed_info[1][0]) as csv_file:
                # get all lines
                reader = csv.reader(csv_file)
                info = [elem for elem in reader]
            # collapse
            info = [item for sublist in info for item in sublist]
        else:
            # inline definition: comma-separated entries
            info = parsed_info[1][0].split(',')
        for elem in info:
            #
            # If a predefined mask is to be imported the localization keyword must be
            # [import filename.npz]
            # where filename is the name of the .npz file to be uploaded.
            tmp_info = elem.split()

            # format the data and time elements
            if len(tmp_info) == 11:  # data has only one name
                name = (tmp_info[8].lower(), float(tmp_info[9]), tmp_info[10].lower())
            else:
                # two-part data name, e.g. "WBHP PRO-1"
                name = (tmp_info[8].lower() + ' ' + tmp_info[9].lower(),
                        float(tmp_info[10]), tmp_info[11].lower())

            # assert if the data to be localized actually exists; unknown entries are skipped
            if name in init_local.keys():

                # input the correct info into the localization dictionary
                init_local[name]['taper_func'] = tmp_info[0]
                if tmp_info[0] == 'import':
                    # if a predefined mask is to be imported, the name is the following element.
                    init_local[name]['file'] = tmp_info[1]
                else:
                    # the position can span over multiple cells, e.g., 55:100. Hence keep this input as a string
                    init_local[name]['position'] = [
                        [int(float(tmp_info[1])), int(float(tmp_info[2])), int(float(tmp_info[3]))]]
                    init_local[name]['range'] = [int(tmp_info[4]), int(
                        tmp_info[5])]  # the range is always an integer
                    init_local[name]['anisotropi'] = [
                        float(tmp_info[6]), float(tmp_info[7])]

    # first element of the parsed info is field size
    # NOTE(review): placement of this section relative to the except-block was reconstructed from a garbled
    # listing; kept at method level so 'field'/'actnum' exist for all modes — confirm against upstream source.
    assert parsed_info[0][0].upper() == 'FIELD'
    init_local['field'] = [int(elem) for elem in parsed_info[0][1]]

    # check if final parsed info is the actnum
    try:
        if parsed_info[2][0].upper() == 'ACTNUM':
            assert parsed_info[2][1].endswith('.npz')  # this must be a .npz file!!
            tmp_file = np.load(parsed_info[2][1])
            init_local['actnum'] = tmp_file['actnum']
        else:
            init_local['actnum'] = None
    except:  # NOTE(review): bare except — any failure silently disables actnum masking
        init_local['actnum'] = None

    # generate the unique localization masks. Recall that the parameters: "taper_type", "anisotropi", and "range"
    # give a unique mask, so identical specs share a single pre-computed mask.
    init_local['mask'] = {}
    # loop over all localization info to ensure that all the masks have been generated
    # Store masks with the key ('taper_function', 'anisotropi', 'range')
    loc_mask_info = [(init_local[el]['taper_func'], init_local[el]['anisotropi'][0],
                      init_local[el]['anisotropi'][1], init_local[el]['range'])
                     for el in init_local.keys() if len(el) == 3]
    for test_key in loc_mask_info:
        if not len(init_local['mask']):
            # first mask: always generate
            if test_key[0] == 'region':
                if isinstance(test_key[3], list):
                    new_key = ('region', test_key[3][0],
                               test_key[3][1], test_key[3][2])
                else:
                    new_key = ('region', test_key[3])
                init_local['mask'][new_key] = self._gen_loc_mask(taper_function=test_key[0],
                                                                 anisotropi=[
                                                                     test_key[1], test_key[2]],
                                                                 loc_range=test_key[3],
                                                                 field_size=init_local['field'],
                                                                 ne=ne
                                                                 )
            else:
                if isinstance(test_key[3], list):
                    new_key = (test_key[0], test_key[1],
                               test_key[2], test_key[3][0], test_key[3][1])
                else:
                    new_key = test_key
                # NOTE(review): test_key[3][0] assumes a list-valued range even in the non-list branch — confirm
                init_local['mask'][new_key] = self._gen_loc_mask(taper_function=test_key[0],
                                                                 anisotropi=[
                                                                     test_key[1], test_key[2]],
                                                                 loc_range=test_key[3][0],
                                                                 field_size=init_local['field'],
                                                                 ne=ne
                                                                 )
        else:
            # if loc = region, anisotropi has no meaning.
            if test_key[0] == 'region':
                # If region there are two options:
                # 1: file. Unique parameters ('region', filename)
                # 2: area. Unique parameters ('region', 'x', 'y','z')
                if isinstance(test_key[3], list):
                    new_key = ('region', test_key[3][0],
                               test_key[3][1], test_key[3][2])
                else:
                    new_key = ('region', test_key[3])

                if new_key not in init_local['mask']:
                    # generate this mask
                    init_local['mask'][new_key] = self._gen_loc_mask(taper_function=test_key[0],
                                                                     anisotropi=[
                                                                         test_key[1], test_key[2]],
                                                                     loc_range=test_key[3],
                                                                     field_size=init_local['field'],
                                                                     ne=ne
                                                                     )

            else:
                if isinstance(test_key[3], list):
                    new_key = (test_key[0], test_key[1],
                               test_key[2], test_key[3][0], test_key[3][1])
                else:
                    new_key = test_key

                if new_key not in init_local['mask']:
                    # generate this mask
                    init_local['mask'][new_key] = self._gen_loc_mask(taper_function=test_key[0],
                                                                     anisotropi=[
                                                                         test_key[1], test_key[2]],
                                                                     loc_range=test_key[3][0],
                                                                     field_size=init_local['field'],
                                                                     ne=ne
                                                                     )
    self.loc_info = init_local
Format the parsed info from the input file, and generate the unique localization masks
def
localize(self, curr_data, curr_time, curr_param, ne, prior_info, data_size):
def localize(self, curr_data, curr_time, curr_param, ne, prior_info, data_size):
    """
    Assemble the full sparse localization matrix for the current update.

    Loops over curr_time, curr_data and curr_param; for each (data, time, parameter) combination
    found in self.loc_info the pre-computed taper mask is (re-)positioned at the data location,
    otherwise a zero mask is used (no update). Returns a scipy.sparse matrix of shape
    rho = n_m (size of total parameters) x n_d (size of all data).
    """
    loc = []
    for time_count, time in enumerate(curr_time):
        for count, data in enumerate(curr_data):
            if data_size[time_count][count] > 0:
                # one row-list per scalar element of this data vector
                tmp_loc = [[] for _ in range(data_size[time_count][count])]
                for param in curr_param:
                    # Check if this parameter should be localized
                    if (data, time, param) in self.loc_info:
                        if not self.loc_info[(data, time, param)]['taper_func'] == 'region':
                            # distance-based taper: look up pre-computed mask and re-position at data location(s)
                            if isinstance(self.loc_info[(data, time, param)]['range'], list):
                                key = (self.loc_info[(data, time, param)]['taper_func'],
                                       self.loc_info[(data, time, param)]['anisotropi'][0],
                                       self.loc_info[(data, time, param)]['anisotropi'][1],
                                       self.loc_info[(data, time, param)]['range'][0],
                                       self.loc_info[(data, time, param)]['range'][1])
                                mask = self._repos_locmask(self.loc_info['mask'][key],
                                                           [[el[0], el[1], el[2]] for el in
                                                            self.loc_info[(data, time, param)]['position']],
                                                           z_range=self.loc_info[(data, time, param)]['range'][1])
                            else:
                                key = (self.loc_info[(data, time, param)]['taper_func'],
                                       self.loc_info[(data, time, param)]['anisotropi'][0],
                                       self.loc_info[(data, time, param)]['anisotropi'][1],
                                       self.loc_info[(data, time, param)]['range'])
                                mask = self._repos_locmask(self.loc_info['mask'][key],
                                                           [[el[0], el[1]] for el in
                                                            self.loc_info[(data, time, param)]['position']])
                            # if len(mask.shape) == 2:  # this is field data
                            #     # check that first axis is data, i.e., n_d X n_m
                            #     if mask.shape[0] == data_size[time_count][count]:
                            #         for i in range(data_size[time_count][count]):
                            #             tmp_loc[i].append(mask[i, :])
                            #         # tmp_loc = np.hstack((tmp_loc, mask)) if tmp_loc.size else mask # trick
                            #     else:
                            # the same flattened mask applies to every element of this data vector
                            for i in range(data_size[time_count][count]):
                                tmp_loc[i].append(mask)
                            # tmp_loc = np.hstack((tmp_loc, mask.T)) if tmp_loc.size else mask.T
                            # else:
                            #     tmp_loc[0].append(mask)
                            # tmp_loc = np.append(tmp_loc, mask)
                            # np.savez('local_mask_upd/' + str(param) + ':' + str(time) + ':' + str(data).replace(' ', ':')
                            #          + '.npz', loc=mask)
                        elif self.loc_info[(data, time, param)]['taper_func'] == 'region':
                            # region masks are position-independent: use as stored
                            if isinstance(self.loc_info[(data, time, param)]['range'], list):
                                key = (self.loc_info[(data, time, param)]['taper_func'],
                                       self.loc_info[(data, time, param)]['range'][0],
                                       self.loc_info[(data, time, param)]['range'][1],
                                       self.loc_info[(data, time, param)]['range'][2])
                            else:
                                key = (self.loc_info[(data, time, param)]['taper_func'],
                                       self.loc_info[(data, time, param)]['range'])

                            mask = self.loc_info['mask'][key]
                            for i in range(data_size[time_count][count]):
                                tmp_loc[i].append(mask)
                    else:
                        # if no localization has been defined, assume that we do not update (zero mask)
                        if data_size[time_count][count] > 1:
                            # must make a field mask of zeros
                            mask = np.zeros((data_size[time_count][count], prior_info[param]['nx'] *
                                             prior_info[param]['ny'] *
                                             prior_info[param]['nz']))
                            # set the localization mask to zeros for this parameter
                            for i in range(data_size[time_count][count]):
                                if self.loc_info['actnum'] is not None:
                                    tmp_loc[i].append(
                                        mask[i, self.loc_info['actnum']])
                                else:
                                    tmp_loc[i].append(mask[i, :])
                            # tmp_loc = np.hstack((tmp_loc, mask)) if tmp_loc.size else mask
                        else:
                            mask = np.zeros(prior_info[param]['nx'] *
                                            prior_info[param]['ny'] *
                                            prior_info[param]['nz'])
                            if self.loc_info['actnum'] is not None:
                                tmp_loc[0].append(mask[self.loc_info['actnum']])
                            else:
                                tmp_loc[0].append(mask)
                # if data_size[count] == 1:
                #     loc = np.append(loc, np.array([tmp_loc, ]).T, axis=1) if loc.size else np.array([tmp_loc, ]).T
                # elif data_size[count] > 1:
                #     loc = np.concatenate((loc, tmp_loc.T), axis=1) if loc.size else tmp_loc.T
                # stack per-parameter masks horizontally into one sparse row per data element
                for el in tmp_loc:
                    if len(el) > 1:
                        loc.append(sparse.hstack(el))
                    else:
                        loc.append(sparse.csc_matrix(el))
    # rows are data elements, columns parameters -> transpose to (n_m, n_d)
    return sparse.vstack(loc).transpose()
    # return np.array(loc).T
def
auto_ada_loc(self, pert_state, proj_pred_data, curr_param, **kwargs):
321 def auto_ada_loc(self, pert_state, proj_pred_data, curr_param, **kwargs): 322 if 'prior_info' in kwargs: 323 prior_info = kwargs['prior_info'] 324 else: 325 prior_info = {key: None for key in curr_param} 326 327 step = [] 328 329 ne = pert_state.shape[1] 330 rp_index = np.random.permutation(ne) 331 shuffled_ensemble = pert_state[:, rp_index] 332 corr_mtx = self.get_corr_mtx(pert_state, proj_pred_data) 333 corr_mtx_shuffled = self.get_corr_mtx(shuffled_ensemble, proj_pred_data) 334 335 tapering_matrix = np.ones(corr_mtx.shape) 336 337 if self.loc_info['actnum'] is not None: 338 num_active = np.sum(self.loc_info['actnum']) 339 else: 340 num_active = np.prod(self.loc_info['field']) 341 count = 0 342 for param in curr_param: 343 if param == 'NA': 344 num_active = tapering_matrix.shape[0] 345 else: 346 if 'active' in prior_info[param]: # if this is defined 347 num_active = int(prior_info[param]['active']) 348 prop_index = np.arange(num_active) + count 349 current_tapering = self.tapering_function( 350 corr_mtx[prop_index, :], corr_mtx_shuffled[prop_index, :]) 351 tapering_matrix[prop_index, :] = current_tapering 352 count += num_active 353 step = np.dot(np.multiply(tapering_matrix, pert_state), proj_pred_data) 354 355 return step
def
tapering_function(self, cf, cf_s):
357 def tapering_function(self, cf, cf_s): 358 359 nstd = 1 360 if self.loc_info['nstd'] is not None: 361 nstd = self.loc_info['nstd'] 362 363 tc = np.zeros(cf.shape) 364 365 for i in range(cf.shape[1]): 366 current_cf = cf[:, i] 367 est_noise_std = np.median(np.absolute(cf_s[:, i]), axis=0) / 0.6745 368 cutoff_point = np.sqrt(2*np.log(np.prod(current_cf.shape))) * est_noise_std 369 cutoff_point = nstd * est_noise_std 370 if 'type' in self.loc_info and self.loc_info['type'] == 'soft': 371 current_tc = self.rational_function(1-np.absolute(current_cf), 372 1 - cutoff_point) 373 elif 'type' in self.loc_info and self.loc_info['type'] == 'sigm': 374 current_tc = self.rational_function_sigmoid(np.absolute(current_cf), 375 nstd) 376 else: # default to hard thresholding 377 set_upper = np.where(np.absolute(current_cf) > cutoff_point) 378 current_tc = np.zeros(current_cf.shape) 379 current_tc[set_upper] = 1 # this is hard thresholding 380 tc[:, i] = current_tc.flatten() 381 382 return tc
def
rational_function(self, dist, lc):
384 def rational_function(self, dist, lc): 385 386 z = np.absolute(dist) / lc 387 index_1 = np.where(z <= 1) 388 index_2 = np.where(z <= 2) 389 index_12 = np.setdiff1d(index_2, index_1) 390 391 y = np.zeros(len(z)) 392 393 y[index_1] = 1 - (np.power(z[index_1], 5) / 4) \ 394 + (np.power(z[index_1], 4) / 2) \ 395 + (5*np.power(z[index_1], 3) / 8) \ 396 - (5*np.power(z[index_1], 2) / 3) 397 398 y[index_12] = (np.power(z[index_12], 5) / 12) \ 399 - (np.power(z[index_12], 4) / 2) \ 400 + (5 * np.power(z[index_12], 3) / 8) \ 401 + (5 * np.power(z[index_12], 2) / 3) \ 402 - 5*z[index_12] \ 403 - np.divide(2, 3*z[index_12]) + 4 404 405 return y
def
get_corr_mtx(self, pert_state, proj_pred_data):
413 def get_corr_mtx(self, pert_state, proj_pred_data): 414 415 # compute correlation matrix 416 417 ne = pert_state.shape[1] 418 419 std_model = np.std(pert_state, axis=1) 420 std_model[std_model < 10 ** -6] = 10 ** -6 421 std_data = np.std(proj_pred_data, axis=1) 422 std_data[std_data < 10 ** -6] = 10 ** -6 423 # model_zero_spread_index = np.find(std_model<10**-6) 424 # data_zero_spread_index = np.find(std_data<10**-6) 425 426 C1 = np.mean(pert_state, axis=1) 427 A1 = np.outer(C1, np.ones(ne)) 428 B1 = np.outer(std_model, np.ones(ne)) 429 normalized_ensemble = np.divide((pert_state - A1), B1) 430 431 C2 = np.mean(proj_pred_data, axis=1) 432 A2 = np.outer(C2, np.ones(ne)) 433 B2 = np.outer(std_data, np.ones(ne)) 434 normalized_simData = np.divide((proj_pred_data - A2), B2) 435 436 corr_mtx = np.divide( 437 np.dot(normalized_ensemble, np.transpose(normalized_simData)), ne) 438 439 corr_mtx[std_model < 10 ** -6, :] = 0 440 corr_mtx[:, std_data < 10 ** -6] = 0 441 442 return corr_mtx