misc.grdecl
Read Schlumberger Eclipse grid input files.
>>> import pyresito.io.grdecl as grdecl
>>> grid = grdecl.read ("FOO")
1# pylint: disable=too-many-lines 2"""Read Schlumberger Eclipse grid input files. 3 4>>> import pyresito.io.grdecl as grdecl 5>>> grid = grdecl.read ("FOO") 6""" 7from __future__ import division 8import argparse 9import codecs # pylint: disable=unused-import 10import collections as col 11import contextlib as ctx 12import logging 13import math 14import mmap 15import numpy 16import numpy.ma 17import os 18import os.path 19import re 20from six.moves import range # pylint: disable=redefined-builtin, import-error 21import six 22import sys 23 24 25# even though the memoryview api exists in Python 2, the regular expression 26# engine cannot use it, so we must keep the old buffer interface around, too 27if sys.version_info[0] < 3: 28 def membuf(mem, bgn, end): # pylint: disable=missing-docstring 29 return buffer(mem, bgn, end - bgn) 30 31else: 32 def membuf(mem, bgn, end): # pylint: disable=missing-docstring 33 return memoryview(mem)[bgn: end] 34 35 36# there is a bug in the interaction between int() and memoryview in 37# Python 3: it attempts to interpret the entire buffer instead of just 38# the subset are indexed. this is solved by casting it to a bytebuffer 39# before conversion, so no string decoding needs to take place 40if sys.version_info[0] < 3: 41 def conv_atoi(buf): # pylint: disable=missing-docstring 42 return int(buf) 43 44 def conv_atof(buf): # pylint: disable=missing-docstring 45 return float(buf) 46 47else: 48 def conv_atoi(buf): # pylint: disable=missing-docstring 49 return int(bytes(buf)) 50 51 def conv_atof(buf): # pylint: disable=missing-docstring 52 return float(bytes(buf)) 53 54 55# file on disk is read as single bytes, whereas text in memory is of the string 56# type. these helper functions convert between the two, in Python 2 there is no 57# difference between the types 58if sys.version_info[0] < 3: 59 def enc(text): # pylint: disable=missing-docstring 60 return text 61 62 def dec(buf): # pylint: disable=missing-docstring 63 return buf 64 65else: 66 def enc(text): # pylint: disable=missing-docstring 67 return codecs.utf_8_encode(text)[0] 68 69 def dec(buf): # pylint: disable=missing-docstring 70 return codecs.utf_8_decode(buf)[0] 71 72 73# add a valid log in case we are not run through the main program which 74# sets up one for us 75log = logging.getLogger(__name__) # pylint: disable=invalid-name 76log.addHandler(logging.NullHandler()) 77 78 79class GrdEclError (Exception): 80 """Exception thrown if invalid syntax is encountered.""" 81 82 def __init__(self, path, pos, message): 83 line, column = pos 84 # include the filename if any was specified. only print the name, 85 # not the entire path, since this may be a very long string 86 fmt = ("in \"{0}\" " if path is not None else "") + "at ({1},{2}): {3}" 87 basename = (os.path.basename(path) if path else "") 88 super(GrdEclError, self).__init__( 89 fmt.format(basename, line, column, message)) 90 91 92# pylint: disable=invalid-name 93# tokens that exists in Eclipse grid files: 94 95# whitespace is everything that is between other tokens; I've put comments 96# into this class so that they can appear inside the data tables (e.g. for 97# headings and dividers); it shouldn't add much to the processing time since 98# they can be decided with very little look-ahead 99whitespace = re.compile(enc(r'[\ \t]+|--[^\r]*')) 100 101# newlines need to be their own tokens since we are going to update the 102# position counter at each such occurrence (source files are better viewed 103# as (line, column) rather than byte offset) 104newline = re.compile(enc(r'\r?\n')) 105 106# cardinals are integer numbers that describe sizes, e.g. the number of 107# rows in a spatial direction 108cardinal = re.compile(enc(r'[0-9]+')) 109 110# multiple cardinals prefixed with an optional repeat count 111multint = re.compile(enc(r'([0-9]+\*)?([0-9]+)')) 112 113# single floating-point number without any count. coordinates are described 114# using this type, since there is no meaning in defining a repeat count that 115# goes across both x and y-coordinates. 116_digs = r'([0-9]?\.[0-9]+|[0-9]+(\.[0-9]+)?)([eEdD][-+]?[0-9]+)?' 117 118# properties that are imported from other sources may also have NaN written 119# in cells where the value doesn't apply; Petrel will understand this, but 120# Eclipse won't. 121_nan = r'[nN][aA][nN]' 122 123# regular expression that accepts both NaNs and single numbers 124_anumb = r'-?(' + _nan + r'|' + _digs + r')' 125single = re.compile(enc(_anumb)) 126 127# floating-point numbers. the bulk of the data will be tokens of this type. 128# first group is the repeat count, the sign is optional, then comes two 129# subgroups: either the integer part or the fractional part of the number 130# can be omitted, but not both. third group is the exponent, allowing for 131# Fortran-style single precision indicator 132number = re.compile(enc(r'([0-9]+\*)?(' + _anumb + r')')) 133 134# most records ends with a slash; this is used as a guard token to ensure 135# that we are parsing the file correctly 136endrec = re.compile(enc(r'/')) 137 138# a flag is encoded as zero or one, with an optional repeat count. this 139# type of lexeme is used to parse the ACTNUM property. 140flag = re.compile(enc(r'([0-9]+\*)?([01])')) 141 142# keywords are section headers. they are really only supposed to be 8 143# characters but Petrel will write longer names for custom attributes 144keyword = re.compile(enc(r'[a-zA-Z_]+')) 145 146# filenames are used to include; either we specify more or less anything 147# as long as it is between quotes (notably including forward slashes!), or 148# it can be something that resembles a sensible MS-DOS name (including 149# colons and back-slashes!) 150filename = re.compile(enc(r'(\'[^\']*\')|([a-zA-Z0-9_\-\.\:\\\ ]+)')) 151 152 153class _Lexer (object): 154 """View the input file as a stream of tokens""" 155 156 # pylint: disable=unused-argument 157 def __init__(self, filename, mem): 158 """Lex a given input file with an opened memory mapping.""" 159 160 super(_Lexer, self).__init__() 161 162 # line number is one-based, and we start at the first line 163 self.lineno = 1 164 165 # this is the file offset of the first character on the current 166 # line. we use this to determine the column number (it is not 167 # updated whenever we read something, but rather generated on demand) 168 self.startofs = 0 169 170 # this is the position of where we last found a token; we store the 171 # offset into the file, and can find the column from that 172 self.last_pos = 0 173 174 # use the memory-mapping of the file as if it was a string buffer 175 self.mem = mem 176 177 # when viewed as a byte-buffer/string memory mapped files always 178 # "start" at the beginning of the file, regardless of the cursor. 179 # we therefore maintain a separate buffer object that points into 180 # the current position in the file. the reason that we bother to 181 # use the position at all is in case it signals to the os where 182 # to put the memory window of the file 183 self.buf = membuf(self.mem, self.mem.tell(), self.mem.size()) 184 185 # save a reference to the filename for use in error messages 186 self.path = filename 187 188 def close(self): 189 """Clean up internal memory buffer usage""" 190 self.buf = None 191 self.mem = None 192 193 def _advance(self, length): 194 """Advance the current position of the file by length bytes. length 195 is commonly the length of the last read token. 196 """ 197 self.mem.seek(length, os.SEEK_CUR) 198 self.buf = membuf(self.mem, self.mem.tell(), self.mem.size()) 199 200 def skip_blanks(self): 201 """Skip one or more blank tokens, updating the counter as we go. 202 """ 203 done = False 204 while not done: 205 match = re.match(whitespace, self.buf) 206 if match: 207 # advance the stream to skip over the whitespace 208 self._advance(match.end() - match.start()) 209 else: 210 # maybe it wasn't blank whitespace, but rather a newline? 211 match = re.match(newline, self.buf) 212 213 if match: 214 # still skip this token 215 self._advance(match.end() - match.start()) 216 217 # but now also update the line counter 218 self.lineno = self.lineno + 1 219 self.startofs = self.mem.tell() 220 else: 221 # it is a legitimate token, return for further processing; 222 # the memory stream will not have advanced, so the token 223 # is still at the beginning of the stream buffer 224 done = True 225 226 def pos(self): 227 """Return the current position in the file as (line, column)""" 228 # we have the line counter available; the column counter is 229 # available from looking how far we have progressed since the 230 # last linebreak. we return the position of the last token, 231 # which may not be visible to the parser (can be converted to number) 232 return (self.lineno, self.last_pos - self.startofs + 1) 233 234 def expect(self, lexeme): 235 """Expect a certain type of lexeme, e.g. keyword, to appear as the 236 next non-whitespace token in the stream. 237 """ 238 # skip any blanks in front of the wanted token 239 self.skip_blanks() 240 241 # next token is going to be at this position 242 self.last_pos = self.mem.tell() 243 244 # attempt to match the lexeme in question 245 match = re.match(lexeme, self.buf) 246 247 if match: 248 # read off the token that was matched into a separate string 249 token = match.group(0) 250 251 # update the buffer pointer to the new position 252 self._advance(match.end() - match.start()) 253 254 return token 255 else: 256 return None 257 258 def expect_count(self, lexeme): 259 """Expect a production where the first group is an optional repeat 260 count. The function returns a tuple where the first item is the value 261 and the second one is the repeat count. 262 """ 263 # skip any blanks in front of the wanted token 264 self.skip_blanks() 265 266 # next token is going to be at this position 267 self.last_pos = self.mem.tell() 268 269 # attempt to match the lexeme 270 match = re.match(lexeme, self.buf) 271 272 if match: 273 # if the count group is not present, then set count to be 1 274 if match.group(1) is None: 275 count = 1 276 else: 277 count = conv_atoi(match.group(1)[:-1]) 278 279 # the value is the second group, capturing the rest of the token 280 value = match.group(2) 281 282 # update the buffer pointer to the new position 283 self._advance(match.end() - match.start()) 284 285 return (count, value) 286 else: 287 return (0, None) 288 289 def expect_str(self, lexeme): 290 """Expect a text token.""" 291 txt = self.expect(lexeme) 292 return (None if txt is None else dec(txt)) 293 294 def expect_filename(self): 295 """Expect a filename.""" 296 name = self.expect(filename) 297 # specification of filenames allow spaces at the beginning and the 298 # end. since the slash is not part of the filename but rather and 299 # end record, it is natural to separate them with a space. to avoid 300 # an overly complex regular expression, we do the trimming here 301 return (None if name is None else dec(name).strip().strip("\'")) 302 303 def expect_enum(self, keywords, kind): 304 """Expect a keyword in a given set, or throw error if not found""" 305 token = self.expect_str(keyword) 306 if not token: 307 raise GrdEclError(self.path, self.pos(), 308 "Expected keyword for {0}".format(kind)) 309 if token not in keywords: 310 raise GrdEclError(self.path, self.pos(), 311 "Unsupported {0} \"{1}\"".format(kind, token)) 312 return token 313 314 def expect_single(self): 315 """Expect a single floating point number""" 316 token = self.expect(single) 317 if not token: 318 raise GrdEclError(self.path, self.pos(), 319 "Expected floating-point number") 320 return float(token) 321 322 def expect_cardinal(self): 323 """Expect a single cardinal number""" 324 token = self.expect(cardinal) 325 if not token: 326 raise GrdEclError(self.path, self.pos(), 327 "Expected positive, integer number") 328 return conv_atoi(token) 329 330 def expect_multi_card(self): 331 """Expect multiple cardinal numbers""" 332 # use scanning routine which takes care of the optional count with star 333 # in front of our own lexeme 334 count, value = self.expect_count(multint) 335 336 # convert the string itself to our integral type afterwards 337 return (count, numpy.int32(value)) 338 339 def expect_numbers(self): 340 """Expect a floating-point number with an optional repeat count""" 341 count, value = self.expect_count(number) 342 # convert to byte buffer to be able to search and modify it 343 value = bytearray(value) 344 345 # check if we are using the Fortran-style exponent, and it that 346 # case replace it with something our runtime understands 347 ndx = value.find(b'd') 348 if ndx != -1: 349 value[ndx] = b'e' 350 else: 351 ndx = value.find(b'D') 352 if ndx != -1: 353 value[ndx] = b'e' 354 355 return (count, numpy.float64(value)) 356 357 def expect_bools(self): 358 """Expect boolean flags with an optional repeat count""" 359 count, value = self.expect_count(flag) 360 # if we don't convert to int first and then to bool, the conversion 361 # will always yield a value of True! 362 return (count, bool) 363 364 def at_eof(self): 365 """Check if the lexer has progressed to the end of the stream""" 366 return self.mem.tell() == self.mem.size() 367 368 369class _FileMapPair (object): # pylint: disable=too-few-public-methods 370 """An open file with an associated memory-mapping""" 371 372 def __init__(self, path): 373 """Open file and mapping from a path specification""" 374 375 # must open files as binary on Windows NT, or they are automagically 376 # converted into carriage-return/newline! also, by declaring that we 377 # are reading the file sequentially, the fs cache can optimize. 378 # pylint: disable=no-member 379 flags = os.O_BINARY | os.O_SEQUENTIAL if os.name == 'nt' else 0 380 self.file_handle = os.open(path, os.O_RDONLY | flags) 381 382 # tell the operating system that we intend to read the file in one go 383 if (sys.platform.startswith('linux') and 384 sys.version_info[0:2] >= (3, 3)): 385 os.posix_fadvise(self.file_handle, 0, 0, os.POSIX_FADV_SEQUENTIAL) 386 387 # attempt to create a memory-mapping of the file; if this doesn't 388 # succeed, then close the underlaying file as well 389 try: 390 self.map_obj = mmap.mmap(self.file_handle, 0, 391 access=mmap.ACCESS_READ) 392 except Exception: 393 os.close(self.file_handle) 394 raise 395 396 def close(self): 397 """Close both the file and the memory mapping""" 398 try: 399 self.map_obj.close() 400 finally: 401 os.close(self.file_handle) 402 403 404class _OwningLexer (_Lexer): 405 """Lexer that owns exclusively the underlaying memory map""" 406 407 def __init__(self, path): 408 # first attempt to open the file and map it; if this is successful, 409 # then create a lexer using the memory-map (and keep the file object 410 # in ourself). if the lexer could not be created, then make sure that 411 # the file doesn't leak 412 self.fmp = _FileMapPair(path) 413 try: 414 super(_OwningLexer, self).__init__(path, self.fmp.map_obj) 415 except Exception: 416 self.fmp.close() 417 raise 418 419 def close(self): 420 """Clean up all resources associated with the lexer""" 421 422 # let the superclass clear its internal references first, and then 423 # make sure that the underlaying OS handles are closed afterwards 424 try: 425 super(_OwningLexer, self).close() 426 finally: 427 self.fmp.close() 428 429 430# pylint: disable=too-many-arguments, too-many-locals, invalid-name 431def _axes_rot(x1, y1, x2, y2, x3, y3): 432 """Rotation matrix and offset vector for axes mapping. 433 434 :returns: (A, b) such that each point x in coordinate array should be 435 rotated with A(x-b)+b 436 """ 437 # y axis vector 438 v1_x = x1 - x2 439 v1_y = y1 - y2 440 v1_len = math.sqrt(v1_x * v1_x + v1_y * v1_y) 441 ey_x = v1_x / v1_len 442 ey_y = v1_y / v1_len 443 444 # x axis vector 445 v3_x = x3 - x2 446 v3_y = y3 - y2 447 v3_len = math.sqrt(v3_x * v3_x + v3_y * v3_y) 448 ex_x = v3_x / v3_len 449 ex_y = v3_y / v3_len 450 451 # define transformation; each point x in COORD array is rotated A(x-b)+b 452 A = numpy.array([[ex_x, ey_x, 0.], 453 [ex_y, ey_y, 0.], 454 [0., 0., 1.]], dtype=numpy.float64) 455 b = numpy.array([x2, y2, 0], dtype=numpy.float64) 456 457 return (A, b) 458 459 460# pylint: disable=invalid-name 461def _no_rot(): 462 """Rotation matrix and offset vector if there is no axes mapping.""" 463 A = numpy.array([[1., 0., 0.], 464 [0., 1., 0.], 465 [0., 0., 1.]], dtype=numpy.float64) 466 b = numpy.array([0., 0., 0.], dtype=numpy.float64) 467 return (A, b) 468 469 470# pylint: disable=too-many-instance-attributes, too-few-public-methods 471class _Parser (object): 472 """Read sections usually exported by Petrel from an Eclipse cornerpoint grid 473 file. 474 """ 475 476 def __init__(self, filename, mem): 477 super(_Parser, self).__init__() 478 479 # these sections have their own productions, because they need special 480 # treatment or give side-effects 481 self.prod = {} 482 self.prod['GRID'] = self._ignore 483 self.prod['PINCH'] = self._pinch 484 self.prod['NOECHO'] = self._ignore 485 self.prod['MAPUNITS'] = self._mapunits 486 self.prod['MAPAXES'] = self._mapaxes 487 self.prod['GRIDUNIT'] = self._gridunit 488 self.prod['SPECGRID'] = self._specgrid 489 self.prod['COORDSYS'] = self._coordsys 490 self.prod['COORD'] = self._coord 491 self.prod['ZCORN'] = self._zcorn 492 self.prod['ACTNUM'] = self._actnum 493 self.prod['ECHO'] = self._ignore 494 self.prod['INCLUDE'] = self._include 495 self.prod['DIMENS'] = self._dimens 496 497 # this list contains the keywords that are all readable in the same way 498 self.cell_sections = [x for x in _SECTIONS.keys() 499 if _SECTIONS[x]['cell']] 500 501 # if no rotation matrix is defined, then use identity 502 self.rot_A, self.rot_b = _no_rot() 503 504 # this class does the interpretation of the bytes on disk 505 self.lex = _Lexer(filename, mem) 506 507 # each section is read into a dictionary object 508 self.section = dict() 509 510 # before we have read anything, we don't know the dimensions of 511 # the grid 512 self.ni = None 513 self.nj = None 514 self.nk = None 515 516 # if we have inactive cells, this attribute tells us where they are 517 # so that we can create a masked array 518 self.mask = None 519 520 # if partial files are opened, then the old lexers are pushed onto 521 # this stack while the new file is parsed. when the new file is 522 # finished, then parsing resumes from the top of this stack 523 self.lexer_stack = [] 524 525 def close(self): 526 """Clean up internal memory buffer usage""" 527 # if we were interrupted while we were in the middle of a parse, 528 # then it might be that there are several opened sub-files that must 529 # be closed. normally this loop should be empty. 530 while len(self.lexer_stack): 531 self.lexer_stack.pop().close() 532 533 self.lex.close() 534 self.lex = None 535 536 def _include(self): 537 """Continue parsing a fragment file at this point""" 538 # read the file name from the keyword stream 539 name = self.lex.expect_filename() 540 if not name: 541 raise GrdEclError(self.lex.path, self.lex.pos(), 542 ("Include statements must be followed by " 543 "a file name")) 544 self.lex.expect(endrec) 545 546 # included files should be read relative to the file that refer to 547 # them, not to any arbitrary current directory of the process. 548 fullname = os.path.normpath(os.path.join(os.path.dirname( 549 self.lex.path), name)) 550 551 log.info('Reading from file "%s"', name) 552 # put the current lexer on hold, and put a new lexer in its place so 553 # that parsing continues from the new file 554 self.lexer_stack.append(self.lex) 555 self.lex = _OwningLexer(fullname) 556 557 def _ignore(self): 558 """Ignore this single keyword""" 559 pass 560 561 def _pinch(self): 562 """Production for PINCH keyword""" 563 # this is just a processing flag that is set; nothing more to read 564 self.lex.expect(endrec) 565 566 def _mapunits(self): 567 """Production for MAPUNITS keyword""" 568 # read the keyword for units specified. currently, only specifying 569 # metres directly is supported. alternatively, a different unit should 570 # be looked up here in a dictionary and a scaling factor set 571 self.lex.expect_enum(['METRES'], "unit") 572 self.lex.expect(endrec) 573 574 def _mapaxes(self): # pylint: disable=too-many-locals 575 """Production for MAPAXES keyword""" 576 # a point on the Y axis 577 x1 = self.lex.expect_single() 578 y1 = self.lex.expect_single() 579 580 # origin 581 x2 = self.lex.expect_single() 582 y2 = self.lex.expect_single() 583 584 # a point on the X axis 585 x3 = self.lex.expect_single() 586 y3 = self.lex.expect_single() 587 588 # even though data is fixed-length, it needs a record terminator 589 self.lex.expect(endrec) 590 591 self.rot_A, self.rot_b = _axes_rot(x1, y1, x2, y2, x3, y3) 592 593 def _gridunit(self): 594 """Production for GRIDUNIT keyword""" 595 self.lex.expect_enum(["METRES"], "unit") 596 597 # map flag is optional 598 token = self.lex.expect_str(keyword) 599 if token: 600 if token != "MAP": 601 raise GrdEclError(self.lex.path, self.lex.pos(), 602 ("Expected flag MAP or nothing, " 603 "not \"{0}\"").format(token)) 604 self.lex.expect(endrec) 605 606 def _specgrid(self): 607 """Production for the SPECGRID keyword""" 608 # grid dimensions; we read this directly into the object fields 609 # since we need these to allocate memory later 610 self.ni = self.lex.expect_cardinal() 611 self.nj = self.lex.expect_cardinal() 612 self.nk = self.lex.expect_cardinal() 613 614 # also add this as if we had read the DIMENS keyword 615 self.section['DIMENS'] = numpy.array([self.ni, self.nj, self.nk], 616 dtype=numpy.int32) 617 618 # number of reservoirs are not used in Eclipse, but are in the API 619 numres = self.lex.expect_cardinal() 620 if numres != 1: 621 raise GrdEclError(self.lex.path, self.lex.pos(), 622 "Expected only one reservoir, but got {0:d}", 623 numres) 624 625 # radial coordinates are not supported yet 626 token = self.lex.expect_str(keyword) 627 if token: 628 if token != "F": 629 raise GrdEclError(self.lex.path, self.lex.pos(), 630 ("Expected F for Cartesian coordinates " 631 "but got \"{0}\"").format(token)) 632 # end-of-record sentinel in case any tokens are defaulted 633 self.lex.expect(endrec) 634 635 def _dimens(self): 636 """Production for the DIMENS keyword""" 637 ni = self.lex.expect_cardinal() 638 nj = self.lex.expect_cardinal() 639 nk = self.lex.expect_cardinal() 640 self.section['DIMENS'] = numpy.array([ni, nj, nk], 641 dtype=numpy.int32) 642 643 self.lex.expect(endrec) 644 645 def _coordsys(self): 646 """Production for the COORDSYS keyword""" 647 # we are reading only one grid, so we don't really have any notion 648 # about where this is going to end up in the total reservoir. 649 self.lex.expect_cardinal() # k_lower = 650 self.lex.expect_cardinal() # k_upper = 651 # there can be more arguments to this keyword, but Petrel will only 652 # write the first two 653 self.lex.expect(endrec) 654 655 def _coord(self): 656 """Production for the COORD keyword""" 657 # check that we actually know how many numbers to read 658 if 'DIMENS' not in self.section: 659 raise GrdEclError(self.lex.path, self.lex.pos(), 660 "COORD section must come after SPECGRID") 661 662 # each column/row of cells have a pillar on either side, meaning 663 # that there is an extra one at each end, and there is a top and 664 # a bottom coordinate for every pillar 665 num_coord = (self.ni + 1) * (self.nj + 1) * 2 666 667 # allocate memory 668 coord = numpy.empty((num_coord, 3), dtype=numpy.float64) 669 670 # read three and three numbers, making out a coordinate. we take 671 # care not to allocate any extra memory in this loop 672 vec = numpy.empty((3, ), dtype=numpy.float64) 673 for i in range(num_coord): 674 # read coordinate specified in file, into intermediate vector 675 vec[0] = self.lex.expect_single() # x 676 vec[1] = self.lex.expect_single() # y 677 vec[2] = self.lex.expect_single() # z 678 679 # rotate according to map axes, and assign into global grid 680 vec -= self.rot_b 681 numpy.dot(self.rot_A, vec, out=coord[i, :]) 682 coord[i, :] += self.rot_b 683 684 # after we're done reading, then rearrange the grid so that is 685 # is no longer a list of coordinates, but rather a true grid 686 self.section['COORD'] = numpy.reshape( 687 coord, (self.nj + 1, self.ni + 1, 2, 3)) 688 689 # after all the coordinates are read, we expect the sentinel token 690 self.lex.expect(endrec) 691 692 def _zcorn(self): 693 """Production for the ZCORN keyword""" 694 # check that we actually know how many numbers to read 695 if 'DIMENS' not in self.section: 696 raise GrdEclError(self.lex.path, self.lex.pos(), 697 "ZCORN section must come after SPECGRID") 698 699 # allocate memory 700 num_zcorn = self.nk * self.nj * self.ni * 2 * 2 * 2 701 zcorn = numpy.empty((num_zcorn, ), dtype=numpy.float64) 702 703 # read numbers from file; each read may potentially fill a different 704 # length of the array 705 num_read = 0 706 while num_read < num_zcorn: 707 count, value = self.lex.expect_numbers() 708 zcorn[num_read: num_read+count] = value 709 num_read += count 710 711 # verify that we got exactly the number of corners we wanted 712 if num_read > num_zcorn: 713 raise GrdEclError(self.lex.path, self.lex.pos(), 714 "Expected {0:d} numbers, but got {1:d}".format( 715 num_zcorn, num_read)) 716 717 # reformat the data into a natural kji/bfr hypercube 718 self.section['ZCORN'] = numpy.reshape( 719 zcorn, (self.nk, 2, self.nj, 2, self.ni, 2)) 720 721 # after all the data, there should be a sentinel slash 722 self.lex.expect(endrec) 723 724 def _actnum(self): 725 """Production for the ACTNUM keyword""" 726 # check that we actually know how many numbers to read 727 if 'DIMENS' not in self.section: 728 raise GrdEclError(self.lex.path, self.lex.pos(), 729 "ACTNUM section must come after SPECGRID") 730 731 # allocate memory 732 num_cells = self.nk * self.nj * self.ni 733 actnum = numpy.empty((num_cells, ), dtype=bool) 734 735 # read numbers from file; each read may potentially fill a different 736 # length of the array 737 num_read = 0 738 while num_read < num_cells: 739 count, value = self.lex.expect_bools() 740 actnum[num_read: num_read+count] = value 741 num_read += count 742 743 # verify that we got exactly the number of corners we wanted 744 if num_read > num_cells: 745 raise GrdEclError(self.lex.path, self.lex.pos(), 746 "Expected {0:d} numbers, but got {1:d}".format( 747 num_cells, num_read)) 748 749 # if not all cells were specified, the rest should be marked as 750 # inactive 751 if num_read < num_cells: 752 actnum[num_read:] = False 753 754 # reformat the data into a natural kji/bfr hypercube 755 self.section['ACTNUM'] = numpy.reshape( 756 actnum, (self.nk, self.nj, self.ni)) 757 758 # mask is simply the opposite of the active flag 759 self.mask = numpy.logical_not(self.section['ACTNUM']) 760 761 # after all the data, there should be a sentinel slash 762 self.lex.expect(endrec) 763 764 def _celldata(self, kw): 765 """Production for the keywords that load data per cell""" 766 # check that we actually know how many numbers to read 767 if 'DIMENS' not in self.section: 768 raise GrdEclError(self.lex.path, self.lex.pos(), 769 "{0} section must come after SPECGRID".format( 770 kw)) 771 772 # check if this keyword is registered with a known type 773 typ = _kw_dtype(kw) 774 775 # allocate memory 776 num_cells = self.nk * self.nj * self.ni 777 data = numpy.empty((num_cells, ), dtype=typ) 778 779 # read numbers from file; each read may potentially fill a different 780 # length of the array. use an almost identical type of loop for 781 # integers and floating-point, but avoid using test inside the loop 782 # or polymorphism through function pointer, to ease optimization 783 num_read = 0 784 if numpy.issubdtype(typ, numpy.integer): 785 while num_read < num_cells: 786 count, value = self.lex.expect_multi_card() 787 data[num_read: num_read+count] = value 788 num_read += count 789 else: 790 while num_read < num_cells: 791 count, value = self.lex.expect_numbers() 792 data[num_read: num_read+count] = value 793 num_read += count 794 795 # verify that we got exactly the number of corners we wanted 796 if num_read > num_cells: 797 raise GrdEclError(self.lex.path, self.lex.pos(), 798 "Expected {0:d} numbers, but got {1:d}".format( 799 num_cells, num_read)) 800 801 # reformat the data into a natural kji/bfr hypercube 802 data = numpy.reshape(data, (self.nk, self.nj, self.ni)) 803 if self.mask is not None: 804 # integral data is mostly category indices; -1 tends to be an 805 # over-all good enough value to use for that purpose, but Petrel 806 # won't understand it if we are reading the file back in. 807 if numpy.issubdtype(data.dtype, numpy.integer): 808 data[self.mask] = 0 809 else: 810 data[self.mask] = numpy.nan 811 self.section[kw] = numpy.ma.array(data=data, 812 dtype=data.dtype, 813 mask=self.mask) 814 else: 815 self.section[kw] = data 816 817 # after all the data, there should be a sentinel slash 818 self.lex.expect(endrec) 819 820 def parse(self): 821 """Parse the entire file""" 822 # read all sections of the file 823 while True: 824 # read a keyword first 825 kw = self.lex.expect_str(keyword) 826 827 # check here, after we have read the keyword, for end-of-file 828 # since it may be that the function above skip some blanks 829 # at the end without ever returning a keyword 830 if self.lex.at_eof(): 831 # if there are any nested files open, then close this file 832 # and continue reading from the previous one 833 if len(self.lexer_stack): 834 self.lex.close() 835 self.lex = self.lexer_stack.pop() 836 log.info('Resume reading from "%s"', self.lex.path) 837 continue 838 else: 839 break 840 841 # if this is a recognizable keyword, then call its handler 842 # function; otherwise raise an error about the keyword 843 if kw: 844 if kw in self.prod: 845 log.info("Reading keyword %s", kw) 846 self.prod[kw]() 847 elif kw in self.cell_sections: 848 log.info("Reading keyword %s", kw) 849 self._celldata(kw) 850 else: 851 raise GrdEclError(self.lex.path, self.lex.pos(), 852 "Unknown keyword \"{0}\"".format(kw)) 853 else: 854 raise GrdEclError(self.lex.path, self.lex.pos(), 855 "Expected keyword here") 856 857 return self.section 858 859 860_GEN_BGN = b'-- Generated [\r\n' 861_GEN_END = b'-- Generated ]\r\n' 862 863 864def _parse_meta(mem): 865 """Parse the file headers for properties that are put there by the 866 generator and that can give hints as to how to most efficiently 867 process the file. 868 869 :param mem: Opened memory-map for the main input file. 870 :type mem: mmap.mmap 871 :returns: Properties that are set in the header of the data file, 872 and the number of bytes that can be skipped to avoid 873 processing this header again (it will be in comments) 874 :rtype: (dict, int) 875 """ 876 # dictionary that will receive parsed headers, if any 877 props = col.OrderedDict() 878 879 # in case of an early exit by the below test, we don't want the client 880 # to skip anything (there is no header) 881 nxt = 0 882 883 # if the very first line is a special header, then start reading 884 # the properties from the beginning of the file 885 if mem.find(_GEN_BGN, 0, len(_GEN_BGN)) != -1: 886 # skip the initial indicator, go straight to the first property 887 pos = len(_GEN_BGN) 888 while True: 889 # pos is the position where the current property starts, and 890 # nxt is the position where the next property starts (everything 891 # in between those two belongs to this particular property). 892 # add one so that we get the byte that is after the newline. 893 nxt = mem.find(b'\n', pos) + 1 894 895 # if we cannot find the end of the property (file wrap-around), 896 # then just claim the file as exhausted 897 if nxt == 0: 898 nxt = mem.size() 899 break 900 901 # if this property is the end marker, then stop processing 902 if mem.find(_GEN_END, pos, nxt) != -1: 903 break 904 else: 905 # two first three bytes are comment hyphens ("-- "), and the 906 # last two bytes are the newline ("\r\n"). read the tokens 907 # that are inside these brackets 908 fst = pos + 3 909 lst = nxt - 2 910 911 # find the colon if this is a named property, and use that 912 # colon to parse the line into a key and a value. if there 913 # is no value, then interpret this as a kind of directive 914 delim = mem.find(b':', fst, lst) 915 if delim != -1: 916 key = dec(mem[fst: delim]).rstrip() 917 value = dec(mem[delim + 2: lst]) 918 else: 919 key = dec(mem[fst: lst]) 920 value = None 921 922 # assign the values found to the output set 923 props[key] = value 924 925 # move to processing the next line 926 pos = nxt 927 928 # return the properties that were found, or empty dictionary if not 929 # a generated file 930 return (props, nxt) 931 932 933# known sections that can be in a datafile, and whether they have any 934# associated data with them (should be read until a final slash or not) 935# result refers to whether this section should be read as part of the 936# final result or not. cell refers to whether this is a (regular) cell 937# property (with format nk*nj*ni) 938_SECTIONS = { 939 'PINCH': {'slashed': True, 'result': False, 'cell': False, }, 940 'ECHO': {'slashed': False, 'result': False, 'cell': False, }, 941 'MAPUNITS': {'slashed': True, 'result': False, 'cell': False, }, 942 'MAPAXES': {'slashed': True, 'result': False, 'cell': False, }, 943 'GRIDUNIT': {'slashed': True, 'result': False, 'cell': False, }, 944 'SPECGRID': {'slashed': True, 'result': False, 'cell': False, }, 945 'COORDSYS': {'slashed': True, 'result': False, 'cell': False, }, 946 'COORD': {'slashed': True, 'result': False, 'cell': False, }, 947 'ZCORN': {'slashed': True, 'result': False, 'cell': False, }, 948 'ACTNUM': {'slashed': True, 'result': False, 'cell': False, }, 949 'PORO': {'slashed': True, 'result': True, 'cell': True, }, 950 'PERMX': {'slashed': True, 'result': True, 'cell': True, }, 951 'PERMY': {'slashed': True, 'result': True, 'cell': True, }, 952 'PERMZ': {'slashed': True, 'result': True, 'cell': True, }, 953 'TRANSX': {'slashed': True, 'result': True, 'cell': True, }, 954 'TRANSY': {'slashed': True, 'result': True, 'cell': True, }, 955 'NOECHO': {'slashed': False, 'result': False, 'cell': False, }, 956 'GRID': {'slashed': False, 'result': False, 'cell': False, }, 957 'INCLUDE': {'slashed': True, 'result': False, 'cell': False, }, 958 'NTG': {'slashed': True, 'result': True, 'cell': True, }, 959 'SWATINIT': {'slashed': True, 'result': True, 'cell': True, }, 960 'SWCR': {'slashed': True, 'result': True, 'cell': True, }, 961 'EQLNUM': {'slashed': True, 'result': True, 'cell': True, }, 962 'FIPNUM': {'slashed': True, 'result': True, 'cell': True, }, 963 # for properties that are not used by Eclipse, Petrel doesn't care 964 # about the eight character limit, but just writes the full name 965 'BULKVOLUME': {'slashed': True, 'result': True, 'cell': True, }, 966 'ELEVATIONGENERAL': {'slashed': True, 'result': True, 'cell': True, }, 967 'FACIES': {'slashed': True, 'result': True, 'cell': True, 968 'dtype': numpy.int32}, 969} 970 971 972def _kw_dtype(kw): 973 """Data type of a keyword, if specified in the section of known keywords""" 974 # thanks to boolean short-circuiting, we can test for the presence of the 975 # keyword in the section list and whether it has the dtype property in the 976 # same expression. if it isn't there, then the default are floating point 977 if kw in _SECTIONS and 'dtype' in _SECTIONS[kw]: 978 return _SECTIONS[kw]['dtype'] 979 else: 980 return numpy.float64 981 982 983def _fast_index(path, mem, skip=0): 984 # lookup table that will be returned 985 index = {} 986 987 # don't associate keywords in the main file with a particular file, 988 # since we already have the memory-mapping open, but extract the 989 # directory of the path so we can read included files from there 990 base_dir = os.path.normpath(os.path.dirname(path)) 991 _fast_index_mem(base_dir, None, mem, skip, index) 992 993 # return the lookup table to caller after the file is processed 994 return index 995 996 997def _fast_index_file(base_dir, fname, index): 998 # full path and name of the file to be included 999 fullfile = os.path.normpath(os.path.join(base_dir, fname)) 1000 1001 # it might be that the included file is in a sub-dir, in which case 1002 # further files included should be read from that directory too 1003 new_dir = os.path.dirname(fullfile) 1004 1005 # open a new memory buffer for this file, and index it into the existing 1006 # section list, associating keywords with this file 1007 with ctx.closing(_FileMapPair(fullfile)) as fmp: 1008 _fast_index_mem(new_dir, fullfile, fmp.map_obj, 0, index) 1009 1010 1011def _fast_index_mem(base_dir, fname, mem, skip, index): 1012 """ 1013 :param base_dir: Directory from which included files will be read 1014 :type base_dir: str 1015 :param fname: Filename associated with the memory-mapping 1016 :type fname: str 1017 :param mem: Memory-mapping of the file that should be indexed. 1018 :type mem: mmap.mmap 1019 :param skip: Number of bytes that should be skipped at the beginning of 1020 the file. 1021 :type skip: int 1022 :param index: Lookup table of the (recognized) keywords in the file and 1023 their position. The position is a pair of the address of 1024 the first data byte (which may be a whitespace), and of 1025 the ending slash, which can be seen as the end-point of 1026 the data, exclusive (i.e. you don't read that slash). If 1027 the last item in the tuple is not None, then it is in 1028 another file than the main file. 1029 :type index: Dictionary [str, (int, int, str)] 1030 """ 1031 # build an index of keywords found in the file 1032 cur_pos = skip 1033 1034 # read the entire file to end 1035 while cur_pos < mem.size(): 1036 1037 # skip lines that are comments or empty lines 1038 if (mem.find(b'-', cur_pos, cur_pos + 1) == cur_pos or 1039 mem.find(b'\r', cur_pos, cur_pos + 1) == cur_pos): 1040 cur_pos = mem.find(b'\n', cur_pos) + 1 1041 continue 1042 1043 # next bytes are a keyword; all keywords are written with a padding 1044 # up to a comments that says that this keyword is written by Petrel 1045 space_pos = mem.find(b' ', cur_pos) 1046 1047 # read the keyword itself; this identifies the section 1048 kw = dec(mem[cur_pos: space_pos]) 1049 1050 # skip the rest of this line 1051 cur_pos = mem.find(b'\n', cur_pos) + 1 1052 1053 # if the keyword has associated data, then figure out where in the file 1054 # that section is 1055 if _SECTIONS[kw]['slashed']: 1056 1057 # sometimes the keyword has another name in Petrel, but an entry 1058 # is generated in the .grdecl based on its type. the name in 1059 # Petrel is then added in a comment right below the keyword. here 1060 # we simply skip those comments 1061 if (mem.find(b'-', cur_pos, cur_pos + 1) == cur_pos and 1062 mem.find(b'-', cur_pos + 1, cur_pos + 2) == cur_pos + 1): 1063 cur_pos = mem.find(b'\n', cur_pos) + 1 1064 1065 # include statements is a special-case: the associated data 1066 # is not in this file 1067 if kw == 'INCLUDE': 1068 # filename is always in single quotes at the first data line 1069 start_pos = cur_pos + 1 1070 end_pos = mem.find(b'\'', start_pos) 1071 fname = dec(mem[start_pos: end_pos]) 1072 log.info('Indexing file: "%s"', fname) 1073 1074 # index the included file recursively. we don't keep that 1075 # file open, because it just contains a single keyword which 1076 # is only read at most one time later 1077 _fast_index_file(base_dir, fname, index) 1078 1079 # advance past the slash that ends the inclusion statement 1080 cur_pos = mem.find(b'/', end_pos) + 1 1081 1082 else: 1083 # range of the data that is associated with the keyword 1084 start_pos = cur_pos 1085 end_pos = mem.find(b'/', cur_pos) 1086 cur_pos = end_pos + 1 1087 1088 # no associated data, simply return a blank tuple 1089 else: 1090 start_pos = 0 1091 end_pos = 0 1092 1093 # make an entry of this keyword in the lookup table 1094 if kw != 'INCLUDE': 1095 index[kw] = (start_pos, end_pos, fname) 1096 1097 1098# definition of special characters that can be compared directly to the 1099# contents of the memory-map. notice that this is the inverse of the 1100# six.byte2int function that is used further below when manipulating a 1101# bytearray copy. 1102if sys.version_info[0] < 3: 1103 _SP = b' ' 1104 _NL = b'\n' 1105else: 1106 _SP = ord(b' ') 1107 _NL = ord(b'\n') 1108 1109 1110def _tokenize(mem, bgn, end): 1111 """Read tokens from the data part of a section. 1112 1113 :param mem: Memory-mapping of the file the section should be read from. 1114 :type mem: mmap.mmap 1115 :param bgn: Offset of the first byte that is part of the section. 1116 :type bgn: int 1117 :param end: Offset of the first next byte that is *not* part of the 1118 section (end range, exclusive). 1119 :type end: int 1120 :returns: List of ranges for each token. 1121 :rtype: Generator [(int, int)] 1122 """ 1123 # current position we are looking at 1124 cur = bgn 1125 1126 # tokenize while there is any tokens left in the stream 1127 while cur < end: 1128 1129 # skip all leading whitespace 1130 while (cur < end) and ((mem[cur] == _SP) or (mem[cur] == _NL)): 1131 cur = cur + 1 1132 1133 # the token begin at the first non-whitespace character 1134 fst = cur 1135 1136 while (cur < end) and ((mem[cur] != _SP) and (mem[cur] != _NL)): 1137 cur = cur + 1 1138 1139 # token ends at the next whitespace encountered 1140 lst = cur 1141 1142 # generate this token, if it is non-empty 1143 if fst < lst: 1144 yield (fst, lst) 1145 1146 1147def _decode(mem, fst, lst, typ): 1148 """Decode a token into a value. 1149 1150 :param mem: Memory-mapping of the file we are reading from. 1151 :type mem: mmap.mmap 1152 :param fst: Offset into the file of the start of the token. 1153 :type fst: int 1154 :param lst: Offset into the file of one past the last byte of the token. 1155 :type lst: int 1156 :param typ: Conversion routine for the value 1157 :type typ: str -> Any 1158 :returns: The value that is decoded, and the repeat count of that value. 1159 :rtype: (int, Any) 1160 """ 1161 # is a repeat count specified? 1162 asterisk = mem.find(b'*', fst, lst) 1163 1164 # no repeat count, just one single value 1165 if asterisk == -1: 1166 count = 1 1167 value = typ(mem[fst: lst]) 1168 1169 # repeat count specified, must decode separately 1170 else: 1171 count = int(mem[fst: asterisk]) 1172 value = typ(mem[asterisk+1: lst]) 1173 1174 return (count, value) 1175 1176 1177def _read_array(mem, bgn, end, dims, typ): 1178 """Read a section as a typed matrix. 1179 1180 :param mem: Memory-mapping of the file the section should be read from. 1181 :type mem: mmap.mmap 1182 :param bgn: Offset of the first byte that is part of the section. 1183 :type bgn: int 1184 :param end: Offset of the first next byte that is *not* part of the 1185 section (end range, exclusive). 1186 :type end: int 1187 :param dims: Tuple consisting of final dimensions for the array 1188 :type dims: List [int] 1189 :param typ: Conversion routine for the value 1190 :type typ: str -> Any 1191 :returns: Array of values read from the file 1192 :type: :class:`numpy.ndarray`, shape = dims, dtype = typ 1193 """ 1194 # allocate the memory of the array first 1195 total = int(numpy.product(dims)) 1196 data = numpy.empty((total, ), dtype=typ) 1197 1198 # index that we are currently going to assign the next token to 1199 ofs = 0 1200 1201 # process all tokens found in the stream 1202 for fst, lst in _tokenize(mem, bgn, end): 1203 # look for asterisk and get the repeat count 1204 count, value = _decode(mem, fst, lst, typ) 1205 1206 # assign a number of items in batch using the repeat count 1207 data[ofs: ofs+count] = value 1208 1209 # increment the counter to where the next token should start 1210 ofs = ofs + count 1211 1212 # if the latter part of the array is zero, then Petrel won't bother 1213 # to write it 1214 data[ofs: total] = 0 1215 1216 # reshape into proper grid before returning 1217 return numpy.reshape(data, dims) 1218 1219 1220# pylint: disable=too-many-arguments 1221def _read_section(sec_name, dims, typ, mem, sec_tbl): 1222 """Read a section from file, if it is present. 1223 1224 :param sec_name: Name of the section to be read 1225 :type sec_name: str 1226 :param dims: Tuple consisting of final dimensions for the array 1227 :type dims: List [int] 1228 :param typ: Conversion routine for the value 1229 :type typ: str -> Any 1230 :param mem: Memory-mapping of the file the section should be read 1231 from. 1232 :type mem: mmap.mmap 1233 :param sec_tbl: Lookup table for each section, in the file 1234 :type sec_tbl: Dict [str, (int, int)] 1235 """ 1236 log.info("Reading keyword %s", sec_name) 1237 1238 # get the address of the subsection in memory 1239 bgn, end, fname = sec_tbl[sec_name] 1240 1241 # if it is from another file, and not from the memory-map of the main 1242 # file, then it must be opened and data read from there. however, we 1243 # don't keep it open, because it probably only contain a single property 1244 # and will never be read again. 1245 if fname is not None: 1246 with ctx.closing(_FileMapPair(fname)) as fmp: 1247 data = _read_array(fmp.map_obj, bgn, end, dims, typ) 1248 else: 1249 data = _read_array(mem, bgn, end, dims, typ) 1250 return data 1251 1252 1253def _sec_mat(mem, sec_tbl, sec_name, dtype, usecols): 1254 """Read a data section matrix for a keyword. 1255 1256 :param mem: Memory-mapping of the file the section should be read 1257 from. 1258 :type mem: mmap.mmap 1259 :param sec_tbl: Lookup table for each section, in the file 1260 :type sec_tbl: Dict [str, (int, int)] 1261 :param sec_name: Name of the section to be read 1262 :type sec_name: str 1263 :param dtype: Expected NumPy data type of the values in the section 1264 :param usecols: Columns that should be read. Use None to read a freely 1265 formatted section. 1266 """ 1267 # get the address of the section within the file 1268 bgn, end, fname = sec_tbl[sec_name] 1269 1270 # if it is from another file, and not from the memory-map of the main 1271 # file, then it must be opened and data read from there. however, we 1272 # don't keep it open, because it probably only contain a single property 1273 # and will never be read again. 1274 if fname is not None: 1275 with ctx.closing(_FileMapPair(fname)) as fmp: 1276 data = _sec_mat_mem(fmp.map_obj, bgn, end, dtype, usecols) 1277 else: 1278 data = _sec_mat_mem(mem, bgn, end, dtype, usecols) 1279 1280 return data 1281 1282 1283def _sec_mat_mem(mem, bgn, end, dtype, usecols): 1284 # create view of the section that just includes the numbers 1285 buf = bytearray(membuf(mem, bgn, end)) 1286 1287 # if free format stream, we must copy it into memory and remove all the 1288 # newline, since Petrel is obviously not able to keep a consistent number 1289 # of column on each line. 1290 if usecols is None: 1291 _strip_newline(buf) 1292 1293 # let the library do the heavy lifting of this section; it is just an 1294 # array without any special formatting (anymore) 1295 with ctx.closing(six.BytesIO(buf)) as src: 1296 data = numpy.loadtxt(src, dtype=dtype, usecols=usecols) 1297 1298 return data 1299 1300 1301def _read_specgrid(mem, sec_tbl): 1302 """Read the grid dimensions. 1303 1304 :param mem: Memory-mapping of the file the section should be read 1305 from. 1306 :type mem: mmap.mmap 1307 :param sec_tbl: Lookup table for each section, in the file 1308 :type sec_tbl: Dict [str, (int, int)] 1309 """ 1310 # read the first three numbers from the section 1311 log.info("Reading keyword SPECGRID") 1312 spec = _sec_mat(mem, sec_tbl, 'SPECGRID', numpy.int32, (0, 1, 2)) 1313 1314 # reverse the dimensions (since the i axis varies fastest), before 1315 # assigning it to the target dictionary 1316 return spec[::-1] 1317 1318 1319_CR = six.byte2int(b'\r') 1320_LF = six.byte2int(b'\n') 1321_WS = six.byte2int(b' ') 1322 1323 1324def _strip_newline(data): 1325 """Replace newlines with regular whitespace. This allows us to read an 1326 array with a known total number of arrays, but where we don't know how 1327 many column are formatted on each line. 1328 """ 1329 # according to Guido van Rossum, in-place bytearray operations are "rare", 1330 # so it is OK for the bytearray to reuse string methods that return a copy. 1331 # hopefully, this implementation is straight-forward enough so that it can 1332 # be optimized into something that use less time than having to allocate a 1333 # string copy for every line. 1334 for i in range(len(data)): 1335 if (data[i] == _CR) or (data[i] == _LF): 1336 data[i] = _WS 1337 1338 1339def _axes_map(mem, section_index): 1340 """Rotation matrix and offset vector for axes mapping. 1341 1342 :returns: (A, b) such that each point x in coordinate array should be 1343 rotated with A(x-b)+b 1344 """ 1345 # if a mapping is specified in the file, then use this, otherwise 1346 # return a unit mapping, so that we can proceed along the same code path 1347 if 'MAPAXES' in section_index: 1348 vec = _read_section('MAPAXES', (3, 2), 1349 numpy.float64, mem, section_index) 1350 A, b = _axes_rot(vec[0, 0], vec[0, 1], 1351 vec[1, 0], vec[1, 1], 1352 vec[2, 0], vec[2, 1]) 1353 else: 1354 A, b = _no_rot() 1355 1356 return (A, b) 1357 1358 1359def _read_coord(dims, mem, sec_tbl, A, b): 1360 """Read pillar coordinates. 1361 1362 :param mem: Memory-mapping of the file the section should be read 1363 from. 1364 :type mem: mmap.mmap 1365 :param sec_tbl: Lookup table for each section, in the file 1366 :type sec_tbl: Dict [str, (int, int)] 1367 :param A: Rotation matrix 1368 :param b: Rotation offset 1369 """ 1370 # extract dimensions of the table into more sensible local names 1371 num_j = dims[1] 1372 num_i = dims[2] 1373 1374 # pillar coordinates don't have any repeated values, so they are always 1375 # written with 'un-starred' floats. 1376 log.info("Reading keyword COORD") 1377 coord = _sec_mat(mem, sec_tbl, 'COORD', numpy.float64, None) 1378 1379 # reformat the section into the correct format 1380 coord = numpy.reshape(coord, (num_j + 1, num_i + 1, 2, 3)) 1381 1382 # rotate the coordinate pillars 1383 coord -= b 1384 numpy.dot(coord, A) 1385 coord += b 1386 1387 # pillar coordinates can now be assigned into output structure 1388 return coord 1389 1390 1391def _zcorn_dims(dims): 1392 """Dimensions for the ZCORN array, given grid dimensions""" 1393 return (dims[0], 2, dims[1], 2, dims[2], 2) 1394 1395 1396# include statements in a wrapper file written by us look like this: 1397_INCL_STMT = re.compile(enc(r'INCLUDE\ *\n\'(.*)\'\n/.*\n'), re.MULTILINE) 1398 1399 1400def read(filename): 1401 """Read an Eclipse input grid into a dictionary. 1402 """ 1403 # get the canonical path of the file to read 1404 path = os.path.expanduser(filename) 1405 1406 # open the file and create a memory-mapping so that we can read 1407 # the input file as if it was a string buffer 1408 with ctx.closing(_FileMapPair(path)) as fmp: 1409 mem = fmp.map_obj 1410 1411 # determine first if there are a header section at the top 1412 # of the file. if there is, the start reading after it 1413 props, skip = _parse_meta(mem) 1414 mem.seek(skip, os.SEEK_SET) 1415 1416 # notice that there is an optimization opportunity since 1417 # Petrel only uses a subset of the format 1418 if ('Exported by' in props and 1419 props['Exported by'].startswith('PyReSiTo (multi) v1.0')): 1420 log.info("File is PyReSiTo multi-file") 1421 1422 grid = _read_multi(path, mem) 1423 1424 elif ('Exported by' in props and 1425 props['Exported by'].startswith('Petrel')): 1426 log.info("File is exported by Petrel") 1427 grid = {} 1428 section_index = _fast_index(path, mem, skip) 1429 1430 # specgrid is special because it must be read before everything 1431 # else, so that we know how to read other sections 1432 dims = _read_specgrid(mem, section_index) 1433 grid['DIMENS'] = dims[::-1] 1434 1435 # coord is special because we need to possibly rotate it 1436 A, b = _axes_map(mem, section_index) 1437 grid['COORD'] = _read_coord(dims, mem, section_index, A, b) 1438 1439 # actnum is special because we need it to set the mask 1440 grid['ACTNUM'] = _read_section('ACTNUM', dims, numpy.int32, 1441 mem, section_index).astype( 1442 numpy.bool) 1443 mask = numpy.logical_not(grid['ACTNUM']) 1444 1445 # zcorn is special because it has a different format (and 1446 # cannot have inactive elements) 1447 grid['ZCORN'] = _read_section('ZCORN', _zcorn_dims(dims), 1448 numpy.float64, mem, 1449 section_index) 1450 1451 # read general cell properties that are marked as result 1452 # properties, from the file 1453 for kw in section_index: 1454 props = _SECTIONS[kw] 1455 if props['result']: 1456 # read the data that is stored in the file 1457 data = _read_section(kw, dims, _kw_dtype(kw), mem, 1458 section_index) 1459 1460 # create an array where the inactive region is masked 1461 # out (won't appear on plots, in statistics etc.) 1462 grid[kw] = numpy.ma.array(data=data, mask=mask) 1463 1464 # use the regular, but slower parser 1465 else: 1466 with ctx.closing(_Parser(path, mem)) as parser: 1467 grid = parser.parse() 1468 1469 return grid 1470 1471 1472def _read_multi(wrapper_name, mem): 1473 """ 1474 :param wrapper_name: Name of the file containing the inclusion wrapper. 1475 This file is only interesting because the name of 1476 the dimensions file is constructed based on it. 1477 :type wrapper_name: str 1478 :param mem: Handle to memory-mapping of the wrapper file. 1479 :type mem: mmap.mmap 1480 """ 1481 log.info("Reading wrapper for include statements") 1482 # parse the entire buffer for inclusion clauses 1483 buf = membuf(mem, 0, mem.size()) 1484 files = [dec(match.group(1)) 1485 for match in re.finditer(_INCL_STMT, buf)] 1486 buf = None 1487 1488 # strip off the extension of the filename. this stem is then 1489 # recombined to get the name of the dimension file, which is 1490 # not included by the wrapper but is part of the exported files 1491 stem, _ = os.path.splitext(wrapper_name) 1492 1493 dim_name = '{0}_dimens.grdecl'.format(stem) 1494 log.info("Reading grid dimensions from \"%s\"", dim_name) 1495 ni, nj, nk = [int(x) for x in numpy.loadtxt( 1496 dim_name, dtype=numpy.int32, skiprows=1, comments='/')] 1497 log.info("Grid has dimensions %d x %d x %d", ni, nj, nk) 1498 1499 # shape of the data properties; notice that the order of the dimensions 1500 # is reversed (in Python) compared to the way they are stored in the file 1501 dims = [nk, nj, ni] 1502 1503 # read the active flag from file 1504 act_name = '{0}_actnum.grdecl'.format(stem) 1505 log.info("Reading cell activeness from \"%s\"", act_name) 1506 actnum = numpy.reshape(numpy.loadtxt( 1507 act_name, dtype=numpy.bool, skiprows=1, comments='/'), 1508 (nk, nj, ni)) 1509 1510 coord_name = '{0}_coord.grdecl'.format(stem) 1511 log.info("Reading pillar coordinates from \"%s\"", coord_name) 1512 coord = numpy.reshape(numpy.loadtxt( 1513 coord_name, dtype=numpy.float, skiprows=1, comments='/'), 1514 (nj + 1, ni + 1, 2, 3)) 1515 1516 zcorn_name = '{0}_zcorn.grdecl'.format(stem) 1517 log.info("Reading corner depths from \"%s\"", zcorn_name) 1518 zcorn = numpy.reshape(numpy.loadtxt( 1519 zcorn_name, dtype=numpy.float, skiprows=1, comments='/'), 1520 (nk, 2, nj, 2, ni, 2)) 1521 1522 # create the embryonic grid containing at least the format 1523 grid = {'DIMENS': [ni, nj, nk], 1524 'COORD': coord, 1525 'ZCORN': zcorn, 1526 'ACTNUM': actnum, 1527 } 1528 1529 # remove the known properties from the list that is already loaded 1530 for prop in ['actnum', 'coord', 'zcorn']: 1531 filename = '{0}_{1}.grdecl'.format(os.path.basename(stem), prop) 1532 if filename in files: 1533 files.remove(filename) 1534 1535 # read each extra property from its own file 1536 stem_dir = os.path.dirname(stem) 1537 for filename in files: 1538 _read_multi_prop(os.path.join(stem_dir, filename), dims, grid) 1539 1540 return grid 1541 1542 1543def _read_multi_prop(filename, dims, grid): 1544 """ 1545 :param filename: Name of the file containing the property. 1546 :type filename: str 1547 :param dims: Dimensions of the grid 1548 :type dims: List [int] 1549 :param grid: Structure that will receive the property. 1550 :type grid: dict 1551 """ 1552 with open(filename, 'rb') as fileobj: 1553 # read the name of the property from the first line of the file 1554 propname = codecs.ascii_decode(fileobj.readline())[0][:-1].strip() 1555 log.info("Reading keyword %s from \"%s\"", propname, filename) 1556 1557 # rest of the file contains the property 1558 data = numpy.loadtxt(fileobj, comments='/', 1559 dtype=_kw_dtype(propname)) 1560 1561 # reformat the property to contain the right shape 1562 grid[propname] = numpy.reshape(data, dims) 1563 1564 1565# rules on how to reformat some keywords; the rule is expressed as a function 1566# taking the actual data and then returning a tuple which is the new shape that 1567# can be passed to the reshape function. if a keyword is not in this list, then 1568# it will be written with a single entry on each line. 1569_SPECIAL_SHAPE = { 1570 'COORD': lambda ni, nj, nk: ((nj + 1) * (ni + 1), (2 * 3)), 1571} 1572 1573 1574# formats for keywords that have known value domains 1575_SPECIAL_FORMAT = { 1576 'COORD': '%8.2f', 1577 'ZCORN': '%7.2f', 1578 'ACTNUM': '%d', 1579} 1580 1581 1582def _write_kw(fileobj, propname, values, lookup, dimens): 1583 """Write a section for a single keyword to an already open file. 1584 1585 :param fileobj: Where to serialize the data 1586 :param propname: Name of the property from the grid to write 1587 :param values: Array with the values themselves 1588 :param lookup: Mapping of properties to identfier string used in file 1589 :param dimens: Dimensions of the grid, (ni, nj, nk) 1590 """ 1591 # structure of the values: if we can make a fixed number of columns, 1592 # then we attempt to do so, otherwise we just write everything as a 1593 # long stream (faster to load, when there are no exceptions) 1594 if propname in _SPECIAL_SHAPE: 1595 shape = _SPECIAL_SHAPE[propname](*dimens) 1596 else: 1597 shape = (numpy.prod(values.shape), 1) 1598 1599 # make sure we get an appropriate number of digits on the keywords that 1600 # are known to us 1601 if propname in _SPECIAL_FORMAT: 1602 fmt = _SPECIAL_FORMAT[propname] 1603 else: 1604 # print integral types without inappropriate fraction digits 1605 if numpy.issubdtype(values.dtype, numpy.integer): 1606 fmt = '%d' 1607 else: 1608 fmt = '%11.6f' 1609 1610 # write the keyword itself, on its own line 1611 fileobj.write(enc('{0:<8s}\n'.format(lookup(propname)))) 1612 1613 # replace NaN values (which we typically use to trap access to inactive 1614 # blocks) with zero (which Petrel uses a placeholder for inactive blocks) 1615 zero = 0 if numpy.issubdtype(values.dtype, numpy.integer) else 0. 1616 data = numpy.reshape(numpy.copy(numpy.ma.getdata(values)), shape) 1617 data[numpy.isnan(data)] = zero 1618 1619 # data array itself; NumPy does the job for us 1620 numpy.savetxt(fileobj, data, fmt=fmt, delimiter='\t') 1621 1622 # end of field for most fields in the loaded grid file 1623 fileobj.write(enc('{0:s}\n'.format(lookup('/')))) 1624 1625 1626def _write_meta(fileobj, subtype): 1627 """Write a header which enable us to recognize our own files later.""" 1628 fileobj.write(_GEN_BGN) 1629 fileobj.write(enc( 1630 '-- Exported by: PyReSiTo ({0}) v1.0\r\n'.format(subtype))) 1631 fileobj.write(_GEN_END) 1632 1633 1634def _write_dimens_ecl(fileobj, dimens): 1635 """Write grid dimensions to an Eclipse wrapper file.""" 1636 fileobj.write(enc('SPECGRID\n{0:d} {1:d} {2:d} 1 F\n/\n'.format( 1637 *dimens))) 1638 1639 1640def _write_dimens_ext(fileobj, dimens, lookup): 1641 """Write grid dimensions to a new external grid file.""" 1642 # decode grid dimensions since we cannot expand inline in formatting tuple 1643 ni, nj, nk = dimens 1644 1645 # write the keyword containg the 1646 fileobj.write(enc('{0:<8s}\n{1:d} {2:d} {3:d}\n{4:s}\n'.format( 1647 lookup('DIMENS'), ni, nj, nk, lookup('/')))) 1648 1649 1650# these keywords are written in the beginning of the file in this order, 1651# and then the rest of any properties we have stuffed into the grid object 1652# follows after. 1653_LEADING = ['COORD', 'ZCORN', 'ACTNUM'] 1654 1655 1656def _write_multi(path, base, grid, lookup, dialect): 1657 """ 1658 :param path: Path to all files that are generated 1659 :param base: Prefix for all the files that are generated 1660 :param grid: Dictionary of properties 1661 :param lookup: Lookup function for keyword strings 1662 :param dialect: Simulator that the file is intended for 1663 """ 1664 log.info('Writing multiple files') 1665 stem = os.path.join(path, base) 1666 1667 # the size is written into a separate file, so it can be loaded from there 1668 dimens = grid['DIMENS'] 1669 log.info('Grid size is %d x %d x %d', *dimens) 1670 log.info('Writing keyword DIMENS') 1671 with open('{0}_dimens.grdecl'.format(stem), 'wb') as fileobj: 1672 _write_dimens_ext(fileobj, dimens, lookup) 1673 1674 # size is also written directly into the wrapper file, and not included 1675 with open('{0}.grdecl'.format(stem), 'wb') as wrapper: 1676 # header first 1677 log.info('Writing wrapper heading') 1678 _write_meta(wrapper, 'multi') 1679 1680 # internal dimensioning is written specially with other keywords 1681 log.info('Writing size of grid') 1682 if dialect == 'ecl': 1683 _write_dimens_ecl(wrapper, dimens) 1684 else: 1685 assert False 1686 1687 # start with all the predefined leading keywords, and then write the 1688 # rest of them in alphabetical order 1689 order = ([x for x in grid if x in _LEADING] + 1690 sorted([x for x in grid if x not in _LEADING + ['DIMENS']])) 1691 1692 # all other keywords than the size is written to each their own file; 1693 # use bytes formatted files since NumPy requires this to dump matrix 1694 for kw in order: 1695 log.info('Writing keyword %s', kw.upper()) 1696 filename = '{0}_{1}.grdecl'.format(stem, kw.lower()) 1697 with open(filename, 'wb') as fileobj: 1698 _write_kw(fileobj, kw, grid[kw], lookup, dimens) 1699 1700 # write an include statement in the wrapper file; it is customary 1701 # to write only a relative filename and not a full path 1702 wrapper.write( 1703 enc('\n{0:<8s}\n\'{1:s}_{2:s}.grdecl\'\n{3:s}\n'.format( 1704 lookup('INCLUDE'), base, kw.lower(), lookup('/')))) 1705 1706 1707def _write_single(path, base, grid, lookup, dialect): 1708 """ 1709 :param path: Path to all files that are generated 1710 :param base: Prefix for all the files that are generated 1711 :param grid: Dictionary of properties 1712 :param lookup: Lookup function for keyword strings 1713 :param dialect: Simulator that the file is intended for 1714 """ 1715 log.info('Writing Single files') 1716 stem = os.path.join(path, base) 1717 dimens = grid['DIMENS'] 1718 1719 # size is also written directly into the file, 1720 with open('{0}.grdecl'.format(stem), 'wb') as fileobj: 1721 # header first 1722 log.info('Writing wrapper heading') 1723 _write_meta(fileobj, 'single') 1724 1725 # internal dimensioning is written specially with other keywords 1726 log.info('Writing size of grid') 1727 if dialect == 'ecl': 1728 _write_dimens_ecl(fileobj, dimens) 1729 else: 1730 assert False 1731 # 1732 # log.info('Grid size is %d x %d x %d', *dimens) 1733 # log.info('Writing keyword DIMENS') 1734 # _write_dimens_ext(fileobj, dimens, lookup) 1735 1736 # start with all the predefined leading keywords, and then write the 1737 # rest of them in alphabetical order 1738 order = ([x for x in grid if x in _LEADING] + 1739 sorted([x for x in grid if x not in _LEADING + ['DIMENS']])) 1740 1741 # all other keywords than the size is written to the file; 1742 # use bytes formatted files since NumPy requires this to dump matrix 1743 for kw in order: 1744 log.info('Writing keyword %s', kw.upper()) 1745 _write_kw(fileobj, kw, grid[kw], lookup, dimens) 1746 1747 1748# a dialect contain the strings that are written for each keyword in the case 1749# that they are not the same as the property name. the end-of-field marker has 1750# been coded as a keyword with the symbol '/', as in Eclipse. 1751_DIALECT = { 1752 'ecl': {}, 1753 'cmg': { 1754 'DIMENS': 'CORNER', 1755 'ACTNUM': 'NULL', 1756 'PORO': 'POR', 1757 'PERMX': 'PERMI', 1758 'PERMY': 'PERMJ', 1759 'PERMZ': 'PERMK', 1760 'MULTX': 'TRANSI', 1761 'MULTY': 'TRANSJ', 1762 'FIPNUM': 'ISECTOR', 1763 'NTG': 'NETGROSS', 1764 'EQLNUM': 'ITYPE', 1765 'SWATINIT': 'SWINIT ALL', 1766 '/': '' 1767 } 1768} 1769 1770 1771def write(filename, grid, dialect='ecl', multi_file=True): 1772 """Write a grid to corner-point text format, formatted in such a way 1773 that it can easily be read again. 1774 1775 :param filename: Name of the filename containing a wrapper for all 1776 the properties. This will also serve as the stem for 1777 all individual property files. 1778 :type filename: str 1779 :param grid: Grid object containing values for all properties. 1780 :type grid: dict 1781 :param dialect: Name of the simulator that we are writing files for. 1782 Currently, this must be 'ecl'. 1783 :type dialect: str 1784 :param multi_file: Write properties in separate files, instead of putting 1785 everything in one large file. 1786 :type multi_file: bool 1787 """ 1788 # inner helper function that curries the dialect, and returns the keyword 1789 # from the translation dictionary if present, otherwise return unmodified. 1790 # this function is passed to all the other, and is used to lookup the 1791 # correct string to write for each keyword. 1792 trans_dict = _DIALECT[dialect] 1793 1794 def _lookup(kw): 1795 return trans_dict[kw] if kw in trans_dict else kw 1796 1797 # dismantle the filename into various parts; we are going to use the 1798 # relative filenames in the wrapper file 1799 path, base = os.path.split(filename) 1800 base, _ = os.path.splitext(base) 1801 1802 # delegate to specialized routine, that directs writing of individual 1803 # properties to either each their file, or one common file 1804 if multi_file: 1805 _write_multi(path, base, grid, _lookup, dialect) 1806 else: 1807 _write_single(path, base, grid, _lookup, dialect) 1808 1809 1810def _stretches(data): 1811 """\ 1812 Identify stretches of data with equal values: 1813 1814 :param data: Array which is scanned for stretches of equal values 1815 :type data: :class:`numpy.array` 1816 :return: 1817 :rtype: Generator[(int, Any)] 1818 """ 1819 # picture cursors in between every number; this becomes the array of 1820 # numbers before and after every such cursor; looking backward and forwards 1821 before = data[0:-1] 1822 after = data[1:] 1823 1824 # get a list of indices for the positions that change; add one since the 1825 # indices will be relative to the 'after' array, which is skewed one from 1826 # the original. we must add zero to this list, as the first element is 1827 # always new, but is never registered as such (nothing to differ from) 1828 changes = numpy.where(numpy.not_equal(before, after))[0] + 1 1829 changes = numpy.concatenate((numpy.array((0,), dtype=changes.dtype), 1830 changes)) 1831 1832 # changes is the start of each run; find the lengths of each run. here 1833 # we have the dual of the case above with the first item; the last item 1834 # runs until the end of the array, but there is no change after it so 1835 # we must register it manually 1836 length = numpy.diff( 1837 numpy.concatenate((changes, numpy.array((len(data),), 1838 dtype=changes.dtype)))) 1839 1840 # return a generator of every individual value and its associated 1841 # consecutive count in the stream 1842 for row, count in enumerate(length): 1843 yield (count, data[changes[row]]) 1844 1845 1846def _write_compr_full(f_obj, data, fmt): 1847 """\ 1848 Write a data field as a keyword that can be included into an already 1849 opened grid file, assuming a full data array. 1850 1851 :param f_obj: File-like object to write to 1852 :type f_obj: :class:`io.IOBase` 1853 :param data: Data array to be written 1854 :type data: :class:`numpy.array` 1855 :param fmt: Format of a single item, on the form used to specify 1856 formats in the build-in routines, but without percent 1857 or braces. 1858 :type fmt: str 1859 """ 1860 # format strings for writing a single value and multiple values, 1861 # respectively; we prepare these outside of the loop. as a special 1862 # case, we write nulls in a particularily short format 1863 single_fmt = '{{0:{0:s}}}\n'.format(fmt) 1864 multi_fmt = '{{0:d}}*{{1:{0:s}}}\n'.format(fmt) 1865 single_null = '0\n' if fmt[-1] == 'd' else '0.\n' 1866 multi_null = '{0:d}*0\n' if fmt[-1] == 'd' else '{0:d}*0.\n' 1867 1868 # then write all the data in batch; length first, then the data, 1869 # using the data itself for criteria for what constitute a stretch 1870 for count, value in _stretches(data): 1871 if value: 1872 if count == 1: 1873 f_obj.write(enc(single_fmt.format(value))) 1874 else: 1875 f_obj.write(enc(multi_fmt.format(count, value))) 1876 else: 1877 if count == 1: 1878 f_obj.write(enc(single_null)) 1879 else: 1880 f_obj.write(enc(multi_null.format(count))) 1881 1882 1883# pylint: disable=too-many-branches 1884def _write_compr_masked(f_obj, data, mask, fmt): 1885 """\ 1886 Write a data field as a keyword that can be included into an already 1887 opened grid file, assuming a masked (sparse) data array. 1888 1889 :param f_obj: File-like object to write to 1890 :type f_obj: :class:`io.IOBase` 1891 :param data: Data array to be written 1892 :type data: :class:`numpy.array` 1893 :param fmt: Format of a single item, on the form used to specify 1894 formats in the build-in routines, but without percent 1895 or braces. 1896 :type fmt: str 1897 """ 1898 # format strings for writing a single value and multiple values. these 1899 # are the same as for the _write_compr_full routine, but we cannot make 1900 # them global variables since they depend on the format 1901 single_fmt = '{{0:{0:s}}}\n'.format(fmt) 1902 multi_fmt = '{{0:d}}*{{1:{0:s}}}\n'.format(fmt) 1903 single_null = '0\n' if fmt[-1] == 'd' else '0.\n' 1904 multi_null = '{0:d}*0\n' if fmt[-1] == 'd' else '{0:d}*0.\n' 1905 1906 # Petrel reads these as masked values 1907 single_blank = 'NaN\n' 1908 multi_blank = '{0:d}*NaN\n' 1909 1910 # we haven't written any values yet, so start reading from the beginning 1911 accum = 0 1912 1913 # start out by enumerating stretches which is either masked, or unmasked 1914 # pylint: disable=too-many-nested-blocks 1915 for count_subset, is_blank in _stretches(mask): 1916 1917 # if we are to write a stretch of blank values, then we don't need 1918 # any formatting for the number, just the special mask value 1919 if is_blank: 1920 if count_subset == 1: 1921 f_obj.write(enc(single_blank)) 1922 else: 1923 f_obj.write(enc(multi_blank.format(count_subset))) 1924 1925 # if we have a subset with non-blanks, then process this subset as 1926 # if it were an array in itself, which enables us to compress long 1927 # stretches of equal values there, too 1928 else: 1929 # fast path 1930 if count_subset == 1: 1931 value = data[accum] 1932 if value: 1933 f_obj.write(enc(single_fmt.format(value))) 1934 else: 1935 f_obj.write(enc(single_null)) 1936 1937 # more than one value in the subset 1938 else: 1939 subset = data[accum:(accum+count_subset)] 1940 for count, value in _stretches(subset): 1941 if value: 1942 if count == 1: 1943 f_obj.write(enc(single_fmt.format(value))) 1944 else: 1945 f_obj.write(enc(multi_fmt.format(count, value))) 1946 else: 1947 if count == 1: 1948 f_obj.write(enc(single_null)) 1949 else: 1950 f_obj.write(enc(multi_null.format(count))) 1951 1952 # keep track of how many items we have written totally 1953 accum += count_subset 1954 1955 1956def _write_compr_any(f_obj, keyw, cube, fmt): 1957 """\ 1958 Write a data field as a keyword that can be included into an already 1959 opened grid file. 1960 1961 :param f_obj: File-like object to write to 1962 :type f_obj: :class:`io.IOBase` 1963 :param keyw: Name of the keyword to put in the header; this should 1964 not be greater than eight characters 1965 :type keyw: str 1966 :param cube: Data cube to be written 1967 :type cube: :class:`numpy.array` 1968 :param fmt: Format of a single item, on the form used to specify 1969 formats in the build-in routines, but without percent 1970 or braces. 1971 :type fmt: str 1972 """ 1973 # write the keyword first, on its own line 1974 f_obj.write(enc('{0:8s}\n'.format(keyw.upper()))) 1975 1976 # if the data is sparse, i.e. not defined over the entire field, 1977 # then use a different writing algorithm, then if it's full 1978 if hasattr(cube, 'mask'): 1979 _write_compr_masked(f_obj, numpy.ravel(cube.data), 1980 numpy.ravel(cube.mask), fmt) 1981 else: 1982 _write_compr_full(f_obj, numpy.ravel(cube), fmt) 1983 1984 # write a single slash at the end to terminate the field 1985 f_obj.write(enc('/\n')) 1986 1987 1988def write_compressed(fname, keyw, cube, *, fmt="12.6e"): 1989 """\ 1990 Write a data field as a keyword that can be included into a grid file. 1991 1992 :param fname: Path to the output file to write to 1993 :type fname: str 1994 :param keyw: Name of the keyword to put in the header; this should 1995 not be greater than eight characters 1996 :type keyw: str 1997 :param cube: Data cube to be written 1998 :type cube: :class:`numpy.array` 1999 :param fmt: Format of a single item, on the form used to specify 2000 formats in the build-in routines, but without percent 2001 or braces. 2002 :type fmt: str 2003 """ 2004 # if we specified a string, this is the path to the file, so open it 2005 # and start writing; otherwise, it is a file-like object that is already 2006 # opened, so just continue writing directly 2007 if isinstance(fname, str): 2008 with open(os.path.expanduser(fname), 'wb') as f_obj: 2009 _write_compr_any(f_obj, keyw, cube, fmt=fmt) 2010 else: 2011 _write_compr_any(fname, keyw, cube, fmt=fmt) 2012 2013 2014def shape(grdecl): 2015 """\ 2016 Get shape of field data cube. 2017 2018 :param grdecl: Corner-point grid structure 2019 :type grdecl: dict 2020 :return: (num_k, num_j, num_i) 2021 :rtype: (int, int, int) 2022 """ 2023 return tuple(reversed(grdecl['DIMENS'])) 2024 2025 2026def read_prop(fname, dims, typ=numpy.float64, prop=None, *, mask=None): 2027 """\ 2028 Read a property from a text file into a dictionary. This is akin to 2029 Petrel's Import onto selection popup menu choice. 2030 2031 :param fname: File name to load property from. Currently, this must be 2032 a file in the filesystem and cannot be a file-like object. 2033 :type fname: str 2034 :param dims: Dimensions of the data cube, in a format compatible with 2035 the return of the shape function, i.e. (nk, nj, ni) 2036 :type dims: (int, int, int) 2037 :param typ: Data type of the data to be loaded 2038 :type typ: :class:`numpy.dtype` 2039 :param prop: Dictionary where the keyword will be added. If this is 2040 None, then a new dictionary will be created. 2041 :type prop: dict 2042 :param mask: Existing mask for inactive elements; this will be 2043 combined with the mask inherit in the data, if specified. 2044 If you have loaded the grid, this should be the ACTNUM 2045 field 2046 :type mask: Optional[:class:`numpy.ndarray`] 2047 :return: Dictionary where the property being read is added as a key 2048 :rtype: dict 2049 """ 2050 # if output isn't given, then add to brand new dictionary 2051 if prop is None: 2052 prop = dict() 2053 2054 # allocate memory 2055 num_cells = numpy.product(dims) 2056 data = numpy.empty((num_cells,), dtype=typ) 2057 2058 # read raw array of values into memory first 2059 with ctx.closing(_OwningLexer(os.path.expanduser(fname))) as lex: 2060 2061 # read the first keyword that appears in the file 2062 kw = dec(lex.expect(keyword)) 2063 if not lex.at_eof(): 2064 lex.expect(newline) 2065 2066 # read numbers from file; each read may potentially fill a different 2067 # length of the array. use an almost identical type of loop for 2068 # integers and floating-point, but avoid using test inside the loop 2069 # or polymorphism through function pointer, to ease optimization 2070 num_read = 0 2071 if numpy.issubdtype(typ, numpy.integer): 2072 while num_read < num_cells: 2073 count, value = lex.expect_multi_card() 2074 data[num_read:(num_read+count)] = value 2075 num_read += count 2076 else: 2077 while num_read < num_cells: 2078 count, value = lex.expect_numbers() 2079 data[num_read:(num_read+count)] = value 2080 num_read += count 2081 2082 # verify that we got exactly the number of corners we wanted 2083 if num_read > num_cells: 2084 raise GrdEclError(lex.path, lex.pos(), 2085 "Expected {0:d} numbers, but got {1:d}".format( 2086 num_cells, num_read)) 2087 2088 # after all the data, there should be a sentinel slash 2089 lex.expect(endrec) 2090 2091 # reformat the data into a natural kji/bfr hypercube 2092 data = numpy.reshape(data, dims) 2093 2094 # inherit inactive cells by missing values in the file 2095 blanked = numpy.isnan(data) 2096 2097 # if there is either specified a mask, or there is an inherit mask, then 2098 # we need to generate a masked array 2099 if (mask is not None) or numpy.any(blanked): 2100 # combine the two masks into one: 2101 if mask is not None: 2102 blanked = numpy.logical_or(blanked, mask) 2103 2104 # update the values to not have anything where there is now a mask 2105 if numpy.issubdtype(data.dtype, numpy.integer): 2106 data[blanked] = 0 2107 else: 2108 data[blanked] = numpy.nan 2109 2110 prop[kw] = numpy.ma.array(data=data, dtype=data.dtype, 2111 mask=blanked) 2112 2113 # no mask at all, seems to be a full matrix 2114 else: 2115 prop[kw] = data 2116 2117 return prop 2118 2119 2120def main(*args): 2121 """Read a data file to see if it parses OK.""" 2122 # setup simple logging where we prefix each line with a letter code 2123 logging.basicConfig(level=logging.INFO, 2124 format="%(levelname).1s: %(message).76s") 2125 2126 # parse command-line arguments 2127 parser = argparse.ArgumentParser() 2128 parser.add_argument("filename") 2129 parser.add_argument("output", type=str, nargs='?', default=None) 2130 parser.add_argument("--dialect", choices=['ecl'], default='ecl') 2131 parser.add_argument("--verbose", action='store_true') 2132 parser.add_argument("--quiet", action='store_true') 2133 cmd_args = parser.parse_args(*args) 2134 2135 # adjust the verbosity of the program 2136 if cmd_args.verbose: 2137 logging.getLogger(__name__).setLevel(logging.DEBUG) 2138 if cmd_args.quiet: 2139 logging.getLogger(__name__).setLevel(logging.NOTSET) 2140 2141 # process file 2142 grid = read(cmd_args.filename) 2143 2144 # if an output file was specified, then write it 2145 if cmd_args.output is not None: 2146 write(cmd_args.output, grid, cmd_args.dialect, True) 2147 2148 2149# executable library 2150if __name__ == "__main__": 2151 main(sys.argv[1:])
log =
<Logger misc.grdecl (WARNING)>
class
GrdEclError(builtins.Exception):
80class GrdEclError (Exception): 81 """Exception thrown if invalid syntax is encountered.""" 82 83 def __init__(self, path, pos, message): 84 line, column = pos 85 # include the filename if any was specified. only print the name, 86 # not the entire path, since this may be a very long string 87 fmt = ("in \"{0}\" " if path is not None else "") + "at ({1},{2}): {3}" 88 basename = (os.path.basename(path) if path else "") 89 super(GrdEclError, self).__init__( 90 fmt.format(basename, line, column, message))
Exception thrown if invalid syntax is encountered.
GrdEclError(path, pos, message)
83 def __init__(self, path, pos, message): 84 line, column = pos 85 # include the filename if any was specified. only print the name, 86 # not the entire path, since this may be a very long string 87 fmt = ("in \"{0}\" " if path is not None else "") + "at ({1},{2}): {3}" 88 basename = (os.path.basename(path) if path else "") 89 super(GrdEclError, self).__init__( 90 fmt.format(basename, line, column, message))
Inherited Members
- builtins.BaseException
- with_traceback
- args
whitespace =
re.compile(b'[\\ \\t]+|--[^\\r]*')
newline =
re.compile(b'\\r?\\n')
cardinal =
re.compile(b'[0-9]+')
multint =
re.compile(b'([0-9]+\\*)?([0-9]+)')
single =
re.compile(b'-?([nN][aA][nN]|([0-9]?\\.[0-9]+|[0-9]+(\\.[0-9]+)?)([eEdD][-+]?[0-9]+)?)')
number =
re.compile(b'([0-9]+\\*)?(-?([nN][aA][nN]|([0-9]?\\.[0-9]+|[0-9]+(\\.[0-9]+)?)([eEdD][-+]?[0-9]+)?))')
endrec =
re.compile(b'/')
flag =
re.compile(b'([0-9]+\\*)?([01])')
keyword =
re.compile(b'[a-zA-Z_]+')
filename =
re.compile(b"(\\'[^\\']*\\')|([a-zA-Z0-9_\\-\\.\\:\\\\\\ ]+)")
def
read(filename):
1401def read(filename): 1402 """Read an Eclipse input grid into a dictionary. 1403 """ 1404 # get the canonical path of the file to read 1405 path = os.path.expanduser(filename) 1406 1407 # open the file and create a memory-mapping so that we can read 1408 # the input file as if it was a string buffer 1409 with ctx.closing(_FileMapPair(path)) as fmp: 1410 mem = fmp.map_obj 1411 1412 # determine first if there are a header section at the top 1413 # of the file. if there is, the start reading after it 1414 props, skip = _parse_meta(mem) 1415 mem.seek(skip, os.SEEK_SET) 1416 1417 # notice that there is an optimization opportunity since 1418 # Petrel only uses a subset of the format 1419 if ('Exported by' in props and 1420 props['Exported by'].startswith('PyReSiTo (multi) v1.0')): 1421 log.info("File is PyReSiTo multi-file") 1422 1423 grid = _read_multi(path, mem) 1424 1425 elif ('Exported by' in props and 1426 props['Exported by'].startswith('Petrel')): 1427 log.info("File is exported by Petrel") 1428 grid = {} 1429 section_index = _fast_index(path, mem, skip) 1430 1431 # specgrid is special because it must be read before everything 1432 # else, so that we know how to read other sections 1433 dims = _read_specgrid(mem, section_index) 1434 grid['DIMENS'] = dims[::-1] 1435 1436 # coord is special because we need to possibly rotate it 1437 A, b = _axes_map(mem, section_index) 1438 grid['COORD'] = _read_coord(dims, mem, section_index, A, b) 1439 1440 # actnum is special because we need it to set the mask 1441 grid['ACTNUM'] = _read_section('ACTNUM', dims, numpy.int32, 1442 mem, section_index).astype( 1443 numpy.bool) 1444 mask = numpy.logical_not(grid['ACTNUM']) 1445 1446 # zcorn is special because it has a different format (and 1447 # cannot have inactive elements) 1448 grid['ZCORN'] = _read_section('ZCORN', _zcorn_dims(dims), 1449 numpy.float64, mem, 1450 section_index) 1451 1452 # read general cell properties that are marked as result 1453 # properties, from the file 1454 for kw in section_index: 1455 props = _SECTIONS[kw] 1456 if props['result']: 1457 # read the data that is stored in the file 1458 data = _read_section(kw, dims, _kw_dtype(kw), mem, 1459 section_index) 1460 1461 # create an array where the inactive region is masked 1462 # out (won't appear on plots, in statistics etc.) 1463 grid[kw] = numpy.ma.array(data=data, mask=mask) 1464 1465 # use the regular, but slower parser 1466 else: 1467 with ctx.closing(_Parser(path, mem)) as parser: 1468 grid = parser.parse() 1469 1470 return grid
Read an Eclipse input grid into a dictionary.
def
write(filename, grid, dialect='ecl', multi_file=True):
1772def write(filename, grid, dialect='ecl', multi_file=True): 1773 """Write a grid to corner-point text format, formatted in such a way 1774 that it can easily be read again. 1775 1776 :param filename: Name of the filename containing a wrapper for all 1777 the properties. This will also serve as the stem for 1778 all individual property files. 1779 :type filename: str 1780 :param grid: Grid object containing values for all properties. 1781 :type grid: dict 1782 :param dialect: Name of the simulator that we are writing files for. 1783 Currently, this must be 'ecl'. 1784 :type dialect: str 1785 :param multi_file: Write properties in separate files, instead of putting 1786 everything in one large file. 1787 :type multi_file: bool 1788 """ 1789 # inner helper function that curries the dialect, and returns the keyword 1790 # from the translation dictionary if present, otherwise return unmodified. 1791 # this function is passed to all the other, and is used to lookup the 1792 # correct string to write for each keyword. 1793 trans_dict = _DIALECT[dialect] 1794 1795 def _lookup(kw): 1796 return trans_dict[kw] if kw in trans_dict else kw 1797 1798 # dismantle the filename into various parts; we are going to use the 1799 # relative filenames in the wrapper file 1800 path, base = os.path.split(filename) 1801 base, _ = os.path.splitext(base) 1802 1803 # delegate to specialized routine, that directs writing of individual 1804 # properties to either each their file, or one common file 1805 if multi_file: 1806 _write_multi(path, base, grid, _lookup, dialect) 1807 else: 1808 _write_single(path, base, grid, _lookup, dialect)
Write a grid to corner-point text format, formatted in such a way that it can easily be read again.
Parameters
- filename: Name of the filename containing a wrapper for all the properties. This will also serve as the stem for all individual property files.
- grid: Grid object containing values for all properties.
- dialect: Name of the simulator that we are writing files for. Currently, this must be 'ecl'.
- multi_file: Write properties in separate files, instead of putting everything in one large file.
def
write_compressed(fname, keyw, cube, *, fmt='12.6e'):
1989def write_compressed(fname, keyw, cube, *, fmt="12.6e"): 1990 """\ 1991 Write a data field as a keyword that can be included into a grid file. 1992 1993 :param fname: Path to the output file to write to 1994 :type fname: str 1995 :param keyw: Name of the keyword to put in the header; this should 1996 not be greater than eight characters 1997 :type keyw: str 1998 :param cube: Data cube to be written 1999 :type cube: :class:`numpy.array` 2000 :param fmt: Format of a single item, on the form used to specify 2001 formats in the build-in routines, but without percent 2002 or braces. 2003 :type fmt: str 2004 """ 2005 # if we specified a string, this is the path to the file, so open it 2006 # and start writing; otherwise, it is a file-like object that is already 2007 # opened, so just continue writing directly 2008 if isinstance(fname, str): 2009 with open(os.path.expanduser(fname), 'wb') as f_obj: 2010 _write_compr_any(f_obj, keyw, cube, fmt=fmt) 2011 else: 2012 _write_compr_any(fname, keyw, cube, fmt=fmt)
Write a data field as a keyword that can be included into a grid file.
Parameters
- fname: Path to the output file to write to
- keyw: Name of the keyword to put in the header; this should not be greater than eight characters
- cube: Data cube to be written
- fmt: Format of a single item, on the form used to specify formats in the build-in routines, but without percent or braces.
def
shape(grdecl):
2015def shape(grdecl): 2016 """\ 2017 Get shape of field data cube. 2018 2019 :param grdecl: Corner-point grid structure 2020 :type grdecl: dict 2021 :return: (num_k, num_j, num_i) 2022 :rtype: (int, int, int) 2023 """ 2024 return tuple(reversed(grdecl['DIMENS']))
Get shape of field data cube.
Parameters
- grdecl: Corner-point grid structure
Returns
(num_k, num_j, num_i)
def
read_prop(fname, dims, typ=<class 'numpy.float64'>, prop=None, *, mask=None):
2027def read_prop(fname, dims, typ=numpy.float64, prop=None, *, mask=None): 2028 """\ 2029 Read a property from a text file into a dictionary. This is akin to 2030 Petrel's Import onto selection popup menu choice. 2031 2032 :param fname: File name to load property from. Currently, this must be 2033 a file in the filesystem and cannot be a file-like object. 2034 :type fname: str 2035 :param dims: Dimensions of the data cube, in a format compatible with 2036 the return of the shape function, i.e. (nk, nj, ni) 2037 :type dims: (int, int, int) 2038 :param typ: Data type of the data to be loaded 2039 :type typ: :class:`numpy.dtype` 2040 :param prop: Dictionary where the keyword will be added. If this is 2041 None, then a new dictionary will be created. 2042 :type prop: dict 2043 :param mask: Existing mask for inactive elements; this will be 2044 combined with the mask inherit in the data, if specified. 2045 If you have loaded the grid, this should be the ACTNUM 2046 field 2047 :type mask: Optional[:class:`numpy.ndarray`] 2048 :return: Dictionary where the property being read is added as a key 2049 :rtype: dict 2050 """ 2051 # if output isn't given, then add to brand new dictionary 2052 if prop is None: 2053 prop = dict() 2054 2055 # allocate memory 2056 num_cells = numpy.product(dims) 2057 data = numpy.empty((num_cells,), dtype=typ) 2058 2059 # read raw array of values into memory first 2060 with ctx.closing(_OwningLexer(os.path.expanduser(fname))) as lex: 2061 2062 # read the first keyword that appears in the file 2063 kw = dec(lex.expect(keyword)) 2064 if not lex.at_eof(): 2065 lex.expect(newline) 2066 2067 # read numbers from file; each read may potentially fill a different 2068 # length of the array. use an almost identical type of loop for 2069 # integers and floating-point, but avoid using test inside the loop 2070 # or polymorphism through function pointer, to ease optimization 2071 num_read = 0 2072 if numpy.issubdtype(typ, numpy.integer): 2073 while num_read < num_cells: 2074 count, value = lex.expect_multi_card() 2075 data[num_read:(num_read+count)] = value 2076 num_read += count 2077 else: 2078 while num_read < num_cells: 2079 count, value = lex.expect_numbers() 2080 data[num_read:(num_read+count)] = value 2081 num_read += count 2082 2083 # verify that we got exactly the number of corners we wanted 2084 if num_read > num_cells: 2085 raise GrdEclError(lex.path, lex.pos(), 2086 "Expected {0:d} numbers, but got {1:d}".format( 2087 num_cells, num_read)) 2088 2089 # after all the data, there should be a sentinel slash 2090 lex.expect(endrec) 2091 2092 # reformat the data into a natural kji/bfr hypercube 2093 data = numpy.reshape(data, dims) 2094 2095 # inherit inactive cells by missing values in the file 2096 blanked = numpy.isnan(data) 2097 2098 # if there is either specified a mask, or there is an inherit mask, then 2099 # we need to generate a masked array 2100 if (mask is not None) or numpy.any(blanked): 2101 # combine the two masks into one: 2102 if mask is not None: 2103 blanked = numpy.logical_or(blanked, mask) 2104 2105 # update the values to not have anything where there is now a mask 2106 if numpy.issubdtype(data.dtype, numpy.integer): 2107 data[blanked] = 0 2108 else: 2109 data[blanked] = numpy.nan 2110 2111 prop[kw] = numpy.ma.array(data=data, dtype=data.dtype, 2112 mask=blanked) 2113 2114 # no mask at all, seems to be a full matrix 2115 else: 2116 prop[kw] = data 2117 2118 return prop
Read a property from a text file into a dictionary. This is akin to Petrel's Import onto selection popup menu choice.
Parameters
- fname: File name to load property from. Currently, this must be a file in the filesystem and cannot be a file-like object.
- dims: Dimensions of the data cube, in a format compatible with the return of the shape function, i.e. (nk, nj, ni)
- typ: Data type of the data to be loaded
- prop: Dictionary where the keyword will be added. If this is None, then a new dictionary will be created.
- mask: Existing mask for inactive elements; this will be combined with the mask inherit in the data, if specified. If you have loaded the grid, this should be the ACTNUM field
Returns
Dictionary where the property being read is added as a key
def
main(*args):
2121def main(*args): 2122 """Read a data file to see if it parses OK.""" 2123 # setup simple logging where we prefix each line with a letter code 2124 logging.basicConfig(level=logging.INFO, 2125 format="%(levelname).1s: %(message).76s") 2126 2127 # parse command-line arguments 2128 parser = argparse.ArgumentParser() 2129 parser.add_argument("filename") 2130 parser.add_argument("output", type=str, nargs='?', default=None) 2131 parser.add_argument("--dialect", choices=['ecl'], default='ecl') 2132 parser.add_argument("--verbose", action='store_true') 2133 parser.add_argument("--quiet", action='store_true') 2134 cmd_args = parser.parse_args(*args) 2135 2136 # adjust the verbosity of the program 2137 if cmd_args.verbose: 2138 logging.getLogger(__name__).setLevel(logging.DEBUG) 2139 if cmd_args.quiet: 2140 logging.getLogger(__name__).setLevel(logging.NOTSET) 2141 2142 # process file 2143 grid = read(cmd_args.filename) 2144 2145 # if an output file was specified, then write it 2146 if cmd_args.output is not None: 2147 write(cmd_args.output, grid, cmd_args.dialect, True)
Read a data file to see if it parses OK.
def
membuf(mem, bgn, end):
def
conv_atoi(buf):
def
conv_atof(buf):
def
enc(text):
def
dec(buf):