misc.read_input_csv
File for reading CSV files and returning a 2D list
"""
File for reading CSV files and returning a 2D list
"""
import pandas as pd
import numpy as np  # retained from the original module; not used by the functions below


def _parse_data_cell(value):
    """Return one CSV cell as 'n/a' (missing), a float, or the raw value."""
    if pd.isna(value):
        return 'n/a'
    try:
        return float(value)
    except (TypeError, ValueError):
        # Not numeric (typically a string): hand it back unchanged.
        return value


def _parse_var_pair(values, type_pos):
    """Return (lower-cased type, variance) read from two adjacent cells."""
    var_type = values[type_pos].lower()
    raw = values[type_pos + 1]
    try:
        variance = float(raw)
    except (TypeError, ValueError):
        # Not numeric (typically a string): hand it back unchanged.
        variance = raw
    return var_type, variance


def read_data_csv(filename, datatype, truedataindex):
    """
    Read observed data from a CSV file into a 2D list.

    Three layouts are recognised:

    * first column header is ``'header_both'``: the file has row and column
      headers; rows are picked by the values in `truedataindex` and columns
      by the names in `datatype`;
    * the number of data rows equals ``len(truedataindex)``: the file has
      column headers only; columns are picked by the names in `datatype`;
    * the number of data rows equals ``len(truedataindex) - 1``: the file has
      no headers at all; pandas consumed the first row as a header, so that
      row is read again and the first ``len(datatype)`` columns are used.

    Parameters
    ----------
    filename:
        Name of csv-file
    datatype:
        List of data types as strings
    truedataindex:
        List of where the "TRUEDATA" has been extracted (e.g., at which time, etc)

    Returns
    -------
    list:
        One inner list per row with one entry per data type. Each entry is a
        float, the original string, or ``'n/a'`` for a missing value.

    Raises
    ------
    KeyError
        If a name in `datatype` is not a column header of the file.
    IndexError
        If an entry of `truedataindex` has no matching ``header_both`` row.
    ValueError
        If the file has data rows but their count matches neither layout.
    """
    df = pd.read_csv(filename)
    n_rows = df.shape[0]
    n_index = len(truedataindex)
    imported_data = []

    if df.columns[0] == 'header_both':  # row and column headers
        pos = [df.columns.get_loc(name) for name in datatype]
        for t in truedataindex:
            matches = df[df['header_both'] == t]
            if matches.empty:
                raise IndexError(
                    "no row with header_both == {!r} in {!r}".format(t, filename))
            row = matches.values[0]
            imported_data.append([_parse_data_cell(row[p]) for p in pos])
        return imported_data

    # No row headers: rows must follow the order of truedataindex.
    if n_index == n_rows:  # column headers present
        pos = [df.columns.get_loc(name) for name in datatype]
    elif n_index == n_rows + 1:  # no headers at all
        # Columns must follow the order of datatype.
        pos = list(range(len(datatype)))
        # The first data row was consumed as a header, so read it again.
        first_row = pd.read_csv(filename, header=None, nrows=1).values[0]
        imported_data.append([_parse_data_cell(first_row[p]) for p in pos])
    else:
        if n_rows:
            raise ValueError(
                "{!r} has {} data rows, which matches neither {} rows (with "
                "column headers) nor {} rows (without headers)".format(
                    filename, n_rows, n_index, n_index - 1))
        pos = []  # no data rows, so nothing will be indexed

    for row in df.values:
        imported_data.append([_parse_data_cell(row[p]) for p in pos])

    return imported_data


def read_var_csv(filename, datatype, truedataindex):
    """
    Read variance information from a CSV file into a 2D list.

    For every data type the file holds two adjacent columns: the variance
    type (a string, returned lower-cased) immediately followed by the
    variance value. The same three layouts as in `read_data_csv` are
    recognised (``'header_both'``, column headers only, no headers).

    Parameters
    ----------
    filename : str
        Name of the CSV file.

    datatype : list
        List of data types as strings.

    truedataindex : list
        List of indices where the "TRUEDATA" has been extracted.

    Returns
    -------
    imported_var : list
        List of variances. Each inner list is
        ``[type_0, value_0, type_1, value_1, ...]`` in the order of
        `datatype`; values are floats when they can be converted.

    Raises
    ------
    KeyError
        If a name in `datatype` is not a column header of the file.
    IndexError
        If an entry of `truedataindex` has no matching ``header_both`` row.
    ValueError
        If the file has data rows but their count matches neither layout.
    """
    df = pd.read_csv(filename)
    n_rows = df.shape[0]
    n_index = len(truedataindex)
    imported_var = []

    if df.columns[0] == 'header_both':  # row and column headers
        pos = [df.columns.get_loc(name) for name in datatype]
        for t in truedataindex:
            matches = df[df['header_both'] == t]
            if matches.empty:
                raise IndexError(
                    "no row with header_both == {!r} in {!r}".format(t, filename))
            row = matches.values[0]
            csv_data = []
            for p in pos:
                csv_data.extend(_parse_var_pair(row, p))
            imported_var.append(csv_data)
        return imported_var

    # No row headers: rows must follow the order of truedataindex.
    if n_index == n_rows:  # column headers present
        pos = [df.columns.get_loc(name) for name in datatype]
    elif n_index == n_rows + 1:  # no headers at all
        # Columns must follow the order of datatype: (type, value) pairs.
        pos = [2 * col for col in range(len(datatype))]
        # The first data row was consumed as a header, so read it again.
        first_row = pd.read_csv(filename, header=None, nrows=1).values[0]
        csv_data = []
        for p in pos:
            csv_data.extend(_parse_var_pair(first_row, p))
        imported_var.append(csv_data)
    else:
        if n_rows:
            raise ValueError(
                "{!r} has {} data rows, which matches neither {} rows (with "
                "column headers) nor {} rows (without headers)".format(
                    filename, n_rows, n_index, n_index - 1))
        pos = []  # no data rows, so nothing will be indexed

    for row in df.values:
        csv_data = []
        for p in pos:
            # Type and value are both located through pos so they stay paired.
            csv_data.extend(_parse_var_pair(row, p))
        imported_var.append(csv_data)

    return imported_var
def read_data_csv(filename, datatype, truedataindex):
    """
    Read observed data from a CSV file into a 2D list.

    Three layouts are recognised:

    * first column header is ``'header_both'``: the file has row and column
      headers; rows are picked by the values in `truedataindex` and columns
      by the names in `datatype`;
    * the number of data rows equals ``len(truedataindex)``: the file has
      column headers only; columns are picked by the names in `datatype`;
    * the number of data rows equals ``len(truedataindex) - 1``: the file has
      no headers at all; pandas consumed the first row as a header, so that
      row is read again and the first ``len(datatype)`` columns are used.

    Parameters
    ----------
    filename:
        Name of csv-file
    datatype:
        List of data types as strings
    truedataindex:
        List of where the "TRUEDATA" has been extracted (e.g., at which time, etc)

    Returns
    -------
    list:
        One inner list per row with one entry per data type. Each entry is a
        float, the original string, or ``'n/a'`` for a missing value.

    Raises
    ------
    KeyError
        If a name in `datatype` is not a column header of the file.
    IndexError
        If an entry of `truedataindex` has no matching ``header_both`` row.
    ValueError
        If the file has data rows but their count matches neither layout.
    """
    df = pd.read_csv(filename)
    n_rows = df.shape[0]
    n_index = len(truedataindex)
    imported_data = []

    if df.columns[0] == 'header_both':  # row and column headers
        pos = [df.columns.get_loc(name) for name in datatype]
        for t in truedataindex:
            matches = df[df['header_both'] == t]
            if matches.empty:
                raise IndexError(
                    "no row with header_both == {!r} in {!r}".format(t, filename))
            row = matches.values[0]
            imported_data.append([_parse_data_cell(row[p]) for p in pos])
        return imported_data

    # No row headers: rows must follow the order of truedataindex.
    if n_index == n_rows:  # column headers present
        pos = [df.columns.get_loc(name) for name in datatype]
    elif n_index == n_rows + 1:  # no headers at all
        # Columns must follow the order of datatype.
        pos = list(range(len(datatype)))
        # The first data row was consumed as a header, so read it again.
        first_row = pd.read_csv(filename, header=None, nrows=1).values[0]
        imported_data.append([_parse_data_cell(first_row[p]) for p in pos])
    else:
        if n_rows:
            raise ValueError(
                "{!r} has {} data rows, which matches neither {} rows (with "
                "column headers) nor {} rows (without headers)".format(
                    filename, n_rows, n_index, n_index - 1))
        pos = []  # no data rows, so nothing will be indexed

    for row in df.values:
        imported_data.append([_parse_data_cell(row[p]) for p in pos])

    return imported_data


def _parse_data_cell(value):
    """Return one CSV cell as 'n/a' (missing), a float, or the raw value."""
    if pd.isna(value):
        return 'n/a'
    try:
        return float(value)
    except (TypeError, ValueError):
        # Not numeric (typically a string): hand it back unchanged.
        return value
Parameters
- filename: Name of the CSV file
- datatype: List of data types as strings
- truedataindex: List of where the "TRUEDATA" has been extracted (e.g., at which time, etc.)
Returns
- list: List of observed data (one inner list per row)
def read_var_csv(filename, datatype, truedataindex):
    """
    Read variance information from a CSV file into a 2D list.

    For every data type the file holds two adjacent columns: the variance
    type (a string, returned lower-cased) immediately followed by the
    variance value. Three layouts are recognised:

    * first column header is ``'header_both'``: the file has row and column
      headers; rows are picked by the values in `truedataindex` and the type
      columns by the names in `datatype`;
    * the number of data rows equals ``len(truedataindex)``: the file has
      column headers only; type columns are picked by the names in `datatype`;
    * the number of data rows equals ``len(truedataindex) - 1``: the file has
      no headers at all; pandas consumed the first row as a header, so that
      row is read again and the columns are taken as consecutive
      (type, value) pairs in the order of `datatype`.

    Parameters
    ----------
    filename : str
        Name of the CSV file.

    datatype : list
        List of data types as strings.

    truedataindex : list
        List of indices where the "TRUEDATA" has been extracted.

    Returns
    -------
    imported_var : list
        List of variances. Each inner list is
        ``[type_0, value_0, type_1, value_1, ...]`` in the order of
        `datatype`; values are floats when they can be converted.

    Raises
    ------
    KeyError
        If a name in `datatype` is not a column header of the file.
    IndexError
        If an entry of `truedataindex` has no matching ``header_both`` row.
    ValueError
        If the file has data rows but their count matches neither layout.
    """
    df = pd.read_csv(filename)
    n_rows = df.shape[0]
    n_index = len(truedataindex)
    imported_var = []

    if df.columns[0] == 'header_both':  # row and column headers
        pos = [df.columns.get_loc(name) for name in datatype]
        for t in truedataindex:
            matches = df[df['header_both'] == t]
            if matches.empty:
                raise IndexError(
                    "no row with header_both == {!r} in {!r}".format(t, filename))
            row = matches.values[0]
            csv_data = []
            for p in pos:
                csv_data.extend(_parse_var_pair(row, p))
            imported_var.append(csv_data)
        return imported_var

    # No row headers: rows must follow the order of truedataindex.
    if n_index == n_rows:  # column headers present
        pos = [df.columns.get_loc(name) for name in datatype]
    elif n_index == n_rows + 1:  # no headers at all
        # Columns must follow the order of datatype: (type, value) pairs.
        pos = [2 * col for col in range(len(datatype))]
        # The first data row was consumed as a header, so read it again.
        first_row = pd.read_csv(filename, header=None, nrows=1).values[0]
        csv_data = []
        for p in pos:
            csv_data.extend(_parse_var_pair(first_row, p))
        imported_var.append(csv_data)
    else:
        if n_rows:
            raise ValueError(
                "{!r} has {} data rows, which matches neither {} rows (with "
                "column headers) nor {} rows (without headers)".format(
                    filename, n_rows, n_index, n_index - 1))
        pos = []  # no data rows, so nothing will be indexed

    for row in df.values:
        csv_data = []
        for p in pos:
            # Type and value are both located through pos so they stay paired.
            csv_data.extend(_parse_var_pair(row, p))
        imported_var.append(csv_data)

    return imported_var


def _parse_var_pair(values, type_pos):
    """Return (lower-cased type, variance) read from two adjacent cells."""
    var_type = values[type_pos].lower()
    raw = values[type_pos + 1]
    try:
        variance = float(raw)
    except (TypeError, ValueError):
        # Not numeric (typically a string): hand it back unchanged.
        variance = raw
    return var_type, variance
Parameters
- filename (str): Name of the CSV file.
- datatype (list): List of data types as strings.
- truedataindex (list): List of indices where the "TRUEDATA" has been extracted.
Returns
- imported_var (list): List of variances.