misc.read_input_csv

File for reading CSV files and returning a 2D list

View Source

  1"""
  2File for reading CSV files and returning a 2D list
  3"""
  4import pandas as pd
  5import numpy as np
  6
  7
  8def read_data_csv(filename, datatype, truedataindex):
  9    """
 10    Parameters
 11    ----------
 12    filename:
 13        Name of csv-file
 14    datatype:
 15        List of data types as strings
 16    truedataindex:
 17        List of where the "TRUEDATA" has been extracted (e.g., at which time, etc)
 18
 19    Returns
 20    -------
 21    some-type:
 22        List of observed data
 23    """
 24
 25    df = pd.read_csv(filename)  # Read the file
 26
 27    imported_data = []  # Initialize the 2D list of csv data
 28    tlength = len(truedataindex)
 29    dnumber = len(datatype)
 30
 31    if df.columns[0] == 'header_both':  # csv file has column and row headers
 32        pos = [None] * dnumber
 33        for col in range(dnumber):
 34            # find index of data type in csv file header
 35            pos[col] = df.columns.get_loc(datatype[col])
 36        for t in truedataindex:
 37            row = df[df['header_both'] == t]  # pick row corresponding to truedataindex
 38            row = row.values[0]  # select the values of the dataframe row
 39            csv_data = [None] * dnumber
 40            for col in range(dnumber):
 41                if (not type(row[pos[col]]) == str) and (np.isnan(row[pos[col]])):  # do not check strings
 42                    csv_data[col] = 'n/a'
 43                else:
 44                    try:  # Making a float
 45                        csv_data[col] = float(row[pos[col]])
 46                    except:  # It is a string
 47                        csv_data[col] = row[pos[col]]
 48            imported_data.append(csv_data)
 49    else:  # No row headers (the rows in the csv file must correspond to the order in truedataindex)
 50        if tlength == df.shape[0]:  # File has column headers
 51            pos = [None] * dnumber
 52            for col in range(dnumber):
 53                # Find index of the header in datatype
 54                pos[col] = df.columns.get_loc(datatype[col])
 55        # File has no column headers (columns must correspond to the order in datatype)
 56        elif tlength == df.shape[0]+1:
 57            # First row has been misinterpreted as header, so we read first row again:
 58            temp = pd.read_csv(filename, header=None, nrows=1).values[0]
 59            pos = list(range(df.shape[1]))  # Assume the data is in the correct order
 60            csv_data = [None] * len(temp)
 61            for col in range(len(temp)):
 62                if (not type(temp[col]) == str) and (np.isnan(temp[col])):  # do not check strings
 63                    csv_data[col] = 'n/a'
 64                else:
 65                    try:  # Making a float
 66                        csv_data[col] = float(temp[col])
 67                    except:  # It is a string
 68                        csv_data[col] = temp[col]
 69            imported_data.append(csv_data)
 70
 71        for rows in df.values:
 72            csv_data = [None] * dnumber
 73            for col in range(dnumber):
 74                if (not type(rows[pos[col]]) == str) and (np.isnan(rows[pos[col]])):  # do not check strings
 75                    csv_data[col] = 'n/a'
 76                else:
 77                    try:  # Making a float
 78                        csv_data[col] = float(rows[pos[col]])
 79                    except:  # It is a string
 80                        csv_data[col] = rows[pos[col]]
 81            imported_data.append(csv_data)
 82
 83    return imported_data
 84
 85
 86def read_var_csv(filename, datatype, truedataindex):
 87    """
 88    Parameters
 89    ----------
 90    filename : str
 91        Name of the CSV file.
 92
 93    datatype : list
 94        List of data types as strings.
 95
 96    truedataindex : list
 97        List of indices where the "TRUEDATA" has been extracted.
 98
 99    Returns
100    -------
101    imported_var : list
102        List of variances.
103    """
104
105    df = pd.read_csv(filename)  # Read the file
106
107    imported_var = []  # Initialize the 2D list of csv data
108    tlength = len(truedataindex)
109    dnumber = len(datatype)
110
111    if df.columns[0] == 'header_both':  # csv file has column and row headers
112        pos = [None] * dnumber
113        for col in range(dnumber):
114            # find index of data type in csv file header
115            pos[col] = df.columns.get_loc(datatype[col])
116        for t in truedataindex:
117            row = df[df['header_both'] == t]  # pick row
118            row = row.values[0]  # select the values of the dataframe
119            csv_data = [None] * 2 * dnumber
120            for col in range(dnumber):
121                csv_data[2*col] = row[pos[col]]
122                try:  # Making a float
123                    csv_data[2*col+1] = float(row[pos[col]]+1)
124                except:  # It is a string
125                    csv_data[2*col+1] = row[pos[col]+1]
126            # Make sure the string input is lowercase
127            csv_data[0::2] = [x.lower() for x in csv_data[0::2]]
128            imported_var.append(csv_data)
129    else:  # No row headers (the rows in the csv file must correspond to the order in truedataindex)
130        if tlength == df.shape[0]:  # File has column headers
131            pos = [None] * dnumber
132            for col in range(dnumber):
133                # Find index of datatype in csv file header
134                pos[col] = df.columns.get_loc(datatype[col])
135        # File has no column headers (columns must correspond to the order in datatype)
136        elif tlength == df.shape[0]+1:
137            # First row has been misinterpreted as header, so we read first row again:
138            temp = pd.read_csv(filename, header=None, nrows=1).values[0]
139            # Make sure the string input is lowercase
140            temp[0::2] = [x.lower() for x in temp[0::2]]
141            # Assume the data is in the correct order
142            pos = list(range(0, df.shape[1], 2))
143            csv_data = [None] * len(temp)
144            for col in range(dnumber):
145                csv_data[2 * col] = temp[2 * col]
146                try:  # Making a float
147                    csv_data[2*col+1] = float(temp[2*col+1])
148                except:  # It is a string
149                    csv_data[2*col+1] = temp[2*col+1]
150            imported_var.append(csv_data)
151
152        for rows in df.values:
153            csv_data = [None] * 2 * dnumber
154            for col in range(dnumber):
155                csv_data[2*col] = rows[2*col]
156                try:  # Making a float
157                    csv_data[2*col+1] = float(rows[pos[col]+1])
158                except:  # It is a string
159                    csv_data[2*col+1] = rows[pos[col]+1]
160            # Make sure the string input is lowercase
161            csv_data[0::2] = [x.lower() for x in csv_data[0::2]]
162            imported_var.append(csv_data)
163
164    return imported_var

def read_data_csv(filename, datatype, truedataindex): View Source

 9def read_data_csv(filename, datatype, truedataindex):
10    """
11    Parameters
12    ----------
13    filename:
14        Name of csv-file
15    datatype:
16        List of data types as strings
17    truedataindex:
18        List of where the "TRUEDATA" has been extracted (e.g., at which time, etc)
19
20    Returns
21    -------
22    some-type:
23        List of observed data
24    """
25
26    df = pd.read_csv(filename)  # Read the file
27
28    imported_data = []  # Initialize the 2D list of csv data
29    tlength = len(truedataindex)
30    dnumber = len(datatype)
31
32    if df.columns[0] == 'header_both':  # csv file has column and row headers
33        pos = [None] * dnumber
34        for col in range(dnumber):
35            # find index of data type in csv file header
36            pos[col] = df.columns.get_loc(datatype[col])
37        for t in truedataindex:
38            row = df[df['header_both'] == t]  # pick row corresponding to truedataindex
39            row = row.values[0]  # select the values of the dataframe row
40            csv_data = [None] * dnumber
41            for col in range(dnumber):
42                if (not type(row[pos[col]]) == str) and (np.isnan(row[pos[col]])):  # do not check strings
43                    csv_data[col] = 'n/a'
44                else:
45                    try:  # Making a float
46                        csv_data[col] = float(row[pos[col]])
47                    except:  # It is a string
48                        csv_data[col] = row[pos[col]]
49            imported_data.append(csv_data)
50    else:  # No row headers (the rows in the csv file must correspond to the order in truedataindex)
51        if tlength == df.shape[0]:  # File has column headers
52            pos = [None] * dnumber
53            for col in range(dnumber):
54                # Find index of the header in datatype
55                pos[col] = df.columns.get_loc(datatype[col])
56        # File has no column headers (columns must correspond to the order in datatype)
57        elif tlength == df.shape[0]+1:
58            # First row has been misinterpreted as header, so we read first row again:
59            temp = pd.read_csv(filename, header=None, nrows=1).values[0]
60            pos = list(range(df.shape[1]))  # Assume the data is in the correct order
61            csv_data = [None] * len(temp)
62            for col in range(len(temp)):
63                if (not type(temp[col]) == str) and (np.isnan(temp[col])):  # do not check strings
64                    csv_data[col] = 'n/a'
65                else:
66                    try:  # Making a float
67                        csv_data[col] = float(temp[col])
68                    except:  # It is a string
69                        csv_data[col] = temp[col]
70            imported_data.append(csv_data)
71
72        for rows in df.values:
73            csv_data = [None] * dnumber
74            for col in range(dnumber):
75                if (not type(rows[pos[col]]) == str) and (np.isnan(rows[pos[col]])):  # do not check strings
76                    csv_data[col] = 'n/a'
77                else:
78                    try:  # Making a float
79                        csv_data[col] = float(rows[pos[col]])
80                    except:  # It is a string
81                        csv_data[col] = rows[pos[col]]
82            imported_data.append(csv_data)
83
84    return imported_data

Parameters

filename (str): Name of the CSV file.
datatype (list): List of data types as strings.
truedataindex (list): List of indices at which the "TRUEDATA" has been extracted (e.g., at which time).

Returns

imported_data (list): 2D list of observed data.

def read_var_csv(filename, datatype, truedataindex): View Source

 87def read_var_csv(filename, datatype, truedataindex):
 88    """
 89    Parameters
 90    ----------
 91    filename : str
 92        Name of the CSV file.
 93
 94    datatype : list
 95        List of data types as strings.
 96
 97    truedataindex : list
 98        List of indices where the "TRUEDATA" has been extracted.
 99
100    Returns
101    -------
102    imported_var : list
103        List of variances.
104    """
105
106    df = pd.read_csv(filename)  # Read the file
107
108    imported_var = []  # Initialize the 2D list of csv data
109    tlength = len(truedataindex)
110    dnumber = len(datatype)
111
112    if df.columns[0] == 'header_both':  # csv file has column and row headers
113        pos = [None] * dnumber
114        for col in range(dnumber):
115            # find index of data type in csv file header
116            pos[col] = df.columns.get_loc(datatype[col])
117        for t in truedataindex:
118            row = df[df['header_both'] == t]  # pick row
119            row = row.values[0]  # select the values of the dataframe
120            csv_data = [None] * 2 * dnumber
121            for col in range(dnumber):
122                csv_data[2*col] = row[pos[col]]
123                try:  # Making a float
124                    csv_data[2*col+1] = float(row[pos[col]]+1)
125                except:  # It is a string
126                    csv_data[2*col+1] = row[pos[col]+1]
127            # Make sure the string input is lowercase
128            csv_data[0::2] = [x.lower() for x in csv_data[0::2]]
129            imported_var.append(csv_data)
130    else:  # No row headers (the rows in the csv file must correspond to the order in truedataindex)
131        if tlength == df.shape[0]:  # File has column headers
132            pos = [None] * dnumber
133            for col in range(dnumber):
134                # Find index of datatype in csv file header
135                pos[col] = df.columns.get_loc(datatype[col])
136        # File has no column headers (columns must correspond to the order in datatype)
137        elif tlength == df.shape[0]+1:
138            # First row has been misinterpreted as header, so we read first row again:
139            temp = pd.read_csv(filename, header=None, nrows=1).values[0]
140            # Make sure the string input is lowercase
141            temp[0::2] = [x.lower() for x in temp[0::2]]
142            # Assume the data is in the correct order
143            pos = list(range(0, df.shape[1], 2))
144            csv_data = [None] * len(temp)
145            for col in range(dnumber):
146                csv_data[2 * col] = temp[2 * col]
147                try:  # Making a float
148                    csv_data[2*col+1] = float(temp[2*col+1])
149                except:  # It is a string
150                    csv_data[2*col+1] = temp[2*col+1]
151            imported_var.append(csv_data)
152
153        for rows in df.values:
154            csv_data = [None] * 2 * dnumber
155            for col in range(dnumber):
156                csv_data[2*col] = rows[2*col]
157                try:  # Making a float
158                    csv_data[2*col+1] = float(rows[pos[col]+1])
159                except:  # It is a string
160                    csv_data[2*col+1] = rows[pos[col]+1]
161            # Make sure the string input is lowercase
162            csv_data[0::2] = [x.lower() for x in csv_data[0::2]]
163            imported_var.append(csv_data)
164
165    return imported_var

Parameters

filename (str): Name of the CSV file.
datatype (list): List of data types as strings.
truedataindex (list): List of indices where the "TRUEDATA" has been extracted.

Returns

imported_var (list): List of variances.