ClimateModelCorrection/ModelMaker.py at master · timholthuijsen/ClimateModelCorrection · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.neural_network import MLPRegressor
from sklearn.ensemble import RandomForestRegressor
import joblib
from DataProcessing import CSVWriter

# Ask the user where the training data comes from. The answer is either a path
# to a .nc file, the keyword 'default', or the hidden keyword 'SudoDebug'.
text = input("""Please provide the path to your .nc climate model output file or use the default setup by typing default: """)

# Note: This program is still currently under development


print("your nc file is:", text)

# Usage-mode flags; exactly one of them is switched on below.
debug = False
default = False
normal = False

if text == 'SudoDebug':
    print("Welcome to debug mode")
    debug = True
elif text == 'default':
    print("You are using the default training set up")
    default = True
else:
    normal = True

# NOTE(review): debug mode loads no table here, so `data` must already exist
# before the train/test split further down - confirm this is intended.

#Determine data origin based on usage mode
if default:
    # No trailing slash: the table is a regular file, not a directory.
    data = pd.read_csv("MegaTable.csv")

#If someone gives their own custom .nc filetype, we convert it to the required format:
if normal:
    #We use our new DataProcessing function to transform the .nc data
    try:
        CSVWriter(Data = text)
        #if it worked, we use this data
        data = pd.read_csv("CustomTable.csv")
    #if it doesn't work, report why and fall back to the default training table.
    #`Exception` (not a bare except) lets Ctrl-C and SystemExit propagate.
    except Exception as conversion_error:
        print("Conversion of your .nc file failed:", conversion_error)
        print("""Your data needs to be inserted into the MegaTable.csv training set.
          An automatic implementation for this is on its way and will be available
          in this function soon. If you already want to use this function,
          simply put your climate temperature data into the MegaTable.csv and
          use the 'default' implementation.""")
        data = pd.read_csv("MegaTable.csv")


# Column the models learn to predict.
target_column = "ModelError"

# Predictor columns handed to every model, one entry per line.
columns_to_use = [
    "ReforecastLon",
    "ReforecastLat",
    "Time",
    "ReforecastTemp",
    "Precipitation",
    "CloudCover",
    "Water",
    "Ice",
    "Land",
    "Shallow Water",
    "Desert",
]

# Split predictors and target into training and evaluation parts using
# train_test_split's default settings.
x_train, x_test, y_train, y_test = train_test_split(
    data[columns_to_use],
    data[target_column],
)

# Sanity check: predictors and targets of the training part must line up.
if len(y_train) == len(x_train):
    print("Train/Test split worked!")

def error(prediction, true = y_test):
    """Return the root-mean-square error between `prediction` and `true`.

    Parameters:
        prediction: predicted values, one per sample.
        true: reference values to compare against; defaults to the
            module-level test targets. It must support element-wise
            subtraction with `prediction`.

    Returns:
        The square root of the mean squared difference.

    The previous version ignored `true` and averaged absolute differences
    (a mean absolute error) while reporting the result as an RMSE.
    """
    # Honour the caller-supplied reference values instead of the global.
    residuals = true - prediction
    squared_total = sum(residual ** 2 for residual in residuals)
    RMSE = (squared_total / len(prediction)) ** 0.5
    return RMSE

def ModelMaker(model, x_train = x_train, y_train = y_train,
               x_test = x_test, y_test = y_test, fit = True):
    """Optionally fit `model`, score it on the test data and report the result.

    Parameters:
        model: an estimator exposing `fit` and `predict`.
        x_train, y_train: data passed to `model.fit` when `fit` is true.
        x_test: data passed to `model.predict`.
        y_test: reference values forwarded to `error()` for scoring.
        fit: set to False when `model` has already been trained.

    Returns:
        The score computed by `error()` for the predictions on `x_test`.
    """
    if fit:
        model.fit(x_train,y_train)
    y_predicted = model.predict(x_test)
    # Forward y_test so a caller-supplied test set is scored against its own
    # targets rather than the module-level default.
    RMSE = error(y_predicted, y_test)
    print("your ", model, "model yields an RMSE of:", RMSE)
    return RMSE


modeltype = input("""What kind of model do you want to use?
                  options: Linear, RandomForest, NeuralNetwork """)

# Normalise the answer once so matching ignores case and stray whitespace.
requested_model = modeltype.strip().lower()

# Each flag now matches its own model type; previously 'randomforest' switched
# on the neural network and 'neuralnetwork' switched on the random forest.
linear = requested_model == 'linear'
forest = requested_model == 'randomforest'
neural = requested_model == 'neuralnetwork'

if linear:
    model = LinearRegression()
elif forest:
    model = RandomForestRegressor()
elif neural:
    model = MLPRegressor(max_iter = 500)
else:
    # Stop with a clear message instead of failing later with a NameError
    # when the undefined `model` would be saved.
    raise SystemExit(
        "Unknown model type %r; choose Linear, RandomForest or NeuralNetwork."
        % modeltype
    )

print('training your model. This may take a while.')
model.fit(x_train,y_train)
# The model is already fitted, so only evaluate and report here.
ModelMaker(model, fit=False)

# Persist the trained model for later use.
joblib.dump(model, 'TrainedModel.joblib')