-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathModelMaker.py
More file actions
129 lines (100 loc) · 3.55 KB
/
ModelMaker.py
File metadata and controls
129 lines (100 loc) · 3.55 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.neural_network import MLPRegressor
from sklearn.ensemble import RandomForestRegressor
import joblib
from DataProcessing import CSVWriter
# Ask where the training data should come from: a path to a custom .nc file,
# the keyword 'default', or the keyword 'SudoDebug'.
text = input("""Please provide the path to your .nc climate model output file or use the default setup by typing default: """)
# Note: This program is still currently under development.
print("your nc file is:", text)

# Usage-mode flags; exactly one of them is True.
debug = text == 'SudoDebug'
default = text == 'default'
normal = not (debug or default)

if debug:
    print("Welcome to debug mode")
elif default:
    print("You are using the default training set up")

# Determine data origin based on usage mode.
if normal:
    # A custom .nc file is handed to DataProcessing.CSVWriter; per the
    # original comments it converts the file into CustomTable.csv, which is
    # then used as the training table.
    try:
        CSVWriter(Data=text)
        # File paths must not end in a slash: "CustomTable.csv/" names a
        # directory and can never be opened as a CSV file.
        data = pd.read_csv("CustomTable.csv")
    except Exception:
        # Only ordinary errors are handled here, so Ctrl-C / SystemExit still
        # stop the program. Conversion failed: explain and fall back to the
        # default training table.
        print("""Your data needs to be inserted into the MegaTable.csv training set.
An automatic implementation for this is on its way and will be available
in this function soon. If you already want to use this function,
simply put your climate temperature data into the MegaTable.csv and
use the 'default' implementation.""")
        data = pd.read_csv("MegaTable.csv")
else:
    # Default mode trains on the bundled table. Debug mode previously loaded
    # nothing, which made the train/test split further down fail with a
    # NameError; it now uses the default table as well.
    # NOTE(review): confirm that debug mode should train on MegaTable.csv.
    data = pd.read_csv("MegaTable.csv")
# Predictor columns selected from the training table, in selection order.
columns_to_use = [
    "ReforecastLon", "ReforecastLat", "Time",
    "ReforecastTemp", "Precipitation", "CloudCover",
    "Water", "Ice", "Land", "Shallow Water", "Desert",
]
# Column the models are trained to predict.
target_column = "ModelError"

# Pull predictors and target out of the table, then split them using
# train_test_split's default settings (no extra arguments are passed).
feature_frame = data[columns_to_use]
target_series = data[target_column]
x_train, x_test, y_train, y_test = train_test_split(feature_frame, target_series)

# Sanity check: the training predictors and training target must have the
# same number of rows.
if len(y_train) == len(x_train):
    print("Train/Test split worked!")
def error(prediction, true=None):
    """Return the root-mean-square error between predictions and true values.

    Args:
        prediction: Sequence of predicted values (list, array, Series, ...).
        true: Sequence of reference values, compared with ``prediction``
            position by position. When omitted, the module-level ``y_test``
            is used; it is looked up at call time.

    Returns:
        ``sqrt(mean((true - prediction) ** 2))`` as a float.

    Raises:
        ValueError: If ``prediction`` is empty or the two inputs differ in
            length.
    """
    if true is None:
        true = y_test
    # len() is used instead of truthiness because array-like inputs do not
    # support a plain boolean test.
    n_samples = len(prediction)
    if n_samples == 0:
        raise ValueError("cannot compute an error for an empty prediction")
    if len(true) != n_samples:
        raise ValueError(
            "prediction and true must have the same length "
            "({} != {})".format(n_samples, len(true))
        )
    # Square first, average, then take the root. Taking the root of every
    # squared residual individually would give the mean absolute error.
    squared_sum = sum((t - p) ** 2 for t, p in zip(true, prediction))
    return (squared_sum / n_samples) ** 0.5
def ModelMaker(model, x_train = x_train, y_train = y_train,
               x_test = x_test, y_test = y_test, fit = True):
    """Optionally fit a model, score it on the test data and report the score.

    Args:
        model: Estimator object providing ``fit`` and ``predict``.
        x_train: Training predictors; defaults to the module-level split.
        y_train: Training target; defaults to the module-level split.
        x_test: Test predictors the model predicts on.
        y_test: Test target the predictions are scored against.
        fit: When True the model is fitted on the training data first; pass
            False for a model that has already been trained.

    Returns:
        The score computed by ``error`` for the test predictions (printed
        as the model's RMSE).
    """
    if fit:
        model.fit(x_train, y_train)
    y_predicted = model.predict(x_test)
    # Forward the y_test that belongs to x_test, so a caller-supplied test
    # set is scored against its own target instead of the global one.
    rmse = error(y_predicted, y_test)
    print("your ", model, "model yields an RMSE of:", rmse)
    return rmse
# Ask which estimator to train.
modeltype = input("""What kind of model do you want to use?
options: Linear, RandomForest, NeuralNetwork """)

# Accepted answers (compared case-insensitively) mapped to a factory that
# builds the matching, still unfitted estimator.
model_factories = {
    'linear': LinearRegression,
    'randomforest': RandomForestRegressor,
    'neuralnetwork': lambda: MLPRegressor(max_iter = 500),
}
choice = modeltype.strip().lower()

# Flags describing the selection. Each one now matches the model that was
# actually requested; 'randomforest' and 'neuralnetwork' used to be swapped.
linear = choice == 'linear'
neural = choice == 'neuralnetwork'
forest = choice == 'randomforest'

if choice not in model_factories:
    # Stop with a clear message instead of failing later with a NameError
    # when the undefined model would be saved.
    raise SystemExit(
        "Unknown model type {!r}. "
        "Please choose one of: Linear, RandomForest, NeuralNetwork".format(modeltype)
    )

print('training your model. This may take a while.')
model = model_factories[choice]().fit(x_train, y_train)
# The model is already fitted above, so only evaluate it here.
ModelMaker(model, fit=False)

# Persist the trained model.
joblib.dump(model, 'TrainedModel.joblib')