-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpreProcess.py
More file actions
43 lines (29 loc) · 1.28 KB
/
preProcess.py
File metadata and controls
43 lines (29 loc) · 1.28 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import pandas as pd
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import pickle
import numpy as np
def scaled(train_data):
    """Standardize the feature columns of *train_data* and persist the scaler.

    The five feature columns are cast to ``float32`` and then replaced by the
    output of a ``StandardScaler`` fitted on those same columns. The fitted
    scaler is pickled to ``feature_scaler.pkl`` (a relative path) so that the
    identical transformation can be re-applied later.

    Args:
        train_data: DataFrame containing the columns 'Duration 1',
            'Duration 2', 'Sensor Value', 'Sensor Value 2' and
            'Consumable Life'. It is modified in place.

    Returns:
        The same ``train_data`` object, with the feature columns overwritten
        by their standardized values.
    """
    feature_columns = ['Duration 1', 'Duration 2', 'Sensor Value', 'Sensor Value 2', 'Consumable Life']

    # Cast first so the scaler is fitted on float32 inputs.
    as_float32 = train_data[feature_columns].astype(np.float32)
    train_data[feature_columns] = as_float32

    # Fit on the (now float32) feature columns and overwrite them in place.
    feature_scaler = StandardScaler()
    standardized = feature_scaler.fit_transform(train_data[feature_columns])
    train_data[feature_columns] = standardized

    # Persist the fitted scaler for later reuse.
    with open('feature_scaler.pkl', 'wb') as scaler_file:
        pickle.dump(feature_scaler, scaler_file)

    return train_data
def split(data, train_frac=0.7, val_end_frac=0.85):
    """Split *data* into train/validation/test sets by whole runs.

    Rows are grouped by their 'Run ID' value, so every row of a given run ends
    up in exactly one of the three subsets. Run IDs are taken in order of
    first appearance in *data* (no shuffling): the first ``train_frac`` share
    of the unique IDs goes to train, the IDs up to ``val_end_frac`` go to
    validation, and the remainder goes to test.

    Args:
        data: DataFrame with a 'Run ID' column.
        train_frac: Cumulative fraction of unique run IDs at which the train
            set ends. Defaults to 0.7.
        val_end_frac: Cumulative fraction of unique run IDs at which the
            validation set ends (so the validation share is
            ``val_end_frac - train_frac``). Defaults to 0.85.

    Returns:
        A ``(train_data, val_data, test_data)`` tuple of DataFrames, each with
        a fresh 0-based index.

    Raises:
        ValueError: If the fractions do not satisfy
            ``0 <= train_frac <= val_end_frac <= 1``.
    """
    if not 0.0 <= train_frac <= val_end_frac <= 1.0:
        raise ValueError(
            "split fractions must satisfy 0 <= train_frac <= val_end_frac <= 1, "
            f"got train_frac={train_frac!r}, val_end_frac={val_end_frac!r}"
        )

    run_id_column = data['Run ID']
    all_run_ids = run_id_column.unique()
    total = len(all_run_ids)

    # Boundaries are truncated toward zero, so with few runs the validation
    # and/or test sets can legitimately be empty.
    train_end = int(train_frac * total)
    val_end = int(val_end_frac * total)

    id_groups = (
        all_run_ids[:train_end],
        all_run_ids[train_end:val_end],
        all_run_ids[val_end:],
    )
    train_data, val_data, test_data = (
        data[run_id_column.isin(ids)] for ids in id_groups
    )

    print("Train size:", len(train_data))
    print("Val size:", len(val_data))
    print("Test size:", len(test_data))

    return (
        train_data.reset_index(drop=True),
        val_data.reset_index(drop=True),
        test_data.reset_index(drop=True),
    )