-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdynamic_model.py
More file actions
134 lines (114 loc) · 5.51 KB
/
dynamic_model.py
File metadata and controls
134 lines (114 loc) · 5.51 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
import torch
import time
import os
from datahandler import DataHandler
from Tester import Tester
from modules import DynamicRecModel
# --- Dataset locations: artifacts of the preprocessing pipeline, in order ---
DATASET_DIR = ''  # fixme
DATASET_FILE = ''  # fixme
DATASET_W_CONVERTED_TIMESTAMPS = f"{DATASET_DIR}/1_converted_timestamps.pickle"
DATASET_USER_ARTIST_MAPPED = f"{DATASET_DIR}/2_user_artist_mapped.pickle"
DATASET_USER_SESSIONS = f"{DATASET_DIR}/3_user_sessions.pickle"
DATASET_TRAIN_TEST_SPLIT = f"{DATASET_DIR}/4_train_test_split_user.pickle"
DATASET_USER_REDUCE = f"{DATASET_DIR}/5_train_test_split_user.pickle"
DATASET_BPR_MF = f"{DATASET_DIR}/bpr-mf_train_test_split.pickle"

# --- Session construction parameters ---
SESSION_TIMEDELTA = 60 * 60  # gap (seconds) that splits events into a new session (1h)
MAX_SESSION_LENGTH = 20  # maximum number of events in a session
MAX_SESSION_LENGTH_PRE_SPLIT = 2 * MAX_SESSION_LENGTH
MINIMUM_REQUIRED_SESSIONS = 3  # The dual-RNN should have minimum 2 two train + 1 to test
PAD_VALUE = 0
SPLIT_FRACTION = 0.8

# --- Reproducibility and GPU selection ---
SEED = 2
GPU = 0
torch.manual_seed(SEED)
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = str(GPU)
# --- Training hyperparameters ---
BATCHSIZE = 100
SEQLEN = 20 - 1  # events per session minus one (targets are the shifted sequence)
MAX_SESSION_REPRESENTATIONS = 15  # how many past session representations are kept per user
MAX_EPOCHS = 30

params = {
    "TOP_K": 20,  # length of the recommendation list that gets evaluated
    "lr": 0.001,
}

dropout = {
    "INTER": 0,
    "INTRA": 0,
}

# Layer sizes; derived entries are filled in below from the base entries.
dims = {
    "TIME_RESOLUTION": 500,
    "TIME_HIDDEN": 5,
    "USER_HIDDEN": 10,
    "EMBEDDING_DIM": 50,
}
dims["INTRA_HIDDEN"] = dims["EMBEDDING_DIM"]
dims["INTER_INPUT_DIM"] = dims["INTRA_HIDDEN"] + dims["TIME_HIDDEN"] + dims["USER_HIDDEN"]
dims["INTER_HIDDEN"] = dims["INTRA_HIDDEN"]

dataset_path = ''  # fixme
txt_log_name = 'Log.txt'
# Truncate/create the experiment log.
with open(txt_log_name, 'w+') as txt_file:
    txt_file.write("New experiment\n")

# Threshold (in days, expressed as a fraction) below which a return-time
# prediction counts — presumably consumed by the model's time loss; verify.
min_time = 1.0
time_threshold = torch.cuda.FloatTensor([min_time]) / 24
# Build the data pipeline: batching, session representations, and time encoding
# all come from the preprocessed dataset at dataset_path.
datahandler = DataHandler(dataset_path, BATCHSIZE, MAX_SESSION_REPRESENTATIONS, dims["INTRA_HIDDEN"],
                          dims["TIME_RESOLUTION"], min_time)
# Dataset-dependent dimensions must be known before the model is constructed.
dims["N_ITEMS"] = datahandler.get_num_items()
N_SESSIONS = datahandler.get_num_training_sessions()
dims["N_USERS"] = datahandler.get_num_users()
# TODO: Initialize tester
tester = Tester("Log")
model = DynamicRecModel(dims, dropout, params, datahandler, tester, time_threshold)
# setting up for training
epoch_nr = 0
start_time = time.time()
num_training_batches = datahandler.get_num_training_batches()
num_test_batches = datahandler.get_num_test_batches()
epoch_loss = 0

# Main driver: train for MAX_EPOCHS epochs, then evaluate on the test split
# after the final epoch. All progress is appended to the text log.
while epoch_nr < MAX_EPOCHS:
    with open(txt_log_name, 'a') as txt_file:
        txt_file.write("Starting epoch #" + str(epoch_nr) + "\n")
    start_time_epoch = time.time()

    # reset the datahandler and get first training batch
    datahandler.reset_user_batch_data_train()
    datahandler.reset_user_session_representations()
    items, item_targets, session_lengths, session_reps, session_rep_lengths, user_list, sess_time_reps, time_targets, first_rec_targets = datahandler.get_next_train_batch()
    batch_nr = 0
    model.train_mode()

    # A batch smaller than half of BATCHSIZE signals the end of the epoch.
    while len(items) > int(BATCHSIZE / 2):
        batch_start_time = time.time()
        batch_loss = model.train_on_batch(items, session_reps, sess_time_reps, user_list, item_targets, time_targets,
                                          first_rec_targets, session_lengths, session_rep_lengths)
        epoch_loss += batch_loss
        batch_runtime = time.time() - batch_start_time
        items, item_targets, session_lengths, session_reps, session_rep_lengths, user_list, sess_time_reps, time_targets, first_rec_targets = datahandler.get_next_train_batch()
        # Periodic progress line; 1500 keeps log volume manageable.
        if batch_nr % 1500 == 0:
            with open(txt_log_name, 'a') as txt_file:
                txt_file.write("Batch: " + str(batch_nr) + "/" + str(num_training_batches) + " batch_loss: " + str(
                    batch_loss) + "\n")
        batch_nr += 1

    with open(txt_log_name, 'a') as txt_file:
        txt_file.write("Epoch loss: " + str(epoch_loss / batch_nr) + "\n")

    # ***********************************************Testing************************************************************
    # Evaluation runs only once, after the last training epoch.
    if epoch_nr == MAX_EPOCHS - 1:
        # BUG FIX: the log was opened without a mode (default 'r'), so the
        # write below raised io.UnsupportedOperation — open for append,
        # consistent with every other log write in this script.
        with open(txt_log_name, 'a') as f:
            f.write("**************************Testing*********************\n")
        datahandler.reset_user_batch_data_test()
        items, item_targets, session_lengths, session_reps, session_rep_lengths, user_list, sess_time_reps, time_targets, first_rec_targets = datahandler.get_next_test_batch()
        model.eval_mode()
        batch_nr = 0
        while len(items) > int(BATCHSIZE / 2):
            # (removed an unused batch_start_time assignment here)
            predictions = model.predict_on_batch(items, session_reps, sess_time_reps, user_list, time_targets,
                                                 session_lengths, session_rep_lengths, True)
            # Column 0 appears to be the first-recommendation/time output and
            # columns 1: the item scores — confirm against Tester.evaluate_batch.
            tester.evaluate_batch(predictions[:, 1:], item_targets, session_lengths, predictions[:, 0],
                                  first_rec_targets)
            items, item_targets, session_lengths, session_reps, session_rep_lengths, user_list, sess_time_reps, time_targets, first_rec_targets = datahandler.get_next_test_batch()
            batch_nr += 1

        cumulate_recall_mrr_res, time_res, individual_recall_mrr_res = tester.get_result_and_reset()
        with open(txt_log_name, 'a') as f:
            f.write(cumulate_recall_mrr_res + "\n\n")
            f.write(individual_recall_mrr_res + "\n\n")
            f.write(str(model.get_time_loss_weight().data) + "\n\n")
            f.write(time_res + "\n\n")

    with open(txt_log_name, 'a') as f:
        f.write("Epoch #" + str(epoch_nr) + " Cost Time: " + str(time.time() - start_time_epoch) + "\n\n")
    epoch_nr += 1
    epoch_loss = 0