-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdynamic_model.py
More file actions
134 lines (114 loc) · 5.51 KB
/
dynamic_model.py
File metadata and controls
134 lines (114 loc) · 5.51 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
import torch
import time
import os
from datahandler import DataHandler
from Tester import Tester
from modules import DynamicRecModel
# --- Dataset locations: artifacts of the preprocessing pipeline, in order ---
DATASET_DIR = ''  # fixme
DATASET_FILE = ''  # fixme
DATASET_W_CONVERTED_TIMESTAMPS = f"{DATASET_DIR}/1_converted_timestamps.pickle"
DATASET_USER_ARTIST_MAPPED = f"{DATASET_DIR}/2_user_artist_mapped.pickle"
DATASET_USER_SESSIONS = f"{DATASET_DIR}/3_user_sessions.pickle"
DATASET_TRAIN_TEST_SPLIT = f"{DATASET_DIR}/4_train_test_split_user.pickle"
DATASET_USER_REDUCE = f"{DATASET_DIR}/5_train_test_split_user.pickle"
DATASET_BPR_MF = f"{DATASET_DIR}/bpr-mf_train_test_split.pickle"

# --- Session construction parameters ---
SESSION_TIMEDELTA = 60 * 60  # gap (seconds) that splits events into a new session (1h)
MAX_SESSION_LENGTH = 20  # maximum number of events in a session
MAX_SESSION_LENGTH_PRE_SPLIT = 2 * MAX_SESSION_LENGTH
MINIMUM_REQUIRED_SESSIONS = 3  # The dual-RNN should have minimum 2 two train + 1 to test
PAD_VALUE = 0
SPLIT_FRACTION = 0.8

# --- Reproducibility and GPU selection ---
SEED = 2
GPU = 0
torch.manual_seed(SEED)
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = str(GPU)
# --- Training hyperparameters ---
BATCHSIZE = 100
SEQLEN = 20 - 1  # events per session minus one (targets are the shifted sequence)
MAX_SESSION_REPRESENTATIONS = 15  # how many past session representations are kept per user
MAX_EPOCHS = 30

params = {
    "TOP_K": 20,  # length of the recommendation list that gets evaluated
    "lr": 0.001,
}

dropout = {
    "INTER": 0,
    "INTRA": 0,
}

# Layer sizes; derived entries are filled in below from the base entries.
dims = {
    "TIME_RESOLUTION": 500,
    "TIME_HIDDEN": 5,
    "USER_HIDDEN": 10,
    "EMBEDDING_DIM": 50,
}
dims["INTRA_HIDDEN"] = dims["EMBEDDING_DIM"]
dims["INTER_INPUT_DIM"] = dims["INTRA_HIDDEN"] + dims["TIME_HIDDEN"] + dims["USER_HIDDEN"]
dims["INTER_HIDDEN"] = dims["INTRA_HIDDEN"]

dataset_path = ''  # fixme
txt_log_name = 'Log.txt'
# Truncate/create the experiment log.
with open(txt_log_name, 'w+') as txt_file:
    txt_file.write("New experiment\n")

# Threshold (in days, expressed as a fraction) below which a return-time
# prediction counts — presumably consumed by the model's time loss; verify.
min_time = 1.0
time_threshold = torch.cuda.FloatTensor([min_time]) / 24
# Build the data pipeline: batching, session representations, and time encoding
# all come from the preprocessed dataset at dataset_path.
datahandler = DataHandler(dataset_path, BATCHSIZE, MAX_SESSION_REPRESENTATIONS, dims["INTRA_HIDDEN"],
                          dims["TIME_RESOLUTION"], min_time)
# Dataset-dependent dimensions must be known before the model is constructed.
dims["N_ITEMS"] = datahandler.get_num_items()
N_SESSIONS = datahandler.get_num_training_sessions()
dims["N_USERS"] = datahandler.get_num_users()
# TODO: Initialize tester
tester = Tester("Log")
model = DynamicRecModel(dims, dropout, params, datahandler, tester, time_threshold)
# setting up for training
epoch_nr = 0
start_time = time.time()
num_training_batches = datahandler.get_num_training_batches()
num_test_batches = datahandler.get_num_test_batches()
epoch_loss = 0

# Main driver: train for MAX_EPOCHS epochs, then evaluate on the test split
# after the final epoch. All progress is appended to the text log.
while epoch_nr < MAX_EPOCHS:
    with open(txt_log_name, 'a') as txt_file:
        txt_file.write("Starting epoch #" + str(epoch_nr) + "\n")
    start_time_epoch = time.time()

    # reset the datahandler and get first training batch
    datahandler.reset_user_batch_data_train()
    datahandler.reset_user_session_representations()
    items, item_targets, session_lengths, session_reps, session_rep_lengths, user_list, sess_time_reps, time_targets, first_rec_targets = datahandler.get_next_train_batch()
    batch_nr = 0
    model.train_mode()

    # A batch smaller than half of BATCHSIZE signals the end of the epoch.
    while len(items) > int(BATCHSIZE / 2):
        batch_start_time = time.time()
        batch_loss = model.train_on_batch(items, session_reps, sess_time_reps, user_list, item_targets, time_targets,
                                          first_rec_targets, session_lengths, session_rep_lengths)
        epoch_loss += batch_loss
        batch_runtime = time.time() - batch_start_time
        items, item_targets, session_lengths, session_reps, session_rep_lengths, user_list, sess_time_reps, time_targets, first_rec_targets = datahandler.get_next_train_batch()
        # Periodic progress line; 1500 keeps log volume manageable.
        if batch_nr % 1500 == 0:
            with open(txt_log_name, 'a') as txt_file:
                txt_file.write("Batch: " + str(batch_nr) + "/" + str(num_training_batches) + " batch_loss: " + str(
                    batch_loss) + "\n")
        batch_nr += 1

    with open(txt_log_name, 'a') as txt_file:
        txt_file.write("Epoch loss: " + str(epoch_loss / batch_nr) + "\n")

    # ***********************************************Testing************************************************************
    # Evaluation runs only once, after the last training epoch.
    if epoch_nr == MAX_EPOCHS - 1:
        # BUG FIX: the log was opened without a mode (default 'r'), so the
        # write below raised io.UnsupportedOperation — open for append,
        # consistent with every other log write in this script.
        with open(txt_log_name, 'a') as f:
            f.write("**************************Testing*********************\n")
        datahandler.reset_user_batch_data_test()
        items, item_targets, session_lengths, session_reps, session_rep_lengths, user_list, sess_time_reps, time_targets, first_rec_targets = datahandler.get_next_test_batch()
        model.eval_mode()
        batch_nr = 0
        while len(items) > int(BATCHSIZE / 2):
            # (removed an unused batch_start_time assignment here)
            predictions = model.predict_on_batch(items, session_reps, sess_time_reps, user_list, time_targets,
                                                 session_lengths, session_rep_lengths, True)
            # Column 0 appears to be the first-recommendation/time output and
            # columns 1: the item scores — confirm against Tester.evaluate_batch.
            tester.evaluate_batch(predictions[:, 1:], item_targets, session_lengths, predictions[:, 0],
                                  first_rec_targets)
            items, item_targets, session_lengths, session_reps, session_rep_lengths, user_list, sess_time_reps, time_targets, first_rec_targets = datahandler.get_next_test_batch()
            batch_nr += 1

        cumulate_recall_mrr_res, time_res, individual_recall_mrr_res = tester.get_result_and_reset()
        with open(txt_log_name, 'a') as f:
            f.write(cumulate_recall_mrr_res + "\n\n")
            f.write(individual_recall_mrr_res + "\n\n")
            f.write(str(model.get_time_loss_weight().data) + "\n\n")
            f.write(time_res + "\n\n")

    with open(txt_log_name, 'a') as f:
        f.write("Epoch #" + str(epoch_nr) + " Cost Time: " + str(time.time() - start_time_epoch) + "\n\n")
    epoch_nr += 1
    epoch_loss = 0