diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..6cd11ba
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,104 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a Python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other info into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+.hypothesis/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+.static_storage/
+.media/
+local_settings.py
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# pyenv
+.python-version
+
+# Celery beat schedule file
+celerybeat-schedule
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
\ No newline at end of file
diff --git a/program/BasicModel.py b/program/BasicModel.py
index b194205..d137e23 100644
--- a/program/BasicModel.py
+++ b/program/BasicModel.py
@@ -11,8 +11,8 @@
 from Encoding import encoding
 import argparse
 from keras.preprocessing import sequence
-from keras.models import Sequential, Graph, Model
-from keras.layers import Input, merge, Merge, Dense, TimeDistributedDense, Dropout, Activation, RepeatVector, Permute, Reshape, RepeatVector, Flatten
+from keras.models import Sequential, Model
+from keras.layers import Input, merge, Dense, Dropout, Activation, RepeatVector, Permute, Reshape, RepeatVector, Flatten, concatenate
 from keras.layers.convolutional import Convolution1D, MaxPooling1D, AveragePooling1D
 from keras.layers.embeddings import Embedding
 from keras.layers.recurrent import SimpleRNN, GRU, LSTM
@@ -176,18 +176,18 @@ def build( self ):
 				forward = GRU(self.hidden_size, return_sequences=True, init=self.init_type, activation=self.activation)(current)
 				backward = GRU(self.hidden_size, return_sequences=True, init=self.init_type, activation=self.activation, go_backwards=True)(current)
 			elif 'lstm' in self.arch:
-				forward = LSTM(self.hidden_size, return_sequences=True, init=self.init_type, activation=self.activation)(current)
-				backward = LSTM(self.hidden_size, return_sequences=True, init=self.init_type, activation=self.activation, go_backwards=True)(current)
+				forward = LSTM(self.hidden_size, return_sequences=True, kernel_initializer=self.init_type, activation=self.activation)(current)
+				backward = LSTM(self.hidden_size, return_sequences=True, kernel_initializer=self.init_type, activation=self.activation, go_backwards=True)(current)
 			if 'b' in self.arch:
-				tagger = merge([forward, backward], mode='concat')
+				tagger = concatenate([forward, backward])
 			else:
 				tagger = forward
 			if self.dropout:
 				tagger = Dropout(self.dropout_ratio)(tagger)
-                        prediction = TimeDistributed(Dense(self.output_vocab_size, activation='softmax'))(tagger)
+			prediction = TimeDistributed(Dense(self.output_vocab_size, activation='softmax'))(tagger)
 
-                        self.model = Model(input=raw_current, output=prediction)
-                        self.model.compile(loss='categorical_crossentropy', optimizer=opt_func)
+			self.model = Model(input=raw_current, output=prediction)
+			self.model.compile(loss='categorical_crossentropy', optimizer=opt_func)
 
 		# 2-Stacked Layered RNN (LSTM, SimpleRNN, GRU)
 		elif self.arch == '2lstm' or self.arch == '2rnn' or self.arch == '2gru':
@@ -242,14 +242,14 @@ def build( self ):
 				encoder = fencoder
 				labeling = flabeling
 			#intent = Dense(self.output_vocab_size, activation='softmax')(encoder)
-                        encoder = RepeatVector(self.time_length)(encoder)
-                        tagger = merge([encoder, labeling], mode='concat', concat_axis=-1)
+			encoder = RepeatVector(self.time_length)(encoder)
+			tagger = merge([encoder, labeling], mode='concat', concat_axis=-1)
 			if self.dropout:
 				tagger = Dropout(self.dropout_ratio)(tagger)
-                        prediction = TimeDistributed(Dense(self.output_vocab_size, activation='softmax'))(tagger)
+			prediction = TimeDistributed(Dense(self.output_vocab_size, activation='softmax'))(tagger)
 
-                        self.model = Model(input=raw_current, output=prediction)
-                        self.model.compile(loss='categorical_crossentropy', optimizer=opt_func)
+			self.model = Model(input=raw_current, output=prediction)
+			self.model.compile(loss='categorical_crossentropy', optimizer=opt_func)
 
 		# Encode intent information by feeding all words and then start tagging
 		elif self.arch == 'i-c-rnn' or self.arch == 'i-c-gru' or self.arch == 'i-c-lstm' or self.arch == 'i-c-brnn' or self.arch == 'i-c-bgru' or self.arch == 'i-c-blstm':
@@ -275,14 +275,14 @@ def build( self ):
 			else:
 				labeling = forward
 			#intent = Dense(self.output_vocab_size, activation='softmax')(encoder)
-                        encoder = RepeatVector(self.time_length)(encoder)
-                        tagger = merge([encoder, labeling], mode='concat', concat_axis=-1)
+			encoder = RepeatVector(self.time_length)(encoder)
+			tagger = merge([encoder, labeling], mode='concat', concat_axis=-1)
 			if self.dropout:
 				tagger = Dropout(self.dropout_ratio)(tagger)
-                        prediction = TimeDistributed(Dense(self.output_vocab_size, activation='softmax'))(tagger)
+			prediction = TimeDistributed(Dense(self.output_vocab_size, activation='softmax'))(tagger)
 
-                        self.model = Model(input=raw_current, output=prediction)
-                        self.model.compile(loss='categorical_crossentropy', optimizer=opt_func)
+			self.model = Model(input=raw_current, output=prediction)
+			self.model.compile(loss='categorical_crossentropy', optimizer=opt_func)
 
 
 		# Encode all history and the current utterance first and then start tagging
@@ -317,14 +317,14 @@ def build( self ):
 				encoder = fencoder
 				labeling = flabeling
 			#intent = Dense(self.output_vocab_size, activation='softmax')(encoder)
-                        encoder = RepeatVector(self.time_length)(encoder)
-                        tagger = merge([encoder, labeling], mode='concat', concat_axis=-1)
+			encoder = RepeatVector(self.time_length)(encoder)
+			tagger = merge([encoder, labeling], mode='concat', concat_axis=-1)
 			if self.dropout:
 				tagger = Dropout(self.dropout_ratio)(tagger)
-                        prediction = TimeDistributed(Dense(self.output_vocab_size, activation='softmax'))(tagger)
+			prediction = TimeDistributed(Dense(self.output_vocab_size, activation='softmax'))(tagger)
 
-                        self.model = Model(input=[raw_his, raw_cur], output=prediction)
-                        self.model.compile(loss='categorical_crossentropy', optimizer=opt_func)
+			self.model = Model(input=[raw_his, raw_cur], output=prediction)
+			self.model.compile(loss='categorical_crossentropy', optimizer=opt_func)
 
 		# Encode all history and the current utterance first and then start tagging
 		elif self.arch == 'hi-c-rnn' or self.arch == 'hi-c-gru' or self.arch == 'hi-c-lstm' or self.arch == 'hi-c-brnn' or self.arch == 'hi-c-bgru' or self.arch == 'hi-c-blstm':
@@ -353,14 +353,14 @@ def build( self ):
 			encoder = MaxPooling1D(self.time_length)(encoder)
 			encoder = Flatten()(encoder)
 			#intent = Dense(self.output_vocab_size, activation='softmax')(encoder)
-                        encoder = RepeatVector(self.time_length)(encoder)
-                        tagger = merge([encoder, labeling], mode='concat', concat_axis=-1)
+			encoder = RepeatVector(self.time_length)(encoder)
+			tagger = merge([encoder, labeling], mode='concat', concat_axis=-1)
 			if self.dropout:
 				tagger = Dropout(self.dropout_ratio)(tagger)
-                        prediction = TimeDistributed(Dense(self.output_vocab_size, activation='softmax'))(tagger)
+			prediction = TimeDistributed(Dense(self.output_vocab_size, activation='softmax'))(tagger)
 
-                        self.model = Model(input=[raw_his, raw_cur], output=prediction)
-                        self.model.compile(loss='categorical_crossentropy', optimizer=opt_func)
+			self.model = Model(input=[raw_his, raw_cur], output=prediction)
+			self.model.compile(loss='categorical_crossentropy', optimizer=opt_func)
 
 		elif 'amemn2n' in self.arch:
 			# current: (, time_length, embedding_size)
@@ -434,7 +434,7 @@ def build( self ):
 			else:
 				raw_input_memory = Input(shape=(self.his_length * self.time_length, self.embedding_size), name='input_memory')
 				input_memory = Reshape((self.his_length, self.time_length, self.embedding_size))(raw_input_memory)
-                        mem_vec = TimeDistributed(sent_model)(input_memory)
+			mem_vec = TimeDistributed(sent_model)(input_memory)
 
 			# compute the similarity between sentence embeddings for attention
 			match = merge([mem_vec, cur_vec], mode='dot', dot_axes=[2, 1])
@@ -452,18 +452,18 @@ def build( self ):
 				backward = LSTM(self.hidden_size, return_sequences=False, init=self.init_type, activation=self.activation, go_backwards=True)(current)
 				labeling = merge([forward, backward], mode='concat', concat_axis=-1)
 			elif 'rnn' in self.arch:
-	                        labeling = SimpleRNN(self.hidden_size, return_sequences=False, init=self.init_type, activation=self.activation)(current)
+				labeling = SimpleRNN(self.hidden_size, return_sequences=False, init=self.init_type, activation=self.activation)(current)
 			elif 'gru' in self.arch:
-	                        labeling = GRU(self.hidden_size, return_sequences=False, init=self.init_type, activation=self.activation)(current)
+				labeling = GRU(self.hidden_size, return_sequences=False, init=self.init_type, activation=self.activation)(current)
 			elif 'lstm' in self.arch:
-	                        labeling = LSTM(self.hidden_size, return_sequences=False, init=self.init_type, activation=self.activation)(current)
-                        tagger = merge([encoder, labeling], mode='concat', concat_axis=-1)
+				labeling = LSTM(self.hidden_size, return_sequences=False, init=self.init_type, activation=self.activation)(current)
+			tagger = merge([encoder, labeling], mode='concat', concat_axis=-1)
 			if self.dropout:
 				tagger = Dropout(self.dropout_ratio)(tagger)
-                        prediction = Dense(self.output_vocab_size, activation='softmax')(tagger)
+			prediction = Dense(self.output_vocab_size, activation='softmax')(tagger)
 
-                        self.model = Model(input=[raw_input_memory, raw_current], output=prediction)
-                        self.model.compile(loss='categorical_crossentropy', optimizer=opt_func)
+			self.model = Model(input=[raw_input_memory, raw_current], output=prediction)
+			self.model.compile(loss='categorical_crossentropy', optimizer=opt_func)
 
 		elif 'memn2n' in self.arch:
 			# current: (, time_length, embedding_size)
@@ -537,7 +537,7 @@ def build( self ):
 			else:
 				raw_input_memory = Input(shape=(self.his_length * self.time_length, self.embedding_size), name='input_memory')
 				input_memory = Reshape((self.his_length, self.time_length, self.embedding_size))(raw_input_memory)
-                        mem_vec = TimeDistributed(sent_model)(input_memory)
+			mem_vec = TimeDistributed(sent_model)(input_memory)
 
 			# compute the similarity between sentence embeddings for attention
 			match = merge([mem_vec, cur_vec], mode='dot', dot_axes=[2, 1])
@@ -548,7 +548,7 @@ def build( self ):
 			his_vec = merge([mem_vec, match], mode='dot', dot_axes=[1, 1])
 			encoder = merge([his_vec, cur_vec], mode='sum')
 			encoder = Dense(self.embedding_size)(encoder)
-                        encoder = RepeatVector(self.time_length)(encoder)
+			encoder = RepeatVector(self.time_length)(encoder)
 
 			# tagging the words in the current sentence
 			if 'blstm' in self.arch:
@@ -556,23 +556,23 @@ def build( self ):
 				backward = LSTM(self.hidden_size, return_sequences=True, init=self.init_type, activation=self.activation, go_backwards=True)(current)
 				labeling = merge([forward, backward], mode='concat', concat_axis=-1)
 			elif 'rnn' in self.arch:
-	                        labeling = SimpleRNN(self.hidden_size, return_sequences=True, init=self.init_type, activation=self.activation)(current)
+				labeling = SimpleRNN(self.hidden_size, return_sequences=True, init=self.init_type, activation=self.activation)(current)
 			elif 'gru' in self.arch:
-	                        labeling = GRU(self.hidden_size, return_sequences=True, init=self.init_type, activation=self.activation)(current)
+				labeling = GRU(self.hidden_size, return_sequences=True, init=self.init_type, activation=self.activation)(current)
 			elif 'lstm' in self.arch:
-	                        labeling = LSTM(self.hidden_size, return_sequences=True, init=self.init_type, activation=self.activation)(current)
-                        tagger = merge([encoder, labeling], mode='concat', concat_axis=-1)
+				labeling = LSTM(self.hidden_size, return_sequences=True, init=self.init_type, activation=self.activation)(current)
+			tagger = merge([encoder, labeling], mode='concat', concat_axis=-1)
 			if self.dropout:
 				tagger = Dropout(self.dropout_ratio)(tagger)
-                        prediction = TimeDistributed(Dense(self.output_vocab_size, activation='softmax'))(tagger)
+			prediction = TimeDistributed(Dense(self.output_vocab_size, activation='softmax'))(tagger)
 
-                        self.model = Model(input=[raw_input_memory, raw_current], output=prediction)
-                        self.model.compile(loss='categorical_crossentropy', optimizer=opt_func)
+			self.model = Model(input=[raw_input_memory, raw_current], output=prediction)
+			self.model.compile(loss='categorical_crossentropy', optimizer=opt_func)
 
 
 	def train(self, H_train, X_train, y_train, H_dev, X_dev, y_dev, val_ratio=0.0):
 		# load saved model weights
-                if self.load_weight is not None:
+		if self.load_weight is not None:
 			sys.stderr.write("Load the pretrained weights for the model.\n")
 			self.model.load_weights(self.load_weight)
 		else:
diff --git a/program/BasicModel.pyc b/program/BasicModel.pyc
deleted file mode 100644
index e289b76..0000000
Binary files a/program/BasicModel.pyc and /dev/null differ
diff --git a/program/History.pyc b/program/History.pyc
deleted file mode 100644
index d6640fb..0000000
Binary files a/program/History.pyc and /dev/null differ
diff --git a/program/PredefinedEmbedding.pyc b/program/PredefinedEmbedding.pyc
deleted file mode 100644
index 442f12f..0000000
Binary files a/program/PredefinedEmbedding.pyc and /dev/null differ
diff --git a/program/wordSlotDataSet.pyc b/program/wordSlotDataSet.pyc
deleted file mode 100644
index 50797ae..0000000
Binary files a/program/wordSlotDataSet.pyc and /dev/null differ