As is tradition in this book, I will show you how the entire architecture for this model fits together here:
def build_models(lstm_units, num_encoder_tokens, num_decoder_tokens):
# train model
encoder_input = Input(shape=(None, num_encoder_tokens),
name='encoder_input')
encoder_outputs, state_h, state_c = LSTM(lstm_units,
return_state=True, name="encoder_lstm")(encoder_input)
encoder_states = [state_h, state_c]
decoder_input = Input(shape=(None, num_decoder_tokens),
name='decoder_input')
decoder_lstm = LSTM(lstm_units, return_sequences=True,
return_state=True, name="decoder_lstm")
decoder_outputs, _, _ = decoder_lstm(decoder_input,
initial_state=encoder_states)
decoder_dense = Dense(num_decoder_tokens, activation='softmax',
name='softmax_output')
decoder_output = decoder_dense(decoder_outputs)
model = Model([encoder_input, decoder_input], decoder_output)
model.compile(optimizer='rmsprop', loss='categorical_crossentropy')
encoder_model = Model(encoder_input, encoder_states)
decoder_state_input_h = Input(shape=(lstm_units,))
decoder_state_input_c = Input(shape=(lstm_units,))
decoder_states_inputs = [decoder_state_input_h,
decoder_state_input_c]
decoder_outputs, state_h, state_c = decoder_lstm(
decoder_input, initial_state=decoder_states_inputs)
decoder_states = [state_h, state_c]
decoder_outputs = decoder_dense(decoder_outputs)
decoder_model = Model(
[decoder_input] + decoder_states_inputs,
[decoder_outputs] + decoder_states)
return model, encoder_model, decoder_model
Note that we are returning all three models here. After the training model is trained, I will serialize all three with the keras model.save() method.