self.input_data_input = tf.placeholder(tf.int32, [None, None], name='input')
self.targets = tf.placeholder(tf.int32, [None, None], name='targets')
self.learning_rate_input = tf.placeholder(tf.float32, name='learning_rate')
self.target_sequences_length_input = tf.placeholder(tf.int32, (None,), name='target_sequences_length')
self.max_target_sequences_length = tf.reduce_max(self.target_sequences_length_input, name='max_target_len')
self.source_sequences_length_input = tf.placeholder(tf.int32, (None,), name='source_sequences_length')
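Both [None, None] placeholders hold [batch_size, max_sequence_length] matrices of token ids, padded to the longest sequence in the batch; the two length placeholders carry the unpadded lengths used by the encoder and the loss mask below. A minimal sketch of a feed for these placeholders (the `model` instance holding the graph, the toy id arrays, and 0 as the <PAD> id are illustrative assumptions, not from the article):

import numpy as np

# Two source sequences padded to length 4, their targets padded to length 5;
# 0 is assumed to be the <PAD> id here.
source_batch = np.array([[5, 12, 7, 0], [3, 9, 0, 0]], dtype=np.int32)
target_batch = np.array([[1, 5, 12, 7, 2], [1, 3, 9, 2, 0]], dtype=np.int32)

feed = {
    model.input_data_input: source_batch,
    model.targets: target_batch,
    model.source_sequences_length_input: [3, 2],  # unpadded source lengths
    model.target_sequences_length_input: [5, 4],  # unpadded target lengths
    model.learning_rate_input: 0.001,
}

Such a feed is passed to sess.run together with the cost and training ops; see the training-loop sketch further below.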
# 1. Encoder embedding
encoder_embed_input = tf.contrib.layers.embed_sequence(
    self.input_data_input, self.vocabulary_size,
    TF_FLAGS.FLAGS.encoding_embedding_size)

# 2. Construct the encoder layer
encoder_cell = tf.contrib.rnn.MultiRNNCell(
    [self.make_cell() for _ in range(TF_FLAGS.FLAGS.num_layers)])
enc_output, enc_state = tf.nn.dynamic_rnn(
    encoder_cell, encoder_embed_input,
    sequence_length=self.source_sequences_length_input, dtype=tf.float32)
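tf.contrib.layers.embed_sequence is shorthand for creating a trainable embedding matrix and looking every token id up in it. A spelled-out equivalent, for comparison with the decoder embedding below (the variable name encoder_embeddings is an illustration):

encoder_embeddings = tf.Variable(
    tf.random_uniform([self.vocabulary_size, TF_FLAGS.FLAGS.encoding_embedding_size]))
encoder_embed_input = tf.nn.embedding_lookup(encoder_embeddings, self.input_data_input)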
def make_cell(self):
    # Factory for the LSTM layers used by both the encoder and the decoder
    # stacks: one LSTM layer, weights initialized uniformly in [-0.1, 0.1].
    dec_cell = tf.contrib.rnn.LSTMCell(
        TF_FLAGS.FLAGS.rnn_size,
        initializer=tf.random_uniform_initializer(-0.1, 0.1, seed=2))
    return dec_cell
# 1. Decoder embedding
target_vocab_size = self.vocabulary_size
decoder_embeddings = tf.Variable(
    tf.random_uniform([target_vocab_size, TF_FLAGS.FLAGS.decoding_embedding_size]))
decoder_embed_input = tf.nn.embedding_lookup(decoder_embeddings, decoder_input)
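The snippet above uses decoder_input, which the excerpt never constructs. The usual construction shifts the targets right by one step: drop the last token of each sequence and prepend the <GO> token, so that at step t the decoder sees tokens 0..t-1 and predicts token t. A minimal sketch under that assumption (the helper name process_decoder_input is an illustration, not from the article):

def process_decoder_input(self, targets):
    # Drop the last token of every target sequence in the batch...
    ending = tf.strided_slice(
        targets, [0, 0], [TF_FLAGS.FLAGS.batch_size, -1], [1, 1])
    # ...and prepend the <GO> token id to each sequence.
    return tf.concat(
        [tf.fill([TF_FLAGS.FLAGS.batch_size, 1], ua.UrbanArea.vacab_go_key),
         ending], 1)

decoder_input = self.process_decoder_input(self.targets)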
# 2. Construct the decoder layer
dec_cell = tf.contrib.rnn.MultiRNNCell(
    [self.make_cell() for _ in range(TF_FLAGS.FLAGS.num_layers)])

# 3. Dense layer to translate the decoder's output at each time step
#    into a choice from the target vocabulary
#    (requires: from tensorflow.python.layers.core import Dense)
output_layer = Dense(
    target_vocab_size,
    kernel_initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.1))
# Helper for the training process. Used by BasicDecoder to read inputs:
# teacher forcing, i.e. the decoder is fed the ground-truth target tokens.
training_helper = tf.contrib.seq2seq.TrainingHelper(
    inputs=decoder_embed_input,
    sequence_length=self.target_sequences_length_input,
    time_major=False)

# Basic decoder, initialized with the encoder's final state.
training_decoder = tf.contrib.seq2seq.BasicDecoder(
    dec_cell, training_helper, enc_state, output_layer)

# Perform dynamic decoding using the decoder.
training_decoder_output = tf.contrib.seq2seq.dynamic_decode(
    training_decoder, impute_finished=True,
    maximum_iterations=self.max_target_sequences_length)[0]
start_tokens = tf.tile(
    tf.constant([ua.UrbanArea.vacab_go_key], dtype=tf.int32),
    [TF_FLAGS.FLAGS.batch_size], name='start_tokens')

# Helper for the inference process: at each step the decoder embeds its own
# previous (greedy argmax) prediction instead of a ground-truth token, and
# stops once the <EOS> token is produced.
inference_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
    decoder_embeddings, start_tokens, ua.UrbanArea.vacab_eos_key)

# Basic decoder. (In a complete model the training and inference decoders are
# typically built in the same variable scope, the second with reuse=True, so
# that they share weights.)
inference_decoder = tf.contrib.seq2seq.BasicDecoder(
    dec_cell, inference_helper, enc_state, output_layer)
inference_decoder_output = tf.contrib.seq2seq.dynamic_decode(
    inference_decoder, impute_finished=True,
    maximum_iterations=self.max_target_sequences_length)[0]
training_logits = tf.identity(training_decoder_output.rnn_output, 'logits')
_ = tf.identity(inference_decoder_output.sample_id, name='predictions')

# Create the weights for sequence_loss: 1.0 for real target tokens and 0.0 for
# padding, so padded positions do not contribute to the loss.
masks = tf.sequence_mask(
    self.target_sequences_length_input, self.max_target_sequences_length,
    dtype=tf.float32, name='masks')

with tf.name_scope("optimization"):
    # Loss function
    self.cost = tf.contrib.seq2seq.sequence_loss(training_logits, self.targets, masks)
    tf.summary.scalar("loss", self.cost)

    # Optimizer
    optimizer = tf.train.AdamOptimizer(self.learning_rate_input)

    # Gradient clipping keeps each gradient component in [-5, 5] to avoid
    # exploding gradients in the recurrent layers.
    gradients = optimizer.compute_gradients(self.cost)
    capped_gradients = [(tf.clip_by_value(grad, -5., 5.), var)
                        for grad, var in gradients if grad is not None]
    self.train_op = optimizer.apply_gradients(capped_gradients)
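A minimal training-loop sketch that would produce progress lines like those below (the batch generator get_batches, the validation feed valid_feed, and the hyperparameter names num_epochs, num_batches, and learning_rate are assumptions for illustration, not shown in the article):

import time

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(1, num_epochs + 1):
        for batch_i, (src, tgt, src_len, tgt_len) in enumerate(get_batches(train_data), 1):
            start = time.time()
            # One gradient step on the current padded batch.
            _, loss = sess.run(
                [model.train_op, model.cost],
                {model.input_data_input: src,
                 model.targets: tgt,
                 model.source_sequences_length_input: src_len,
                 model.target_sequences_length_input: tgt_len,
                 model.learning_rate_input: learning_rate})
            # Report training and validation loss every 20 batches.
            if batch_i % 20 == 0:
                val_loss = sess.run(model.cost, valid_feed)
                print('Epoch {}/{} Batch {}/{} Loss: {:.3f} '
                      'Validation loss: {:.3f} Time: {:.4f}s'.format(
                          epoch, num_epochs, batch_i, num_batches,
                          loss, val_loss, time.time() - start))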
Epoch 1/100 Batch 20/65 Loss: 1.170 Validation loss: 1.082 Time: 0.0039s
Epoch 1/100 Batch 40/65 Loss: 0.868 Validation loss: 0.950 Time: 0.0029s
Epoch 1/100 Batch 60/65 Loss: 0.939 Validation loss: 0.794 Time: 0.0031s
...
Epoch 99/100 Batch 60/65 Loss: 0.136 Validation loss: 0.403 Time: 0.0030s
Epoch 100/100 Batch 20/65 Loss: 0.149 Validation loss: 0.430 Time: 0.0037s
Epoch 100/100 Batch 40/65 Loss: 0.110 Validation loss: 0.423 Time: 0.0031s
Epoch 100/100 Batch 60/65 Loss: 0.153 Validation loss: 0.397 Time: 0.0031s
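Because the graph tensors were given explicit names above ('input', 'predictions', 'source_sequences_length', 'target_sequences_length'), the trained model can be restored from a checkpoint and queried without rebuilding the Python graph. A minimal inference sketch (the checkpoint path 'checkpoints/dev', batch_size, and the already-encoded id sequence seq are illustrative assumptions):

import tensorflow as tf

loaded_graph = tf.Graph()
with tf.Session(graph=loaded_graph) as sess:
    # Restore the graph definition and the trained weights.
    loader = tf.train.import_meta_graph('checkpoints/dev.meta')
    loader.restore(sess, 'checkpoints/dev')

    input_data = loaded_graph.get_tensor_by_name('input:0')
    predictions = loaded_graph.get_tensor_by_name('predictions:0')
    src_len = loaded_graph.get_tensor_by_name('source_sequences_length:0')
    tgt_len = loaded_graph.get_tensor_by_name('target_sequences_length:0')

    # The graph expects a full batch, so the single sequence is tiled.
    answer_ids = sess.run(predictions,
                          {input_data: [seq] * batch_size,
                           src_len: [len(seq)] * batch_size,
                           tgt_len: [len(seq)] * batch_size})[0]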
Source: https://habr.com/ru/post/354220/