(C:\anaconda3) C:\DL\rnn-tolstoy>conda create -n tolstoy ...
(C:\anaconda3) C:\DL\rnn-tolstoy>activate tolstoy
(tolstoy) C:\DL\rnn-tolstoy>conda install numpy tensorflow jupyter ...
(tolstoy) C:\DL\rnn-tolstoy>jupyter notebook
import time
from collections import namedtuple

import numpy as np
import tensorflow as tf
with open('anna.txt', 'r') as f:
    text = f.read()
vocab = sorted(set(text))
vocab_to_int = {c: i for i, c in enumerate(vocab)}
int_to_vocab = dict(enumerate(vocab))
encoded = np.array([vocab_to_int[c] for c in text], dtype=np.int32)
text[:110]
Out: 'ЧАСТЬ ПЕРВАЯ\n\n\n\nI\n\nВсе счастливые семьи похожи друг на друга, каждая несчастливая семья несчастлива по-своему.'
encoded[:110]
Out: array([ 99, 77, 93, 94, 102, 1, 91, 82, 92, 79, 77, 105, 0, 0, 0, 0, 30, 0, 0, 79, 123, 111, 1, 123, 129, 106, 123, 124, 117, 114, 108, 133, 111, 1, 123, 111, 118, 134, 114, 1, 121, 120, 127, 120, 112, 114, 1, 110, 122, 125, 109, 1, 119, 106, 1, 110, 122, 125, 109, 106, 7, 1, 116, 106, 112, 110, 106, 137, 1, 119, 111, 123, 129, 106, 123, 124, 117, 114, 108, 106, 137, 1, 123, 111, 118, 134, 137, 1, 119, 111, 123, 129, 106, 123, 124, 117, 114, 108, 106, 1, 121, 120, 8, 123, 108, 120, 111, 118, 125, 9])
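As a quick sanity check (a minimal sketch, using only the vocab_to_int / int_to_vocab mappings defined above), the encoded array can be mapped back to the original characters:

# Decode the integer codes back to characters; this should reproduce text[:110]
decoded = ''.join(int_to_vocab[i] for i in encoded[:110])
print(decoded == text[:110])  # expected: True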
len(vocab)
Out: 140
def get_batches(arr, n_seqs, n_steps):
    '''Split arr into mini-batches of size n_seqs x n_steps.

       Arguments
       ---------
       arr: array to split into batches
       n_seqs: batch size, the number of sequences per batch
       n_steps: sequence length, the number of "steps" per sequence
    '''
    # Number of characters per batch and number of full batches we can make
    characters_per_batch = n_seqs * n_steps
    n_batches = len(arr)//characters_per_batch

    # Keep only enough characters to fill the full batches
    arr = arr[:n_batches * characters_per_batch]

    # Reshape 1D -> 2D, with n_seqs rows
    arr = arr.reshape((n_seqs, -1))

    for n in range(0, arr.shape[1], n_steps):
        # The features
        x = arr[:, n:n+n_steps]
        # The targets: "x" shifted one character to the left
        y = np.zeros_like(x)
        y[:, :-1], y[:, -1] = x[:, 1:], x[:, 0]
        yield x, y
batches = get_batches(encoded, 10, 50)
x, y = next(batches)
print('x\n', x[:5, :5])
print('\ny\n', y[:5, :5])
x
 [[ 99  77  93  94 102]
  [  1 110 108 114 112]
  [ 79 120 124   1 120]
  [114 119   1 109 120]
  [106 108 111 110 117]]

y
 [[ 77  93  94 102   1]
  [110 108 114 112 111]
  [120 124   1 120 124]
  [119   1 109 120 108]
  [108 111 110 117 114]]
def build_inputs(batch_size, num_steps):
    '''Create TF placeholders for the inputs, the targets, and the dropout keep probability.

       Arguments
       ---------
       batch_size: batch size, the number of sequences per batch
       num_steps: sequence length, the number of "steps" per sequence
    '''
    # Placeholders for the inputs and targets
    inputs = tf.placeholder(tf.int32, [batch_size, num_steps], name='inputs')
    targets = tf.placeholder(tf.int32, [batch_size, num_steps], name='targets')

    # Placeholder for the dropout keep probability
    keep_prob = tf.placeholder(tf.float32, name='keep_prob')

    return inputs, targets, keep_prob
def build_lstm(lstm_size, num_layers, batch_size, keep_prob):
    '''Build the LSTM cell.

       Arguments
       ---------
       keep_prob: dropout keep probability (tf.placeholder)
       lstm_size: number of units in the LSTM layers
       num_layers: number of LSTM layers
       batch_size: batch size
    '''
    ### Build the LSTM cell
    def build_cell(lstm_size, keep_prob):
        # A basic LSTM cell
        lstm = tf.contrib.rnn.BasicLSTMCell(lstm_size)

        # Add dropout to the cell output
        drop = tf.contrib.rnn.DropoutWrapper(lstm, output_keep_prob=keep_prob)
        return drop

    # Stack several LSTM layers on top of each other for deep learning
    cell = tf.contrib.rnn.MultiRNNCell([build_cell(lstm_size, keep_prob) for _ in range(num_layers)])

    # Initial (zero) state of the LSTM
    initial_state = cell.zero_state(batch_size, tf.float32)

    return cell, initial_state
def build_output(lstm_output, in_size, out_size):
    '''Build a softmax layer and return the softmax output and the logits.

       Arguments
       ---------
       lstm_output: output tensor of the LSTM
       in_size: size of the input to this layer (the number of LSTM units)
       out_size: size of the softmax layer (the vocabulary size)
    '''
    # Reshape the output: concatenate over the sequence axis and flatten 3D -> 2D
    seq_output = tf.concat(lstm_output, axis=1)
    x = tf.reshape(seq_output, [-1, in_size])

    # Connect the LSTM outputs to a softmax layer
    with tf.variable_scope('softmax'):
        softmax_w = tf.Variable(tf.truncated_normal((in_size, out_size), stddev=0.1))
        softmax_b = tf.Variable(tf.zeros(out_size))

    # Compute the logits
    logits = tf.matmul(x, softmax_w) + softmax_b

    # Apply softmax to get the character probabilities
    out = tf.nn.softmax(logits, name='predictions')

    return out, logits
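To make the reshape concrete, here is a minimal NumPy sketch with made-up sizes (a batch of 10, 50 steps, 512 LSTM units are illustrative only): the 3D LSTM output is flattened so that every time step becomes one row fed to the softmax layer.

import numpy as np

# Hypothetical sizes, for illustration only
batch, steps, lstm_units = 10, 50, 512
lstm_out = np.zeros((batch, steps, lstm_units))

# The same flattening that build_output performs with tf.reshape(..., [-1, in_size])
flat = lstm_out.reshape(-1, lstm_units)
print(flat.shape)  # (500, 512): one row per character position in the batch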
For example, the classes [red, yellow, green] get the integer codes [0, 1, 2], which one-hot encoding turns into the rows of an identity matrix:

[[1, 0, 0],
 [0, 1, 0],
 [0, 0, 1]]
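The same mapping is what tf.one_hot produces; a minimal sketch with the toy class codes from the three-class example above:

codes = tf.constant([0, 1, 2])        # integer class codes, e.g. red, yellow, green
one_hot = tf.one_hot(codes, depth=3)  # depth = number of classes

with tf.Session() as sess:
    print(sess.run(one_hot))
    # [[1. 0. 0.]
    #  [0. 1. 0.]
    #  [0. 0. 1.]]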
def build_loss(logits, targets, lstm_size, num_classes):
    '''Compute the loss from the logits and the targets.

       Arguments
       ---------
       logits: logits from the softmax layer
       targets: target characters
       lstm_size: number of units in the LSTM layers
       num_classes: number of classes (the vocabulary size)
    '''
    # One-hot encode the targets and reshape them to match the logits
    y_one_hot = tf.one_hot(targets, num_classes)
    y_reshaped = tf.reshape(y_one_hot, logits.get_shape())

    # Softmax cross entropy loss
    loss = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y_reshaped)
    loss = tf.reduce_mean(loss)

    return loss
def build_optimizer(loss, learning_rate, grad_clip):
    '''Build the optimizer with gradient clipping.

       Arguments
       ---------
       loss: the loss to minimize
       learning_rate: learning rate
       grad_clip: threshold for clipping the global gradient norm
    '''
    # Clip the gradients to avoid "exploding" gradients
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(loss, tvars), grad_clip)
    train_op = tf.train.AdamOptimizer(learning_rate)
    optimizer = train_op.apply_gradients(zip(grads, tvars))

    return optimizer
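As a rough illustration of what tf.clip_by_global_norm does (a NumPy sketch with made-up gradient values, not the real graph): when the combined norm of all gradients exceeds grad_clip, every gradient is scaled by the same factor, which preserves their directions while bounding their magnitude.

import numpy as np

def clip_by_global_norm_demo(grads, clip_norm):
    # Global norm over all gradient tensors, as tf.clip_by_global_norm computes it
    global_norm = np.sqrt(sum(np.sum(g**2) for g in grads))
    scale = min(1.0, clip_norm / global_norm)
    return [g * scale for g in grads], global_norm

grads = [np.array([3.0, 4.0]), np.array([12.0])]   # toy gradients, global norm = 13
clipped, norm = clip_by_global_norm_demo(grads, clip_norm=5)
print(norm)      # 13.0
print(clipped)   # every gradient scaled by the same factor 5/13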
class CharRNN:

    def __init__(self, num_classes, batch_size=64, num_steps=50,
                 lstm_size=128, num_layers=2, learning_rate=0.001,
                 grad_clip=5, sampling=False):

        # When sampling (generating text), we feed one character at a time,
        # so the batch size and the number of steps are both 1
        if sampling == True:
            batch_size, num_steps = 1, 1
        else:
            batch_size, num_steps = batch_size, num_steps

        tf.reset_default_graph()

        # Input placeholders
        self.inputs, self.targets, self.keep_prob = build_inputs(batch_size, num_steps)

        # LSTM cell
        cell, self.initial_state = build_lstm(lstm_size, num_layers, batch_size, self.keep_prob)

        ### Run the data through the RNN
        # One-hot encode the inputs
        x_one_hot = tf.one_hot(self.inputs, num_classes)

        # Run each sequence step through the RNN
        outputs, state = tf.nn.dynamic_rnn(cell, x_one_hot, initial_state=self.initial_state)
        self.final_state = state

        # Predictions (softmax) and logits
        self.prediction, self.logits = build_output(outputs, lstm_size, num_classes)

        # Loss and optimizer (with gradient clipping)
        self.loss = build_loss(self.logits, self.targets, lstm_size, num_classes)
        self.optimizer = build_optimizer(self.loss, learning_rate, grad_clip)
batch_size = 100        # number of sequences per batch
num_steps = 100         # number of steps per sequence
lstm_size = 512         # number of units in the LSTM layers
num_layers = 2          # number of LSTM layers
learning_rate = 0.001   # learning rate
keep_prob = 0.5         # dropout keep probability
epochs = 20

# Save a checkpoint every N iterations
save_every_n = 200

model = CharRNN(len(vocab), batch_size=batch_size, num_steps=num_steps,
                lstm_size=lstm_size, num_layers=num_layers,
                learning_rate=learning_rate)

saver = tf.train.Saver(max_to_keep=100)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # To continue training from a saved checkpoint, uncomment the next line
    #saver.restore(sess, 'checkpoints/______.ckpt')
    counter = 0
    for e in range(epochs):
        # Train the network
        new_state = sess.run(model.initial_state)
        loss = 0
        for x, y in get_batches(encoded, batch_size, num_steps):
            counter += 1
            start = time.time()
            feed = {model.inputs: x,
                    model.targets: y,
                    model.keep_prob: keep_prob,
                    model.initial_state: new_state}
            batch_loss, new_state, _ = sess.run([model.loss,
                                                 model.final_state,
                                                 model.optimizer],
                                                feed_dict=feed)

            end = time.time()
            print('Epoch: {}/{}... '.format(e+1, epochs),
                  'Training Step: {}... '.format(counter),
                  'Training loss: {:.4f}... '.format(batch_loss),
                  '{:.4f} sec/batch'.format((end-start)))

            if (counter % save_every_n == 0):
                saver.save(sess, "checkpoints/i{}_l{}.ckpt".format(counter, lstm_size))

    saver.save(sess, "checkpoints/i{}_l{}.ckpt".format(counter, lstm_size))
Epoch: 1/20...  Training Step: 1...  Training loss: 4.9402...  7.7964 sec/batch
Epoch: 1/20...  Training Step: 2...  Training loss: 4.8530...  7.1318 sec/batch
...
Epoch: 20/20...  Training Step: 3400...  Training loss: 1.4003...  6.6569 sec/batch
tf.train.get_checkpoint_state('checkpoints')
model_checkpoint_path: "checkpoints\\i3400_l512.ckpt"
all_model_checkpoint_paths: "checkpoints\\i200_l512.ckpt"
all_model_checkpoint_paths: "checkpoints\\i400_l512.ckpt"
all_model_checkpoint_paths: "checkpoints\\i600_l512.ckpt"
all_model_checkpoint_paths: "checkpoints\\i800_l512.ckpt"
all_model_checkpoint_paths: "checkpoints\\i1000_l512.ckpt"
all_model_checkpoint_paths: "checkpoints\\i1200_l512.ckpt"
all_model_checkpoint_paths: "checkpoints\\i1400_l512.ckpt"
all_model_checkpoint_paths: "checkpoints\\i1600_l512.ckpt"
all_model_checkpoint_paths: "checkpoints\\i1800_l512.ckpt"
all_model_checkpoint_paths: "checkpoints\\i2000_l512.ckpt"
all_model_checkpoint_paths: "checkpoints\\i2200_l512.ckpt"
all_model_checkpoint_paths: "checkpoints\\i2400_l512.ckpt"
all_model_checkpoint_paths: "checkpoints\\i2600_l512.ckpt"
all_model_checkpoint_paths: "checkpoints\\i2800_l512.ckpt"
all_model_checkpoint_paths: "checkpoints\\i3000_l512.ckpt"
all_model_checkpoint_paths: "checkpoints\\i3200_l512.ckpt"
all_model_checkpoint_paths: "checkpoints\\i3400_l512.ckpt"
The pick_top_n function is used to reduce the "noise" of the predictions: it keeps only a specified number of the most likely characters (5 by default) and discards all other options.

def pick_top_n(preds, vocab_size, top_n=5):
    p = np.squeeze(preds)
    # Zero out everything except the top_n most likely characters
    p[np.argsort(p)[:-top_n]] = 0
    # Renormalize so the probabilities sum to 1
    p = p / np.sum(p)
    # Sample the next character from the remaining options
    c = np.random.choice(vocab_size, 1, p=p)[0]
    return c
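A minimal sketch of its effect on a made-up prediction over a 6-character vocabulary with top_n=2: all but the two most likely characters get probability zero, so sampling can only return one of those two indices.

preds = np.array([[0.05, 0.10, 0.40, 0.05, 0.10, 0.30]])  # toy prediction, vocab of 6

p = np.squeeze(preds).copy()
p[np.argsort(p)[:-2]] = 0               # keep only the top 2 options (indices 2 and 5)
p = p / np.sum(p)
print(p)                                # [0. 0. 0.571... 0. 0. 0.428...]
print(np.random.choice(6, 1, p=p)[0])   # always 2 or 5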
def sample(checkpoint, n_samples, lstm_size, vocab_size, prime=" ."):
    samples = [c for c in prime]
    # Build the model in sampling mode: batch size and sequence length are both 1
    model = CharRNN(len(vocab), lstm_size=lstm_size, sampling=True)
    saver = tf.train.Saver()
    with tf.Session() as sess:
        # Restore the trained weights from the checkpoint
        saver.restore(sess, checkpoint)
        new_state = sess.run(model.initial_state)

        # Feed the prime characters to warm up the network state
        for c in prime:
            x = np.zeros((1, 1))
            x[0,0] = vocab_to_int[c]
            feed = {model.inputs: x,
                    model.keep_prob: 1.,
                    model.initial_state: new_state}
            preds, new_state = sess.run([model.prediction, model.final_state],
                                        feed_dict=feed)

        c = pick_top_n(preds, len(vocab))
        samples.append(int_to_vocab[c])

        # Generate n_samples characters, feeding each prediction back as the next input
        for i in range(n_samples):
            x[0,0] = c
            feed = {model.inputs: x,
                    model.keep_prob: 1.,
                    model.initial_state: new_state}
            preds, new_state = sess.run([model.prediction, model.final_state],
                                        feed_dict=feed)

            c = pick_top_n(preds, len(vocab))
            samples.append(int_to_vocab[c])

    return ''.join(samples)
checkpoint = 'checkpoints/i200_l512.ckpt'
samp = sample(checkpoint, 1000, lstm_size, len(vocab))
print(samp)
INFO:tensorflow:Restoring parameters from checkpoints/i200_l512.ckpt
[~1000 characters of generated Russian text]
checkpoint = 'checkpoints/i600_l512.ckpt'
samp = sample(checkpoint, 1000, lstm_size, len(vocab))
print(samp)
INFO:tensorflow:Restoring parameters from checkpoints/i600_l512.ckpt
[~1000 characters of generated Russian text]
checkpoint = tf.train.latest_checkpoint('checkpoints')
samp = sample(checkpoint, 2000, lstm_size, len(vocab))
print(samp)
INFO:tensorflow:Restoring parameters from checkpoints\i3400_l512.ckpt
[~2000 characters of generated Russian text]
Source: https://habr.com/ru/post/342738/