我正在尝试实现下面这篇论文中的模型:
Framewise phoneme classification with bidirectional LSTM and other neural network architectures
亚历克斯·格雷夫斯
但我不知道如何实现论文中提到的 LSTM 网络的目标延迟(target delay)。对于 5 帧的延迟,输入与输出的对应关系应该是这样的:
x->x0,x1,x2,x3,x4,x5,x6,x7,x8,x9
| | | | | | | | | |
y->_ ,_ ,_ ,_ ,_ ,y0,y1,y2,y3,y4,y5,y6,y7,y8,y9
我已经搭建好了不带延迟的逐帧分类(framewise classification)模型:
# Builds a TF1 graph for framewise classification with a unidirectional LSTM:
# padded input sequences -> LSTM -> per-frame linear layer -> softmax, with
# the loss and accuracy computed only over the valid (unpadded) frames.
# NOTE: this is a fragment; `tf` and `self` (with n_feats, n_classes and
# n_hidden) come from enclosing code that is not shown here.
with tf.name_scope("Inputs"):
    # [batchsize, seqlen, input_dims]
    self.X = tf.placeholder(name='X', shape=[None, None, self.n_feats], dtype=tf.float32)
    self.seqlens = tf.placeholder(name='seqlens', shape=[None], dtype=tf.int32)
    # [batchsize, seqlen]
    self.y = tf.placeholder(name='y', shape=[None, None], dtype=tf.int64)
    # NOTE(review): `delay` is declared but nothing below reads it, so the
    # graph currently performs framewise classification without target delay.
    self.delay = tf.placeholder(name='delay', shape=(), dtype=tf.int64)
    onehot_y = tf.one_hot(self.y, self.n_classes, name="onehot_y")  # [batchsize, seqlen, n_classes]
    onehot_y = tf.transpose(onehot_y, [1, 0, 2])  # [seqlen, batchsize, n_classes]
    # Time-major boolean mask of valid frames.
    # NOTE(review): assumes every batch is padded exactly to max(seqlens), so
    # the mask matches the time dimension of X and y -- confirm with the feeder.
    mask = tf.transpose(tf.sequence_mask(self.seqlens), [1, 0], name="create_mask")
    self.lr = tf.placeholder(name='lr', shape=(), dtype=tf.float32)

with tf.name_scope("variables"):
    # Output projection from the LSTM hidden state to the class scores.
    w = tf.get_variable(
        name="W",
        shape=[self.n_hidden, self.n_classes],
        dtype=tf.float32,
        initializer=tf.random_normal_initializer(mean=0.0, stddev=0.01, dtype=tf.float32),
    )
    b = tf.get_variable(name='b', shape=[self.n_classes], initializer=tf.constant_initializer(0))

with tf.name_scope("rnn"):
    cellfw = tf.nn.rnn_cell.LSTMCell(self.n_hidden, state_is_tuple=True)
    rnn_out, state = tf.nn.dynamic_rnn(cellfw, self.X, dtype=tf.float32, sequence_length=self.seqlens)
    # Kept from the original; rnn_out is a single tensor here, not a list.
    rnn_out = tf.concat(rnn_out, 2)
    val = tf.transpose(rnn_out, [1, 0, 2])  # [seqlen, batchsize, n_hidden]
    # Drop padded frames; the two leading dimensions collapse into one.
    val = tf.boolean_mask(val, mask, name="apply_mask")

with tf.name_scope("softmask"):
    # Keep the unnormalized scores: the cross-entropy op below applies softmax
    # itself, so feeding it probabilities would normalize twice.
    raw_logits = tf.matmul(val, w) + b
    # Despite its name this attribute holds softmax probabilities; that is
    # preserved so existing readers of `logits_output` keep working.
    self.logits_output = tf.nn.softmax(raw_logits)
    self.y_pred = tf.argmax(self.logits_output, axis=1)

with tf.name_scope("calc_losses"):
    true_logits = tf.boolean_mask(onehot_y, mask)  # one-hot targets of the valid frames
    self.loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=true_logits, logits=raw_logits),
        name='loss',
    )
    self.optimizer = tf.train.AdamOptimizer(
        learning_rate=self.lr, name='Adam-op', beta1=0.9
    ).minimize(self.loss)

with tf.name_scope("calc_accuracies"):
    # NOTE(review): `self.y` is batch-major while `mask` (and therefore
    # `self.y_pred`) is time-major. For batch sizes above one the masked
    # labels may not line up with the predictions; left unchanged here,
    # verify (e.g. transpose `self.y` before masking).
    y_true = tf.boolean_mask(self.y, mask)
    correct_prediction = tf.equal(self.y_pred, y_true, name='correct_pred')
    self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name='accuracy')

with tf.name_scope("summaries"):
    hist_b = tf.summary.histogram('b', b)
    hist_w = tf.summary.histogram("w", w)
    self.hists_merged = tf.summary.merge([hist_w] + [hist_b])
    # Loss and accuracy are logged through placeholders, so the values written
    # to the summaries are whatever the caller feeds in.
    self.loss_ph = tf.placeholder(tf.float32, shape=(), name="loss_placeholder")
    self.summary_loss = tf.summary.scalar("loss", self.loss_ph)
    self.acc_ph = tf.placeholder(tf.float32, shape=(), name="accuracy_placeholder")
    self.summary_acc = tf.summary.scalar("accuracy", self.acc_ph)

self.saver = tf.train.Saver()
关于如何在这个模型中实现这种延迟,有什么建议或思路吗?