peace195 / sentence-based-sentiment-analysis Goto Github PK
View Code? Open in Web Editor NEW Sentence Based Sentiment Analysis
Sentence Based Sentiment Analysis
Dimension 2 in both shapes must be equal, but are 21748 and 5. Shapes are [?,30,21748] and [?,30,5].
From merging shape 0 with other shapes. for 'dynamic_rnn/concat/concat_dim' (op: 'Pack') with input shapes: [?,30,21748], [?,30,5].
def model_training(dimension, lookup_table):
    """Build the sentiment/reconstruction graph, optionally train it, then evaluate.

    The function resets the default TensorFlow graph, loads the train and test
    sets from ``../encoder/``, builds two weight-sharing ``dynamic_rnn``
    branches (``switch=False`` for the sentiment logits, ``switch=True`` for the
    reconstruction cost), minimizes ``alpha * cost_recon + (1 - alpha) *
    cost_sent`` with gradient descent when ``FLAGS.train_mode`` is set, and
    finally prints the accuracy measured on the test set.

    Args:
        dimension: Size used for the first axis of ``weights['inside']``, the
            last axis of the ``'reconstruct'`` weights/biases, and as the
            ``depth`` passed to ``masking``.
        lookup_table: Passed unchanged to ``SequenceData`` as ``table``.

    Returns:
        None. Progress and results are printed; summaries are written under
        ``FLAGS.summaries_dir`` and a checkpoint is saved to ``se-v001.ckpt``.

    NOTE(review): ``FLAGS.checkpoint_dir`` is created below but the checkpoint
    itself is written to the relative path ``se-v001.ckpt`` -- confirm which
    location is intended.
    """
    # clear the default graph stack and reset the global default graph
    print("Reset the global default graph.. ")
    tf.reset_default_graph()

    # Passing data
    print("Passing data")
    source_path = "../encoder/"
    # load training data without additional features
    trainset = SequenceData(max_seq_len=FLAGS.seq_max_len,
                            data_in=source_path + "train-data",
                            label_in=source_path + "train-label",
                            table=lookup_table,
                            add_features=None, pos_flag=FLAGS.using_pos)
    # load test data without additional features
    testset = SequenceData(max_seq_len=FLAGS.seq_max_len,
                           data_in=source_path + "test-data",
                           label_in=source_path + "test-label",
                           table=lookup_table,
                           add_features=None, pos_flag=FLAGS.using_pos)

    print("Check the data size")
    print("Training set:")
    print(len(trainset.data), len(trainset.data[0]))
    print("Test set:")
    print(len(testset.data), len(testset.data[0]))
    test_length = len(testset.data)

    print("Start training ..")
    # Placeholder for data
    x = tf.placeholder(tf.int32, [None, FLAGS.seq_max_len])
    xx = tf.placeholder(tf.int32, [None, FLAGS.seq_max_len])
    # Placeholder for label
    y = tf.placeholder("float", [None, FLAGS.n_classes])
    # A placeholder for indicating each sequence length
    seq_len = tf.placeholder(tf.int32, [None])
    # Placeholder for one-hot partition matrix
    one_hot = tf.placeholder(tf.int32, [None, FLAGS.seq_max_len])
    # Placeholder for one-hot target partition matrix
    one_hot_target = tf.placeholder(tf.int32, [None, FLAGS.seq_max_len])
    # Placeholder for sentiment masking
    sentiment_mask = tf.placeholder(tf.float32, [None, FLAGS.seq_max_len])
    # Placeholder for data target
    sent_target = tf.placeholder("float", [None, FLAGS.seq_max_len - 1])
    # NOTE(review): the result is not consumed anywhere in this function; the
    # call is kept because `masking` is defined elsewhere and may add ops the
    # rest of the project relies on -- confirm before removing.
    sentence_target = masking(sent_target, depth=dimension)
    # Placeholder for dropout
    keep_prob = tf.placeholder(tf.float32)

    # Define weights
    with tf.name_scope("weights"):
        weights = {
            'inside': tf.Variable(tf.random_normal([dimension, FLAGS.n_hidden_inside])),
            'reconstruct': tf.Variable(tf.random_normal([FLAGS.n_hidden, dimension])),
            'out': tf.Variable(tf.random_normal([FLAGS.n_hidden, FLAGS.n_classes])),
        }
    with tf.name_scope("biases"):
        biases = {
            'inside': tf.Variable(tf.random_normal([FLAGS.n_hidden_inside])),
            'reconstruct': tf.Variable(tf.random_normal([dimension])),
            'out': tf.Variable(tf.random_normal([FLAGS.n_classes])),
        }

    # Variables scope for sharing parameters between the two branches
    with tf.variable_scope("dynamic_rnn") as scope:
        sentiment = dynamic_rnn(x, xx, seq_len, weights, biases,
                                keep_prob, addition=None, one_hot=one_hot,
                                one_hot_target=one_hot_target,
                                sentiment_mask=sentiment_mask,
                                switch=False)
        scope.reuse_variables()
        reconstruction = dynamic_rnn(x, xx, seq_len, weights, biases,
                                     keep_prob, addition=None, one_hot=one_hot,
                                     one_hot_target=one_hot_target,
                                     sentiment_mask=sentiment_mask,
                                     switch=True)

    # Define loss and optimizer
    with tf.name_scope("cost_sentiment"):
        # Keyword arguments keep this call valid both on releases where the
        # signature is (logits, labels) and on releases that require names.
        cost_sent = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=sentiment, labels=y))
        tf.scalar_summary("cost_sentiment", cost_sent)
    with tf.name_scope("cost_reconstruction"):
        cost_recon = reconstruction
        tf.scalar_summary("cost_reconstruction", cost_recon)
    with tf.name_scope("cost"):
        cost = FLAGS.alpha * cost_recon + (1 - FLAGS.alpha) * cost_sent
        tf.scalar_summary("cost", cost)
    with tf.name_scope("learning_rate"):
        global_step = tf.Variable(0, trainable=False)
        learning_rate = tf.train.exponential_decay(0.001,
                                                   global_step,
                                                   100, 0.65,
                                                   staircase=True)
        tf.scalar_summary("learning_rate", learning_rate)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(
        cost, global_step=global_step)

    # Evaluate model
    with tf.name_scope("accuracy"):
        with tf.name_scope("correct_prediction"):
            correct_pred = tf.equal(tf.argmax(sentiment, 1), tf.argmax(y, 1))
            # Number of correct predictions in a batch (summed, not averaged)
            correct_prediction = tf.reduce_sum(tf.cast(correct_pred, tf.float32))
        with tf.name_scope("accuracy"):
            accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
        tf.scalar_summary("accuracy", accuracy)

    # Start from an empty summaries directory on every run
    if tf.gfile.Exists(FLAGS.summaries_dir):
        tf.gfile.DeleteRecursively(FLAGS.summaries_dir)
    tf.gfile.MakeDirs(FLAGS.summaries_dir)
    # make the checkpoint directory if it does not exist
    if not tf.gfile.Exists(FLAGS.checkpoint_dir):
        tf.gfile.MakeDirs(FLAGS.checkpoint_dir)

    merged = tf.merge_all_summaries()
    # Initializing the variables
    init = tf.initialize_all_variables()
    # Using defaults to saving all variables
    saver = tf.train.Saver()

    def make_feed(batch, keep):
        """Map one 8-field batch onto the placeholders, with dropout `keep`."""
        (b_x, b_xx, b_y, b_len, b_one_hot,
         b_target, b_mask, b_one_hot_target) = batch
        return {x: b_x,
                xx: b_xx,
                y: b_y,
                seq_len: b_len,
                keep_prob: keep,
                one_hot: b_one_hot,
                sent_target: b_target,
                sentiment_mask: b_mask,
                one_hot_target: b_one_hot_target}

    # Launch the graph in a single session; an extra interactive session would
    # hold its resources for the lifetime of the process without being used.
    with tf.Session() as sess:
        train_writer = tf.train.SummaryWriter(FLAGS.summaries_dir + '/train',
                                              sess.graph)
        test_writer = tf.train.SummaryWriter(FLAGS.summaries_dir + '/test')
        try:
            sess.run(init)
            # saver.restore(sess, "se-v001.ckpt")
            if FLAGS.train_mode:
                step = 1
                # Keep training until reach max iterations
                while step * FLAGS.batch_size < 10000:
                    if FLAGS.using_add_features:
                        # The additional-feature field is returned by the data
                        # set but is not fed to the graph.
                        batch_x, batch_xx, batch_y, \
                            batch_sequence_length, batch_add, batch_one_hot, \
                            batch_target, batch_sentiment_mask, \
                            batch_one_hot_target = trainset.next(FLAGS.batch_size)
                    else:
                        batch_x, batch_xx, batch_y, \
                            batch_sequence_length, batch_one_hot, \
                            batch_target, batch_sentiment_mask, \
                            batch_one_hot_target = trainset.next(FLAGS.batch_size)
                    train_feed = make_feed(
                        (batch_x, batch_xx, batch_y, batch_sequence_length,
                         batch_one_hot, batch_target, batch_sentiment_mask,
                         batch_one_hot_target),
                        FLAGS.dropout)

                    # Run optimization (back-prop)
                    sess.run(optimizer, feed_dict=train_feed)

                    if step % FLAGS.display_step == 0:
                        # Fetch every reported value from ONE forward pass so
                        # the accuracy and the losses share the same dropout
                        # mask and describe the same computation.
                        # NOTE(review): dropout stays active (FLAGS.dropout)
                        # for these training-batch metrics -- confirm whether
                        # keep_prob=1.0 is wanted for reporting.
                        summary, acc, loss, loss_sent, loss_recon, lr = sess.run(
                            [merged, accuracy, cost, cost_sent, cost_recon,
                             learning_rate],
                            feed_dict=train_feed)
                        train_writer.add_summary(summary, step)
                        print("Iter " + str(step * FLAGS.batch_size) + ", Loss= " +
                              "{:.4f}".format(loss) + ", Loss Sent= " +
                              "{:.4f}".format(loss_sent) + ", Loss Recon= " +
                              "{:.4f}".format(loss_recon) + ", Accuracy= " +
                              "{:.4f}".format(acc) + ", Learning_rate= " +
                              "{:.6f}".format(lr))
                        # NOTE(review): checkpointing at every display step;
                        # the nesting level of this call should be confirmed.
                        saver.save(sess, "se-v001.ckpt")
                    step += 1
                print("Optimization Finished!")

            # calculate accuracy
            # NOTE(review): evaluation runs whether or not training ran --
            # confirm this nesting is the intended one.
            step = 1
            count = 0
            while step * FLAGS.batch_size <= test_length:
                test_batch = testset.next(FLAGS.batch_size)
                batch_correct = sess.run(correct_prediction,
                                         feed_dict=make_feed(test_batch, 1.))
                print("Correct prediction: {0} - {1}/ batch_size ={2}".format(
                    step, batch_correct, FLAGS.batch_size))
                count += batch_correct
                step += 1
            print(count)

            # Only full batches are evaluated, so normalize by the number of
            # examples actually scored rather than by the whole test set size;
            # otherwise the accuracy is under-reported whenever test_length is
            # not a multiple of the batch size.
            evaluated = (step - 1) * FLAGS.batch_size
            if evaluated:
                print("Accuracy: {}".format(count / float(evaluated)))
            else:
                # Fewer test examples than one batch: nothing was scored.
                print("Accuracy: {}".format(0.0))
        finally:
            # Flush pending events and release the event-file handles.
            train_writer.close()
            test_writer.close()
A declarative, efficient, and flexible JavaScript library for building user interfaces.
🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.
TypeScript is a superset of JavaScript that compiles to clean JavaScript output.
An Open Source Machine Learning Framework for Everyone
The Web framework for perfectionists with deadlines.
A PHP framework for web artisans
Bring data to life with SVG, Canvas and HTML. 📊📈🎉
JavaScript (JS) is a lightweight interpreted programming language with first-class functions.
Something interesting about the web. A new door to the world.
A server is a program made to process requests and deliver data to clients.
Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.
Something interesting about visualization and data art.
Something interesting about games, to make everyone happy.
We are working to build community through open source technology. NB: members must have two-factor auth.
Open source projects and samples from Microsoft.
Google ❤️ Open Source for everyone.
Alibaba Open Source for everyone
Data-Driven Documents codes.
China tencent open source team.