# Machine learning: predicting stock price trends with an LSTM

#######

# coding=utf-8

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf

rnn_unit = 10  # number of hidden units in the LSTM cell
input_size = 7  # feature columns fed to the network per time step
output_size = 1  # predicted values per time step
lr = 0.0006  # learning rate

# ---------------- Load the data ----------------
# The context manager closes the file handle as soon as pandas has read it.
with open('/Users/zhangmenren/Desktop/机器学习/dataset_2.csv') as f:
    df = pd.read_csv(f)  # read the stock data

data = df.iloc[:, 2:10].values  # CSV columns 3-10 (0-based 2..9) as an array

# Build the training set.
def get_train_data(batch_size=1, time_step=20, train_begin=0, train_end=5800,
                   dataset=None):
    """Build standardized sliding-window training samples.

    Rows ``train_begin:train_end`` are standardized column-wise (zero mean,
    unit standard deviation) and cut into overlapping windows of
    ``time_step`` rows with a stride of one row.

    Args:
        batch_size: Number of consecutive windows per training batch.
        time_step: Number of rows in each window.
        train_begin: Index of the first row used for training.
        train_end: Index one past the last row used for training.
        dataset: Optional 2-D array with at least 8 columns. Defaults to the
            module-level ``data`` array.

    Returns:
        A tuple ``(batch_index, train_x, train_y)``. ``batch_index`` lists
        the window index at which each batch starts, followed by the total
        number of windows. ``train_x`` holds one ``time_step x 7`` nested
        list per window (columns 0-6). ``train_y`` holds one
        ``time_step x 1`` nested list per window (column 7).
    """
    if dataset is None:
        dataset = data  # module-level array loaded from the CSV
    data_train = np.asarray(dataset)[train_begin:train_end]
    normalized_train_data = (
        (data_train - np.mean(data_train, axis=0)) / np.std(data_train, axis=0)
    )

    # Fewer rows than one window means there are no samples at all.
    window_count = max(len(normalized_train_data) - time_step, 0)

    train_x = [
        normalized_train_data[i:i + time_step, :7].tolist()
        for i in range(window_count)
    ]
    train_y = [
        normalized_train_data[i:i + time_step, 7, np.newaxis].tolist()
        for i in range(window_count)
    ]

    # Batch start offsets, closed by the total so that consecutive pairs
    # (batch_index[k], batch_index[k + 1]) delimit every batch.
    batch_index = list(range(0, window_count, batch_size))
    batch_index.append(window_count)
    return batch_index, train_x, train_y

# Build the test set.
def get_test_data(time_step=1, test_begin=5800, dataset=None):
    """Build standardized, non-overlapping test samples.

    Rows from ``test_begin`` onward are standardized column-wise using their
    own mean and standard deviation, then cut into consecutive chunks of
    ``time_step`` rows. The final chunk holds whatever rows remain and may
    therefore be shorter than ``time_step``.

    Args:
        time_step: Number of rows per chunk.
        test_begin: Index of the first row used for testing.
        dataset: Optional 2-D array with at least 8 columns. Defaults to the
            module-level ``data`` array.

    Returns:
        A tuple ``(mean, std, test_x, test_y)``. ``mean`` and ``std`` are the
        per-column statistics used for standardization. ``test_x`` is a list
        of nested lists (columns 0-6, one entry per chunk). ``test_y`` is a
        flat list of floats (column 7, one entry per row).
    """
    if dataset is None:
        dataset = data  # module-level array loaded from the CSV
    data_test = np.asarray(dataset)[test_begin:]
    mean = np.mean(data_test, axis=0)
    std = np.std(data_test, axis=0)
    normalized_test_data = (data_test - mean) / std  # standardize

    # Number of samples, counting the trailing (possibly shorter) one.
    size = (len(normalized_test_data) + time_step - 1) // time_step
    full_chunks = max(size - 1, 0)

    test_x, test_y = [], []
    for i in range(full_chunks):
        chunk = normalized_test_data[i * time_step:(i + 1) * time_step]
        test_x.append(chunk[:, :7].tolist())
        test_y.extend(chunk[:, 7].tolist())

    # The tail offset is computed from the chunk count, not from the loop
    # variable, so it is defined even when the loop body never runs.
    tail = normalized_test_data[full_chunks * time_step:]
    test_x.append(tail[:, :7].tolist())
    test_y.extend(tail[:, 7].tolist())
    return mean, std, test_x, test_y

# ---------------- Network variables ----------------
# Weights and biases of the input and output projection layers.
# The output width uses output_size (rather than a literal 1) so that it
# stays in step with the Y placeholder built in train_lstm().
# Creation order is unchanged: TensorFlow auto-names unnamed variables in the
# order they are created, and checkpoints are keyed by those names.
weights = {
    'in': tf.Variable(tf.random_normal([input_size, rnn_unit])),
    'out': tf.Variable(tf.random_normal([rnn_unit, output_size])),
}
biases = {
    'in': tf.Variable(tf.constant(0.1, shape=[rnn_unit])),
    'out': tf.Variable(tf.constant(0.1, shape=[output_size])),
}

# ---------------- Network definition ----------------
def lstm(X):
    """Project the input, run it through one LSTM layer, project the output.

    Args:
        X: Input tensor. Its first dimension is read as the batch size and
            its second as the number of time steps; it is flattened to rows
            of ``input_size`` features before the input projection.

    Returns:
        A tuple ``(pred, final_states)``. ``pred`` is a 2-D tensor with one
        row per (sample, time step) pair, obtained by applying the output
        projection to every LSTM output. ``final_states`` is the final state
        returned by ``tf.nn.dynamic_rnn``.
    """
    batch_size = tf.shape(X)[0]
    time_step = tf.shape(X)[1]

    # Flatten to 2-D so the input projection is a single matmul; its result
    # feeds the hidden layer.
    flat_input = tf.reshape(X, [-1, input_size])
    input_rnn = tf.matmul(flat_input, weights['in']) + biases['in']
    # Back to 3-D, the layout expected by the LSTM cell.
    input_rnn = tf.reshape(input_rnn, [-1, time_step, rnn_unit])

    cell = tf.nn.rnn_cell.BasicLSTMCell(rnn_unit)
    init_state = cell.zero_state(batch_size, dtype=tf.float32)
    output_rnn, final_states = tf.nn.dynamic_rnn(
        cell, input_rnn, initial_state=init_state, dtype=tf.float32)

    # Flatten every time step's output so the output projection is applied
    # to each of them.
    output = tf.reshape(output_rnn, [-1, rnn_unit])
    pred = tf.matmul(output, weights['out']) + biases['out']
    return pred, final_states

# ---------------- Train the model ----------------
def train_lstm(batch_size=10, time_step=20, train_begin=2000, train_end=5800):
    """Train the LSTM on the training split and save a checkpoint.

    Args:
        batch_size: Number of windows fed per optimization step.
        time_step: Number of rows in each input window.
        train_begin: Index of the first training row.
        train_end: Index one past the last training row.

    Raises:
        ValueError: If the selected rows yield no training batch.
    """
    X = tf.placeholder(tf.float32, shape=[None, time_step, input_size])
    Y = tf.placeholder(tf.float32, shape=[None, time_step, output_size])
    batch_index, train_x, train_y = get_train_data(
        batch_size, time_step, train_begin, train_end)
    if len(batch_index) < 2:
        # Without this check the epoch loop would run zero steps and the
        # loss report below would hit an unbound name.
        raise ValueError(
            "no training batches: need more than time_step rows between "
            "train_begin and train_end")

    with tf.variable_scope("sec_lstm"):
        pred, _ = lstm(X)

    # Mean squared error over every (sample, time step) prediction.
    loss = tf.reduce_mean(
        tf.square(tf.reshape(pred, [-1]) - tf.reshape(Y, [-1])))
    # Optimization step run by the training loop; uses the module-level
    # learning rate lr.
    train_op = tf.train.AdamOptimizer(lr).minimize(loss)
    saver = tf.train.Saver(tf.global_variables(), max_to_keep=15)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # The epoch count can be changed: more epochs tend to fit better but
        # take longer.
        for i in range(100000):
            for step in range(len(batch_index) - 1):
                start, stop = batch_index[step], batch_index[step + 1]
                _, loss_ = sess.run(
                    [train_op, loss],
                    feed_dict={X: train_x[start:stop], Y: train_y[start:stop]})
            print("Number of iterations:", i, " loss:", loss_)
        # Written once, after the last epoch; prediction() locates it with
        # tf.train.latest_checkpoint('save/').
        saver.save(sess, "save/model.ckpt")
        print("The train has finished")


train_lstm()

# ---------------- Predict with the model ----------------
def prediction(time_step=20):
    """Restore the latest checkpoint, predict on the test split and plot it.

    Prints the mean relative deviation between prediction and ground truth,
    then shows both series as line plots (prediction in blue, truth in red).

    Args:
        time_step: Window length fed to the network; also passed to
            get_test_data() to chunk the test rows.
    """
    X = tf.placeholder(tf.float32, shape=[None, time_step, input_size])
    mean, std, test_x, test_y = get_test_data(time_step)

    # reuse=True shares the variables already created under "sec_lstm"
    # instead of creating a second set.
    with tf.variable_scope("sec_lstm", reuse=True):
        pred, _ = lstm(X)
    saver = tf.train.Saver(tf.global_variables())

    with tf.Session() as sess:
        # Restore the trained parameters.
        module_file = tf.train.latest_checkpoint('save/')
        print("1111111", module_file)
        saver.restore(sess, module_file)

        test_predict = []
        # The last sample is skipped: it may hold fewer than time_step rows
        # and would then not fit the placeholder.
        for step in range(len(test_x) - 1):
            prob = sess.run(pred, feed_dict={X: [test_x[step]]})
            test_predict.extend(prob.reshape((-1)))

        # Undo the standardization of the label column (index 7).
        test_y = np.array(test_y) * std[7] + mean[7]
        test_predict = np.array(test_predict) * std[7] + mean[7]

        # Mean relative deviation over the rows that were predicted.
        predicted = len(test_predict)
        acc = np.average(
            np.abs(test_predict - test_y[:predicted]) / test_y[:predicted])
        print("The accuracy of this predict:", acc)

        # Show the result as line plots.
        plt.figure()
        plt.plot(list(range(len(test_predict))), test_predict, color='b', )
        plt.plot(list(range(len(test_y))), test_y, color='r')
        plt.show()


prediction()

########### Prediction results ###########