我正在训练一个逻辑回归模型
Iris dataset
它有三类输出,使用TensorFlow。
我把我的训练集分成75%的训练和25%的测试集。
在训练模型时,
Y_hat
是对训练集的预测。在打印输出(Y_-Hat)时,我看到模型正在预测
1
对于所有输入(形状(无,4))。
此外,在再次测试模型时
伊哈特
作为模型预测,它仍然显示
一
对于所有的测试集输入,这是不可避免的,因为模型在列车组上做得不好。
代码如下:
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
iris=datasets.load_iris()
type(iris)
X=iris.data[:]
X=X.astype(np.float32)
Y=iris.target[:]
Y=Y.astype(np.float32)
print("x=",type(X)," Y=",type(Y))
print("X.shape=",X.shape,"Y.shape=",Y.shape)
Y=Y.reshape(Y.shape[0],1)
print("Y.shape=",Y.shape)
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, random_state=23, test_size=0.25, stratify=Y)
print("Size")
print("X_train=",X_train.shape)
print("Y_train=",Y_train.shape)
print("X_test=",X_test.shape)
print("Y_test=",Y_test.shape)
#Variables
W=tf.Variable(tf.random_normal(shape=[4,1]),dtype=tf.float32)
b=tf.Variable(tf.random_normal(shape=[1,1]),dtype=tf.float32)
#Placeholder
input_=tf.placeholder(tf.float32,shape=(None,iris.data[:].shape[1]))
output_=tf.placeholder(tf.float32,shape=(None,Y.shape[1]))
Y_hat=tf.nn.softmax(tf.matmul(input_,W)+b)
c=tf.nn.sigmoid_cross_entropy_with_logits(labels=Y_hat,logits=output_)
cost=tf.reduce_mean(c)
optimizer=tf.train.GradientDescentOptimizer(0.01)
train=optimizer.minimize(cost)
init=tf.global_variables_initializer()
with tf.Session() as sess:
epochs=6
for i in range(epochs):
sess.run(init)
sess.run(Y_hat,feed_dict={input_:X_train})#,output_:Y_train})
sess.run(train,feed_dict={input_:X_train,output_:Y_train})
print(i,"Cost=",sess.run(cost,feed_dict={input_:X_train,output_:Y_train}))
ytest=tf.placeholder(dtype=tf.float32,shape=(None,1))
print(sess.run(Y_hat,feed_dict={input_:X_test}))
correct_prediction = tf.equal(tf.argmax(Y_hat,1), tf.argmax(ytest,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
print(Y_hat.shape)
print("accuracy=",sess.run(accuracy,feed_dict={input_:X_test,ytest:Y_test}))
print("test_cost",sess.run(cost,feed_dict={input_:X_test,output_:Y_test}))
以下是所有测试集预测的输出:
x= <class 'numpy.ndarray'> Y= <class 'numpy.ndarray'>
X.shape= (150, 4) Y.shape= (150,)
Y.shape= (150, 1)
Size
X_train= (112, 4)
Y_train= (112, 1)
X_test= (38, 4)
Y_test= (38, 1)
0 Cost= 0.377203
1 Cost= 0.377203
2 Cost= 0.377203
3 Cost= 0.377203
4 Cost= 0.377203
5 Cost= 0.377203
[[ 1.]
[ 1.]
[ 1.]
[ 1.]
[ 1.]
[ 1.]
[ 1.]
[ 1.]
[ 1.]
[ 1.]
[ 1.]
[ 1.]
[ 1.]
[ 1.]
[ 1.]
[ 1.]
[ 1.]
[ 1.]
[ 1.]
[ 1.]
[ 1.]
[ 1.]
[ 1.]
[ 1.]
[ 1.]
[ 1.]
[ 1.]
[ 1.]
[ 1.]
[ 1.]
[ 1.]
[ 1.]
[ 1.]
[ 1.]
[ 1.]
[ 1.]
[ 1.]
[ 1.]]
(?, 1)
accuracy= 1.0
test_cost 0.379477
请解释模型预测背后的原因
一
在所有情况下。是因为激活功能还是其他原因?