dataset
df_dataset = pd.read_csv(banknote_dataset, header=None, names = CSV_COLUMNS)
df_train, df_valid, df_test = np.split(df_dataset.sample(frac=1), [int(.6*len(df_dataset)), int(.8*len(df_dataset))])
def train_input_fn(df, num_epochs):
return tf.estimator.inputs.pandas_input_fn(
x = df,
y = df[LABEL],
batch_size = 128,
num_epochs = num_epochs,
shuffle = True
)
def eval_input_fn(df):
return tf.estimator.inputs.pandas_input_fn(
x = df,
y = df[LABEL],
batch_size = 128,
shuffle = False
)
def prediction_input_fn(df):
return tf.estimator.inputs.pandas_input_fn(
x = df,
y = None,
batch_size = 128,
shuffle = False,
)
def get_feature_cols():
input_columns = [tf.feature_column.numeric_column(k) for k in FEATURES]
return input_columns
OUTDIR = 'banknote_trained'
shutil.rmtree(OUTDIR, ignore_errors = True) # start fresh each time
model = tf.estimator.LinearRegressor(
feature_columns = get_feature_cols(),
optimizer=tf.train.FtrlOptimizer(learning_rate=0.1),
model_dir = OUTDIR)
model.train(input_fn = train_input_fn(df_train, num_epochs = 100))
predictions = model.predict(input_fn = prediction_input_fn(df_test))
for items in predictions:
print(items)
我的结果是:
{'predictions': array([-0.28320795], dtype=float32)}
{'predictions': array([0.8572771], dtype=float32)}
{'predictions': array([0.68809825], dtype=float32)}
{'predictions': array([0.9708319], dtype=float32)}
{'predictions': array([0.0971362], dtype=float32)}
{'predictions': array([0.98395026], dtype=float32)
根据Logistic回归,应该在0到1之间。
在TF中使用Logistic回归是令人困惑的。检查
this