我正在试验/学习
机器学习
我首先导入所需的库:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
我正在导入数据集并检查其内容:
dataset = pd.read_csv('./myfile.csv')
dataset.head()
我拆分从属变量:
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values
我拆分数据集:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 0)
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
以下是我的训练步骤:
from sklearn.neighbors import KNeighborsClassifier
classifier = KNeighborsClassifier(n_neighbors= 5, metric = 'minkowski', p= 2)
classifier.fit(X_train, y_train)
y_pred = classifier.predict(X_test)
# np.set_printoptions(precision = 2)
print(np.concatenate((
y_pred.reshape(len(y_pred),1),
y_test.reshape(len(y_test),1)),
1))
我的问题如下。我尝试将训练集结果/测试集结果可视化:
from matplotlib.colors import ListedColormap
X_set, y_set = sc.inverse_transform(X_test), y_test
X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 10, stop = X_set[:, 0].max() + 10, step = 0.25),
np.arange(start = X_set[:, 1].min() - 1000, stop = X_set[:, 1].max() + 1000, step = 0.25))
plt.contourf(X1, X2, classifier.predict(sc.transform(np.array([X1.ravel(), X2.ravel()]).T)).reshape(X1.shape),
alpha = 0.75, cmap = ListedColormap(('red', 'green')))
plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())
for i, j in enumerate(np.unique(y_set)):
plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1], c = ListedColormap(('red', 'green'))(i), label = j)
plt.title('K Nearest Neighbors classification (Test set)')
plt.xlabel('Age')
plt.ylabel('Estimated Salary')
plt.legend()
plt.show()
我得到了关于记忆的错误信息:
---------------------------------------------------------------------------
MemoryError Traceback (most recent call last)
<ipython-input-23-70ec666db8bb> in <module>
4 np.arange(start = X_set[:, 1].min() - 1000, stop = X_set[:, 1].max() + 1000, step = 0.25))
5
----> 6 plt.contourf(X1, X2, classifier.predict(sc.transform(np.array([X1.ravel(), X2.ravel()]).T)).reshape(X1.shape),
7 alpha = 0.75, cmap = ListedColormap(('red', 'green')))
8
C:\Anaconda3\lib\site-packages\sklearn\preprocessing\_data.py in transform(self, X, copy)
793 X = check_array(X, accept_sparse='csr', copy=copy,
794 estimator=self, dtype=FLOAT_DTYPES,
--> 795 force_all_finite='allow-nan')
796
797 if sparse.issparse(X):
C:\Anaconda3\lib\site-packages\sklearn\utils\validation.py in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator)
600
601 if copy and np.may_share_memory(array, array_orig):
--> 602 array = np.array(array, dtype=dtype, order=order)
603
604 if (warn_on_dtype and dtypes_orig is not None and
MemoryError: Unable to allocate 2.03 GiB for an array with shape (135904000, 2) and data type float64