I'm trying to use this code from the scikit learn site.
I'm using my own data.
My problem is, I have a lot more than two features. If I want to "expand" the features from 2 to 3 or 4.
I'm getting:
query data dimension must match training data dimension
def machine():
with open("test.txt",'r') as csvr:
reader= csv.reader(csvr,delimiter='\t')
for i,row in enumerate(reader):
if i==0:
pass
elif '' in row[2:]:
pass
else:
liste.append(map(float,row[2:]))
a = np.array(liste)
h = .02
names = ["Nearest Neighbors", "Linear SVM", "RBF SVM", "Decision Tree",
"Random Forest", "AdaBoost", "Naive Bayes", "LDA", "QDA"]
classifiers = [
KNeighborsClassifier(1),
SVC(kernel="linear", C=0.025),
SVC(gamma=2, C=1),
DecisionTreeClassifier(max_depth=5),
RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
AdaBoostClassifier(),
GaussianNB(),
LDA(),
QDA()]
X = a[:,:3]
y = np.ravel(a[:,13])
linearly_separable = (X, y)
datasets =[linearly_separable]
figure = plt.figure(figsize=(27, 9))
I = 1
for ds in datasets:
X, y = ds
X = StandardScaler().fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.4)
x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
np.arange(y_min, y_max, h))
cm = plt.cm.RdBu
cm_bright = ListedColormap(['#FF0000', '#0000FF'])
ax = plt.subplot(len(datasets), len(classifiers) + 1, i)
ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright)
ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=cm_bright, alpha=0.6)
ax.set_xlim(xx.min(), xx.max())
ax.set_ylim(yy.min(), yy.max())
ax.set_xticks(())
ax.set_yticks(())
I += 1
for name, clf in zip(names, classifiers):
ax = plt.subplot(len(datasets), len(classifiers) + 1, i)
print clf.fit(X_train, y_train)
score = clf.score(X_test, y_test)
print y.shape, X.shape
if hasattr(clf, "decision_function"):
Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])
print Z
else:
Z = clf.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:, 1]
Z = Z.reshape(xx.shape)
ax.contourf(xx, yy, Z, cmap=cm, alpha=.8)
ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright)
ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=cm_bright,
alpha=0.6)
ax.set_xlim(xx.min(), xx.max())
ax.set_ylim(yy.min(), yy.max())
ax.set_xticks(())
ax.set_yticks(())
ax.set_title(name)
ax.text(xx.max() - .3, yy.min() + .3, ('%.2f' % score).lstrip('0'),
size=15, horizontalalignment='right')
I += 1
figure.subplots_adjust(left=.02, right=.98)
plt.show()
In this case I use three features.
What am I doing wrong in the code, Is it something with the X_train and X_test data? With just two features, everything is ok.
My X value:
(array([[ 1., 1., 0.],
[ 1., 0., 0.],
[ 1., 0., 0.],
[ 1., 0., 0.],
[ 1., 1., 0.],
[ 1., 0., 0.],
[ 1., 0., 0.],
[ 3., 3., 0.],
[ 1., 1., 0.],
[ 1., 1., 0.],
[ 0., 0., 0.],
[ 0., 0., 0.],
[ 0., 0., 0.],
[ 0., 0., 0.],
[ 0., 0., 0.],
[ 0., 0., 0.],
[ 4., 4., 2.],
[ 0., 0., 0.],
[ 6., 3., 0.],
[ 5., 3., 2.],
[ 2., 2., 0.],
[ 4., 4., 2.],
[ 2., 1., 0.],
[ 2., 2., 0.]]), array([ 1., 1., 1., 1., 0., 1., 1., 0., 1., 1., 0., 1., 1.,
1., 1., 1., 0., 1., 1., 0., 1., 0., 1., 1.]))
The first array is the X array and the second array is the y(target) array.
I'm sorry for the bad format = error:
```` Traceback (most recent call last):
File "allM.py", line 144, in <module> mainplot(namePlot,1,2) File "allM.py", line 117, in mainplot
Z = clf.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:, 1]
File "/usr/local/lib/python2.7/dist-packages/sklearn/neighbors/classification.py", line 191, in predict_proba neigh_dist, neigh_ind = self.kneighbors(X)
File "/usr/local/lib/python2.7/dist-packages/sklearn/neighbors/base.py", line 332, in kneighbors return_distance=return_distance)
File "binary_tree.pxi", line 1298, in sklearn.neighbors.kd_tree.BinaryTree.query (sklearn/neighbors/kd_tree.c:10433)
ValueError: query data dimension must match training data dimension ```
and this is the X array without putting him into the Dataset "ds".
[[ 1. 1. 0.][ 1. 0. 0.][ 1. 0. 0.][ 1. 0. 0.][ 1. 1. 0.][ 1. 0. 0.][ 1. 0. 0.][ 3. 3. 0.][ 1. 1. 0.][ 1. 1. 0.][ 0. 0. 0.][ 0. 0. 0.][ 0. 0. 0.][ 0. 0. 0.][ 0. 0. 0.][ 0. 0. 0.][ 4. 4. 2.][ 0. 0. 0.][ 6. 3. 0.][ 5. 3. 2.][ 2. 2. 0.][ 4. 4. 2.][ 2. 1. 0.][ 2. 2. 0.]]