I got it to work.
Standardized your X
from sklearn import preprocessing
scaler = preprocessing.StandardScaler().fit(X[:, :-1])
X_trans = np.column_stack((scaler.transform(X[:, :-1]), X[:, -1]))
Better initialization than zero.
#initialize weights
r = np.sqrt(2)
w = np.random.uniform(-r, r, (3,))
Add learned biases during prediction
z = np.dot(X, w[:-1]) + w[-1]
Standardize during prediction as well (using standardization learned from input)
Z = classify(scaler.transform(np.c_[xx.ravel(), yy.ravel()]),
l, w) #strip the bias from weights
Generally, always a good idea to standardize the inputs.
Entire code:
import numpy as np
from matplotlib import pyplot as plt
%matplotlib inline
def generate_data():
#generate a dataset that is linearly seperable
group_1 = np.random.randint(50, 100, size=(50,2))
group_1_labels = np.full((50,1), 1)
group_2 = np.random.randint(0, 49, size =(50,2))
group_2_labels = np.full((50,1), -1)
#add a bias value of -1
bias = np.full((50,1), -1)
#add labels, upper right quadrant are 1, lower left are -1
group_1_with_bias = np.hstack((group_1, bias))
group_2_with_bias = np.hstack((group_2, bias))
group_1_labeled = np.hstack((group_1_with_bias, group_1_labels))
group_2_labeled = np.hstack((group_2_with_bias, group_2_labels))
#merge our labeled data and shuffle!
merged_data = np.vstack((group_1_labeled, group_2_labeled))
np.random.shuffle(merged_data)
return merged_data
data = generate_data()
#load data, strip labels, add a -1 bias value
X = data[:, :3]
#create labels matrix
l = np.ravel(data[:, 3:])
from sklearn import preprocessing
scaler = preprocessing.StandardScaler().fit(X[:, :-1])
X_trans = np.column_stack((scaler.transform(X[:, :-1]), X[:, -1]))
def perceptron_sgd(X, l, c, epochs):
#initialize weights
r = np.sqrt(2)
w = np.random.uniform(-r, r, (3,))
errors = []
for epoch in range(epochs):
total_error = 0
for i, x in enumerate(X):
if (np.dot(x, w) * l[i]) <= 0:
total_error += (np.dot(x, w) * l[i])
w = w + c * (x * l[i])
errors.append(total_error * -1)
print("epoch " + str(epoch) + ": " + str(w))
return w, errors
def classify(X, l, w):
z = np.dot(X, w[:-1]) + w[-1]
print(z)
z[z <= 0] = -1
z[z > 0] = 1
#return a matrix of predicted labels
return z
w, errors = perceptron_sgd(X_trans, l, .01, 25)
# X - some data in 2dimensional np.array
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, .1), np.arange(y_min, y_max, .1))
# here "model" is your model's prediction (classification) function
Z = classify(scaler.transform(np.c_[xx.ravel(), yy.ravel()]), l, w) #strip the bias from weights
# Put the result into a color plot
Z = Z.reshape(xx.shape)
plt.contourf(xx, yy, Z, alpha=0.4)
#plt.axis('off')
#Plot also the training points
plt.scatter(X[:, 0], X[:, 1], c=l, cmap=plt.cm.Paired)