Skip to content

Commit d1d43a4

Browse files
committed
logistic regression fake data version complete
1 parent e89ea9c commit d1d43a4

File tree

1 file changed

+115
-0
lines changed

1 file changed

+115
-0
lines changed
Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
# encoding=utf-8
2+
# @Author: WenDesi
3+
# @Date: 08-11-16
4+
# @Email: wendesi@foxmail.com
5+
# @Last modified by: WenDesi
6+
# @Last modified time: 08-11-16
7+
8+
import math
9+
import random
10+
11+
12+
def predict_(x, w):
    """Classify one augmented sample with a logistic-regression weight vector.

    The original code compared P(y=1) = exp(wx) / (1 + exp(wx)) against
    P(y=0) = 1 / (1 + exp(wx)).  P(y=1) is larger exactly when the linear
    score wx is positive, so the sign of wx is tested directly.  This avoids
    calling math.exp, which raises OverflowError for large positive scores.

    Args:
        x: Feature values with the bias term already appended.  Entries
            beyond len(w) are ignored.
        w: Weight vector, one weight per used entry of x.

    Returns:
        1 if the weighted sum is strictly positive, otherwise 0 (ties go to 0).

    Raises:
        IndexError: If x has fewer entries than w.
    """
    if len(x) < len(w):
        raise IndexError('sample has fewer entries than the weight vector')

    wx = sum(w_j * x_j for w_j, x_j in zip(w, x))
    return 1 if wx > 0 else 0
23+
24+
25+
def _sigmoid(z):
    """Return 1 / (1 + exp(-z)) without overflowing for large |z|."""
    if z >= 0:
        return 1.0 / (1.0 + math.exp(-z))
    exp_z = math.exp(z)
    return exp_z / (1.0 + exp_z)


def train(features, labels, learning_step=0.00001, max_iteration=1000):
    """Fit logistic-regression weights with stochastic gradient descent.

    A random sample is drawn on every pass.  Samples that are already
    classified correctly are skipped; a misclassified sample triggers one
    gradient step and counts as one iteration.

    Args:
        features: Sequence of feature rows.  Every row is expected to have
            the same length as features[0].  Rows are not modified.
        labels: Sequence of class labels (0 or 1), parallel to features.
        learning_step: Step size applied to each gradient update.
        max_iteration: Maximum number of gradient updates.  Also the number
            of consecutive correct predictions after which training stops.

    Returns:
        The weight list, len(features[0]) + 1 long; the last entry is the
        bias weight.

    Raises:
        IndexError: If features is empty.
    """
    w = [0.0] * (len(features[0]) + 1)
    correct_count = 0
    iteration = 0

    while iteration < max_iteration:
        index = random.randint(0, len(labels) - 1)
        # Work on a copy so the caller's row does not grow by one bias term
        # every time the same sample is drawn.
        x = list(features[index]) + [1.0]
        y = labels[index]

        # The linear score is computed once and reused for both the 0/1
        # decision and the gradient.
        wx = sum(w_j * x_j for w_j, x_j in zip(w, x))
        predicted = 1 if wx > 0 else 0

        if y == predicted:
            correct_count += 1
            if correct_count > max_iteration:
                break
            continue

        print('iterater times %d' % iteration)
        iteration += 1
        correct_count = 0

        # Gradient of the negative log-likelihood for one sample:
        # (sigmoid(wx) - y) * x.
        error = _sigmoid(wx) - y
        for j, x_j in enumerate(x):
            w[j] -= learning_step * error * x_j

    return w
56+
57+
58+
def predict(features, w):
    """Predict a 0/1 label for every feature row.

    Args:
        features: Iterable of feature rows without the bias term.  Rows are
            not modified; the bias term is added to a copy, so the same rows
            can be passed again without gaining an extra entry.
        w: Weight vector as returned by train (last entry is the bias weight).

    Returns:
        A list with one predicted label per row, in input order.
    """
    return [predict_(list(feature) + [1], w) for feature in features]
68+
69+
70+
def build_dataset(label, original_posins, radius, size):
    """Generate labelled integer points scattered around a centre.

    Each coordinate is the matching centre coordinate plus a uniform random
    integer offset in [-radius, radius].

    Args:
        label: Value stored as the first element of every generated row.
        original_posins: Centre coordinates; its length sets the dimension.
        radius: Maximum absolute offset per coordinate (integer).
        size: Number of rows to generate.

    Returns:
        A list of `size` rows, each `[label, coord_0, coord_1, ...]`.
    """
    # `range` replaces the Python-2-only `xrange`.  Rows and coordinates are
    # drawn in the same order as the nested loops this replaces.
    return [
        [label] + [
            random.randint(0, 2 * radius) - radius + origin
            for origin in original_posins
        ]
        for _ in range(size)
    ]
82+
83+
def main():
    """Train on synthetic two-cluster data and print the test accuracy."""
    # Build the training set: class 0 around (0, 0), class 1 around (30, 30).
    trainset = build_dataset(0, [0, 0], 10, 100)
    trainset.extend(build_dataset(1, [30, 30], 10, 100))
    random.shuffle(trainset)

    # Each row is [label, coord_0, coord_1].  Real lists are built because
    # train indexes into features, and `map` is lazy on Python 3.
    trainset_features = [row[1:] for row in trainset]
    trainset_labels = [row[0] for row in trainset]

    # Train.
    w = train(trainset_features, trainset_labels)

    # Build the test set from the same two clusters.
    testset = build_dataset(0, [0, 0], 10, 500)
    testset.extend(build_dataset(1, [30, 30], 10, 500))
    random.shuffle(testset)

    testset_features = [row[1:] for row in testset]
    testset_labels = [row[0] for row in testset]

    # Test.
    testset_predicts = predict(testset_features, w)
    hits = sum(
        1 for expected, predicted in zip(testset_labels, testset_predicts)
        if expected == predicted
    )
    accuracy_score = float(hits) / float(len(testset_predicts))
    # Formatted as a single argument so Python 2 and Python 3 print the
    # same text as the original two-item print statement.
    print("%s %s" % ("The accruacy socre is ", accuracy_score))


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)