Skip to content

Commit 5afe53a

Browse files
author
linyiqun
committed
dbscan基于密度的聚类算法
dbscan基于密度的聚类算法
1 parent 9a241ab commit 5afe53a

File tree

4 files changed

+303
-0
lines changed

4 files changed

+303
-0
lines changed

Others/DataMining_DBSCAN/Client.java

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
package DataMining_DBSCAN;
2+
3+
/**
4+
* Dbscan基于密度的聚类算法测试类
5+
* @author lyq
6+
*
7+
*/
8+
public class Client {
9+
public static void main(String[] args){
10+
String filePath = "C:\\Users\\lyq\\Desktop\\icon\\input.txt";
11+
//簇扫描半径
12+
double eps = 3;
13+
//最小包含点数阈值
14+
int minPts = 3;
15+
16+
DBSCANTool tool = new DBSCANTool(filePath, eps, minPts);
17+
tool.dbScanCluster();
18+
}
19+
}
Lines changed: 209 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,209 @@
1+
package DataMining_DBSCAN;
2+
3+
import java.io.BufferedReader;
4+
import java.io.File;
5+
import java.io.FileReader;
6+
import java.io.IOException;
7+
import java.text.MessageFormat;
8+
import java.util.ArrayList;
9+
10+
/**
11+
* DBSCAN基于密度聚类算法工具类
12+
*
13+
* @author lyq
14+
*
15+
*/
16+
public class DBSCANTool {
17+
// 测试数据文件地址
18+
private String filePath;
19+
// 簇扫描半径
20+
private double eps;
21+
// 最小包含点数阈值
22+
private int minPts;
23+
// 所有的数据坐标点
24+
private ArrayList<Point> totalPoints;
25+
// 聚簇结果
26+
private ArrayList<ArrayList<Point>> resultClusters;
27+
//噪声数据
28+
private ArrayList<Point> noisePoint;
29+
30+
public DBSCANTool(String filePath, double eps, int minPts) {
31+
this.filePath = filePath;
32+
this.eps = eps;
33+
this.minPts = minPts;
34+
readDataFile();
35+
}
36+
37+
/**
38+
* 从文件中读取数据
39+
*/
40+
public void readDataFile() {
41+
File file = new File(filePath);
42+
ArrayList<String[]> dataArray = new ArrayList<String[]>();
43+
44+
try {
45+
BufferedReader in = new BufferedReader(new FileReader(file));
46+
String str;
47+
String[] tempArray;
48+
while ((str = in.readLine()) != null) {
49+
tempArray = str.split(" ");
50+
dataArray.add(tempArray);
51+
}
52+
in.close();
53+
} catch (IOException e) {
54+
e.getStackTrace();
55+
}
56+
57+
Point p;
58+
totalPoints = new ArrayList<>();
59+
for (String[] array : dataArray) {
60+
p = new Point(array[0], array[1]);
61+
totalPoints.add(p);
62+
}
63+
}
64+
65+
/**
66+
* 递归的寻找聚簇
67+
*
68+
* @param pointList
69+
* 当前的点列表
70+
* @param parentCluster
71+
* 父聚簇
72+
*/
73+
private void recursiveCluster(Point point, ArrayList<Point> parentCluster) {
74+
double distance = 0;
75+
ArrayList<Point> cluster;
76+
77+
// 如果已经访问过了,则跳过
78+
if (point.isVisited) {
79+
return;
80+
}
81+
82+
point.isVisited = true;
83+
cluster = new ArrayList<>();
84+
for (Point p2 : totalPoints) {
85+
// 过滤掉自身的坐标点
86+
if (point.isTheSame(p2)) {
87+
continue;
88+
}
89+
90+
distance = point.ouDistance(p2);
91+
if (distance <= eps) {
92+
// 如果聚类小于给定的半径,则加入簇中
93+
cluster.add(p2);
94+
}
95+
}
96+
97+
if (cluster.size() >= minPts) {
98+
// 将自己也加入到聚簇中
99+
cluster.add(point);
100+
// 如果附近的节点个数超过最下值,则加入到父聚簇中,同时去除重复的点
101+
addCluster(parentCluster, cluster);
102+
103+
for (Point p : cluster) {
104+
recursiveCluster(p, parentCluster);
105+
}
106+
}
107+
}
108+
109+
/**
110+
* 往父聚簇中添加局部簇坐标点
111+
*
112+
* @param parentCluster
113+
* 原始父聚簇坐标点
114+
* @param cluster
115+
* 待合并的聚簇
116+
*/
117+
private void addCluster(ArrayList<Point> parentCluster,
118+
ArrayList<Point> cluster) {
119+
boolean isCotained = false;
120+
ArrayList<Point> addPoints = new ArrayList<>();
121+
122+
for (Point p : cluster) {
123+
isCotained = false;
124+
for (Point p2 : parentCluster) {
125+
if (p.isTheSame(p2)) {
126+
isCotained = true;
127+
break;
128+
}
129+
}
130+
131+
if (!isCotained) {
132+
addPoints.add(p);
133+
}
134+
}
135+
136+
parentCluster.addAll(addPoints);
137+
}
138+
139+
/**
140+
* dbScan算法基于密度的聚类
141+
*/
142+
public void dbScanCluster() {
143+
ArrayList<Point> cluster = null;
144+
resultClusters = new ArrayList<>();
145+
noisePoint = new ArrayList<>();
146+
147+
for (Point p : totalPoints) {
148+
if(p.isVisited){
149+
continue;
150+
}
151+
152+
cluster = new ArrayList<>();
153+
recursiveCluster(p, cluster);
154+
155+
if (cluster.size() > 0) {
156+
resultClusters.add(cluster);
157+
}else{
158+
noisePoint.add(p);
159+
}
160+
}
161+
removeFalseNoise();
162+
163+
printClusters();
164+
}
165+
166+
/**
167+
* 移除被错误分类的噪声点数据
168+
*/
169+
private void removeFalseNoise(){
170+
ArrayList<Point> totalCluster = new ArrayList<>();
171+
ArrayList<Point> deletePoints = new ArrayList<>();
172+
173+
//将聚簇合并
174+
for(ArrayList<Point> list: resultClusters){
175+
totalCluster.addAll(list);
176+
}
177+
178+
for(Point p: noisePoint){
179+
for(Point p2: totalCluster){
180+
if(p2.isTheSame(p)){
181+
deletePoints.add(p);
182+
}
183+
}
184+
}
185+
186+
noisePoint.removeAll(deletePoints);
187+
}
188+
189+
/**
190+
* 输出聚类结果
191+
*/
192+
private void printClusters() {
193+
int i = 1;
194+
for (ArrayList<Point> pList : resultClusters) {
195+
System.out.print("聚簇" + (i++) + ":");
196+
for (Point p : pList) {
197+
System.out.print(MessageFormat.format("({0},{1}) ", p.x, p.y));
198+
}
199+
System.out.println();
200+
}
201+
202+
System.out.println();
203+
System.out.print("噪声数据:");
204+
for (Point p : noisePoint) {
205+
System.out.print(MessageFormat.format("({0},{1}) ", p.x, p.y));
206+
}
207+
System.out.println();
208+
}
209+
}

Others/DataMining_DBSCAN/Point.java

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
package DataMining_DBSCAN;
2+
3+
/**
4+
* 坐标点类
5+
*
6+
* @author lyq
7+
*
8+
*/
9+
public class Point {
10+
// 坐标点横坐标
11+
int x;
12+
// 坐标点纵坐标
13+
int y;
14+
// 此节点是否已经被访问过
15+
boolean isVisited;
16+
17+
public Point(String x, String y) {
18+
this.x = (Integer.parseInt(x));
19+
this.y = (Integer.parseInt(y));
20+
this.isVisited = false;
21+
}
22+
23+
/**
24+
* 计算当前点与制定点之间的欧式距离
25+
*
26+
* @param p
27+
* 待计算聚类的p点
28+
* @return
29+
*/
30+
public double ouDistance(Point p) {
31+
double distance = 0;
32+
33+
distance = (this.x - p.x) * (this.x - p.x) + (this.y - p.y)
34+
* (this.y - p.y);
35+
distance = Math.sqrt(distance);
36+
37+
return distance;
38+
}
39+
40+
/**
41+
* 判断2个坐标点是否为用个坐标点
42+
*
43+
* @param p
44+
* 待比较坐标点
45+
* @return
46+
*/
47+
public boolean isTheSame(Point p) {
48+
boolean isSamed = false;
49+
50+
if (this.x == p.x && this.y == p.y) {
51+
isSamed = true;
52+
}
53+
54+
return isSamed;
55+
}
56+
}

Others/DataMining_DBSCAN/input.txt

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
2 2
2+
3 1
3+
3 4
4+
3 14
5+
5 3
6+
8 3
7+
8 6
8+
9 8
9+
10 4
10+
10 7
11+
10 10
12+
10 14
13+
11 13
14+
12 8
15+
12 15
16+
14 7
17+
14 9
18+
14 15
19+
15 8

0 commit comments

Comments
 (0)