Skip to content

Commit f103b30

Browse files
author
linyiqun
committed
频繁子图挖掘算法
频繁子图挖掘算法,用到了dfs编码和五元组,最右路径扩展等概念
1 parent 320e530 commit f103b30

File tree

11 files changed

+20075
-0
lines changed

11 files changed

+20075
-0
lines changed
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
package DataMining_GSpan;
2+
3+
/**
4+
* gSpan频繁子图挖掘算法
5+
* @author lyq
6+
*
7+
*/
8+
public class Client {
9+
public static void main(String[] args){
10+
//测试数据文件地址
11+
String filePath = "C:\\Users\\lyq\\Desktop\\icon\\input.txt";
12+
//最小支持度率
13+
double minSupportRate = 0.2;
14+
15+
GSpanTool tool = new GSpanTool(filePath, minSupportRate);
16+
tool.freqGraphMining();
17+
}
18+
}
Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,150 @@
1+
package DataMining_GSpan;
2+
3+
import java.util.ArrayList;
4+
import java.util.Stack;
5+
6+
/**
7+
* 图编码深度优先搜索类,判断当前编码在给定图中是否为最小编码
8+
*
9+
* @author lyq
10+
*
11+
*/
12+
public class DFSCodeTraveler {
13+
// 当前的编码是否为最下编码标识
14+
boolean isMin;
15+
// 当前挖掘的图的边五元组编码组
16+
ArrayList<Edge> edgeSeqs;
17+
// 当前的图结构
18+
Graph graph;
19+
// 图节点id对应的边五元组中的id标识
20+
int[] g2s;
21+
// 代表图中的边是否被用到了
22+
boolean f[][];
23+
24+
public DFSCodeTraveler(ArrayList<Edge> edgeSeqs, Graph graph) {
25+
this.isMin = true;
26+
this.edgeSeqs = edgeSeqs;
27+
this.graph = graph;
28+
}
29+
30+
public void traveler() {
31+
int nodeLNums = graph.nodeLabels.size();
32+
g2s = new int[nodeLNums];
33+
for (int i = 0; i < nodeLNums; i++) {
34+
// 设置-1代表此点还未被计入编码
35+
g2s[i] = -1;
36+
}
37+
38+
f = new boolean[nodeLNums][nodeLNums];
39+
for (int i = 0; i < nodeLNums; i++) {
40+
for (int j = 0; j < nodeLNums; j++) {
41+
f[i][j] = false;
42+
}
43+
}
44+
45+
// 从每个点开始寻找最小编码五元组
46+
for (int i = 0; i < nodeLNums; i++) {
47+
//对选择的第一个点的标号做判断
48+
if(graph.getNodeLabels().get(i) > edgeSeqs.get(0).x){
49+
continue;
50+
}
51+
// 五元组id从0开始设置
52+
g2s[i] = 0;
53+
54+
Stack<Integer> s = new Stack<>();
55+
s.push(i);
56+
dfsSearch(s, 0, 1);
57+
if (!isMin) {
58+
return;
59+
}
60+
g2s[i] = -1;
61+
}
62+
}
63+
64+
/**
65+
* 深度优先搜索最小编码组
66+
*
67+
* @param stack
68+
* 加入的节点id栈
69+
* @param currentPosition
70+
* 当前进行的层次,代表找到的第几条边
71+
* @param next
72+
* 五元组边下一条边的点的临时标识
73+
*/
74+
private void dfsSearch(Stack<Integer> stack, int currentPosition, int next) {
75+
if (currentPosition >= edgeSeqs.size()) {
76+
stack.pop();
77+
// 比较到底了则返回
78+
return;
79+
}
80+
81+
while (!stack.isEmpty()) {
82+
int x = stack.pop();
83+
for (int i = 0; i < graph.edgeNexts.get(x).size(); i++) {
84+
// 从此id节点所连接的点中选取1个点作为下一个点
85+
int y = graph.edgeNexts.get(x).get(i);
86+
// 如果这2个点所构成的边已经被用过,则继续
87+
if (f[x][y] || f[y][x]) {
88+
continue;
89+
}
90+
91+
// 如果y这个点未被用过
92+
if (g2s[y] < 0) {
93+
// 新建这条边五元组
94+
Edge e = new Edge(g2s[x], next, graph.nodeLabels.get(x),
95+
graph.edgeLabels.get(x).get(i),
96+
graph.nodeLabels.get(y));
97+
98+
// 与相应位置的边做比较,如果不是最小则失败
99+
int compareResult = e.compareWith(edgeSeqs
100+
.get(currentPosition));
101+
if (compareResult == Edge.EDGE_SMALLER) {
102+
isMin = false;
103+
return;
104+
} else if (compareResult == Edge.EDGE_LARGER) {
105+
continue;
106+
}
107+
// 如果相等则继续比
108+
g2s[y] = next;
109+
f[x][y] = true;
110+
f[y][x] = true;
111+
stack.push(y);
112+
dfsSearch(stack, currentPosition + 1, next + 1);
113+
if (!isMin) {
114+
return;
115+
}
116+
f[x][y] = false;
117+
f[y][x] = false;
118+
g2s[y] = -1;
119+
} else {
120+
// 这个点已经被用过的时候,不需要再设置五元组id标识
121+
// 新建这条边五元组
122+
Edge e = new Edge(g2s[x], g2s[y], graph.nodeLabels.get(x),
123+
graph.edgeLabels.get(x).get(i),
124+
graph.nodeLabels.get(y));
125+
126+
// 与相应位置的边做比较,如果不是最小则失败
127+
int compareResult = e.compareWith(edgeSeqs
128+
.get(currentPosition));
129+
if (compareResult == Edge.EDGE_SMALLER) {
130+
isMin = false;
131+
return;
132+
} else if (compareResult == Edge.EDGE_LARGER) {
133+
continue;
134+
}
135+
// 如果相等则继续比
136+
g2s[y] = next;
137+
f[x][y] = true;
138+
f[y][x] = true;
139+
stack.push(y);
140+
dfsSearch(stack, currentPosition + 1, next);
141+
if (!isMin) {
142+
return;
143+
}
144+
f[x][y] = false;
145+
f[y][x] = false;
146+
}
147+
}
148+
}
149+
}
150+
}
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
package DataMining_GSpan;
2+
3+
/**
4+
* 边,用五元组表示
5+
*
6+
* @author lyq
7+
*
8+
*/
9+
public class Edge {
10+
// 五元组的大小比较结果
11+
public static final int EDGE_EQUAL = 0;
12+
public static final int EDGE_SMALLER = 1;
13+
public static final int EDGE_LARGER = 2;
14+
15+
// 边的一端的id号标识
16+
int ix;
17+
// 边的另一端的id号标识
18+
int iy;
19+
// 边的一端的点标号
20+
int x;
21+
// 边的标号
22+
int a;
23+
// 边的另一端的点标号
24+
int y;
25+
26+
public Edge(int ix, int iy, int x, int a, int y) {
27+
this.ix = ix;
28+
this.iy = iy;
29+
this.x = x;
30+
this.a = a;
31+
this.y = y;
32+
}
33+
34+
/**
35+
* 当前边是与给定的边的大小比较关系
36+
*
37+
* @param e
38+
* @return
39+
*/
40+
public int compareWith(Edge e) {
41+
int result = EDGE_EQUAL;
42+
int[] array1 = new int[] { ix, iy, x, y, a };
43+
int[] array2 = new int[] { e.ix, e.iy, e.x, e.y, e.a };
44+
45+
// 按照ix, iy,x,y,a的次序依次比较
46+
for (int i = 0; i < array1.length; i++) {
47+
if (array1[i] < array2[i]) {
48+
result = EDGE_SMALLER;
49+
break;
50+
} else if (array1[i] > array2[i]) {
51+
result = EDGE_LARGER;
52+
break;
53+
} else {
54+
// 如果相等,继续比较下一个
55+
continue;
56+
}
57+
}
58+
59+
return result;
60+
}
61+
62+
}
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
package DataMining_GSpan;
2+
3+
/**
4+
* 边的频繁统计
5+
* @author lyq
6+
*
7+
*/
8+
public class EdgeFrequency {
9+
//节点标号数量
10+
private int nodeLabelNum;
11+
//边的标号数量
12+
private int edgeLabelNum;
13+
//用于存放边计数的3维数组
14+
public int[][][] edgeFreqCount;
15+
16+
public EdgeFrequency(int nodeLabelNum, int edgeLabelNum){
17+
this.nodeLabelNum = nodeLabelNum;
18+
this.edgeLabelNum = edgeLabelNum;
19+
20+
edgeFreqCount = new int[nodeLabelNum][edgeLabelNum][nodeLabelNum];
21+
//最初始化操作
22+
for(int i=0; i<nodeLabelNum; i++){
23+
for(int j=0; j<edgeLabelNum; j++){
24+
for(int k=0; k<nodeLabelNum; k++){
25+
edgeFreqCount[i][j][k] = 0;
26+
}
27+
}
28+
}
29+
}
30+
31+
}

0 commit comments

Comments
 (0)