Skip to content

Commit 79c3d7d

Browse files
author
linyiqun
committed
PageRank网页排名算法实现
PageRank网页排名算法实现
1 parent 300a566 commit 79c3d7d

File tree

3 files changed

+203
-0
lines changed

3 files changed

+203
-0
lines changed

DataMining_PageRank/Client.java

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
package DataMining_PageRank;
2+
3+
/**
4+
* PageRank计算网页重要性/排名算法
5+
* @author lyq
6+
*
7+
*/
8+
public class Client {
9+
public static void main(String[] args){
10+
String filePath = "C:\\Users\\lyq\\Desktop\\icon\\input.txt";
11+
12+
PageRankTool tool = new PageRankTool(filePath);
13+
tool.printPageRankValue();
14+
}
15+
}

DataMining_PageRank/PageRankTool.java

Lines changed: 184 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,184 @@
1+
package DataMining_PageRank;
2+
3+
import java.io.BufferedReader;
4+
import java.io.File;
5+
import java.io.FileReader;
6+
import java.io.IOException;
7+
import java.lang.reflect.Array;
8+
import java.text.MessageFormat;
9+
import java.util.ArrayList;
10+
11+
/**
12+
* PageRank网页排名算法工具类
13+
*
14+
* @author lyq
15+
*
16+
*/
17+
public class PageRankTool {
18+
// 测试输入数据
19+
private String filePath;
20+
// 网页总数量
21+
private int pageNum;
22+
// 链接关系矩阵
23+
private double[][] linkMatrix;
24+
// 每个页面pageRank值初始向量
25+
private double[] pageRankVecor;
26+
27+
// 网页数量分类
28+
ArrayList<String> pageClass;
29+
30+
public PageRankTool(String filePath) {
31+
this.filePath = filePath;
32+
readDataFile();
33+
}
34+
35+
/**
36+
* 从文件中读取数据
37+
*/
38+
private void readDataFile() {
39+
File file = new File(filePath);
40+
ArrayList<String[]> dataArray = new ArrayList<String[]>();
41+
42+
try {
43+
BufferedReader in = new BufferedReader(new FileReader(file));
44+
String str;
45+
String[] tempArray;
46+
while ((str = in.readLine()) != null) {
47+
tempArray = str.split(" ");
48+
dataArray.add(tempArray);
49+
}
50+
in.close();
51+
} catch (IOException e) {
52+
e.getStackTrace();
53+
}
54+
55+
pageClass = new ArrayList<>();
56+
// 统计网页类型种数
57+
for (String[] array : dataArray) {
58+
for (String s : array) {
59+
if (!pageClass.contains(s)) {
60+
pageClass.add(s);
61+
}
62+
}
63+
}
64+
65+
int i = 0;
66+
int j = 0;
67+
pageNum = pageClass.size();
68+
linkMatrix = new double[pageNum][pageNum];
69+
pageRankVecor = new double[pageNum];
70+
for (int k = 0; k < pageNum; k++) {
71+
// 初始每个页面的pageRank值为1
72+
pageRankVecor[k] = 1.0;
73+
}
74+
for (String[] array : dataArray) {
75+
76+
i = Integer.parseInt(array[0]);
77+
j = Integer.parseInt(array[1]);
78+
79+
// 设置linkMatrix[i][j]为1代表i网页包含指向j网页的链接
80+
linkMatrix[i - 1][j - 1] = 1;
81+
}
82+
}
83+
84+
/**
85+
* 将矩阵转置
86+
*/
87+
private void transferMatrix() {
88+
int count = 0;
89+
for (double[] array : linkMatrix) {
90+
// 计算页面链接个数
91+
count = 0;
92+
for (double d : array) {
93+
if (d == 1) {
94+
count++;
95+
}
96+
}
97+
// 按概率均分
98+
for (int i = 0; i < array.length; i++) {
99+
if (array[i] == 1) {
100+
array[i] /= count;
101+
}
102+
}
103+
}
104+
105+
double t = 0;
106+
// 将矩阵转置换,作为概率转移矩阵
107+
for (int i = 0; i < linkMatrix.length; i++) {
108+
for (int j = i + 1; j < linkMatrix[0].length; j++) {
109+
t = linkMatrix[i][j];
110+
linkMatrix[i][j] = linkMatrix[j][i];
111+
linkMatrix[j][i] = t;
112+
}
113+
}
114+
}
115+
116+
/**
117+
* 利用幂法计算pageRank值
118+
*/
119+
public void printPageRankValue() {
120+
transferMatrix();
121+
// 阻尼系数
122+
double damp = 0.5;
123+
// 链接概率矩阵
124+
double[][] A = new double[pageNum][pageNum];
125+
double[][] e = new double[pageNum][pageNum];
126+
127+
// 调用公式A=d*q+(1-d)*e/m,m为网页总个数,d就是damp
128+
double temp = (1 - damp) / pageNum;
129+
for (int i = 0; i < e.length; i++) {
130+
for (int j = 0; j < e[0].length; j++) {
131+
e[i][j] = temp;
132+
}
133+
}
134+
135+
for (int i = 0; i < pageNum; i++) {
136+
for (int j = 0; j < pageNum; j++) {
137+
temp = damp * linkMatrix[i][j] + e[i][j];
138+
A[i][j] = temp;
139+
140+
}
141+
}
142+
143+
// 误差值,作为判断收敛标准
144+
double errorValue = Integer.MAX_VALUE;
145+
double[] newPRVector = new double[pageNum];
146+
// 当平均每个PR值误差小于0.001时就算达到收敛
147+
while (errorValue > 0.001 * pageNum) {
148+
System.out.println("**********");
149+
for (int i = 0; i < pageNum; i++) {
150+
temp = 0;
151+
// 将A*pageRankVector,利用幂法求解,直到pageRankVector值收敛
152+
for (int j = 0; j < pageNum; j++) {
153+
// temp就是每个网页到i页面的pageRank值
154+
temp += A[i][j] * pageRankVecor[j];
155+
}
156+
157+
// 最后的temp就是i网页的总PageRank值
158+
newPRVector[i] = temp;
159+
System.out.println(temp);
160+
}
161+
162+
errorValue = 0;
163+
for (int i = 0; i < pageNum; i++) {
164+
errorValue += Math.abs(pageRankVecor[i] - newPRVector[i]);
165+
// 新的向量代替旧的向量
166+
pageRankVecor[i] = newPRVector[i];
167+
}
168+
}
169+
170+
String name = null;
171+
temp = 0;
172+
System.out.println("--------------------");
173+
for (int i = 0; i < pageNum; i++) {
174+
System.out.println(MessageFormat.format("网页{0}的pageRank值:{1}",
175+
pageClass.get(i), pageRankVecor[i]));
176+
if (pageRankVecor[i] > temp) {
177+
temp = pageRankVecor[i];
178+
name = pageClass.get(i);
179+
}
180+
}
181+
System.out.println(MessageFormat.format("等级最高的网页为:{0}", name));
182+
}
183+
184+
}

DataMining_PageRank/input.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
1 2
2+
1 3
3+
2 3
4+
3 1

0 commit comments

Comments
 (0)