Skip to content

Commit ba167cc

Browse files
committed
Just testing Google's NGram API
1 parent a80e2dc commit ba167cc

File tree

2 files changed

+1117
-0
lines changed

2 files changed

+1117
-0
lines changed
Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
package test;
2+
3+
import java.io.BufferedInputStream;
4+
import java.io.BufferedReader;
5+
import java.io.DataOutputStream;
6+
import java.io.File;
7+
import java.io.FileOutputStream;
8+
import java.io.FileReader;
9+
import java.io.IOException;
10+
import java.io.InputStream;
11+
import java.io.InputStreamReader;
12+
import java.io.OutputStreamWriter;
13+
import java.net.HttpURLConnection;
14+
import java.net.MalformedURLException;
15+
import java.net.ProtocolException;
16+
import java.net.URI;
17+
import java.net.URL;
18+
import java.net.URLConnection;
19+
import java.net.URLEncoder;
20+
import java.nio.charset.StandardCharsets;
21+
import java.nio.file.Files;
22+
import java.nio.file.Path;
23+
import java.nio.file.Paths;
24+
import java.util.HashMap;
25+
import java.util.List;
26+
import java.util.Map;
27+
import java.util.Map.Entry;
28+
29+
import org.json.simple.JSONObject;
30+
import org.json.simple.parser.JSONParser;
31+
import org.json.simple.parser.ParseException;
32+
33+
import javajs.util.AjaxURLConnection;
34+
import javajs.util.Rdr;
35+
import sun.misc.IOUtils;
36+
import swingjs.JSUtil;
37+
import test.osp.OSPDocument;
38+
39+
public class Test_Google extends Test_ {
40+
41+
@SuppressWarnings("unused")
42+
public static void main(String[] args) {
43+
testGoogle();
44+
}
45+
46+
private static void testGoogle() {
47+
int mssleep = 5000;
48+
URL url = Test_URL.class.getResource("words.txt");
49+
byte[] ret;
50+
try {
51+
ret = (byte[]) Rdr.getStreamAsBytes(new BufferedInputStream(url.openStream()), null);
52+
String[] words = new String(ret).split("\r\n");
53+
int n = words.length;
54+
double[] freq = new double[n];
55+
StringBuffer sb = new StringBuffer();
56+
for (int i = 0; i < n; i++) {
57+
try {
58+
processWord(i, words, freq);
59+
sb.append(i).append('\t').append(words[i]).append('\t').append(freq[i]).append('\n');
60+
} catch (IOException e) {
61+
System.out.println(e);
62+
// TODO Auto-generated catch block
63+
i--;
64+
try {
65+
Thread.sleep(mssleep);
66+
} catch (InterruptedException e1) {
67+
// TODO Auto-generated catch block
68+
e1.printStackTrace();
69+
}
70+
}
71+
}
72+
FileOutputStream fos = new FileOutputStream("c:/temp/words.out");
73+
fos.write(sb.toString().getBytes());
74+
fos.close();
75+
} catch (IOException e2) {
76+
// TODO Auto-generated catch block
77+
e2.printStackTrace();
78+
}
79+
80+
// TODO Auto-generated method stub
81+
82+
}
83+
84+
private static void processWord(int i, String[] words, double[] freq) throws IOException {
85+
String word = words[i];
86+
URL url = new URL("https://books.google.com/ngrams/json?content=+"+word + "&year_start=1981&year_end=2020&corpus=en&smoothing=3"); byte[] bytes = (byte[]) Rdr.getStreamAsBytes(new BufferedInputStream(url.openStream()), null);
87+
String json = new String(bytes);
88+
String[] values = json.split("\\[")[2].split("\\]")[0].split(",");
89+
double v = 0;
90+
for (int j = values.length; --j >= 0;) {
91+
v += Double.valueOf(values[j]);
92+
}
93+
System.out.println(i + "\t" + word + "\t" + v);
94+
freq[i] = v;
95+
}
96+
97+
}

0 commit comments

Comments
 (0)