|
| 1 | +package test; |
| 2 | + |
| 3 | +import java.io.BufferedInputStream; |
| 4 | +import java.io.BufferedReader; |
| 5 | +import java.io.DataOutputStream; |
| 6 | +import java.io.File; |
| 7 | +import java.io.FileOutputStream; |
| 8 | +import java.io.FileReader; |
| 9 | +import java.io.IOException; |
| 10 | +import java.io.InputStream; |
| 11 | +import java.io.InputStreamReader; |
| 12 | +import java.io.OutputStreamWriter; |
| 13 | +import java.net.HttpURLConnection; |
| 14 | +import java.net.MalformedURLException; |
| 15 | +import java.net.ProtocolException; |
| 16 | +import java.net.URI; |
| 17 | +import java.net.URL; |
| 18 | +import java.net.URLConnection; |
| 19 | +import java.net.URLEncoder; |
| 20 | +import java.nio.charset.StandardCharsets; |
| 21 | +import java.nio.file.Files; |
| 22 | +import java.nio.file.Path; |
| 23 | +import java.nio.file.Paths; |
| 24 | +import java.util.HashMap; |
| 25 | +import java.util.List; |
| 26 | +import java.util.Map; |
| 27 | +import java.util.Map.Entry; |
| 28 | + |
| 29 | +import org.json.simple.JSONObject; |
| 30 | +import org.json.simple.parser.JSONParser; |
| 31 | +import org.json.simple.parser.ParseException; |
| 32 | + |
| 33 | +import javajs.util.AjaxURLConnection; |
| 34 | +import javajs.util.Rdr; |
| 35 | +import sun.misc.IOUtils; |
| 36 | +import swingjs.JSUtil; |
| 37 | +import test.osp.OSPDocument; |
| 38 | + |
| 39 | +public class Test_Google extends Test_ { |
| 40 | + |
| 41 | + @SuppressWarnings("unused") |
| 42 | + public static void main(String[] args) { |
| 43 | + testGoogle(); |
| 44 | + } |
| 45 | + |
| 46 | + private static void testGoogle() { |
| 47 | + int mssleep = 5000; |
| 48 | + URL url = Test_URL.class.getResource("words.txt"); |
| 49 | + byte[] ret; |
| 50 | + try { |
| 51 | + ret = (byte[]) Rdr.getStreamAsBytes(new BufferedInputStream(url.openStream()), null); |
| 52 | + String[] words = new String(ret).split("\r\n"); |
| 53 | + int n = words.length; |
| 54 | + double[] freq = new double[n]; |
| 55 | + StringBuffer sb = new StringBuffer(); |
| 56 | + for (int i = 0; i < n; i++) { |
| 57 | + try { |
| 58 | + processWord(i, words, freq); |
| 59 | + sb.append(i).append('\t').append(words[i]).append('\t').append(freq[i]).append('\n'); |
| 60 | + } catch (IOException e) { |
| 61 | + System.out.println(e); |
| 62 | + // TODO Auto-generated catch block |
| 63 | + i--; |
| 64 | + try { |
| 65 | + Thread.sleep(mssleep); |
| 66 | + } catch (InterruptedException e1) { |
| 67 | + // TODO Auto-generated catch block |
| 68 | + e1.printStackTrace(); |
| 69 | + } |
| 70 | + } |
| 71 | + } |
| 72 | + FileOutputStream fos = new FileOutputStream("c:/temp/words.out"); |
| 73 | + fos.write(sb.toString().getBytes()); |
| 74 | + fos.close(); |
| 75 | + } catch (IOException e2) { |
| 76 | + // TODO Auto-generated catch block |
| 77 | + e2.printStackTrace(); |
| 78 | + } |
| 79 | + |
| 80 | + // TODO Auto-generated method stub |
| 81 | + |
| 82 | + } |
| 83 | + |
| 84 | + private static void processWord(int i, String[] words, double[] freq) throws IOException { |
| 85 | + String word = words[i]; |
| 86 | + URL url = new URL("https://books.google.com/ngrams/json?content=+"+word + "&year_start=1981&year_end=2020&corpus=en&smoothing=3"); byte[] bytes = (byte[]) Rdr.getStreamAsBytes(new BufferedInputStream(url.openStream()), null); |
| 87 | + String json = new String(bytes); |
| 88 | + String[] values = json.split("\\[")[2].split("\\]")[0].split(","); |
| 89 | + double v = 0; |
| 90 | + for (int j = values.length; --j >= 0;) { |
| 91 | + v += Double.valueOf(values[j]); |
| 92 | + } |
| 93 | + System.out.println(i + "\t" + word + "\t" + v); |
| 94 | + freq[i] = v; |
| 95 | + } |
| 96 | + |
| 97 | +} |
0 commit comments