Skip to content

Commit c701f29

Browse files
committed
Adds GCS filepath snippets and tests
1 parent bd07872 commit c701f29

File tree

3 files changed

+125
-20
lines changed

3 files changed

+125
-20
lines changed

language/analysis/README.md

+9-5
Original file line numberDiff line numberDiff line change
@@ -30,12 +30,13 @@ mvn clean compile assembly:single
3030
```
3131

3232
We can then run the assembled JAR file with the `java` command. The variable $COMMAND takes
33-
three values `entities`, `sentiment` or `syntax`.
33+
one of six values: `entities-text`, `entities-file`, `sentiment-text`, `sentiment-file`,
34+
`syntax-text`, or `syntax-file`.
3435

3536
```
3637
MAIN_CLASS=com.google.cloud.language.samples.Analyze
3738
JAR_FILE=target/language-entities-1.0-jar-with-dependencies.jar
38-
java -cp $JAR_FILE $MAIN_CLASS <sentiment|entities|syntax> <text>
39+
java -cp $JAR_FILE $MAIN_CLASS <sentiment-text|sentiment-file|entities-text|entities-file|syntax-text|syntax-file> <text|path>
3940
```
4041

4142
Example usage:
@@ -46,8 +47,11 @@ QUOTE="Larry Page, Google's co-founder, once described the 'perfect search
4647
back exactly what you want.' Since he spoke those words Google has grown to
4748
offer products beyond search, but the spirit of what he said remains."
4849
49-
java -cp $JAR_FILE $MAIN_CLASS entities "$QUOTE"
50-
java -cp $JAR_FILE $MAIN_CLASS sentiment "$QUOTE"
51-
java -cp $JAR_FILE $MAIN_CLASS syntax "$QUOTE"
50+
java -cp $JAR_FILE $MAIN_CLASS entities-text "$QUOTE"
51+
java -cp $JAR_FILE $MAIN_CLASS entities-file "gs://bucket/file.txt"
52+
java -cp $JAR_FILE $MAIN_CLASS sentiment-text "$QUOTE"
53+
java -cp $JAR_FILE $MAIN_CLASS sentiment-file "gs://bucket/file.txt"
54+
java -cp $JAR_FILE $MAIN_CLASS syntax-text "$QUOTE"
55+
java -cp $JAR_FILE $MAIN_CLASS syntax-file "gs://bucket/file.txt"
5256
```
5357

language/analysis/src/main/java/com/google/cloud/language/samples/Analyze.java

+51-9
Original file line numberDiff line numberDiff line change
@@ -59,12 +59,18 @@ public static void main(String[] args) throws IOException, GeneralSecurityExcept
5959

6060
Analyze app = new Analyze(LanguageServiceClient.create());
6161

62-
if (command.equals("entities")) {
63-
printEntities(System.out, app.analyzeEntities(text));
64-
} else if (command.equals("sentiment")) {
65-
printSentiment(System.out, app.analyzeSentiment(text));
66-
} else if (command.equals("syntax")) {
67-
printSyntax(System.out, app.analyzeSyntax(text));
62+
if (command.equals("entities-text")) {
63+
printEntities(System.out, app.analyzeEntitiesText(text));
64+
} else if (command.equals("entities-file")) {
65+
printEntities(System.out, app.analyzeEntitiesFile(text));
66+
} else if (command.equals("sentiment-text")) {
67+
printSentiment(System.out, app.analyzeSentimentText(text));
68+
} else if (command.equals("sentiment-file")) {
69+
printSentiment(System.out, app.analyzeSentimentFile(text));
70+
} else if (command.equals("syntax-text")) {
71+
printSyntax(System.out, app.analyzeSyntaxText(text));
72+
} else if (command.equals("syntax-file")) {
73+
printSyntax(System.out, app.analyzeSyntaxFile(text));
6874
}
6975
}
7076

@@ -153,7 +159,7 @@ public Analyze(LanguageServiceClient languageApi) {
153159
/**
154160
* Gets {@link Entity}s from the string {@code text}.
155161
*/
156-
public List<Entity> analyzeEntities(String text) throws IOException {
162+
public List<Entity> analyzeEntitiesText(String text) throws IOException {
157163
Document doc = Document.newBuilder()
158164
.setContent(text).setType(Type.PLAIN_TEXT).build();
159165
AnalyzeEntitiesRequest request = AnalyzeEntitiesRequest.newBuilder()
@@ -162,21 +168,44 @@ public List<Entity> analyzeEntities(String text) throws IOException {
162168
AnalyzeEntitiesResponse response = languageApi.analyzeEntities(request);
163169
return response.getEntitiesList();
164170
}
171+
172+
/**
173+
* Gets {@link Entity}s from the string representing the GCS {@code path}.
174+
*/
175+
public List<Entity> analyzeEntitiesFile(String path) throws IOException {
176+
Document doc = Document.newBuilder()
177+
.setGcsContentUri(path).setType(Type.PLAIN_TEXT).build();
178+
AnalyzeEntitiesRequest request = AnalyzeEntitiesRequest.newBuilder()
179+
.setDocument(doc)
180+
.setEncodingType(EncodingType.UTF16).build();
181+
AnalyzeEntitiesResponse response = languageApi.analyzeEntities(request);
182+
return response.getEntitiesList();
183+
}
165184

166185
/**
167186
* Gets {@link Sentiment} from the string {@code text}.
168187
*/
169-
public Sentiment analyzeSentiment(String text) throws IOException {
188+
public Sentiment analyzeSentimentText(String text) throws IOException {
170189
Document doc = Document.newBuilder()
171190
.setContent(text).setType(Type.PLAIN_TEXT).build();
172191
AnalyzeSentimentResponse response = languageApi.analyzeSentiment(doc);
173192
return response.getDocumentSentiment();
174193
}
175194

195+
/**
196+
* Gets {@link Sentiment} from the string representing the GCS {@code path}.
197+
*/
198+
public Sentiment analyzeSentimentFile(String path) throws IOException {
199+
Document doc = Document.newBuilder()
200+
.setGcsContentUri(path).setType(Type.PLAIN_TEXT).build();
201+
AnalyzeSentimentResponse response = languageApi.analyzeSentiment(doc);
202+
return response.getDocumentSentiment();
203+
}
204+
176205
/**
177206
* Gets {@link Token}s from the string {@code text}.
178207
*/
179-
public List<Token> analyzeSyntax(String text) throws IOException {
208+
public List<Token> analyzeSyntaxText(String text) throws IOException {
180209
Document doc = Document.newBuilder()
181210
.setContent(text).setType(Type.PLAIN_TEXT).build();
182211
AnalyzeSyntaxRequest request = AnalyzeSyntaxRequest.newBuilder()
@@ -185,4 +214,17 @@ public List<Token> analyzeSyntax(String text) throws IOException {
185214
AnalyzeSyntaxResponse response = languageApi.analyzeSyntax(request);
186215
return response.getTokensList();
187216
}
217+
218+
/**
219+
* Gets {@link Token}s from the string representing the GCS {@code path}.
220+
*/
221+
public List<Token> analyzeSyntaxFile(String path) throws IOException {
222+
Document doc = Document.newBuilder()
223+
.setGcsContentUri(path).setType(Type.PLAIN_TEXT).build();
224+
AnalyzeSyntaxRequest request = AnalyzeSyntaxRequest.newBuilder()
225+
.setDocument(doc)
226+
.setEncodingType(EncodingType.UTF16).build();
227+
AnalyzeSyntaxResponse response = languageApi.analyzeSyntax(request);
228+
return response.getTokensList();
229+
}
188230
}

language/analysis/src/test/java/com/google/cloud/language/samples/AnalyzeIT.java

+65-6
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ public class AnalyzeIT {
4848
@Test public void analyzeEntities_withEntities_returnsLarryPage() throws Exception {
4949
// Act
5050
List<Entity> entities =
51-
analyzeApp.analyzeEntities(
51+
analyzeApp.analyzeEntitiesText(
5252
"Larry Page, Google's co-founder, once described the 'perfect search engine' as"
5353
+ " something that 'understands exactly what you mean and gives you back exactly what"
5454
+ " you want.' Since he spoke those words Google has grown to offer products beyond"
@@ -58,33 +58,76 @@ public class AnalyzeIT {
5858
// Assert
5959
assertThat(got).named("entity names").contains("Larry Page");
6060
}
61-
62-
@Test public void analyzeSentiment_returnPositive() throws Exception {
61+
62+
@Test public void analyzeEntities_withEntitiesFile_containsGod() throws Exception {
63+
// Act
64+
List<Entity> entities =
65+
analyzeApp.analyzeEntitiesFile("gs://cloud-samples-tests/natural-language/gettysburg.txt");
66+
List<String> got = entities.stream().map(e -> e.getName()).collect(Collectors.toList());
67+
68+
// Assert
69+
assertThat(got).named("entity names").contains("God");
70+
}
71+
72+
@Test public void analyzeSentimentText_returnPositive() throws Exception {
6373
// Act
6474
Sentiment sentiment =
65-
analyzeApp.analyzeSentiment(
75+
analyzeApp.analyzeSentimentText(
6676
"Tom Cruise is one of the finest actors in hollywood and a great star!");
6777

6878
// Assert
6979
assertThat((double)sentiment.getMagnitude()).isGreaterThan(0.0);
7080
assertThat((double)sentiment.getScore()).isGreaterThan(0.0);
7181
}
7282

83+
@Test public void analyzeSentimentFile_returnPositiveFile() throws Exception {
84+
// Act
85+
Sentiment sentiment =
86+
analyzeApp.analyzeSentimentFile("gs://cloud-samples-tests/natural-language/"
87+
+ "sentiment/bladerunner-pos.txt");
88+
89+
// Assert
90+
assertThat((double)sentiment.getMagnitude()).isGreaterThan(0.0);
91+
assertThat((double)sentiment.getScore()).isGreaterThan(0.0);
92+
}
93+
7394
@Test public void analyzeSentiment_returnNegative() throws Exception {
7495
// Act
7596
Sentiment sentiment =
76-
analyzeApp.analyzeSentiment(
97+
analyzeApp.analyzeSentimentText(
7798
"That was the worst performance I've seen in awhile.");
7899

79100
// Assert
80101
assertThat((double)sentiment.getMagnitude()).isGreaterThan(0.0);
81102
assertThat((double)sentiment.getScore()).isLessThan(0.0);
82103
}
104+
105+
@Test public void analyzeSentiment_returnNegativeFile() throws Exception {
106+
// Act
107+
Sentiment sentiment =
108+
analyzeApp.analyzeSentimentFile("gs://cloud-samples-tests/natural-language/"
109+
+ "sentiment/bladerunner-neg.txt");
110+
111+
// Assert
112+
assertThat((double)sentiment.getMagnitude()).isGreaterThan(0.0);
113+
assertThat((double)sentiment.getScore()).isLessThan(0.0);
114+
}
115+
116+
@Test public void analyzeSentiment_returnNeutralFile() throws Exception {
117+
// Act
118+
Sentiment sentiment =
119+
analyzeApp.analyzeSentimentFile("gs://cloud-samples-tests/natural-language/"
120+
+ "sentiment/bladerunner-neutral.txt");
121+
122+
// Assert
123+
assertThat((double)sentiment.getMagnitude()).isGreaterThan(1.0);
124+
assertThat((double)sentiment.getScore()).isWithin(0.0);
125+
}
83126

84127
@Test public void analyzeSyntax_partOfSpeech() throws Exception {
85128
// Act
86129
List<Token> token =
87-
analyzeApp.analyzeSyntax(
130+
analyzeApp.analyzeSyntaxText(
88131
"President Obama was elected for the second term");
89132

90133
List<Tag> got = token.stream().map(e -> e.getPartOfSpeech().getTag())
@@ -94,4 +137,20 @@ public class AnalyzeIT {
94137
assertThat(got).containsExactly(Tag.NOUN, Tag.NOUN, Tag.VERB,
95138
Tag.VERB, Tag.ADP, Tag.DET, Tag.ADJ, Tag.NOUN).inOrder();
96139
}
140+
141+
@Test public void analyzeSyntax_partOfSpeechFile() throws Exception {
142+
// Act
143+
List<Token> token =
144+
analyzeApp.analyzeSyntaxFile("gs://cloud-samples-tests/natural-language/"
145+
+ "sentiment/bladerunner-neutral.txt");
146+
147+
List<Tag> got = token.stream().map(e -> e.getPartOfSpeech().getTag())
148+
.collect(Collectors.toList());
149+
150+
// Assert
151+
assertThat(got).containsExactly(Tag.PRON, Tag.CONJ, Tag.VERB, Tag.CONJ, Tag.VERB,
152+
Tag.DET, Tag.NOUN, Tag.PUNCT, Tag.NOUN, Tag.VERB, Tag.ADJ, Tag.PUNCT, Tag.CONJ,
153+
Tag.ADV, Tag.PRON, Tag.VERB, Tag.VERB, Tag.VERB, Tag.ADJ, Tag.PUNCT, Tag.DET,
154+
Tag.NOUN, Tag.VERB, Tag.ADV, Tag.ADJ,Tag.PUNCT).inOrder();
155+
}
97156
}

0 commit comments

Comments
 (0)