Skip to content

Commit e904547

Browse files
authored
Create Java code sample for inspect BQ with sampling (GoogleCloudPlatform#3149)
Fixes internal bug b/157051267 - [X] I have followed [Sample Format Guide](https://github.com/GoogleCloudPlatform/java-docs-samples/blob/master/SAMPLE_FORMAT.md) - [X] `pom.xml` parent set to latest `shared-configuration` - [X] Appropriate changes to README are included in PR - [X] API's need to be enabled to test (tell us) (**Nothing new**) - [X] Environment Variables need to be set (ask us to set them) (**Nothing new**) - [X] **Tests** pass: `mvn clean verify` **required** - [X] **Lint** passes: `mvn -P lint checkstyle:check` **required** - [X] **Static Analysis**: `mvn -P lint clean compile pmd:cpd-check spotbugs:check` **advisory only** - [X] Please **merge** this PR for me once it is approved.
1 parent b825077 commit e904547

File tree

3 files changed

+189
-3
lines changed

3 files changed

+189
-3
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,176 @@
1+
/*
2+
* Copyright 2019 Google LLC
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package dlp.snippets;
18+
19+
// [START dlp_inspect_bigquery_with_sampling]
20+
21+
import com.google.api.core.SettableApiFuture;
22+
import com.google.cloud.dlp.v2.DlpServiceClient;
23+
import com.google.cloud.pubsub.v1.AckReplyConsumer;
24+
import com.google.cloud.pubsub.v1.MessageReceiver;
25+
import com.google.cloud.pubsub.v1.Subscriber;
26+
import com.google.privacy.dlp.v2.Action;
27+
import com.google.privacy.dlp.v2.BigQueryOptions;
28+
import com.google.privacy.dlp.v2.BigQueryOptions.SampleMethod;
29+
import com.google.privacy.dlp.v2.BigQueryTable;
30+
import com.google.privacy.dlp.v2.CreateDlpJobRequest;
31+
import com.google.privacy.dlp.v2.DlpJob;
32+
import com.google.privacy.dlp.v2.FieldId;
33+
import com.google.privacy.dlp.v2.GetDlpJobRequest;
34+
import com.google.privacy.dlp.v2.InfoType;
35+
import com.google.privacy.dlp.v2.InfoTypeStats;
36+
import com.google.privacy.dlp.v2.InspectConfig;
37+
import com.google.privacy.dlp.v2.InspectDataSourceDetails;
38+
import com.google.privacy.dlp.v2.InspectJobConfig;
39+
import com.google.privacy.dlp.v2.LocationName;
40+
import com.google.privacy.dlp.v2.StorageConfig;
41+
import com.google.pubsub.v1.ProjectSubscriptionName;
42+
import com.google.pubsub.v1.PubsubMessage;
43+
import java.io.IOException;
44+
import java.util.concurrent.ExecutionException;
45+
import java.util.concurrent.TimeUnit;
46+
import java.util.concurrent.TimeoutException;
47+
48+
public class InspectBigQueryTableWithSampling {
49+
50+
public static void inspectBigQueryTableWithSampling()
51+
throws InterruptedException, ExecutionException, IOException {
52+
// TODO(developer): Replace these variables before running the sample.
53+
String projectId = "your-project-id";
54+
String topicId = "your-pubsub-topic-id";
55+
String subscriptionId = "your-pubsub-subscription-id";
56+
inspectBigQueryTableWithSampling(projectId, topicId, subscriptionId);
57+
}
58+
59+
// Inspects a BigQuery Table
60+
public static void inspectBigQueryTableWithSampling(
61+
String projectId, String topicId, String subscriptionId)
62+
throws ExecutionException, InterruptedException, IOException {
63+
// Initialize client that will be used to send requests. This client only needs to be created
64+
// once, and can be reused for multiple requests. After completing all of your requests, call
65+
// the "close" method on the client to safely clean up any remaining background resources.
66+
try (DlpServiceClient dlp = DlpServiceClient.create()) {
67+
// Specify the BigQuery table to be inspected.
68+
BigQueryTable tableReference =
69+
BigQueryTable.newBuilder()
70+
.setProjectId("bigquery-public-data")
71+
.setDatasetId("usa_names")
72+
.setTableId("usa_1910_current")
73+
.build();
74+
75+
BigQueryOptions bigQueryOptions =
76+
BigQueryOptions.newBuilder()
77+
.setTableReference(tableReference)
78+
.setRowsLimit(1000)
79+
.setSampleMethod(SampleMethod.RANDOM_START)
80+
.addIdentifyingFields(FieldId.newBuilder().setName("name"))
81+
.build();
82+
83+
StorageConfig storageConfig =
84+
StorageConfig.newBuilder().setBigQueryOptions(bigQueryOptions).build();
85+
86+
// Specify the type of info the inspection will look for.
87+
// See https://cloud.google.com/dlp/docs/infotypes-reference for complete list of info types
88+
InfoType infoType = InfoType.newBuilder().setName("PERSON_NAME").build();
89+
90+
// Specify how the content should be inspected.
91+
InspectConfig inspectConfig =
92+
InspectConfig.newBuilder()
93+
.addInfoTypes(infoType)
94+
.setIncludeQuote(true)
95+
.build();
96+
97+
// Specify the action that is triggered when the job completes.
98+
String pubSubTopic = String.format("projects/%s/topics/%s", projectId, topicId);
99+
Action.PublishToPubSub publishToPubSub =
100+
Action.PublishToPubSub.newBuilder().setTopic(pubSubTopic).build();
101+
Action action = Action.newBuilder().setPubSub(publishToPubSub).build();
102+
103+
// Configure the long running job we want the service to perform.
104+
InspectJobConfig inspectJobConfig =
105+
InspectJobConfig.newBuilder()
106+
.setStorageConfig(storageConfig)
107+
.setInspectConfig(inspectConfig)
108+
.addActions(action)
109+
.build();
110+
111+
// Create the request for the job configured above.
112+
CreateDlpJobRequest createDlpJobRequest =
113+
CreateDlpJobRequest.newBuilder()
114+
.setParent(LocationName.of(projectId, "global").toString())
115+
.setInspectJob(inspectJobConfig)
116+
.build();
117+
118+
// Use the client to send the request.
119+
final DlpJob dlpJob = dlp.createDlpJob(createDlpJobRequest);
120+
System.out.println("Job created: " + dlpJob.getName());
121+
122+
// Set up a Pub/Sub subscriber to listen on the job completion status
123+
final SettableApiFuture<Boolean> done = SettableApiFuture.create();
124+
125+
ProjectSubscriptionName subscriptionName =
126+
ProjectSubscriptionName.of(projectId, subscriptionId);
127+
128+
MessageReceiver messageHandler =
129+
(PubsubMessage pubsubMessage, AckReplyConsumer ackReplyConsumer) -> {
130+
handleMessage(dlpJob, done, pubsubMessage, ackReplyConsumer);
131+
};
132+
Subscriber subscriber = Subscriber.newBuilder(subscriptionName, messageHandler).build();
133+
subscriber.startAsync();
134+
135+
// Wait for the original job to complete
136+
try {
137+
done.get(15, TimeUnit.MINUTES);
138+
} catch (TimeoutException e) {
139+
System.out.println("Job was not completed after 15 minutes.");
140+
return;
141+
} finally {
142+
subscriber.stopAsync();
143+
subscriber.awaitTerminated();
144+
}
145+
146+
// Get the latest state of the job from the service
147+
GetDlpJobRequest request = GetDlpJobRequest.newBuilder().setName(dlpJob.getName()).build();
148+
DlpJob completedJob = dlp.getDlpJob(request);
149+
150+
// Parse the response and process results.
151+
System.out.println("Job status: " + completedJob.getState());
152+
InspectDataSourceDetails.Result result = completedJob.getInspectDetails().getResult();
153+
System.out.println("Findings: ");
154+
for (InfoTypeStats infoTypeStat : result.getInfoTypeStatsList()) {
155+
System.out.print("\tInfo type: " + infoTypeStat.getInfoType().getName());
156+
System.out.println("\tCount: " + infoTypeStat.getCount());
157+
}
158+
}
159+
}
160+
161+
// handleMessage injects the job and settableFuture into the message reciever interface
162+
private static void handleMessage(
163+
DlpJob job,
164+
SettableApiFuture<Boolean> done,
165+
PubsubMessage pubsubMessage,
166+
AckReplyConsumer ackReplyConsumer) {
167+
String messageAttribute = pubsubMessage.getAttributesMap().get("DlpJobName");
168+
if (job.getName().equals(messageAttribute)) {
169+
done.set(true);
170+
ackReplyConsumer.ack();
171+
} else {
172+
ackReplyConsumer.nack();
173+
}
174+
}
175+
}
176+
// [END dlp_inspect_bigquery_with_sampling]

dlp/src/main/java/dlp/snippets/InspectGcsFileWithSampling.java

+2-2
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717
package dlp.snippets;
1818

19-
// [START dlp_inspect_gcs]
19+
// [START dlp_inspect_gcs_with_sampling]
2020

2121
import com.google.api.core.SettableApiFuture;
2222
import com.google.cloud.dlp.v2.DlpServiceClient;
@@ -171,4 +171,4 @@ private static void handleMessage(
171171
}
172172
}
173173
}
174-
// [END dlp_inspect_gcs]
174+
// [END dlp_inspect_gcs_with_sampling]

dlp/src/test/java/dlp/snippets/InspectTests.java

+11-1
Original file line numberDiff line numberDiff line change
@@ -225,7 +225,7 @@ public void testInspectStringWithoutOverlap() throws Exception {
225225
assertThat(output, containsString("example.com"));
226226
assertThat(output, not(containsString("example.org")));
227227
}
228-
228+
229229
@Test
230230
public void testInspectTable() {
231231
Table tableToInspect = Table.newBuilder()
@@ -373,6 +373,16 @@ public void testInspectBigQueryTable() throws Exception {
373373
assertThat(output, containsString("Job status: DONE"));
374374
}
375375

376+
@Test
377+
public void testInspectBigQueryTableWithSampling() throws Exception {
378+
InspectBigQueryTableWithSampling
379+
.inspectBigQueryTableWithSampling(PROJECT_ID, topicName.getTopic(),
380+
subscriptionName.getSubscription());
381+
382+
String output = bout.toString();
383+
assertThat(output, containsString("Job status: DONE"));
384+
}
385+
376386
@Test
377387
public void testInspectWithHotwordRules() throws Exception {
378388
InspectWithHotwordRules.inspectWithHotwordRules(

0 commit comments

Comments
 (0)