Skip to content

Commit fe599dc

Browse files
authored
Inspect gcs sample b 157050959 (GoogleCloudPlatform#3146)
Fixes internal bug b/157050959 - [X] I have followed [Sample Format Guide](https://github.com/GoogleCloudPlatform/java-docs-samples/blob/master/SAMPLE_FORMAT.md) - [X] `pom.xml` parent set to latest `shared-configuration` - [X] Appropriate changes to README are included in PR - [X] API's need to be enabled to test (tell us) (**Nothing new**) - [X] Environment Variables need to be set (ask us to set them) (**Nothing new**) - [X] **Tests** pass: `mvn clean verify` **required** - [X] **Lint** passes: `mvn -P lint checkstyle:check` **required** - [X] **Static Analysis**: `mvn -P lint clean compile pmd:cpd-check spotbugs:check` **advisory only** - [X] Please **merge** this PR for me once it is approved.
1 parent 2815d73 commit fe599dc

File tree

3 files changed

+188
-2
lines changed

3 files changed

+188
-2
lines changed

dlp/src/main/java/dlp/snippets/InspectGcsFile.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -66,9 +66,11 @@ public static void inspectGcsFile(
6666
// the "close" method on the client to safely clean up any remaining background resources.
6767
try (DlpServiceClient dlp = DlpServiceClient.create()) {
6868
// Specify the GCS file to be inspected.
69-
FileSet fileSet = FileSet.newBuilder().setUrl(gcsUri).build();
7069
CloudStorageOptions cloudStorageOptions =
71-
CloudStorageOptions.newBuilder().setFileSet(fileSet).build();
70+
CloudStorageOptions.newBuilder()
71+
.setFileSet(FileSet.newBuilder().setUrl(gcsUri))
72+
.build();
73+
7274
StorageConfig storageConfig =
7375
StorageConfig.newBuilder().setCloudStorageOptions(cloudStorageOptions).build();
7476

Lines changed: 174 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,174 @@
1+
/*
2+
* Copyright 2019 Google LLC
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package dlp.snippets;
18+
19+
// [START dlp_inspect_gcs]
20+
21+
import com.google.api.core.SettableApiFuture;
22+
import com.google.cloud.dlp.v2.DlpServiceClient;
23+
import com.google.cloud.pubsub.v1.AckReplyConsumer;
24+
import com.google.cloud.pubsub.v1.MessageReceiver;
25+
import com.google.cloud.pubsub.v1.Subscriber;
26+
import com.google.privacy.dlp.v2.Action;
27+
import com.google.privacy.dlp.v2.CloudStorageOptions;
28+
import com.google.privacy.dlp.v2.CloudStorageOptions.FileSet;
29+
import com.google.privacy.dlp.v2.CloudStorageOptions.SampleMethod;
30+
import com.google.privacy.dlp.v2.CreateDlpJobRequest;
31+
import com.google.privacy.dlp.v2.DlpJob;
32+
import com.google.privacy.dlp.v2.FileType;
33+
import com.google.privacy.dlp.v2.GetDlpJobRequest;
34+
import com.google.privacy.dlp.v2.InfoType;
35+
import com.google.privacy.dlp.v2.InfoTypeStats;
36+
import com.google.privacy.dlp.v2.InspectConfig;
37+
import com.google.privacy.dlp.v2.InspectDataSourceDetails;
38+
import com.google.privacy.dlp.v2.InspectJobConfig;
39+
import com.google.privacy.dlp.v2.Likelihood;
40+
import com.google.privacy.dlp.v2.LocationName;
41+
import com.google.privacy.dlp.v2.StorageConfig;
42+
import com.google.pubsub.v1.ProjectSubscriptionName;
43+
import com.google.pubsub.v1.PubsubMessage;
44+
import java.io.IOException;
45+
import java.util.concurrent.ExecutionException;
46+
import java.util.concurrent.TimeUnit;
47+
import java.util.concurrent.TimeoutException;
48+
49+
public class InspectGcsFileWithSampling {
50+
51+
public static void inspectGcsFileWithSampling()
52+
throws InterruptedException, ExecutionException, IOException {
53+
// TODO(developer): Replace these variables before running the sample.
54+
String projectId = "your-project-id";
55+
String gcsUri = "gs://" + "your-bucket-name" + "/path/to/your/file.txt";
56+
String topicId = "your-pubsub-topic-id";
57+
String subscriptionId = "your-pubsub-subscription-id";
58+
inspectGcsFileWithSampling(projectId, gcsUri, topicId, subscriptionId);
59+
}
60+
61+
// Inspects a sampled portion of a file in a Google Cloud Storage bucket and waits for the job's Pub/Sub completion message.
62+
public static void inspectGcsFileWithSampling(
63+
String projectId, String gcsUri, String topicId, String subscriptionId)
64+
throws ExecutionException, InterruptedException, IOException {
65+
// Initialize client that will be used to send requests. This client only needs to be created
66+
// once, and can be reused for multiple requests. After completing all of your requests, call
67+
// the "close" method on the client to safely clean up any remaining background resources.
68+
try (DlpServiceClient dlp = DlpServiceClient.create()) {
69+
// Specify the GCS file to be inspected and the sampling configuration.
70+
CloudStorageOptions cloudStorageOptions =
71+
CloudStorageOptions.newBuilder()
72+
.setFileSet(FileSet.newBuilder().setUrl(gcsUri))
73+
.setBytesLimitPerFile(200)
74+
.addFileTypes(FileType.TEXT_FILE)
75+
.setFilesLimitPercent(90)
76+
.setSampleMethod(SampleMethod.RANDOM_START)
77+
.build();
78+
79+
StorageConfig storageConfig =
80+
StorageConfig.newBuilder().setCloudStorageOptions(cloudStorageOptions).build();
81+
82+
// Specify the type of info the inspection will look for.
83+
// See https://cloud.google.com/dlp/docs/infotypes-reference for complete list of info types
84+
InfoType infoType = InfoType.newBuilder().setName("PERSON_NAME").build();
85+
86+
// Specify how the content should be inspected.
87+
InspectConfig inspectConfig =
88+
InspectConfig.newBuilder()
89+
.addInfoTypes(infoType)
90+
.setExcludeInfoTypes(true)
91+
.setIncludeQuote(true)
92+
.setMinLikelihood(Likelihood.POSSIBLE)
93+
.build();
94+
95+
// Specify the action that is triggered when the job completes.
96+
String pubSubTopic = String.format("projects/%s/topics/%s", projectId, topicId);
97+
Action.PublishToPubSub publishToPubSub =
98+
Action.PublishToPubSub.newBuilder().setTopic(pubSubTopic).build();
99+
Action action = Action.newBuilder().setPubSub(publishToPubSub).build();
100+
101+
// Configure the long running job we want the service to perform.
102+
InspectJobConfig inspectJobConfig =
103+
InspectJobConfig.newBuilder()
104+
.setStorageConfig(storageConfig)
105+
.setInspectConfig(inspectConfig)
106+
.addActions(action)
107+
.build();
108+
109+
// Create the request for the job configured above.
110+
CreateDlpJobRequest createDlpJobRequest =
111+
CreateDlpJobRequest.newBuilder()
112+
.setParent(LocationName.of(projectId, "global").toString())
113+
.setInspectJob(inspectJobConfig)
114+
.build();
115+
116+
// Use the client to send the request.
117+
final DlpJob dlpJob = dlp.createDlpJob(createDlpJobRequest);
118+
System.out.println("Job created: " + dlpJob.getName());
119+
120+
// Set up a Pub/Sub subscriber to listen for the job completion notification.
121+
final SettableApiFuture<Boolean> done = SettableApiFuture.create();
122+
123+
ProjectSubscriptionName subscriptionName =
124+
ProjectSubscriptionName.of(projectId, subscriptionId);
125+
126+
MessageReceiver messageHandler =
127+
(PubsubMessage pubsubMessage, AckReplyConsumer ackReplyConsumer) -> {
128+
handleMessage(dlpJob, done, pubsubMessage, ackReplyConsumer);
129+
};
130+
Subscriber subscriber = Subscriber.newBuilder(subscriptionName, messageHandler).build();
131+
subscriber.startAsync();
132+
133+
// Wait for the original job to complete
134+
try {
135+
done.get(15, TimeUnit.MINUTES);
136+
} catch (TimeoutException e) {
137+
System.out.println("Job was not completed after 15 minutes.");
138+
return;
139+
} finally {
140+
subscriber.stopAsync();
141+
subscriber.awaitTerminated();
142+
}
143+
144+
// Get the latest state of the job from the service
145+
GetDlpJobRequest request = GetDlpJobRequest.newBuilder().setName(dlpJob.getName()).build();
146+
DlpJob completedJob = dlp.getDlpJob(request);
147+
148+
// Parse the response and process results.
149+
System.out.println("Job status: " + completedJob.getState());
150+
InspectDataSourceDetails.Result result = completedJob.getInspectDetails().getResult();
151+
System.out.println("Findings: ");
152+
for (InfoTypeStats infoTypeStat : result.getInfoTypeStatsList()) {
153+
System.out.print("\tInfo type: " + infoTypeStat.getInfoType().getName());
154+
System.out.println("\tCount: " + infoTypeStat.getCount());
155+
}
156+
}
157+
}
158+
159+
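// handleMessage injects the job and the SettableApiFuture into the message receiver interface: it acks and completes the future when the message's DlpJobName attribute matches the job, and nacks otherwise.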
160+
private static void handleMessage(
161+
DlpJob job,
162+
SettableApiFuture<Boolean> done,
163+
PubsubMessage pubsubMessage,
164+
AckReplyConsumer ackReplyConsumer) {
165+
String messageAttribute = pubsubMessage.getAttributesMap().get("DlpJobName");
166+
if (job.getName().equals(messageAttribute)) {
167+
done.set(true);
168+
ackReplyConsumer.ack();
169+
} else {
170+
ackReplyConsumer.nack();
171+
}
172+
}
173+
}
174+
// [END dlp_inspect_gcs]

dlp/src/test/java/dlp/snippets/InspectTests.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -343,6 +343,16 @@ public void testInspectGcsFile() throws Exception {
343343
assertThat(output, containsString("Job status: DONE"));
344344
}
345345

346+
@Test
347+
public void testInspectGcsFileWithSampling() throws Exception {
348+
InspectGcsFileWithSampling
349+
.inspectGcsFileWithSampling(PROJECT_ID, GCS_PATH, topicName.getTopic(),
350+
subscriptionName.getSubscription());
351+
352+
String output = bout.toString();
353+
assertThat(output, containsString("Job status: DONE"));
354+
}
355+
346356
@Test
347357
public void testInspectDatastoreEntity() throws Exception {
348358
InspectDatastoreEntity

0 commit comments

Comments
 (0)