Skip to content

Commit 7949498

Browse files
authored
adding DLP Quickstart + redact image sample (GoogleCloudPlatform#797)
* adding DLP Quickstart + redact image sample * adding comments
1 parent 091229b commit 7949498

File tree

9 files changed

+480
-209
lines changed

9 files changed

+480
-209
lines changed

dlp/README.md

+7-1
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,13 @@ info types for a given category. eg. HEALTH or GOVERNMENT.
3131
java -cp target/dlp-samples-1.0-jar-with-dependencies.jar com.example.dlp.Metadata
3232
```
3333

34+
## Run the quickstart
35+
36+
The Quickstart demonstrates using the DLP API to identify an InfoType in a given string.
37+
```
38+
java -cp target/dlp-samples-1.0-jar-with-dependencies.jar com.example.dlp.QuickStart
39+
```
40+
3441
## Inspect data for sensitive elements
3542
Inspect strings, local files, files on Google Cloud Storage, and Cloud Datastore kinds with the DLP API.
3643

@@ -112,4 +119,3 @@ Run all tests:
112119
```
113120
mvn clean verify
114121
```
115-

dlp/src/main/java/com/example/dlp/Inspect.java

+115-117
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
/**
2-
* Copyright 2017, Google, Inc.
2+
* Copyright 2017 Google Inc.
3+
*
34
* Licensed under the Apache License, Version 2.0 (the "License");
45
* you may not use this file except in compliance with the License.
56
* You may obtain a copy of the License at
67
*
7-
* http://www.apache.org/licenses/LICENSE-2.0
8+
* http://www.apache.org/licenses/LICENSE-2.0
89
*
910
* Unless required by applicable law or agreed to in writing, software
1011
* distributed under the License is distributed on an "AS IS" BASIS,
@@ -38,15 +39,6 @@
3839
import com.google.privacy.dlp.v2beta1.ResultName;
3940
import com.google.privacy.dlp.v2beta1.StorageConfig;
4041
import com.google.protobuf.ByteString;
41-
import org.apache.commons.cli.CommandLine;
42-
import org.apache.commons.cli.CommandLineParser;
43-
import org.apache.commons.cli.DefaultParser;
44-
import org.apache.commons.cli.HelpFormatter;
45-
import org.apache.commons.cli.Option;
46-
import org.apache.commons.cli.OptionGroup;
47-
import org.apache.commons.cli.Options;
48-
import org.apache.commons.cli.ParseException;
49-
5042
import java.net.URLConnection;
5143
import java.nio.file.Files;
5244
import java.nio.file.Path;
@@ -55,11 +47,23 @@
5547
import java.util.Collections;
5648
import java.util.List;
5749
import javax.activation.MimetypesFileTypeMap;
50+
import org.apache.commons.cli.CommandLine;
51+
import org.apache.commons.cli.CommandLineParser;
52+
import org.apache.commons.cli.DefaultParser;
53+
import org.apache.commons.cli.HelpFormatter;
54+
import org.apache.commons.cli.Option;
55+
import org.apache.commons.cli.OptionGroup;
56+
import org.apache.commons.cli.Options;
57+
import org.apache.commons.cli.ParseException;
5858

5959
public class Inspect {
6060

61-
private static void inspectString(String string, Likelihood minLikelihood, int maxFindings,
62-
List<InfoType> infoTypes, boolean includeQuote) {
61+
private static void inspectString(
62+
String string,
63+
Likelihood minLikelihood,
64+
int maxFindings,
65+
List<InfoType> infoTypes,
66+
boolean includeQuote) {
6367
// [START dlp_inspect_string]
6468
// instantiate a client
6569
try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) {
@@ -75,24 +79,24 @@ private static void inspectString(String string, Likelihood minLikelihood, int m
7579

7680
// Whether to include the matching string
7781
// includeQuote = true;
78-
InspectConfig inspectConfig = InspectConfig.newBuilder()
79-
.addAllInfoTypes(infoTypes)
80-
.setMinLikelihood(minLikelihood)
81-
.setMaxFindings(maxFindings)
82-
.setIncludeQuote(includeQuote)
83-
.build();
82+
InspectConfig inspectConfig =
83+
InspectConfig.newBuilder()
84+
.addAllInfoTypes(infoTypes)
85+
.setMinLikelihood(minLikelihood)
86+
.setMaxFindings(maxFindings)
87+
.setIncludeQuote(includeQuote)
88+
.build();
8489

8590
// The string to inspect
8691
// string = 'My name is Gary and my email is gary@example.com';
87-
ContentItem contentItem = ContentItem.newBuilder()
88-
.setType("text/plain")
89-
.setValue(string)
90-
.build();
91-
92-
InspectContentRequest request = InspectContentRequest.newBuilder()
93-
.setInspectConfig(inspectConfig)
94-
.addItems(contentItem)
95-
.build();
92+
ContentItem contentItem =
93+
ContentItem.newBuilder().setType("text/plain").setValue(string).build();
94+
95+
InspectContentRequest request =
96+
InspectContentRequest.newBuilder()
97+
.setInspectConfig(inspectConfig)
98+
.addItems(contentItem)
99+
.build();
96100
InspectContentResponse response = dlpServiceClient.inspectContent(request);
97101

98102
for (InspectResult result : response.getResultsList()) {
@@ -115,8 +119,12 @@ private static void inspectString(String string, Likelihood minLikelihood, int m
115119
// [END dlp_inspect_string]
116120
}
117121

118-
private static void inspectFile(String filePath, Likelihood minLikelihood, int maxFindings,
119-
List<InfoType> infoTypes, boolean includeQuote) {
122+
private static void inspectFile(
123+
String filePath,
124+
Likelihood minLikelihood,
125+
int maxFindings,
126+
List<InfoType> infoTypes,
127+
boolean includeQuote) {
120128
// [START dlp_inspect_file]
121129
// Instantiates a client
122130
try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) {
@@ -146,22 +154,22 @@ private static void inspectFile(String filePath, Likelihood minLikelihood, int m
146154
}
147155

148156
byte[] data = Files.readAllBytes(path);
149-
ContentItem contentItem = ContentItem.newBuilder()
150-
.setType(mimeType)
151-
.setData(ByteString.copyFrom(data))
152-
.build();
153-
154-
InspectConfig inspectConfig = InspectConfig.newBuilder()
155-
.addAllInfoTypes(infoTypes)
156-
.setMinLikelihood(minLikelihood)
157-
.setMaxFindings(maxFindings)
158-
.setIncludeQuote(includeQuote)
159-
.build();
160-
161-
InspectContentRequest request = InspectContentRequest.newBuilder()
162-
.setInspectConfig(inspectConfig)
163-
.addItems(contentItem)
164-
.build();
157+
ContentItem contentItem =
158+
ContentItem.newBuilder().setType(mimeType).setData(ByteString.copyFrom(data)).build();
159+
160+
InspectConfig inspectConfig =
161+
InspectConfig.newBuilder()
162+
.addAllInfoTypes(infoTypes)
163+
.setMinLikelihood(minLikelihood)
164+
.setMaxFindings(maxFindings)
165+
.setIncludeQuote(includeQuote)
166+
.build();
167+
168+
InspectContentRequest request =
169+
InspectContentRequest.newBuilder()
170+
.setInspectConfig(inspectConfig)
171+
.addItems(contentItem)
172+
.build();
165173
InspectContentResponse response = dlpServiceClient.inspectContent(request);
166174

167175
for (InspectResult result : response.getResultsList()) {
@@ -185,13 +193,13 @@ private static void inspectFile(String filePath, Likelihood minLikelihood, int m
185193
// [END dlp_inspect_file]
186194
}
187195

188-
private static void inspectGcsFile(String bucketName, String fileName,
189-
Likelihood minLikelihood, List<InfoType> infoTypes)
196+
private static void inspectGcsFile(
197+
String bucketName, String fileName, Likelihood minLikelihood, List<InfoType> infoTypes)
190198
throws Exception {
191199
// [START dlp_inspect_gcs]
192200
// Instantiates a client
193201
try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) {
194-
// The name of the bucket where the file resides.
202+
// The name of the bucket where the file resides.
195203
// bucketName = 'YOUR-BUCKET';
196204

197205
// The path to the file within the bucket to inspect.
@@ -207,21 +215,19 @@ private static void inspectGcsFile(String bucketName, String fileName,
207215
// The infoTypes of information to match
208216
// infoTypes = ['US_MALE_NAME', 'US_FEMALE_NAME'];
209217

210-
CloudStorageOptions cloudStorageOptions = CloudStorageOptions
211-
.newBuilder()
212-
.setFileSet(FileSet.newBuilder().setUrl(
213-
"gs://" + bucketName + "/" + fileName
214-
))
215-
.build();
218+
CloudStorageOptions cloudStorageOptions =
219+
CloudStorageOptions.newBuilder()
220+
.setFileSet(FileSet.newBuilder().setUrl("gs://" + bucketName + "/" + fileName))
221+
.build();
216222

217-
StorageConfig storageConfig = StorageConfig.newBuilder()
218-
.setCloudStorageOptions(cloudStorageOptions)
219-
.build();
223+
StorageConfig storageConfig =
224+
StorageConfig.newBuilder().setCloudStorageOptions(cloudStorageOptions).build();
220225

221-
InspectConfig inspectConfig = InspectConfig.newBuilder()
222-
.addAllInfoTypes(infoTypes)
223-
.setMinLikelihood(minLikelihood)
224-
.build();
226+
InspectConfig inspectConfig =
227+
InspectConfig.newBuilder()
228+
.addAllInfoTypes(infoTypes)
229+
.setMinLikelihood(minLikelihood)
230+
.build();
225231

226232
// optionally provide an output configuration to store results, default : none
227233
OutputStorageConfig outputConfig = OutputStorageConfig.getDefaultInstance();
@@ -252,8 +258,12 @@ private static void inspectGcsFile(String bucketName, String fileName,
252258
// [END dlp_inspect_gcs]
253259
}
254260

255-
private static void inspectDatastore(String projectId, String namespaceId, String kind,
256-
Likelihood minLikelihood, List<InfoType> infoTypes) {
261+
private static void inspectDatastore(
262+
String projectId,
263+
String namespaceId,
264+
String kind,
265+
Likelihood minLikelihood,
266+
List<InfoType> infoTypes) {
257267
// [START dlp_inspect_datastore]
258268
// Instantiates a client
259269
try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) {
@@ -274,19 +284,24 @@ private static void inspectDatastore(String projectId, String namespaceId, Strin
274284
// The infoTypes of information to match
275285
// infoTypes = ['US_MALE_NAME', 'US_FEMALE_NAME'];
276286

277-
// Get reference to the file to be inspected
278-
PartitionId partitionId = PartitionId.newBuilder().setProjectId(projectId)
279-
.setNamespaceId(namespaceId).build();
287+
// Reference to the Datastore namespace
288+
PartitionId partitionId =
289+
PartitionId.newBuilder().setProjectId(projectId).setNamespaceId(namespaceId).build();
290+
291+
// Reference to the Datastore kind
280292
KindExpression kindExpression = KindExpression.newBuilder().setName(kind).build();
281-
DatastoreOptions datastoreOptions = DatastoreOptions.newBuilder()
282-
.setKind(kindExpression).setPartitionId(partitionId).build();
283-
StorageConfig storageConfig = StorageConfig.newBuilder()
284-
.setDatastoreOptions(datastoreOptions).build();
293+
DatastoreOptions datastoreOptions =
294+
DatastoreOptions.newBuilder().setKind(kindExpression).setPartitionId(partitionId).build();
295+
296+
// Construct Datastore configuration to be inspected
297+
StorageConfig storageConfig =
298+
StorageConfig.newBuilder().setDatastoreOptions(datastoreOptions).build();
285299

286-
InspectConfig inspectConfig = InspectConfig.newBuilder()
287-
.addAllInfoTypes(infoTypes)
288-
.setMinLikelihood(minLikelihood)
289-
.build();
300+
InspectConfig inspectConfig =
301+
InspectConfig.newBuilder()
302+
.addAllInfoTypes(infoTypes)
303+
.setMinLikelihood(minLikelihood)
304+
.build();
290305

291306
// optionally provide an output configuration to store results, default : none
292307
OutputStorageConfig outputConfig = OutputStorageConfig.getDefaultInstance();
@@ -317,6 +332,10 @@ private static void inspectDatastore(String projectId, String namespaceId, Strin
317332
// [END dlp_inspect_datastore]
318333
}
319334

335+
/**
336+
* Command line application to inspect data using the Data Loss Prevention API.
337+
* Supported data formats: strings, local files, text files on GCS, and Datastore entities.
338+
*/
320339
public static void main(String[] args) throws Exception {
321340

322341
OptionGroup optionsGroup = new OptionGroup();
@@ -336,61 +355,37 @@ public static void main(String[] args) throws Exception {
336355
Options commandLineOptions = new Options();
337356
commandLineOptions.addOptionGroup(optionsGroup);
338357

339-
Option minLikelihoodOption = Option.builder("minLikelihood")
340-
.hasArg(true)
341-
.required(false)
342-
.build();
358+
Option minLikelihoodOption =
359+
Option.builder("minLikelihood").hasArg(true).required(false).build();
343360

344361
commandLineOptions.addOption(minLikelihoodOption);
345362

346-
Option maxFindingsOption = Option.builder("maxFindings")
347-
.hasArg(true)
348-
.required(false)
349-
.build();
363+
Option maxFindingsOption = Option.builder("maxFindings").hasArg(true).required(false).build();
350364

351365
commandLineOptions.addOption(maxFindingsOption);
352366

353-
Option infoTypesOption = Option.builder("infoTypes")
354-
.hasArg(true)
355-
.required(false)
356-
.build();
367+
Option infoTypesOption = Option.builder("infoTypes").hasArg(true).required(false).build();
357368
infoTypesOption.setArgs(Option.UNLIMITED_VALUES);
358369
commandLineOptions.addOption(infoTypesOption);
359370

360-
Option includeQuoteOption = Option.builder("includeQuote")
361-
.hasArg(true)
362-
.required(false)
363-
.build();
371+
Option includeQuoteOption = Option.builder("includeQuote").hasArg(true).required(false).build();
364372
commandLineOptions.addOption(includeQuoteOption);
365373

366-
Option bucketNameOption = Option.builder("bucketName")
367-
.hasArg(true)
368-
.required(false)
369-
.build();
374+
Option bucketNameOption = Option.builder("bucketName").hasArg(true).required(false).build();
370375
commandLineOptions.addOption(bucketNameOption);
371376

372-
Option gcsFileNameOption = Option.builder("fileName")
373-
.hasArg(true)
374-
.required(false)
375-
.build();
377+
Option gcsFileNameOption = Option.builder("fileName").hasArg(true).required(false).build();
376378
commandLineOptions.addOption(gcsFileNameOption);
377379

378-
Option datastoreProjectIdOption = Option.builder("projectId")
379-
.hasArg(true)
380-
.required(false)
381-
.build();
380+
Option datastoreProjectIdOption =
381+
Option.builder("projectId").hasArg(true).required(false).build();
382382
commandLineOptions.addOption(datastoreProjectIdOption);
383383

384-
Option datastoreNamespaceOption = Option.builder("namespace")
385-
.hasArg(true)
386-
.required(false)
387-
.build();
384+
Option datastoreNamespaceOption =
385+
Option.builder("namespace").hasArg(true).required(false).build();
388386
commandLineOptions.addOption(datastoreNamespaceOption);
389387

390-
Option datastoreKindOption = Option.builder("kind")
391-
.hasArg(true)
392-
.required(false)
393-
.build();
388+
Option datastoreKindOption = Option.builder("kind").hasArg(true).required(false).build();
394389
commandLineOptions.addOption(datastoreKindOption);
395390

396391
CommandLineParser parser = new DefaultParser();
@@ -406,11 +401,13 @@ public static void main(String[] args) throws Exception {
406401
return;
407402
}
408403

409-
Likelihood minLikelihood = Likelihood.valueOf(cmd.getOptionValue(minLikelihoodOption.getOpt(),
410-
Likelihood.LIKELIHOOD_UNSPECIFIED.name()));
404+
Likelihood minLikelihood =
405+
Likelihood.valueOf(
406+
cmd.getOptionValue(
407+
minLikelihoodOption.getOpt(), Likelihood.LIKELIHOOD_UNSPECIFIED.name()));
411408
int maxFindings = Integer.parseInt(cmd.getOptionValue(maxFindingsOption.getOpt(), "0"));
412-
boolean includeQuote = Boolean
413-
.parseBoolean(cmd.getOptionValue(includeQuoteOption.getOpt(), "true"));
409+
boolean includeQuote =
410+
Boolean.parseBoolean(cmd.getOptionValue(includeQuoteOption.getOpt(), "true"));
414411

415412
List<InfoType> infoTypesList = Collections.emptyList();
416413
if (cmd.hasOption(infoTypesOption.getOpt())) {
@@ -437,8 +434,9 @@ public static void main(String[] args) throws Exception {
437434
String namespaceId = cmd.getOptionValue(datastoreNamespaceOption.getOpt(), "");
438435
String kind = cmd.getOptionValue(datastoreKindOption.getOpt());
439436
// use default project id when project id is not specified
440-
String projectId = cmd.getOptionValue(datastoreProjectIdOption.getOpt(),
441-
ServiceOptions.getDefaultProjectId());
437+
String projectId =
438+
cmd.getOptionValue(
439+
datastoreProjectIdOption.getOpt(), ServiceOptions.getDefaultProjectId());
442440
inspectDatastore(projectId, namespaceId, kind, minLikelihood, infoTypesList);
443441
}
444442
}

0 commit comments

Comments
 (0)