|
| 1 | +/* |
| 2 | + * Copyright 2019 Google LLC |
| 3 | + * |
| 4 | + * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | + * you may not use this file except in compliance with the License. |
| 6 | + * You may obtain a copy of the License at |
| 7 | + * |
| 8 | + * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | + * |
| 10 | + * Unless required by applicable law or agreed to in writing, software |
| 11 | + * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | + * See the License for the specific language governing permissions and |
| 14 | + * limitations under the License. |
| 15 | + */ |
| 16 | + |
| 17 | +package dlp.snippets; |
| 18 | + |
| 19 | +// [START dlp_inspect_bigquery_with_sampling] |
| 20 | + |
| 21 | +import com.google.api.core.SettableApiFuture; |
| 22 | +import com.google.cloud.dlp.v2.DlpServiceClient; |
| 23 | +import com.google.cloud.pubsub.v1.AckReplyConsumer; |
| 24 | +import com.google.cloud.pubsub.v1.MessageReceiver; |
| 25 | +import com.google.cloud.pubsub.v1.Subscriber; |
| 26 | +import com.google.privacy.dlp.v2.Action; |
| 27 | +import com.google.privacy.dlp.v2.BigQueryOptions; |
| 28 | +import com.google.privacy.dlp.v2.BigQueryOptions.SampleMethod; |
| 29 | +import com.google.privacy.dlp.v2.BigQueryTable; |
| 30 | +import com.google.privacy.dlp.v2.CreateDlpJobRequest; |
| 31 | +import com.google.privacy.dlp.v2.DlpJob; |
| 32 | +import com.google.privacy.dlp.v2.FieldId; |
| 33 | +import com.google.privacy.dlp.v2.GetDlpJobRequest; |
| 34 | +import com.google.privacy.dlp.v2.InfoType; |
| 35 | +import com.google.privacy.dlp.v2.InfoTypeStats; |
| 36 | +import com.google.privacy.dlp.v2.InspectConfig; |
| 37 | +import com.google.privacy.dlp.v2.InspectDataSourceDetails; |
| 38 | +import com.google.privacy.dlp.v2.InspectJobConfig; |
| 39 | +import com.google.privacy.dlp.v2.LocationName; |
| 40 | +import com.google.privacy.dlp.v2.StorageConfig; |
| 41 | +import com.google.pubsub.v1.ProjectSubscriptionName; |
| 42 | +import com.google.pubsub.v1.PubsubMessage; |
| 43 | +import java.io.IOException; |
| 44 | +import java.util.concurrent.ExecutionException; |
| 45 | +import java.util.concurrent.TimeUnit; |
| 46 | +import java.util.concurrent.TimeoutException; |
| 47 | + |
| 48 | +public class InspectBigQueryTableWithSampling { |
| 49 | + |
| 50 | + public static void inspectBigQueryTableWithSampling() |
| 51 | + throws InterruptedException, ExecutionException, IOException { |
| 52 | + // TODO(developer): Replace these variables before running the sample. |
| 53 | + String projectId = "your-project-id"; |
| 54 | + String topicId = "your-pubsub-topic-id"; |
| 55 | + String subscriptionId = "your-pubsub-subscription-id"; |
| 56 | + inspectBigQueryTableWithSampling(projectId, topicId, subscriptionId); |
| 57 | + } |
| 58 | + |
| 59 | + // Inspects a BigQuery Table |
| 60 | + public static void inspectBigQueryTableWithSampling( |
| 61 | + String projectId, String topicId, String subscriptionId) |
| 62 | + throws ExecutionException, InterruptedException, IOException { |
| 63 | + // Initialize client that will be used to send requests. This client only needs to be created |
| 64 | + // once, and can be reused for multiple requests. After completing all of your requests, call |
| 65 | + // the "close" method on the client to safely clean up any remaining background resources. |
| 66 | + try (DlpServiceClient dlp = DlpServiceClient.create()) { |
| 67 | + // Specify the BigQuery table to be inspected. |
| 68 | + BigQueryTable tableReference = |
| 69 | + BigQueryTable.newBuilder() |
| 70 | + .setProjectId("bigquery-public-data") |
| 71 | + .setDatasetId("usa_names") |
| 72 | + .setTableId("usa_1910_current") |
| 73 | + .build(); |
| 74 | + |
| 75 | + BigQueryOptions bigQueryOptions = |
| 76 | + BigQueryOptions.newBuilder() |
| 77 | + .setTableReference(tableReference) |
| 78 | + .setRowsLimit(1000) |
| 79 | + .setSampleMethod(SampleMethod.RANDOM_START) |
| 80 | + .addIdentifyingFields(FieldId.newBuilder().setName("name")) |
| 81 | + .build(); |
| 82 | + |
| 83 | + StorageConfig storageConfig = |
| 84 | + StorageConfig.newBuilder().setBigQueryOptions(bigQueryOptions).build(); |
| 85 | + |
| 86 | + // Specify the type of info the inspection will look for. |
| 87 | + // See https://cloud.google.com/dlp/docs/infotypes-reference for complete list of info types |
| 88 | + InfoType infoType = InfoType.newBuilder().setName("PERSON_NAME").build(); |
| 89 | + |
| 90 | + // Specify how the content should be inspected. |
| 91 | + InspectConfig inspectConfig = |
| 92 | + InspectConfig.newBuilder() |
| 93 | + .addInfoTypes(infoType) |
| 94 | + .setIncludeQuote(true) |
| 95 | + .build(); |
| 96 | + |
| 97 | + // Specify the action that is triggered when the job completes. |
| 98 | + String pubSubTopic = String.format("projects/%s/topics/%s", projectId, topicId); |
| 99 | + Action.PublishToPubSub publishToPubSub = |
| 100 | + Action.PublishToPubSub.newBuilder().setTopic(pubSubTopic).build(); |
| 101 | + Action action = Action.newBuilder().setPubSub(publishToPubSub).build(); |
| 102 | + |
| 103 | + // Configure the long running job we want the service to perform. |
| 104 | + InspectJobConfig inspectJobConfig = |
| 105 | + InspectJobConfig.newBuilder() |
| 106 | + .setStorageConfig(storageConfig) |
| 107 | + .setInspectConfig(inspectConfig) |
| 108 | + .addActions(action) |
| 109 | + .build(); |
| 110 | + |
| 111 | + // Create the request for the job configured above. |
| 112 | + CreateDlpJobRequest createDlpJobRequest = |
| 113 | + CreateDlpJobRequest.newBuilder() |
| 114 | + .setParent(LocationName.of(projectId, "global").toString()) |
| 115 | + .setInspectJob(inspectJobConfig) |
| 116 | + .build(); |
| 117 | + |
| 118 | + // Use the client to send the request. |
| 119 | + final DlpJob dlpJob = dlp.createDlpJob(createDlpJobRequest); |
| 120 | + System.out.println("Job created: " + dlpJob.getName()); |
| 121 | + |
| 122 | + // Set up a Pub/Sub subscriber to listen on the job completion status |
| 123 | + final SettableApiFuture<Boolean> done = SettableApiFuture.create(); |
| 124 | + |
| 125 | + ProjectSubscriptionName subscriptionName = |
| 126 | + ProjectSubscriptionName.of(projectId, subscriptionId); |
| 127 | + |
| 128 | + MessageReceiver messageHandler = |
| 129 | + (PubsubMessage pubsubMessage, AckReplyConsumer ackReplyConsumer) -> { |
| 130 | + handleMessage(dlpJob, done, pubsubMessage, ackReplyConsumer); |
| 131 | + }; |
| 132 | + Subscriber subscriber = Subscriber.newBuilder(subscriptionName, messageHandler).build(); |
| 133 | + subscriber.startAsync(); |
| 134 | + |
| 135 | + // Wait for the original job to complete |
| 136 | + try { |
| 137 | + done.get(15, TimeUnit.MINUTES); |
| 138 | + } catch (TimeoutException e) { |
| 139 | + System.out.println("Job was not completed after 15 minutes."); |
| 140 | + return; |
| 141 | + } finally { |
| 142 | + subscriber.stopAsync(); |
| 143 | + subscriber.awaitTerminated(); |
| 144 | + } |
| 145 | + |
| 146 | + // Get the latest state of the job from the service |
| 147 | + GetDlpJobRequest request = GetDlpJobRequest.newBuilder().setName(dlpJob.getName()).build(); |
| 148 | + DlpJob completedJob = dlp.getDlpJob(request); |
| 149 | + |
| 150 | + // Parse the response and process results. |
| 151 | + System.out.println("Job status: " + completedJob.getState()); |
| 152 | + InspectDataSourceDetails.Result result = completedJob.getInspectDetails().getResult(); |
| 153 | + System.out.println("Findings: "); |
| 154 | + for (InfoTypeStats infoTypeStat : result.getInfoTypeStatsList()) { |
| 155 | + System.out.print("\tInfo type: " + infoTypeStat.getInfoType().getName()); |
| 156 | + System.out.println("\tCount: " + infoTypeStat.getCount()); |
| 157 | + } |
| 158 | + } |
| 159 | + } |
| 160 | + |
| 161 | + // handleMessage injects the job and settableFuture into the message reciever interface |
| 162 | + private static void handleMessage( |
| 163 | + DlpJob job, |
| 164 | + SettableApiFuture<Boolean> done, |
| 165 | + PubsubMessage pubsubMessage, |
| 166 | + AckReplyConsumer ackReplyConsumer) { |
| 167 | + String messageAttribute = pubsubMessage.getAttributesMap().get("DlpJobName"); |
| 168 | + if (job.getName().equals(messageAttribute)) { |
| 169 | + done.set(true); |
| 170 | + ackReplyConsumer.ack(); |
| 171 | + } else { |
| 172 | + ackReplyConsumer.nack(); |
| 173 | + } |
| 174 | + } |
| 175 | +} |
| 176 | +// [END dlp_inspect_bigquery_with_sampling] |
0 commit comments