diff --git a/dlp/deid.py b/dlp/deid.py
index 423e0c26c64..b08a341dd82 100644
--- a/dlp/deid.py
+++ b/dlp/deid.py
@@ -46,7 +46,9 @@ def deidentify_with_mask(
     parent = dlp.project_path(project)

     # Construct inspect configuration dictionary
-    inspect_config = {"info_types": [{"name": info_type} for info_type in info_types]}
+    inspect_config = {
+        "info_types": [{"name": info_type} for info_type in info_types]
+    }

     # Construct deidentify configuration dictionary
     deidentify_config = {
@@ -131,17 +133,24 @@ def deidentify_with_fpe(
     # Construct FPE configuration dictionary
     crypto_replace_ffx_fpe_config = {
         "crypto_key": {
-            "kms_wrapped": {"wrapped_key": wrapped_key, "crypto_key_name": key_name}
+            "kms_wrapped": {
+                "wrapped_key": wrapped_key,
+                "crypto_key_name": key_name,
+            }
         },
         "common_alphabet": alphabet,
     }

     # Add surrogate type
     if surrogate_type:
-        crypto_replace_ffx_fpe_config["surrogate_info_type"] = {"name": surrogate_type}
+        crypto_replace_ffx_fpe_config["surrogate_info_type"] = {
+            "name": surrogate_type
+        }

     # Construct inspect configuration dictionary
-    inspect_config = {"info_types": [{"name": info_type} for info_type in info_types]}
+    inspect_config = {
+        "info_types": [{"name": info_type} for info_type in info_types]
+    }

     # Construct deidentify configuration dictionary
     deidentify_config = {
@@ -176,7 +185,12 @@ def deidentify_with_fpe(

 # [START dlp_reidentify_fpe]
 def reidentify_with_fpe(
-    project, string, alphabet=None, surrogate_type=None, key_name=None, wrapped_key=None
+    project,
+    string,
+    alphabet=None,
+    surrogate_type=None,
+    key_name=None,
+    wrapped_key=None,
 ):
     """Uses the Data Loss Prevention API to reidentify sensitive data in a
     string that was encrypted by Format Preserving Encryption (FPE).
@@ -333,7 +347,11 @@ def map_data(value):
         try:
             date = datetime.strptime(value, "%m/%d/%Y")
             return {
-                "date_value": {"year": date.year, "month": date.month, "day": date.day}
+                "date_value": {
+                    "year": date.year,
+                    "month": date.month,
+                    "day": date.day,
+                }
             }
         except ValueError:
             return {"string_value": value}
@@ -426,7 +444,8 @@ def write_data(data):

     mask_parser = subparsers.add_parser(
         "deid_mask",
-        help="Deidentify sensitive data in a string by masking it with a " "character.",
+        help="Deidentify sensitive data in a string by masking it with a "
+        "character.",
     )
     mask_parser.add_argument(
         "--info_types",
@@ -438,7 +457,8 @@ def write_data(data):
         default=["FIRST_NAME", "LAST_NAME", "EMAIL_ADDRESS"],
     )
     mask_parser.add_argument(
-        "project", help="The Google Cloud project id to use as a parent resource."
+        "project",
+        help="The Google Cloud project id to use as a parent resource.",
     )
     mask_parser.add_argument("item", help="The string to deidentify.")
     mask_parser.add_argument(
@@ -471,11 +491,13 @@ def write_data(data):
         default=["FIRST_NAME", "LAST_NAME", "EMAIL_ADDRESS"],
     )
     fpe_parser.add_argument(
-        "project", help="The Google Cloud project id to use as a parent resource."
+        "project",
+        help="The Google Cloud project id to use as a parent resource.",
     )
     fpe_parser.add_argument(
         "item",
-        help="The string to deidentify. " "Example: string = 'My SSN is 372819127'",
+        help="The string to deidentify. "
+        "Example: string = 'My SSN is 372819127'",
     )
     fpe_parser.add_argument(
         "key_name",
@@ -513,11 +535,13 @@ def write_data(data):
         "Encryption (FPE).",
     )
     reid_parser.add_argument(
-        "project", help="The Google Cloud project id to use as a parent resource."
+        "project",
+        help="The Google Cloud project id to use as a parent resource.",
     )
     reid_parser.add_argument(
         "item",
-        help="The string to deidentify. " "Example: string = 'My SSN is 372819127'",
+        help="The string to deidentify. "
+        "Example: string = 'My SSN is 372819127'",
     )
     reid_parser.add_argument(
         "surrogate_type",
@@ -553,7 +577,8 @@ def write_data(data):
         help="Deidentify dates in a CSV file by pseudorandomly shifting them.",
     )
     date_shift_parser.add_argument(
-        "project", help="The Google Cloud project id to use as a parent resource."
+        "project",
+        help="The Google Cloud project id to use as a parent resource.",
     )
     date_shift_parser.add_argument(
         "input_csv_file",
diff --git a/dlp/deid_test.py b/dlp/deid_test.py
index df9dae418e6..db14b5758e9 100644
--- a/dlp/deid_test.py
+++ b/dlp/deid_test.py
@@ -78,7 +78,10 @@ def test_deidentify_with_mask_masking_character_specified(capsys):

 def test_deidentify_with_mask_masking_number_specified(capsys):
     deid.deidentify_with_mask(
-        GCLOUD_PROJECT, HARMFUL_STRING, ["US_SOCIAL_SECURITY_NUMBER"], number_to_mask=7
+        GCLOUD_PROJECT,
+        HARMFUL_STRING,
+        ["US_SOCIAL_SECURITY_NUMBER"],
+        number_to_mask=7,
     )

     out, _ = capsys.readouterr()
diff --git a/dlp/inspect_content.py b/dlp/inspect_content.py
index 0c151bf64e7..336f798128b 100644
--- a/dlp/inspect_content.py
+++ b/dlp/inspect_content.py
@@ -220,7 +220,9 @@ def inspect_table(
     headers = [{"name": val} for val in data["header"]]
     rows = []
     for row in data["rows"]:
-        rows.append({"values": [{"string_value": cell_val} for cell_val in row]})
+        rows.append(
+            {"values": [{"string_value": cell_val} for cell_val in row]}
+        )

     table = {}
     table["headers"] = headers
@@ -978,7 +980,9 @@ def callback(message):
     )

     parser_file = subparsers.add_parser("file", help="Inspect a local file.")
-    parser_file.add_argument("filename", help="The path to the file to inspect.")
+    parser_file.add_argument(
+        "filename", help="The path to the file to inspect."
+    )
     parser_file.add_argument(
         "--project",
         help="The Google Cloud project id to use as a parent resource.",
@@ -1121,10 +1125,12 @@ def callback(message):
         "datastore", help="Inspect files on Google Datastore."
     )
     parser_datastore.add_argument(
-        "datastore_project", help="The Google Cloud project id of the target Datastore."
+        "datastore_project",
+        help="The Google Cloud project id of the target Datastore.",
     )
     parser_datastore.add_argument(
-        "kind", help='The kind of the Datastore entity to inspect, e.g. "Person".'
+        "kind",
+        help='The kind of the Datastore entity to inspect, e.g. "Person".',
     )
     parser_datastore.add_argument(
         "topic_id",
@@ -1200,7 +1206,8 @@ def callback(message):
         "bigquery", help="Inspect files on Google BigQuery."
     )
     parser_bigquery.add_argument(
-        "bigquery_project", help="The Google Cloud project id of the target table."
+        "bigquery_project",
+        help="The Google Cloud project id of the target table.",
     )
     parser_bigquery.add_argument(
         "dataset_id", help="The ID of the target BigQuery dataset."
diff --git a/dlp/inspect_content_test.py b/dlp/inspect_content_test.py
index 899ed64c3b3..e15d6f55a56 100644
--- a/dlp/inspect_content_test.py
+++ b/dlp/inspect_content_test.py
@@ -13,6 +13,7 @@
 # limitations under the License.

 import os
+import uuid

 from gcp_devrel.testing import eventually_consistent
 from gcp_devrel.testing.flaky import flaky
@@ -26,16 +27,18 @@
 import pytest

 import inspect_content

+UNIQUE_STRING = str(uuid.uuid4()).split("-")[0]
 GCLOUD_PROJECT = os.getenv("GCLOUD_PROJECT")
-TEST_BUCKET_NAME = GCLOUD_PROJECT + "-dlp-python-client-test"
+TEST_BUCKET_NAME = GCLOUD_PROJECT + "-dlp-python-client-test" + UNIQUE_STRING
 RESOURCE_DIRECTORY = os.path.join(os.path.dirname(__file__), "resources")
 RESOURCE_FILE_NAMES = ["test.txt", "test.png", "harmless.txt", "accounts.txt"]
-TOPIC_ID = "dlp-test"
-SUBSCRIPTION_ID = "dlp-test-subscription"
+TOPIC_ID = "dlp-test" + UNIQUE_STRING
+SUBSCRIPTION_ID = "dlp-test-subscription" + UNIQUE_STRING
 DATASTORE_KIND = "DLP test kind"
-BIGQUERY_DATASET_ID = "dlp_test_dataset"
-BIGQUERY_TABLE_ID = "dlp_test_table"
+DATASTORE_NAME = "DLP test object" + UNIQUE_STRING
+BIGQUERY_DATASET_ID = "dlp_test_dataset" + UNIQUE_STRING
+BIGQUERY_TABLE_ID = "dlp_test_table" + UNIQUE_STRING


 @pytest.fixture(scope="module")
@@ -91,7 +94,9 @@ def subscription_id(topic_id):
     # Subscribes to a topic.
     subscriber = google.cloud.pubsub.SubscriberClient()
     topic_path = subscriber.topic_path(GCLOUD_PROJECT, topic_id)
-    subscription_path = subscriber.subscription_path(GCLOUD_PROJECT, SUBSCRIPTION_ID)
+    subscription_path = subscriber.subscription_path(
+        GCLOUD_PROJECT, SUBSCRIPTION_ID
+    )
     try:
         subscriber.create_subscription(subscription_path, topic_path)
     except google.api_core.exceptions.AlreadyExists:
@@ -108,7 +113,7 @@ def datastore_project():
     datastore_client = google.cloud.datastore.Client()

     kind = DATASTORE_KIND
-    name = "DLP test object"
+    name = DATASTORE_NAME
     key = datastore_client.key(kind, name)
     item = google.cloud.datastore.Entity(key=key)
     item["payload"] = "My name is Gary Smith and my email is gary@example.com"
@@ -159,7 +164,10 @@ def test_inspect_string(capsys):
     test_string = "My name is Gary Smith and my email is gary@example.com"

     inspect_content.inspect_string(
-        GCLOUD_PROJECT, test_string, ["FIRST_NAME", "EMAIL_ADDRESS"], include_quote=True
+        GCLOUD_PROJECT,
+        test_string,
+        ["FIRST_NAME", "EMAIL_ADDRESS"],
+        include_quote=True,
     )

     out, _ = capsys.readouterr()
@@ -211,7 +219,10 @@ def test_inspect_string_no_results(capsys):
     test_string = "Nothing to see here"

     inspect_content.inspect_string(
-        GCLOUD_PROJECT, test_string, ["FIRST_NAME", "EMAIL_ADDRESS"], include_quote=True
+        GCLOUD_PROJECT,
+        test_string,
+        ["FIRST_NAME", "EMAIL_ADDRESS"],
+        include_quote=True,
     )

     out, _ = capsys.readouterr()
@@ -320,7 +331,9 @@ def test_inspect_gcs_file_with_custom_info_types(


 @flaky
-def test_inspect_gcs_file_no_results(bucket, topic_id, subscription_id, capsys):
+def test_inspect_gcs_file_no_results(
+    bucket, topic_id, subscription_id, capsys
+):
     inspect_content.inspect_gcs_file(
         GCLOUD_PROJECT,
         bucket.name,
@@ -367,7 +380,9 @@ def test_inspect_gcs_multiple_files(bucket, topic_id, subscription_id, capsys):


 @flaky
-def test_inspect_datastore(datastore_project, topic_id, subscription_id, capsys):
+def test_inspect_datastore(
+    datastore_project, topic_id, subscription_id, capsys
+):
     @eventually_consistent.call
     def _():
         inspect_content.inspect_datastore(
diff --git a/dlp/jobs.py b/dlp/jobs.py
index ec84efbf8f5..a8ac0b43c5e 100644
--- a/dlp/jobs.py
+++ b/dlp/jobs.py
@@ -65,7 +65,8 @@ def list_dlp_jobs(project, filter_string=None, job_type=None):

     # Job type dictionary
     job_type_to_int = {
-        "DLP_JOB_TYPE_UNSPECIFIED": google.cloud.dlp.enums.DlpJobType.DLP_JOB_TYPE_UNSPECIFIED,
+        "DLP_JOB_TYPE_UNSPECIFIED":
+            google.cloud.dlp.enums.DlpJobType.DLP_JOB_TYPE_UNSPECIFIED,
         "INSPECT_JOB": google.cloud.dlp.enums.DlpJobType.INSPECT_JOB,
         "RISK_ANALYSIS_JOB": google.cloud.dlp.enums.DlpJobType.RISK_ANALYSIS_JOB,
     }
@@ -122,7 +123,8 @@ def delete_dlp_job(project, job_name):

     list_parser = subparsers.add_parser(
         "list",
-        help="List Data Loss Prevention API jobs corresponding to a given " "filter.",
+        help="List Data Loss Prevention API jobs corresponding to a given "
+        "filter.",
     )
     list_parser.add_argument(
         "project", help="The project id to use as a parent resource."
@@ -135,7 +137,11 @@ def delete_dlp_job(project, job_name):
     list_parser.add_argument(
         "-t",
         "--type",
-        choices=["DLP_JOB_TYPE_UNSPECIFIED", "INSPECT_JOB", "RISK_ANALYSIS_JOB"],
+        choices=[
+            "DLP_JOB_TYPE_UNSPECIFIED",
+            "INSPECT_JOB",
+            "RISK_ANALYSIS_JOB",
+        ],
         help='The type of job. API defaults to "INSPECT"',
     )

@@ -147,12 +153,15 @@ def delete_dlp_job(project, job_name):
     )
     delete_parser.add_argument(
         "job_name",
-        help="The name of the DlpJob resource to be deleted. " "Example: X-#####",
+        help="The name of the DlpJob resource to be deleted. "
+        "Example: X-#####",
     )

     args = parser.parse_args()

     if args.content == "list":
-        list_dlp_jobs(args.project, filter_string=args.filter, job_type=args.type)
+        list_dlp_jobs(
+            args.project, filter_string=args.filter, job_type=args.type
+        )
     elif args.content == "delete":
         delete_dlp_job(args.project, args.job_name)
diff --git a/dlp/jobs_test.py b/dlp/jobs_test.py
index 15417def67c..98acb7464e3 100644
--- a/dlp/jobs_test.py
+++ b/dlp/jobs_test.py
@@ -13,6 +13,7 @@
 # limitations under the License.

 import os
+from flaky import flaky

 import pytest

@@ -25,7 +26,7 @@

 TEST_TABLE_ID = "bikeshare_trips"


-@pytest.fixture(scope="session")
+@pytest.fixture(scope="module")
 def test_job_name():
     import google.cloud.dlp
@@ -48,29 +49,40 @@ def test_job_name():
     response = dlp.create_dlp_job(parent, risk_job=risk_job)
     full_path = response.name
     # API expects only job name, not full project path
-    job_name = full_path[full_path.rfind("/") + 1 :]
-    return job_name
+    job_name = full_path[full_path.rfind("/") + 1:]
+    yield job_name
+
+    # clean up job if not deleted
+    try:
+        dlp.delete_dlp_job(full_path)
+    except google.api_core.exceptions.NotFound:
+        print("Issue during teardown, missing job")


-def test_list_dlp_jobs(capsys):
+def test_list_dlp_jobs(test_job_name, capsys):
     jobs.list_dlp_jobs(GCLOUD_PROJECT)

     out, _ = capsys.readouterr()
-    assert "Job: projects/" in out
+    assert test_job_name not in out


-def test_list_dlp_jobs_with_filter(capsys):
-    jobs.list_dlp_jobs(GCLOUD_PROJECT, filter_string="state=DONE")
+@flaky
+def test_list_dlp_jobs_with_filter(test_job_name, capsys):
+    jobs.list_dlp_jobs(
+        GCLOUD_PROJECT,
+        filter_string="state=RUNNING",
+        job_type="RISK_ANALYSIS_JOB",
+    )

     out, _ = capsys.readouterr()
-    assert "Job: projects/" in out
+    assert test_job_name in out


-def test_list_dlp_jobs_with_job_type(capsys):
+def test_list_dlp_jobs_with_job_type(test_job_name, capsys):
     jobs.list_dlp_jobs(GCLOUD_PROJECT, job_type="INSPECT_JOB")

     out, _ = capsys.readouterr()
-    assert "Job: projects/" in out
+    assert test_job_name not in out  # job created is a risk analysis job


 def test_delete_dlp_job(test_job_name, capsys):
diff --git a/dlp/metadata.py b/dlp/metadata.py
index 81b8f5e08a4..7a65941d622 100644
--- a/dlp/metadata.py
+++ b/dlp/metadata.py
@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 # Copyright 2017 Google Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -43,7 +44,7 @@ def list_info_types(language_code=None, result_filter=None):
     print("Info types:")
     for info_type in response.info_types:
         print(
-            "{name}: {display_name}".format(
+            u"{name}: {display_name}".format(
                 name=info_type.name, display_name=info_type.display_name
             )
         )
@@ -55,7 +56,8 @@ def list_info_types(language_code=None, result_filter=None):
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description=__doc__)
     parser.add_argument(
-        "--language_code", help="The BCP-47 language code to use, e.g. 'en-US'."
+        "--language_code",
+        help="The BCP-47 language code to use, e.g. 'en-US'.",
     )
     parser.add_argument(
         "--filter",
@@ -65,4 +67,6 @@ def list_info_types(language_code=None, result_filter=None):

     args = parser.parse_args()

-    list_info_types(language_code=args.language_code, result_filter=args.filter)
+    list_info_types(
+        language_code=args.language_code, result_filter=args.filter
+    )
diff --git a/dlp/quickstart.py b/dlp/quickstart.py
index 2cc0f144267..1b12a83da1d 100644
--- a/dlp/quickstart.py
+++ b/dlp/quickstart.py
@@ -74,7 +74,9 @@ def quickstart(project_id):
             print("Info type: {}".format(finding.info_type.name))
             # Convert likelihood value to string respresentation.
             likelihood = (
-                google.cloud.dlp.types.Finding.DESCRIPTOR.fields_by_name["likelihood"]
+                google.cloud.dlp.types.Finding.DESCRIPTOR.fields_by_name[
+                    "likelihood"
+                ]
                 .enum_type.values_by_number[finding.likelihood]
                 .name
             )
@@ -86,7 +88,9 @@ def quickstart(project_id):

 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
-    parser.add_argument("project_id", help="Enter your GCP project id.", type=str)
+    parser.add_argument(
+        "project_id", help="Enter your GCP project id.", type=str
+    )
     args = parser.parse_args()
     if len(sys.argv) == 1:
         parser.print_usage()
diff --git a/dlp/redact.py b/dlp/redact.py
index e3ff08ec65e..ad1d866d6d6 100644
--- a/dlp/redact.py
+++ b/dlp/redact.py
@@ -30,7 +30,12 @@


 def redact_image(
-    project, filename, output_filename, info_types, min_likelihood=None, mime_type=None
+    project,
+    filename,
+    output_filename,
+    info_types,
+    min_likelihood=None,
+    mime_type=None,
 ):
     """Uses the Data Loss Prevention API to redact protected data in an image.
     Args:
@@ -68,7 +73,10 @@ def redact_image(

     # Construct the configuration dictionary. Keys which are None may
     # optionally be omitted entirely.
-    inspect_config = {"min_likelihood": min_likelihood, "info_types": info_types}
+    inspect_config = {
+        "min_likelihood": min_likelihood,
+        "info_types": info_types,
+    }

     # If mime_type is not specified, guess it from the filename.
     if mime_type is None:
@@ -121,7 +129,8 @@ def redact_image(

     parser.add_argument("filename", help="The path to the file to inspect.")
     parser.add_argument(
-        "output_filename", help="The path to which the redacted image will be written."
+        "output_filename",
+        help="The path to which the redacted image will be written.",
     )
     parser.add_argument(
         "--project",
diff --git a/dlp/redact_test.py b/dlp/redact_test.py
index 39875551b12..dd9a887d23d 100644
--- a/dlp/redact_test.py
+++ b/dlp/redact_test.py
@@ -36,7 +36,10 @@ def test_redact_image_file(tempdir, capsys):
     output_filepath = os.path.join(tempdir, "redacted.png")

     redact.redact_image(
-        GCLOUD_PROJECT, test_filepath, output_filepath, ["FIRST_NAME", "EMAIL_ADDRESS"]
+        GCLOUD_PROJECT,
+        test_filepath,
+        output_filepath,
+        ["FIRST_NAME", "EMAIL_ADDRESS"],
     )

     out, _ = capsys.readouterr()
diff --git a/dlp/risk.py b/dlp/risk.py
index 272d29768dc..386f05c0d73 100644
--- a/dlp/risk.py
+++ b/dlp/risk.py
@@ -66,7 +66,8 @@ def callback(message):
             results = job.risk_details.numerical_stats_result
             print(
                 "Value Range: [{}, {}]".format(
-                    results.min_value.integer_value, results.max_value.integer_value
+                    results.min_value.integer_value,
+                    results.max_value.integer_value,
                 )
             )
             prev_value = None
@@ -99,7 +100,9 @@ def callback(message):
     # Configure risk analysis job
     # Give the name of the numeric column to compute risk metrics for
     risk_job = {
-        "privacy_metric": {"numerical_stats_config": {"field": {"name": column_name}}},
+        "privacy_metric": {
+            "numerical_stats_config": {"field": {"name": column_name}}
+        },
         "source_table": source_table,
         "actions": actions,
     }
@@ -171,7 +174,7 @@ def callback(message):
             # Now that the job is done, fetch the results and print them.
             job = dlp.get_dlp_job(operation.name)
             histogram_buckets = (
-                job.risk_details.categorical_stats_result.value_frequency_histogram_buckets
+                job.risk_details.categorical_stats_result.value_frequency_histogram_buckets  # noqa: E501
             )
             # Print bucket stats
             for i, bucket in enumerate(histogram_buckets):
@@ -426,7 +429,7 @@ def callback(message):
             # Now that the job is done, fetch the results and print them.
             job = dlp.get_dlp_job(operation.name)
             histogram_buckets = (
-                job.risk_details.l_diversity_result.sensitive_value_frequency_histogram_buckets
+                job.risk_details.l_diversity_result.sensitive_value_frequency_histogram_buckets  # noqa: E501
             )
             # Print bucket stats
             for i, bucket in enumerate(histogram_buckets):
@@ -444,7 +447,9 @@ def callback(message):
                 )
             )
             print(
-                "   Class size: {}".format(value_bucket.equivalence_class_size)
+                "   Class size: {}".format(
+                    value_bucket.equivalence_class_size
+                )
             )
             for value in value_bucket.top_sensitive_values:
                 print(
@@ -674,7 +679,8 @@ def map_fields(quasi_id, info_type):

     numerical_parser = subparsers.add_parser("numerical", help="")
     numerical_parser.add_argument(
-        "project", help="The Google Cloud project id to use as a parent resource."
+        "project",
+        help="The Google Cloud project id to use as a parent resource.",
     )
     numerical_parser.add_argument(
         "table_project_id",
@@ -683,9 +689,12 @@ def map_fields(quasi_id, info_type):
     numerical_parser.add_argument(
         "dataset_id", help="The id of the dataset to inspect."
     )
-    numerical_parser.add_argument("table_id", help="The id of the table to inspect.")
     numerical_parser.add_argument(
-        "column_name", help="The name of the column to compute risk metrics for."
+        "table_id", help="The id of the table to inspect."
+    )
+    numerical_parser.add_argument(
+        "column_name",
+        help="The name of the column to compute risk metrics for.",
     )
     numerical_parser.add_argument(
         "topic_id",
@@ -704,7 +713,8 @@ def map_fields(quasi_id, info_type):

     categorical_parser = subparsers.add_parser("categorical", help="")
     categorical_parser.add_argument(
-        "project", help="The Google Cloud project id to use as a parent resource."
+        "project",
+        help="The Google Cloud project id to use as a parent resource.",
     )
     categorical_parser.add_argument(
         "table_project_id",
@@ -713,9 +723,12 @@ def map_fields(quasi_id, info_type):
     categorical_parser.add_argument(
         "dataset_id", help="The id of the dataset to inspect."
     )
-    categorical_parser.add_argument("table_id", help="The id of the table to inspect.")
     categorical_parser.add_argument(
-        "column_name", help="The name of the column to compute risk metrics for."
+        "table_id", help="The id of the table to inspect."
+    )
+    categorical_parser.add_argument(
+        "column_name",
+        help="The name of the column to compute risk metrics for.",
     )
     categorical_parser.add_argument(
         "topic_id",
@@ -734,10 +747,12 @@ def map_fields(quasi_id, info_type):

     k_anonymity_parser = subparsers.add_parser(
         "k_anonymity",
-        help="Computes the k-anonymity of a column set in a Google BigQuery" "table.",
+        help="Computes the k-anonymity of a column set in a Google BigQuery"
+        "table.",
     )
     k_anonymity_parser.add_argument(
-        "project", help="The Google Cloud project id to use as a parent resource."
+        "project",
+        help="The Google Cloud project id to use as a parent resource.",
     )
     k_anonymity_parser.add_argument(
         "table_project_id",
@@ -746,7 +761,9 @@ def map_fields(quasi_id, info_type):
     k_anonymity_parser.add_argument(
         "dataset_id", help="The id of the dataset to inspect."
     )
-    k_anonymity_parser.add_argument("table_id", help="The id of the table to inspect.")
+    k_anonymity_parser.add_argument(
+        "table_id", help="The id of the table to inspect."
+    )
     k_anonymity_parser.add_argument(
         "topic_id",
         help="The name of the Pub/Sub topic to notify once the job completes.",
@@ -757,7 +774,9 @@ def map_fields(quasi_id, info_type):
         "job completion notifications.",
     )
     k_anonymity_parser.add_argument(
-        "quasi_ids", nargs="+", help="A set of columns that form a composite key."
+        "quasi_ids",
+        nargs="+",
+        help="A set of columns that form a composite key.",
     )
     k_anonymity_parser.add_argument(
         "--timeout",
@@ -767,10 +786,12 @@ def map_fields(quasi_id, info_type):

     l_diversity_parser = subparsers.add_parser(
         "l_diversity",
-        help="Computes the l-diversity of a column set in a Google BigQuery" "table.",
+        help="Computes the l-diversity of a column set in a Google BigQuery"
+        "table.",
     )
     l_diversity_parser.add_argument(
-        "project", help="The Google Cloud project id to use as a parent resource."
+        "project",
+        help="The Google Cloud project id to use as a parent resource.",
     )
     l_diversity_parser.add_argument(
         "table_project_id",
@@ -779,7 +800,9 @@ def map_fields(quasi_id, info_type):
     l_diversity_parser.add_argument(
         "dataset_id", help="The id of the dataset to inspect."
     )
-    l_diversity_parser.add_argument("table_id", help="The id of the table to inspect.")
+    l_diversity_parser.add_argument(
+        "table_id", help="The id of the table to inspect."
+    )
     l_diversity_parser.add_argument(
         "topic_id",
         help="The name of the Pub/Sub topic to notify once the job completes.",
@@ -790,10 +813,13 @@ def map_fields(quasi_id, info_type):
         "job completion notifications.",
     )
     l_diversity_parser.add_argument(
-        "sensitive_attribute", help="The column to measure l-diversity relative to."
+        "sensitive_attribute",
+        help="The column to measure l-diversity relative to.",
     )
     l_diversity_parser.add_argument(
-        "quasi_ids", nargs="+", help="A set of columns that form a composite key."
+        "quasi_ids",
+        nargs="+",
+        help="A set of columns that form a composite key.",
     )
     l_diversity_parser.add_argument(
         "--timeout",
@@ -807,14 +833,19 @@ def map_fields(quasi_id, info_type):
         "BigQuery table.",
     )
     k_map_parser.add_argument(
-        "project", help="The Google Cloud project id to use as a parent resource."
+        "project",
+        help="The Google Cloud project id to use as a parent resource.",
    )
     k_map_parser.add_argument(
         "table_project_id",
         help="The Google Cloud project id where the BigQuery table is stored.",
     )
-    k_map_parser.add_argument("dataset_id", help="The id of the dataset to inspect.")
-    k_map_parser.add_argument("table_id", help="The id of the table to inspect.")
+    k_map_parser.add_argument(
+        "dataset_id", help="The id of the dataset to inspect."
+    )
+    k_map_parser.add_argument(
+        "table_id", help="The id of the table to inspect."
+    )
     k_map_parser.add_argument(
         "topic_id",
         help="The name of the Pub/Sub topic to notify once the job completes.",
@@ -825,7 +856,9 @@ def map_fields(quasi_id, info_type):
         "job completion notifications.",
     )
     k_map_parser.add_argument(
-        "quasi_ids", nargs="+", help="A set of columns that form a composite key."
+        "quasi_ids",
+        nargs="+",
+        help="A set of columns that form a composite key.",
     )
     k_map_parser.add_argument(
         "-t",
diff --git a/dlp/risk_test.py b/dlp/risk_test.py
index dafb58523bc..41b514f4da7 100644
--- a/dlp/risk_test.py
+++ b/dlp/risk_test.py
@@ -12,23 +12,30 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-from gcp_devrel.testing.flaky import flaky
+from flaky import flaky
+import uuid
+
 import google.cloud.pubsub
+import google.cloud.bigquery
 import pytest
+import os

 import risk

-GCLOUD_PROJECT = "python-docs-samples"
-TABLE_PROJECT = "python-docs-samples"
-TOPIC_ID = "dlp-test"
-SUBSCRIPTION_ID = "dlp-test-subscription"
-DATASET_ID = "integration_tests_dlp"
+UNIQUE_STRING = str(uuid.uuid4()).split("-")[0]
+GCLOUD_PROJECT = os.environ.get("GCLOUD_PROJECT")
+TABLE_PROJECT = os.environ.get("GCLOUD_PROJECT")
+TOPIC_ID = "dlp-test" + UNIQUE_STRING
+SUBSCRIPTION_ID = "dlp-test-subscription" + UNIQUE_STRING
 UNIQUE_FIELD = "Name"
 REPEATED_FIELD = "Mystery"
 NUMERIC_FIELD = "Age"
 STRING_BOOLEAN_FIELD = "Gender"
+BIGQUERY_DATASET_ID = "dlp_test_dataset" + UNIQUE_STRING
+BIGQUERY_TABLE_ID = "dlp_test_table" + UNIQUE_STRING
+BIGQUERY_HARMFUL_TABLE_ID = "harmful" + UNIQUE_STRING


 # Create new custom topic/subscription
 @pytest.fixture(scope="module")
@@ -51,7 +58,9 @@ def subscription_id(topic_id):
     # Subscribes to a topic.
     subscriber = google.cloud.pubsub.SubscriberClient()
     topic_path = subscriber.topic_path(GCLOUD_PROJECT, topic_id)
-    subscription_path = subscriber.subscription_path(GCLOUD_PROJECT, SUBSCRIPTION_ID)
+    subscription_path = subscriber.subscription_path(
+        GCLOUD_PROJECT, SUBSCRIPTION_ID
+    )
     try:
         subscriber.create_subscription(subscription_path, topic_path)
     except google.api_core.exceptions.AlreadyExists:
@@ -62,13 +71,104 @@ def subscription_id(topic_id):
         pass

     yield SUBSCRIPTION_ID

     subscriber.delete_subscription(subscription_path)


+@pytest.fixture(scope="module")
+def bigquery_project():
+    # Adds test Bigquery data, yields the project ID and then tears down.
+
+    bigquery_client = google.cloud.bigquery.Client()
+
+    dataset_ref = bigquery_client.dataset(BIGQUERY_DATASET_ID)
+    dataset = google.cloud.bigquery.Dataset(dataset_ref)
+    try:
+        dataset = bigquery_client.create_dataset(dataset)
+    except google.api_core.exceptions.Conflict:
+        dataset = bigquery_client.get_dataset(dataset)
+    table_ref = dataset_ref.table(BIGQUERY_TABLE_ID)
+    table = google.cloud.bigquery.Table(table_ref)
+
+    harmful_table_ref = dataset_ref.table(BIGQUERY_HARMFUL_TABLE_ID)
+    harmful_table = google.cloud.bigquery.Table(harmful_table_ref)
+
+    table.schema = (
+        google.cloud.bigquery.SchemaField("Name", "STRING"),
+        google.cloud.bigquery.SchemaField("Comment", "STRING"),
+    )
+
+    harmful_table.schema = (
+        google.cloud.bigquery.SchemaField("Name", "STRING", "REQUIRED"),
+        google.cloud.bigquery.SchemaField(
+            "TelephoneNumber", "STRING", "REQUIRED"
+        ),
+        google.cloud.bigquery.SchemaField("Mystery", "STRING", "REQUIRED"),
+        google.cloud.bigquery.SchemaField("Age", "INTEGER", "REQUIRED"),
+        google.cloud.bigquery.SchemaField("Gender", "STRING"),
+        google.cloud.bigquery.SchemaField("RegionCode", "STRING"),
+    )
+
+    try:
+        table = bigquery_client.create_table(table)
+    except google.api_core.exceptions.Conflict:
+        table = bigquery_client.get_table(table)
+
+    try:
+        harmful_table = bigquery_client.create_table(harmful_table)
+    except google.api_core.exceptions.Conflict:
+        harmful_table = bigquery_client.get_table(harmful_table)
+
+    rows_to_insert = [(u"Gary Smith", u"My email is gary@example.com")]
+    harmful_rows_to_insert = [
+        (
+            u"Gandalf",
+            u"(123) 456-7890",
+            "4231 5555 6781 9876",
+            27,
+            "Male",
+            "US",
+        ),
+        (
+            u"Dumbledore",
+            u"(313) 337-1337",
+            "6291 8765 1095 7629",
+            27,
+            "Male",
+            "US",
+        ),
+        (u"Joe", u"(452) 123-1234", "3782 2288 1166 3030", 35, "Male", "US"),
+        (u"James", u"(567) 890-1234", "8291 3627 8250 1234", 19, "Male", "US"),
+        (
+            u"Marie",
+            u"(452) 123-1234",
+            "8291 3627 8250 1234",
+            35,
+            "Female",
+            "US",
+        ),
+        (
+            u"Carrie",
+            u"(567) 890-1234",
+            "2253 5218 4251 4526",
+            35,
+            "Female",
+            "US",
+        ),
+    ]
+
+    bigquery_client.insert_rows(table, rows_to_insert)
+    bigquery_client.insert_rows(harmful_table, harmful_rows_to_insert)
+    yield GCLOUD_PROJECT
+
+    bigquery_client.delete_dataset(dataset_ref, delete_contents=True)
+
+
 @flaky
-def test_numerical_risk_analysis(topic_id, subscription_id, capsys):
+def test_numerical_risk_analysis(
+    topic_id, subscription_id, bigquery_project, capsys
+):
     risk.numerical_risk_analysis(
         GCLOUD_PROJECT,
         TABLE_PROJECT,
-        DATASET_ID,
-        "harmful",
+        BIGQUERY_DATASET_ID,
+        BIGQUERY_HARMFUL_TABLE_ID,
         NUMERIC_FIELD,
         topic_id,
         subscription_id,
@@ -79,12 +179,14 @@ def test_numerical_risk_analysis(topic_id, subscription_id, capsys):


 @flaky
-def test_categorical_risk_analysis_on_string_field(topic_id, subscription_id, capsys):
+def test_categorical_risk_analysis_on_string_field(
+    topic_id, subscription_id, bigquery_project, capsys
+):
     risk.categorical_risk_analysis(
         GCLOUD_PROJECT,
         TABLE_PROJECT,
-        DATASET_ID,
-        "harmful",
+        BIGQUERY_DATASET_ID,
+        BIGQUERY_HARMFUL_TABLE_ID,
         UNIQUE_FIELD,
         topic_id,
         subscription_id,
@@ -96,12 +198,14 @@ def test_categorical_risk_analysis_on_string_field(topic_id, subscription_id, ca


 @flaky
-def test_categorical_risk_analysis_on_number_field(topic_id, subscription_id, capsys):
+def test_categorical_risk_analysis_on_number_field(
+    topic_id, subscription_id, bigquery_project, capsys
+):
     risk.categorical_risk_analysis(
         GCLOUD_PROJECT,
         TABLE_PROJECT,
-        DATASET_ID,
-        "harmful",
+        BIGQUERY_DATASET_ID,
+        BIGQUERY_HARMFUL_TABLE_ID,
         NUMERIC_FIELD,
         topic_id,
         subscription_id,
@@ -112,12 +216,14 @@ def test_categorical_risk_analysis_on_number_field(topic_id, subscription_id, ca


 @flaky
-def test_k_anonymity_analysis_single_field(topic_id, subscription_id, capsys):
+def test_k_anonymity_analysis_single_field(
+    topic_id, subscription_id, bigquery_project, capsys
+):
     risk.k_anonymity_analysis(
         GCLOUD_PROJECT,
         TABLE_PROJECT,
-        DATASET_ID,
-        "harmful",
+        BIGQUERY_DATASET_ID,
+        BIGQUERY_HARMFUL_TABLE_ID,
         topic_id,
         subscription_id,
         [NUMERIC_FIELD],
@@ -129,12 +235,14 @@ def test_k_anonymity_analysis_single_field(topic_id, subscription_id, capsys):


 @flaky
-def test_k_anonymity_analysis_multiple_fields(topic_id, subscription_id, capsys):
+def test_k_anonymity_analysis_multiple_fields(
+    topic_id, subscription_id, bigquery_project, capsys
+):
     risk.k_anonymity_analysis(
         GCLOUD_PROJECT,
         TABLE_PROJECT,
-        DATASET_ID,
-        "harmful",
+        BIGQUERY_DATASET_ID,
+        BIGQUERY_HARMFUL_TABLE_ID,
         topic_id,
         subscription_id,
         [NUMERIC_FIELD, REPEATED_FIELD],
@@ -146,12 +254,14 @@ def test_k_anonymity_analysis_multiple_fields(topic_id, subscription_id, capsys)


 @flaky
-def test_l_diversity_analysis_single_field(topic_id, subscription_id, capsys):
+def test_l_diversity_analysis_single_field(
+    topic_id, subscription_id, bigquery_project, capsys
+):
     risk.l_diversity_analysis(
         GCLOUD_PROJECT,
         TABLE_PROJECT,
-        DATASET_ID,
-        "harmful",
+        BIGQUERY_DATASET_ID,
+        BIGQUERY_HARMFUL_TABLE_ID,
         topic_id,
         subscription_id,
         UNIQUE_FIELD,
@@ -165,12 +275,14 @@ def test_l_diversity_analysis_single_field(topic_id, subscription_id, capsys):


 @flaky
-def test_l_diversity_analysis_multiple_field(topic_id, subscription_id, capsys):
+def test_l_diversity_analysis_multiple_field(
+    topic_id, subscription_id, bigquery_project, capsys
+):
     risk.l_diversity_analysis(
         GCLOUD_PROJECT,
         TABLE_PROJECT,
-        DATASET_ID,
-        "harmful",
+        BIGQUERY_DATASET_ID,
+        BIGQUERY_HARMFUL_TABLE_ID,
         topic_id,
         subscription_id,
         UNIQUE_FIELD,
@@ -184,12 +296,14 @@ def test_l_diversity_analysis_multiple_field(topic_id, subscription_id, capsys):


 @flaky
-def test_k_map_estimate_analysis_single_field(topic_id, subscription_id, capsys):
+def test_k_map_estimate_analysis_single_field(
+    topic_id, subscription_id, bigquery_project, capsys
+):
     risk.k_map_estimate_analysis(
         GCLOUD_PROJECT,
         TABLE_PROJECT,
-        DATASET_ID,
-        "harmful",
+        BIGQUERY_DATASET_ID,
+        BIGQUERY_HARMFUL_TABLE_ID,
         topic_id,
         subscription_id,
         [NUMERIC_FIELD],
@@ -203,12 +317,14 @@ def test_k_map_estimate_analysis_single_field(topic_id, subscription_id, capsys)


 @flaky
-def test_k_map_estimate_analysis_multiple_field(topic_id, subscription_id, capsys):
+def test_k_map_estimate_analysis_multiple_field(
+    topic_id, subscription_id, bigquery_project, capsys
+):
     risk.k_map_estimate_analysis(
         GCLOUD_PROJECT,
         TABLE_PROJECT,
-        DATASET_ID,
-        "harmful",
+        BIGQUERY_DATASET_ID,
+        BIGQUERY_HARMFUL_TABLE_ID,
         topic_id,
         subscription_id,
         [NUMERIC_FIELD, STRING_BOOLEAN_FIELD],
@@ -222,13 +338,15 @@ def test_k_map_estimate_analysis_multiple_field(topic_id, subscription_id, capsy


 @flaky
-def test_k_map_estimate_analysis_quasi_ids_info_types_equal(topic_id, subscription_id):
+def test_k_map_estimate_analysis_quasi_ids_info_types_equal(
+    topic_id, subscription_id, bigquery_project
+):
     with pytest.raises(ValueError):
         risk.k_map_estimate_analysis(
             GCLOUD_PROJECT,
             TABLE_PROJECT,
-            DATASET_ID,
-            "harmful",
+            BIGQUERY_DATASET_ID,
+            BIGQUERY_HARMFUL_TABLE_ID,
             topic_id,
             subscription_id,
             [NUMERIC_FIELD, STRING_BOOLEAN_FIELD],
diff --git a/dlp/templates.py b/dlp/templates.py
index 9e29245a248..5f03b596fc3 100644
--- a/dlp/templates.py
+++ b/dlp/templates.py
@@ -68,7 +68,10 @@ def create_inspect_template(
         "limits": {"max_findings_per_request": max_findings},
     }

-    inspect_template = {"inspect_config": inspect_config, "display_name": display_name}
+    inspect_template = {
+        "inspect_config": inspect_config,
+        "display_name": display_name,
+    }

     # Convert the project id into a full resource id.
     parent = dlp.project_path(project)
@@ -114,12 +117,18 @@ def human_readable_time(timestamp):
         print("Template {}:".format(template.name))
         if template.display_name:
             print("  Display Name: {}".format(template.display_name))
-        print("  Created: {}".format(human_readable_time(template.create_time)))
-        print("  Updated: {}".format(human_readable_time(template.update_time)))
+        print(
+            "  Created: {}".format(human_readable_time(template.create_time))
+        )
+        print(
+            "  Updated: {}".format(human_readable_time(template.update_time))
+        )
         config = template.inspect_config
         print(
-            "  InfoTypes: {}".format(", ".join([it.name for it in config.info_types]))
+            "  InfoTypes: {}".format(
+                ", ".join([it.name for it in config.info_types])
+            )
         )
         print("  Minimum likelihood: {}".format(config.min_likelihood))
         print("  Include quotes: {}".format(config.include_quote))

@@ -176,7 +185,8 @@ def delete_inspect_template(project, template_id):
     parser_create = subparsers.add_parser("create", help="Create a template.")
     parser_create.add_argument(
         "--template_id",
-        help="The id of the template. If omitted, an id will be randomly " "generated",
+        help="The id of the template. If omitted, an id will be randomly "
+        "generated",
     )
     parser_create.add_argument(
         "--display_name", help="The optional display name of the template."
@@ -229,7 +239,9 @@ def delete_inspect_template(project, template_id):
     )

     parser_delete = subparsers.add_parser("delete", help="Delete a template.")
-    parser_delete.add_argument("template_id", help="The id of the template to delete.")
+    parser_delete.add_argument(
+        "template_id", help="The id of the template to delete."
+    )
     parser_delete.add_argument(
         "--project",
         help="The Google Cloud project id to use as a parent resource.",
diff --git a/dlp/templates_test.py b/dlp/templates_test.py
index dff157a9ee6..8ecf8542db6 100644
--- a/dlp/templates_test.py
+++ b/dlp/templates_test.py
@@ -13,15 +13,16 @@
 # limitations under the License.

 import os
+import uuid

 import google.api_core.exceptions
 import google.cloud.storage

 import templates

-
+UNIQUE_STRING = str(uuid.uuid4()).split("-")[0]
 GCLOUD_PROJECT = os.getenv("GCLOUD_PROJECT")
-TEST_TEMPLATE_ID = "test-template"
+TEST_TEMPLATE_ID = "test-template" + UNIQUE_STRING


 def test_create_list_and_delete_template(capsys):
diff --git a/dlp/triggers.py b/dlp/triggers.py
index c786cf6e547..0c2b0bb4e29 100644
--- a/dlp/triggers.py
+++ b/dlp/triggers.py
@@ -92,7 +92,9 @@ def create_trigger(

     # Construct the schedule definition:
     schedule = {
-        "recurrence_period_duration": {"seconds": scan_period_days * 60 * 60 * 24}
+        "recurrence_period_duration": {
+            "seconds": scan_period_days * 60 * 60 * 24
+        }
     }

     # Construct the trigger definition.
@@ -210,7 +212,8 @@ def delete_trigger(project, trigger_id):
     )
     parser_create.add_argument(
         "--trigger_id",
-        help="The id of the trigger. If omitted, an id will be randomly " "generated",
+        help="The id of the trigger. If omitted, an id will be randomly "
+        "generated",
     )
     parser_create.add_argument(
         "--display_name", help="The optional display name of the trigger."
@@ -251,7 +254,9 @@ def delete_trigger(project, trigger_id):
         help="The maximum number of findings to report; 0 = no maximum.",
     )
     parser_create.add_argument(
-        "--auto_populate_timespan", type=bool, help="Limit scan to new content only."
+        "--auto_populate_timespan",
+        type=bool,
+        help="Limit scan to new content only.",
     )

     parser_list = subparsers.add_parser("list", help="List all triggers.")
@@ -262,7 +267,9 @@ def delete_trigger(project, trigger_id):
     )

     parser_delete = subparsers.add_parser("delete", help="Delete a trigger.")
-    parser_delete.add_argument("trigger_id", help="The id of the trigger to delete.")
+    parser_delete.add_argument(
+        "trigger_id", help="The id of the trigger to delete."
+    )
     parser_delete.add_argument(
         "--project",
         help="The Google Cloud project id to use as a parent resource.",
diff --git a/dlp/triggers_test.py b/dlp/triggers_test.py
index 6a9d7d79261..9ecf92e9a37 100644
--- a/dlp/triggers_test.py
+++ b/dlp/triggers_test.py
@@ -13,6 +13,7 @@
 # limitations under the License.

 import os
+import uuid

 import google.api_core.exceptions
 import google.cloud.storage
@@ -21,12 +22,12 @@

 import triggers

-
+UNIQUE_STRING = str(uuid.uuid4()).split("-")[0]
 GCLOUD_PROJECT = os.getenv("GCLOUD_PROJECT")
-TEST_BUCKET_NAME = GCLOUD_PROJECT + "-dlp-python-client-test"
+TEST_BUCKET_NAME = GCLOUD_PROJECT + "-dlp-python-client-test" + UNIQUE_STRING
 RESOURCE_DIRECTORY = os.path.join(os.path.dirname(__file__), "resources")
 RESOURCE_FILE_NAMES = ["test.txt", "test.png", "harmless.txt", "accounts.txt"]
-TEST_TRIGGER_ID = "test-trigger"
+TEST_TRIGGER_ID = "test-trigger" + UNIQUE_STRING


 @pytest.fixture(scope="module")