From 56c079743b4422fbe4194d3693b4c9c81aee0059 Mon Sep 17 00:00:00 2001 From: Averi Kitsch Date: Thu, 15 Mar 2018 12:05:30 -0700 Subject: [PATCH 1/3] job samples and tests --- dlp/jobs.py | 141 +++++++++++++++++++++++++++++++++++++++++++++++ dlp/jobs_test.py | 80 +++++++++++++++++++++++++++ 2 files changed, 221 insertions(+) create mode 100644 dlp/jobs.py create mode 100644 dlp/jobs_test.py diff --git a/dlp/jobs.py b/dlp/jobs.py new file mode 100644 index 00000000000..cf71c5c0d27 --- /dev/null +++ b/dlp/jobs.py @@ -0,0 +1,141 @@ +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Sample app to list and delete DLP jobs using the Data Loss Prevent API. """ + +from __future__ import print_function + +import argparse + + +def list_dlp_jobs(project, filter_string=None, job_type=None): + """Uses the Data Loss Prevention API to lists DLP jobs that match the + specified filter in the request. + Args: + project: The Google Cloud project id to use as a parent resource. + filter: (Optional) Filter expressions are made up of one or more + restrictions. + type: (Optional) The type of job. Defaults to 'INSPECT'. + Choices: + DLP_JOB_TYPE_UNSPECIFIED + INSPECT_JOB: The job inspected Google Cloud for sensitive data. + RISK_ANALYSIS_JOB: The job executed a Risk Analysis computation. + Returns: + None; the response from the API is printed to the terminal. + """ + + # Import the client library. + import google.cloud.dlp + + # Instantiate a client. 
+ dlp = google.cloud.dlp.DlpServiceClient() + + # Convert the project id into a full resource id. + parent = dlp.project_path(project) + + # If job type is specified, convert job type to number through enums. + from google.cloud.dlp_v2 import enums + # Job type dictionary + job_type_to_int = { + 'UNSPECIFIED': enums.DlpJobType.DLP_JOB_TYPE_UNSPECIFIED, + 'INSPECT': enums.DlpJobType.INSPECT_JOB, + 'RISK_ANALYSIS': enums.DlpJobType.RISK_ANALYSIS_JOB + } + if job_type: + job_type = job_type_to_int[job_type] + + # Call the API to get a list of jobs. + response = dlp.list_dlp_jobs( + parent, + filter_=filter_string, + type_=job_type) + + # Iterate over results. + for job in response: + print('Job: %s; status: %s' % (job.name, job.JobState.Name(job.state))) + info_type_stats = job.inspect_details.result.info_type_stats + if len(info_type_stats) > 0: + for info_type_stat in info_type_stats: + print( + ' Found %i instance(s) of info_type %s' % + (info_type_stat.count, info_type_stat.info_type.name)) + else: + print(' No findings.') + + +def delete_dlp_job(project, job_name): + """Uses the Data Loss Prevention API to delete a long-running DLP job. + Args: + project: The Google Cloud project id to use as a parent resource. + job_name: The name of the DlpJob resource to be deleted. + Returns: + None; the response from the API is printed to the terminal. + """ + + # Import the client library. + import google.cloud.dlp + + # Instantiate a client. + dlp = google.cloud.dlp.DlpServiceClient() + + # Convert the project id and job name into a full resource id. + name = dlp.dlp_job_path(project, job_name) + + # Call the API to delete job. 
+ dlp.delete_dlp_job(name) + + print('Successfully deleted %s' % job_name) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description=__doc__) + subparsers = parser.add_subparsers( + dest='content', help='Select how to submit content to the API.') + subparsers.required = True + + list_parser = subparsers.add_parser( + 'list', + help='List Data Loss Prevention API jobs corresponding to a given ' + 'filter.') + list_parser.add_argument( + 'project', + help='The Google Cloud project id to use as a parent resource.') + list_parser.add_argument( + '-f', '--filter', + help='Filter expressions are made up of one or more restrictions.') + list_parser.add_argument( + '-t', '--type', + choices=['UNSPECIFIED', 'INSPECT', 'RISK_ANALYSIS'], + help='The type of job. API defaults to "INSPECT"') + + delete_parser = subparsers.add_parser( + 'delete', + help='Delete results of a Data Loss Prevention API job.') + delete_parser.add_argument( + 'project', + help='The Google Cloud project id to use as a parent resource.') + delete_parser.add_argument( + 'job_name', + help='The name of the DlpJob resource to be deleted. ' + 'Example: X-#####') + + args = parser.parse_args() + + if args.content == 'list': + list_dlp_jobs( + args.project, + filter_string=args.filter, + job_type=args.type) + elif args.content == 'delete': + delete_dlp_job(args.project, args.job_name) diff --git a/dlp/jobs_test.py b/dlp/jobs_test.py new file mode 100644 index 00000000000..1922d4e6b6e --- /dev/null +++ b/dlp/jobs_test.py @@ -0,0 +1,80 @@ +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an 'AS IS' BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import jobs + +import pytest +import os + +GCLOUD_PROJECT = os.getenv('GCLOUD_PROJECT') +TEST_COLUMN_NAME = 'zip_code' +TEST_TABLE_PROJECT_ID = 'bigquery-public-data' +TEST_DATASET_ID = 'san_francisco' +TEST_TABLE_ID = 'bikeshare_trips' + + +@pytest.fixture(scope='session') +def create_test_job(): + import google.cloud.dlp + dlp = google.cloud.dlp.DlpServiceClient() + + parent = dlp.project_path(GCLOUD_PROJECT) + + # Construct job request + risk_job = { + 'privacy_metric': { + 'categorical_stats_config': { + 'field': { + 'name': TEST_COLUMN_NAME + } + } + }, + 'source_table': { + 'project_id': TEST_TABLE_PROJECT_ID, + 'dataset_id': TEST_DATASET_ID, + 'table_id': TEST_TABLE_ID + } + } + + response = dlp.create_dlp_job(parent, risk_job=risk_job) + full_path = response.name + # API expects only job name, not full project path + job_name = full_path[full_path.rfind('/')+1:] + return job_name + + +def test_list_dlp_jobs(capsys): + jobs.list_dlp_jobs(GCLOUD_PROJECT) + + out, _ = capsys.readouterr() + assert 'Job: projects/' in out + + +def test_list_dlp_jobs_with_filter(capsys): + jobs.list_dlp_jobs(GCLOUD_PROJECT, filter_string='state=DONE') + + out, _ = capsys.readouterr() + assert 'Job: projects/' in out + + +def test_list_dlp_jobs_with_job_type(capsys): + jobs.list_dlp_jobs(GCLOUD_PROJECT, job_type='INSPECT') + + out, _ = capsys.readouterr() + assert 'Job: projects/' in out + + +def test_delete_dlp_job(capsys): + test_job_name = create_test_job() + jobs.delete_dlp_job(GCLOUD_PROJECT, test_job_name) From bbef4e0f789db2c9fa938fb126a7a663aef3baa4 Mon Sep 17 
00:00:00 2001 From: Averi Kitsch Date: Fri, 16 Mar 2018 09:25:47 -0700 Subject: [PATCH 2/3] in response to PR --- dlp/jobs.py | 46 +++++++++++++++++++++++++++++----------------- dlp/jobs_test.py | 2 +- 2 files changed, 30 insertions(+), 18 deletions(-) diff --git a/dlp/jobs.py b/dlp/jobs.py index cf71c5c0d27..ce10354d123 100644 --- a/dlp/jobs.py +++ b/dlp/jobs.py @@ -24,13 +24,31 @@ def list_dlp_jobs(project, filter_string=None, job_type=None): specified filter in the request. Args: project: The Google Cloud project id to use as a parent resource. - filter: (Optional) Filter expressions are made up of one or more - restrictions. + filter: (Optional) Allows filtering. + Supported syntax: + * Filter expressions are made up of one or more restrictions. + * Restrictions can be combined by 'AND' or 'OR' logical operators. + A sequence of restrictions implicitly uses 'AND'. + * A restriction has the form of '<field> <operator> <value>'. + * Supported fields/values for inspect jobs: + - `state` - PENDING|RUNNING|CANCELED|FINISHED|FAILED + - `inspected_storage` - DATASTORE|CLOUD_STORAGE|BIGQUERY + - `trigger_name` - The resource name of the trigger that + created job. + * Supported fields for risk analysis jobs: + - `state` - RUNNING|CANCELED|FINISHED|FAILED + * The operator must be '=' or '!='. + Examples: + * inspected_storage = cloud_storage AND state = done + * inspected_storage = cloud_storage OR inspected_storage = bigquery + * inspected_storage = cloud_storage AND + (state = done OR state = canceled) type: (Optional) The type of job. Defaults to 'INSPECT'. Choices: DLP_JOB_TYPE_UNSPECIFIED - INSPECT_JOB: The job inspected Google Cloud for sensitive data. + INSPECT_JOB: The job inspected content for sensitive data. RISK_ANALYSIS_JOB: The job executed a Risk Analysis computation. + Returns: None; the response from the API is printed to the terminal. """ @@ -44,14 +62,15 @@ def list_dlp_jobs(project, filter_string=None, job_type=None): # Convert the project id into a full resource id.
parent = dlp.project_path(project) - # If job type is specified, convert job type to number through enums. - from google.cloud.dlp_v2 import enums # Job type dictionary job_type_to_int = { - 'UNSPECIFIED': enums.DlpJobType.DLP_JOB_TYPE_UNSPECIFIED, - 'INSPECT': enums.DlpJobType.INSPECT_JOB, - 'RISK_ANALYSIS': enums.DlpJobType.RISK_ANALYSIS_JOB + 'DLP_JOB_TYPE_UNSPECIFIED': + google.cloud.dlp.enums.DlpJobType.DLP_JOB_TYPE_UNSPECIFIED, + 'INSPECT_JOB': google.cloud.dlp.enums.DlpJobType.INSPECT_JOB, + 'RISK_ANALYSIS_JOB': + google.cloud.dlp.enums.DlpJobType.RISK_ANALYSIS_JOB } + # If job type is specified, convert job type to number through enums. if job_type: job_type = job_type_to_int[job_type] @@ -64,14 +83,6 @@ def list_dlp_jobs(project, filter_string=None, job_type=None): # Iterate over results. for job in response: print('Job: %s; status: %s' % (job.name, job.JobState.Name(job.state))) - info_type_stats = job.inspect_details.result.info_type_stats - if len(info_type_stats) > 0: - for info_type_stat in info_type_stats: - print( - ' Found %i instance(s) of info_type %s' % - (info_type_stat.count, info_type_stat.info_type.name)) - else: - print(' No findings.') def delete_dlp_job(project, job_name): @@ -79,6 +90,7 @@ def delete_dlp_job(project, job_name): Args: project: The Google Cloud project id to use as a parent resource. job_name: The name of the DlpJob resource to be deleted. + Returns: None; the response from the API is printed to the terminal. """ @@ -116,7 +128,7 @@ def delete_dlp_job(project, job_name): help='Filter expressions are made up of one or more restrictions.') list_parser.add_argument( '-t', '--type', - choices=['UNSPECIFIED', 'INSPECT', 'RISK_ANALYSIS'], + choices=['DLP_JOB_TYPE_UNSPECIFIED', 'INSPECT_JOB', 'RISK_ANALYSIS_JOB'], help='The type of job. 
API defaults to "INSPECT"') delete_parser = subparsers.add_parser( diff --git a/dlp/jobs_test.py b/dlp/jobs_test.py index 1922d4e6b6e..69648dc018b 100644 --- a/dlp/jobs_test.py +++ b/dlp/jobs_test.py @@ -69,7 +69,7 @@ def test_list_dlp_jobs_with_filter(capsys): def test_list_dlp_jobs_with_job_type(capsys): - jobs.list_dlp_jobs(GCLOUD_PROJECT, job_type='INSPECT') + jobs.list_dlp_jobs(GCLOUD_PROJECT, job_type='INSPECT_JOB') out, _ = capsys.readouterr() assert 'Job: projects/' in out From 50e7adf8a66d8e700547225c664b2d7432787271 Mon Sep 17 00:00:00 2001 From: Averi Kitsch Date: Fri, 16 Mar 2018 15:56:43 -0700 Subject: [PATCH 3/3] Removed Google Cloud from docstrings --- dlp/jobs.py | 11 ++++++----- dlp/jobs_test.py | 5 +++-- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/dlp/jobs.py b/dlp/jobs.py index ce10354d123..dbf93419fad 100644 --- a/dlp/jobs.py +++ b/dlp/jobs.py @@ -23,7 +23,7 @@ def list_dlp_jobs(project, filter_string=None, job_type=None): """Uses the Data Loss Prevention API to lists DLP jobs that match the specified filter in the request. Args: - project: The Google Cloud project id to use as a parent resource. + project: The project id to use as a parent resource. filter: (Optional) Allows filtering. Supported syntax: * Filter expressions are made up of one or more restrictions. @@ -88,7 +88,7 @@ def list_dlp_jobs(project, filter_string=None, job_type=None): def delete_dlp_job(project, job_name): """Uses the Data Loss Prevention API to delete a long-running DLP job. Args: - project: The Google Cloud project id to use as a parent resource. + project: The project id to use as a parent resource. job_name: The name of the DlpJob resource to be deleted. 
Returns: @@ -122,13 +122,14 @@ def delete_dlp_job(project, job_name): 'filter.') list_parser.add_argument( 'project', - help='The Google Cloud project id to use as a parent resource.') + help='The project id to use as a parent resource.') list_parser.add_argument( '-f', '--filter', help='Filter expressions are made up of one or more restrictions.') list_parser.add_argument( '-t', '--type', - choices=['DLP_JOB_TYPE_UNSPECIFIED', 'INSPECT_JOB', 'RISK_ANALYSIS_JOB'], + choices=['DLP_JOB_TYPE_UNSPECIFIED', 'INSPECT_JOB', + 'RISK_ANALYSIS_JOB'], help='The type of job. API defaults to "INSPECT"') delete_parser = subparsers.add_parser( @@ -136,7 +137,7 @@ def delete_dlp_job(project, job_name): help='Delete results of a Data Loss Prevention API job.') delete_parser.add_argument( 'project', - help='The Google Cloud project id to use as a parent resource.') + help='The project id to use as a parent resource.') delete_parser.add_argument( 'job_name', help='The name of the DlpJob resource to be deleted. ' diff --git a/dlp/jobs_test.py b/dlp/jobs_test.py index 69648dc018b..87c39d4c3cc 100644 --- a/dlp/jobs_test.py +++ b/dlp/jobs_test.py @@ -12,10 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -import jobs +import os import pytest -import os + +import jobs GCLOUD_PROJECT = os.getenv('GCLOUD_PROJECT') TEST_COLUMN_NAME = 'zip_code'