diff --git a/dlp/jobs.py b/dlp/jobs.py
new file mode 100644
index 00000000000..dbf93419fad
--- /dev/null
+++ b/dlp/jobs.py
@@ -0,0 +1,160 @@
+# Copyright 2017 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Sample app to list and delete DLP jobs with the Data Loss Prevention API."""
+
+from __future__ import print_function
+
+import argparse
+
+
+def list_dlp_jobs(project, filter_string=None, job_type=None):
+    """Uses the Data Loss Prevention API to list DLP jobs that match the
+    specified filter in the request.
+
+    Args:
+        project: The project id to use as a parent resource.
+        filter_string: (Optional) Allows filtering.
+            Supported syntax:
+            * Filter expressions are made up of one or more restrictions.
+            * Restrictions can be combined by 'AND' or 'OR' logical operators.
+              A sequence of restrictions implicitly uses 'AND'.
+            * A restriction has the form of '<field> <operator> <value>'.
+            * Supported fields/values for inspect jobs:
+                - `state` - PENDING|RUNNING|CANCELED|FINISHED|FAILED
+                - `inspected_storage` - DATASTORE|CLOUD_STORAGE|BIGQUERY
+                - `trigger_name` - The resource name of the trigger that
+                  created job.
+            * Supported fields for risk analysis jobs:
+                - `state` - RUNNING|CANCELED|FINISHED|FAILED
+            * The operator must be '=' or '!='.
+            Examples:
+            * inspected_storage = cloud_storage AND state = done
+            * inspected_storage = cloud_storage OR inspected_storage = bigquery
+            * inspected_storage = cloud_storage AND
+              (state = done OR state = canceled)
+        job_type: (Optional) The type of job. When omitted, the API defaults
+            to inspect jobs.
+            Choices:
+            DLP_JOB_TYPE_UNSPECIFIED
+            INSPECT_JOB: The job inspected content for sensitive data.
+            RISK_ANALYSIS_JOB: The job executed a Risk Analysis computation.
+
+    Returns:
+        None; the response from the API is printed to the terminal.
+
+    Raises:
+        KeyError: If job_type is set but is not one of the choices above.
+    """
+
+    # Import the client library.
+    import google.cloud.dlp
+
+    # Instantiate a client.
+    dlp = google.cloud.dlp.DlpServiceClient()
+
+    # Convert the project id into a full resource id.
+    parent = dlp.project_path(project)
+
+    # Job type dictionary
+    job_type_to_int = {
+        'DLP_JOB_TYPE_UNSPECIFIED':
+            google.cloud.dlp.enums.DlpJobType.DLP_JOB_TYPE_UNSPECIFIED,
+        'INSPECT_JOB': google.cloud.dlp.enums.DlpJobType.INSPECT_JOB,
+        'RISK_ANALYSIS_JOB':
+            google.cloud.dlp.enums.DlpJobType.RISK_ANALYSIS_JOB
+    }
+    # If job type is specified, convert job type to number through enums.
+    if job_type:
+        job_type = job_type_to_int[job_type]
+
+    # Call the API to get a list of jobs.
+    response = dlp.list_dlp_jobs(
+        parent,
+        filter_=filter_string,
+        type_=job_type)
+
+    # Iterate over results.
+    for job in response:
+        print('Job: %s; status: %s' % (job.name, job.JobState.Name(job.state)))
+
+
+def delete_dlp_job(project, job_name):
+    """Uses the Data Loss Prevention API to delete a long-running DLP job.
+
+    Args:
+        project: The project id to use as a parent resource.
+        job_name: The name of the DlpJob resource to be deleted.
+
+    Returns:
+        None; a confirmation message is printed to the terminal.
+    """
+
+    # Import the client library.
+    import google.cloud.dlp
+
+    # Instantiate a client.
+    dlp = google.cloud.dlp.DlpServiceClient()
+
+    # Convert the project id and job name into a full resource id.
+    name = dlp.dlp_job_path(project, job_name)
+
+    # Call the API to delete job.
+    dlp.delete_dlp_job(name)
+
+    print('Successfully deleted %s' % job_name)
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description=__doc__)
+    subparsers = parser.add_subparsers(
+        dest='content', help='Select how to submit content to the API.')
+    subparsers.required = True
+
+    list_parser = subparsers.add_parser(
+        'list',
+        help='List Data Loss Prevention API jobs corresponding to a given '
+             'filter.')
+    list_parser.add_argument(
+        'project',
+        help='The project id to use as a parent resource.')
+    list_parser.add_argument(
+        '-f', '--filter',
+        help='Filter expressions are made up of one or more restrictions.')
+    list_parser.add_argument(
+        '-t', '--type',
+        choices=['DLP_JOB_TYPE_UNSPECIFIED', 'INSPECT_JOB',
+                 'RISK_ANALYSIS_JOB'],
+        help='The type of job. API defaults to "INSPECT"')
+
+    delete_parser = subparsers.add_parser(
+        'delete',
+        help='Delete results of a Data Loss Prevention API job.')
+    delete_parser.add_argument(
+        'project',
+        help='The project id to use as a parent resource.')
+    delete_parser.add_argument(
+        'job_name',
+        help='The name of the DlpJob resource to be deleted. '
+             'Example: X-#####')
+
+    args = parser.parse_args()
+
+    if args.content == 'list':
+        list_dlp_jobs(
+            args.project,
+            filter_string=args.filter,
+            job_type=args.type)
+    elif args.content == 'delete':
+        delete_dlp_job(args.project, args.job_name)
diff --git a/dlp/jobs_test.py b/dlp/jobs_test.py
new file mode 100644
index 00000000000..87c39d4c3cc
--- /dev/null
+++ b/dlp/jobs_test.py
@@ -0,0 +1,85 @@
+# Copyright 2017 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the 'License');
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an 'AS IS' BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+
+import pytest
+
+import jobs
+
+GCLOUD_PROJECT = os.getenv('GCLOUD_PROJECT')
+TEST_COLUMN_NAME = 'zip_code'
+TEST_TABLE_PROJECT_ID = 'bigquery-public-data'
+TEST_DATASET_ID = 'san_francisco'
+TEST_TABLE_ID = 'bikeshare_trips'
+
+
+@pytest.fixture(scope='session')
+def create_test_job():
+    """Creates a risk analysis job to delete and returns its short name."""
+    import google.cloud.dlp
+    dlp = google.cloud.dlp.DlpServiceClient()
+
+    parent = dlp.project_path(GCLOUD_PROJECT)
+
+    # Construct job request
+    risk_job = {
+        'privacy_metric': {
+            'categorical_stats_config': {
+                'field': {
+                    'name': TEST_COLUMN_NAME
+                }
+            }
+        },
+        'source_table': {
+            'project_id': TEST_TABLE_PROJECT_ID,
+            'dataset_id': TEST_DATASET_ID,
+            'table_id': TEST_TABLE_ID
+        }
+    }
+
+    response = dlp.create_dlp_job(parent, risk_job=risk_job)
+    full_path = response.name
+    # API expects only job name, not full project path
+    job_name = full_path[full_path.rfind('/')+1:]
+    return job_name
+
+
+def test_list_dlp_jobs(capsys):
+    jobs.list_dlp_jobs(GCLOUD_PROJECT)
+
+    out, _ = capsys.readouterr()
+    assert 'Job: projects/' in out
+
+
+def test_list_dlp_jobs_with_filter(capsys):
+    jobs.list_dlp_jobs(GCLOUD_PROJECT, filter_string='state=DONE')
+
+    out, _ = capsys.readouterr()
+    assert 'Job: projects/' in out
+
+
+def test_list_dlp_jobs_with_job_type(capsys):
+    jobs.list_dlp_jobs(GCLOUD_PROJECT, job_type='INSPECT_JOB')
+
+    out, _ = capsys.readouterr()
+    assert 'Job: projects/' in out
+
+
+def test_delete_dlp_job(capsys, create_test_job):
+    # Request the fixture as an argument; pytest rejects direct fixture calls.
+    jobs.delete_dlp_job(GCLOUD_PROJECT, create_test_job)
+
+    out, _ = capsys.readouterr()
+    assert 'Successfully deleted' in out