Skip to content

Commit 90a1166

Browse files
authored
add Jobs samples (GoogleCloudPlatform#1405)
* job samples and tests * changes in response to PR * Removed Google Cloud from docstrings
1 parent 1decee1 commit 90a1166

File tree

2 files changed

+235
-0
lines changed

2 files changed

+235
-0
lines changed

dlp/jobs.py

Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,154 @@
1+
# Copyright 2017 Google Inc.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""Sample app to list and delete DLP jobs using the Data Loss Prevention API."""
16+
17+
from __future__ import print_function
18+
19+
import argparse
20+
21+
22+
def list_dlp_jobs(project, filter_string=None, job_type=None):
    """Print the name and state of every DLP job returned for a project.

    Args:
        project: The project id to use as a parent resource.
        filter_string: (Optional) Filter expression forwarded to the API.
            Supported syntax:
                * Filter expressions are made up of one or more
                  restrictions.
                * Restrictions can be combined by 'AND' or 'OR' logical
                  operators. A sequence of restrictions implicitly uses
                  'AND'.
                * A restriction has the form of
                  '<field> <operator> <value>'.
                * Supported fields/values for inspect jobs:
                    - `state` - PENDING|RUNNING|CANCELED|FINISHED|FAILED
                    - `inspected_storage` -
                      DATASTORE|CLOUD_STORAGE|BIGQUERY
                    - `trigger_name` - The resource name of the trigger
                      that created job.
                * Supported fields for risk analysis jobs:
                    - `state` - RUNNING|CANCELED|FINISHED|FAILED
                * The operator must be '=' or '!='.
            Examples:
                * inspected_storage = cloud_storage AND state = done
                * inspected_storage = cloud_storage OR
                  inspected_storage = bigquery
                * inspected_storage = cloud_storage AND
                  (state = done OR state = canceled)
        job_type: (Optional) Name of the job type to list. A falsy value
            is forwarded to the API unchanged. Choices:
                DLP_JOB_TYPE_UNSPECIFIED
                INSPECT_JOB: The job inspected content for sensitive data.
                RISK_ANALYSIS_JOB: The job executed a Risk Analysis
                    computation.

    Returns:
        None; one 'Job: <name>; status: <state>' line per job is printed
        to the terminal.

    Raises:
        KeyError: If job_type is a non-empty value that is not one of the
            choices listed above.
    """

    # Import the client library.
    from google.cloud import dlp as dlp_lib

    # Instantiate a client and expand the project id to a resource id.
    client = dlp_lib.DlpServiceClient()
    parent_path = client.project_path(project)

    # Map each accepted job type name to its enum value.
    job_type_enum = dlp_lib.enums.DlpJobType
    job_type_to_int = {
        type_name: getattr(job_type_enum, type_name)
        for type_name in (
            'DLP_JOB_TYPE_UNSPECIFIED',
            'INSPECT_JOB',
            'RISK_ANALYSIS_JOB',
        )
    }

    # Only translate the job type when one was actually supplied.
    type_value = job_type_to_int[job_type] if job_type else job_type

    # Call the API and report each job that comes back.
    found_jobs = client.list_dlp_jobs(
        parent_path, filter_=filter_string, type_=type_value)
    for job in found_jobs:
        state_name = job.JobState.Name(job.state)
        print('Job: %s; status: %s' % (job.name, state_name))
86+
87+
88+
def delete_dlp_job(project, job_name):
    """Delete one DLP job through the Data Loss Prevention API.

    Args:
        project: The project id to use as a parent resource.
        job_name: The name of the DlpJob resource to be deleted.

    Returns:
        None; a 'Successfully deleted <job_name>' message is printed to
        the terminal once the delete call returns.
    """

    # Import the client library.
    from google.cloud import dlp as dlp_lib

    client = dlp_lib.DlpServiceClient()

    # The API call takes the full resource id built from project + job name.
    client.delete_dlp_job(client.dlp_job_path(project, job_name))

    print('Successfully deleted %s' % job_name)
111+
112+
113+
if __name__ == '__main__':
    # Help text shared by the positional `project` argument of both commands.
    project_help = 'The project id to use as a parent resource.'

    parser = argparse.ArgumentParser(description=__doc__)
    commands = parser.add_subparsers(
        dest='content', help='Select how to submit content to the API.')
    # A sub-command must always be supplied.
    commands.required = True

    # `list` sub-command: project plus optional filter and job type.
    list_cmd = commands.add_parser(
        'list',
        help='List Data Loss Prevention API jobs corresponding to a given '
             'filter.')
    list_cmd.add_argument('project', help=project_help)
    list_cmd.add_argument(
        '-f', '--filter',
        help='Filter expressions are made up of one or more restrictions.')
    list_cmd.add_argument(
        '-t', '--type',
        choices=['DLP_JOB_TYPE_UNSPECIFIED', 'INSPECT_JOB',
                 'RISK_ANALYSIS_JOB'],
        help='The type of job. API defaults to "INSPECT"')

    # `delete` sub-command: project plus the job to remove.
    delete_cmd = commands.add_parser(
        'delete',
        help='Delete results of a Data Loss Prevention API job.')
    delete_cmd.add_argument('project', help=project_help)
    delete_cmd.add_argument(
        'job_name',
        help='The name of the DlpJob resource to be deleted. '
             'Example: X-#####')

    args = parser.parse_args()

    # Dispatch on the chosen sub-command (stored under `content`).
    if args.content == 'delete':
        delete_dlp_job(args.project, args.job_name)
    elif args.content == 'list':
        list_dlp_jobs(
            args.project,
            filter_string=args.filter,
            job_type=args.type)

dlp/jobs_test.py

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
# Copyright 2017 Google Inc.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the 'License');
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an 'AS IS' BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import os
16+
17+
import pytest
18+
19+
import jobs
20+
21+
# Project the tests run against; None when the variable is not set.
GCLOUD_PROJECT = os.environ.get('GCLOUD_PROJECT')

# Source table and column named in the create_test_job risk job request.
TEST_TABLE_PROJECT_ID = 'bigquery-public-data'
TEST_DATASET_ID = 'san_francisco'
TEST_TABLE_ID = 'bikeshare_trips'
TEST_COLUMN_NAME = 'zip_code'
26+
27+
28+
@pytest.fixture(scope='session')
def create_test_job():
    """Create a risk analysis DLP job and return its short name.

    The request asks for categorical stats on TEST_COLUMN_NAME of the table
    identified by TEST_TABLE_PROJECT_ID / TEST_DATASET_ID / TEST_TABLE_ID,
    and is submitted under the GCLOUD_PROJECT parent.

    Returns:
        The final path segment of the created job's resource name (the
        text after the last '/').
    """
    from google.cloud import dlp as dlp_lib

    client = dlp_lib.DlpServiceClient()

    # Assemble the job request from its two parts.
    privacy_metric = {
        'categorical_stats_config': {
            'field': {'name': TEST_COLUMN_NAME},
        },
    }
    source_table = {
        'project_id': TEST_TABLE_PROJECT_ID,
        'dataset_id': TEST_DATASET_ID,
        'table_id': TEST_TABLE_ID,
    }
    risk_job = {
        'privacy_metric': privacy_metric,
        'source_table': source_table,
    }

    created = client.create_dlp_job(
        client.project_path(GCLOUD_PROJECT), risk_job=risk_job)

    # API expects only job name, not full project path
    return created.name.rsplit('/', 1)[-1]
56+
57+
58+
def test_list_dlp_jobs(capsys):
    """Listing without filter or job type prints a 'Job: projects/' line."""
    jobs.list_dlp_jobs(GCLOUD_PROJECT)

    # Only captured stdout is of interest.
    stdout = capsys.readouterr()[0]
    assert 'Job: projects/' in stdout
63+
64+
65+
def test_list_dlp_jobs_with_filter(capsys):
    """Listing with a 'state=DONE' filter prints a 'Job: projects/' line."""
    jobs.list_dlp_jobs(GCLOUD_PROJECT, filter_string='state=DONE')

    # Only captured stdout is of interest.
    stdout = capsys.readouterr()[0]
    assert 'Job: projects/' in stdout
70+
71+
72+
def test_list_dlp_jobs_with_job_type(capsys):
    """Listing with job type INSPECT_JOB prints a 'Job: projects/' line."""
    jobs.list_dlp_jobs(GCLOUD_PROJECT, job_type='INSPECT_JOB')

    # Only captured stdout is of interest.
    stdout = capsys.readouterr()[0]
    assert 'Job: projects/' in stdout
77+
78+
79+
def test_delete_dlp_job(capsys, create_test_job):
    """Deleting a freshly created job prints the confirmation message.

    The job name comes from the `create_test_job` fixture, which pytest
    injects by argument name. Fixtures must be requested this way; calling
    the fixture function directly is rejected by pytest.
    """
    jobs.delete_dlp_job(GCLOUD_PROJECT, create_test_job)

    # delete_dlp_job prints 'Successfully deleted <job_name>' on success.
    out, _ = capsys.readouterr()
    assert 'Successfully deleted' in out

0 commit comments

Comments
 (0)