Skip to content

Commit 38c29a4

Browse files
docs: Bigtable Load generator tool (GoogleCloudPlatform#6372)
* Working workload generator pipeline * Clean up metadata and pom * Add tests, README * fix template path * Cleanup some text * fix regexes * fix shaded * Update deployment instructions * Update POM comment * Lint * Rename QPS variable * Fix name regexes * Fix new lines * Update test to use monitoring and actually run dataflow job on a table * fix lint issue: VariableDeclarationUsageDistance * Update bigtable/beam/workload-generator/README.md Co-authored-by: kolea2 <45548808+kolea2@users.noreply.github.com> * Update bigtable/beam/workload-generator/README.md Co-authored-by: kolea2 <45548808+kolea2@users.noreply.github.com> * responding to kolea2's review * new line fix Co-authored-by: kolea2 <45548808+kolea2@users.noreply.github.com>
1 parent 90d105e commit 38c29a4

File tree

5 files changed

+544
-0
lines changed

5 files changed

+544
-0
lines changed
+100
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
# Bigtable workload generator
2+
3+
This is a tool to perform a high number of reads to a Bigtable table for
4+
demonstration purposes. It is deployed as a Dataflow template, so it can easily
5+
be run as a Dataflow job.
6+
7+
## Template
8+
9+
### Running
10+
11+
1. Set your environment variables
12+
13+
```
14+
TEMPLATE_PATH="gs://cloud-bigtable-dataflow-templates/generate-workload.json"
15+
INSTANCE_ID=YOUR-INSTANCE-ID
16+
TABLE_ID=YOUR-TABLE-ID
17+
REGION=us-central1
18+
WORKLOAD_QPS=100 # Optional
19+
```
20+
21+
1. Run this command to start a job from dataflow template:
22+
23+
```
24+
JOB_NAME="generate-bigtable-workload-`date +%Y%m%d-%H%M%S`"
25+
gcloud dataflow flex-template run $JOB_NAME \
26+
--template-file-gcs-location "$TEMPLATE_PATH" \
27+
--parameters bigtableInstanceId="$INSTANCE_ID" \
28+
--parameters bigtableTableId="$TABLE_ID" \
29+
--region "$REGION" \
30+
--parameters workloadRate=$WORKLOAD_QPS
31+
```
32+
33+
1. Make sure to cancel the job once you are done.
34+
35+
```
36+
gcloud dataflow jobs cancel $JOB_NAME
37+
```
38+
39+
### Deploying a template instructions
40+
41+
These instructions are for maintenance of the workload generator, but if you
42+
would like to modify this example and deploy the template yourself, you can
43+
follow them to do so.
44+
45+
1. Build the project
46+
47+
```
48+
mvn clean package -DskipTests
49+
```
50+
51+
1. Set the environment variables. To deploy a version on your project, update
52+
these with your own resources as described in the [Using Flex Templates](https://cloud.google.com/dataflow/docs/guides/templates/using-flex-templates)
53+
documentation.
54+
55+
```
56+
export TEMPLATE_PATH="gs://cloud-bigtable-dataflow-templates/generate-workload.json"
57+
export TEMPLATE_IMAGE="gcr.io/cloud-bigtable-ecosystem/dataflow/generate-workload:latest"
58+
export LOGS_PATH="gs://cloud-bigtable-dataflow-templates-logs/workload-generator"
59+
```
60+
61+
1. Deploy the template
62+
63+
```
64+
gcloud dataflow flex-template build $TEMPLATE_PATH \
65+
--image-gcr-path "$TEMPLATE_IMAGE" \
66+
--sdk-language "JAVA" \
67+
--flex-template-base-image JAVA11 \
68+
--metadata-file "metadata.json" \
69+
--jar "target/workload-generator-0.1.jar" \
70+
--env FLEX_TEMPLATE_JAVA_MAIN_CLASS="bigtable.WorkloadGenerator" \
71+
--gcs-log-dir="$LOGS_PATH"
72+
```
73+
74+
Note: Make sure your account or service account has cloudbuild and storage permissions.
75+
76+
## Building and running
77+
78+
If you would like to modify this and run it yourself you can use these commands:
79+
80+
1. Create a Bigtable instance and table
81+
82+
1. Set up the environment variables
83+
84+
```
85+
GOOGLE_CLOUD_PROJECT=your-project-id
86+
INSTANCE_ID=your-instance-id
87+
REGION=us-central1
88+
TABLE_ID=your-table-id
89+
WORKLOAD_QPS=100 # Optional
90+
```
91+
92+
1. Run the command
93+
94+
```
95+
mvn compile exec:java -Dexec.mainClass=bigtable.WorkloadGenerator \
"-Dexec.args=--bigtableInstanceId=$INSTANCE_ID --bigtableTableId=$TABLE_ID \
--runner=dataflow --project=$GOOGLE_CLOUD_PROJECT \
--region=$REGION \
--workloadRate=$WORKLOAD_QPS"
100+
```
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
{
2+
"name": "Bigtable workload generator",
3+
"description": "An Apache Beam that puts a specified read QPS to a Bigtable table.",
4+
"parameters": [
5+
{
6+
"name": "bigtableInstanceId",
7+
"label": "Bigtable instance",
8+
"helpText": "Bigtable instance to read from.",
9+
"regexes": [
10+
"[A-Za-z]+[-A-Za-z0-9]*[A-Za-z0-9]+"
11+
]
12+
},
13+
{
14+
"name": "bigtableTableId",
15+
"label": "Bigtable table",
16+
"helpText": "Bigtable table to read from.",
17+
"regexes": [
18+
"[A-Za-z]+[-A-Za-z0-9]*[A-Za-z0-9]+"
19+
]
20+
},
21+
{
22+
"name": "workloadRate",
23+
"label": "Workload Rate",
24+
"helpText": "The QPS to put on the table (default 1000)",
25+
"isOptional": true,
26+
"regexes": [
27+
"[0-9]+"
28+
]
29+
}
30+
]
31+
}
+123
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<!--
3+
Copyright 2021 Google LLC
4+
5+
Licensed under the Apache License, Version 2.0 (the "License");
6+
you may not use this file except in compliance with the License.
7+
You may obtain a copy of the License at
8+
9+
http://www.apache.org/licenses/LICENSE-2.0
10+
11+
Unless required by applicable law or agreed to in writing, software
12+
distributed under the License is distributed on an "AS IS" BASIS,
13+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
See the License for the specific language governing permissions and
15+
limitations under the License.
16+
-->
17+
<project xmlns="http://maven.apache.org/POM/4.0.0"
18+
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
19+
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
20+
<modelVersion>4.0.0</modelVersion>
21+
22+
<groupId>com.google.cloud</groupId>
23+
<artifactId>workload-generator</artifactId>
24+
<version>0.1</version>
25+
26+
<properties>
27+
<maven.compiler.source>8</maven.compiler.source>
28+
<maven.compiler.target>8</maven.compiler.target>
29+
<apache_beam.version>2.34.0</apache_beam.version>
30+
</properties>
31+
32+
<!--
33+
The parent pom defines common style checks and testing strategies for our samples.
34+
Removing or replacing it should not affect the execution of the samples in anyway.
35+
-->
36+
<parent>
37+
<groupId>com.google.cloud.samples</groupId>
38+
<artifactId>shared-configuration</artifactId>
39+
<version>1.0.23</version>
40+
</parent>
41+
42+
<!-- Shading plugin needed to create uber jars for Dataflow flex templates. -->
43+
<build>
44+
<plugins>
45+
<plugin>
46+
<groupId>org.apache.maven.plugins</groupId>
47+
<artifactId>maven-shade-plugin</artifactId>
48+
<version>3.2.4</version>
49+
<executions>
50+
<execution>
51+
<phase>package</phase>
52+
<goals>
53+
<goal>shade</goal>
54+
</goals>
55+
<configuration>
56+
<!-- Exclude signed jars which cause build issue when deploying shaded jar -->
57+
<filters>
58+
<filter>
59+
<artifact>*:*</artifact>
60+
<excludes>
61+
<exclude>META-INF/LICENSE</exclude>
62+
<exclude>META-INF/*.SF</exclude>
63+
<exclude>META-INF/*.DSA</exclude>
64+
<exclude>META-INF/*.RSA</exclude>
65+
</excludes>
66+
</filter>
67+
</filters>
68+
<transformers>
69+
<transformer
70+
implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer">
71+
</transformer>
72+
</transformers>
73+
</configuration>
74+
</execution>
75+
</executions>
76+
</plugin>
77+
</plugins>
78+
</build>
79+
80+
<dependencies>
81+
<dependency>
82+
<groupId>org.apache.beam</groupId>
83+
<artifactId>beam-runners-direct-java</artifactId>
84+
<version>${apache_beam.version}</version>
85+
</dependency>
86+
<dependency>
87+
<groupId>org.apache.beam</groupId>
88+
<artifactId>beam-runners-google-cloud-dataflow-java</artifactId>
89+
<version>${apache_beam.version}</version>
90+
</dependency>
91+
<dependency>
92+
<groupId>org.apache.beam</groupId>
93+
<artifactId>beam-sdks-java-extensions-google-cloud-platform-core</artifactId>
95+
<version>${apache_beam.version}</version>
96+
</dependency>
97+
98+
<dependency>
99+
<groupId>com.google.guava</groupId>
100+
<artifactId>guava</artifactId>
101+
<version>31.0.1-jre</version>
102+
</dependency>
103+
104+
<dependency>
105+
<groupId>com.google.cloud.bigtable</groupId>
106+
<artifactId>bigtable-hbase-beam</artifactId>
107+
<version>1.26.1</version>
108+
</dependency>
109+
110+
<dependency>
111+
<groupId>junit</groupId>
112+
<artifactId>junit</artifactId>
113+
<version>4.13.2</version>
114+
<scope>test</scope>
115+
</dependency>
116+
<dependency>
117+
<groupId>com.google.truth</groupId>
118+
<artifactId>truth</artifactId>
119+
<version>1.1.3</version>
120+
<scope>test</scope>
121+
</dependency>
122+
</dependencies>
123+
</project>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
/*
2+
* Copyright 2021 Google LLC
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package bigtable;
18+
19+
import com.google.cloud.bigtable.beam.AbstractCloudBigtableTableDoFn;
import com.google.cloud.bigtable.beam.CloudBigtableConfiguration;
import com.google.cloud.bigtable.beam.CloudBigtableTableConfiguration;
import java.io.IOException;
import org.apache.beam.runners.dataflow.options.DataflowPipelineOptions;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.PipelineResult;
import org.apache.beam.sdk.io.GenerateSequence;
import org.apache.beam.sdk.options.Default;
import org.apache.beam.sdk.options.Description;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.transforms.ParDo;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.joda.time.Duration;
36+
37+
public class WorkloadGenerator {
38+
39+
public static void main(String[] args) {
40+
BigtableWorkloadOptions options =
41+
PipelineOptionsFactory.fromArgs(args).withValidation().as(BigtableWorkloadOptions.class);
42+
generateWorkload(options);
43+
}
44+
45+
static PipelineResult generateWorkload(BigtableWorkloadOptions options) {
46+
CloudBigtableTableConfiguration bigtableTableConfig =
47+
new CloudBigtableTableConfiguration.Builder()
48+
.withProjectId(options.getProject())
49+
.withInstanceId(options.getBigtableInstanceId())
50+
.withTableId(options.getBigtableTableId())
51+
.build();
52+
53+
Pipeline p = Pipeline.create(options);
54+
55+
// Initiates a new pipeline every second
56+
p.apply(GenerateSequence.from(0).withRate(options.getWorkloadRate(), new Duration(1000)))
57+
.apply(ParDo.of(new ReadFromTableFn(bigtableTableConfig)));
58+
System.out.println("Beginning to generate read workload.");
59+
return p.run();
60+
}
61+
62+
public static class ReadFromTableFn extends AbstractCloudBigtableTableDoFn<Long, Void> {
63+
64+
public ReadFromTableFn(CloudBigtableConfiguration config) {
65+
super(config);
66+
System.out.println("Connected to table.");
67+
}
68+
69+
@ProcessElement
70+
public void processElement(PipelineOptions po) throws IOException {
71+
BigtableWorkloadOptions options = po.as(BigtableWorkloadOptions.class);
72+
Scan scan = new Scan();
73+
Table table = getConnection().getTable(TableName.valueOf(options.getBigtableTableId()));
74+
table.getScanner(scan);
75+
}
76+
}
77+
78+
public interface BigtableWorkloadOptions extends DataflowPipelineOptions {
79+
80+
@Description("The Bigtable instance ID")
81+
@Default.String("bigtable-instance")
82+
String getBigtableInstanceId();
83+
84+
void setBigtableInstanceId(String bigtableInstanceId);
85+
86+
@Description("The Bigtable table ID in the instance.")
87+
@Default.String("bigtable-table")
88+
String getBigtableTableId();
89+
90+
void setBigtableTableId(String bigtableTableId);
91+
92+
@Description("The QPS for the workload to produce.")
93+
@Default.Integer(1000)
94+
Integer getWorkloadRate();
95+
96+
void setWorkloadRate(Integer workloadRate);
97+
}
98+
}

0 commit comments

Comments
 (0)