Skip to content

Commit 0057fc9

Browse files
Bigtable Keyviz Art Codelab (GoogleCloudPlatform#2631)
* working keyviz art * Cleanup ReadData with comments and private functions * Add more comments and cleanup code * Updating types, working on tests * Package for code and working tests * Cleanup commented code and import * Cache number format, comment on row counting * empty line * Fix filter for efficient row counting
1 parent 8153112 commit 0057fc9

File tree

5 files changed

+636
-4
lines changed

5 files changed

+636
-4
lines changed

bigtable/beam/helloworld/src/test/java/HelloWorldTest.java

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -104,8 +104,7 @@ public void testWrite() {
104104
new String[] {
105105
"--bigtableProjectId=" + projectId,
106106
"--bigtableInstanceId=" + instanceId,
107-
"--bigtableTableId=" + TABLE_ID,
108-
"--runner=DirectRunner"
107+
"--bigtableTableId=" + TABLE_ID
109108
});
110109

111110
long count = 0;
@@ -131,8 +130,7 @@ public void testRead() {
131130
new String[] {
132131
"--bigtableProjectId=" + projectId,
133132
"--bigtableInstanceId=" + instanceId,
134-
"--bigtableTableId=" + TABLE_ID,
135-
"--runner=DirectRunner"
133+
"--bigtableTableId=" + TABLE_ID
136134
});
137135

138136
String output = bout.toString();

bigtable/beam/keyviz-art/pom.xml

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<!--
3+
Copyright 2020 Google LLC
4+
5+
Licensed under the Apache License, Version 2.0 (the "License");
6+
you may not use this file except in compliance with the License.
7+
You may obtain a copy of the License at
8+
9+
http://www.apache.org/licenses/LICENSE-2.0
10+
11+
Unless required by applicable law or agreed to in writing, software
12+
distributed under the License is distributed on an "AS IS" BASIS,
13+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
See the License for the specific language governing permissions and
15+
limitations under the License.
16+
-->
17+
<project xmlns="http://maven.apache.org/POM/4.0.0"
18+
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
19+
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
20+
<modelVersion>4.0.0</modelVersion>
21+
22+
<groupId>com.example.bigtable</groupId>
23+
<artifactId>keyviz-art</artifactId>
24+
<version>1.0-SNAPSHOT</version>
25+
26+
27+
<properties>
28+
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
29+
<maven.compiler.source>1.8</maven.compiler.source>
30+
<maven.compiler.target>1.8</maven.compiler.target>
31+
<apache_beam.version>2.19.0</apache_beam.version>
32+
</properties>
33+
34+
<dependencies>
35+
<dependency>
36+
<groupId>com.google.cloud.bigtable</groupId>
37+
<artifactId>bigtable-hbase-beam</artifactId>
38+
<version>1.14.0</version>
39+
</dependency>
40+
41+
<dependency>
42+
<groupId>org.apache.beam</groupId>
43+
<artifactId>beam-runners-direct-java</artifactId>
44+
<version>${apache_beam.version}</version>
45+
</dependency>
46+
<dependency>
47+
<groupId>org.apache.beam</groupId>
48+
<artifactId>beam-runners-google-cloud-dataflow-java</artifactId>
49+
<version>${apache_beam.version}</version>
50+
</dependency>
51+
<dependency>
52+
<groupId>org.apache.beam</groupId>
53+
<artifactId>beam-sdks-java-extensions-google-cloud-platform-core
54+
</artifactId>
55+
<version>${apache_beam.version}</version>
56+
</dependency>
57+
58+
<dependency>
59+
<groupId>junit</groupId>
60+
<artifactId>junit</artifactId>
61+
<version>4.13</version>
62+
<scope>test</scope>
63+
</dependency>
64+
<dependency>
65+
<groupId>com.google.truth</groupId>
66+
<artifactId>truth</artifactId>
67+
<version>1.0</version>
68+
<scope>test</scope>
69+
</dependency>
70+
</dependencies>
71+
72+
</project>
Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
/*
2+
* Copyright 2020 Google LLC
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package keyviz;
18+
19+
import com.google.cloud.bigtable.beam.CloudBigtableIO;
20+
import com.google.cloud.bigtable.beam.CloudBigtableTableConfiguration;
21+
import java.util.Random;
22+
import org.apache.beam.runners.dataflow.options.DataflowPipelineOptions;
23+
import org.apache.beam.sdk.Pipeline;
24+
import org.apache.beam.sdk.io.GenerateSequence;
25+
import org.apache.beam.sdk.options.Default;
26+
import org.apache.beam.sdk.options.Description;
27+
import org.apache.beam.sdk.options.PipelineOptionsFactory;
28+
import org.apache.beam.sdk.transforms.DoFn;
29+
import org.apache.beam.sdk.transforms.ParDo;
30+
import org.apache.hadoop.hbase.client.Mutation;
31+
import org.apache.hadoop.hbase.client.Put;
32+
import org.apache.hadoop.hbase.util.Bytes;
33+
34+
/**
35+
* A Beam job that loads random data into Cloud Bigtable.
36+
*/
37+
public class LoadData {
38+
39+
static final long ONE_MB = 1000 * 1000;
40+
static final long ONE_GB = 1000 * ONE_MB;
41+
static final String COLUMN_FAMILY = "cf";
42+
43+
public static void main(String[] args) {
44+
45+
WriteDataOptions options =
46+
PipelineOptionsFactory.fromArgs(args).withValidation().as(WriteDataOptions.class);
47+
Pipeline p = Pipeline.create(options);
48+
CloudBigtableTableConfiguration bigtableTableConfig =
49+
new CloudBigtableTableConfiguration.Builder()
50+
.withProjectId(options.getBigtableProjectId())
51+
.withInstanceId(options.getBigtableInstanceId())
52+
.withTableId(options.getBigtableTableId())
53+
.build();
54+
55+
long rowSize = options.getMegabytesPerRow() * ONE_MB;
56+
final long max =
57+
(Math.round((options.getGigabytesWritten() * ONE_GB)) / rowSize);
58+
// Make each number the same length by padding with 0s
59+
int maxLength = ("" + max).length();
60+
String numberFormat = "%0" + maxLength + "d";
61+
62+
p.apply(GenerateSequence.from(0).to(max))
63+
.apply(
64+
ParDo.of(
65+
new DoFn<Long, Mutation>() {
66+
@ProcessElement
67+
public void processElement(@Element Long rowkey, OutputReceiver<Mutation> out) {
68+
String paddedRowkey = String.format(numberFormat, rowkey);
69+
70+
// Reverse the rowkey for more efficient writing
71+
String reversedRowkey = new StringBuilder(paddedRowkey).reverse().toString();
72+
Put row = new Put(Bytes.toBytes(reversedRowkey));
73+
74+
// Generate random bytes
75+
byte[] b = new byte[(int) rowSize];
76+
new Random().nextBytes(b);
77+
78+
long timestamp = System.currentTimeMillis();
79+
row.addColumn(Bytes.toBytes(COLUMN_FAMILY), Bytes.toBytes("C"), timestamp, b);
80+
out.output(row);
81+
}
82+
}))
83+
.apply(CloudBigtableIO.writeToTable(bigtableTableConfig));
84+
85+
p.run().waitUntilFinish();
86+
}
87+
88+
public interface WriteDataOptions extends BigtableOptions {
89+
90+
@Description("The number of gigabytes to write")
91+
@Default.Double(40)
92+
double getGigabytesWritten();
93+
94+
void setGigabytesWritten(double gigabytesWritten);
95+
96+
@Description("The number of megabytes per row to write")
97+
@Default.Long(5)
98+
long getMegabytesPerRow();
99+
100+
void setMegabytesPerRow(long megabytesPerRow);
101+
}
102+
103+
public interface BigtableOptions extends DataflowPipelineOptions {
104+
105+
@Description("The Bigtable project ID, this can be different than your Dataflow project")
106+
@Default.String("bigtable-project")
107+
String getBigtableProjectId();
108+
109+
void setBigtableProjectId(String bigtableProjectId);
110+
111+
@Description("The Bigtable instance ID")
112+
@Default.String("bigtable-instance")
113+
String getBigtableInstanceId();
114+
115+
void setBigtableInstanceId(String bigtableInstanceId);
116+
117+
@Description("The Bigtable table ID in the instance.")
118+
@Default.String("bigtable-table")
119+
String getBigtableTableId();
120+
121+
void setBigtableTableId(String bigtableTableId);
122+
}
123+
}

0 commit comments

Comments
 (0)