Add sample code for reading table with Index

nielm · Shabirmean · commit f658c3defac7 · 2023-01-23T14:03:49.000-05:00
diff --git a/dataflow/spanner-io/src/main/java/com/example/dataflow/SpannerReadApiWithIndex.java b/dataflow/spanner-io/src/main/java/com/example/dataflow/SpannerReadApiWithIndex.java
@@ -0,0 +1,140 @@
+/*
+ * Copyright 2017 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.example.dataflow;
+
+import com.google.cloud.spanner.Dialect;
+import com.google.cloud.spanner.Struct;
+import org.apache.beam.sdk.Pipeline;
+import org.apache.beam.sdk.io.TextIO;
+import org.apache.beam.sdk.io.gcp.spanner.SpannerIO;
+import org.apache.beam.sdk.options.Default;
+import org.apache.beam.sdk.options.Default.Enum;
+import org.apache.beam.sdk.options.Description;
+import org.apache.beam.sdk.options.PipelineOptions;
+import org.apache.beam.sdk.options.PipelineOptionsFactory;
+import org.apache.beam.sdk.options.Validation;
+import org.apache.beam.sdk.transforms.Sum;
+import org.apache.beam.sdk.transforms.ToString;
+import org.apache.beam.sdk.values.PCollection;
+
+/**
+ * This sample demonstrates how to read from a Spanner table using the Read API, reading from a
+ * secondary index.
+ */
+public class SpannerReadApiWithIndex {
+  /** Command-line options accepted by this sample pipeline. */
+  public interface Options extends PipelineOptions {
+    @Description("Spanner instance ID to query from")
+    @Validation.Required
+    String getInstanceId();
+
+    void setInstanceId(String value);
+
+    @Description("Spanner database name to query from")
+    @Validation.Required
+    String getDatabaseId();
+
+    void setDatabaseId(String value);
+
+    @Description("Dialect of the database that is used")
+    @Default.Enum("GOOGLE_STANDARD_SQL")
+    Dialect getDialect();
+
+    void setDialect(Dialect dialect);
+
+    @Description("Output filename for records size")
+    @Validation.Required
+    String getOutput();
+
+    void setOutput(String value);
+  }
+
+  /** Reads rows through the index, sums their estimated sizes and writes the total to a file. */
+  public static void main(String[] args) {
+    Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
+    Pipeline pipeline = Pipeline.create(options);
+
+    String instanceId = options.getInstanceId();
+    String databaseId = options.getDatabaseId();
+    Dialect dialect = options.getDialect();
+    // Column names differ per dialect, so each dialect has its own read method.
+    PCollection<Struct> records;
+    if (dialect == Dialect.POSTGRESQL) {
+      records = postgreSqlRead(instanceId, databaseId, pipeline);
+    } else {
+      records = googleSqlRead(instanceId, databaseId, pipeline);
+    }
+
+    PCollection<Long> tableEstimatedSize =
+        records
+            // Estimate the size of every row
+            .apply(EstimateSize.create())
+            // Sum the row sizes to get the total estimated size of the data that was read
+            .apply(Sum.longsGlobally());
+
+    // Write the total size to a file
+    tableEstimatedSize
+        .apply(ToString.elements())
+        .apply(TextIO.write().to(options.getOutput()).withoutSharding());
+
+    pipeline.run().waitUntilFinish();
+  }
+
+  /**
+   * Reads from a GoogleSQL-dialect database. GoogleSQL retains the casing of table and column
+   * names, so this sample uses CamelCase identifiers.
+   */
+  static PCollection<Struct> googleSqlRead(
+      String instanceId, String databaseId, Pipeline pipeline) {
+    // [START spanner_dataflow_readapi_withindex]
+    // Read the listed columns from the Songs table through the SongsBySongName index
+    PCollection<Struct> records =
+        pipeline.apply(
+            SpannerIO.read()
+                .withInstanceId(instanceId)
+                .withDatabaseId(databaseId)
+                .withTable("Songs")
+                .withIndex("SongsBySongName")
+                // Can only read columns that are either indexed, STORED in the index or
+                // part of the primary key of the Songs table.
+                .withColumns("SingerId", "AlbumId", "TrackId", "SongName"));
+    // [END spanner_dataflow_readapi_withindex]
+    return records;
+  }
+
+  /**
+   * Reads from a PostgreSQL-dialect database. PostgreSQL folds identifiers to lower case, so this
+   * sample uses lower-case column names with underscores separating the words.
+   */
+  static PCollection<Struct> postgreSqlRead(
+      String instanceId, String databaseId, Pipeline pipeline) {
+    // [START spanner_pg_dataflow_readapi_withindex]
+    // Read the listed columns from the Songs table through the SongsBySongName index
+    PCollection<Struct> records =
+        pipeline.apply(
+            SpannerIO.read()
+                .withInstanceId(instanceId)
+                .withDatabaseId(databaseId)
+                .withTable("Songs")
+                .withIndex("SongsBySongName")
+                // Can only read columns that are either indexed, STORED in the index or
+                // part of the primary key of the songs table.
+                .withColumns("singer_id", "album_id", "track_id", "song_name"));
+    // [END spanner_pg_dataflow_readapi_withindex]
+    return records;
+  }
+}
diff --git a/dataflow/spanner-io/src/test/java/com/example/dataflow/SpannerReadIT.java b/dataflow/spanner-io/src/test/java/com/example/dataflow/SpannerReadIT.java
@@ -52,8 +52,7 @@
 @RunWith(Parameterized.class)
 public class SpannerReadIT {
 
-  @Parameter
-  public Dialect dialect;
+  @Parameter public Dialect dialect;
 
   @Parameters(name = "dialect = {0}")
   public static List<Object[]> data() {
@@ -104,7 +103,11 @@ public void setUp() throws InterruptedException, ExecutionException {
                       + "(singer_id bigint NOT NULL primary key, first_name varchar NOT NULL, "
                       + "last_name varchar NOT NULL)",
                   "CREATE TABLE Albums (singer_id bigint NOT NULL, album_id bigint NOT NULL, "
-                      + "album_title varchar NOT NULL, PRIMARY KEY (singer_id, album_id))"),
+                      + "album_title varchar NOT NULL, PRIMARY KEY (singer_id, album_id))",
+                  "CREATE TABLE Songs (singer_id bigint NOT NULL, album_id bigint NOT NULL, "
+                      + "track_id bigint NOT NULL, song_name varchar, Duration bigint, "
+                      + "song_genre varchar, PRIMARY KEY(singer_id, album_id, track_id))",
+                  "CREATE INDEX SongsBySongName ON Songs(song_name)"),
               null)
           .get();
     } else {
@@ -117,7 +120,11 @@ public void setUp() throws InterruptedException, ExecutionException {
                       + "(SingerId INT64 NOT NULL, FirstName STRING(MAX) NOT NULL, "
                       + "LastName STRING(MAX) NOT NULL,) PRIMARY KEY (SingerId)",
                   "CREATE TABLE Albums (SingerId INT64 NOT NULL, AlbumId INT64 NOT NULL, "
-                      + "AlbumTitle STRING(MAX) NOT NULL,) PRIMARY KEY (SingerId, AlbumId)"))
+                      + "AlbumTitle STRING(MAX) NOT NULL,) PRIMARY KEY (SingerId, AlbumId)",
+                  "CREATE TABLE Songs (SingerId  INT64 NOT NULL, AlbumId INT64 NOT NULL, "
+                      + "TrackId INT64 NOT NULL, SongName  STRING(MAX), Duration  INT64, "
+                      + "SongGenre STRING(25)) PRIMARY KEY(SingerId, AlbumId, TrackId)",
+                  "CREATE INDEX SongsBySongName ON Songs(SongName)"))
           .get();
     }
 
@@ -163,13 +170,41 @@ public void setUp() throws InterruptedException, ExecutionException {
                 .set(formatColumnName("AlbumTitle", dialect))
                 .to("Imagine")
                 .build(),
+            Mutation.newInsertBuilder("Songs")
+                .set(formatColumnName("SingerId", dialect))
+                .to(1L)
+                .set(formatColumnName("AlbumId", dialect))
+                .to(1L)
+                .set(formatColumnName("TrackId", dialect))
+                .to(1L)
+                .set(formatColumnName("SongName", dialect))
+                .to("Imagine")
+                .set(formatColumnName("Duration", dialect))
+                .to(181L)
+                .set(formatColumnName("SongGenre", dialect))
+                .to("Rock/Pop")
+                .build(),
             Mutation.newInsertBuilder("Albums")
                 .set(formatColumnName("SingerId", dialect))
                 .to(2L)
                 .set(formatColumnName("AlbumId", dialect))
                 .to(1L)
                 .set(formatColumnName("AlbumTitle", dialect))
                 .to("Pipes of Peace")
+                .build(),
+            Mutation.newInsertBuilder("Songs")
+                .set(formatColumnName("SingerId", dialect))
+                .to(2L)
+                .set(formatColumnName("AlbumId", dialect))
+                .to(1L)
+                .set(formatColumnName("TrackId", dialect))
+                .to(1L)
+                .set(formatColumnName("SongName", dialect))
+                .to("Pipes of Peace")
+                .set(formatColumnName("Duration", dialect))
+                .to(236L)
+                .set(formatColumnName("SongGenre", dialect))
+                .to("Rock/Pop")
                 .build());
 
     DatabaseClient dbClient = getDbClient();
@@ -222,7 +257,7 @@ public void readDbEndToEnd() throws Exception {
 
     String content = Files.readAllLines(outPath).stream().collect(Collectors.joining("\n"));
 
-    assertEquals("132", content);
+    assertEquals("233", content);
   }
 
   @Test
@@ -259,6 +294,23 @@ public void readApiEndToEnd() throws Exception {
     assertEquals("79", content);
   }
 
+  @Test
+  public void readApiWithIndexEndToEnd() throws Exception {
+    Path outPath = Files.createTempFile("out", "txt");
+    SpannerReadApiWithIndex.main(
+        new String[] {
+          "--instanceId=" + instanceId,
+          "--databaseId=" + databaseId,
+          "--output=" + outPath,
+          "--runner=DirectRunner",
+          "--dialect=" + dialect
+        });
+
+    String content = Files.readAllLines(outPath).stream().collect(Collectors.joining("\n"));
+
+    assertEquals("69", content);
+  }
+
   @Test
   public void readTransactionalReadEndToEnd() throws Exception {
     Path singersPath = Files.createTempFile("singers", "txt");