From a678f62a5ac59d907e7da9dfc704801f42061c86 Mon Sep 17 00:00:00 2001
From: SilasMarvin <19626586+SilasMarvin@users.noreply.github.com>
Date: Fri, 25 Aug 2023 15:46:50 -0700
Subject: [PATCH 1/3] Added summarizing example

---
 pgml-sdks/rust/pgml/python/examples/README.md |  3 +
 .../examples/extractive_question_answering.py |  3 +-
 .../summarizing_question_answering.py         | 70 +++++++++++++++++++
 3 files changed, 74 insertions(+), 2 deletions(-)
 create mode 100644 pgml-sdks/rust/pgml/python/examples/summarizing_question_answering.py

diff --git a/pgml-sdks/rust/pgml/python/examples/README.md b/pgml-sdks/rust/pgml/python/examples/README.md
index e2e22eb6e..81416c038 100644
--- a/pgml-sdks/rust/pgml/python/examples/README.md
+++ b/pgml-sdks/rust/pgml/python/examples/README.md
@@ -15,3 +15,6 @@ In this example, we will show how to use `vector_recall` result as a `context` t
 ### [Table Question Answering](./table_question_answering.py)
 In this example, we will use [Open Table-and-Text Question Answering (OTT-QA)
 ](https://github.com/wenhuchen/OTT-QA) dataset to run queries on tables. We will use `deepset/all-mpnet-base-v2-table` model that is trained for embedding tabular data for retrieval tasks. 
+
+### [Summarizing Question Answering](./summarizing_question_answering.py)
+In this example, we will find documents relevant to a question in a collection of documents and then summarize those documents.
diff --git a/pgml-sdks/rust/pgml/python/examples/extractive_question_answering.py b/pgml-sdks/rust/pgml/python/examples/extractive_question_answering.py
index 21cfc90b5..21b5f2e67 100644
--- a/pgml-sdks/rust/pgml/python/examples/extractive_question_answering.py
+++ b/pgml-sdks/rust/pgml/python/examples/extractive_question_answering.py
@@ -56,8 +56,7 @@ async def main():
         "question-answering", [json.dumps({"question": query, "context": context})]
     )
     end = time()
-    console.print("Results for query '%s'" % query, style="bold")
-    console.print(answer)
+    console.print("Answer '%s'" % answer, style="bold")
     console.print("Query time = %0.3f" % (end - start))
 
     # Archive collection
diff --git a/pgml-sdks/rust/pgml/python/examples/summarizing_question_answering.py b/pgml-sdks/rust/pgml/python/examples/summarizing_question_answering.py
new file mode 100644
index 000000000..ab0f51f49
--- /dev/null
+++ b/pgml-sdks/rust/pgml/python/examples/summarizing_question_answering.py
@@ -0,0 +1,70 @@
+from pgml import Collection, Model, Splitter, Pipeline, Builtins, py_init_logger
+import json
+from datasets import load_dataset
+from time import time
+from dotenv import load_dotenv
+from rich.console import Console
+import asyncio
+
+
+py_init_logger()
+
+async def main():
+    load_dotenv()
+    console = Console()
+
+    # Initialize collection
+    collection = Collection("squad_collection")
+
+    # Create a pipeline using the default model and splitter
+    model = Model()
+    splitter = Splitter()
+    pipeline = Pipeline("squadv1", model, splitter)
+    await collection.add_pipeline(pipeline)
+
+    # Prep documents for upserting
+    data = load_dataset("squad", split="train")
+    data = data.to_pandas()
+    data = data.drop_duplicates(subset=["context"])
+    documents = [
+        {"id": r["id"], "text": r["context"], "title": r["title"]}
+        for r in data.to_dict(orient="records")
+    ]
+
+    # Upsert documents
+    await collection.upsert_documents(documents[:200])
+
+    # Query for context
+    query = "Who won more than 20 grammy awards?"
+    console.print("Querying for context ...")
+    start = time()
+    results = (
+        await collection.query().vector_recall(query, pipeline).limit(5).fetch_all()
+    )
+    end = time()
+    console.print("\n Results for '%s' " % (query), style="bold")
+    console.print(results)
+    console.print("Query time = %0.3f" % (end - start))
+
+    # Construct context from results
+    context = " ".join(results[0][1].strip().split())
+    context = context.replace('"', '\\"').replace("'", "''")
+
+    # Query for summary 
+    builtins = Builtins()
+    console.print("Querying for summary ...")
+    start = time()
+    summary = await builtins.transform(
+        {"task": "summarization", "model": "sshleifer/distilbart-cnn-12-6"},
+        [json.dumps({"question": query, "context": context})],
+    )
+    end = time()
+    console.print("Summary '%s'" % summary, style="bold")
+    console.print("Query time = %0.3f" % (end - start))
+
+    # Archive collection
+    await collection.archive()
+
+
+if __name__ == "__main__":
+    asyncio.run(main())

From f3bbab22047bdc033f3983606437309b04a8c70b Mon Sep 17 00:00:00 2001
From: SilasMarvin <19626586+SilasMarvin@users.noreply.github.com>
Date: Fri, 25 Aug 2023 15:53:15 -0700
Subject: [PATCH 2/3] Cleaned up

---
 .../pgml/python/examples/summarizing_question_answering.py   | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/pgml-sdks/rust/pgml/python/examples/summarizing_question_answering.py b/pgml-sdks/rust/pgml/python/examples/summarizing_question_answering.py
index ab0f51f49..4c291aac0 100644
--- a/pgml-sdks/rust/pgml/python/examples/summarizing_question_answering.py
+++ b/pgml-sdks/rust/pgml/python/examples/summarizing_question_answering.py
@@ -9,6 +9,7 @@
 
 py_init_logger()
 
+
 async def main():
     load_dotenv()
     console = Console()
@@ -50,13 +51,13 @@ async def main():
     context = " ".join(results[0][1].strip().split())
     context = context.replace('"', '\\"').replace("'", "''")
 
-    # Query for summary 
+    # Query for summary
     builtins = Builtins()
     console.print("Querying for summary ...")
     start = time()
     summary = await builtins.transform(
         {"task": "summarization", "model": "sshleifer/distilbart-cnn-12-6"},
-        [json.dumps({"question": query, "context": context})],
+        [context],
     )
     end = time()
     console.print("Summary '%s'" % summary, style="bold")

From bd2ebc5aa341615a86d1fda0534ff128eea84a15 Mon Sep 17 00:00:00 2001
From: SilasMarvin <19626586+SilasMarvin@users.noreply.github.com>
Date: Fri, 25 Aug 2023 16:11:41 -0700
Subject: [PATCH 3/3] Added summarizing example for javascript

---
 .../rust/pgml/javascript/examples/README.md   |  3 +
 .../summarizing_question_answering.js         | 63 +++++++++++++++++++
 2 files changed, 66 insertions(+)
 create mode 100644 pgml-sdks/rust/pgml/javascript/examples/summarizing_question_answering.js

diff --git a/pgml-sdks/rust/pgml/javascript/examples/README.md b/pgml-sdks/rust/pgml/javascript/examples/README.md
index 440058e4f..77e13b638 100644
--- a/pgml-sdks/rust/pgml/javascript/examples/README.md
+++ b/pgml-sdks/rust/pgml/javascript/examples/README.md
@@ -11,3 +11,6 @@ In this example, we will use `hknlp/instructor-base` model to build text embeddi
 
 ### [Extractive Question Answering](./extractive_question_answering.js)
 In this example, we will show how to use `vector_recall` result as a `context` to a HuggingFace question answering model. We will use `Builtins.transform()` to run the model on the database.
+
+### [Summarizing Question Answering](./summarizing_question_answering.js)
+In this example, we will find documents relevant to a question in a collection of documents and then summarize those documents.
diff --git a/pgml-sdks/rust/pgml/javascript/examples/summarizing_question_answering.js b/pgml-sdks/rust/pgml/javascript/examples/summarizing_question_answering.js
new file mode 100644
index 000000000..a5e5fe19b
--- /dev/null
+++ b/pgml-sdks/rust/pgml/javascript/examples/summarizing_question_answering.js
@@ -0,0 +1,63 @@
+const pgml = require("pgml");
+require("dotenv").config();
+
+pgml.js_init_logger();
+
+const main = async () => {
+  // Initialize the collection
+  const collection = pgml.newCollection("my_javascript_sqa_collection");
+
+  // Add a pipeline
+  const model = pgml.newModel();
+  const splitter = pgml.newSplitter();
+  const pipeline = pgml.newPipeline(
+    "my_javascript_sqa_pipeline",
+    model,
+    splitter,
+  );
+  await collection.add_pipeline(pipeline);
+
+  // Upsert documents, these documents are automatically split into chunks and embedded by our pipeline
+  const documents = [
+    {
+      id: "Document One",
+      text: "PostgresML is the best tool for machine learning applications!",
+    },
+    {
+      id: "Document Two",
+      text: "PostgresML is open source and available to everyone!",
+    },
+  ];
+  await collection.upsert_documents(documents);
+
+  const query = "What is the best tool for machine learning?";
+
+  // Perform vector search
+  const queryResults = await collection
+    .query()
+    .vector_recall(query, pipeline)
+    .limit(1)
+    .fetch_all();
+
+  // Construct context from results
+  const context = queryResults
+    .map((result) => {
+      return result[1];
+    })
+    .join("\n");
+
+  // Query for summarization
+  const builtins = pgml.newBuiltins();
+  const answer = await builtins.transform(
+    { task: "summarization", model: "sshleifer/distilbart-cnn-12-6" },
+    [context],
+  );
+
+  // Archive the collection
+  await collection.archive();
+  return answer;
+};
+
+main().then((results) => {
+  console.log("Question summary: \n", results);
+});