From 9c4acca4aac679191a55e9251d2c1becca675a03 Mon Sep 17 00:00:00 2001 From: Silas Marvin <19626586+SilasMarvin@users.noreply.github.com> Date: Tue, 4 Jun 2024 16:46:55 -0700 Subject: [PATCH 1/5] Periodic commit --- pgml-cms/docs/api/client-sdk/pipelines.md | 1 - pgml-cms/docs/api/client-sdk/search.md | 12 ++++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/pgml-cms/docs/api/client-sdk/pipelines.md b/pgml-cms/docs/api/client-sdk/pipelines.md index dccf3f2b7..718a0b072 100644 --- a/pgml-cms/docs/api/client-sdk/pipelines.md +++ b/pgml-cms/docs/api/client-sdk/pipelines.md @@ -440,7 +440,6 @@ const pipeline = pgml.newPipeline("test_pipeline") const collection = pgml.newCollection("test_collection") await collection.remove_pipeline(pipeline) ``` -{% endtab %} {% tab title="Python" %} ```python diff --git a/pgml-cms/docs/api/client-sdk/search.md b/pgml-cms/docs/api/client-sdk/search.md index 2d5b5ce41..95e7c73c0 100644 --- a/pgml-cms/docs/api/client-sdk/search.md +++ b/pgml-cms/docs/api/client-sdk/search.md @@ -253,6 +253,10 @@ results = await collection.vector_search( ``` {% endtab %} +<<<<<<< HEAD +======= +{% endtab %} +>>>>>>> 97398a2e (Periodic commit) {% tab title="Rust" %} ```rust let query = "What is the best database?"; @@ -363,6 +367,10 @@ results = await collection.vector_search( ``` {% endtab %} +<<<<<<< HEAD +======= +{% endtab %} +>>>>>>> 97398a2e (Periodic commit) {% tab title="Rust" %} ```rust let results = collection @@ -462,6 +470,10 @@ results = await collection.vector_search( ``` {% endtab %} +<<<<<<< HEAD +======= +{% endtab %} +>>>>>>> 97398a2e (Periodic commit) {% tab title="Rust" %} ```rust let results = collection From 2b8d8ea995c3a24be93f2ab40a2e8817bebf4165 Mon Sep 17 00:00:00 2001 From: Silas Marvin <19626586+SilasMarvin@users.noreply.github.com> Date: Wed, 5 Jun 2024 09:48:35 -0700 Subject: [PATCH 2/5] Updated everything to have rust and c --- pgml-cms/docs/api/client-sdk/search.md | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/pgml-cms/docs/api/client-sdk/search.md b/pgml-cms/docs/api/client-sdk/search.md index 95e7c73c0..57706d277 100644 --- a/pgml-cms/docs/api/client-sdk/search.md +++ b/pgml-cms/docs/api/client-sdk/search.md @@ -253,10 +253,6 @@ results = await collection.vector_search( ``` {% endtab %} -<<<<<<< HEAD -======= -{% endtab %} ->>>>>>> 97398a2e (Periodic commit) {% tab title="Rust" %} ```rust let query = "What is the best database?"; @@ -367,10 +363,13 @@ results = await collection.vector_search( ``` {% endtab %} +<<<<<<< HEAD <<<<<<< HEAD ======= {% endtab %} >>>>>>> 97398a2e (Periodic commit) +======= +>>>>>>> 7efe6d9f (Updated everything to have rust and c) {% tab title="Rust" %} ```rust let results = collection @@ -470,10 +469,13 @@ results = await collection.vector_search( ``` {% endtab %} +<<<<<<< HEAD <<<<<<< HEAD ======= {% endtab %} >>>>>>> 97398a2e (Periodic commit) +======= +>>>>>>> 7efe6d9f (Updated everything to have rust and c) {% tab title="Rust" %} ```rust let results = collection From 377e53e5aadd126ed93c73068b05250892521b64 Mon Sep 17 00:00:00 2001 From: SilasMarvin <19626586+SilasMarvin@users.noreply.github.com> Date: Wed, 5 Jun 2024 12:05:43 -0700 Subject: [PATCH 3/5] Rust and c docs ready to go --- pgml-cms/docs/api/client-sdk/pipelines.md | 1 + 1 file changed, 1 insertion(+) diff --git a/pgml-cms/docs/api/client-sdk/pipelines.md b/pgml-cms/docs/api/client-sdk/pipelines.md index 718a0b072..dccf3f2b7 100644 --- a/pgml-cms/docs/api/client-sdk/pipelines.md +++ b/pgml-cms/docs/api/client-sdk/pipelines.md @@ -440,6 +440,7 @@ const pipeline = pgml.newPipeline("test_pipeline") const collection = pgml.newCollection("test_collection") await collection.remove_pipeline(pipeline) ``` +{% endtab %} {% tab title="Python" %} ```python From b243b6a09f689e1adac31e1c154f2a5010e3505f Mon Sep 17 00:00:00 2001 From: SilasMarvin <19626586+SilasMarvin@users.noreply.github.com> Date: Wed, 5 Jun 2024 12:35:38 -0700 Subject: [PATCH 4/5] Finalize Rust and C docs --- pgml-cms/docs/api/client-sdk/README.md | 8 +-- pgml-cms/docs/api/client-sdk/collections.md | 22 ++++---- .../docs/api/client-sdk/document-search.md | 4 +- pgml-cms/docs/api/client-sdk/pipelines.md | 56 ++++++++++++++++--- pgml-cms/docs/api/client-sdk/search.md | 12 ++-- 5 files changed, 71 insertions(+), 31 deletions(-) diff --git a/pgml-cms/docs/api/client-sdk/README.md b/pgml-cms/docs/api/client-sdk/README.md index 5e6fc56a0..49510a315 100644 --- a/pgml-cms/docs/api/client-sdk/README.md +++ b/pgml-cms/docs/api/client-sdk/README.md @@ -99,7 +99,7 @@ async fn main() -> Result<(), Error> { {% endtab %} {% tab title="C" %} -```c +```cpp #include #include "pgml.h" @@ -176,7 +176,7 @@ collection.add_pipeline(&mut pipeline).await?; {% endtab %} {% tab title="C" %} -```c +```cpp // Add this code to the end of the main function from the above example. PipelineC * pipeline = pgml_pipelinec_new("sample_pipeline", "{\"text\": {\"splitter\": {\"model\": \"recursive_character\"},\"semantic_search\": {\"model\": \"Alibaba-NLP/gte-base-en-v1.5\"}}}"); @@ -251,7 +251,7 @@ collection.upsert_documents(documents, None).await?; {% endtab %} {% tab title="C" %} -```c +```cpp // Add this code to the end of the main function in the above example. char * documents_to_upsert[2] = {"{\"id\": \"Document One\", \"text\": \"document one contents...\"}", "{\"id\": \"Document Two\", \"text\": \"document two contents...\"}"}; @@ -334,7 +334,7 @@ Ok(()) {% endtab %} {% tab title="C" %} -```c +```cpp // Add this code to the end of the main function in the above example. r_size = 0; char** results = pgml_collectionc_vector_search(collection, "{\"query\": {\"fields\": {\"text\": {\"query\": \"Something about a document...\"}}}, \"limit\": 2}", pipeline, &r_size); diff --git a/pgml-cms/docs/api/client-sdk/collections.md b/pgml-cms/docs/api/client-sdk/collections.md index ebd63afca..ed23e2c64 100644 --- a/pgml-cms/docs/api/client-sdk/collections.md +++ b/pgml-cms/docs/api/client-sdk/collections.md @@ -34,7 +34,7 @@ let mut collection = Collection::new("test_collection", None)?; {% endtab %} {% tab title="C" %} -```c +```cpp CollectionC * collection = pgml_collectionc_new("test_collection", NULL); ``` {% endtab %} @@ -64,7 +64,7 @@ let mut collection = Collection::new("test_collection", Some(CUSTOM_DATABASE_URL {% endtab %} {% tab title="C" %} -```c +```cpp CollectionC * collection = pgml_collectionc_new("test_collection", CUSTOM_DATABASE_URL); ``` {% endtab %} @@ -138,7 +138,7 @@ collection.upsert_documents(documents, None).await?; {% endtab %} {% tab title="C" %} -```c +```cpp char * documents[2] = { "{\"id\": \"document_one\", \"title\": \"Document One\", \"text\": \"Here are the contents of Document 1\", \"random_key\": \"here is some random data\"}", "{\"id\": \"document_two\", \"title\": \"Document Two\", \"text\": \"Here are the contents of Document 2\", \"random_key\": \"here is some random data\"}" @@ -214,7 +214,7 @@ collection.upsert_documents(documents, None).await?; {% endtab %} {% tab title="C" %} -```c +```cpp char * documents[2] = { "{\"id\": \"document_one\", \"title\": \"Document One\", \"text\": \"Here is some new text for document one\", \"random_key\": \"here is some random data\"}", "{\"id\": \"document_two\", \"title\": \"Document Two\", \"text\": \"Here is some new text for document two\", \"random_key\": \"here is some random data\"}" @@ -288,7 +288,7 @@ collection {% endtab %} {% tab title="C" %} -```c +```cpp char * documents[2] = { "{\"id\": \"document_one\", \"new_key\": \"this will be a new key in document one\", \"random_key\": \"this will replace old random_key\"}", "{\"id\": \"document_two\", \"new_key\": \"this will be a new key in document two\", \"random_key\": \"this will replace old random_key\"}" @@ -324,7 +324,7 @@ let documents = collection {% endtab %} {% tab title="C" %} -```c +```cpp unsigned long r_size = 0; char** documents = pgml_collectionc_get_documents(collection, "{\"limit\": 100}", &r_size); ``` @@ -359,7 +359,7 @@ let documents = collection {% endtab %} {% tab title="C" %} -```c +```cpp unsigned long r_size = 0; char** documents = pgml_collectionc_get_documents(collection, "{\"limit\": 100, \"offset\": 10}", &r_size); ``` @@ -390,7 +390,7 @@ let documents = collection {% endtab %} {% tab title="C" %} -```c +```cpp unsigned long r_size = 0; char** documents = pgml_collectionc_get_documents(collection, "{\"limit\": 100, \"last_row_id\": 10}", &r_size); ``` @@ -447,7 +447,7 @@ let documents = collection {% endtab %} {% tab title="C" %} -```c +```cpp unsigned long r_size = 0; char** documents = pgml_collectionc_get_documents(collection, "{\"limit\": 100, \"filter\": {\"id\": {\"$eq\": \"document_one\"}}}", &r_size); ``` @@ -501,7 +501,7 @@ let documents = collection {% endtab %} {% tab title="C" %} -```c +```cpp unsigned long r_size = 0; char** documents = pgml_collectionc_get_documents(collection, "{\"limit\": 100, \"offset\": 10, \"order_by\": {\"id\": \"desc\"}}", &r_size); ``` @@ -549,7 +549,7 @@ let documents = collection {% endtab %} {% tab title="C" %} -```c +```cpp pgml_collectionc_delete_documents(collection, "{\"id\": { \"$eq\": 1}}"); ``` {% endtab %} diff --git a/pgml-cms/docs/api/client-sdk/document-search.md b/pgml-cms/docs/api/client-sdk/document-search.md index 4ada75d7f..9f12d77b0 100644 --- a/pgml-cms/docs/api/client-sdk/document-search.md +++ b/pgml-cms/docs/api/client-sdk/document-search.md @@ -79,7 +79,7 @@ collection.add_pipeline(&mut pipeline).await?; {% endtab %} {% tab title="C" %} -```c +```cpp PipelineC *pipeline = pgml_pipelinec_new("test_pipeline", "{\ \"abstract\": {\ \"semantic_search\": {\ @@ -192,7 +192,7 @@ let results = collection {% endtab %} {% tab title="C" %} -```c +```cpp char * results = pgml_collectionc_search(collection, "\ \"query\": {\ \"full_text_search\": {\ diff --git a/pgml-cms/docs/api/client-sdk/pipelines.md b/pgml-cms/docs/api/client-sdk/pipelines.md index dccf3f2b7..3171f18da 100644 --- a/pgml-cms/docs/api/client-sdk/pipelines.md +++ b/pgml-cms/docs/api/client-sdk/pipelines.md @@ -82,7 +82,7 @@ let mut pipeline = Pipeline::new( {% endtab %} {% tab title="C" %} -```c +```cpp PipelineC * pipeline = pgml_pipelinec_new( "test_pipeline", "{\ @@ -156,7 +156,7 @@ let mut pipeline = Pipeline::new( {% endtab %} {% tab title="C" %} -```c +```cpp PipelineC * pipeline = pgml_pipelinec_new( "test_pipeline", "{\ @@ -174,6 +174,8 @@ PipelineC * pipeline = pgml_pipelinec_new( This `Pipeline` splits and embeds the `body` text enabling semantic search using vectors. This is a very popular `Pipeline` for RAG. +### Switching from OpenAI + We support most every open source model on [Hugging Face](https://huggingface.co/), and OpenAI's embedding models. To use a model from OpenAI specify the `source` as `openai`, and make sure and set the environment variable `OPENAI_API_KEY`. {% tabs %} @@ -204,6 +206,44 @@ pipeline = Pipeline( ) ``` {% endtab %} + +{% tab title="Rust" %} +```rust +let mut pipeline = Pipeline::new( + "test_pipeline", + Some( + serde_json::json!({ + "body": { + "splitter": {"model": "recursive_character"}, + "semantic_search": { + "model": "text-embedding-ada-002", + "source": "openai" + }, + }, + }) + .into(), + ), +)?; + +``` +{% endtab %} + +{% tab title="C" %} +```cpp +PipelineC * pipeline = pgml_pipelinec_new( + "test_pipeline", + "{\ + \"body\": {\ + \"splitter\": {\"model\": \"recursive_character\"},\ + \"semantic_search\": {\ + \"model\": \"text-embedding-ada-002\",\ + \"source\": \"openai\"\ + }\ + }\ + }" +); +``` +{% endtab %} {% endtabs %} ## Customizing the Indexes @@ -267,7 +307,7 @@ let mut pipeline = Pipeline::new( {% endtab %} {% tab title="C" %} -```c +```cpp PipelineC * pipeline = pgml_pipelinec_new( "test_pipeline", "{\ @@ -308,7 +348,7 @@ collection.add_pipeline(&mut pipeline).await?; {% endtab %} {% tab title="C" %} -```c +```cpp pgml_collectionc_add_pipeline(collection, pipeline); ``` {% endtab %} @@ -336,7 +376,7 @@ let mut pipeline = Pipeline::new("test_pipeline", None)?; {% endtab %} {% tab title="C" %} -```c +```cpp PipelineC * pipeline = pgml_pipelinec_new("test_pipeline", NULL); ``` {% endtab %} @@ -381,7 +421,7 @@ collection.disable_pipeline(&mut pipeline).await?; {% endtab %} {% tab title="C" %} -```c +```cpp CollectionC * collection = pgml_collectionc_new("test_collection", NULL); PipelineC * pipeline = pgml_pipelinec_new("test_pipeline", NULL); pgml_collectionc_disable_pipeline(collection, pipeline); @@ -421,7 +461,7 @@ collection.enable_pipeline(&mut pipeline).await?; {% endtab %} {% tab title="C" %} -```c +```cpp CollectionC * collection = pgml_collectionc_new("test_collection", NULL); PipelineC * pipeline = pgml_pipelinec_new("test_pipeline", NULL); pgml_collectionc_enable_pipeline(collection, pipeline); @@ -459,7 +499,7 @@ collection.remove_pipeline(&mut pipeline).await?; {% endtab %} {% tab title="C" %} -```c +```cpp CollectionC * collection = pgml_collectionc_new("test_collection", NULL); PipelineC * pipeline = pgml_pipelinec_new("test_pipeline", NULL); pgml_collectionc_remove_pipeline(collection, pipeline); diff --git a/pgml-cms/docs/api/client-sdk/search.md b/pgml-cms/docs/api/client-sdk/search.md index 57706d277..fd3be649f 100644 --- a/pgml-cms/docs/api/client-sdk/search.md +++ b/pgml-cms/docs/api/client-sdk/search.md @@ -80,7 +80,7 @@ collection.add_pipeline(&mut pipeline).await?; {% endtab %} {% tab title="C" %} -```c +```cpp PipelineC *pipeline = pgml_pipelinec_new("test_pipeline", "{\ \"abstract\": {\ \"semantic_search\": {\ @@ -173,7 +173,7 @@ let results = collection {% endtab %} {% tab title="C" %} -```c +```cpp r_size = 0; char **results = pgml_collectionc_vector_search(collection, "{\ \"query\": {\ @@ -283,7 +283,7 @@ let results = collection {% endtab %} {% tab title="C" %} -```c +```cpp r_size = 0; char **results = pgml_collectionc_vector_search(collection, "{\ \"query\": {\ @@ -396,7 +396,7 @@ let results = collection {% endtab %} {% tab title="C" %} -```c +```cpp r_size = 0; char **results = pgml_collectionc_vector_search(collection, "{\ \"query\": {\ @@ -502,7 +502,7 @@ let results = collection {% endtab %} {% tab title="C" %} -```c +```cpp r_size = 0; char **results = pgml_collectionc_vector_search(collection, "{\ \"query\": {\ @@ -629,7 +629,7 @@ let results = collection {% endtab %} {% tab title="C" %} -```c +```cpp r_size = 0; char **results = pgml_collectionc_vector_search(collection, "{\ \"query\": {\ From ef206b788c3cd9ca1e7a1d06648d3a04e618f9c1 Mon Sep 17 00:00:00 2001 From: SilasMarvin <19626586+SilasMarvin@users.noreply.github.com> Date: Wed, 5 Jun 2024 12:42:18 -0700 Subject: [PATCH 5/5] Clean up docs --- pgml-cms/docs/api/client-sdk/search.md | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/pgml-cms/docs/api/client-sdk/search.md b/pgml-cms/docs/api/client-sdk/search.md index fd3be649f..b891befc5 100644 --- a/pgml-cms/docs/api/client-sdk/search.md +++ b/pgml-cms/docs/api/client-sdk/search.md @@ -363,13 +363,6 @@ results = await collection.vector_search( ``` {% endtab %} -<<<<<<< HEAD -<<<<<<< HEAD -======= -{% endtab %} ->>>>>>> 97398a2e (Periodic commit) -======= ->>>>>>> 7efe6d9f (Updated everything to have rust and c) {% tab title="Rust" %} ```rust let results = collection @@ -469,13 +462,6 @@ results = await collection.vector_search( ``` {% endtab %} -<<<<<<< HEAD -<<<<<<< HEAD -======= -{% endtab %} ->>>>>>> 97398a2e (Periodic commit) -======= ->>>>>>> 7efe6d9f (Updated everything to have rust and c) {% tab title="Rust" %} ```rust let results = collection