diff --git a/pgml-cms/docs/api/client-sdk/README.md b/pgml-cms/docs/api/client-sdk/README.md index 5e6fc56a0..49510a315 100644 --- a/pgml-cms/docs/api/client-sdk/README.md +++ b/pgml-cms/docs/api/client-sdk/README.md @@ -99,7 +99,7 @@ async fn main() -> Result<(), Error> { {% endtab %} {% tab title="C" %} -```c +```cpp #include #include "pgml.h" @@ -176,7 +176,7 @@ collection.add_pipeline(&mut pipeline).await?; {% endtab %} {% tab title="C" %} -```c +```cpp // Add this code to the end of the main function from the above example. PipelineC * pipeline = pgml_pipelinec_new("sample_pipeline", "{\"text\": {\"splitter\": {\"model\": \"recursive_character\"},\"semantic_search\": {\"model\": \"Alibaba-NLP/gte-base-en-v1.5\"}}}"); @@ -251,7 +251,7 @@ collection.upsert_documents(documents, None).await?; {% endtab %} {% tab title="C" %} -```c +```cpp // Add this code to the end of the main function in the above example. char * documents_to_upsert[2] = {"{\"id\": \"Document One\", \"text\": \"document one contents...\"}", "{\"id\": \"Document Two\", \"text\": \"document two contents...\"}"}; @@ -334,7 +334,7 @@ Ok(()) {% endtab %} {% tab title="C" %} -```c +```cpp // Add this code to the end of the main function in the above example. r_size = 0; char** results = pgml_collectionc_vector_search(collection, "{\"query\": {\"fields\": {\"text\": {\"query\": \"Something about a document...\"}}}, \"limit\": 2}", pipeline, &r_size); diff --git a/pgml-cms/docs/api/client-sdk/collections.md b/pgml-cms/docs/api/client-sdk/collections.md index ebd63afca..ed23e2c64 100644 --- a/pgml-cms/docs/api/client-sdk/collections.md +++ b/pgml-cms/docs/api/client-sdk/collections.md @@ -34,7 +34,7 @@ let mut collection = Collection::new("test_collection", None)?; {% endtab %} {% tab title="C" %} -```c +```cpp CollectionC * collection = pgml_collectionc_new("test_collection", NULL); ``` {% endtab %} @@ -64,7 +64,7 @@ let mut collection = Collection::new("test_collection", Some(CUSTOM_DATABASE_URL {% endtab %} {% tab title="C" %} -```c +```cpp CollectionC * collection = pgml_collectionc_new("test_collection", CUSTOM_DATABASE_URL); ``` {% endtab %} @@ -138,7 +138,7 @@ collection.upsert_documents(documents, None).await?; {% endtab %} {% tab title="C" %} -```c +```cpp char * documents[2] = { "{\"id\": \"document_one\", \"title\": \"Document One\", \"text\": \"Here are the contents of Document 1\", \"random_key\": \"here is some random data\"}", "{\"id\": \"document_two\", \"title\": \"Document Two\", \"text\": \"Here are the contents of Document 2\", \"random_key\": \"here is some random data\"}" @@ -214,7 +214,7 @@ collection.upsert_documents(documents, None).await?; {% endtab %} {% tab title="C" %} -```c +```cpp char * documents[2] = { "{\"id\": \"document_one\", \"title\": \"Document One\", \"text\": \"Here is some new text for document one\", \"random_key\": \"here is some random data\"}", "{\"id\": \"document_two\", \"title\": \"Document Two\", \"text\": \"Here is some new text for document two\", \"random_key\": \"here is some random data\"}" @@ -288,7 +288,7 @@ collection {% endtab %} {% tab title="C" %} -```c +```cpp char * documents[2] = { "{\"id\": \"document_one\", \"new_key\": \"this will be a new key in document one\", \"random_key\": \"this will replace old random_key\"}", "{\"id\": \"document_two\", \"new_key\": \"this will be a new key in document two\", \"random_key\": \"this will replace old random_key\"}" @@ -324,7 +324,7 @@ let documents = collection {% endtab %} {% tab title="C" %} -```c +```cpp unsigned long r_size = 0; char** documents = pgml_collectionc_get_documents(collection, "{\"limit\": 100}", &r_size); ``` @@ -359,7 +359,7 @@ let documents = collection {% endtab %} {% tab title="C" %} -```c +```cpp unsigned long r_size = 0; char** documents = pgml_collectionc_get_documents(collection, "{\"limit\": 100, \"offset\": 10}", &r_size); ``` @@ -390,7 +390,7 @@ let documents = collection {% endtab %} {% tab title="C" %} -```c +```cpp unsigned long r_size = 0; char** documents = pgml_collectionc_get_documents(collection, "{\"limit\": 100, \"last_row_id\": 10}", &r_size); ``` @@ -447,7 +447,7 @@ let documents = collection {% endtab %} {% tab title="C" %} -```c +```cpp unsigned long r_size = 0; char** documents = pgml_collectionc_get_documents(collection, "{\"limit\": 100, \"filter\": {\"id\": {\"$eq\": \"document_one\"}}}", &r_size); ``` @@ -501,7 +501,7 @@ let documents = collection {% endtab %} {% tab title="C" %} -```c +```cpp unsigned long r_size = 0; char** documents = pgml_collectionc_get_documents(collection, "{\"limit\": 100, \"offset\": 10, \"order_by\": {\"id\": \"desc\"}}", &r_size); ``` @@ -549,7 +549,7 @@ let documents = collection {% endtab %} {% tab title="C" %} -```c +```cpp pgml_collectionc_delete_documents(collection, "{\"id\": { \"$eq\": 1}}"); ``` {% endtab %} diff --git a/pgml-cms/docs/api/client-sdk/document-search.md b/pgml-cms/docs/api/client-sdk/document-search.md index 4ada75d7f..9f12d77b0 100644 --- a/pgml-cms/docs/api/client-sdk/document-search.md +++ b/pgml-cms/docs/api/client-sdk/document-search.md @@ -79,7 +79,7 @@ collection.add_pipeline(&mut pipeline).await?; {% endtab %} {% tab title="C" %} -```c +```cpp PipelineC *pipeline = pgml_pipelinec_new("test_pipeline", "{\ \"abstract\": {\ \"semantic_search\": {\ @@ -192,7 +192,7 @@ let results = collection {% endtab %} {% tab title="C" %} -```c +```cpp char * results = pgml_collectionc_search(collection, "\ \"query\": {\ \"full_text_search\": {\ diff --git a/pgml-cms/docs/api/client-sdk/pipelines.md b/pgml-cms/docs/api/client-sdk/pipelines.md index dccf3f2b7..3171f18da 100644 --- a/pgml-cms/docs/api/client-sdk/pipelines.md +++ b/pgml-cms/docs/api/client-sdk/pipelines.md @@ -82,7 +82,7 @@ let mut pipeline = Pipeline::new( {% endtab %} {% tab title="C" %} -```c +```cpp PipelineC * pipeline = pgml_pipelinec_new( "test_pipeline", "{\ @@ -156,7 +156,7 @@ let mut pipeline = Pipeline::new( {% endtab %} {% tab title="C" %} -```c +```cpp PipelineC * pipeline = pgml_pipelinec_new( "test_pipeline", "{\ @@ -174,6 +174,8 @@ PipelineC * pipeline = pgml_pipelinec_new( This `Pipeline` splits and embeds the `body` text enabling semantic search using vectors. This is a very popular `Pipeline` for RAG. +### Switching from OpenAI + We support most every open source model on [Hugging Face](https://huggingface.co/), and OpenAI's embedding models. To use a model from OpenAI specify the `source` as `openai`, and make sure and set the environment variable `OPENAI_API_KEY`. {% tabs %} @@ -204,6 +206,44 @@ pipeline = Pipeline( ) ``` {% endtab %} + +{% tab title="Rust" %} +```rust +let mut pipeline = Pipeline::new( + "test_pipeline", + Some( + serde_json::json!({ + "body": { + "splitter": {"model": "recursive_character"}, + "semantic_search": { + "model": "text-embedding-ada-002", + "source": "openai" + }, + }, + }) + .into(), + ), +)?; + +``` +{% endtab %} + +{% tab title="C" %} +```cpp +PipelineC * pipeline = pgml_pipelinec_new( + "test_pipeline", + "{\ + \"body\": {\ + \"splitter\": {\"model\": \"recursive_character\"},\ + \"semantic_search\": {\ + \"model\": \"text-embedding-ada-002\",\ + \"source\": \"openai\"\ + }\ + }\ + }" +); +``` +{% endtab %} {% endtabs %} ## Customizing the Indexes @@ -267,7 +307,7 @@ let mut pipeline = Pipeline::new( {% endtab %} {% tab title="C" %} -```c +```cpp PipelineC * pipeline = pgml_pipelinec_new( "test_pipeline", "{\ @@ -308,7 +348,7 @@ collection.add_pipeline(&mut pipeline).await?; {% endtab %} {% tab title="C" %} -```c +```cpp pgml_collectionc_add_pipeline(collection, pipeline); ``` {% endtab %} @@ -336,7 +376,7 @@ let mut pipeline = Pipeline::new("test_pipeline", None)?; {% endtab %} {% tab title="C" %} -```c +```cpp PipelineC * pipeline = pgml_pipelinec_new("test_pipeline", NULL); ``` {% endtab %} @@ -381,7 +421,7 @@ collection.disable_pipeline(&mut pipeline).await?; {% endtab %} {% tab title="C" %} -```c +```cpp CollectionC * collection = pgml_collectionc_new("test_collection", NULL); PipelineC * pipeline = pgml_pipelinec_new("test_pipeline", NULL); pgml_collectionc_disable_pipeline(collection, pipeline); @@ -421,7 +461,7 @@ collection.enable_pipeline(&mut pipeline).await?; {% endtab %} {% tab title="C" %} -```c +```cpp CollectionC * collection = pgml_collectionc_new("test_collection", NULL); PipelineC * pipeline = pgml_pipelinec_new("test_pipeline", NULL); pgml_collectionc_enable_pipeline(collection, pipeline); @@ -459,7 +499,7 @@ collection.remove_pipeline(&mut pipeline).await?; {% endtab %} {% tab title="C" %} -```c +```cpp CollectionC * collection = pgml_collectionc_new("test_collection", NULL); PipelineC * pipeline = pgml_pipelinec_new("test_pipeline", NULL); pgml_collectionc_remove_pipeline(collection, pipeline); diff --git a/pgml-cms/docs/api/client-sdk/search.md b/pgml-cms/docs/api/client-sdk/search.md index 2d5b5ce41..b891befc5 100644 --- a/pgml-cms/docs/api/client-sdk/search.md +++ b/pgml-cms/docs/api/client-sdk/search.md @@ -80,7 +80,7 @@ collection.add_pipeline(&mut pipeline).await?; {% endtab %} {% tab title="C" %} -```c +```cpp PipelineC *pipeline = pgml_pipelinec_new("test_pipeline", "{\ \"abstract\": {\ \"semantic_search\": {\ @@ -173,7 +173,7 @@ let results = collection {% endtab %} {% tab title="C" %} -```c +```cpp r_size = 0; char **results = pgml_collectionc_vector_search(collection, "{\ \"query\": {\ @@ -283,7 +283,7 @@ let results = collection {% endtab %} {% tab title="C" %} -```c +```cpp r_size = 0; char **results = pgml_collectionc_vector_search(collection, "{\ \"query\": {\ @@ -389,7 +389,7 @@ let results = collection {% endtab %} {% tab title="C" %} -```c +```cpp r_size = 0; char **results = pgml_collectionc_vector_search(collection, "{\ \"query\": {\ @@ -488,7 +488,7 @@ let results = collection {% endtab %} {% tab title="C" %} -```c +```cpp r_size = 0; char **results = pgml_collectionc_vector_search(collection, "{\ \"query\": {\ @@ -615,7 +615,7 @@ let results = collection {% endtab %} {% tab title="C" %} -```c +```cpp r_size = 0; char **results = pgml_collectionc_vector_search(collection, "{\ \"query\": {\