Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
6c14956
Remote save
SilasMarvin Jul 14, 2023
3b2e3b5
Working remote embeddings
SilasMarvin Jul 17, 2023
f1d6bf7
Compiling
SilasMarvin Jul 19, 2023
22f280e
Commit before moving everything to lazy
SilasMarvin Jul 19, 2023
ec090ca
Working lazy python
SilasMarvin Jul 21, 2023
58c01a3
Commit before moving adjusting Javascript macros
SilasMarvin Jul 21, 2023
9e7b146
Working javascript sdk
SilasMarvin Jul 22, 2023
abb4f5e
Working javascript sdk
SilasMarvin Jul 26, 2023
76ccf3a
The start of working pipelines
SilasMarvin Jul 28, 2023
cfcc66b
Working pipelines in python
SilasMarvin Aug 3, 2023
a9dcbc9
Uncomment
SilasMarvin Aug 3, 2023
8b48750
Added to_dict function
SilasMarvin Aug 3, 2023
6e3f1e6
Small changes and prep for progress bars
SilasMarvin Aug 4, 2023
5365557
Working progress bars and many other small but exciting things
SilasMarvin Aug 4, 2023
66476ff
Prepping to push to test pypi
SilasMarvin Aug 7, 2023
f2613d7
Prepping for javascript
SilasMarvin Aug 8, 2023
92c9623
Improvments to javascript and updates to the python sdk deploy script
SilasMarvin Aug 8, 2023
8a4e3cf
Prepping for real tests
SilasMarvin Aug 8, 2023
12bb3a8
Updated sql
SilasMarvin Aug 9, 2023
2b5b68b
Python examples translated to use pipelines
SilasMarvin Aug 9, 2023
447fc80
Mostly cleaned up and documented crate, and cleaned up python README …
SilasMarvin Aug 10, 2023
333c5e6
Ready for test deployments
SilasMarvin Aug 10, 2023
11bcce2
Updated manual build file for python
SilasMarvin Aug 10, 2023
845bf02
Build fast
SilasMarvin Aug 11, 2023
4904a1a
Small tweaks
SilasMarvin Aug 11, 2023
64dc7e2
Prepping for another test release
SilasMarvin Aug 11, 2023
c3b274c
Prepping to expand query_builder
SilasMarvin Aug 11, 2023
cb143a5
Massive cleanups to macros
SilasMarvin Aug 11, 2023
c66b07b
Massive cleanups to macros
SilasMarvin Aug 11, 2023
b7d4c2d
Ready to release
SilasMarvin Aug 11, 2023
a2c87b1
Formatting
SilasMarvin Aug 13, 2023
dd9c3ab
Renamed files
SilasMarvin Aug 21, 2023
5568608
Added removed file
SilasMarvin Aug 21, 2023
4a2e98d
Removed unnecessary file
SilasMarvin Aug 21, 2023
e673af4
Updated sdk version to 0.9
SilasMarvin Aug 21, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Improvments to javascript and updates to the python sdk deploy script
  • Loading branch information
SilasMarvin committed Aug 21, 2023
commit 92c962315dbfe4545a7480c706cbcc000255506d
56 changes: 7 additions & 49 deletions pgml-sdks/rust/pgml-macros/src/javascript.rs
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ pub fn generate_javascript_derive(parsed: DeriveInput) -> proc_macro::TokenStrea
#[cfg(feature = "javascript")]
impl FromJsType for & #wrapped_type_ident {
type From = neon::types::JsObject;
fn from_js_type<'a, C: neon::context::Context<'a>>(cx: &mut C, arg: neon::handle::Handle<Self::From>) -> neon::result::NeonResult<Self> {
fn from_js_type<'a, C: neon::context::Context<'a>>(cx: &mut C, arg: neon::handle::Handle<Self::From>) -> neon::result::NeonResult<Self> {
use neon::prelude::*;
use core::ops::Deref;
let s: neon::handle::Handle<neon::types::JsBox<#name_ident>> = arg.get(cx, "s")?;
Expand All @@ -136,51 +136,6 @@ pub fn generate_javascript_derive(parsed: DeriveInput) -> proc_macro::TokenStrea
}
}

#[cfg(feature = "javascript")]
impl CustomInto<#wrapped_type_ident> for &#name_ident {
fn custom_into(self) -> #wrapped_type_ident {
*self.wrapped.clone()
}
}

#[cfg(feature = "javascript")]
impl CustomInto<&'static #wrapped_type_ident> for &#name_ident {
fn custom_into(self) -> &'static #wrapped_type_ident {
unsafe {
let ptr = &*self.wrapped as *const #wrapped_type_ident;
let ptr = ptr as *mut #wrapped_type_ident;
let boxed = Box::from_raw(ptr);
Box::leak(boxed)
}
}
}

#[cfg(feature = "javascript")]
impl CustomInto<&'static mut #wrapped_type_ident> for &#name_ident {
fn custom_into(self) -> &'static mut #wrapped_type_ident {
unsafe {
let ptr = &*self.wrapped as *const #wrapped_type_ident;
let ptr = ptr as *mut #wrapped_type_ident;
let boxed = Box::from_raw(ptr);
Box::leak(boxed)
}
}
}

// #[cfg(feature = "javascript")]
// impl FromJsType for #name_ident {
// type From = neon::types::JsObject;
// fn from_js_type<'a, C: neon::context::Context<'a>>(cx: &mut C, arg: neon::handle::Handle<Self::From>) -> neon::result::NeonResult<Self> {
// use neon::prelude::*;
// use core::ops::Deref;
// let s: neon::handle::Handle<neon::types::JsBox<#name_ident>> = arg.get(cx, "s")?;
// let wrapped = (*s).wrapped.clone();
// Ok(Self {
// wrapped
// })
// }
// }

#[cfg(feature = "javascript")]
impl IntoJsResult for #wrapped_type_ident {
type Output = neon::types::JsObject;
Expand Down Expand Up @@ -309,9 +264,12 @@ pub fn generate_javascript_methods(
} else {
quote! {
let this = this.into_inner(&mut cx);
let s: neon::handle::Handle<neon::types::JsBox<#name_ident>> = this.get(&mut cx, "s")?;
// let wrapped: &mut #wrapped_type_ident = <&mut #wrapped_type_ident>::from_js_type(&mut cx, s)?;
let wrapped: &mut #wrapped_type_ident = s.custom_into();
// let s: neon::handle::handle<neon::types::jsbox<#name_ident>> = this.get(&mut cx, "s")?;
// let wrapped: &mut #wrapped_type_ident = s.custom_into();

let s: neon::handle::Handle<neon::types::JsObject> = this.get(&mut cx, "s")?;
let wrapped = <&mut #wrapped_type_ident>::from_js_type(&mut cx, s)?;

#(#inner_prep_arguments)*
}
}
Expand Down
5 changes: 3 additions & 2 deletions pgml-sdks/rust/pgml/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,14 @@ use std::fs::OpenOptions;
use std::io::Write;

const ADDITIONAL_DEFAULTS_FOR_PYTHON: &[u8] = br#"
def py_init_logger(log_level: str) -> None
def py_init_logger(level: Option[str], format: Option[str]) -> None

Json = dict[str, Any]
DateTime = int
"#;

const ADDITIONAL_DEFAULTS_FOR_JAVASCRIPT: &[u8] = br#"
export function setupLogger(log_level: string): void;
export function js_init_logger(level?: string, format?: string): void;

export type Json = { [key: string]: any };
export type DateTime = Date;
Expand All @@ -19,6 +19,7 @@ export function newCollection(name: string, database_url?: string): Collection;
export function newModel(name?: string, task?: string, source?: string, parameters?: string, database_url?: string): Model;
export function newSplitter(name?: string, parameters?: any, database_url?: string): Splitter;
export function newBuiltins(database_url?: string): Builtins;
export function newPipeline(name: string, model: Model, splitter: Splitter): Pipeline;
"#;

fn main() {
Expand Down
128 changes: 46 additions & 82 deletions pgml-sdks/rust/pgml/javascript/tests/typescript-tests/test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,23 @@ import pgml from '../../index.js'

////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////
// PLEASE BE AWARE THESE TESTS DO INVOLVE CHECKS ON LAZILY CREATD DATABASE ITEMS. //
// PLEASE BE AWARE THESE TESTS DO INVOLVE CHECKS ON LAZILY CREATED DATABASE ITEMS //
// IF ANY OF THE COLLECTION NAMES ALREADY EXIST, SOME TESTS MAY FAIL //
// THIS DOES NOT MEAN THE SDK IS BROKEN. PLEASE CLEAR YOUR DATABASE INSTANCE //
// BEFORE RUNNING ANY TESTS //
////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////

const generate_documents = (count: number) => {
// Abort the test run early when no database connection string is configured.
const DATABASE_URL = process.env.DATABASE_URL;
if (!DATABASE_URL) {
  console.log("No DATABASE_URL environment variable found. Please set one")
  process.exit(1)
}
// Fall back to "ERROR" when LOG_LEVEL is unset or empty.
const LOG_LEVEL = process.env.LOG_LEVEL ? process.env.LOG_LEVEL : "ERROR";

// js_init_logger takes (level?, format?): the log level is the first argument.
// The database URL is not a logger argument, so it must not be passed here.
pgml.js_init_logger(LOG_LEVEL);

const generate_dummy_documents = (count: number) => {
let docs = [];
for (let i = 0; i < count; i++) {
docs.push({
Expand All @@ -24,94 +33,49 @@ const generate_documents = (count: number) => {
return docs;
}

it("can lazily create collection", async () => {
let collection_name = "j_ccc_test_2";
let collection = pgml.newCollection(collection_name);
let builtins = pgml.newBuiltins();
let does_collection_exist = await builtins.does_collection_exist(collection_name);
expect(does_collection_exist).toBe(false);
// Do something that requires the collection to be created
await collection.upsert_documents(generate_documents(1));
// Now the collection will exist because it had to be created to upsert documents
does_collection_exist = await builtins.does_collection_exist(collection_name);
await collection.archive();
expect(does_collection_exist).toBe(true);
///////////////////////////////////////////////////
// Test the API exposed is correct ////////////////
///////////////////////////////////////////////////

it("can create collection", () => {
let collection = pgml.newCollection("test_j_c_ccc_0");
expect(collection).toBeTruthy();
});

it("can lazily create model", async () => {
it("can create model", () => {
let model = pgml.newModel();
expect(model.get_verified_in_database()).toBe(false);
let id = await model.get_id();
expect(id).toBeDefined();
expect(model.get_verified_in_database()).toBe(true);
})
expect(model).toBeTruthy();
});

it("can lazily create splitter", async () => {
it("can create splitter", () => {
let splitter = pgml.newSplitter();
expect(splitter.get_verified_in_database()).toBe(false);
let id = await splitter.get_id();
expect(id).toBeDefined();
expect(splitter.get_verified_in_database()).toBe(true);
})
expect(splitter).toBeTruthy();
});

it("can vector search", async () => {
let collection_name = "j_cvs_test_0";
let collection = pgml.newCollection(collection_name);
it("can create pipeline", () => {
let model = pgml.newModel();
let splitter = pgml.newSplitter();
await collection.upsert_documents(generate_documents(2));
// Splitter should not be verified in the database yet
expect(splitter.get_verified_in_database()).toBe(false);
await collection.generate_chunks(splitter);
// Now splitter should be verified in the database
expect(splitter.get_verified_in_database()).toBe(true);
// Model should not be verified in the database yet
expect(model.get_verified_in_database()).toBe(false);
await collection.generate_embeddings(model, splitter);
// Now model should be verified in the database
expect(model.get_verified_in_database()).toBe(true);
let results = await collection.vector_search("Here is some query", model, splitter);
await collection.archive();
expect(results.length).toBe(2);
})
let pipeline = pgml.newPipeline("test_j_p_ccc_0", model, splitter);
expect(pipeline).toBeTruthy();
});

it("can vector search with remote embeddings", async () => {
let collection_name = "j_cvswre_test_0";
let collection = pgml.newCollection(collection_name);
let model = pgml.newModel("text-embedding-ada-002", "embeddings", "openai");
let splitter = pgml.newSplitter();
await collection.upsert_documents(generate_documents(2));
await collection.generate_chunks(splitter);
await collection.generate_embeddings(model, splitter);
let results = await collection.vector_search("Here is some query", model, splitter);
await collection.archive();
expect(results.length).toBe(2);
})
it("can create builtins", () => {
let builtins = pgml.newBuiltins();
expect(builtins).toBeTruthy();
});

it("can vector search with query builder", async () => {
let collection_name = "j_cvswqb_test_0";
let collection = pgml.newCollection(collection_name);
let model = pgml.newModel();
let splitter = pgml.newSplitter();
await collection.upsert_documents(generate_documents(2));
await collection.generate_chunks(splitter);
await collection.generate_embeddings(model, splitter);
await collection.generate_tsvectors();
let results = await collection.query().vector_recall("Here is some query", model, splitter).filter({
"metadata": {
"metadata": {
"$or": [
{"uuid": {"$eq": 0 }},
{"uuid": {"$eq": 10 }},
{"category": {"$eq": [1, 2, 3]}}
]
///////////////////////////////////////////////////
// Test various vector searches ///////////////////
///////////////////////////////////////////////////

}
},
"full_text": {
"text": "Test document"
}
}).run()
await collection.archive();
expect(results.length).toBe(2);
})
// it("can vector search with local embeddings", async () => {
// let model = pgml.newModel();
// let splitter = pgml.newSplitter();
// let pipeline = pgml.newPipeline("test_j_p_cvswle_0", model, splitter);
// let collection = pgml.newCollection("test_j_c_cvswle_0");
// await collection.upsert_documents(generate_dummy_documents(3));
// await collection.add_pipeline(pipeline);
// let results = await collection.vector_search("Here is some query", pipeline);
// expect(results).toHaveLength(3);
// await collection.archive();
// });
2 changes: 1 addition & 1 deletion pgml-sdks/rust/pgml/src/builtins.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ pub struct Builtins {
use crate::{get_or_initialize_pool, query_runner::QueryRunner, types::Json};

#[cfg(feature = "javascript")]
use crate::{languages::javascript::*, query_runner::QueryRunnerJavascript, languages::CustomInto};
use crate::languages::javascript::*;

#[cfg(feature = "python")]
use crate::{languages::CustomInto, query_runner::QueryRunnerPython};
Expand Down
9 changes: 3 additions & 6 deletions pgml-sdks/rust/pgml/src/collection.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,7 @@ use crate::types::Json;
use crate::utils;

#[cfg(feature = "javascript")]
use crate::{
languages::javascript::*, languages::CustomInto, pipeline::PipelineJavascript,
query_builder::QueryBuilderJavascript,
};
use crate::languages::javascript::*;

#[cfg(feature = "python")]
use crate::{languages::CustomInto, pipeline::PipelinePython, query_builder::QueryBuilderPython};
Expand Down Expand Up @@ -529,10 +526,10 @@ impl Collection {
Ok(Some((source_uuid, text, metadata)))
}).collect();

// Yes we could continue chaining the above iterators but types become super annoying to
// We could continue chaining the above iterators but types become super annoying to
// deal with, especially because we are dealing with async functions. This is much easier to read
// Also, we may want to use a variant of chunks that is owned, I'm not 100% sure of what
// cloning happens when passing values into sqlx bind. itertools variant will not work as
// cloning happens when passing values into sqlx bind. itertools variants will not work as
// it is not thread safe and pyo3 will get upset
let mut document_ids = Vec::new();
for chunk in documents?.chunks(10) {
Expand Down
13 changes: 13 additions & 0 deletions pgml-sdks/rust/pgml/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -176,13 +176,26 @@ fn pgml(_py: pyo3::Python, m: &pyo3::types::PyModule) -> pyo3::PyResult<()> {
Ok(())
}

/// Initializes the SDK logger from JavaScript.
///
/// Both JavaScript arguments are optional: argument 0 is read as the log
/// level and argument 1 as the log format, each converted to an
/// `Option<String>`. A failed conversion is propagated to the caller.
///
/// The value returned by `init_logger` is discarded, so a failed
/// initialization is not reported back to JavaScript.
/// NOTE(review): presumably this is meant to tolerate repeated
/// initialization — confirm against `init_logger`.
#[cfg(feature = "javascript")]
fn js_init_logger(
    mut cx: neon::context::FunctionContext,
) -> neon::result::JsResult<neon::types::JsUndefined> {
    use crate::languages::javascript::*;

    // Convert the optional level first so its error (if any) surfaces first.
    let level_arg = cx.argument_opt(0);
    let log_level = <Option<String>>::from_option_js_type(&mut cx, level_arg)?;

    let format_arg = cx.argument_opt(1);
    let log_format = <Option<String>>::from_option_js_type(&mut cx, format_arg)?;

    // Best effort: the outcome of logger initialization is intentionally ignored.
    let _ = init_logger(log_level, log_format);

    ().into_js_result(&mut cx)
}

/// Neon module entry point: registers the functions exposed to JavaScript.
///
/// Exports the logger initializer plus one `new*` constructor function for
/// each of the Collection, Model, Splitter, Builtins and Pipeline wrappers.
/// Any registration failure is returned to the caller through `?`.
#[cfg(feature = "javascript")]
#[neon::main]
fn main(mut cx: neon::context::ModuleContext) -> neon::result::NeonResult<()> {
// Logger setup, callable from JavaScript as `js_init_logger`.
cx.export_function("js_init_logger", js_init_logger)?;
// Constructor functions, one per wrapped type.
cx.export_function("newCollection", collection::CollectionJavascript::new)?;
cx.export_function("newModel", model::ModelJavascript::new)?;
cx.export_function("newSplitter", splitter::SplitterJavascript::new)?;
cx.export_function("newBuiltins", builtins::BuiltinsJavascript::new)?;
cx.export_function("newPipeline", pipeline::PipelineJavascript::new)?;
Ok(())
}

Expand Down
2 changes: 1 addition & 1 deletion pgml-sdks/rust/pgml/src/model.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ use crate::{
};

#[cfg(feature = "javascript")]
use crate::{languages::javascript::*, languages::CustomInto};
use crate::languages::javascript::*;

#[cfg(feature = "python")]
use crate::languages::CustomInto;
Expand Down
3 changes: 0 additions & 3 deletions pgml-sdks/rust/pgml/src/models.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,6 @@ use serde::Serialize;

use crate::types::{DateTime, Json};

#[cfg(feature = "javascript")]
use crate::languages::javascript::*;

/// A pipeline
#[enum_def]
#[derive(FromRow)]
Expand Down
3 changes: 1 addition & 2 deletions pgml-sdks/rust/pgml/src/pipeline.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,7 @@ use crate::{
use crate::{languages::CustomInto, model::ModelPython, splitter::SplitterPython};

#[cfg(feature = "javascript")]
use crate::{languages::{javascript::*, CustomInto}, model::ModelJavascript, splitter::SplitterJavascript};

use crate::languages::javascript::*;

#[derive(Debug, Clone)]
pub enum PipelineSyncStatus {
Expand Down
5 changes: 1 addition & 4 deletions pgml-sdks/rust/pgml/src/query_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,7 @@ use crate::{
};

#[cfg(feature = "javascript")]
use crate::{
languages::{javascript::*, CustomInto},
pipeline::PipelineJavascript,
};
use crate::languages::javascript::*;

#[cfg(feature = "python")]
use crate::{languages::CustomInto, pipeline::PipelinePython};
Expand Down
2 changes: 1 addition & 1 deletion pgml-sdks/rust/pgml/src/query_runner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use sqlx::{Postgres, Row};
use crate::{get_or_initialize_pool, types::Json};

#[cfg(feature = "javascript")]
use crate::{languages::javascript::*, languages::CustomInto};
use crate::languages::javascript::*;

#[cfg(feature = "python")]
use crate::languages::CustomInto;
Expand Down
2 changes: 1 addition & 1 deletion pgml-sdks/rust/pgml/src/splitter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ use crate::{
};

#[cfg(feature = "javascript")]
use crate::{languages::javascript::*, languages::CustomInto};
use crate::languages::javascript::*;

#[cfg(feature = "python")]
use crate::languages::CustomInto;
Expand Down