Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
6c14956
Remote save
SilasMarvin Jul 14, 2023
3b2e3b5
Working remote embeddings
SilasMarvin Jul 17, 2023
f1d6bf7
Compiling
SilasMarvin Jul 19, 2023
22f280e
Commit before moving everything to lazy
SilasMarvin Jul 19, 2023
ec090ca
Working lazy python
SilasMarvin Jul 21, 2023
58c01a3
Commit before moving adjusting Javascript macros
SilasMarvin Jul 21, 2023
9e7b146
Working javascript sdk
SilasMarvin Jul 22, 2023
abb4f5e
Working javascript sdk
SilasMarvin Jul 26, 2023
76ccf3a
The start of working pipelines
SilasMarvin Jul 28, 2023
cfcc66b
Working pipelines in python
SilasMarvin Aug 3, 2023
a9dcbc9
Uncomment
SilasMarvin Aug 3, 2023
8b48750
Added to_dict function
SilasMarvin Aug 3, 2023
6e3f1e6
Small changes and prep for progress bars
SilasMarvin Aug 4, 2023
5365557
Working progress bars and many other small but exciting things
SilasMarvin Aug 4, 2023
66476ff
Prepping to push to test pypi
SilasMarvin Aug 7, 2023
f2613d7
Prepping for javascript
SilasMarvin Aug 8, 2023
92c9623
Improvments to javascript and updates to the python sdk deploy script
SilasMarvin Aug 8, 2023
8a4e3cf
Prepping for real tests
SilasMarvin Aug 8, 2023
12bb3a8
Updated sql
SilasMarvin Aug 9, 2023
2b5b68b
Python examples translated to use pipelines
SilasMarvin Aug 9, 2023
447fc80
Mostly cleaned up and documented crate, and cleaned up python README …
SilasMarvin Aug 10, 2023
333c5e6
Ready for test deployments
SilasMarvin Aug 10, 2023
11bcce2
Updated manual build file for python
SilasMarvin Aug 10, 2023
845bf02
Build fast
SilasMarvin Aug 11, 2023
4904a1a
Small tweaks
SilasMarvin Aug 11, 2023
64dc7e2
Prepping for another test release
SilasMarvin Aug 11, 2023
c3b274c
Prepping to expand query_builder
SilasMarvin Aug 11, 2023
cb143a5
Massive cleanups to macros
SilasMarvin Aug 11, 2023
c66b07b
Massive cleanups to macros
SilasMarvin Aug 11, 2023
b7d4c2d
Ready to release
SilasMarvin Aug 11, 2023
a2c87b1
Formatting
SilasMarvin Aug 13, 2023
dd9c3ab
Renamed files
SilasMarvin Aug 21, 2023
5568608
Added removed file
SilasMarvin Aug 21, 2023
4a2e98d
Removed unnecessary file
SilasMarvin Aug 21, 2023
e673af4
Updated sdk version to 0.9
SilasMarvin Aug 21, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Small tweaks
  • Loading branch information
SilasMarvin committed Aug 21, 2023
commit 4904a1a10d85691a01a565756ff1736e4e4378d8
2 changes: 0 additions & 2 deletions pgml-sdks/rust/pgml/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
__pycache__/
*.py[cod]
*$py.class
*.pyi

# C extensions
*.so
Expand All @@ -14,7 +13,6 @@ node_modules/

# Distribution / packaging
.Python
*.pyi
build/
develop-eggs/
dist/
Expand Down
1 change: 1 addition & 0 deletions pgml-sdks/rust/pgml/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pgml-sdks/rust/pgml/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ tracing = { version = "0.1.37" }
tracing-subscriber = { version = "0.3.17", features = ["json"] }
indicatif = "0.17.6"
serde = "1.0.181"
futures = "0.3.28"

[features]
default = []
Expand Down
4 changes: 2 additions & 2 deletions pgml-sdks/rust/pgml/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@ use std::fs::OpenOptions;
use std::io::Write;

const ADDITIONAL_DEFAULTS_FOR_PYTHON: &[u8] = br#"
def py_init_logger(level: Optional[str], format: Optional[str]) -> None
def py_init_logger(level: Optional[str] = "Default set in Rust. Please see documentation.", format: Optional[str] = "Default set in Rust. Please see documentation.") -> None

Json = dict[str, Any]
Json = Any
DateTime = int
"#;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from rich.console import Console
import asyncio


async def main():
load_dotenv()
console = Console()
Expand Down Expand Up @@ -36,10 +37,7 @@ async def main():
console.print("Querying for context ...")
start = time()
results = (
await collection.query()
.vector_recall(query, pipeline)
.limit(5)
.fetch_all()
await collection.query().vector_recall(query, pipeline).limit(5).fetch_all()
)
end = time()
console.print("\n Results for '%s' " % (query), style="bold")
Expand All @@ -54,7 +52,9 @@ async def main():
builtins = Builtins()
console.print("Querying for answer ...")
start = time()
answer = await builtins.transform("question-answering", [json.dumps({"question": query, "context": context})])
answer = await builtins.transform(
"question-answering", [json.dumps({"question": query, "context": context})]
)
end = time()
console.print("Results for query '%s'" % query, style="bold")
console.print(answer)
Expand All @@ -63,5 +63,6 @@ async def main():
# Archive collection
await collection.archive()


if __name__ == "__main__":
asyncio.run(main())
4 changes: 3 additions & 1 deletion pgml-sdks/rust/pgml/python/examples/question_answering.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,9 @@ async def main():
query = "Who won 20 grammy awards?"
console.print("Querying for %s..." % query)
start = time()
results = await collection.query().vector_recall(query, pipeline).limit(5).fetch_all()
results = (
await collection.query().vector_recall(query, pipeline).limit(5).fetch_all()
)
end = time()
console.print("\n Results for '%s' " % (query), style="bold")
console.print(results)
Expand Down
4 changes: 3 additions & 1 deletion pgml-sdks/rust/pgml/python/examples/semantic_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,9 @@ async def main():
query = "What is a good mobile os?"
console.print("Querying for %s..." % query)
start = time()
results = await collection.query().vector_recall(query, pipeline).limit(5).fetch_all()
results = (
await collection.query().vector_recall(query, pipeline).limit(5).fetch_all()
)
end = time()
console.print("\n Results for '%s' " % (query), style="bold")
console.print(results)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,9 @@ async def main():
query = "Which country has the highest GDP in 2020?"
console.print("Querying for %s..." % query)
start = time()
results = await collection.query().vector_recall(query, pipeline).limit(5).fetch_all()
results = (
await collection.query().vector_recall(query, pipeline).limit(5).fetch_all()
)
end = time()
console.print("\n Results for '%s' " % (query), style="bold")
console.print(results)
Expand Down
89 changes: 89 additions & 0 deletions pgml-sdks/rust/pgml/python/pgml/pgml.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@

def py_init_logger(level: Optional[str] = "Default set in Rust. Please see documentation.", format: Optional[str] = "Default set in Rust. Please see documentation.") -> None

Json = Any
DateTime = int

# Top of file key: A12BECOD!
from typing import List, Dict, Optional, Self, Any


class Builtins:
def __init__(self, database_url: Optional[str] = "Default set in Rust. Please check the documentation.") -> Self
...
def query(self, query: str) -> QueryRunner
...
async def transform(self, task: Json, inputs: List[str], args: Optional[Json] = {}) -> Json
...

class Collection:
def __init__(self, name: str, database_url: Optional[str] = "Default set in Rust. Please check the documentation.") -> Self
...
async def add_pipeline(self, pipeline: Pipeline) -> None
...
async def remove_pipeline(self, pipeline: Pipeline) -> None
...
async def enable_pipeline(self, pipeline: Pipeline) -> None
...
async def disable_pipeline(self, pipeline: Pipeline) -> None
...
async def upsert_documents(self, documents: List[Json], strict: Optional[bool] = True) -> None
...
async def get_documents(self, last_id: Optional[int] = 1, limit: Optional[int] = 1) -> List[Json]
...
async def vector_search(self, query: str, pipeline: Pipeline, query_parameters: Optional[Json] = {}, top_k: Optional[int] = 1) -> List[tuple[float, str, Json]]
...
async def archive(self) -> None
...
def query(self) -> QueryBuilder
...
async def get_pipelines(self) -> List[Pipeline]
...
async def get_pipeline(self, name: str) -> Pipeline
...
async def exists(self) -> bool
...

class Model:
def __init__(self, name: Optional[str] = "Default set in Rust. Please check the documentation.", source: Optional[str] = "Default set in Rust. Please check the documentation.", parameters: Optional[Json] = {}) -> Self
...

class Pipeline:
def __init__(self, name: str, model: Optional[Model] = None, splitter: Optional[Splitter] = None, parameters: Optional[Json] = {}) -> Self
...
async def get_status(self) -> Any
...
async def to_dict(self) -> Json
...

class QueryBuilder:
def limit(self, limit: int) -> Self
...
def filter(self, filter: Json) -> Self
...
def vector_recall(self, query: str, pipeline: Pipeline, query_parameters: Optional[Json] = {}) -> Self
...
async def fetch_all(self) -> List[tuple[float, str, Json]]
...
def to_full_string(self) -> str
...

class QueryRunner:
async def fetch_all(self) -> Json
...
async def execute(self) -> None
...
def bind_string(self, bind_value: str) -> Self
...
def bind_int(self, bind_value: int) -> Self
...
def bind_float(self, bind_value: float) -> Self
...
def bind_bool(self, bind_value: bool) -> Self
...
def bind_json(self, bind_value: Json) -> Self
...

class Splitter:
def __init__(self, name: Optional[str] = "Default set in Rust. Please check the documentation.", parameters: Optional[Json] = {}) -> Self
...
37 changes: 20 additions & 17 deletions pgml-sdks/rust/pgml/python/tests/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,7 @@
print("No DATABASE_URL environment variable found. Please set one")
exit(1)

LOG_LEVEL = os.environ.get("LOG_LEVEL")
if LOG_LEVEL is None:
print("No LOG_LEVEL environment variable found setting to ERROR")
LOG_LEVEL = "ERROR"
pgml.py_init_logger(LOG_LEVEL)
pgml.py_init_logger()


def generate_dummy_documents(count: int) -> List[Dict[str, Any]]:
Expand Down Expand Up @@ -162,29 +158,39 @@ async def test_pipeline_to_dict():
###################################################


async def vector_search(collection, pipeline):
results = (
await collection.query()
def vector_search(collection_name, pipeline_name):
collection = pgml.Collection(collection_name)
pipeline = pgml.Pipeline(pipeline_name)
result = asyncio.run(
collection.query()
.vector_recall("Here is some query", pipeline)
.limit(10)
.fetch_all()
)
return len(results)
print(result)
return [0, 1, 2]


# @pytest.mark.asyncio
# async def test_multiprocessing():
# collection_name = "test_p_p_tm_1"
# pipeline_name = "test_p_c_tm_4"
#
# model = pgml.Model()
# splitter = pgml.Splitter()
# pipeline = pgml.Pipeline("test_p_p_tm_1", model, splitter)
# collection = pgml.Collection(name="test_p_c_tm_4")
# pipeline = pgml.Pipeline(pipeline_name, model, splitter)
#
# collection = pgml.Collection(collection_name)
# await collection.upsert_documents(generate_dummy_documents(3))
# await collection.add_pipeline(pipeline)
#
# with Pool(5) as p:
# results = p.starmap_async(vector_search, [(collection, pipeline) for _ in range(5)])
# for x in results.get():
# assert(x == 3)
# results = p.starmap(
# vector_search, [(collection_name, pipeline_name) for _ in range(5)]
# )
# for x in results:
# print(x)
# assert len(x) == 3
#
# await collection.archive()

Expand All @@ -201,19 +207,16 @@ async def silas_test_add_pipeline():
collection = pgml.Collection(name="silas_test_c_10")
await collection.add_pipeline(pipeline)


async def silas_test_upsert_documents():
collection = pgml.Collection(name="silas_test_c_9")
await collection.upsert_documents(generate_dummy_documents(10))


async def silas_test_vector_search():
pipeline = pgml.Pipeline("silas_test_p_1")
collection = pgml.Collection(name="silas_test_c_9")
results = await collection.vector_search("Here is some query", pipeline)
print(results)


# asyncio.run(silas_test_add_pipeline())
# asyncio.run(silas_test_upsert_documents())
# asyncio.run(silas_test_vector_search())
4 changes: 2 additions & 2 deletions pgml-sdks/rust/pgml/src/builtins.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ use crate::{get_or_initialize_pool, query_runner::QueryRunner, types::Json};
use crate::languages::javascript::*;

#[cfg(feature = "python")]
use crate::{languages::CustomInto, query_runner::QueryRunnerPython};
use crate::{languages::python::*, query_runner::QueryRunnerPython};

#[custom_methods(new, query, transform)]
impl Builtins {
Expand All @@ -28,7 +28,7 @@ impl Builtins {
/// * `query` - The query to run
///
/// # Example
///
///
/// ```
/// use pgml::Builtins;
///
Expand Down
22 changes: 17 additions & 5 deletions pgml-sdks/rust/pgml/src/collection.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ use crate::utils;
use crate::languages::javascript::*;

#[cfg(feature = "python")]
use crate::{languages::CustomInto, pipeline::PipelinePython, query_builder::QueryBuilderPython};
use crate::{languages::python::*, pipeline::PipelinePython, query_builder::QueryBuilderPython};

/// Our project tasks
#[derive(Debug, Clone)]
Expand Down Expand Up @@ -710,9 +710,21 @@ impl Collection {
if !pipelines.is_empty() {
let mp = MultiProgress::new();
mp.println("Syncing Pipelines...")?;
for mut pipeline in pipelines {
pipeline.execute(&document_ids, mp.clone()).await?;
}
use futures::stream::StreamExt;
futures::stream::iter(pipelines)
// Need this map to get around moving the document_ids and mp
.map(|pipeline| (pipeline, document_ids.clone(), mp.clone()))
.for_each_concurrent(10, |(mut pipeline, document_ids, mp)| async move {
pipeline
.execute(&document_ids, mp)
.await
.expect("Failed to execute pipeline");
})
.await;
// pipelines.into_iter().for_each
// for mut pipeline in pipelines {
// pipeline.execute(&document_ids, mp.clone()).await?;
// }
eprintln!("Done Syncing Pipelines\n");
}
Ok(())
Expand Down Expand Up @@ -894,7 +906,7 @@ impl Collection {
QueryBuilder::new(self.clone())
}

/// Gets all pipelines for the [Collection]
/// Gets all pipelines for the [Collection]
///
/// # Example
///
Expand Down
14 changes: 0 additions & 14 deletions pgml-sdks/rust/pgml/src/languages/javascript.rs
Original file line number Diff line number Diff line change
Expand Up @@ -184,20 +184,6 @@ impl<T: IntoJsResult> IntoJsResult for Vec<T> {
}
}

// Our own types
// gen_into!(
// crate::database::Database,
// JsBox<RefCell<crate::database::Database>>,
// RefCell<crate::database::Database>
// );
// impl Finalize for crate::database::Database {}
// gen_into!(
// crate::collection::Collection,
// JsBox<RefCell<crate::collection::Collection>>,
// RefCell<crate::collection::Collection>
// );
// impl Finalize for crate::collection::Collection {}

////////////////////////////////////////////////////////////////////////////////
// JS To Rust //////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
Expand Down
Loading