@@ -86,9 +86,9 @@ const main = async () => {
Continuing within `const main`

```javascript
- model = pgml.newModel();
- splitter = pgml.newSplitter();
- pipeline = pgml.Pipeline("my_javascript_pipeline", model, splitter);
+ const model = pgml.newModel();
+ const splitter = pgml.newSplitter();
+ const pipeline = pgml.newPipeline("my_javascript_pipeline", model, splitter);
await collection.add_pipeline(pipeline);
```
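For orientation (not part of the diff): the snippets in this section run inside the `main` async wrapper named in the hunk context above. A minimal sketch of that wrapper, assuming the package is imported with `require("pgml")`; the collection name and logging here are illustrative, not taken from the diff:

```javascript
const pgml = require("pgml");

const main = async () => {
  // Illustrative collection; the snippets in this section assume one exists.
  const collection = pgml.newCollection("my_javascript_collection");

  const model = pgml.newModel();
  const splitter = pgml.newSplitter();
  const pipeline = pgml.newPipeline("my_javascript_pipeline", model, splitter);
  await collection.add_pipeline(pipeline);
};

main().then(() => console.log("Done")).catch(console.error);
```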
@@ -213,7 +213,7 @@ Documents are dictionaries with two required keys: `id` and `text`. All other ke

**Upsert documents with metadata**
```javascript
- documents = [
+ const documents = [
  {
    id: "Document 1",
    text: "Here are the contents of Document 1",
@@ -225,7 +225,7 @@ documents = [
    random_key: "this will be metadata for the document"
  }
]
- collection = Collection("test_collection")
+ const collection = pgml.newCollection("test_collection")
await collection.upsert_documents(documents)
```
@@ -237,16 +237,16 @@ Pipelines are required to perform search. See the [Pipelines Section](#pipelines

**Basic vector search**
```javascript
- collection = pgml.newCollection("test_collection")
- pipeline = pgml.newPipeline("test_pipeline")
- results = await collection.query().vector_recall("Why is PostgresML the best?", pipeline).fetch_all()
+ const collection = pgml.newCollection("test_collection")
+ const pipeline = pgml.newPipeline("test_pipeline")
+ const results = await collection.query().vector_recall("Why is PostgresML the best?", pipeline).fetch_all()
```

**Vector search with custom limit**
```javascript
- collection = pgml.newCollection("test_collection")
- pipeline = pgml.newPipeline("test_pipeline")
- results = await collection.query().vector_recall("Why is PostgresML the best?", pipeline).limit(10).fetch_all()
+ const collection = pgml.newCollection("test_collection")
+ const pipeline = pgml.newPipeline("test_pipeline")
+ const results = await collection.query().vector_recall("Why is PostgresML the best?", pipeline).limit(10).fetch_all()
```

#### Metadata Filtering
@@ -255,15 +255,15 @@ We provide powerful and flexible arbitrarily nested metadata filtering based off

**Vector search with $eq metadata filtering**
```javascript
- collection = pgml.newCollection("test_collection")
- pipeline = pgml.newPipeline("test_pipeline")
- results = await collection.query()
+ const collection = pgml.newCollection("test_collection")
+ const pipeline = pgml.newPipeline("test_pipeline")
+ const results = await collection.query()
  .vector_recall("Here is some query", pipeline)
  .limit(10)
  .filter({
-   "metadata": {
-     "uuid": {
-       "$eq": 1
+   metadata: {
+     uuid: {
+       $eq: 1
      }
    }
  })
@@ -274,15 +274,15 @@ The above query would filter out all documents that do not contain a key `uuid`
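As a concrete illustration (hypothetical, not from the diff): since every key other than `id` and `text` is stored as metadata, a document upserted like the one below would be kept by the `$eq` filter above.

```javascript
// Hypothetical document that the {uuid: {$eq: 1}} filter above would keep.
const matching_document = {
  id: "Document 42",                    // illustrative id
  text: "Some text to chunk and embed", // illustrative body
  uuid: 1,                              // stored as metadata; matches $eq: 1
};
```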

**Vector search with $gte metadata filtering**
```javascript
- collection = pgml.newCollection("test_collection")
- pipeline = pgml.newPipeline("test_pipeline")
- results = await collection.query()
+ const collection = pgml.newCollection("test_collection")
+ const pipeline = pgml.newPipeline("test_pipeline")
+ const results = await collection.query()
  .vector_recall("Here is some query", pipeline)
  .limit(10)
  .filter({
-   "metadata": {
-     "index": {
-       "$gte": 3
+   metadata: {
+     index: {
+       $gte: 3
      }
    }
  })
@@ -294,31 +294,31 @@ The above query would filter out all documents that do not contain a key `index`

**Vector search with $or and $and metadata filtering**
```javascript
- collection = pgml.newCollection("test_collection")
- pipeline = pgml.newPipeline("test_pipeline")
- results = await collection.query()
+ const collection = pgml.newCollection("test_collection")
+ const pipeline = pgml.newPipeline("test_pipeline")
+ const results = await collection.query()
  .vector_recall("Here is some query", pipeline)
  .limit(10)
  .filter({
-   "metadata": {
-     "$or": [
+   metadata: {
+     $or: [
      {
-       "$and": [
+       $and: [
        {
-         "$eq": {
-           "uuid": 1
+         uuid: {
+           $eq: 1
          }
        },
        {
-         "$lt": {
-           "index": 100
+         index: {
+           $lt: 100
          }
        }
      ]
    },
    {
-     "special": {
-       "$ne": True
+     special: {
+       $ne: true
      }
    }
  ]
@@ -334,15 +334,15 @@ The above query would filter out all documents that do not have a key `special`
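Read as plain boolean logic, the `$or`/`$and` filter above keeps a document when the condition below holds. This restatement is illustrative only (assuming the Mongo-style operators behave as their names suggest); `keep` and `doc` are not SDK APIs:

```javascript
// Illustrative restatement of the filter above; doc stands for a document's metadata.
const keep = (doc) =>
  (doc.uuid === 1 && doc.index < 100) || doc.special !== true;
```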

If full text search is enabled for the associated Pipeline, documents can be first filtered by full text search and then recalled by embedding similarity.

```javascript
- collection = pgml.newCollection("test_collection")
- pipeline = pgml.newPipeline("test_pipeline")
- results = await collection.query()
+ const collection = pgml.newCollection("test_collection")
+ const pipeline = pgml.newPipeline("test_pipeline")
+ const results = await collection.query()
  .vector_recall("Here is some query", pipeline)
  .limit(10)
  .filter({
-   "full_text": {
-     "configuration": "english",
-     "text": "Match Me"
+   full_text: {
+     configuration: "english",
+     text: "Match Me"
    }
  })
  .fetch_all()
@@ -362,20 +362,20 @@ Models are used for embedding chunked documents. We support most every open sou

**Create a default Model "intfloat/e5-small" with default parameters: {}**
```javascript
- model = pgml.newModel()
+ const model = pgml.newModel()
```

**Create a Model with custom parameters**
```javascript
- model = pgml.newModel(
-   name = "hkunlp/instructor-base",
-   parameters = {instruction: "Represent the Wikipedia document for retrieval: "}
+ const model = pgml.newModel(
+   "hkunlp/instructor-base",
+   {instruction: "Represent the Wikipedia document for retrieval: "}
)
```

**Use an OpenAI model**
```javascript
- model = pgml.newModel(name="text-embedding-ada-002", source="openai")
+ const model = pgml.newModel("text-embedding-ada-002", "openai")
```

### Splitters
@@ -384,14 +384,14 @@ Splitters are used to split documents into chunks before embedding them. We supp

**Create a default Splitter "recursive_character" with default parameters: {}**
```javascript
- splitter = pgml.newSplitter()
+ const splitter = pgml.newSplitter()
```

**Create a Splitter with custom parameters**
```javascript
- splitter = pgml.newSplitter(
-   name = "recursive_character",
-   parameters = {chunk_size: 1500, chunk_overlap: 40}
+ const splitter = pgml.newSplitter(
+   "recursive_character",
+   {chunk_size: 1500, chunk_overlap: 40}
)
```
@@ -402,9 +402,9 @@ When adding a Pipeline to a collection it is required that the Pipeline has a Model
The first time a Pipeline is added to a Collection it will automatically chunk and embed any documents already in that Collection.

```javascript
- model = pgml.newModel()
- splitter = pgml.newSplitter()
- pipeline = pgml.newPipeline("test_pipeline", model, splitter)
+ const model = pgml.newModel()
+ const splitter = pgml.newSplitter()
+ const pipeline = pgml.newPipeline("test_pipeline", model, splitter)
await collection.add_pipeline(pipeline)
```
@@ -415,9 +415,9 @@ Pipelines can take additional arguments enabling full text search. When full tex
For more information on full text search please see: [Postgres Full Text Search](https://www.postgresql.org/docs/15/textsearch.html).

```javascript
- model = pgml.newModel()
- splitter = pgml.newSplitter()
- pipeline = pgml.newPipeline("test_pipeline", model, splitter, {
+ const model = pgml.newModel()
+ const splitter = pgml.newSplitter()
+ const pipeline = pgml.newPipeline("test_pipeline", model, splitter, {
  "full_text_search": {
    active: true,
    configuration: "english"
@@ -431,9 +431,9 @@ await collection.add_pipeline(pipeline)
Pipelines are a required argument when performing vector search. After a Pipeline has been added to a Collection, the Model and Splitter can be omitted when instantiating it.

```javascript
- pipeline = pgml.newPipeline("test_pipeline")
- collection = pgml.newCollection("test_collection")
- results = await collection.query().vector_recall("Why is PostgresML the best?", pipeline).fetch_all()
+ const pipeline = pgml.newPipeline("test_pipeline")
+ const collection = pgml.newCollection("test_collection")
+ const results = await collection.query().vector_recall("Why is PostgresML the best?", pipeline).fetch_all()
```

### Enabling, Disabling, and Removing Pipelines
@@ -442,26 +442,26 @@ Pipelines can be disabled or removed to prevent them from running automatically

**Disable a Pipeline**
```javascript
- pipeline = pgml.newPipeline("test_pipeline")
- collection = pgml.newCollection("test_collection")
+ const pipeline = pgml.newPipeline("test_pipeline")
+ const collection = pgml.newCollection("test_collection")
await collection.disable_pipeline(pipeline)
```

Disabling a Pipeline prevents it from running automatically, but leaves all chunks and embeddings already created by that Pipeline in the database.

**Enable a Pipeline**
```javascript
- pipeline = pgml.newPipeline("test_pipeline")
- collection = pgml.newCollection("test_collection")
+ const pipeline = pgml.newPipeline("test_pipeline")
+ const collection = pgml.newCollection("test_collection")
await collection.enable_pipeline(pipeline)
```

Enabling a Pipeline will cause it to automatically run and chunk and embed all documents it may have missed while disabled.

**Remove a Pipeline**
```javascript
- pipeline = pgml.newPipeline("test_pipeline")
- collection = pgml.newCollection("test_collection")
+ const pipeline = pgml.newPipeline("test_pipeline")
+ const collection = pgml.newCollection("test_collection")
await collection.remove_pipeline(pipeline)
```
@@ -478,4 +478,4 @@ This javascript library is generated from our core rust-sdk. Please check [rust-
- [x] `hybrid_search` functionality that does a combination of `vector_search` and `text_search`. [Issue](https://github.com/postgresml/postgresml/issues/665)
- [x] Ability to call and manage OpenAI embeddings for comparison purposes. [Issue](https://github.com/postgresml/postgresml/issues/666)
- [x] Perform chunking on the DB with multiple langchain splitters. [Issue](https://github.com/postgresml/postgresml/issues/668)
-- [ ] Save `vector_search` history for downstream monitoring of model performance. [Issue](https://github.com/postgresml/postgresml/issues/667)
+- [ ] Save `vector_search` history for downstream monitoring of model performance. [Issue](https://github.com/postgresml/postgresml/issues/667)