Skip to content

Commit 5e6afb3

Browse files
authored
Vector search respect db idx (tursodatabase#1582)
* propagate the schema name (derived from iDb) to the vector index so that it works with databases other than main
* add a basic test
* sometimes zDbSName can be NULL, and this is fine
* prevent the test from writing files to disk
* build bundles
1 parent a9639c3 commit 5e6afb3

File tree

11 files changed

+508
-259
lines changed

11 files changed

+508
-259
lines changed

libsql-ffi/bundled/SQLite3MultipleCiphers/src/sqlite3.c

Lines changed: 156 additions & 79 deletions
Large diffs are not rendered by default.

libsql-ffi/bundled/src/sqlite3.c

Lines changed: 156 additions & 79 deletions
Large diffs are not rendered by default.

libsql-sqlite3/src/build.c

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3323,6 +3323,7 @@ static void destroyTable(Parse *pParse, Table *pTab){
33233323
Pgno iTab = pTab->tnum;
33243324
Pgno iDestroyed = 0;
33253325
Index *pIdx;
3326+
int iDb;
33263327

33273328
#ifndef SQLITE_OMIT_VECTOR
33283329
/*
@@ -3336,9 +3337,12 @@ static void destroyTable(Parse *pParse, Table *pTab){
33363337
* 3. Delete index during the parsing stage (implemented variant) - it's hacky
33373338
* and a bit dirty, but it seems to me a pretty safe and easy way to delete the index
33383339
*/
3340+
iDb = sqlite3SchemaToIndex(pParse->db, pTab->pSchema);
3341+
33393342
for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){
33403343
if( IsVectorIndex(pIdx) ){
3341-
vectorIndexDrop(pParse->db, pIdx->zName);
3344+
assert( 0 <= iDb && iDb < pParse->db->nDb );
3345+
vectorIndexDrop(pParse->db, pParse->db->aDb[iDb].zDbSName, pIdx->zName);
33423346
}
33433347
}
33443348
#endif
@@ -4305,7 +4309,7 @@ void sqlite3CreateIndex(
43054309

43064310

43074311
#ifndef SQLITE_OMIT_VECTOR
4308-
if( vectorIndexCreate(pParse, pIndex, pUsing) != SQLITE_OK ) {
4312+
if( vectorIndexCreate(pParse, pIndex, db->aDb[iDb].zDbSName, pUsing) != SQLITE_OK ) {
43094313
goto exit_create_index;
43104314
}
43114315
idxType = pIndex->idxType; // vectorIndexCreate can update idxType to 4 (VECTOR INDEX)
@@ -4662,6 +4666,7 @@ void sqlite3DropIndex(Parse *pParse, SrcList *pName, int ifExists){
46624666
"or PRIMARY KEY constraint cannot be dropped", 0);
46634667
goto exit_drop_index;
46644668
}
4669+
iDb = sqlite3SchemaToIndex(db, pIndex->pSchema);
46654670
#ifndef SQLITE_OMIT_VECTOR
46664671
/*
46674672
* There are several places to delete vector index:
@@ -4675,10 +4680,9 @@ void sqlite3DropIndex(Parse *pParse, SrcList *pName, int ifExists){
46754680
* and a bit dirty, but it seems to me a pretty safe and easy way to delete the index
46764681
*/
46774682
if( IsVectorIndex(pIndex) ){
4678-
vectorIndexDrop(pParse->db, pIndex->zName);
4683+
vectorIndexDrop(pParse->db, pParse->db->aDb[iDb].zDbSName, pIndex->zName);
46794684
}
46804685
#endif
4681-
iDb = sqlite3SchemaToIndex(db, pIndex->pSchema);
46824686
#ifndef SQLITE_OMIT_AUTHORIZATION
46834687
{
46844688
int code = SQLITE_DROP_INDEX;
@@ -5620,7 +5624,7 @@ void sqlite3Reindex(Parse *pParse, Token *pName1, Token *pName2){
56205624
** when it has finished using it.
56215625
*/
56225626
KeyInfo *sqlite3KeyInfoOfIndex(Parse *pParse, Index *pIdx){
5623-
int i;
5627+
int i, iDb;
56245628
int nCol = pIdx->nColumn;
56255629
int nKey = pIdx->nKeyCol;
56265630
KeyInfo *pKey;
@@ -5631,8 +5635,12 @@ KeyInfo *sqlite3KeyInfoOfIndex(Parse *pParse, Index *pIdx){
56315635
pKey = sqlite3KeyInfoAlloc(pParse->db, nCol, 0);
56325636
}
56335637
if( pKey ){
5638+
iDb = sqlite3SchemaToIndex(pParse->db, pIdx->pSchema);
56345639
assert( sqlite3KeyInfoIsWriteable(pKey) );
56355640
pKey->zIndexName = sqlite3DbStrDup(pParse->db, pIdx->zName);
5641+
if( 0 <= iDb && iDb < pParse->db->nDb ){
5642+
pKey->zDbSName = sqlite3DbStrDup(pParse->db, pParse->db->aDb[iDb].zDbSName);
5643+
}
56365644
for(i=0; i<nCol; i++){
56375645
const char *zColl = pIdx->azColl[i];
56385646
pKey->aColl[i] = zColl==sqlite3StrBINARY ? 0 :

libsql-sqlite3/src/select.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1513,6 +1513,7 @@ KeyInfo *sqlite3KeyInfoAlloc(sqlite3 *db, int N, int X){
15131513
p->db = db;
15141514
p->nRef = 1;
15151515
p->zIndexName = NULL;
1516+
p->zDbSName = NULL;
15161517
memset(&p[1], 0, nExtra);
15171518
}else{
15181519
return (KeyInfo*)sqlite3OomFault(db);
@@ -1532,6 +1533,9 @@ void sqlite3KeyInfoUnref(KeyInfo *p){
15321533
if( p->zIndexName != NULL ){
15331534
sqlite3DbFree(p->db, p->zIndexName);
15341535
}
1536+
if( p->zDbSName != NULL ){
1537+
sqlite3DbFree(p->db, p->zDbSName);
1538+
}
15351539
sqlite3DbNNFreeNN(p->db, p);
15361540
}
15371541
}

libsql-sqlite3/src/sqliteInt.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2641,6 +2641,7 @@ struct KeyInfo {
26412641
* vector indices as they operate with names rather than with page numbers
26422642
*/
26432643
char *zIndexName; /* Name of the index (might be NULL) */
2644+
char *zDbSName; /* Name of the database schema (might be NULL) */
26442645
u32 nRef; /* Number of references to this KeyInfo object */
26452646
u8 enc; /* Text encoding - one of the SQLITE_UTF* values */
26462647
u16 nKeyField; /* Number of key columns in the index */

libsql-sqlite3/src/vdbe.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4229,13 +4229,14 @@ case OP_OpenVectorIdx: {
42294229
}else if( pOp->p4type==P4_INT32 ){
42304230
nField = pOp->p4.i;
42314231
}
4232+
assert( pKeyInfo->zDbSName != NULL );
42324233
if( pOp->p5 == OPFLAG_FORDELETE ){
4233-
rc = vectorIndexClear(db, pKeyInfo->zIndexName);
4234+
rc = vectorIndexClear(db, pKeyInfo->zDbSName, pKeyInfo->zIndexName);
42344235
if( rc ){
42354236
goto abort_due_to_error;
42364237
}
42374238
}
4238-
rc = vectorIndexCursorInit(db, &cursor, pKeyInfo->zIndexName);
4239+
rc = vectorIndexCursorInit(db, pKeyInfo->zDbSName, pKeyInfo->zIndexName, &cursor);
42394240
if( rc ) {
42404241
goto abort_due_to_error;
42414242
}

libsql-sqlite3/src/vectorIndex.c

Lines changed: 80 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -212,7 +212,7 @@ int vectorInRowAlloc(sqlite3 *db, const UnpackedRecord *pRecord, VectorInRow *pV
212212

213213
if( pVectorInRow->nKeys <= 0 ){
214214
rc = SQLITE_ERROR;
215-
goto out;
215+
goto out;
216216
}
217217

218218
if( sqlite3_value_type(pVectorValue)==SQLITE_NULL ){
@@ -233,7 +233,7 @@ int vectorInRowAlloc(sqlite3 *db, const UnpackedRecord *pRecord, VectorInRow *pV
233233

234234
if( sqlite3_value_type(pVectorValue) == SQLITE_BLOB ){
235235
vectorInitFromBlob(pVectorInRow->pVector, sqlite3_value_blob(pVectorValue), sqlite3_value_bytes(pVectorValue));
236-
} else if( sqlite3_value_type(pVectorValue) == SQLITE_TEXT ){
236+
} else if( sqlite3_value_type(pVectorValue) == SQLITE_TEXT ){
237237
// users can put strings (e.g. '[1,2,3]') in the table and we should process them correctly
238238
if( vectorParse(pVectorValue, pVectorInRow->pVector, pzErrMsg) != 0 ){
239239
rc = SQLITE_ERROR;
@@ -321,10 +321,10 @@ void vectorOutRowsGet(sqlite3_context *context, const VectorOutRows *pRows, int
321321

322322
void vectorOutRowsFree(sqlite3 *db, VectorOutRows *pRows) {
323323
int i;
324-
324+
325325
// both aIntValues and ppValues can be NULL if processing failed in the middle and we didn't create the VectorOutRows
326326
assert( pRows->aIntValues == NULL || pRows->ppValues == NULL );
327-
327+
328328
if( pRows->aIntValues != NULL ){
329329
sqlite3DbFree(db, pRows->aIntValues);
330330
}else if( pRows->ppValues != NULL ){
@@ -337,8 +337,8 @@ void vectorOutRowsFree(sqlite3 *db, VectorOutRows *pRows) {
337337
}
338338
}
339339

340-
/*
341-
* Internal type to represent VECTOR_COLUMN_TYPES array
340+
/*
341+
* Internal type to represent VECTOR_COLUMN_TYPES array
342342
* We support both FLOATNN and FNN_BLOB type names for the following reasons:
343343
* 1. FLOATNN is easy to type for humans and generally OK to use for column type names
344344
* 2. FNN_BLOB is aligned with SQLite affinity rules and can be used in cases where compatibility with type affinity rules is important
@@ -349,15 +349,15 @@ struct VectorColumnType {
349349
int nBits;
350350
};
351351

352-
static struct VectorColumnType VECTOR_COLUMN_TYPES[] = {
353-
{ "FLOAT32", 32 },
354-
{ "FLOAT64", 64 },
355-
{ "F32_BLOB", 32 },
356-
{ "F64_BLOB", 64 }
352+
static struct VectorColumnType VECTOR_COLUMN_TYPES[] = {
353+
{ "FLOAT32", 32 },
354+
{ "FLOAT64", 64 },
355+
{ "F32_BLOB", 32 },
356+
{ "F64_BLOB", 64 }
357357
};
358358

359359
/*
360-
* Internal type to represent VECTOR_PARAM_NAMES array with recognized parameters for index creation
360+
* Internal type to represent VECTOR_PARAM_NAMES array with recognized parameters for index creation
361361
* For example, libsql_vector_idx(embedding, 'type=diskann', 'metric=cosine')
362362
*/
363363
struct VectorParamName {
@@ -368,7 +368,7 @@ struct VectorParamName {
368368
u64 value;
369369
};
370370

371-
static struct VectorParamName VECTOR_PARAM_NAMES[] = {
371+
static struct VectorParamName VECTOR_PARAM_NAMES[] = {
372372
{ "type", VECTOR_INDEX_TYPE_PARAM_ID, 0, "diskann", VECTOR_INDEX_TYPE_DISKANN },
373373
{ "metric", VECTOR_METRIC_TYPE_PARAM_ID, 0, "cosine", VECTOR_METRIC_TYPE_COS },
374374
{ "metric", VECTOR_METRIC_TYPE_PARAM_ID, 0, "l2", VECTOR_METRIC_TYPE_L2 },
@@ -550,15 +550,34 @@ int vectorIdxParseColumnType(const char *zType, int *pType, int *pDims, const ch
550550
return -1;
551551
}
552552

553-
int initVectorIndexMetaTable(sqlite3* db) {
554-
static const char *zSql = "CREATE TABLE IF NOT EXISTS " VECTOR_INDEX_GLOBAL_META_TABLE " ( name TEXT PRIMARY KEY, metadata BLOB ) WITHOUT ROWID;";
555-
return sqlite3_exec(db, zSql, 0, 0, 0);
553+
int initVectorIndexMetaTable(sqlite3* db, const char *zDbSName) {
554+
int rc;
555+
static const char *zSqlTemplate = "CREATE TABLE IF NOT EXISTS \"%w\"." VECTOR_INDEX_GLOBAL_META_TABLE " ( name TEXT PRIMARY KEY, metadata BLOB ) WITHOUT ROWID;";
556+
char* zSql;
557+
558+
assert( zDbSName != NULL );
559+
560+
zSql = sqlite3_mprintf(zSqlTemplate, zDbSName);
561+
if( zSql == NULL ){
562+
return SQLITE_NOMEM_BKPT;
563+
}
564+
rc = sqlite3_exec(db, zSql, 0, 0, 0);
565+
sqlite3_free(zSql);
566+
return rc;
556567
}
557568

558-
int insertIndexParameters(sqlite3* db, const char *zName, const VectorIdxParams *pParameters) {
559-
static const char *zSql = "INSERT INTO " VECTOR_INDEX_GLOBAL_META_TABLE " VALUES (?, ?)";
560-
sqlite3_stmt* pStatement = NULL;
569+
int insertIndexParameters(sqlite3* db, const char *zDbSName, const char *zName, const VectorIdxParams *pParameters) {
561570
int rc = SQLITE_ERROR;
571+
static const char *zSqlTemplate = "INSERT INTO \"%w\"." VECTOR_INDEX_GLOBAL_META_TABLE " VALUES (?, ?)";
572+
sqlite3_stmt* pStatement = NULL;
573+
char *zSql;
574+
575+
assert( zDbSName != NULL );
576+
577+
zSql = sqlite3_mprintf(zSqlTemplate, zDbSName);
578+
if( zSql == NULL ){
579+
return SQLITE_NOMEM_BKPT;
580+
}
562581

563582
rc = sqlite3_prepare_v2(db, zSql, -1, &pStatement, 0);
564583
if( rc != SQLITE_OK ){
@@ -579,6 +598,9 @@ int insertIndexParameters(sqlite3* db, const char *zName, const VectorIdxParams
579598
rc = SQLITE_OK;
580599
}
581600
clear_and_exit:
601+
if( zSql != NULL ){
602+
sqlite3_free(zSql);
603+
}
582604
if( pStatement != NULL ){
583605
sqlite3_finalize(pStatement);
584606
}
@@ -672,24 +694,31 @@ int vectorIndexGetParameters(
672694
}
673695

674696

675-
int vectorIndexDrop(sqlite3 *db, const char *zIdxName) {
697+
int vectorIndexDrop(sqlite3 *db, const char *zDbSName, const char *zIdxName) {
676698
// we want to try to delete all traces of the index on every attempt
677699
// this is done to prevent unrecoverable situations where the index was dropped but deletion of the index parameters failed, so that a second attempt would fail on the first step
678-
int rcIdx = diskAnnDropIndex(db, zIdxName);
679-
int rcParams = removeIndexParameters(db, zIdxName);
700+
int rcIdx, rcParams;
701+
702+
assert( zDbSName != NULL );
703+
704+
rcIdx = diskAnnDropIndex(db, zDbSName, zIdxName);
705+
rcParams = removeIndexParameters(db, zIdxName);
680706
return rcIdx != SQLITE_OK ? rcIdx : rcParams;
681707
}
682708

683-
int vectorIndexClear(sqlite3 *db, const char *zIdxName) {
684-
return diskAnnClearIndex(db, zIdxName);
709+
int vectorIndexClear(sqlite3 *db, const char *zDbSName, const char *zIdxName) {
710+
assert( zDbSName != NULL );
711+
return diskAnnClearIndex(db, zDbSName, zIdxName);
685712
}
686713

687-
int vectorIndexCreate(Parse *pParse, Index *pIdx, const IdList *pUsing) {
714+
int vectorIndexCreate(Parse *pParse, Index *pIdx, const char *zDbSName, const IdList *pUsing) {
688715
int i, rc = SQLITE_OK;
689716
int dims, type;
690717
int hasLibsqlVectorIdxFn = 0, hasCollation = 0;
691718
const char *pzErrMsg;
692719

720+
assert( zDbSName != NULL );
721+
693722
sqlite3 *db = pParse->db;
694723
Table *pTable = pIdx->pTable;
695724
struct ExprList_item *pListItem;
@@ -776,34 +805,33 @@ int vectorIndexCreate(Parse *pParse, Index *pIdx, const IdList *pUsing) {
776805
return SQLITE_ERROR;
777806
}
778807

779-
if( vectorIdxKeyGet(pTable, &idxKey, &pzErrMsg) != 0 ){
780-
sqlite3ErrorMsg(pParse, "failed to detect underlying table key: %s", pzErrMsg);
781-
return SQLITE_ERROR;
782-
}
783-
if( idxKey.nKeyColumns != 1 ){
784-
sqlite3ErrorMsg(pParse, "vector index for tables without ROWID and composite primary key are not supported");
785-
return SQLITE_ERROR;
786-
}
787-
788808
// schema is locked while db is initializing and we need to just proceed here
789809
if( db->init.busy == 1 ){
790810
goto succeed;
791811
}
792812

793-
rc = initVectorIndexMetaTable(db);
813+
rc = initVectorIndexMetaTable(db, zDbSName);
794814
if( rc != SQLITE_OK ){
795815
return rc;
796816
}
797817
rc = parseVectorIdxParams(pParse, &idxParams, type, dims, pListItem + 1, pArgsList->nExpr - 1);
798818
if( rc != SQLITE_OK ){
799819
return rc;
800820
}
801-
rc = diskAnnCreateIndex(db, pIdx->zName, &idxKey, &idxParams);
821+
if( vectorIdxKeyGet(pTable, &idxKey, &pzErrMsg) != 0 ){
822+
sqlite3ErrorMsg(pParse, "failed to detect underlying table key: %s", pzErrMsg);
823+
return SQLITE_ERROR;
824+
}
825+
if( idxKey.nKeyColumns != 1 ){
826+
sqlite3ErrorMsg(pParse, "vector index for tables without ROWID and composite primary key are not supported");
827+
return SQLITE_ERROR;
828+
}
829+
rc = diskAnnCreateIndex(db, zDbSName, pIdx->zName, &idxKey, &idxParams);
802830
if( rc != SQLITE_OK ){
803831
sqlite3ErrorMsg(pParse, "unable to initialize diskann vector index");
804832
return rc;
805833
}
806-
rc = insertIndexParameters(db, pIdx->zName, &idxParams);
834+
rc = insertIndexParameters(db, zDbSName, pIdx->zName, &idxParams);
807835
if( rc != SQLITE_OK ){
808836
sqlite3ErrorMsg(pParse, "unable to update global metadata table");
809837
return rc;
@@ -815,7 +843,7 @@ int vectorIndexCreate(Parse *pParse, Index *pIdx, const IdList *pUsing) {
815843
return SQLITE_OK;
816844
}
817845

818-
int vectorIndexSearch(sqlite3 *db, int argc, sqlite3_value **argv, VectorOutRows *pRows, char **pzErrMsg) {
846+
int vectorIndexSearch(sqlite3 *db, const char* zDbSName, int argc, sqlite3_value **argv, VectorOutRows *pRows, char **pzErrMsg) {
819847
int type, dims, k, rc;
820848
const char *zIdxName;
821849
const char *zErrMsg;
@@ -826,6 +854,8 @@ int vectorIndexSearch(sqlite3 *db, int argc, sqlite3_value **argv, VectorOutRows
826854
VectorIdxParams idxParams;
827855
vectorIdxParamsInit(&idxParams, NULL, 0);
828856

857+
assert( zDbSName != NULL );
858+
829859
if( argc != 3 ){
830860
*pzErrMsg = sqlite3_mprintf("vector search must have exactly 3 parameters");
831861
rc = SQLITE_ERROR;
@@ -871,22 +901,22 @@ int vectorIndexSearch(sqlite3 *db, int argc, sqlite3_value **argv, VectorOutRows
871901
rc = SQLITE_ERROR;
872902
goto out;
873903
}
874-
pIndex = sqlite3FindIndex(db, zIdxName, db->aDb[0].zDbSName);
904+
pIndex = sqlite3FindIndex(db, zIdxName, zDbSName);
875905
if( pIndex == NULL ){
876906
*pzErrMsg = sqlite3_mprintf("vector index not found");
877907
rc = SQLITE_ERROR;
878908
goto out;
879909
}
910+
rc = diskAnnOpenIndex(db, zDbSName, zIdxName, &idxParams, &pDiskAnn);
911+
if( rc != SQLITE_OK ){
912+
*pzErrMsg = sqlite3_mprintf("failed to open diskann index");
913+
goto out;
914+
}
880915
if( vectorIdxKeyGet(pIndex->pTable, &pKey, &zErrMsg) != 0 ){
881916
*pzErrMsg = sqlite3_mprintf("failed to extract table key: %s", zErrMsg);
882917
rc = SQLITE_ERROR;
883918
goto out;
884919
}
885-
rc = diskAnnOpenIndex(db, zIdxName, &idxParams, &pDiskAnn);
886-
if( rc != SQLITE_OK ){
887-
*pzErrMsg = sqlite3_mprintf("failed to open diskann index");
888-
goto out;
889-
}
890920
rc = diskAnnSearch(pDiskAnn, pVector, k, &pKey, pRows, pzErrMsg);
891921
out:
892922
if( pDiskAnn != NULL ){
@@ -932,22 +962,25 @@ int vectorIndexDelete(
932962

933963
int vectorIndexCursorInit(
934964
sqlite3 *db,
935-
VectorIdxCursor **ppCursor,
936-
const char *zIndexName
965+
const char *zDbSName,
966+
const char *zIndexName,
967+
VectorIdxCursor **ppCursor
937968
){
938969
int rc;
939970
VectorIdxCursor* pCursor;
940971
VectorIdxParams params;
941972
vectorIdxParamsInit(&params, NULL, 0);
942973

974+
assert( zDbSName != NULL );
975+
943976
if( vectorIndexGetParameters(db, zIndexName, &params) != 0 ){
944977
return SQLITE_ERROR;
945978
}
946979
pCursor = sqlite3DbMallocZero(db, sizeof(VectorIdxCursor));
947980
if( pCursor == 0 ){
948981
return SQLITE_NOMEM_BKPT;
949982
}
950-
rc = diskAnnOpenIndex(db, zIndexName, &params, &pCursor->pIndex);
983+
rc = diskAnnOpenIndex(db, zDbSName, zIndexName, &params, &pCursor->pIndex);
951984
if( rc != SQLITE_OK ){
952985
sqlite3DbFree(db, pCursor);
953986
return rc;

0 commit comments

Comments
 (0)