From 1037d7e4c05948b6b5bfc6f8d43e0e7730224f63 Mon Sep 17 00:00:00 2001
From: Andrew Kane
Date: Mon, 28 Oct 2024 19:37:18 -0700
Subject: [PATCH 001/123] Added test for half-precision indexing with SQLAlchemy - #98

---
 tests/test_sqlalchemy.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/tests/test_sqlalchemy.py b/tests/test_sqlalchemy.py
index 1ca0ea3..8a032ef 100644
--- a/tests/test_sqlalchemy.py
+++ b/tests/test_sqlalchemy.py
@@ -46,6 +46,15 @@ class Item(Base):
 )
 index.create(engine)

+half_precision_index = Index(
+    'sqlalchemy_orm_half_precision_index',
+    func.cast(Item.embedding, HALFVEC(3)).label('embedding'),
+    postgresql_using='hnsw',
+    postgresql_with={'m': 16, 'ef_construction': 64},
+    postgresql_ops={'embedding': 'halfvec_l2_ops'}
+)
+half_precision_index.create(engine)
+

 def create_items():
     session = Session(engine)
@@ -438,6 +447,12 @@ def test_vector_array(self):
         assert item.embeddings[0].tolist() == [1, 2, 3]
         assert item.embeddings[1].tolist() == [4, 5, 6]

+    def test_half_precision(self):
+        create_items()
+        with Session(engine) as session:
+            items = session.query(Item).order_by(func.cast(Item.embedding, HALFVEC(3)).l2_distance([1, 1, 1])).all()
+            assert [v.id for v in items] == [1, 3, 2]
+
     @pytest.mark.asyncio
     @pytest.mark.skipif(sqlalchemy_version == 1, reason='Requires SQLAlchemy 2+')
     async def test_async(self):

From 32a8d04b06b0f3e77d639e9a9ed275a67fa1e36f Mon Sep 17 00:00:00 2001
From: Andrew Kane
Date: Mon, 28 Oct 2024 19:52:49 -0700
Subject: [PATCH 002/123] Added docs for half-precision indexing with SQLAlchemy - #98 [skip ci]

---
 README.md | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/README.md b/README.md
index acd625d..bbf5973 100644
--- a/README.md
+++ b/README.md
@@ -214,6 +214,29 @@ index.create(engine)

 Use `vector_ip_ops` for inner product and `vector_cosine_ops` for cosine distance

+#### Half-Precision Indexing
+
+Index vectors at half-precision
+
+```python
+from pgvector.sqlalchemy import HALFVEC
+from sqlalchemy.sql import func
+
+index = Index(
+    'my_index',
+    func.cast(Item.embedding, HALFVEC(3)).label('embedding'),
+    postgresql_using='hnsw',
+    postgresql_with={'m': 16, 'ef_construction': 64},
+    postgresql_ops={'embedding': 'vector_l2_ops'}
+)
+```
+
+Get the nearest neighbors
+
+```python
+session.scalars(select(Item).order_by(func.cast(Item.embedding, HALFVEC(3)).l2_distance([3, 1, 2])).limit(5))
+```
+
 ## SQLModel

 Enable the extension

From 2c8fe09f824bc509ae692d2932fe1a0bc15b6923 Mon Sep 17 00:00:00 2001
From: Andrew Kane
Date: Mon, 28 Oct 2024 20:19:06 -0700
Subject: [PATCH 003/123] Fixed example [skip ci]

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index bbf5973..10cca79 100644
--- a/README.md
+++ b/README.md
@@ -227,7 +227,7 @@ index = Index(
     func.cast(Item.embedding, HALFVEC(3)).label('embedding'),
     postgresql_using='hnsw',
     postgresql_with={'m': 16, 'ef_construction': 64},
-    postgresql_ops={'embedding': 'vector_l2_ops'}
+    postgresql_ops={'embedding': 'halfvec_l2_ops'}
 )
 ```

From 3d9ff72a270bbfedc2d579db7f2d03b4048dfbc0 Mon Sep 17 00:00:00 2001
From: Andrew Kane
Date: Mon, 28 Oct 2024 20:21:06 -0700
Subject: [PATCH 004/123] Improved example [skip ci]

---
 README.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 10cca79..917d1a7 100644
--- a/README.md
+++ b/README.md
@@ -234,7 +234,8 @@ index = Index(
 Get the nearest neighbors

 ```python
-session.scalars(select(Item).order_by(func.cast(Item.embedding, HALFVEC(3)).l2_distance([3, 1, 2])).limit(5))
+order = func.cast(Item.embedding, HALFVEC(3)).l2_distance([3, 1, 2])
+session.scalars(select(Item).order_by(order).limit(5))
 ```

 ## SQLModel

From 25a30264599c9646b743efdfe1d28b99d6208f90 Mon Sep 17 00:00:00 2001
From: Andrew Kane
Date: Sun, 3 Nov 2024 19:30:28 -0800
Subject: [PATCH 005/123] Updated test [skip ci]

---
 tests/test_django.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_django.py b/tests/test_django.py
index 5ab5f81..92f3733 100644
--- a/tests/test_django.py
+++ b/tests/test_django.py
@@ -65,7 +65,7 @@ class Meta:
                 name='hnsw_idx',
                 fields=['embedding'],
                 m=16,
-                ef_construction=100,
+                ef_construction=64,
                 opclasses=['vector_l2_ops']
             )
         ]

From 06a48c4699486b3dc2ab843104594d0fc4539038 Mon Sep 17 00:00:00 2001
From: Andrew Kane
Date: Tue, 5 Nov 2024 09:12:53 -0800
Subject: [PATCH 006/123] Added pool example and tests for Psycopg 3 - closes #100

---
 README.md             |  9 +++++++++
 requirements.txt      |  2 +-
 tests/test_psycopg.py | 29 +++++++++++++++++++++++++++++
 3 files changed, 39 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 917d1a7..0989ba7 100644
--- a/README.md
+++ b/README.md
@@ -338,6 +338,15 @@ from pgvector.psycopg import register_vector
 register_vector(conn)
 ```

+For [connection pools](https://www.psycopg.org/psycopg3/docs/advanced/pool.html), use
+
+```python
+def configure(conn):
+    register_vector(conn)
+
+pool = ConnectionPool(configure=configure)
+```
+
 For [async connections](https://www.psycopg.org/psycopg3/docs/advanced/async.html), use

 ```python
diff --git a/requirements.txt b/requirements.txt
index c1e11f3..0e30959 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,7 +2,7 @@ asyncpg
 Django
 numpy
 peewee
-psycopg[binary]
+psycopg[binary,pool]
 psycopg2-binary
 pytest
 pytest-asyncio
diff --git a/tests/test_psycopg.py b/tests/test_psycopg.py
index c4e1c22..5802b2b 100644
--- a/tests/test_psycopg.py
+++ b/tests/test_psycopg.py
@@ -1,6 +1,7 @@
 import numpy as np
 from pgvector.psycopg import register_vector, register_vector_async, Bit, HalfVector, SparseVector, Vector
 import psycopg
+from psycopg_pool import ConnectionPool, AsyncConnectionPool
 import pytest

 conn = psycopg.connect(dbname='pgvector_python_test', autocommit=True)
@@ -176,6 +177,18 @@ def test_vector_array(self):
         assert np.array_equal(res[0][0], embeddings[0])
         assert np.array_equal(res[0][1], embeddings[1])

+    def test_pool(self):
+        def configure(conn):
+            register_vector(conn)
+
+        pool = ConnectionPool(conninfo='postgres://localhost/pgvector_python_test', open=True, configure=configure)
+
+        with pool.connection() as conn:
+            res = conn.execute("SELECT '[1,2,3]'::vector").fetchone()
+            assert np.array_equal(res[0], np.array([1, 2, 3]))
+
+        pool.close()
+
     @pytest.mark.asyncio
     async def test_async(self):
         conn = await psycopg.AsyncConnection.connect(dbname='pgvector_python_test', autocommit=True)
@@ -195,3 +208,19 @@ async def test_async(self):
         assert np.array_equal(res[0][1], embedding)
         assert res[0][1].dtype == np.float32
         assert res[1][1] is None
+
+    @pytest.mark.asyncio
+    async def test_async_pool(self):
+        async def configure(conn):
+            await register_vector_async(conn)
+
+        pool = AsyncConnectionPool(conninfo='postgres://localhost/pgvector_python_test', open=False, configure=configure)
+        await pool.open()
+
+        async with pool.connection() as conn:
+            async with conn.cursor() as cur:
+                await cur.execute("SELECT '[1,2,3]'::vector")
+                res = await cur.fetchone()
+                assert np.array_equal(res[0], np.array([1, 2, 3]))
+
+        await pool.close()

From 49072f2e37ff97b07b422aa0a41c4d3bd312879f Mon Sep 17 00:00:00 2001
From: Andrew Kane
Date: Tue, 5 Nov 2024 09:33:14 -0800
Subject: [PATCH 007/123] Updated readme [skip ci]

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 0989ba7..37a4737 100644
--- a/README.md
+++ b/README.md
@@ -344,7 +344,7 @@ For [connection pools](https://www.psycopg.org/psycopg3/docs/advanced/pool.html)
 def configure(conn):
     register_vector(conn)

-pool = ConnectionPool(configure=configure)
+pool = ConnectionPool(..., configure=configure)
 ```

 For [async connections](https://www.psycopg.org/psycopg3/docs/advanced/async.html), use

From 9e1c421c62a84f650cf8be73e0768bfab6715e7b Mon Sep 17 00:00:00 2001
From: Andrew Kane
Date: Tue, 5 Nov 2024 11:18:41 -0800
Subject: [PATCH 008/123] Added docs and test for half-precision indexing with Django

---
 README.md            | 25 +++++++++++++++++++++++++
 tests/test_django.py | 25 ++++++++++++++++++++++++-
 2 files changed, 49 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 37a4737..938207a 100644
--- a/README.md
+++ b/README.md
@@ -133,6 +133,31 @@ class Item(models.Model):

 Use `vector_ip_ops` for inner product and `vector_cosine_ops` for cosine distance

+#### Half-Precision Indexing
+
+Index vectors at half-precision
+
+```python
+from django.contrib.postgres.indexes import OpClass
+from django.db.models.functions import Cast
+from pgvector.django import HalfVectorField
+
+index = HnswIndex(
+    OpClass(Cast('embedding', HalfVectorField(dimensions=3)), name='halfvec_l2_ops'),
+    name='my_index',
+    m=16,
+    ef_construction=64
+)
+```
+
+Note: Add `'django.contrib.postgres'` to `INSTALLED_APPS` to use `OpClass`
+
+Get the nearest neighbors
+
+```python
+Item.objects.order_by(L2Distance(Cast('embedding', HalfVectorField(dimensions=3)), [3, 1, 2]))[:5]
+```
+
 ## SQLAlchemy

 Enable the extension
diff --git a/tests/test_django.py b/tests/test_django.py
index 92f3733..353087e 100644
--- a/tests/test_django.py
+++ b/tests/test_django.py
@@ -1,6 +1,7 @@
 import django
 from django.conf import settings
 from django.contrib.postgres.fields import ArrayField
+from django.contrib.postgres.indexes import OpClass
 from django.core import serializers
 from django.db import connection, migrations, models
 from django.db.models import Avg, Sum, FloatField, DecimalField
@@ -38,7 +39,12 @@
                 'level': 'WARNING'
             }
         }
-    }
+    },
+    # needed for OpClass
+    # https://docs.djangoproject.com/en/5.1/ref/contrib/postgres/indexes/#opclass-expressions
+    INSTALLED_APPS=[
+        'django.contrib.postgres'
+    ]
 )

 django.setup()
@@ -67,6 +73,12 @@ class Meta:
                 m=16,
                 ef_construction=64,
                 opclasses=['vector_l2_ops']
+            ),
+            HnswIndex(
+                OpClass(Cast('embedding', HalfVectorField(dimensions=3)), name='halfvec_l2_ops'),
+                name='hnsw_half_precision_idx',
+                m=16,
+                ef_construction=64
             )
         ]

@@ -99,6 +111,10 @@ class Migration(migrations.Migration):
         migrations.AddIndex(
             model_name='item',
             index=pgvector.django.HnswIndex(fields=['embedding'], m=16, ef_construction=64, name='hnsw_idx', opclasses=['vector_l2_ops']),
+        ),
+        migrations.AddIndex(
+            model_name='item',
+            index=pgvector.django.HnswIndex(OpClass(Cast('embedding', HalfVectorField(dimensions=3)), name='halfvec_l2_ops'), m=16, ef_construction=64, name='hnsw_half_precision_idx'),
         )
     ]

@@ -473,3 +489,10 @@ def test_numeric_array(self):
         assert [v.id for v in items] == [1, 3, 2]
         assert [v.distance for v in items] == [0, 1, sqrt(3)]
         assert items[1].numeric_embedding == [1, 1, 2]
+
+    def test_half_precision(self):
+        create_items()
+        distance = L2Distance(Cast('embedding', HalfVectorField(dimensions=3)), [1, 1, 1])
+        items = Item.objects.annotate(distance=distance).order_by(distance)
+        assert [v.id for v in items] == [1, 3, 2]
+        assert [v.distance for v in items] == [0, 1, sqrt(3)]

From afcd67a4422dd390e07662bfe81a8fbfab571301 Mon Sep 17 00:00:00 2001
From: Andrew Kane
Date: Tue, 5 Nov 2024 11:23:17 -0800
Subject: [PATCH 009/123] Updated readme [skip ci]

---
 README.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 938207a..1000900 100644
--- a/README.md
+++ b/README.md
@@ -155,7 +155,8 @@ Note: Add `'django.contrib.postgres'` to `INSTALLED_APPS` to use `OpClass`
 Get the nearest neighbors

 ```python
-Item.objects.order_by(L2Distance(Cast('embedding', HalfVectorField(dimensions=3)), [3, 1, 2]))[:5]
+distance = L2Distance(Cast('embedding', HalfVectorField(dimensions=3)), [3, 1, 2])
+Item.objects.order_by(distance)[:5]
 ```

 ## SQLAlchemy

From 441b26ec3dfbdfb6013ffdf18df083614d9fc5ff Mon Sep 17 00:00:00 2001
From: Andrew Kane
Date: Tue, 5 Nov 2024 11:30:19 -0800
Subject: [PATCH 010/123] Updated example [skip ci]

---
 README.md | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/README.md b/README.md
index 1000900..b61059e 100644
--- a/README.md
+++ b/README.md
@@ -142,12 +142,16 @@ from django.contrib.postgres.indexes import OpClass
 from django.db.models.functions import Cast
 from pgvector.django import HalfVectorField

-index = HnswIndex(
-    OpClass(Cast('embedding', HalfVectorField(dimensions=3)), name='halfvec_l2_ops'),
-    name='my_index',
-    m=16,
-    ef_construction=64
-)
+class Item(models.Model):
+    class Meta:
+        indexes = [
+            HnswIndex(
+                OpClass(Cast('embedding', HalfVectorField(dimensions=3)), name='halfvec_l2_ops'),
+                name='my_index',
+                m=16,
+                ef_construction=64
+            )
+        ]
 ```

 Note: Add `'django.contrib.postgres'` to `INSTALLED_APPS` to use `OpClass`

From 78e64594843f2eb833cca77d8f43c33971806963 Mon Sep 17 00:00:00 2001
From: Andrew Kane
Date: Tue, 5 Nov 2024 11:31:05 -0800
Subject: [PATCH 011/123] Updated example [skip ci]

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index b61059e..94fed55 100644
--- a/README.md
+++ b/README.md
@@ -140,7 +140,7 @@ Index vectors at half-precision
 ```python
 from django.contrib.postgres.indexes import OpClass
 from django.db.models.functions import Cast
-from pgvector.django import HalfVectorField
+from pgvector.django import HnswIndex, HalfVectorField

 class Item(models.Model):
     class Meta:

From 75e14d80c80975938c0f1c64f59901686f1cd24b Mon Sep 17 00:00:00 2001
From: Andrew Kane
Date: Tue, 5 Nov 2024 12:02:54 -0800
Subject: [PATCH 012/123] Added pool test for Psycopg 2

---
 tests/test_psycopg2.py | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/tests/test_psycopg2.py b/tests/test_psycopg2.py
index c93fce4..85f08aa 100644
--- a/tests/test_psycopg2.py
+++ b/tests/test_psycopg2.py
@@ -2,6 +2,7 @@
 from pgvector.psycopg2 import register_vector, HalfVector, SparseVector
 import psycopg2
 from psycopg2.extras import DictCursor, RealDictCursor, NamedTupleCursor
+from psycopg2.pool import ThreadedConnectionPool

 conn = psycopg2.connect(dbname='pgvector_python_test')
 conn.autocommit = True
@@ -94,3 +95,21 @@ def test_cursor_factory_connection(self):
         conn = psycopg2.connect(dbname='pgvector_python_test', cursor_factory=cursor_factory)
         register_vector(conn, 
globally=False) conn.close() + + def test_pool(self): + pool = ThreadedConnectionPool(1, 3, dbname='pgvector_python_test') + + conn = pool.getconn() + try: + cur = conn.cursor() + + # use globally=True for apps + register_vector(cur, globally=False) + + cur.execute("SELECT '[1,2,3]'::vector") + res = cur.fetchone() + assert np.array_equal(res[0], np.array([1, 2, 3])) + finally: + pool.putconn(conn) + + pool.closeall() From 706cebcb4c10f5fc6288757744fcfe94cb461a0b Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 5 Nov 2024 12:04:39 -0800 Subject: [PATCH 013/123] Improved test --- tests/test_psycopg2.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tests/test_psycopg2.py b/tests/test_psycopg2.py index 85f08aa..3730eb8 100644 --- a/tests/test_psycopg2.py +++ b/tests/test_psycopg2.py @@ -101,11 +101,14 @@ def test_pool(self): conn = pool.getconn() try: - cur = conn.cursor() - # use globally=True for apps - register_vector(cur, globally=False) + register_vector(conn, globally=False) + finally: + pool.putconn(conn) + conn = pool.getconn() + try: + cur = conn.cursor() cur.execute("SELECT '[1,2,3]'::vector") res = cur.fetchone() assert np.array_equal(res[0], np.array([1, 2, 3])) From 812a85e7ce40d42382d84244b25e2f44eddf2e94 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 5 Nov 2024 12:04:59 -0800 Subject: [PATCH 014/123] Improved test [skip ci] --- tests/test_psycopg2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_psycopg2.py b/tests/test_psycopg2.py index 3730eb8..3f52385 100644 --- a/tests/test_psycopg2.py +++ b/tests/test_psycopg2.py @@ -97,7 +97,7 @@ def test_cursor_factory_connection(self): conn.close() def test_pool(self): - pool = ThreadedConnectionPool(1, 3, dbname='pgvector_python_test') + pool = ThreadedConnectionPool(1, 1, dbname='pgvector_python_test') conn = pool.getconn() try: From 07a3b2b6eec65d332041dcec136ac9c75291bc2b Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 5 Nov 2024 12:09:23 -0800 Subject: [PATCH 015/123] Updated comment [skip ci] --- tests/test_psycopg2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_psycopg2.py b/tests/test_psycopg2.py index 3f52385..c3cd3cd 100644 --- a/tests/test_psycopg2.py +++ b/tests/test_psycopg2.py @@ -101,7 +101,7 @@ def test_pool(self): conn = pool.getconn() try: - # use globally=True for apps + # use globally=True for apps to ensure registered with all connections register_vector(conn, globally=False) finally: pool.putconn(conn) From ea32504ef8538c781fab1f579fcaec7b417b7163 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Mon, 11 Nov 2024 20:56:23 -0800 Subject: [PATCH 016/123] Updated pgvector on CI --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index f8bcaa3..04f1c21 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -19,7 +19,7 @@ jobs: dev-files: true - run: | cd /tmp - git clone --branch v0.7.0 https://github.com/pgvector/pgvector.git + git clone --branch v0.8.0 https://github.com/pgvector/pgvector.git cd pgvector make sudo make install From 664b8ee8692a42236ff9b236ec2da635342b96c7 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 12 Nov 2024 18:28:01 -0800 Subject: [PATCH 017/123] Added test for halfvec arrays with SQLAlchemy - #101 --- tests/test_sqlalchemy.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tests/test_sqlalchemy.py b/tests/test_sqlalchemy.py index 
8a032ef..c9aa900 100644 --- a/tests/test_sqlalchemy.py +++ b/tests/test_sqlalchemy.py @@ -32,6 +32,7 @@ class Item(Base): binary_embedding = mapped_column(BIT(3)) sparse_embedding = mapped_column(SPARSEVEC(3)) embeddings = mapped_column(ARRAY(VECTOR(3))) + half_embeddings = mapped_column(ARRAY(HALFVEC(3))) Base.metadata.drop_all(engine) @@ -447,6 +448,20 @@ def test_vector_array(self): assert item.embeddings[0].tolist() == [1, 2, 3] assert item.embeddings[1].tolist() == [4, 5, 6] + def test_halfvec_array(self): + session = Session(engine) + session.add(Item(id=1, half_embeddings=[np.array([1, 2, 3]), np.array([4, 5, 6])])) + session.commit() + + with engine.connect() as connection: + from pgvector.psycopg2 import register_vector + register_vector(connection.connection.dbapi_connection, globally=False, arrays=True) + + # this fails if the driver does not cast arrays + item = Session(bind=connection).get(Item, 1) + assert item.half_embeddings[0].to_list() == [1, 2, 3] + assert item.half_embeddings[1].to_list() == [4, 5, 6] + def test_half_precision(self): create_items() with Session(engine) as session: From 1c7e6a5fb3ea31512dacf71eaf4165eae9fa60e8 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 12 Nov 2024 19:04:21 -0800 Subject: [PATCH 018/123] Added docs for arrays with SQLAlchemy [skip ci] --- README.md | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/README.md b/README.md index 94fed55..44f1d93 100644 --- a/README.md +++ b/README.md @@ -268,6 +268,27 @@ order = func.cast(Item.embedding, HALFVEC(3)).l2_distance([3, 1, 2]) session.scalars(select(Item).order_by(order).limit(5)) ``` +#### Arrays + +Add an array column + +```python +from pgvector.sqlalchemy import Vector +from sqlalchemy import ARRAY + +class Item(Base): + embeddings = mapped_column(ARRAY(Vector(3))) +``` + +And register the types with the underlying driver + +```python +from pgvector.psycopg2 import register_vector + +with engine.connect() as connection: + register_vector(connection.connection.dbapi_connection, globally=True, arrays=True) +``` + ## SQLModel Enable the extension From 0a760663b1acd993c7caf364c8c087c50306a01f Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 12 Nov 2024 20:42:27 -0800 Subject: [PATCH 019/123] Use connection from session in example and tests --- README.md | 2 +- tests/test_sqlalchemy.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 44f1d93..757ade9 100644 --- a/README.md +++ b/README.md @@ -285,7 +285,7 @@ And register the types with the underlying driver ```python from pgvector.psycopg2 import register_vector -with engine.connect() as connection: +with session.connection() as connection: register_vector(connection.connection.dbapi_connection, globally=True, arrays=True) ``` diff --git a/tests/test_sqlalchemy.py b/tests/test_sqlalchemy.py index c9aa900..57cc12b 100644 --- a/tests/test_sqlalchemy.py +++ b/tests/test_sqlalchemy.py @@ -439,12 +439,12 @@ def test_vector_array(self): session.add(Item(id=1, embeddings=[np.array([1, 2, 3]), np.array([4, 5, 6])])) session.commit() - with engine.connect() as connection: + with session.connection() as connection: from pgvector.psycopg2 import register_vector register_vector(connection.connection.dbapi_connection, globally=False, arrays=True) # this fails if the driver does not cast arrays - item = Session(bind=connection).get(Item, 1) + item = session.get(Item, 1) assert item.embeddings[0].tolist() == [1, 2, 3] assert item.embeddings[1].tolist() == [4, 5, 6] @@ 
-453,12 +453,12 @@ def test_halfvec_array(self): session.add(Item(id=1, half_embeddings=[np.array([1, 2, 3]), np.array([4, 5, 6])])) session.commit() - with engine.connect() as connection: + with session.connection() as connection: from pgvector.psycopg2 import register_vector register_vector(connection.connection.dbapi_connection, globally=False, arrays=True) # this fails if the driver does not cast arrays - item = Session(bind=connection).get(Item, 1) + item = session.get(Item, 1) assert item.half_embeddings[0].to_list() == [1, 2, 3] assert item.half_embeddings[1].to_list() == [4, 5, 6] From 030def94b19329fa29c71f5273183f82c0550fd3 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 12 Nov 2024 20:55:15 -0800 Subject: [PATCH 020/123] Improved example and tests for arrays with SQLAlchemy - #101 [skip ci] --- README.md | 6 ++++-- tests/test_sqlalchemy.py | 39 ++++++++++++++++++++------------------- 2 files changed, 24 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index 757ade9..991c51f 100644 --- a/README.md +++ b/README.md @@ -284,9 +284,11 @@ And register the types with the underlying driver ```python from pgvector.psycopg2 import register_vector +from sqlalchemy import engine -with session.connection() as connection: - register_vector(connection.connection.dbapi_connection, globally=True, arrays=True) +@event.listens_for(engine, "connect") +def connect(dbapi_connection, connection_record): + register_vector(dbapi_connection, arrays=True) ``` ## SQLModel diff --git a/tests/test_sqlalchemy.py b/tests/test_sqlalchemy.py index 57cc12b..f8e4bb1 100644 --- a/tests/test_sqlalchemy.py +++ b/tests/test_sqlalchemy.py @@ -1,7 +1,7 @@ import numpy as np from pgvector.sqlalchemy import VECTOR, HALFVEC, BIT, SPARSEVEC, SparseVector, avg, sum import pytest -from sqlalchemy import create_engine, insert, inspect, select, text, MetaData, Table, Column, Index, Integer, ARRAY +from sqlalchemy import create_engine, event, insert, inspect, select, text, MetaData, Table, Column, Index, Integer, ARRAY from sqlalchemy.exc import StatementError from sqlalchemy.ext.automap import automap_base from sqlalchemy.orm import declarative_base, Session @@ -20,6 +20,15 @@ session.execute(text('CREATE EXTENSION IF NOT EXISTS vector')) session.commit() +array_engine = create_engine('postgresql+psycopg2://localhost/pgvector_python_test') + + +@event.listens_for(array_engine, "connect") +def connect(dbapi_connection, connection_record): + from pgvector.psycopg2 import register_vector + register_vector(dbapi_connection, globally=False, arrays=True) + + Base = declarative_base() @@ -435,32 +444,24 @@ def test_automap(self): assert item.embedding.tolist() == [1, 2, 3] def test_vector_array(self): - session = Session(engine) + session = Session(array_engine) session.add(Item(id=1, embeddings=[np.array([1, 2, 3]), np.array([4, 5, 6])])) session.commit() - with session.connection() as connection: - from pgvector.psycopg2 import register_vector - register_vector(connection.connection.dbapi_connection, globally=False, arrays=True) - - # this fails if the driver does not cast arrays - item = session.get(Item, 1) - assert item.embeddings[0].tolist() == [1, 2, 3] - assert item.embeddings[1].tolist() == [4, 5, 6] + # this fails if the driver does not cast arrays + item = session.get(Item, 1) + assert item.embeddings[0].tolist() == [1, 2, 3] + assert item.embeddings[1].tolist() == [4, 5, 6] def test_halfvec_array(self): - session = Session(engine) + session = Session(array_engine) session.add(Item(id=1, 
half_embeddings=[np.array([1, 2, 3]), np.array([4, 5, 6])])) session.commit() - with session.connection() as connection: - from pgvector.psycopg2 import register_vector - register_vector(connection.connection.dbapi_connection, globally=False, arrays=True) - - # this fails if the driver does not cast arrays - item = session.get(Item, 1) - assert item.half_embeddings[0].to_list() == [1, 2, 3] - assert item.half_embeddings[1].to_list() == [4, 5, 6] + # this fails if the driver does not cast arrays + item = session.get(Item, 1) + assert item.half_embeddings[0].to_list() == [1, 2, 3] + assert item.half_embeddings[1].to_list() == [4, 5, 6] def test_half_precision(self): create_items() From d23844ef10dcd4297a9e2f3671ed8e851e0a2db1 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 12 Nov 2024 21:00:22 -0800 Subject: [PATCH 021/123] Fixed example [skip ci] --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 991c51f..516f3aa 100644 --- a/README.md +++ b/README.md @@ -284,7 +284,7 @@ And register the types with the underlying driver ```python from pgvector.psycopg2 import register_vector -from sqlalchemy import engine +from sqlalchemy import event @event.listens_for(engine, "connect") def connect(dbapi_connection, connection_record): From 04aa5bca2ee60c73de91507e5eb7472a6cf6d7a6 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 12 Nov 2024 22:39:46 -0800 Subject: [PATCH 022/123] Added test for arrays with SQLAlchemy async - #101 --- tests/test_sqlalchemy.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tests/test_sqlalchemy.py b/tests/test_sqlalchemy.py index f8e4bb1..77c03fc 100644 --- a/tests/test_sqlalchemy.py +++ b/tests/test_sqlalchemy.py @@ -483,3 +483,25 @@ async def test_async(self): assert avg.first() == '[2.5,3.5,4.5]' await engine.dispose() + + @pytest.mark.asyncio + @pytest.mark.skipif(sqlalchemy_version == 1, reason='Requires SQLAlchemy 2+') + async def test_async_vector_array(self): + engine = create_async_engine('postgresql+psycopg://localhost/pgvector_python_test') + async_session = async_sessionmaker(engine, expire_on_commit=False) + + @event.listens_for(engine.sync_engine, "connect") + def connect(dbapi_connection, connection_record): + from pgvector.psycopg import register_vector_async + dbapi_connection.run_async(register_vector_async) + + async with async_session() as session: + async with session.begin(): + session.add(Item(id=1, embeddings=[np.array([1, 2, 3]), np.array([4, 5, 6])])) + + # this fails if the driver does not cast arrays + item = await session.get(Item, 1) + assert item.embeddings[0].tolist() == [1, 2, 3] + assert item.embeddings[1].tolist() == [4, 5, 6] + + await engine.dispose() From dbc44f4533e9edaa376dc4d4a18fea235c5e2187 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 12 Nov 2024 22:48:46 -0800 Subject: [PATCH 023/123] Added more examples for arrays with SQLAlchemy [skip ci] --- README.md | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/README.md b/README.md index 516f3aa..0dedce9 100644 --- a/README.md +++ b/README.md @@ -282,6 +282,30 @@ class Item(Base): And register the types with the underlying driver +For Psycopg 3, use + +```python +from pgvector.psycopg import register_vector +from sqlalchemy import event + +@event.listens_for(engine, "connect") +def connect(dbapi_connection, connection_record): + register_vector(dbapi_connection) +``` + +For [async connections](https://docs.sqlalchemy.org/en/20/orm/extensions/asyncio.html) with 
Psycopg 3, use + +```python +from pgvector.psycopg import register_vector_async +from sqlalchemy import event + +@event.listens_for(engine.sync_engine, "connect") +def connect(dbapi_connection, connection_record): + dbapi_connection.run_async(register_vector_async) +``` + +For Psycopg 2, use + ```python from pgvector.psycopg2 import register_vector from sqlalchemy import event From 368b363bbf9a48fe42bc114991c4e97ee140cdeb Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sat, 16 Nov 2024 15:09:06 -0800 Subject: [PATCH 024/123] Added ColBERT example for binary embeddings [skip ci] --- examples/colbert/exact_binary.py | 53 ++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 examples/colbert/exact_binary.py diff --git a/examples/colbert/exact_binary.py b/examples/colbert/exact_binary.py new file mode 100644 index 0000000..8d398e2 --- /dev/null +++ b/examples/colbert/exact_binary.py @@ -0,0 +1,53 @@ +from colbert.infra import ColBERTConfig +from colbert.modeling.checkpoint import Checkpoint +from pgvector.psycopg import register_vector, Bit +import psycopg + +conn = psycopg.connect(dbname='pgvector_example', autocommit=True) + +conn.execute('CREATE EXTENSION IF NOT EXISTS vector') +register_vector(conn) + +conn.execute('DROP TABLE IF EXISTS documents') +conn.execute('CREATE TABLE documents (id bigserial PRIMARY KEY, content text, embeddings bit(128)[])') +conn.execute(""" +CREATE OR REPLACE FUNCTION max_sim(document bit[], query bit[]) RETURNS double precision AS $$ + WITH queries AS ( + SELECT row_number() OVER () AS query_number, * FROM (SELECT unnest(query) AS query) + ), + documents AS ( + SELECT unnest(document) AS document + ), + similarities AS ( + SELECT query_number, 1 - ((document <~> query) / bit_length(query)) AS similarity FROM queries CROSS JOIN documents + ), + max_similarities AS ( + SELECT MAX(similarity) AS max_similarity FROM similarities GROUP BY query_number + ) + SELECT SUM(max_similarity) FROM max_similarities +$$ LANGUAGE SQL +""") + + +def binary_quantize(embeddings): + return [Bit(e.numpy()) for e in (embeddings > 0)] + + +config = ColBERTConfig(doc_maxlen=220, query_maxlen=32) +checkpoint = Checkpoint('colbert-ir/colbertv2.0', colbert_config=config, verbose=0) + +input = [ + 'The dog is barking', + 'The cat is purring', + 'The bear is growling' +] +doc_embeddings = checkpoint.docFromText(input, keep_dims=False) +for content, embeddings in zip(input, doc_embeddings): + embeddings = binary_quantize(embeddings) + conn.execute('INSERT INTO documents (content, embeddings) VALUES (%s, %s)', (content, embeddings)) + +query = 'puppy' +query_embeddings = binary_quantize(checkpoint.queryFromText([query])[0]) +result = conn.execute('SELECT content, max_sim(embeddings, %s) AS max_sim FROM documents ORDER BY max_sim DESC LIMIT 5', (query_embeddings,)).fetchall() +for row in result: + print(row) From 267d7960156b6866c300229a10b79b89d670ea39 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sat, 16 Nov 2024 15:41:25 -0800 Subject: [PATCH 025/123] Added ColPali / ColQwen2 example [skip ci] --- README.md | 1 + examples/colpali/exact.py | 52 +++++++++++++++++++++++++++++++ examples/colpali/requirements.txt | 4 +++ 3 files changed, 57 insertions(+) create mode 100644 examples/colpali/exact.py create mode 100644 examples/colpali/requirements.txt diff --git a/README.md b/README.md index 0dedce9..224fe57 100644 --- a/README.md +++ b/README.md @@ -33,6 +33,7 @@ Or check out some examples: - [Hybrid 
search](https://github.com/pgvector/pgvector-python/blob/master/examples/hybrid_search/cross_encoder.py) with SentenceTransformers (cross-encoder) - [Sparse search](https://github.com/pgvector/pgvector-python/blob/master/examples/sparse_search/example.py) with Transformers - [Late interaction search](https://github.com/pgvector/pgvector-python/blob/master/examples/colbert/exact.py) with ColBERT +- [Document retrieval](https://github.com/pgvector/pgvector-python/blob/master/examples/colpali/exact.py) with ColPali - [Image search](https://github.com/pgvector/pgvector-python/blob/master/examples/image_search/example.py) with PyTorch - [Image search](https://github.com/pgvector/pgvector-python/blob/master/examples/imagehash/example.py) with perceptual hashing - [Morgan fingerprints](https://github.com/pgvector/pgvector-python/blob/master/examples/rdkit/example.py) with RDKit diff --git a/examples/colpali/exact.py b/examples/colpali/exact.py new file mode 100644 index 0000000..408bc7f --- /dev/null +++ b/examples/colpali/exact.py @@ -0,0 +1,52 @@ +from colpali_engine.models import ColQwen2, ColQwen2Processor +from datasets import load_dataset +from pgvector.psycopg import register_vector +import psycopg +import torch + +conn = psycopg.connect(dbname='pgvector_example', autocommit=True) + +conn.execute('CREATE EXTENSION IF NOT EXISTS vector') +register_vector(conn) + +conn.execute('DROP TABLE IF EXISTS documents') +conn.execute('CREATE TABLE documents (id bigserial PRIMARY KEY, embeddings vector(128)[])') +conn.execute(""" +CREATE OR REPLACE FUNCTION max_sim(document vector[], query vector[]) RETURNS double precision AS $$ + WITH queries AS ( + SELECT row_number() OVER () AS query_number, * FROM (SELECT unnest(query) AS query) + ), + documents AS ( + SELECT unnest(document) AS document + ), + similarities AS ( + SELECT query_number, 1 - (document <=> query) AS similarity FROM queries CROSS JOIN documents + ), + max_similarities AS ( + SELECT MAX(similarity) AS max_similarity FROM similarities GROUP BY query_number + ) + SELECT SUM(max_similarity) FROM max_similarities +$$ LANGUAGE SQL +""") + + +device = 'mps' if torch.backends.mps.is_available() else 'cpu' +model = ColQwen2.from_pretrained('vidore/colqwen2-v1.0', torch_dtype=torch.bfloat16, device_map=device).eval() +processor = ColQwen2Processor.from_pretrained('vidore/colqwen2-v1.0') + + +def generate_embeddings(processed): + with torch.no_grad(): + return model(**processed.to(model.device)).to(device='cpu', dtype=torch.float32) + + +input = load_dataset('vidore/docvqa_test_subsampled', split='test[:3]')['image'] +for content in input: + embeddings = [e.numpy() for e in generate_embeddings(processor.process_images([content]))[0]] + conn.execute('INSERT INTO documents (embeddings) VALUES (%s)', (embeddings,)) + +query = 'dividend' +query_embeddings = [e.numpy() for e in generate_embeddings(processor.process_queries([query]))[0]] +result = conn.execute('SELECT id, max_sim(embeddings, %s) AS max_sim FROM documents ORDER BY max_sim DESC LIMIT 5', (query_embeddings,)).fetchall() +for row in result: + print(row) diff --git a/examples/colpali/requirements.txt b/examples/colpali/requirements.txt new file mode 100644 index 0000000..4cf770d --- /dev/null +++ b/examples/colpali/requirements.txt @@ -0,0 +1,4 @@ +colpali-engine +datasets +pgvector +psycopg[binary] From 7d8a4173d988b5e9debaba7b4d6320d61879e76c Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sat, 16 Nov 2024 15:47:10 -0800 Subject: [PATCH 026/123] Updated ColPali example to use binary 
quantization [skip ci] --- examples/colbert/exact_binary.py | 53 -------------------------------- examples/colpali/exact.py | 16 ++++++---- 2 files changed, 10 insertions(+), 59 deletions(-) delete mode 100644 examples/colbert/exact_binary.py diff --git a/examples/colbert/exact_binary.py b/examples/colbert/exact_binary.py deleted file mode 100644 index 8d398e2..0000000 --- a/examples/colbert/exact_binary.py +++ /dev/null @@ -1,53 +0,0 @@ -from colbert.infra import ColBERTConfig -from colbert.modeling.checkpoint import Checkpoint -from pgvector.psycopg import register_vector, Bit -import psycopg - -conn = psycopg.connect(dbname='pgvector_example', autocommit=True) - -conn.execute('CREATE EXTENSION IF NOT EXISTS vector') -register_vector(conn) - -conn.execute('DROP TABLE IF EXISTS documents') -conn.execute('CREATE TABLE documents (id bigserial PRIMARY KEY, content text, embeddings bit(128)[])') -conn.execute(""" -CREATE OR REPLACE FUNCTION max_sim(document bit[], query bit[]) RETURNS double precision AS $$ - WITH queries AS ( - SELECT row_number() OVER () AS query_number, * FROM (SELECT unnest(query) AS query) - ), - documents AS ( - SELECT unnest(document) AS document - ), - similarities AS ( - SELECT query_number, 1 - ((document <~> query) / bit_length(query)) AS similarity FROM queries CROSS JOIN documents - ), - max_similarities AS ( - SELECT MAX(similarity) AS max_similarity FROM similarities GROUP BY query_number - ) - SELECT SUM(max_similarity) FROM max_similarities -$$ LANGUAGE SQL -""") - - -def binary_quantize(embeddings): - return [Bit(e.numpy()) for e in (embeddings > 0)] - - -config = ColBERTConfig(doc_maxlen=220, query_maxlen=32) -checkpoint = Checkpoint('colbert-ir/colbertv2.0', colbert_config=config, verbose=0) - -input = [ - 'The dog is barking', - 'The cat is purring', - 'The bear is growling' -] -doc_embeddings = checkpoint.docFromText(input, keep_dims=False) -for content, embeddings in zip(input, doc_embeddings): - embeddings = binary_quantize(embeddings) - conn.execute('INSERT INTO documents (content, embeddings) VALUES (%s, %s)', (content, embeddings)) - -query = 'puppy' -query_embeddings = binary_quantize(checkpoint.queryFromText([query])[0]) -result = conn.execute('SELECT content, max_sim(embeddings, %s) AS max_sim FROM documents ORDER BY max_sim DESC LIMIT 5', (query_embeddings,)).fetchall() -for row in result: - print(row) diff --git a/examples/colpali/exact.py b/examples/colpali/exact.py index 408bc7f..9fffc5f 100644 --- a/examples/colpali/exact.py +++ b/examples/colpali/exact.py @@ -1,6 +1,6 @@ from colpali_engine.models import ColQwen2, ColQwen2Processor from datasets import load_dataset -from pgvector.psycopg import register_vector +from pgvector.psycopg import register_vector, Bit import psycopg import torch @@ -10,9 +10,9 @@ register_vector(conn) conn.execute('DROP TABLE IF EXISTS documents') -conn.execute('CREATE TABLE documents (id bigserial PRIMARY KEY, embeddings vector(128)[])') +conn.execute('CREATE TABLE documents (id bigserial PRIMARY KEY, embeddings bit(128)[])') conn.execute(""" -CREATE OR REPLACE FUNCTION max_sim(document vector[], query vector[]) RETURNS double precision AS $$ +CREATE OR REPLACE FUNCTION max_sim(document bit[], query bit[]) RETURNS double precision AS $$ WITH queries AS ( SELECT row_number() OVER () AS query_number, * FROM (SELECT unnest(query) AS query) ), @@ -20,7 +20,7 @@ SELECT unnest(document) AS document ), similarities AS ( - SELECT query_number, 1 - (document <=> query) AS similarity FROM queries CROSS JOIN documents + 
SELECT query_number, 1 - ((document <~> query) / bit_length(query)) AS similarity FROM queries CROSS JOIN documents ), max_similarities AS ( SELECT MAX(similarity) AS max_similarity FROM similarities GROUP BY query_number @@ -40,13 +40,17 @@ def generate_embeddings(processed): return model(**processed.to(model.device)).to(device='cpu', dtype=torch.float32) +def binary_quantize(embedding): + return Bit(embedding > 0) + + input = load_dataset('vidore/docvqa_test_subsampled', split='test[:3]')['image'] for content in input: - embeddings = [e.numpy() for e in generate_embeddings(processor.process_images([content]))[0]] + embeddings = [binary_quantize(e.numpy()) for e in generate_embeddings(processor.process_images([content]))[0]] conn.execute('INSERT INTO documents (embeddings) VALUES (%s)', (embeddings,)) query = 'dividend' -query_embeddings = [e.numpy() for e in generate_embeddings(processor.process_queries([query]))[0]] +query_embeddings = [binary_quantize(e.numpy()) for e in generate_embeddings(processor.process_queries([query]))[0]] result = conn.execute('SELECT id, max_sim(embeddings, %s) AS max_sim FROM documents ORDER BY max_sim DESC LIMIT 5', (query_embeddings,)).fetchall() for row in result: print(row) From d73a412de5fcb6d225b4d90865f0c4e514a142d3 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sat, 16 Nov 2024 15:54:52 -0800 Subject: [PATCH 027/123] Updated ColPali example to use get_torch_device [skip ci] --- examples/colpali/exact.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/colpali/exact.py b/examples/colpali/exact.py index 9fffc5f..6eac7a4 100644 --- a/examples/colpali/exact.py +++ b/examples/colpali/exact.py @@ -1,4 +1,5 @@ from colpali_engine.models import ColQwen2, ColQwen2Processor +from colpali_engine.utils.torch_utils import get_torch_device from datasets import load_dataset from pgvector.psycopg import register_vector, Bit import psycopg @@ -30,7 +31,7 @@ """) -device = 'mps' if torch.backends.mps.is_available() else 'cpu' +device = get_torch_device('auto') model = ColQwen2.from_pretrained('vidore/colqwen2-v1.0', torch_dtype=torch.bfloat16, device_map=device).eval() processor = ColQwen2Processor.from_pretrained('vidore/colqwen2-v1.0') From 7b6a46a014144f05ba174a53510ed69fd113b100 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sat, 16 Nov 2024 15:55:26 -0800 Subject: [PATCH 028/123] Removed extra line [skip ci] --- examples/colpali/exact.py | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/colpali/exact.py b/examples/colpali/exact.py index 6eac7a4..06d1828 100644 --- a/examples/colpali/exact.py +++ b/examples/colpali/exact.py @@ -30,7 +30,6 @@ $$ LANGUAGE SQL """) - device = get_torch_device('auto') model = ColQwen2.from_pretrained('vidore/colqwen2-v1.0', torch_dtype=torch.bfloat16, device_map=device).eval() processor = ColQwen2Processor.from_pretrained('vidore/colqwen2-v1.0') From 4998aa1daefe95eb7550bc92ed875d6193d73b57 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sat, 16 Nov 2024 16:19:48 -0800 Subject: [PATCH 029/123] Updated readme [skip ci] --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 224fe57..260d389 100644 --- a/README.md +++ b/README.md @@ -33,7 +33,7 @@ Or check out some examples: - [Hybrid search](https://github.com/pgvector/pgvector-python/blob/master/examples/hybrid_search/cross_encoder.py) with SentenceTransformers (cross-encoder) - [Sparse search](https://github.com/pgvector/pgvector-python/blob/master/examples/sparse_search/example.py) with 
Transformers - [Late interaction search](https://github.com/pgvector/pgvector-python/blob/master/examples/colbert/exact.py) with ColBERT -- [Document retrieval](https://github.com/pgvector/pgvector-python/blob/master/examples/colpali/exact.py) with ColPali +- [Visual document retrieval](https://github.com/pgvector/pgvector-python/blob/master/examples/colpali/exact.py) with ColPali - [Image search](https://github.com/pgvector/pgvector-python/blob/master/examples/image_search/example.py) with PyTorch - [Image search](https://github.com/pgvector/pgvector-python/blob/master/examples/imagehash/example.py) with perceptual hashing - [Morgan fingerprints](https://github.com/pgvector/pgvector-python/blob/master/examples/rdkit/example.py) with RDKit From af7b04f271612c3f663e9a508f9c44564272e3a8 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sat, 16 Nov 2024 16:40:44 -0800 Subject: [PATCH 030/123] Updated example [skip ci] --- examples/colpali/exact.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/colpali/exact.py b/examples/colpali/exact.py index 06d1828..c6f1467 100644 --- a/examples/colpali/exact.py +++ b/examples/colpali/exact.py @@ -37,20 +37,20 @@ def generate_embeddings(processed): with torch.no_grad(): - return model(**processed.to(model.device)).to(device='cpu', dtype=torch.float32) + return model(**processed.to(model.device)) def binary_quantize(embedding): - return Bit(embedding > 0) + return Bit(embedding.gt(0).numpy(force=True)) input = load_dataset('vidore/docvqa_test_subsampled', split='test[:3]')['image'] for content in input: - embeddings = [binary_quantize(e.numpy()) for e in generate_embeddings(processor.process_images([content]))[0]] + embeddings = [binary_quantize(e) for e in generate_embeddings(processor.process_images([content]))[0]] conn.execute('INSERT INTO documents (embeddings) VALUES (%s)', (embeddings,)) query = 'dividend' -query_embeddings = [binary_quantize(e.numpy()) for e in generate_embeddings(processor.process_queries([query]))[0]] +query_embeddings = [binary_quantize(e) for e in generate_embeddings(processor.process_queries([query]))[0]] result = conn.execute('SELECT id, max_sim(embeddings, %s) AS max_sim FROM documents ORDER BY max_sim DESC LIMIT 5', (query_embeddings,)).fetchall() for row in result: print(row) From ca637bff37674592f08b9f65c75249b0d709746e Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sat, 16 Nov 2024 16:44:27 -0800 Subject: [PATCH 031/123] Updated example [skip ci] --- examples/colpali/exact.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/colpali/exact.py b/examples/colpali/exact.py index c6f1467..80bb603 100644 --- a/examples/colpali/exact.py +++ b/examples/colpali/exact.py @@ -37,11 +37,11 @@ def generate_embeddings(processed): with torch.no_grad(): - return model(**processed.to(model.device)) + return model(**processed.to(model.device)).to(torch.float32).numpy(force=True) def binary_quantize(embedding): - return Bit(embedding.gt(0).numpy(force=True)) + return Bit(embedding > 0) input = load_dataset('vidore/docvqa_test_subsampled', split='test[:3]')['image'] From 5c35a5399aa3101f35e09e941a4e7cce0218e1ef Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sat, 30 Nov 2024 03:30:34 -0800 Subject: [PATCH 032/123] Added test for binary quantization with SQLAlchemy - #98 [skip ci] --- tests/test_sqlalchemy.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/tests/test_sqlalchemy.py b/tests/test_sqlalchemy.py index 77c03fc..0380c89 100644 --- 
a/tests/test_sqlalchemy.py +++ b/tests/test_sqlalchemy.py @@ -65,6 +65,14 @@ class Item(Base): ) half_precision_index.create(engine) +binary_quantize_index = Index( + 'sqlalchemy_orm_binary_quantize_index', + func.cast(func.binary_quantize(Item.embedding), BIT(3)).label('embedding'), + postgresql_using='hnsw', + postgresql_with={'m': 16, 'ef_construction': 64}, + postgresql_ops={'embedding': 'bit_hamming_ops'} +) +binary_quantize_index.create(engine) def create_items(): session = Session(engine) @@ -469,6 +477,18 @@ def test_half_precision(self): items = session.query(Item).order_by(func.cast(Item.embedding, HALFVEC(3)).l2_distance([1, 1, 1])).all() assert [v.id for v in items] == [1, 3, 2] + def test_binary_quantize(self): + session = Session(engine) + session.add(Item(id=1, embedding=[-1, -2, -3])) + session.add(Item(id=2, embedding=[1, -2, 3])) + session.add(Item(id=3, embedding=[1, 2, 3])) + session.commit() + + with Session(engine) as session: + distance = func.cast(func.binary_quantize(Item.embedding), BIT(3)).hamming_distance(func.binary_quantize(func.cast([3, -1, 2], VECTOR(3)))) + items = session.query(Item).order_by(distance).all() + assert [v.id for v in items] == [2, 3, 1] + @pytest.mark.asyncio @pytest.mark.skipif(sqlalchemy_version == 1, reason='Requires SQLAlchemy 2+') async def test_async(self): From 230fe853d58105df1951fbbbc1730469b341f056 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Thu, 19 Dec 2024 11:59:52 -0800 Subject: [PATCH 033/123] Fixed spacing [skip ci] --- tests/test_sqlalchemy.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_sqlalchemy.py b/tests/test_sqlalchemy.py index 0380c89..9ab706a 100644 --- a/tests/test_sqlalchemy.py +++ b/tests/test_sqlalchemy.py @@ -74,6 +74,7 @@ class Item(Base): ) binary_quantize_index.create(engine) + def create_items(): session = Session(engine) session.add(Item(id=1, embedding=[1, 1, 1], half_embedding=[1, 1, 1], binary_embedding='000', sparse_embedding=SparseVector([1, 1, 1]))) From 057806e44f6943230699d99c742621baeb9023c3 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Thu, 19 Dec 2024 12:00:52 -0800 Subject: [PATCH 034/123] Added test for bit type with SQLAlchemy and asyncpg - #110 --- tests/test_sqlalchemy.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tests/test_sqlalchemy.py b/tests/test_sqlalchemy.py index 9ab706a..0b53252 100644 --- a/tests/test_sqlalchemy.py +++ b/tests/test_sqlalchemy.py @@ -526,3 +526,20 @@ def connect(dbapi_connection, connection_record): assert item.embeddings[1].tolist() == [4, 5, 6] await engine.dispose() + + @pytest.mark.asyncio + @pytest.mark.skipif(sqlalchemy_version == 1, reason='Requires SQLAlchemy 2+') + async def test_asyncpg_bit(self): + import asyncpg + + engine = create_async_engine('postgresql+asyncpg://localhost/pgvector_python_test') + async_session = async_sessionmaker(engine, expire_on_commit=False) + + async with async_session() as session: + async with session.begin(): + embedding = asyncpg.BitString('101') + session.add(Item(id=1, binary_embedding=embedding)) + item = await session.get(Item, 1) + assert item.binary_embedding == embedding + + await engine.dispose() From 57b6a61149c1f009ae55cccc17a9b5900e335f72 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Thu, 19 Dec 2024 12:06:02 -0800 Subject: [PATCH 035/123] Improved asyncpg test [skip ci] --- tests/test_asyncpg.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_asyncpg.py b/tests/test_asyncpg.py index 7a68a9e..48d1e32 100644 --- 
a/tests/test_asyncpg.py +++ b/tests/test_asyncpg.py @@ -59,10 +59,11 @@ async def test_bit(self): await register_vector(conn) - embedding = asyncpg.BitString.from_int(5, length=3) + embedding = asyncpg.BitString('101') await conn.execute("INSERT INTO asyncpg_items (embedding) VALUES ($1), (NULL)", embedding) res = await conn.fetch("SELECT * FROM asyncpg_items ORDER BY id") + assert res[0]['embedding'].as_string() == '101' assert res[0]['embedding'].to_int() == 5 assert res[1]['embedding'] is None From 47ad76d88f72cf07ffa238e4ad2714b672346149 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sat, 8 Feb 2025 19:35:43 -0800 Subject: [PATCH 036/123] Improved SQLModel example --- README.md | 3 +-- tests/test_sqlmodel.py | 8 ++++---- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 260d389..ca399ea 100644 --- a/README.md +++ b/README.md @@ -328,10 +328,9 @@ Add a vector column ```python from pgvector.sqlalchemy import Vector -from sqlalchemy import Column class Item(SQLModel, table=True): - embedding: Any = Field(sa_column=Column(Vector(3))) + embedding: Any = Field(sa_type=Vector(3)) ``` Also supports `HALFVEC`, `BIT`, and `SPARSEVEC` diff --git a/tests/test_sqlmodel.py b/tests/test_sqlmodel.py index 4cb0e9b..8a1c86c 100644 --- a/tests/test_sqlmodel.py +++ b/tests/test_sqlmodel.py @@ -15,10 +15,10 @@ class Item(SQLModel, table=True): __tablename__ = 'sqlmodel_item' id: Optional[int] = Field(default=None, primary_key=True) - embedding: Optional[Any] = Field(default=None, sa_column=Column(VECTOR(3))) - half_embedding: Optional[Any] = Field(default=None, sa_column=Column(HALFVEC(3))) - binary_embedding: Optional[Any] = Field(default=None, sa_column=Column(BIT(3))) - sparse_embedding: Optional[Any] = Field(default=None, sa_column=Column(SPARSEVEC(3))) + embedding: Optional[Any] = Field(default=None, sa_type=VECTOR(3)) + half_embedding: Optional[Any] = Field(default=None, sa_type=HALFVEC(3)) + binary_embedding: Optional[Any] = Field(default=None, sa_type=BIT(3)) + sparse_embedding: Optional[Any] = Field(default=None, sa_type=SPARSEVEC(3)) SQLModel.metadata.drop_all(engine) From b3e8908d3b3d74eba016b1cdc5bc7b1df1ad92bf Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sat, 8 Feb 2025 19:36:07 -0800 Subject: [PATCH 037/123] Removed unneeded code [skip ci] --- tests/test_django.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/test_django.py b/tests/test_django.py index 353087e..2c53d82 100644 --- a/tests/test_django.py +++ b/tests/test_django.py @@ -86,9 +86,6 @@ class Meta: class Migration(migrations.Migration): initial = True - dependencies = [ - ] - operations = [ VectorExtension(), migrations.CreateModel( From edd9b4ba02160ef429c4e44455eb0bfe6c781092 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sat, 8 Feb 2025 19:37:05 -0800 Subject: [PATCH 038/123] Test with Python 3.13 on CI --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 04f1c21..562ba94 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -6,7 +6,7 @@ jobs: strategy: fail-fast: false matrix: - python: [3.12, 3.8] + python: [3.13, 3.8] steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 From 1b9df46f9542f3262f6c93a1a858c1414a0ffdc5 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sat, 8 Feb 2025 19:38:38 -0800 Subject: [PATCH 039/123] Improved test code [skip ci] --- tests/test_sqlmodel.py | 3 +-- 1 file changed, 1 insertion(+), 2 
deletions(-) diff --git a/tests/test_sqlmodel.py b/tests/test_sqlmodel.py index 8a1c86c..851afd8 100644 --- a/tests/test_sqlmodel.py +++ b/tests/test_sqlmodel.py @@ -1,9 +1,8 @@ import numpy as np from pgvector.sqlalchemy import VECTOR, HALFVEC, BIT, SPARSEVEC, SparseVector, avg, sum import pytest -from sqlalchemy import Column, Index from sqlalchemy.exc import StatementError -from sqlmodel import Field, Session, SQLModel, create_engine, delete, select, text +from sqlmodel import Field, Index, Session, SQLModel, create_engine, delete, select, text from typing import Any, Optional engine = create_engine('postgresql+psycopg2://localhost/pgvector_python_test') From a3c611f3f141a00c42b311f387278bb4f3ee4bcf Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sat, 8 Feb 2025 19:42:16 -0800 Subject: [PATCH 040/123] Updated examples [skip ci] --- README.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index ca399ea..d7a7e6c 100644 --- a/README.md +++ b/README.md @@ -175,10 +175,10 @@ session.execute(text('CREATE EXTENSION IF NOT EXISTS vector')) Add a vector column ```python -from pgvector.sqlalchemy import Vector +from pgvector.sqlalchemy import VECTOR class Item(Base): - embedding = mapped_column(Vector(3)) + embedding = mapped_column(VECTOR(3)) ``` Also supports `HALFVEC`, `BIT`, and `SPARSEVEC` @@ -274,11 +274,11 @@ session.scalars(select(Item).order_by(order).limit(5)) Add an array column ```python -from pgvector.sqlalchemy import Vector +from pgvector.sqlalchemy import VECTOR from sqlalchemy import ARRAY class Item(Base): - embeddings = mapped_column(ARRAY(Vector(3))) + embeddings = mapped_column(ARRAY(VECTOR(3))) ``` And register the types with the underlying driver @@ -327,10 +327,10 @@ session.exec(text('CREATE EXTENSION IF NOT EXISTS vector')) Add a vector column ```python -from pgvector.sqlalchemy import Vector +from pgvector.sqlalchemy import VECTOR class Item(SQLModel, table=True): - embedding: Any = Field(sa_type=Vector(3)) + embedding: Any = Field(sa_type=VECTOR(3)) ``` Also supports `HALFVEC`, `BIT`, and `SPARSEVEC` From 2ba2a855164f6f0947f17b94201a46d5ad615e6c Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sat, 8 Feb 2025 20:00:48 -0800 Subject: [PATCH 041/123] Improved example [skip ci] --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index d7a7e6c..794cf91 100644 --- a/README.md +++ b/README.md @@ -376,7 +376,7 @@ Also supports `sum` Add an approximate index ```python -from sqlalchemy import Index +from sqlmodel import Index index = Index( 'my_index', From c6d2ddd429c10316ef329dd07ab86fe192bc71a0 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sat, 8 Feb 2025 22:09:23 -0800 Subject: [PATCH 042/123] Improved sparsevec tests [skip ci] --- pgvector/utils/sparsevec.py | 1 + tests/test_psycopg.py | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/pgvector/utils/sparsevec.py b/pgvector/utils/sparsevec.py index fd9ccff..a370c5e 100644 --- a/pgvector/utils/sparsevec.py +++ b/pgvector/utils/sparsevec.py @@ -108,6 +108,7 @@ def from_binary(cls, value): dim, nnz, unused = unpack_from('>iii', value) indices = unpack_from(f'>{nnz}i', value, 12) values = unpack_from(f'>{nnz}f', value, 12 + nnz * 4) + # TODO convert indices and values to lists in 0.4.0 return cls._from_parts(int(dim), indices, values) @classmethod diff --git a/tests/test_psycopg.py b/tests/test_psycopg.py index 5802b2b..6d4f34a 100644 --- a/tests/test_psycopg.py +++ b/tests/test_psycopg.py @@ -110,12 +110,19 @@ 
def test_sparsevec(self): def test_sparsevec_binary_format(self): embedding = SparseVector([1.5, 0, 2, 0, 3, 0]) res = conn.execute('SELECT %b::sparsevec', (embedding,), binary=True).fetchone()[0] + assert res.dimensions() == 6 + # TODO convert indices and values to lists in 0.4.0 + assert res.indices() == (0, 2, 4) + assert res.values() == (1.5, 2, 3) assert res.to_list() == [1.5, 0, 2, 0, 3, 0] assert np.array_equal(res.to_numpy(), np.array([1.5, 0, 2, 0, 3, 0])) def test_sparsevec_text_format(self): embedding = SparseVector([1.5, 0, 2, 0, 3, 0]) res = conn.execute('SELECT %t::sparsevec', (embedding,)).fetchone()[0] + assert res.dimensions() == 6 + assert res.indices() == [0, 2, 4] + assert res.values() == [1.5, 2, 3] assert res.to_list() == [1.5, 0, 2, 0, 3, 0] assert np.array_equal(res.to_numpy(), np.array([1.5, 0, 2, 0, 3, 0])) From 9d9f45b800f3731e213f7b06bf3374e177ad86d5 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sat, 8 Feb 2025 22:11:22 -0800 Subject: [PATCH 043/123] Added todo [skip ci] --- pgvector/psycopg2/register.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pgvector/psycopg2/register.py b/pgvector/psycopg2/register.py index 7752852..08a69a9 100644 --- a/pgvector/psycopg2/register.py +++ b/pgvector/psycopg2/register.py @@ -5,6 +5,7 @@ from .vector import register_vector_info +# TODO remove default value for conn_or_curs in 0.4.0 # TODO make globally False by default in 0.4.0 # note: register_adapter is always global # TODO make arrays True by defalt in 0.4.0 From 972b6739788f5a09ec270bed552182a052e994c5 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sat, 8 Feb 2025 22:18:26 -0800 Subject: [PATCH 044/123] Updated license year [skip ci] --- LICENSE.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/LICENSE.txt b/LICENSE.txt index d205f4e..b612d6d 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -1,6 +1,6 @@ The MIT License (MIT) -Copyright (c) 2021-2024 Andrew Kane +Copyright (c) 2021-2025 Andrew Kane Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal From 57b7d3ba12781871045a378221d90bc972a3d5c1 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sat, 8 Feb 2025 23:26:02 -0800 Subject: [PATCH 045/123] Added test for vector type with SQLAlchemy and asyncpg - #114 --- tests/test_sqlalchemy.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/tests/test_sqlalchemy.py b/tests/test_sqlalchemy.py index 0b53252..6fc0adf 100644 --- a/tests/test_sqlalchemy.py +++ b/tests/test_sqlalchemy.py @@ -527,6 +527,29 @@ def connect(dbapi_connection, connection_record): await engine.dispose() + @pytest.mark.asyncio + @pytest.mark.skipif(sqlalchemy_version == 1, reason='Requires SQLAlchemy 2+') + async def test_asyncpg_vector(self): + import asyncpg + + engine = create_async_engine('postgresql+asyncpg://localhost/pgvector_python_test') + async_session = async_sessionmaker(engine, expire_on_commit=False) + + # TODO do not throw error when types are registered + # @event.listens_for(engine.sync_engine, "connect") + # def connect(dbapi_connection, connection_record): + # from pgvector.asyncpg import register_vector + # dbapi_connection.run_async(register_vector) + + async with async_session() as session: + async with session.begin(): + embedding = np.array([1, 2, 3]) + session.add(Item(id=1, embedding=embedding)) + item = await session.get(Item, 1) + assert np.array_equal(item.embedding, embedding) + + await engine.dispose() + @pytest.mark.asyncio 
@pytest.mark.skipif(sqlalchemy_version == 1, reason='Requires SQLAlchemy 2+') async def test_asyncpg_bit(self): From bf9a0a469983eabb1b1b38c6ba2495e3c4c2b8ce Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sat, 8 Feb 2025 23:29:37 -0800 Subject: [PATCH 046/123] Added tests for halfvec and sparsevec types with SQLAlchemy and asyncpg [skip ci] --- tests/test_sqlalchemy.py | 46 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/tests/test_sqlalchemy.py b/tests/test_sqlalchemy.py index 6fc0adf..40068e9 100644 --- a/tests/test_sqlalchemy.py +++ b/tests/test_sqlalchemy.py @@ -550,6 +550,29 @@ async def test_asyncpg_vector(self): await engine.dispose() + @pytest.mark.asyncio + @pytest.mark.skipif(sqlalchemy_version == 1, reason='Requires SQLAlchemy 2+') + async def test_asyncpg_halfvec(self): + import asyncpg + + engine = create_async_engine('postgresql+asyncpg://localhost/pgvector_python_test') + async_session = async_sessionmaker(engine, expire_on_commit=False) + + # TODO do not throw error when types are registered + # @event.listens_for(engine.sync_engine, "connect") + # def connect(dbapi_connection, connection_record): + # from pgvector.asyncpg import register_vector + # dbapi_connection.run_async(register_vector) + + async with async_session() as session: + async with session.begin(): + embedding = [1, 2, 3] + session.add(Item(id=1, half_embedding=embedding)) + item = await session.get(Item, 1) + assert item.half_embedding.to_list() == embedding + + await engine.dispose() + @pytest.mark.asyncio @pytest.mark.skipif(sqlalchemy_version == 1, reason='Requires SQLAlchemy 2+') async def test_asyncpg_bit(self): @@ -566,3 +589,26 @@ async def test_asyncpg_bit(self): assert item.binary_embedding == embedding await engine.dispose() + + @pytest.mark.asyncio + @pytest.mark.skipif(sqlalchemy_version == 1, reason='Requires SQLAlchemy 2+') + async def test_asyncpg_sparsevec(self): + import asyncpg + + engine = create_async_engine('postgresql+asyncpg://localhost/pgvector_python_test') + async_session = async_sessionmaker(engine, expire_on_commit=False) + + # TODO do not throw error when types are registered + # @event.listens_for(engine.sync_engine, "connect") + # def connect(dbapi_connection, connection_record): + # from pgvector.asyncpg import register_vector + # dbapi_connection.run_async(register_vector) + + async with async_session() as session: + async with session.begin(): + embedding = [1, 2, 3] + session.add(Item(id=1, sparse_embedding=embedding)) + item = await session.get(Item, 1) + assert item.sparse_embedding.to_list() == embedding + + await engine.dispose() From 257eb3b92c9f02e2ca266a15c6c8b93ebc94082a Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sat, 8 Feb 2025 23:30:55 -0800 Subject: [PATCH 047/123] Simplified tests [skip ci] --- tests/test_sqlalchemy.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tests/test_sqlalchemy.py b/tests/test_sqlalchemy.py index 40068e9..519a388 100644 --- a/tests/test_sqlalchemy.py +++ b/tests/test_sqlalchemy.py @@ -530,8 +530,6 @@ def connect(dbapi_connection, connection_record): @pytest.mark.asyncio @pytest.mark.skipif(sqlalchemy_version == 1, reason='Requires SQLAlchemy 2+') async def test_asyncpg_vector(self): - import asyncpg - engine = create_async_engine('postgresql+asyncpg://localhost/pgvector_python_test') async_session = async_sessionmaker(engine, expire_on_commit=False) @@ -553,8 +551,6 @@ async def test_asyncpg_vector(self): @pytest.mark.asyncio @pytest.mark.skipif(sqlalchemy_version == 1, 
reason='Requires SQLAlchemy 2+') async def test_asyncpg_halfvec(self): - import asyncpg - engine = create_async_engine('postgresql+asyncpg://localhost/pgvector_python_test') async_session = async_sessionmaker(engine, expire_on_commit=False) @@ -593,8 +589,6 @@ async def test_asyncpg_bit(self): @pytest.mark.asyncio @pytest.mark.skipif(sqlalchemy_version == 1, reason='Requires SQLAlchemy 2+') async def test_asyncpg_sparsevec(self): - import asyncpg - engine = create_async_engine('postgresql+asyncpg://localhost/pgvector_python_test') async_session = async_sessionmaker(engine, expire_on_commit=False) From 91f5d34c11f0064c83ca08b7e69055ce6ef03124 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sat, 8 Feb 2025 23:32:53 -0800 Subject: [PATCH 048/123] Added test for vector[] type with SQLAlchemy and asyncpg [skip ci] --- tests/test_sqlalchemy.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/tests/test_sqlalchemy.py b/tests/test_sqlalchemy.py index 519a388..7e8b888 100644 --- a/tests/test_sqlalchemy.py +++ b/tests/test_sqlalchemy.py @@ -606,3 +606,26 @@ async def test_asyncpg_sparsevec(self): assert item.sparse_embedding.to_list() == embedding await engine.dispose() + + @pytest.mark.asyncio + @pytest.mark.skipif(sqlalchemy_version == 1, reason='Requires SQLAlchemy 2+') + async def test_asyncpg_vector_array(self): + engine = create_async_engine('postgresql+asyncpg://localhost/pgvector_python_test') + async_session = async_sessionmaker(engine, expire_on_commit=False) + + # TODO do not throw error when types are registered + # @event.listens_for(engine.sync_engine, "connect") + # def connect(dbapi_connection, connection_record): + # from pgvector.asyncpg import register_vector + # dbapi_connection.run_async(register_vector) + + async with async_session() as session: + async with session.begin(): + session.add(Item(id=1, embeddings=[np.array([1, 2, 3]), np.array([4, 5, 6])])) + + # this fails if the driver does not cast arrays + item = await session.get(Item, 1) + assert item.embeddings[0].tolist() == [1, 2, 3] + assert item.embeddings[1].tolist() == [4, 5, 6] + + await engine.dispose() From f7eeb3a04554b9adf82a5073d08fc757c41604a3 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sat, 8 Feb 2025 23:36:40 -0800 Subject: [PATCH 049/123] Improved test code [skip ci] --- tests/test_sqlalchemy.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/test_sqlalchemy.py b/tests/test_sqlalchemy.py index 7e8b888..f3d045f 100644 --- a/tests/test_sqlalchemy.py +++ b/tests/test_sqlalchemy.py @@ -492,7 +492,7 @@ def test_binary_quantize(self): @pytest.mark.asyncio @pytest.mark.skipif(sqlalchemy_version == 1, reason='Requires SQLAlchemy 2+') - async def test_async(self): + async def test_async_avg(self): engine = create_async_engine('postgresql+psycopg://localhost/pgvector_python_test') async_session = async_sessionmaker(engine, expire_on_commit=False) @@ -622,8 +622,6 @@ async def test_asyncpg_vector_array(self): async with async_session() as session: async with session.begin(): session.add(Item(id=1, embeddings=[np.array([1, 2, 3]), np.array([4, 5, 6])])) - - # this fails if the driver does not cast arrays item = await session.get(Item, 1) assert item.embeddings[0].tolist() == [1, 2, 3] assert item.embeddings[1].tolist() == [4, 5, 6] From 2d2563d702ee319a33d17b27549bce035a6c7348 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sat, 8 Feb 2025 23:39:55 -0800 Subject: [PATCH 050/123] Improved test names [skip ci] --- tests/test_sqlalchemy.py | 4 ++-- 1 file changed, 
2 insertions(+), 2 deletions(-) diff --git a/tests/test_sqlalchemy.py b/tests/test_sqlalchemy.py index f3d045f..fd46e74 100644 --- a/tests/test_sqlalchemy.py +++ b/tests/test_sqlalchemy.py @@ -492,7 +492,7 @@ def test_binary_quantize(self): @pytest.mark.asyncio @pytest.mark.skipif(sqlalchemy_version == 1, reason='Requires SQLAlchemy 2+') - async def test_async_avg(self): + async def test_psycopg_async_avg(self): engine = create_async_engine('postgresql+psycopg://localhost/pgvector_python_test') async_session = async_sessionmaker(engine, expire_on_commit=False) @@ -507,7 +507,7 @@ async def test_async_avg(self): @pytest.mark.asyncio @pytest.mark.skipif(sqlalchemy_version == 1, reason='Requires SQLAlchemy 2+') - async def test_async_vector_array(self): + async def test_psycopg_async_vector_array(self): engine = create_async_engine('postgresql+psycopg://localhost/pgvector_python_test') async_session = async_sessionmaker(engine, expire_on_commit=False) From d828239fb466e11a8fb02c7e35a052dbbce3e5b8 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sun, 9 Feb 2025 01:00:50 -0800 Subject: [PATCH 051/123] Revert "Updated examples [skip ci]" This reverts commit a3c611f3f141a00c42b311f387278bb4f3ee4bcf. --- README.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 794cf91..5a59c9d 100644 --- a/README.md +++ b/README.md @@ -175,10 +175,10 @@ session.execute(text('CREATE EXTENSION IF NOT EXISTS vector')) Add a vector column ```python -from pgvector.sqlalchemy import VECTOR +from pgvector.sqlalchemy import Vector class Item(Base): - embedding = mapped_column(VECTOR(3)) + embedding = mapped_column(Vector(3)) ``` Also supports `HALFVEC`, `BIT`, and `SPARSEVEC` @@ -274,11 +274,11 @@ session.scalars(select(Item).order_by(order).limit(5)) Add an array column ```python -from pgvector.sqlalchemy import VECTOR +from pgvector.sqlalchemy import Vector from sqlalchemy import ARRAY class Item(Base): - embeddings = mapped_column(ARRAY(VECTOR(3))) + embeddings = mapped_column(ARRAY(Vector(3))) ``` And register the types with the underlying driver @@ -327,10 +327,10 @@ session.exec(text('CREATE EXTENSION IF NOT EXISTS vector')) Add a vector column ```python -from pgvector.sqlalchemy import VECTOR +from pgvector.sqlalchemy import Vector class Item(SQLModel, table=True): - embedding: Any = Field(sa_type=VECTOR(3)) + embedding: Any = Field(sa_type=Vector(3)) ``` Also supports `HALFVEC`, `BIT`, and `SPARSEVEC` From 8a7040d2ee79ac8fc6313538ffbc38ebad3ac197 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sun, 9 Feb 2025 01:34:02 -0800 Subject: [PATCH 052/123] Removed unneeded code [skip ci] --- examples/citus/example.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/examples/citus/example.py b/examples/citus/example.py index d448204..915c25f 100644 --- a/examples/citus/example.py +++ b/examples/citus/example.py @@ -40,9 +40,6 @@ for i in range(rows): copy.write_row([embeddings[i], categories[i]]) - while conn.pgconn.flush() == 1: - pass - print('Creating index in parallel') conn.execute('CREATE INDEX ON items USING hnsw (embedding vector_l2_ops)') From 00cd08e6c44077b99f378edbd007b2483ff406f7 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sun, 9 Feb 2025 11:09:23 -0800 Subject: [PATCH 053/123] Improved tests --- tests/test_sqlalchemy.py | 139 ++++++++++++++++++++------------------- 1 file changed, 71 insertions(+), 68 deletions(-) diff --git a/tests/test_sqlalchemy.py b/tests/test_sqlalchemy.py index fd46e74..405cd21 100644 --- a/tests/test_sqlalchemy.py +++ 
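On the Citus example change above: with Psycopg 3, exiting the `copy` context manager finishes the COPY and flushes any buffered rows, which is why the manual flush loop could be dropped. A rough sketch of the pattern, assuming an `items (embedding vector(3), category_id bigint)` table and a connection registered with `register_vector`:

```python
import numpy as np
import psycopg
from pgvector.psycopg import register_vector

conn = psycopg.connect(dbname='pgvector_python_test', autocommit=True)
register_vector(conn)

rows = 1000
embeddings = np.random.rand(rows, 3)
categories = np.random.randint(10, size=rows).tolist()

cur = conn.cursor()
with cur.copy('COPY items (embedding, category_id) FROM STDIN WITH (FORMAT BINARY)') as copy:
    copy.set_types(['vector', 'bigint'])
    for i in range(rows):
        copy.write_row([embeddings[i], categories[i]])
# the COPY is completed when the with block exits
```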
b/tests/test_sqlalchemy.py @@ -76,11 +76,11 @@ class Item(Base): def create_items(): - session = Session(engine) - session.add(Item(id=1, embedding=[1, 1, 1], half_embedding=[1, 1, 1], binary_embedding='000', sparse_embedding=SparseVector([1, 1, 1]))) - session.add(Item(id=2, embedding=[2, 2, 2], half_embedding=[2, 2, 2], binary_embedding='101', sparse_embedding=SparseVector([2, 2, 2]))) - session.add(Item(id=3, embedding=[1, 1, 2], half_embedding=[1, 1, 2], binary_embedding='111', sparse_embedding=SparseVector([1, 1, 2]))) - session.commit() + with Session(engine) as session: + session.add(Item(id=1, embedding=[1, 1, 1], half_embedding=[1, 1, 1], binary_embedding='000', sparse_embedding=SparseVector([1, 1, 1]))) + session.add(Item(id=2, embedding=[2, 2, 2], half_embedding=[2, 2, 2], binary_embedding='101', sparse_embedding=SparseVector([2, 2, 2]))) + session.add(Item(id=3, embedding=[1, 1, 2], half_embedding=[1, 1, 2], binary_embedding='111', sparse_embedding=SparseVector([1, 1, 2]))) + session.commit() class TestSqlalchemy: @@ -129,11 +129,11 @@ def test_orm(self): item2 = Item(embedding=[4, 5, 6]) item3 = Item() - session = Session(engine) - session.add(item) - session.add(item2) - session.add(item3) - session.commit() + with Session(engine) as session: + session.add(item) + session.add(item2) + session.add(item3) + session.commit() stmt = select(Item) with Session(engine) as session: @@ -148,11 +148,11 @@ def test_orm(self): assert items[2].embedding is None def test_vector(self): - session = Session(engine) - session.add(Item(id=1, embedding=[1, 2, 3])) - session.commit() - item = session.get(Item, 1) - assert item.embedding.tolist() == [1, 2, 3] + with Session(engine) as session: + session.add(Item(id=1, embedding=[1, 2, 3])) + session.commit() + item = session.get(Item, 1) + assert item.embedding.tolist() == [1, 2, 3] def test_vector_l2_distance(self): create_items() @@ -203,11 +203,11 @@ def test_vector_l1_distance_orm(self): assert [v.id for v in items] == [1, 3, 2] def test_halfvec(self): - session = Session(engine) - session.add(Item(id=1, half_embedding=[1, 2, 3])) - session.commit() - item = session.get(Item, 1) - assert item.half_embedding.to_list() == [1, 2, 3] + with Session(engine) as session: + session.add(Item(id=1, half_embedding=[1, 2, 3])) + session.commit() + item = session.get(Item, 1) + assert item.half_embedding.to_list() == [1, 2, 3] def test_halfvec_l2_distance(self): create_items() @@ -258,11 +258,11 @@ def test_halfvec_l1_distance_orm(self): assert [v.id for v in items] == [1, 3, 2] def test_bit(self): - session = Session(engine) - session.add(Item(id=1, binary_embedding='101')) - session.commit() - item = session.get(Item, 1) - assert item.binary_embedding == '101' + with Session(engine) as session: + session.add(Item(id=1, binary_embedding='101')) + session.commit() + item = session.get(Item, 1) + assert item.binary_embedding == '101' def test_bit_hamming_distance(self): create_items() @@ -289,11 +289,11 @@ def test_bit_jaccard_distance_orm(self): assert [v.id for v in items] == [2, 3, 1] def test_sparsevec(self): - session = Session(engine) - session.add(Item(id=1, sparse_embedding=[1, 2, 3])) - session.commit() - item = session.get(Item, 1) - assert item.sparse_embedding.to_list() == [1, 2, 3] + with Session(engine) as session: + session.add(Item(id=1, sparse_embedding=[1, 2, 3])) + session.commit() + item = session.get(Item, 1) + assert item.sparse_embedding.to_list() == [1, 2, 3] def test_sparsevec_l2_distance(self): create_items() @@ -405,24 +405,24 @@ 
def test_sum_orm(self): def test_bad_dimensions(self): item = Item(embedding=[1, 2]) - session = Session(engine) - session.add(item) - with pytest.raises(StatementError, match='expected 3 dimensions, not 2'): - session.commit() + with Session(engine) as session: + session.add(item) + with pytest.raises(StatementError, match='expected 3 dimensions, not 2'): + session.commit() def test_bad_ndim(self): item = Item(embedding=np.array([[1, 2, 3]])) - session = Session(engine) - session.add(item) - with pytest.raises(StatementError, match='expected ndim to be 1'): - session.commit() + with Session(engine) as session: + session.add(item) + with pytest.raises(StatementError, match='expected ndim to be 1'): + session.commit() def test_bad_dtype(self): item = Item(embedding=np.array(['one', 'two', 'three'])) - session = Session(engine) - session.add(item) - with pytest.raises(StatementError, match='could not convert string to float'): - session.commit() + with Session(engine) as session: + session.add(item) + with pytest.raises(StatementError, match='could not convert string to float'): + session.commit() def test_inspect(self): columns = inspect(engine).get_columns('sqlalchemy_orm_item') @@ -433,14 +433,17 @@ def test_literal_binds(self): assert "embedding <-> '[1.0,2.0,3.0]'" in str(sql) def test_insert(self): - session.execute(insert(Item).values(embedding=np.array([1, 2, 3]))) + with Session(engine) as session: + session.execute(insert(Item).values(embedding=np.array([1, 2, 3]))) def test_insert_bulk(self): - session.execute(insert(Item), [{'embedding': np.array([1, 2, 3])}]) + with Session(engine) as session: + session.execute(insert(Item), [{'embedding': np.array([1, 2, 3])}]) # register_vector in psycopg2 tests change this behavior # def test_insert_text(self): - # session.execute(text('INSERT INTO sqlalchemy_orm_item (embedding) VALUES (:embedding)'), {'embedding': np.array([1, 2, 3])}) + # with Session(engine) as session: + # session.execute(text('INSERT INTO sqlalchemy_orm_item (embedding) VALUES (:embedding)'), {'embedding': np.array([1, 2, 3])}) def test_automap(self): metadata = MetaData() @@ -448,29 +451,30 @@ def test_automap(self): AutoBase = automap_base(metadata=metadata) AutoBase.prepare() AutoItem = AutoBase.classes.sqlalchemy_orm_item - session.execute(insert(AutoItem), [{'embedding': np.array([1, 2, 3])}]) - item = session.query(AutoItem).first() - assert item.embedding.tolist() == [1, 2, 3] + with Session(engine) as session: + session.execute(insert(AutoItem), [{'embedding': np.array([1, 2, 3])}]) + item = session.query(AutoItem).first() + assert item.embedding.tolist() == [1, 2, 3] def test_vector_array(self): - session = Session(array_engine) - session.add(Item(id=1, embeddings=[np.array([1, 2, 3]), np.array([4, 5, 6])])) - session.commit() + with Session(array_engine) as session: + session.add(Item(id=1, embeddings=[np.array([1, 2, 3]), np.array([4, 5, 6])])) + session.commit() - # this fails if the driver does not cast arrays - item = session.get(Item, 1) - assert item.embeddings[0].tolist() == [1, 2, 3] - assert item.embeddings[1].tolist() == [4, 5, 6] + # this fails if the driver does not cast arrays + item = session.get(Item, 1) + assert item.embeddings[0].tolist() == [1, 2, 3] + assert item.embeddings[1].tolist() == [4, 5, 6] def test_halfvec_array(self): - session = Session(array_engine) - session.add(Item(id=1, half_embeddings=[np.array([1, 2, 3]), np.array([4, 5, 6])])) - session.commit() + with Session(array_engine) as session: + session.add(Item(id=1, 
half_embeddings=[np.array([1, 2, 3]), np.array([4, 5, 6])])) + session.commit() - # this fails if the driver does not cast arrays - item = session.get(Item, 1) - assert item.half_embeddings[0].to_list() == [1, 2, 3] - assert item.half_embeddings[1].to_list() == [4, 5, 6] + # this fails if the driver does not cast arrays + item = session.get(Item, 1) + assert item.half_embeddings[0].to_list() == [1, 2, 3] + assert item.half_embeddings[1].to_list() == [4, 5, 6] def test_half_precision(self): create_items() @@ -479,13 +483,12 @@ def test_half_precision(self): assert [v.id for v in items] == [1, 3, 2] def test_binary_quantize(self): - session = Session(engine) - session.add(Item(id=1, embedding=[-1, -2, -3])) - session.add(Item(id=2, embedding=[1, -2, 3])) - session.add(Item(id=3, embedding=[1, 2, 3])) - session.commit() - with Session(engine) as session: + session.add(Item(id=1, embedding=[-1, -2, -3])) + session.add(Item(id=2, embedding=[1, -2, 3])) + session.add(Item(id=3, embedding=[1, 2, 3])) + session.commit() + distance = func.cast(func.binary_quantize(Item.embedding), BIT(3)).hamming_distance(func.binary_quantize(func.cast([3, -1, 2], VECTOR(3)))) items = session.query(Item).order_by(distance).all() assert [v.id for v in items] == [2, 3, 1] From 7837e92d72eef265e075e8ea5aa305e159e41437 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sun, 9 Feb 2025 11:15:18 -0800 Subject: [PATCH 054/123] Added more tests for SQLAlchemy --- tests/test_sqlalchemy.py | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/tests/test_sqlalchemy.py b/tests/test_sqlalchemy.py index 405cd21..79b3c50 100644 --- a/tests/test_sqlalchemy.py +++ b/tests/test_sqlalchemy.py @@ -29,6 +29,8 @@ def connect(dbapi_connection, connection_record): register_vector(dbapi_connection, globally=False, arrays=True) +psycopg3_engine = create_engine('postgresql+psycopg://localhost/pgvector_python_test') + Base = declarative_base() @@ -493,6 +495,34 @@ def test_binary_quantize(self): items = session.query(Item).order_by(distance).all() assert [v.id for v in items] == [2, 3, 1] + def test_psycopg_vector(self): + with Session(psycopg3_engine) as session: + session.add(Item(id=1, embedding=[1, 2, 3])) + session.commit() + item = session.get(Item, 1) + assert item.embedding.tolist() == [1, 2, 3] + + def test_psycopg_halfvec(self): + with Session(psycopg3_engine) as session: + session.add(Item(id=1, half_embedding=[1, 2, 3])) + session.commit() + item = session.get(Item, 1) + assert item.half_embedding.to_list() == [1, 2, 3] + + def test_psycopg_bit(self): + with Session(psycopg3_engine) as session: + session.add(Item(id=1, binary_embedding='101')) + session.commit() + item = session.get(Item, 1) + assert item.binary_embedding == '101' + + def test_psycopg_sparsevec(self): + with Session(psycopg3_engine) as session: + session.add(Item(id=1, sparse_embedding=[1, 2, 3])) + session.commit() + item = session.get(Item, 1) + assert item.sparse_embedding.to_list() == [1, 2, 3] + @pytest.mark.asyncio @pytest.mark.skipif(sqlalchemy_version == 1, reason='Requires SQLAlchemy 2+') async def test_psycopg_async_avg(self): From f08cec7a0522b19942a02df14f3f396f0773c912 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sun, 9 Feb 2025 11:46:42 -0800 Subject: [PATCH 055/123] Parameterize SQLAlchemy tests --- tests/test_sqlalchemy.py | 181 ++++++++++++++++++--------------------- 1 file changed, 84 insertions(+), 97 deletions(-) diff --git a/tests/test_sqlalchemy.py b/tests/test_sqlalchemy.py index 79b3c50..a4ac860 100644 --- 
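The conversions to `with Session(engine) as session:` above use the context-manager form of `Session`, which closes the session and returns its connection to the pool when the block exits, even if an assertion fails partway through. A condensed sketch of the pattern, reusing the engine URL from these tests:

```python
from sqlalchemy import create_engine, text
from sqlalchemy.orm import Session

engine = create_engine('postgresql+psycopg2://localhost/pgvector_python_test')

# the session is closed automatically when the block exits
with Session(engine) as session:
    session.execute(text('SELECT 1'))
    session.commit()
```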
a/tests/test_sqlalchemy.py +++ b/tests/test_sqlalchemy.py @@ -15,8 +15,15 @@ mapped_column = Column sqlalchemy_version = 1 -engine = create_engine('postgresql+psycopg2://localhost/pgvector_python_test') -with Session(engine) as session: +psycopg2_engine = create_engine('postgresql+psycopg2://localhost/pgvector_python_test') +engines = [psycopg2_engine] + +if sqlalchemy_version > 1: + psycopg_engine = create_engine('postgresql+psycopg://localhost/pgvector_python_test') + engines.append(psycopg_engine) + +setup_engine = engines[0] +with Session(setup_engine) as session: session.execute(text('CREATE EXTENSION IF NOT EXISTS vector')) session.commit() @@ -29,8 +36,6 @@ def connect(dbapi_connection, connection_record): register_vector(dbapi_connection, globally=False, arrays=True) -psycopg3_engine = create_engine('postgresql+psycopg://localhost/pgvector_python_test') - Base = declarative_base() @@ -46,8 +51,8 @@ class Item(Base): half_embeddings = mapped_column(ARRAY(HALFVEC(3))) -Base.metadata.drop_all(engine) -Base.metadata.create_all(engine) +Base.metadata.drop_all(setup_engine) +Base.metadata.create_all(setup_engine) index = Index( 'sqlalchemy_orm_index', @@ -56,7 +61,7 @@ class Item(Base): postgresql_with={'m': 16, 'ef_construction': 64}, postgresql_ops={'embedding': 'vector_l2_ops'} ) -index.create(engine) +index.create(setup_engine) half_precision_index = Index( 'sqlalchemy_orm_half_precision_index', @@ -65,7 +70,7 @@ class Item(Base): postgresql_with={'m': 16, 'ef_construction': 64}, postgresql_ops={'embedding': 'halfvec_l2_ops'} ) -half_precision_index.create(engine) +half_precision_index.create(setup_engine) binary_quantize_index = Index( 'sqlalchemy_orm_binary_quantize_index', @@ -74,24 +79,29 @@ class Item(Base): postgresql_with={'m': 16, 'ef_construction': 64}, postgresql_ops={'embedding': 'bit_hamming_ops'} ) -binary_quantize_index.create(engine) +binary_quantize_index.create(setup_engine) def create_items(): - with Session(engine) as session: + with Session(setup_engine) as session: session.add(Item(id=1, embedding=[1, 1, 1], half_embedding=[1, 1, 1], binary_embedding='000', sparse_embedding=SparseVector([1, 1, 1]))) session.add(Item(id=2, embedding=[2, 2, 2], half_embedding=[2, 2, 2], binary_embedding='101', sparse_embedding=SparseVector([2, 2, 2]))) session.add(Item(id=3, embedding=[1, 1, 2], half_embedding=[1, 1, 2], binary_embedding='111', sparse_embedding=SparseVector([1, 1, 2]))) session.commit() +def delete_items(): + with Session(setup_engine) as session: + session.query(Item).delete() + session.commit() + + +@pytest.mark.parametrize("engine", engines) class TestSqlalchemy: - def setup_method(self, test_method): - with Session(engine) as session: - session.query(Item).delete() - session.commit() + def setup_method(self): + delete_items() - def test_core(self): + def test_core(self, engine): metadata = MetaData() item_table = Table( @@ -126,7 +136,7 @@ def test_core(self): ) hnsw_index.create(engine) - def test_orm(self): + def test_orm(self, engine): item = Item(embedding=np.array([1.5, 2, 3])) item2 = Item(embedding=[4, 5, 6]) item3 = Item() @@ -140,236 +150,236 @@ def test_orm(self): stmt = select(Item) with Session(engine) as session: items = [v[0] for v in session.execute(stmt).all()] - assert items[0].id == 1 - assert items[1].id == 2 - assert items[2].id == 3 + assert items[0].id in [1, 4] + assert items[1].id in [2, 5] + assert items[2].id in [3, 6] assert np.array_equal(items[0].embedding, np.array([1.5, 2, 3])) assert items[0].embedding.dtype == np.float32 assert 
np.array_equal(items[1].embedding, np.array([4, 5, 6])) assert items[1].embedding.dtype == np.float32 assert items[2].embedding is None - def test_vector(self): + def test_vector(self, engine): with Session(engine) as session: session.add(Item(id=1, embedding=[1, 2, 3])) session.commit() item = session.get(Item, 1) assert item.embedding.tolist() == [1, 2, 3] - def test_vector_l2_distance(self): + def test_vector_l2_distance(self, engine): create_items() with Session(engine) as session: items = session.query(Item).order_by(Item.embedding.l2_distance([1, 1, 1])).all() assert [v.id for v in items] == [1, 3, 2] - def test_vector_l2_distance_orm(self): + def test_vector_l2_distance_orm(self, engine): create_items() with Session(engine) as session: items = session.scalars(select(Item).order_by(Item.embedding.l2_distance([1, 1, 1]))) assert [v.id for v in items] == [1, 3, 2] - def test_vector_max_inner_product(self): + def test_vector_max_inner_product(self, engine): create_items() with Session(engine) as session: items = session.query(Item).order_by(Item.embedding.max_inner_product([1, 1, 1])).all() assert [v.id for v in items] == [2, 3, 1] - def test_vector_max_inner_product_orm(self): + def test_vector_max_inner_product_orm(self, engine): create_items() with Session(engine) as session: items = session.scalars(select(Item).order_by(Item.embedding.max_inner_product([1, 1, 1]))) assert [v.id for v in items] == [2, 3, 1] - def test_vector_cosine_distance(self): + def test_vector_cosine_distance(self, engine): create_items() with Session(engine) as session: items = session.query(Item).order_by(Item.embedding.cosine_distance([1, 1, 1])).all() assert [v.id for v in items] == [1, 2, 3] - def test_vector_cosine_distance_orm(self): + def test_vector_cosine_distance_orm(self, engine): create_items() with Session(engine) as session: items = session.scalars(select(Item).order_by(Item.embedding.cosine_distance([1, 1, 1]))) assert [v.id for v in items] == [1, 2, 3] - def test_vector_l1_distance(self): + def test_vector_l1_distance(self, engine): create_items() with Session(engine) as session: items = session.query(Item).order_by(Item.embedding.l1_distance([1, 1, 1])).all() assert [v.id for v in items] == [1, 3, 2] - def test_vector_l1_distance_orm(self): + def test_vector_l1_distance_orm(self, engine): create_items() with Session(engine) as session: items = session.scalars(select(Item).order_by(Item.embedding.l1_distance([1, 1, 1]))) assert [v.id for v in items] == [1, 3, 2] - def test_halfvec(self): + def test_halfvec(self, engine): with Session(engine) as session: session.add(Item(id=1, half_embedding=[1, 2, 3])) session.commit() item = session.get(Item, 1) assert item.half_embedding.to_list() == [1, 2, 3] - def test_halfvec_l2_distance(self): + def test_halfvec_l2_distance(self, engine): create_items() with Session(engine) as session: items = session.query(Item).order_by(Item.half_embedding.l2_distance([1, 1, 1])).all() assert [v.id for v in items] == [1, 3, 2] - def test_halfvec_l2_distance_orm(self): + def test_halfvec_l2_distance_orm(self, engine): create_items() with Session(engine) as session: items = session.scalars(select(Item).order_by(Item.half_embedding.l2_distance([1, 1, 1]))) assert [v.id for v in items] == [1, 3, 2] - def test_halfvec_max_inner_product(self): + def test_halfvec_max_inner_product(self, engine): create_items() with Session(engine) as session: items = session.query(Item).order_by(Item.half_embedding.max_inner_product([1, 1, 1])).all() assert [v.id for v in items] == [2, 3, 1] - 
def test_halfvec_max_inner_product_orm(self): + def test_halfvec_max_inner_product_orm(self, engine): create_items() with Session(engine) as session: items = session.scalars(select(Item).order_by(Item.half_embedding.max_inner_product([1, 1, 1]))) assert [v.id for v in items] == [2, 3, 1] - def test_halfvec_cosine_distance(self): + def test_halfvec_cosine_distance(self, engine): create_items() with Session(engine) as session: items = session.query(Item).order_by(Item.half_embedding.cosine_distance([1, 1, 1])).all() assert [v.id for v in items] == [1, 2, 3] - def test_halfvec_cosine_distance_orm(self): + def test_halfvec_cosine_distance_orm(self, engine): create_items() with Session(engine) as session: items = session.scalars(select(Item).order_by(Item.half_embedding.cosine_distance([1, 1, 1]))) assert [v.id for v in items] == [1, 2, 3] - def test_halfvec_l1_distance(self): + def test_halfvec_l1_distance(self, engine): create_items() with Session(engine) as session: items = session.query(Item).order_by(Item.half_embedding.l1_distance([1, 1, 1])).all() assert [v.id for v in items] == [1, 3, 2] - def test_halfvec_l1_distance_orm(self): + def test_halfvec_l1_distance_orm(self, engine): create_items() with Session(engine) as session: items = session.scalars(select(Item).order_by(Item.half_embedding.l1_distance([1, 1, 1]))) assert [v.id for v in items] == [1, 3, 2] - def test_bit(self): + def test_bit(self, engine): with Session(engine) as session: session.add(Item(id=1, binary_embedding='101')) session.commit() item = session.get(Item, 1) assert item.binary_embedding == '101' - def test_bit_hamming_distance(self): + def test_bit_hamming_distance(self, engine): create_items() with Session(engine) as session: items = session.query(Item).order_by(Item.binary_embedding.hamming_distance('101')).all() assert [v.id for v in items] == [2, 3, 1] - def test_bit_hamming_distance_orm(self): + def test_bit_hamming_distance_orm(self, engine): create_items() with Session(engine) as session: items = session.scalars(select(Item).order_by(Item.binary_embedding.hamming_distance('101'))) assert [v.id for v in items] == [2, 3, 1] - def test_bit_jaccard_distance(self): + def test_bit_jaccard_distance(self, engine): create_items() with Session(engine) as session: items = session.query(Item).order_by(Item.binary_embedding.jaccard_distance('101')).all() assert [v.id for v in items] == [2, 3, 1] - def test_bit_jaccard_distance_orm(self): + def test_bit_jaccard_distance_orm(self, engine): create_items() with Session(engine) as session: items = session.scalars(select(Item).order_by(Item.binary_embedding.jaccard_distance('101'))) assert [v.id for v in items] == [2, 3, 1] - def test_sparsevec(self): + def test_sparsevec(self, engine): with Session(engine) as session: session.add(Item(id=1, sparse_embedding=[1, 2, 3])) session.commit() item = session.get(Item, 1) assert item.sparse_embedding.to_list() == [1, 2, 3] - def test_sparsevec_l2_distance(self): + def test_sparsevec_l2_distance(self, engine): create_items() with Session(engine) as session: items = session.query(Item).order_by(Item.sparse_embedding.l2_distance([1, 1, 1])).all() assert [v.id for v in items] == [1, 3, 2] - def test_sparsevec_l2_distance_orm(self): + def test_sparsevec_l2_distance_orm(self, engine): create_items() with Session(engine) as session: items = session.scalars(select(Item).order_by(Item.sparse_embedding.l2_distance([1, 1, 1]))) assert [v.id for v in items] == [1, 3, 2] - def test_sparsevec_max_inner_product(self): + def 
test_sparsevec_max_inner_product(self, engine): create_items() with Session(engine) as session: items = session.query(Item).order_by(Item.sparse_embedding.max_inner_product([1, 1, 1])).all() assert [v.id for v in items] == [2, 3, 1] - def test_sparsevec_max_inner_product_orm(self): + def test_sparsevec_max_inner_product_orm(self, engine): create_items() with Session(engine) as session: items = session.scalars(select(Item).order_by(Item.sparse_embedding.max_inner_product([1, 1, 1]))) assert [v.id for v in items] == [2, 3, 1] - def test_sparsevec_cosine_distance(self): + def test_sparsevec_cosine_distance(self, engine): create_items() with Session(engine) as session: items = session.query(Item).order_by(Item.sparse_embedding.cosine_distance([1, 1, 1])).all() assert [v.id for v in items] == [1, 2, 3] - def test_sparsevec_cosine_distance_orm(self): + def test_sparsevec_cosine_distance_orm(self, engine): create_items() with Session(engine) as session: items = session.scalars(select(Item).order_by(Item.sparse_embedding.cosine_distance([1, 1, 1]))) assert [v.id for v in items] == [1, 2, 3] - def test_sparsevec_l1_distance(self): + def test_sparsevec_l1_distance(self, engine): create_items() with Session(engine) as session: items = session.query(Item).order_by(Item.sparse_embedding.l1_distance([1, 1, 1])).all() assert [v.id for v in items] == [1, 3, 2] - def test_sparsevec_l1_distance_orm(self): + def test_sparsevec_l1_distance_orm(self, engine): create_items() with Session(engine) as session: items = session.scalars(select(Item).order_by(Item.sparse_embedding.l1_distance([1, 1, 1]))) assert [v.id for v in items] == [1, 3, 2] - def test_filter(self): + def test_filter(self, engine): create_items() with Session(engine) as session: items = session.query(Item).filter(Item.embedding.l2_distance([1, 1, 1]) < 1).all() assert [v.id for v in items] == [1] - def test_filter_orm(self): + def test_filter_orm(self, engine): create_items() with Session(engine) as session: items = session.scalars(select(Item).filter(Item.embedding.l2_distance([1, 1, 1]) < 1)) assert [v.id for v in items] == [1] - def test_select(self): + def test_select(self, engine): with Session(engine) as session: session.add(Item(embedding=[2, 3, 3])) items = session.query(Item.embedding.l2_distance([1, 1, 1])).first() assert items[0] == 3 - def test_select_orm(self): + def test_select_orm(self, engine): with Session(engine) as session: session.add(Item(embedding=[2, 3, 3])) items = session.scalars(select(Item.embedding.l2_distance([1, 1, 1]))).all() assert items[0] == 3 - def test_avg(self): + def test_avg(self, engine): with Session(engine) as session: res = session.query(avg(Item.embedding)).first()[0] assert res is None @@ -378,7 +388,7 @@ def test_avg(self): res = session.query(avg(Item.embedding)).first()[0] assert np.array_equal(res, np.array([2.5, 3.5, 4.5])) - def test_avg_orm(self): + def test_avg_orm(self, engine): with Session(engine) as session: res = session.scalars(select(avg(Item.embedding))).first() assert res is None @@ -387,7 +397,7 @@ def test_avg_orm(self): res = session.scalars(select(avg(Item.embedding))).first() assert np.array_equal(res, np.array([2.5, 3.5, 4.5])) - def test_sum(self): + def test_sum(self, engine): with Session(engine) as session: res = session.query(sum(Item.embedding)).first()[0] assert res is None @@ -396,7 +406,7 @@ def test_sum(self): res = session.query(sum(Item.embedding)).first()[0] assert np.array_equal(res, np.array([5, 7, 9])) - def test_sum_orm(self): + def test_sum_orm(self, engine): 
with Session(engine) as session: res = session.scalars(select(sum(Item.embedding))).first() assert res is None @@ -405,40 +415,40 @@ def test_sum_orm(self): res = session.scalars(select(sum(Item.embedding))).first() assert np.array_equal(res, np.array([5, 7, 9])) - def test_bad_dimensions(self): + def test_bad_dimensions(self, engine): item = Item(embedding=[1, 2]) with Session(engine) as session: session.add(item) with pytest.raises(StatementError, match='expected 3 dimensions, not 2'): session.commit() - def test_bad_ndim(self): + def test_bad_ndim(self, engine): item = Item(embedding=np.array([[1, 2, 3]])) with Session(engine) as session: session.add(item) with pytest.raises(StatementError, match='expected ndim to be 1'): session.commit() - def test_bad_dtype(self): + def test_bad_dtype(self, engine): item = Item(embedding=np.array(['one', 'two', 'three'])) with Session(engine) as session: session.add(item) with pytest.raises(StatementError, match='could not convert string to float'): session.commit() - def test_inspect(self): + def test_inspect(self, engine): columns = inspect(engine).get_columns('sqlalchemy_orm_item') assert isinstance(columns[1]['type'], VECTOR) - def test_literal_binds(self): + def test_literal_binds(self, engine): sql = select(Item).order_by(Item.embedding.l2_distance([1, 2, 3])).compile(engine, compile_kwargs={'literal_binds': True}) assert "embedding <-> '[1.0,2.0,3.0]'" in str(sql) - def test_insert(self): + def test_insert(self, engine): with Session(engine) as session: session.execute(insert(Item).values(embedding=np.array([1, 2, 3]))) - def test_insert_bulk(self): + def test_insert_bulk(self, engine): with Session(engine) as session: session.execute(insert(Item), [{'embedding': np.array([1, 2, 3])}]) @@ -447,7 +457,7 @@ def test_insert_bulk(self): # with Session(engine) as session: # session.execute(text('INSERT INTO sqlalchemy_orm_item (embedding) VALUES (:embedding)'), {'embedding': np.array([1, 2, 3])}) - def test_automap(self): + def test_automap(self, engine): metadata = MetaData() metadata.reflect(engine, only=['sqlalchemy_orm_item']) AutoBase = automap_base(metadata=metadata) @@ -458,7 +468,7 @@ def test_automap(self): item = session.query(AutoItem).first() assert item.embedding.tolist() == [1, 2, 3] - def test_vector_array(self): + def test_vector_array(self, engine): with Session(array_engine) as session: session.add(Item(id=1, embeddings=[np.array([1, 2, 3]), np.array([4, 5, 6])])) session.commit() @@ -468,7 +478,7 @@ def test_vector_array(self): assert item.embeddings[0].tolist() == [1, 2, 3] assert item.embeddings[1].tolist() == [4, 5, 6] - def test_halfvec_array(self): + def test_halfvec_array(self, engine): with Session(array_engine) as session: session.add(Item(id=1, half_embeddings=[np.array([1, 2, 3]), np.array([4, 5, 6])])) session.commit() @@ -478,13 +488,13 @@ def test_halfvec_array(self): assert item.half_embeddings[0].to_list() == [1, 2, 3] assert item.half_embeddings[1].to_list() == [4, 5, 6] - def test_half_precision(self): + def test_half_precision(self, engine): create_items() with Session(engine) as session: items = session.query(Item).order_by(func.cast(Item.embedding, HALFVEC(3)).l2_distance([1, 1, 1])).all() assert [v.id for v in items] == [1, 3, 2] - def test_binary_quantize(self): + def test_binary_quantize(self, engine): with Session(engine) as session: session.add(Item(id=1, embedding=[-1, -2, -3])) session.add(Item(id=2, embedding=[1, -2, 3])) @@ -495,33 +505,10 @@ def test_binary_quantize(self): items = 
session.query(Item).order_by(distance).all() assert [v.id for v in items] == [2, 3, 1] - def test_psycopg_vector(self): - with Session(psycopg3_engine) as session: - session.add(Item(id=1, embedding=[1, 2, 3])) - session.commit() - item = session.get(Item, 1) - assert item.embedding.tolist() == [1, 2, 3] - - def test_psycopg_halfvec(self): - with Session(psycopg3_engine) as session: - session.add(Item(id=1, half_embedding=[1, 2, 3])) - session.commit() - item = session.get(Item, 1) - assert item.half_embedding.to_list() == [1, 2, 3] - def test_psycopg_bit(self): - with Session(psycopg3_engine) as session: - session.add(Item(id=1, binary_embedding='101')) - session.commit() - item = session.get(Item, 1) - assert item.binary_embedding == '101' - - def test_psycopg_sparsevec(self): - with Session(psycopg3_engine) as session: - session.add(Item(id=1, sparse_embedding=[1, 2, 3])) - session.commit() - item = session.get(Item, 1) - assert item.sparse_embedding.to_list() == [1, 2, 3] +class TestSqlalchemyAsync: + def setup_method(self): + delete_items() @pytest.mark.asyncio @pytest.mark.skipif(sqlalchemy_version == 1, reason='Requires SQLAlchemy 2+') From 5e381602a739ca5307f02c75ee57d219555f5ada Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sun, 9 Feb 2025 11:54:57 -0800 Subject: [PATCH 056/123] Improved array tests --- tests/test_sqlalchemy.py | 49 +++++++++++++++++++++++----------------- 1 file changed, 28 insertions(+), 21 deletions(-) diff --git a/tests/test_sqlalchemy.py b/tests/test_sqlalchemy.py index a4ac860..b1f3e85 100644 --- a/tests/test_sqlalchemy.py +++ b/tests/test_sqlalchemy.py @@ -27,10 +27,11 @@ session.execute(text('CREATE EXTENSION IF NOT EXISTS vector')) session.commit() -array_engine = create_engine('postgresql+psycopg2://localhost/pgvector_python_test') +psycopg2_array_engine = create_engine('postgresql+psycopg2://localhost/pgvector_python_test') +array_engines = [psycopg2_array_engine] -@event.listens_for(array_engine, "connect") +@event.listens_for(psycopg2_array_engine, "connect") def connect(dbapi_connection, connection_record): from pgvector.psycopg2 import register_vector register_vector(dbapi_connection, globally=False, arrays=True) @@ -468,8 +469,31 @@ def test_automap(self, engine): item = session.query(AutoItem).first() assert item.embedding.tolist() == [1, 2, 3] + def test_half_precision(self, engine): + create_items() + with Session(engine) as session: + items = session.query(Item).order_by(func.cast(Item.embedding, HALFVEC(3)).l2_distance([1, 1, 1])).all() + assert [v.id for v in items] == [1, 3, 2] + + def test_binary_quantize(self, engine): + with Session(engine) as session: + session.add(Item(id=1, embedding=[-1, -2, -3])) + session.add(Item(id=2, embedding=[1, -2, 3])) + session.add(Item(id=3, embedding=[1, 2, 3])) + session.commit() + + distance = func.cast(func.binary_quantize(Item.embedding), BIT(3)).hamming_distance(func.binary_quantize(func.cast([3, -1, 2], VECTOR(3)))) + items = session.query(Item).order_by(distance).all() + assert [v.id for v in items] == [2, 3, 1] + + +@pytest.mark.parametrize("engine", array_engines) +class TestSqlalchemyArray: + def setup_method(self): + delete_items() + def test_vector_array(self, engine): - with Session(array_engine) as session: + with Session(engine) as session: session.add(Item(id=1, embeddings=[np.array([1, 2, 3]), np.array([4, 5, 6])])) session.commit() @@ -479,7 +503,7 @@ def test_vector_array(self, engine): assert item.embeddings[1].tolist() == [4, 5, 6] def test_halfvec_array(self, engine): - with 
Session(array_engine) as session: + with Session(engine) as session: session.add(Item(id=1, half_embeddings=[np.array([1, 2, 3]), np.array([4, 5, 6])])) session.commit() @@ -488,23 +512,6 @@ def test_halfvec_array(self, engine): assert item.half_embeddings[0].to_list() == [1, 2, 3] assert item.half_embeddings[1].to_list() == [4, 5, 6] - def test_half_precision(self, engine): - create_items() - with Session(engine) as session: - items = session.query(Item).order_by(func.cast(Item.embedding, HALFVEC(3)).l2_distance([1, 1, 1])).all() - assert [v.id for v in items] == [1, 3, 2] - - def test_binary_quantize(self, engine): - with Session(engine) as session: - session.add(Item(id=1, embedding=[-1, -2, -3])) - session.add(Item(id=2, embedding=[1, -2, 3])) - session.add(Item(id=3, embedding=[1, 2, 3])) - session.commit() - - distance = func.cast(func.binary_quantize(Item.embedding), BIT(3)).hamming_distance(func.binary_quantize(func.cast([3, -1, 2], VECTOR(3)))) - items = session.query(Item).order_by(distance).all() - assert [v.id for v in items] == [2, 3, 1] - class TestSqlalchemyAsync: def setup_method(self): From f82e44f231e498c86839735de1658ef7b8cb11a1 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sun, 9 Feb 2025 12:01:45 -0800 Subject: [PATCH 057/123] Added tests for SQLAlchemy with pg8000 --- requirements.txt | 1 + tests/test_sqlalchemy.py | 16 ++++++++++++---- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/requirements.txt b/requirements.txt index 0e30959..a13be06 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,6 +2,7 @@ asyncpg Django numpy peewee +pg8000 psycopg[binary,pool] psycopg2-binary pytest diff --git a/tests/test_sqlalchemy.py b/tests/test_sqlalchemy.py index b1f3e85..37e803d 100644 --- a/tests/test_sqlalchemy.py +++ b/tests/test_sqlalchemy.py @@ -1,4 +1,5 @@ import numpy as np +import os from pgvector.sqlalchemy import VECTOR, HALFVEC, BIT, SPARSEVEC, SparseVector, avg, sum import pytest from sqlalchemy import create_engine, event, insert, inspect, select, text, MetaData, Table, Column, Index, Integer, ARRAY @@ -16,7 +17,8 @@ sqlalchemy_version = 1 psycopg2_engine = create_engine('postgresql+psycopg2://localhost/pgvector_python_test') -engines = [psycopg2_engine] +pg8000_engine = create_engine(f'postgresql+pg8000://{os.environ['USER']}@localhost/pgvector_python_test') +engines = [psycopg2_engine, pg8000_engine] if sqlalchemy_version > 1: psycopg_engine = create_engine('postgresql+psycopg://localhost/pgvector_python_test') @@ -151,9 +153,9 @@ def test_orm(self, engine): stmt = select(Item) with Session(engine) as session: items = [v[0] for v in session.execute(stmt).all()] - assert items[0].id in [1, 4] - assert items[1].id in [2, 5] - assert items[2].id in [3, 6] + assert items[0].id in [1, 4, 7] + assert items[1].id in [2, 5, 8] + assert items[2].id in [3, 6, 9] assert np.array_equal(items[0].embedding, np.array([1.5, 2, 3])) assert items[0].embedding.dtype == np.float32 assert np.array_equal(items[1].embedding, np.array([4, 5, 6])) @@ -290,12 +292,18 @@ def test_bit_hamming_distance_orm(self, engine): assert [v.id for v in items] == [2, 3, 1] def test_bit_jaccard_distance(self, engine): + if engine == pg8000_engine: + return + create_items() with Session(engine) as session: items = session.query(Item).order_by(Item.binary_embedding.jaccard_distance('101')).all() assert [v.id for v in items] == [2, 3, 1] def test_bit_jaccard_distance_orm(self, engine): + if engine == pg8000_engine: + return + create_items() with Session(engine) as session: items = 
session.scalars(select(Item).order_by(Item.binary_embedding.jaccard_distance('101'))) From cfcc2ea7b8b942c47c378bf47a4490c5acb50ec7 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sun, 9 Feb 2025 12:02:44 -0800 Subject: [PATCH 058/123] Updated style [skip ci] --- tests/test_sqlalchemy.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_sqlalchemy.py b/tests/test_sqlalchemy.py index 37e803d..aa2ad97 100644 --- a/tests/test_sqlalchemy.py +++ b/tests/test_sqlalchemy.py @@ -99,7 +99,7 @@ def delete_items(): session.commit() -@pytest.mark.parametrize("engine", engines) +@pytest.mark.parametrize('engine', engines) class TestSqlalchemy: def setup_method(self): delete_items() @@ -495,7 +495,7 @@ def test_binary_quantize(self, engine): assert [v.id for v in items] == [2, 3, 1] -@pytest.mark.parametrize("engine", array_engines) +@pytest.mark.parametrize('engine', array_engines) class TestSqlalchemyArray: def setup_method(self): delete_items() From 95403d5268e11ab6efef969f46f086e3f57e2b52 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sun, 9 Feb 2025 12:05:34 -0800 Subject: [PATCH 059/123] Added tests for arrays with SQLAlchemy and Psycopg 3 --- tests/test_sqlalchemy.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/tests/test_sqlalchemy.py b/tests/test_sqlalchemy.py index aa2ad97..f4a6bce 100644 --- a/tests/test_sqlalchemy.py +++ b/tests/test_sqlalchemy.py @@ -34,11 +34,21 @@ @event.listens_for(psycopg2_array_engine, "connect") -def connect(dbapi_connection, connection_record): +def psycopg2_connect(dbapi_connection, connection_record): from pgvector.psycopg2 import register_vector register_vector(dbapi_connection, globally=False, arrays=True) +if sqlalchemy_version > 1: + psycopg_array_engine = create_engine('postgresql+psycopg://localhost/pgvector_python_test') + array_engines.append(psycopg_array_engine) + + @event.listens_for(psycopg_array_engine, "connect") + def psycopg_connect(dbapi_connection, connection_record): + from pgvector.psycopg import register_vector + register_vector(dbapi_connection) + + Base = declarative_base() From c74e090f26a02fc920ef910265ac0e4f2eb7cbde Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sun, 9 Feb 2025 12:11:46 -0800 Subject: [PATCH 060/123] Fixed CI --- tests/test_sqlalchemy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_sqlalchemy.py b/tests/test_sqlalchemy.py index f4a6bce..7dbc565 100644 --- a/tests/test_sqlalchemy.py +++ b/tests/test_sqlalchemy.py @@ -17,7 +17,7 @@ sqlalchemy_version = 1 psycopg2_engine = create_engine('postgresql+psycopg2://localhost/pgvector_python_test') -pg8000_engine = create_engine(f'postgresql+pg8000://{os.environ['USER']}@localhost/pgvector_python_test') +pg8000_engine = create_engine(f'postgresql+pg8000://{os.environ["USER"]}@localhost/pgvector_python_test') engines = [psycopg2_engine, pg8000_engine] if sqlalchemy_version > 1: From b350d6a8d45d02ea954fad945d194896c50fbc1e Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sun, 9 Feb 2025 12:13:46 -0800 Subject: [PATCH 061/123] Simplified test code [skip ci] --- tests/test_django.py | 2 +- tests/test_peewee.py | 2 +- tests/test_psycopg.py | 2 +- tests/test_psycopg2.py | 2 +- tests/test_sqlmodel.py | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/test_django.py b/tests/test_django.py index 2c53d82..ea15771 100644 --- a/tests/test_django.py +++ b/tests/test_django.py @@ -158,7 +158,7 @@ class Meta: class TestDjango: - def setup_method(self, test_method): + def 
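A compressed sketch of the direction these changes take: pytest parametrization runs every test body once per driver engine, with driver-specific cases (such as the jaccard-distance tests on pg8000) skipped inside the test itself. The URLs below mirror the ones used in the test module:

```python
import os

import pytest
from sqlalchemy import create_engine, text

engines = [
    create_engine('postgresql+psycopg2://localhost/pgvector_python_test'),
    create_engine(f'postgresql+pg8000://{os.environ["USER"]}@localhost/pgvector_python_test'),
]


@pytest.mark.parametrize('engine', engines)
class TestEngines:
    def test_select_one(self, engine):
        # the same body runs once for each engine in the list
        with engine.connect() as connection:
            assert connection.execute(text('SELECT 1')).scalar() == 1
```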
setup_method(self): Item.objects.all().delete() def test_vector(self): diff --git a/tests/test_peewee.py b/tests/test_peewee.py index 9666388..e98a0ec 100644 --- a/tests/test_peewee.py +++ b/tests/test_peewee.py @@ -36,7 +36,7 @@ def create_items(): class TestPeewee: - def setup_method(self, test_method): + def setup_method(self): Item.truncate_table() def test_vector(self): diff --git a/tests/test_psycopg.py b/tests/test_psycopg.py index 6d4f34a..90f80b6 100644 --- a/tests/test_psycopg.py +++ b/tests/test_psycopg.py @@ -14,7 +14,7 @@ class TestPsycopg: - def setup_method(self, test_method): + def setup_method(self): conn.execute('DELETE FROM psycopg_items') def test_vector(self): diff --git a/tests/test_psycopg2.py b/tests/test_psycopg2.py index c3cd3cd..d661f12 100644 --- a/tests/test_psycopg2.py +++ b/tests/test_psycopg2.py @@ -16,7 +16,7 @@ class TestPsycopg2: - def setup_method(self, test_method): + def setup_method(self): cur.execute('DELETE FROM psycopg2_items') def test_vector(self): diff --git a/tests/test_sqlmodel.py b/tests/test_sqlmodel.py index 851afd8..e0330d2 100644 --- a/tests/test_sqlmodel.py +++ b/tests/test_sqlmodel.py @@ -42,7 +42,7 @@ def create_items(): class TestSqlmodel: - def setup_method(self, test_method): + def setup_method(self): with Session(engine) as session: session.exec(delete(Item)) session.commit() From 651df0844b3c6790414ec2e8ed75330ad80406af Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sun, 9 Feb 2025 12:15:52 -0800 Subject: [PATCH 062/123] Improved SQLModel tests [skip ci] --- tests/test_sqlmodel.py | 68 +++++++++++++++++++++--------------------- 1 file changed, 34 insertions(+), 34 deletions(-) diff --git a/tests/test_sqlmodel.py b/tests/test_sqlmodel.py index e0330d2..373834f 100644 --- a/tests/test_sqlmodel.py +++ b/tests/test_sqlmodel.py @@ -34,11 +34,11 @@ class Item(SQLModel, table=True): def create_items(): - session = Session(engine) - session.add(Item(id=1, embedding=[1, 1, 1], half_embedding=[1, 1, 1], binary_embedding='000', sparse_embedding=SparseVector([1, 1, 1]))) - session.add(Item(id=2, embedding=[2, 2, 2], half_embedding=[2, 2, 2], binary_embedding='101', sparse_embedding=SparseVector([2, 2, 2]))) - session.add(Item(id=3, embedding=[1, 1, 2], half_embedding=[1, 1, 2], binary_embedding='111', sparse_embedding=SparseVector([1, 1, 2]))) - session.commit() + with Session(engine) as session: + session.add(Item(id=1, embedding=[1, 1, 1], half_embedding=[1, 1, 1], binary_embedding='000', sparse_embedding=SparseVector([1, 1, 1]))) + session.add(Item(id=2, embedding=[2, 2, 2], half_embedding=[2, 2, 2], binary_embedding='101', sparse_embedding=SparseVector([2, 2, 2]))) + session.add(Item(id=3, embedding=[1, 1, 2], half_embedding=[1, 1, 2], binary_embedding='111', sparse_embedding=SparseVector([1, 1, 2]))) + session.commit() class TestSqlmodel: @@ -52,11 +52,11 @@ def test_orm(self): item2 = Item(embedding=[4, 5, 6]) item3 = Item() - session = Session(engine) - session.add(item) - session.add(item2) - session.add(item3) - session.commit() + with Session(engine) as session: + session.add(item) + session.add(item2) + session.add(item3) + session.commit() stmt = select(Item) with Session(engine) as session: @@ -71,11 +71,11 @@ def test_orm(self): assert items[2].embedding is None def test_vector(self): - session = Session(engine) - session.add(Item(id=1, embedding=[1, 2, 3])) - session.commit() - item = session.get(Item, 1) - assert item.embedding.tolist() == [1, 2, 3] + with Session(engine) as session: + session.add(Item(id=1, embedding=[1, 2, 
3])) + session.commit() + item = session.get(Item, 1) + assert item.embedding.tolist() == [1, 2, 3] def test_vector_l2_distance(self): create_items() @@ -102,11 +102,11 @@ def test_vector_l1_distance(self): assert [v.id for v in items] == [1, 3, 2] def test_halfvec(self): - session = Session(engine) - session.add(Item(id=1, half_embedding=[1, 2, 3])) - session.commit() - item = session.get(Item, 1) - assert item.half_embedding.to_list() == [1, 2, 3] + with Session(engine) as session: + session.add(Item(id=1, half_embedding=[1, 2, 3])) + session.commit() + item = session.get(Item, 1) + assert item.half_embedding.to_list() == [1, 2, 3] def test_halfvec_l2_distance(self): create_items() @@ -133,11 +133,11 @@ def test_halfvec_l1_distance(self): assert [v.id for v in items] == [1, 3, 2] def test_bit(self): - session = Session(engine) - session.add(Item(id=1, binary_embedding='101')) - session.commit() - item = session.get(Item, 1) - assert item.binary_embedding == '101' + with Session(engine) as session: + session.add(Item(id=1, binary_embedding='101')) + session.commit() + item = session.get(Item, 1) + assert item.binary_embedding == '101' def test_bit_hamming_distance(self): create_items() @@ -152,11 +152,11 @@ def test_bit_jaccard_distance(self): assert [v.id for v in items] == [2, 3, 1] def test_sparsevec(self): - session = Session(engine) - session.add(Item(id=1, sparse_embedding=[1, 2, 3])) - session.commit() - item = session.get(Item, 1) - assert item.sparse_embedding.to_list() == [1, 2, 3] + with Session(engine) as session: + session.add(Item(id=1, sparse_embedding=[1, 2, 3])) + session.commit() + item = session.get(Item, 1) + assert item.sparse_embedding.to_list() == [1, 2, 3] def test_sparsevec_l2_distance(self): create_items() @@ -232,7 +232,7 @@ def test_halfvec_sum(self): def test_bad_dimensions(self): item = Item(embedding=[1, 2]) - session = Session(engine) - session.add(item) - with pytest.raises(StatementError, match='expected 3 dimensions, not 2'): - session.commit() + with Session(engine) as session: + session.add(item) + with pytest.raises(StatementError, match='expected 3 dimensions, not 2'): + session.commit() From 2883156b461f08fe32be81439d8e653ac1c41c5a Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sun, 9 Feb 2025 12:42:19 -0800 Subject: [PATCH 063/123] Improved tests for async SQLAlchemy engines [skip ci] --- tests/test_sqlalchemy.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/tests/test_sqlalchemy.py b/tests/test_sqlalchemy.py index 7dbc565..6e1d496 100644 --- a/tests/test_sqlalchemy.py +++ b/tests/test_sqlalchemy.py @@ -19,11 +19,18 @@ psycopg2_engine = create_engine('postgresql+psycopg2://localhost/pgvector_python_test') pg8000_engine = create_engine(f'postgresql+pg8000://{os.environ["USER"]}@localhost/pgvector_python_test') engines = [psycopg2_engine, pg8000_engine] +async_engines = [] if sqlalchemy_version > 1: psycopg_engine = create_engine('postgresql+psycopg://localhost/pgvector_python_test') engines.append(psycopg_engine) + psycopg_async_engine = create_async_engine('postgresql+psycopg://localhost/pgvector_python_test') + async_engines.append(psycopg_async_engine) + + asyncpg_engine = create_async_engine('postgresql+asyncpg://localhost/pgvector_python_test') + async_engines.append(asyncpg_engine) + setup_engine = engines[0] with Session(setup_engine) as session: session.execute(text('CREATE EXTENSION IF NOT EXISTS vector')) @@ -531,14 +538,13 @@ def test_halfvec_array(self, engine): assert 
item.half_embeddings[1].to_list() == [4, 5, 6] +@pytest.mark.parametrize('engine', async_engines) class TestSqlalchemyAsync: def setup_method(self): delete_items() @pytest.mark.asyncio - @pytest.mark.skipif(sqlalchemy_version == 1, reason='Requires SQLAlchemy 2+') - async def test_psycopg_async_avg(self): - engine = create_async_engine('postgresql+psycopg://localhost/pgvector_python_test') + async def test_psycopg_async_avg(self, engine): async_session = async_sessionmaker(engine, expire_on_commit=False) async with async_session() as session: @@ -550,6 +556,11 @@ async def test_psycopg_async_avg(self): await engine.dispose() + +class TestSqlalchemyAsync2: + def setup_method(self): + delete_items() + @pytest.mark.asyncio @pytest.mark.skipif(sqlalchemy_version == 1, reason='Requires SQLAlchemy 2+') async def test_psycopg_async_vector_array(self): From 86331f0ee6650adcdb655b5d092f1c24d3b0fa84 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sun, 9 Feb 2025 12:45:22 -0800 Subject: [PATCH 064/123] Improved tests for async SQLAlchemy engines [skip ci] --- tests/test_sqlalchemy.py | 104 +++++++++++++++------------------------ 1 file changed, 39 insertions(+), 65 deletions(-) diff --git a/tests/test_sqlalchemy.py b/tests/test_sqlalchemy.py index 6e1d496..689615b 100644 --- a/tests/test_sqlalchemy.py +++ b/tests/test_sqlalchemy.py @@ -544,122 +544,96 @@ def setup_method(self): delete_items() @pytest.mark.asyncio - async def test_psycopg_async_avg(self, engine): + async def test_vector(self, engine): async_session = async_sessionmaker(engine, expire_on_commit=False) async with async_session() as session: async with session.begin(): - session.add(Item(embedding=[1, 2, 3])) - session.add(Item(embedding=[4, 5, 6])) - avg = await session.scalars(select(func.avg(Item.embedding))) - assert avg.first() == '[2.5,3.5,4.5]' + embedding = np.array([1, 2, 3]) + session.add(Item(id=1, embedding=embedding)) + item = await session.get(Item, 1) + assert np.array_equal(item.embedding, embedding) await engine.dispose() - -class TestSqlalchemyAsync2: - def setup_method(self): - delete_items() - @pytest.mark.asyncio - @pytest.mark.skipif(sqlalchemy_version == 1, reason='Requires SQLAlchemy 2+') - async def test_psycopg_async_vector_array(self): - engine = create_async_engine('postgresql+psycopg://localhost/pgvector_python_test') + async def test_halfvec(self, engine): async_session = async_sessionmaker(engine, expire_on_commit=False) - @event.listens_for(engine.sync_engine, "connect") - def connect(dbapi_connection, connection_record): - from pgvector.psycopg import register_vector_async - dbapi_connection.run_async(register_vector_async) - async with async_session() as session: async with session.begin(): - session.add(Item(id=1, embeddings=[np.array([1, 2, 3]), np.array([4, 5, 6])])) - - # this fails if the driver does not cast arrays + embedding = [1, 2, 3] + session.add(Item(id=1, half_embedding=embedding)) item = await session.get(Item, 1) - assert item.embeddings[0].tolist() == [1, 2, 3] - assert item.embeddings[1].tolist() == [4, 5, 6] + assert item.half_embedding.to_list() == embedding await engine.dispose() @pytest.mark.asyncio - @pytest.mark.skipif(sqlalchemy_version == 1, reason='Requires SQLAlchemy 2+') - async def test_asyncpg_vector(self): - engine = create_async_engine('postgresql+asyncpg://localhost/pgvector_python_test') - async_session = async_sessionmaker(engine, expire_on_commit=False) + async def test_bit(self, engine): + import asyncpg - # TODO do not throw error when types are registered - # 
@event.listens_for(engine.sync_engine, "connect") - # def connect(dbapi_connection, connection_record): - # from pgvector.asyncpg import register_vector - # dbapi_connection.run_async(register_vector) + async_session = async_sessionmaker(engine, expire_on_commit=False) async with async_session() as session: async with session.begin(): - embedding = np.array([1, 2, 3]) - session.add(Item(id=1, embedding=embedding)) + embedding = asyncpg.BitString('101') if engine == asyncpg_engine else '101' + session.add(Item(id=1, binary_embedding=embedding)) item = await session.get(Item, 1) - assert np.array_equal(item.embedding, embedding) + assert item.binary_embedding == embedding await engine.dispose() @pytest.mark.asyncio - @pytest.mark.skipif(sqlalchemy_version == 1, reason='Requires SQLAlchemy 2+') - async def test_asyncpg_halfvec(self): - engine = create_async_engine('postgresql+asyncpg://localhost/pgvector_python_test') + async def test_sparsevec(self, engine): async_session = async_sessionmaker(engine, expire_on_commit=False) - # TODO do not throw error when types are registered - # @event.listens_for(engine.sync_engine, "connect") - # def connect(dbapi_connection, connection_record): - # from pgvector.asyncpg import register_vector - # dbapi_connection.run_async(register_vector) - async with async_session() as session: async with session.begin(): embedding = [1, 2, 3] - session.add(Item(id=1, half_embedding=embedding)) + session.add(Item(id=1, sparse_embedding=embedding)) item = await session.get(Item, 1) - assert item.half_embedding.to_list() == embedding + assert item.sparse_embedding.to_list() == embedding await engine.dispose() @pytest.mark.asyncio - @pytest.mark.skipif(sqlalchemy_version == 1, reason='Requires SQLAlchemy 2+') - async def test_asyncpg_bit(self): - import asyncpg - - engine = create_async_engine('postgresql+asyncpg://localhost/pgvector_python_test') + async def test_avg(self, engine): async_session = async_sessionmaker(engine, expire_on_commit=False) async with async_session() as session: async with session.begin(): - embedding = asyncpg.BitString('101') - session.add(Item(id=1, binary_embedding=embedding)) - item = await session.get(Item, 1) - assert item.binary_embedding == embedding + session.add(Item(embedding=[1, 2, 3])) + session.add(Item(embedding=[4, 5, 6])) + avg = await session.scalars(select(func.avg(Item.embedding))) + assert avg.first() == '[2.5,3.5,4.5]' await engine.dispose() + +class TestSqlalchemyAsync2: + def setup_method(self): + delete_items() + @pytest.mark.asyncio @pytest.mark.skipif(sqlalchemy_version == 1, reason='Requires SQLAlchemy 2+') - async def test_asyncpg_sparsevec(self): - engine = create_async_engine('postgresql+asyncpg://localhost/pgvector_python_test') + async def test_psycopg_async_vector_array(self): + engine = create_async_engine('postgresql+psycopg://localhost/pgvector_python_test') async_session = async_sessionmaker(engine, expire_on_commit=False) - # TODO do not throw error when types are registered - # @event.listens_for(engine.sync_engine, "connect") - # def connect(dbapi_connection, connection_record): - # from pgvector.asyncpg import register_vector - # dbapi_connection.run_async(register_vector) + @event.listens_for(engine.sync_engine, "connect") + def connect(dbapi_connection, connection_record): + from pgvector.psycopg import register_vector_async + dbapi_connection.run_async(register_vector_async) async with async_session() as session: async with session.begin(): - embedding = [1, 2, 3] - session.add(Item(id=1, 
sparse_embedding=embedding)) + session.add(Item(id=1, embeddings=[np.array([1, 2, 3]), np.array([4, 5, 6])])) + + # this fails if the driver does not cast arrays item = await session.get(Item, 1) - assert item.sparse_embedding.to_list() == embedding + assert item.embeddings[0].tolist() == [1, 2, 3] + assert item.embeddings[1].tolist() == [4, 5, 6] await engine.dispose() From 224c18a47c2a2d652fe1f7267449e61c290b249f Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sun, 9 Feb 2025 12:49:14 -0800 Subject: [PATCH 065/123] Simplified test code [skip ci] --- tests/test_sqlalchemy.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/tests/test_sqlalchemy.py b/tests/test_sqlalchemy.py index 689615b..07e29e7 100644 --- a/tests/test_sqlalchemy.py +++ b/tests/test_sqlalchemy.py @@ -1,3 +1,4 @@ +import asyncpg import numpy as np import os from pgvector.sqlalchemy import VECTOR, HALFVEC, BIT, SPARSEVEC, SparseVector, avg, sum @@ -571,8 +572,6 @@ async def test_halfvec(self, engine): @pytest.mark.asyncio async def test_bit(self, engine): - import asyncpg - async_session = async_sessionmaker(engine, expire_on_commit=False) async with async_session() as session: @@ -611,13 +610,13 @@ async def test_avg(self, engine): await engine.dispose() -class TestSqlalchemyAsync2: +@pytest.mark.skipif(sqlalchemy_version == 1, reason='Requires SQLAlchemy 2+') +class TestSqlalchemyAsyncArray: def setup_method(self): delete_items() @pytest.mark.asyncio - @pytest.mark.skipif(sqlalchemy_version == 1, reason='Requires SQLAlchemy 2+') - async def test_psycopg_async_vector_array(self): + async def test_psycopg_vector_array(self): engine = create_async_engine('postgresql+psycopg://localhost/pgvector_python_test') async_session = async_sessionmaker(engine, expire_on_commit=False) @@ -638,7 +637,6 @@ def connect(dbapi_connection, connection_record): await engine.dispose() @pytest.mark.asyncio - @pytest.mark.skipif(sqlalchemy_version == 1, reason='Requires SQLAlchemy 2+') async def test_asyncpg_vector_array(self): engine = create_async_engine('postgresql+asyncpg://localhost/pgvector_python_test') async_session = async_sessionmaker(engine, expire_on_commit=False) From a1d89971cd10ebe0dc11969ec532011eaa8a9a78 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sun, 9 Feb 2025 13:02:05 -0800 Subject: [PATCH 066/123] Improved test code [skip ci] --- tests/test_sqlalchemy.py | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/tests/test_sqlalchemy.py b/tests/test_sqlalchemy.py index 07e29e7..8868df4 100644 --- a/tests/test_sqlalchemy.py +++ b/tests/test_sqlalchemy.py @@ -19,7 +19,17 @@ psycopg2_engine = create_engine('postgresql+psycopg2://localhost/pgvector_python_test') pg8000_engine = create_engine(f'postgresql+pg8000://{os.environ["USER"]}@localhost/pgvector_python_test') +psycopg2_array_engine = create_engine('postgresql+psycopg2://localhost/pgvector_python_test') + + +@event.listens_for(psycopg2_array_engine, "connect") +def psycopg2_connect(dbapi_connection, connection_record): + from pgvector.psycopg2 import register_vector + register_vector(dbapi_connection, globally=False, arrays=True) + + engines = [psycopg2_engine, pg8000_engine] +array_engines = [psycopg2_array_engine] async_engines = [] if sqlalchemy_version > 1: @@ -32,22 +42,6 @@ asyncpg_engine = create_async_engine('postgresql+asyncpg://localhost/pgvector_python_test') async_engines.append(asyncpg_engine) -setup_engine = engines[0] -with Session(setup_engine) as session: - 
session.execute(text('CREATE EXTENSION IF NOT EXISTS vector')) - session.commit() - -psycopg2_array_engine = create_engine('postgresql+psycopg2://localhost/pgvector_python_test') -array_engines = [psycopg2_array_engine] - - -@event.listens_for(psycopg2_array_engine, "connect") -def psycopg2_connect(dbapi_connection, connection_record): - from pgvector.psycopg2 import register_vector - register_vector(dbapi_connection, globally=False, arrays=True) - - -if sqlalchemy_version > 1: psycopg_array_engine = create_engine('postgresql+psycopg://localhost/pgvector_python_test') array_engines.append(psycopg_array_engine) @@ -56,6 +50,10 @@ def psycopg_connect(dbapi_connection, connection_record): from pgvector.psycopg import register_vector register_vector(dbapi_connection) +setup_engine = engines[0] +with Session(setup_engine) as session: + session.execute(text('CREATE EXTENSION IF NOT EXISTS vector')) + session.commit() Base = declarative_base() From c792451a76fea51352ad0a5f952c97eaeaea70d7 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sun, 9 Feb 2025 13:12:57 -0800 Subject: [PATCH 067/123] Test more engine configurations --- tests/test_sqlalchemy.py | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/tests/test_sqlalchemy.py b/tests/test_sqlalchemy.py index 8868df4..dffa07b 100644 --- a/tests/test_sqlalchemy.py +++ b/tests/test_sqlalchemy.py @@ -19,37 +19,39 @@ psycopg2_engine = create_engine('postgresql+psycopg2://localhost/pgvector_python_test') pg8000_engine = create_engine(f'postgresql+pg8000://{os.environ["USER"]}@localhost/pgvector_python_test') -psycopg2_array_engine = create_engine('postgresql+psycopg2://localhost/pgvector_python_test') +psycopg2_type_engine = create_engine('postgresql+psycopg2://localhost/pgvector_python_test') -@event.listens_for(psycopg2_array_engine, "connect") +@event.listens_for(psycopg2_type_engine, "connect") def psycopg2_connect(dbapi_connection, connection_record): from pgvector.psycopg2 import register_vector register_vector(dbapi_connection, globally=False, arrays=True) -engines = [psycopg2_engine, pg8000_engine] -array_engines = [psycopg2_array_engine] +engines = [psycopg2_engine, pg8000_engine, psycopg2_type_engine] +array_engines = [psycopg2_type_engine] async_engines = [] if sqlalchemy_version > 1: psycopg_engine = create_engine('postgresql+psycopg://localhost/pgvector_python_test') engines.append(psycopg_engine) + psycopg_type_engine = create_engine('postgresql+psycopg://localhost/pgvector_python_test') + + @event.listens_for(psycopg_type_engine, "connect") + def psycopg_connect(dbapi_connection, connection_record): + from pgvector.psycopg import register_vector + register_vector(dbapi_connection) + + engines.append(psycopg_type_engine) + array_engines.append(psycopg_type_engine) + psycopg_async_engine = create_async_engine('postgresql+psycopg://localhost/pgvector_python_test') async_engines.append(psycopg_async_engine) asyncpg_engine = create_async_engine('postgresql+asyncpg://localhost/pgvector_python_test') async_engines.append(asyncpg_engine) - psycopg_array_engine = create_engine('postgresql+psycopg://localhost/pgvector_python_test') - array_engines.append(psycopg_array_engine) - - @event.listens_for(psycopg_array_engine, "connect") - def psycopg_connect(dbapi_connection, connection_record): - from pgvector.psycopg import register_vector - register_vector(dbapi_connection) - setup_engine = engines[0] with Session(setup_engine) as session: session.execute(text('CREATE EXTENSION IF NOT EXISTS 
vector')) @@ -169,9 +171,10 @@ def test_orm(self, engine): stmt = select(Item) with Session(engine) as session: items = [v[0] for v in session.execute(stmt).all()] - assert items[0].id in [1, 4, 7] - assert items[1].id in [2, 5, 8] - assert items[2].id in [3, 6, 9] + # TODO improve + assert items[0].id % 3 == 1 + assert items[1].id % 3 == 2 + assert items[2].id % 3 == 0 assert np.array_equal(items[0].embedding, np.array([1.5, 2, 3])) assert items[0].embedding.dtype == np.float32 assert np.array_equal(items[1].embedding, np.array([4, 5, 6])) From 88873e54365ca6086a1c960e2ced19ee98ea2bb2 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sun, 9 Feb 2025 13:21:36 -0800 Subject: [PATCH 068/123] Improved tests for async SQLAlchemy engines [skip ci] --- tests/test_sqlalchemy.py | 58 +++++++++++++++++++--------------------- 1 file changed, 28 insertions(+), 30 deletions(-) diff --git a/tests/test_sqlalchemy.py b/tests/test_sqlalchemy.py index dffa07b..a245ffc 100644 --- a/tests/test_sqlalchemy.py +++ b/tests/test_sqlalchemy.py @@ -31,6 +31,7 @@ def psycopg2_connect(dbapi_connection, connection_record): engines = [psycopg2_engine, pg8000_engine, psycopg2_type_engine] array_engines = [psycopg2_type_engine] async_engines = [] +async_array_engines = [] if sqlalchemy_version > 1: psycopg_engine = create_engine('postgresql+psycopg://localhost/pgvector_python_test') @@ -46,11 +47,32 @@ def psycopg_connect(dbapi_connection, connection_record): engines.append(psycopg_type_engine) array_engines.append(psycopg_type_engine) + psycopg_async_type_engine = create_async_engine('postgresql+psycopg://localhost/pgvector_python_test') + + @event.listens_for(psycopg_async_type_engine.sync_engine, "connect") + def connect(dbapi_connection, connection_record): + from pgvector.psycopg import register_vector_async + dbapi_connection.run_async(register_vector_async) + + async_engines.append(psycopg_async_type_engine) + async_array_engines.append(psycopg_async_type_engine) + psycopg_async_engine = create_async_engine('postgresql+psycopg://localhost/pgvector_python_test') async_engines.append(psycopg_async_engine) asyncpg_engine = create_async_engine('postgresql+asyncpg://localhost/pgvector_python_test') async_engines.append(asyncpg_engine) + async_array_engines.append(asyncpg_engine) + + asyncpg_type_engine = create_async_engine('postgresql+asyncpg://localhost/pgvector_python_test') + + @event.listens_for(asyncpg_type_engine.sync_engine, "connect") + def connect(dbapi_connection, connection_record): + from pgvector.asyncpg import register_vector + dbapi_connection.run_async(register_vector) + + # TODO do not throw error when types are registered + # async_array_engines.append(asyncpg_type_engine) setup_engine = engines[0] with Session(setup_engine) as session: @@ -599,6 +621,10 @@ async def test_sparsevec(self, engine): @pytest.mark.asyncio async def test_avg(self, engine): + # TODO do not throw error when types are registered + if engine == psycopg_async_type_engine: + return + async_session = async_sessionmaker(engine, expire_on_commit=False) async with async_session() as session: @@ -611,43 +637,15 @@ async def test_avg(self, engine): await engine.dispose() -@pytest.mark.skipif(sqlalchemy_version == 1, reason='Requires SQLAlchemy 2+') +@pytest.mark.parametrize('engine', async_array_engines) class TestSqlalchemyAsyncArray: def setup_method(self): delete_items() @pytest.mark.asyncio - async def test_psycopg_vector_array(self): - engine = create_async_engine('postgresql+psycopg://localhost/pgvector_python_test') + async def 
test_vector_array(self, engine): async_session = async_sessionmaker(engine, expire_on_commit=False) - @event.listens_for(engine.sync_engine, "connect") - def connect(dbapi_connection, connection_record): - from pgvector.psycopg import register_vector_async - dbapi_connection.run_async(register_vector_async) - - async with async_session() as session: - async with session.begin(): - session.add(Item(id=1, embeddings=[np.array([1, 2, 3]), np.array([4, 5, 6])])) - - # this fails if the driver does not cast arrays - item = await session.get(Item, 1) - assert item.embeddings[0].tolist() == [1, 2, 3] - assert item.embeddings[1].tolist() == [4, 5, 6] - - await engine.dispose() - - @pytest.mark.asyncio - async def test_asyncpg_vector_array(self): - engine = create_async_engine('postgresql+asyncpg://localhost/pgvector_python_test') - async_session = async_sessionmaker(engine, expire_on_commit=False) - - # TODO do not throw error when types are registered - # @event.listens_for(engine.sync_engine, "connect") - # def connect(dbapi_connection, connection_record): - # from pgvector.asyncpg import register_vector - # dbapi_connection.run_async(register_vector) - async with async_session() as session: async with session.begin(): session.add(Item(id=1, embeddings=[np.array([1, 2, 3]), np.array([4, 5, 6])])) From e78a8d5f866c2577644a64eafcaf9939c4b9ab8c Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sun, 9 Feb 2025 13:28:23 -0800 Subject: [PATCH 069/123] Improved test code [skip ci] --- tests/test_sqlalchemy.py | 34 +++++++++++++--------------------- 1 file changed, 13 insertions(+), 21 deletions(-) diff --git a/tests/test_sqlalchemy.py b/tests/test_sqlalchemy.py index a245ffc..4b26922 100644 --- a/tests/test_sqlalchemy.py +++ b/tests/test_sqlalchemy.py @@ -18,7 +18,6 @@ sqlalchemy_version = 1 psycopg2_engine = create_engine('postgresql+psycopg2://localhost/pgvector_python_test') -pg8000_engine = create_engine(f'postgresql+pg8000://{os.environ["USER"]}@localhost/pgvector_python_test') psycopg2_type_engine = create_engine('postgresql+psycopg2://localhost/pgvector_python_test') @@ -28,15 +27,10 @@ def psycopg2_connect(dbapi_connection, connection_record): register_vector(dbapi_connection, globally=False, arrays=True) -engines = [psycopg2_engine, pg8000_engine, psycopg2_type_engine] -array_engines = [psycopg2_type_engine] -async_engines = [] -async_array_engines = [] +pg8000_engine = create_engine(f'postgresql+pg8000://{os.environ["USER"]}@localhost/pgvector_python_test') if sqlalchemy_version > 1: psycopg_engine = create_engine('postgresql+psycopg://localhost/pgvector_python_test') - engines.append(psycopg_engine) - psycopg_type_engine = create_engine('postgresql+psycopg://localhost/pgvector_python_test') @event.listens_for(psycopg_type_engine, "connect") @@ -44,9 +38,7 @@ def psycopg_connect(dbapi_connection, connection_record): from pgvector.psycopg import register_vector register_vector(dbapi_connection) - engines.append(psycopg_type_engine) - array_engines.append(psycopg_type_engine) - + psycopg_async_engine = create_async_engine('postgresql+psycopg://localhost/pgvector_python_test') psycopg_async_type_engine = create_async_engine('postgresql+psycopg://localhost/pgvector_python_test') @event.listens_for(psycopg_async_type_engine.sync_engine, "connect") @@ -54,16 +46,7 @@ def connect(dbapi_connection, connection_record): from pgvector.psycopg import register_vector_async dbapi_connection.run_async(register_vector_async) - async_engines.append(psycopg_async_type_engine) - 
async_array_engines.append(psycopg_async_type_engine) - - psycopg_async_engine = create_async_engine('postgresql+psycopg://localhost/pgvector_python_test') - async_engines.append(psycopg_async_engine) - asyncpg_engine = create_async_engine('postgresql+asyncpg://localhost/pgvector_python_test') - async_engines.append(asyncpg_engine) - async_array_engines.append(asyncpg_engine) - asyncpg_type_engine = create_async_engine('postgresql+asyncpg://localhost/pgvector_python_test') @event.listens_for(asyncpg_type_engine.sync_engine, "connect") @@ -71,8 +54,17 @@ def connect(dbapi_connection, connection_record): from pgvector.asyncpg import register_vector dbapi_connection.run_async(register_vector) - # TODO do not throw error when types are registered - # async_array_engines.append(asyncpg_type_engine) +engines = [psycopg2_engine, psycopg2_type_engine, pg8000_engine] +array_engines = [psycopg2_type_engine] +async_engines = [] +async_array_engines = [] + +if sqlalchemy_version > 1: + engines += [psycopg_engine, psycopg_type_engine] + array_engines += [psycopg_type_engine] + async_engines += [psycopg_async_engine, psycopg_async_type_engine, asyncpg_engine] + # TODO add asyncpg_type_engine + async_array_engines += [psycopg_async_type_engine, asyncpg_engine] setup_engine = engines[0] with Session(setup_engine) as session: From a2699639d7fd468ea68442d72227d5099ad8a64b Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sun, 9 Feb 2025 13:33:09 -0800 Subject: [PATCH 070/123] Updated todo [skip ci] --- tests/test_sqlalchemy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_sqlalchemy.py b/tests/test_sqlalchemy.py index 4b26922..09df9b9 100644 --- a/tests/test_sqlalchemy.py +++ b/tests/test_sqlalchemy.py @@ -62,8 +62,8 @@ def connect(dbapi_connection, connection_record): if sqlalchemy_version > 1: engines += [psycopg_engine, psycopg_type_engine] array_engines += [psycopg_type_engine] + # TODO support asyncpg_type_engine async_engines += [psycopg_async_engine, psycopg_async_type_engine, asyncpg_engine] - # TODO add asyncpg_type_engine async_array_engines += [psycopg_async_type_engine, asyncpg_engine] setup_engine = engines[0] From 7cd310b5cf986fab1da536a94e9f9c74379e46b9 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sun, 9 Feb 2025 13:36:12 -0800 Subject: [PATCH 071/123] Improved test [skip ci] --- tests/test_sqlalchemy.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/tests/test_sqlalchemy.py b/tests/test_sqlalchemy.py index 09df9b9..563e3a3 100644 --- a/tests/test_sqlalchemy.py +++ b/tests/test_sqlalchemy.py @@ -613,10 +613,6 @@ async def test_sparsevec(self, engine): @pytest.mark.asyncio async def test_avg(self, engine): - # TODO do not throw error when types are registered - if engine == psycopg_async_type_engine: - return - async_session = async_sessionmaker(engine, expire_on_commit=False) async with async_session() as session: @@ -624,7 +620,10 @@ async def test_avg(self, engine): session.add(Item(embedding=[1, 2, 3])) session.add(Item(embedding=[4, 5, 6])) avg = await session.scalars(select(func.avg(Item.embedding))) - assert avg.first() == '[2.5,3.5,4.5]' + if engine == psycopg_async_type_engine: + assert avg.first().tolist() == [2.5, 3.5, 4.5] + else: + assert avg.first() == '[2.5,3.5,4.5]' await engine.dispose() From cae30a1d1b0b23620abb9c9ff4c7084ca5bac1ee Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sun, 9 Feb 2025 13:37:07 -0800 Subject: [PATCH 072/123] Improved test [skip ci] --- tests/test_sqlalchemy.py | 7 ++----- 1 file changed, 
2 insertions(+), 5 deletions(-) diff --git a/tests/test_sqlalchemy.py b/tests/test_sqlalchemy.py index 563e3a3..aa5d81f 100644 --- a/tests/test_sqlalchemy.py +++ b/tests/test_sqlalchemy.py @@ -619,11 +619,8 @@ async def test_avg(self, engine): async with session.begin(): session.add(Item(embedding=[1, 2, 3])) session.add(Item(embedding=[4, 5, 6])) - avg = await session.scalars(select(func.avg(Item.embedding))) - if engine == psycopg_async_type_engine: - assert avg.first().tolist() == [2.5, 3.5, 4.5] - else: - assert avg.first() == '[2.5,3.5,4.5]' + res = await session.scalars(select(avg(Item.embedding))) + assert res.first().tolist() == [2.5, 3.5, 4.5] await engine.dispose() From 3de7832d164b82e929e08d928501b081c93e3a5a Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sun, 9 Feb 2025 20:27:35 -0800 Subject: [PATCH 073/123] Dropped support for Python < 3.9 --- .github/workflows/build.yml | 2 +- CHANGELOG.md | 4 ++++ pyproject.toml | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 562ba94..dc53dfe 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -6,7 +6,7 @@ jobs: strategy: fail-fast: false matrix: - python: [3.13, 3.8] + python: [3.13, 3.9] steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 diff --git a/CHANGELOG.md b/CHANGELOG.md index 3a517d8..1788ff4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +## 0.4.0 (unreleased) + +- Dropped support for Python < 3.9 + ## 0.3.6 (2024-10-26) - Added `arrays` option for Psycopg 2 diff --git a/pyproject.toml b/pyproject.toml index a6a6609..0f291f5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,7 +11,7 @@ authors = [ {name = "Andrew Kane", email = "andrew@ankane.org"} ] license = {text = "MIT"} -requires-python = ">= 3.8" +requires-python = ">= 3.9" dependencies = [ "numpy" ] From 37b148f459863ee6f6c448fb93eecef569e7eb40 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sun, 9 Feb 2025 20:29:04 -0800 Subject: [PATCH 074/123] Removed default value [skip ci] --- pgvector/psycopg2/register.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pgvector/psycopg2/register.py b/pgvector/psycopg2/register.py index 08a69a9..2be292f 100644 --- a/pgvector/psycopg2/register.py +++ b/pgvector/psycopg2/register.py @@ -5,11 +5,10 @@ from .vector import register_vector_info -# TODO remove default value for conn_or_curs in 0.4.0 # TODO make globally False by default in 0.4.0 # note: register_adapter is always global # TODO make arrays True by defalt in 0.4.0 -def register_vector(conn_or_curs=None, globally=True, arrays=False): +def register_vector(conn_or_curs, globally=True, arrays=False): conn = conn_or_curs if hasattr(conn_or_curs, 'cursor') else conn_or_curs.connection cur = conn.cursor(cursor_factory=cursor) scope = None if globally else conn_or_curs From 8a621a3ae96a85320475180b2120cb6d92c095a4 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sun, 9 Feb 2025 20:33:27 -0800 Subject: [PATCH 075/123] Changed default values of globally and arrays for register_type with Psycopg 2 [skip ci] --- pgvector/psycopg2/register.py | 4 +--- tests/test_psycopg2.py | 8 ++++---- tests/test_sqlalchemy.py | 2 +- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/pgvector/psycopg2/register.py b/pgvector/psycopg2/register.py index 2be292f..1bc9d44 100644 --- a/pgvector/psycopg2/register.py +++ b/pgvector/psycopg2/register.py @@ -5,10 +5,8 @@ from .vector import register_vector_info -# TODO make globally False 
by default in 0.4.0 # note: register_adapter is always global -# TODO make arrays True by defalt in 0.4.0 -def register_vector(conn_or_curs, globally=True, arrays=False): +def register_vector(conn_or_curs, globally=False, arrays=True): conn = conn_or_curs if hasattr(conn_or_curs, 'cursor') else conn_or_curs.connection cur = conn.cursor(cursor_factory=cursor) scope = None if globally else conn_or_curs diff --git a/tests/test_psycopg2.py b/tests/test_psycopg2.py index d661f12..85aa0e8 100644 --- a/tests/test_psycopg2.py +++ b/tests/test_psycopg2.py @@ -12,7 +12,7 @@ cur.execute('DROP TABLE IF EXISTS psycopg2_items') cur.execute('CREATE TABLE psycopg2_items (id bigserial PRIMARY KEY, embedding vector(3), half_embedding halfvec(3), binary_embedding bit(3), sparse_embedding sparsevec(3), embeddings vector[], half_embeddings halfvec[], sparse_embeddings sparsevec[])') -register_vector(cur, globally=False, arrays=True) +register_vector(cur) class TestPsycopg2: @@ -87,13 +87,13 @@ def test_cursor_factory(self): for cursor_factory in [DictCursor, RealDictCursor, NamedTupleCursor]: conn = psycopg2.connect(dbname='pgvector_python_test') cur = conn.cursor(cursor_factory=cursor_factory) - register_vector(cur, globally=False) + register_vector(cur) conn.close() def test_cursor_factory_connection(self): for cursor_factory in [DictCursor, RealDictCursor, NamedTupleCursor]: conn = psycopg2.connect(dbname='pgvector_python_test', cursor_factory=cursor_factory) - register_vector(conn, globally=False) + register_vector(conn) conn.close() def test_pool(self): @@ -102,7 +102,7 @@ def test_pool(self): conn = pool.getconn() try: # use globally=True for apps to ensure registered with all connections - register_vector(conn, globally=False) + register_vector(conn) finally: pool.putconn(conn) diff --git a/tests/test_sqlalchemy.py b/tests/test_sqlalchemy.py index aa5d81f..067a153 100644 --- a/tests/test_sqlalchemy.py +++ b/tests/test_sqlalchemy.py @@ -24,7 +24,7 @@ @event.listens_for(psycopg2_type_engine, "connect") def psycopg2_connect(dbapi_connection, connection_record): from pgvector.psycopg2 import register_vector - register_vector(dbapi_connection, globally=False, arrays=True) + register_vector(dbapi_connection) pg8000_engine = create_engine(f'postgresql+pg8000://{os.environ["USER"]}@localhost/pgvector_python_test') From 32b09c0272545322b90d38139ee625a9a7809a71 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sun, 9 Feb 2025 20:36:23 -0800 Subject: [PATCH 076/123] Fixed indices and values methods returning tuple instead of list in certain cases [skip ci] --- pgvector/utils/sparsevec.py | 3 +-- tests/test_psycopg.py | 5 ++--- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/pgvector/utils/sparsevec.py b/pgvector/utils/sparsevec.py index a370c5e..0398106 100644 --- a/pgvector/utils/sparsevec.py +++ b/pgvector/utils/sparsevec.py @@ -108,8 +108,7 @@ def from_binary(cls, value): dim, nnz, unused = unpack_from('>iii', value) indices = unpack_from(f'>{nnz}i', value, 12) values = unpack_from(f'>{nnz}f', value, 12 + nnz * 4) - # TODO convert indices and values to lists in 0.4.0 - return cls._from_parts(int(dim), indices, values) + return cls._from_parts(int(dim), list(indices), list(values)) @classmethod def _from_parts(cls, dim, indices, values): diff --git a/tests/test_psycopg.py b/tests/test_psycopg.py index 90f80b6..cf5f09a 100644 --- a/tests/test_psycopg.py +++ b/tests/test_psycopg.py @@ -111,9 +111,8 @@ def test_sparsevec_binary_format(self): embedding = SparseVector([1.5, 0, 2, 0, 3, 0]) res = 
conn.execute('SELECT %b::sparsevec', (embedding,), binary=True).fetchone()[0] assert res.dimensions() == 6 - # TODO convert indices and values to lists in 0.4.0 - assert res.indices() == (0, 2, 4) - assert res.values() == (1.5, 2, 3) + assert res.indices() == [0, 2, 4] + assert res.values() == [1.5, 2, 3] assert res.to_list() == [1.5, 0, 2, 0, 3, 0] assert np.array_equal(res.to_numpy(), np.array([1.5, 0, 2, 0, 3, 0])) From c10799c3dec3dea699fc4590d3c3baa688023b23 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sun, 9 Feb 2025 20:38:50 -0800 Subject: [PATCH 077/123] Added support for Vector class to Psycopg 2 [skip ci] --- pgvector/psycopg2/__init__.py | 3 ++- pgvector/psycopg2/vector.py | 1 + tests/test_psycopg2.py | 12 +++++++++++- 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/pgvector/psycopg2/__init__.py b/pgvector/psycopg2/__init__.py index 7c95295..f109203 100644 --- a/pgvector/psycopg2/__init__.py +++ b/pgvector/psycopg2/__init__.py @@ -1,8 +1,9 @@ from .register import register_vector -from ..utils import HalfVector, SparseVector +from ..utils import HalfVector, SparseVector, Vector __all__ = [ 'register_vector', + 'Vector', 'HalfVector', 'SparseVector' ] diff --git a/pgvector/psycopg2/vector.py b/pgvector/psycopg2/vector.py index 9861f01..5bd00bb 100644 --- a/pgvector/psycopg2/vector.py +++ b/pgvector/psycopg2/vector.py @@ -24,3 +24,4 @@ def register_vector_info(oid, array_oid, scope): register_type(vectorarray, scope) register_adapter(np.ndarray, VectorAdapter) + register_adapter(Vector, VectorAdapter) diff --git a/tests/test_psycopg2.py b/tests/test_psycopg2.py index 85aa0e8..f927d86 100644 --- a/tests/test_psycopg2.py +++ b/tests/test_psycopg2.py @@ -1,5 +1,5 @@ import numpy as np -from pgvector.psycopg2 import register_vector, HalfVector, SparseVector +from pgvector.psycopg2 import register_vector, Vector, HalfVector, SparseVector import psycopg2 from psycopg2.extras import DictCursor, RealDictCursor, NamedTupleCursor from psycopg2.pool import ThreadedConnectionPool @@ -29,6 +29,16 @@ def test_vector(self): assert res[0][0].dtype == np.float32 assert res[1][0] is None + def test_vector_class(self): + embedding = Vector([1.5, 2, 3]) + cur.execute('INSERT INTO psycopg2_items (embedding) VALUES (%s), (NULL)', (embedding,)) + + cur.execute('SELECT embedding FROM psycopg2_items ORDER BY id') + res = cur.fetchall() + assert np.array_equal(res[0][0], embedding.to_numpy()) + assert res[0][0].dtype == np.float32 + assert res[1][0] is None + def test_halfvec(self): embedding = [1.5, 2, 3] cur.execute('INSERT INTO psycopg2_items (half_embedding) VALUES (%s), (NULL)', (embedding,)) From e138e55d187c1b03f9b827c6849aabb1f9697eaf Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sun, 9 Feb 2025 20:50:52 -0800 Subject: [PATCH 078/123] Fixed equality for types [skip ci] --- pgvector/django/functions.py | 4 ++++ pgvector/utils/bit.py | 5 +++++ pgvector/utils/halfvec.py | 5 +++++ pgvector/utils/sparsevec.py | 5 +++++ pgvector/utils/vector.py | 5 +++++ tests/test_bit.py | 4 ++++ tests/test_half_vector.py | 4 ++++ tests/test_sparse_vector.py | 5 +++++ tests/test_vector.py | 4 ++++ 9 files changed, 41 insertions(+) diff --git a/pgvector/django/functions.py b/pgvector/django/functions.py index da9fbf8..6c14c3d 100644 --- a/pgvector/django/functions.py +++ b/pgvector/django/functions.py @@ -13,6 +13,10 @@ def __init__(self, expression, vector, **extra): vector = Value(SparseVector._to_db(vector)) else: vector = Value(Vector._to_db(vector)) + + # prevent error with unhashable types + 
self._constructor_args = ((expression, vector), extra) + super().__init__(expression, vector, **extra) diff --git a/pgvector/utils/bit.py b/pgvector/utils/bit.py index 51f7556..227edc1 100644 --- a/pgvector/utils/bit.py +++ b/pgvector/utils/bit.py @@ -24,6 +24,11 @@ def __init__(self, value): def __repr__(self): return f'Bit({self.to_text()})' + def __eq__(self, other): + if isinstance(other, self.__class__): + return np.array_equal(self.to_numpy(), other.to_numpy()) + return False + def to_list(self): return self._value.tolist() diff --git a/pgvector/utils/halfvec.py b/pgvector/utils/halfvec.py index e1e5051..f335f2f 100644 --- a/pgvector/utils/halfvec.py +++ b/pgvector/utils/halfvec.py @@ -16,6 +16,11 @@ def __init__(self, value): def __repr__(self): return f'HalfVector({self.to_list()})' + def __eq__(self, other): + if isinstance(other, self.__class__): + return np.array_equal(self.to_numpy(), other.to_numpy()) + return False + def dimensions(self): return len(self._value) diff --git a/pgvector/utils/sparsevec.py b/pgvector/utils/sparsevec.py index 0398106..8df2dfd 100644 --- a/pgvector/utils/sparsevec.py +++ b/pgvector/utils/sparsevec.py @@ -26,6 +26,11 @@ def __repr__(self): elements = dict(zip(self._indices, self._values)) return f'SparseVector({elements}, {self._dim})' + def __eq__(self, other): + if isinstance(other, self.__class__): + return self.dimensions() == other.dimensions() and self.indices() == other.indices() and self.values() == other.values() + return False + def dimensions(self): return self._dim diff --git a/pgvector/utils/vector.py b/pgvector/utils/vector.py index 3fa2f35..ebbcafd 100644 --- a/pgvector/utils/vector.py +++ b/pgvector/utils/vector.py @@ -16,6 +16,11 @@ def __init__(self, value): def __repr__(self): return f'Vector({self.to_list()})' + def __eq__(self, other): + if isinstance(other, self.__class__): + return np.array_equal(self.to_numpy(), other.to_numpy()) + return False + def dimensions(self): return len(self._value) diff --git a/tests/test_bit.py b/tests/test_bit.py index 32ab87b..a7e0093 100644 --- a/tests/test_bit.py +++ b/tests/test_bit.py @@ -37,3 +37,7 @@ def test_ndim_zero(self): def test_repr(self): assert repr(Bit([True, False, True])) == 'Bit(101)' assert str(Bit([True, False, True])) == 'Bit(101)' + + def test_equality(self): + assert Bit([True, False, True]) == Bit([True, False, True]) + assert Bit([True, False, True]) != Bit([True, False, False]) diff --git a/tests/test_half_vector.py b/tests/test_half_vector.py index fdaa5f7..77a7869 100644 --- a/tests/test_half_vector.py +++ b/tests/test_half_vector.py @@ -38,5 +38,9 @@ def test_repr(self): assert repr(HalfVector([1, 2, 3])) == 'HalfVector([1.0, 2.0, 3.0])' assert str(HalfVector([1, 2, 3])) == 'HalfVector([1.0, 2.0, 3.0])' + def test_equality(self): + assert HalfVector([1, 2, 3]) == HalfVector([1, 2, 3]) + assert HalfVector([1, 2, 3]) != HalfVector([1, 2, 4]) + def test_dimensions(self): assert HalfVector([1, 2, 3]).dimensions() == 3 diff --git a/tests/test_sparse_vector.py b/tests/test_sparse_vector.py index 06fe81a..24d8c20 100644 --- a/tests/test_sparse_vector.py +++ b/tests/test_sparse_vector.py @@ -52,6 +52,11 @@ def test_repr(self): assert repr(SparseVector([1, 0, 2, 0, 3, 0])) == 'SparseVector({0: 1.0, 2: 2.0, 4: 3.0}, 6)' assert str(SparseVector([1, 0, 2, 0, 3, 0])) == 'SparseVector({0: 1.0, 2: 2.0, 4: 3.0}, 6)' + def test_equality(self): + assert SparseVector([1, 0, 2, 0, 3, 0]) == SparseVector([1, 0, 2, 0, 3, 0]) + assert SparseVector([1, 0, 2, 0, 3, 0]) != SparseVector([1, 0, 
2, 0, 3, 1]) + assert SparseVector([1, 0, 2, 0, 3, 0]) == SparseVector({2: 2, 4: 3, 0: 1, 3: 0}, 6) + def test_dimensions(self): assert SparseVector([1, 0, 2, 0, 3, 0]).dimensions() == 6 diff --git a/tests/test_vector.py b/tests/test_vector.py index 1be2bc0..fe14dea 100644 --- a/tests/test_vector.py +++ b/tests/test_vector.py @@ -38,5 +38,9 @@ def test_repr(self): assert repr(Vector([1, 2, 3])) == 'Vector([1.0, 2.0, 3.0])' assert str(Vector([1, 2, 3])) == 'Vector([1.0, 2.0, 3.0])' + def test_equality(self): + assert Vector([1, 2, 3]) == Vector([1, 2, 3]) + assert Vector([1, 2, 3]) != Vector([1, 2, 4]) + def test_dimensions(self): assert Vector([1, 2, 3]).dimensions() == 3 From 838ea0c73b1669c94de274eccd82f58d83ea55b4 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sun, 9 Feb 2025 20:57:03 -0800 Subject: [PATCH 079/123] Moved classes to pgvector module [skip ci] --- pgvector/__init__.py | 11 +++++++++++ pgvector/{utils => }/bit.py | 0 pgvector/{utils => }/halfvec.py | 0 pgvector/{utils => }/sparsevec.py | 0 pgvector/utils/__init__.py | 5 +---- pgvector/{utils => }/vector.py | 0 6 files changed, 12 insertions(+), 4 deletions(-) create mode 100644 pgvector/__init__.py rename pgvector/{utils => }/bit.py (100%) rename pgvector/{utils => }/halfvec.py (100%) rename pgvector/{utils => }/sparsevec.py (100%) rename pgvector/{utils => }/vector.py (100%) diff --git a/pgvector/__init__.py b/pgvector/__init__.py new file mode 100644 index 0000000..3c01160 --- /dev/null +++ b/pgvector/__init__.py @@ -0,0 +1,11 @@ +from .bit import Bit +from .halfvec import HalfVector +from .sparsevec import SparseVector +from .vector import Vector + +__all__ = [ + 'Vector', + 'HalfVector', + 'Bit', + 'SparseVector' +] diff --git a/pgvector/utils/bit.py b/pgvector/bit.py similarity index 100% rename from pgvector/utils/bit.py rename to pgvector/bit.py diff --git a/pgvector/utils/halfvec.py b/pgvector/halfvec.py similarity index 100% rename from pgvector/utils/halfvec.py rename to pgvector/halfvec.py diff --git a/pgvector/utils/sparsevec.py b/pgvector/sparsevec.py similarity index 100% rename from pgvector/utils/sparsevec.py rename to pgvector/sparsevec.py diff --git a/pgvector/utils/__init__.py b/pgvector/utils/__init__.py index 3c01160..1dcc240 100644 --- a/pgvector/utils/__init__.py +++ b/pgvector/utils/__init__.py @@ -1,7 +1,4 @@ -from .bit import Bit -from .halfvec import HalfVector -from .sparsevec import SparseVector -from .vector import Vector +from .. 
import Bit, HalfVector, SparseVector, Vector __all__ = [ 'Vector', diff --git a/pgvector/utils/vector.py b/pgvector/vector.py similarity index 100% rename from pgvector/utils/vector.py rename to pgvector/vector.py From 0ac00b4e3d39ea1ddefd8573588f7de2e60d112f Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sun, 9 Feb 2025 21:00:08 -0800 Subject: [PATCH 080/123] Improved imports for tests [skip ci] --- tests/test_asyncpg.py | 3 ++- tests/test_bit.py | 2 +- tests/test_django.py | 3 ++- tests/test_half_vector.py | 2 +- tests/test_peewee.py | 3 ++- tests/test_psycopg.py | 3 ++- tests/test_psycopg2.py | 3 ++- tests/test_sparse_vector.py | 2 +- tests/test_sqlalchemy.py | 3 ++- tests/test_sqlmodel.py | 3 ++- tests/test_vector.py | 2 +- 11 files changed, 18 insertions(+), 11 deletions(-) diff --git a/tests/test_asyncpg.py b/tests/test_asyncpg.py index 48d1e32..3c36048 100644 --- a/tests/test_asyncpg.py +++ b/tests/test_asyncpg.py @@ -1,6 +1,7 @@ import asyncpg import numpy as np -from pgvector.asyncpg import register_vector, SparseVector +from pgvector import SparseVector +from pgvector.asyncpg import register_vector import pytest diff --git a/tests/test_bit.py b/tests/test_bit.py index a7e0093..e0dcfe6 100644 --- a/tests/test_bit.py +++ b/tests/test_bit.py @@ -1,5 +1,5 @@ import numpy as np -from pgvector.utils import Bit +from pgvector import Bit import pytest diff --git a/tests/test_django.py b/tests/test_django.py index ea15771..65082a3 100644 --- a/tests/test_django.py +++ b/tests/test_django.py @@ -12,7 +12,8 @@ import numpy as np import os import pgvector.django -from pgvector.django import VectorExtension, VectorField, HalfVectorField, BitField, SparseVectorField, IvfflatIndex, HnswIndex, L2Distance, MaxInnerProduct, CosineDistance, L1Distance, HammingDistance, JaccardDistance, HalfVector, SparseVector +from pgvector import HalfVector, SparseVector +from pgvector.django import VectorExtension, VectorField, HalfVectorField, BitField, SparseVectorField, IvfflatIndex, HnswIndex, L2Distance, MaxInnerProduct, CosineDistance, L1Distance, HammingDistance, JaccardDistance from unittest import mock settings.configure( diff --git a/tests/test_half_vector.py b/tests/test_half_vector.py index 77a7869..6a94c2e 100644 --- a/tests/test_half_vector.py +++ b/tests/test_half_vector.py @@ -1,5 +1,5 @@ import numpy as np -from pgvector.utils import HalfVector +from pgvector import HalfVector import pytest diff --git a/tests/test_peewee.py b/tests/test_peewee.py index e98a0ec..670d880 100644 --- a/tests/test_peewee.py +++ b/tests/test_peewee.py @@ -1,7 +1,8 @@ from math import sqrt import numpy as np from peewee import Model, PostgresqlDatabase, fn -from pgvector.peewee import VectorField, HalfVectorField, FixedBitField, SparseVectorField, SparseVector +from pgvector import SparseVector +from pgvector.peewee import VectorField, HalfVectorField, FixedBitField, SparseVectorField db = PostgresqlDatabase('pgvector_python_test') diff --git a/tests/test_psycopg.py b/tests/test_psycopg.py index cf5f09a..6a9d0b7 100644 --- a/tests/test_psycopg.py +++ b/tests/test_psycopg.py @@ -1,5 +1,6 @@ import numpy as np -from pgvector.psycopg import register_vector, register_vector_async, Bit, HalfVector, SparseVector, Vector +from pgvector import Bit, HalfVector, SparseVector, Vector +from pgvector.psycopg import register_vector, register_vector_async import psycopg from psycopg_pool import ConnectionPool, AsyncConnectionPool import pytest diff --git a/tests/test_psycopg2.py b/tests/test_psycopg2.py index f927d86..1994c87 
100644 --- a/tests/test_psycopg2.py +++ b/tests/test_psycopg2.py @@ -1,5 +1,6 @@ import numpy as np -from pgvector.psycopg2 import register_vector, Vector, HalfVector, SparseVector +from pgvector import HalfVector, SparseVector, Vector +from pgvector.psycopg2 import register_vector import psycopg2 from psycopg2.extras import DictCursor, RealDictCursor, NamedTupleCursor from psycopg2.pool import ThreadedConnectionPool diff --git a/tests/test_sparse_vector.py b/tests/test_sparse_vector.py index 24d8c20..b5e7fe8 100644 --- a/tests/test_sparse_vector.py +++ b/tests/test_sparse_vector.py @@ -1,5 +1,5 @@ import numpy as np -from pgvector.utils import SparseVector +from pgvector import SparseVector import pytest from scipy.sparse import coo_array diff --git a/tests/test_sqlalchemy.py b/tests/test_sqlalchemy.py index 067a153..052edd7 100644 --- a/tests/test_sqlalchemy.py +++ b/tests/test_sqlalchemy.py @@ -1,7 +1,8 @@ import asyncpg import numpy as np import os -from pgvector.sqlalchemy import VECTOR, HALFVEC, BIT, SPARSEVEC, SparseVector, avg, sum +from pgvector import SparseVector +from pgvector.sqlalchemy import VECTOR, HALFVEC, BIT, SPARSEVEC, avg, sum import pytest from sqlalchemy import create_engine, event, insert, inspect, select, text, MetaData, Table, Column, Index, Integer, ARRAY from sqlalchemy.exc import StatementError diff --git a/tests/test_sqlmodel.py b/tests/test_sqlmodel.py index 373834f..b0e8ccd 100644 --- a/tests/test_sqlmodel.py +++ b/tests/test_sqlmodel.py @@ -1,5 +1,6 @@ import numpy as np -from pgvector.sqlalchemy import VECTOR, HALFVEC, BIT, SPARSEVEC, SparseVector, avg, sum +from pgvector import SparseVector +from pgvector.sqlalchemy import VECTOR, HALFVEC, BIT, SPARSEVEC, avg, sum import pytest from sqlalchemy.exc import StatementError from sqlmodel import Field, Index, Session, SQLModel, create_engine, delete, select, text diff --git a/tests/test_vector.py b/tests/test_vector.py index fe14dea..406637f 100644 --- a/tests/test_vector.py +++ b/tests/test_vector.py @@ -1,5 +1,5 @@ import numpy as np -from pgvector.utils import Vector +from pgvector import Vector import pytest From 435e31654831d303342a1100a8dd32b6c1fe42a6 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sun, 9 Feb 2025 21:03:28 -0800 Subject: [PATCH 081/123] Improved imports --- pgvector/asyncpg/__init__.py | 4 +++- pgvector/asyncpg/register.py | 2 +- pgvector/django/__init__.py | 4 +++- pgvector/django/functions.py | 2 +- pgvector/django/halfvec.py | 2 +- pgvector/django/sparsevec.py | 2 +- pgvector/django/vector.py | 2 +- pgvector/peewee/__init__.py | 4 +++- pgvector/peewee/halfvec.py | 2 +- pgvector/peewee/sparsevec.py | 2 +- pgvector/peewee/vector.py | 2 +- pgvector/psycopg/__init__.py | 4 +++- pgvector/psycopg/bit.py | 2 +- pgvector/psycopg/halfvec.py | 2 +- pgvector/psycopg/sparsevec.py | 2 +- pgvector/psycopg/vector.py | 2 +- pgvector/psycopg2/__init__.py | 4 +++- pgvector/psycopg2/halfvec.py | 2 +- pgvector/psycopg2/sparsevec.py | 2 +- pgvector/psycopg2/vector.py | 2 +- pgvector/sqlalchemy/__init__.py | 4 +++- pgvector/sqlalchemy/halfvec.py | 2 +- pgvector/sqlalchemy/sparsevec.py | 2 +- pgvector/sqlalchemy/vector.py | 2 +- 24 files changed, 36 insertions(+), 24 deletions(-) diff --git a/pgvector/asyncpg/__init__.py b/pgvector/asyncpg/__init__.py index 543b882..c6a3b4e 100644 --- a/pgvector/asyncpg/__init__.py +++ b/pgvector/asyncpg/__init__.py @@ -1,5 +1,7 @@ from .register import register_vector -from ..utils import Vector, HalfVector, SparseVector + +# TODO remove +from .. 
import Vector, HalfVector, SparseVector __all__ = [ 'register_vector', diff --git a/pgvector/asyncpg/register.py b/pgvector/asyncpg/register.py index a388058..63726f3 100644 --- a/pgvector/asyncpg/register.py +++ b/pgvector/asyncpg/register.py @@ -1,4 +1,4 @@ -from ..utils import Vector, HalfVector, SparseVector +from .. import Vector, HalfVector, SparseVector async def register_vector(conn, schema='public'): diff --git a/pgvector/django/__init__.py b/pgvector/django/__init__.py index 09978a9..43c64a3 100644 --- a/pgvector/django/__init__.py +++ b/pgvector/django/__init__.py @@ -5,7 +5,9 @@ from .indexes import IvfflatIndex, HnswIndex from .sparsevec import SparseVectorField from .vector import VectorField -from ..utils import HalfVector, SparseVector + +# TODO remove +from .. import HalfVector, SparseVector __all__ = [ 'VectorExtension', diff --git a/pgvector/django/functions.py b/pgvector/django/functions.py index 6c14c3d..9df4fdb 100644 --- a/pgvector/django/functions.py +++ b/pgvector/django/functions.py @@ -1,5 +1,5 @@ from django.db.models import FloatField, Func, Value -from ..utils import Vector, HalfVector, SparseVector +from .. import Vector, HalfVector, SparseVector class DistanceBase(Func): diff --git a/pgvector/django/halfvec.py b/pgvector/django/halfvec.py index 6b59a7f..3aeb90f 100644 --- a/pgvector/django/halfvec.py +++ b/pgvector/django/halfvec.py @@ -1,6 +1,6 @@ from django import forms from django.db.models import Field -from ..utils import HalfVector +from .. import HalfVector # https://docs.djangoproject.com/en/5.0/howto/custom-model-fields/ diff --git a/pgvector/django/sparsevec.py b/pgvector/django/sparsevec.py index d0d2d07..580f27c 100644 --- a/pgvector/django/sparsevec.py +++ b/pgvector/django/sparsevec.py @@ -1,6 +1,6 @@ from django import forms from django.db.models import Field -from ..utils import SparseVector +from .. import SparseVector # https://docs.djangoproject.com/en/5.0/howto/custom-model-fields/ diff --git a/pgvector/django/vector.py b/pgvector/django/vector.py index a89d540..861cfde 100644 --- a/pgvector/django/vector.py +++ b/pgvector/django/vector.py @@ -1,7 +1,7 @@ from django import forms from django.db.models import Field import numpy as np -from ..utils import Vector +from .. import Vector # https://docs.djangoproject.com/en/5.0/howto/custom-model-fields/ diff --git a/pgvector/peewee/__init__.py b/pgvector/peewee/__init__.py index 945e0dc..df21200 100644 --- a/pgvector/peewee/__init__.py +++ b/pgvector/peewee/__init__.py @@ -2,7 +2,9 @@ from .halfvec import HalfVectorField from .sparsevec import SparseVectorField from .vector import VectorField -from ..utils import HalfVector, SparseVector + +# TODO remove +from .. import HalfVector, SparseVector __all__ = [ 'VectorField', diff --git a/pgvector/peewee/halfvec.py b/pgvector/peewee/halfvec.py index deaa14d..0901fd2 100644 --- a/pgvector/peewee/halfvec.py +++ b/pgvector/peewee/halfvec.py @@ -1,5 +1,5 @@ from peewee import Expression, Field -from ..utils import HalfVector +from .. import HalfVector class HalfVectorField(Field): diff --git a/pgvector/peewee/sparsevec.py b/pgvector/peewee/sparsevec.py index 67f7d1b..86dea73 100644 --- a/pgvector/peewee/sparsevec.py +++ b/pgvector/peewee/sparsevec.py @@ -1,5 +1,5 @@ from peewee import Expression, Field -from ..utils import SparseVector +from .. 
import SparseVector class SparseVectorField(Field): diff --git a/pgvector/peewee/vector.py b/pgvector/peewee/vector.py index 22a87e5..83f9997 100644 --- a/pgvector/peewee/vector.py +++ b/pgvector/peewee/vector.py @@ -1,5 +1,5 @@ from peewee import Expression, Field -from ..utils import Vector +from .. import Vector class VectorField(Field): diff --git a/pgvector/psycopg/__init__.py b/pgvector/psycopg/__init__.py index 9007c37..980af84 100644 --- a/pgvector/psycopg/__init__.py +++ b/pgvector/psycopg/__init__.py @@ -1,5 +1,7 @@ from .register import register_vector, register_vector_async -from ..utils import Bit, HalfVector, SparseVector, Vector + +# TODO remove +from .. import Bit, HalfVector, SparseVector, Vector __all__ = [ 'register_vector', diff --git a/pgvector/psycopg/bit.py b/pgvector/psycopg/bit.py index f8eeb61..cffe8fb 100644 --- a/pgvector/psycopg/bit.py +++ b/pgvector/psycopg/bit.py @@ -1,6 +1,6 @@ from psycopg.adapt import Dumper from psycopg.pq import Format -from ..utils import Bit +from .. import Bit class BitDumper(Dumper): diff --git a/pgvector/psycopg/halfvec.py b/pgvector/psycopg/halfvec.py index 351d2cb..b3a0060 100644 --- a/pgvector/psycopg/halfvec.py +++ b/pgvector/psycopg/halfvec.py @@ -1,6 +1,6 @@ from psycopg.adapt import Loader, Dumper from psycopg.pq import Format -from ..utils import HalfVector +from .. import HalfVector class HalfVectorDumper(Dumper): diff --git a/pgvector/psycopg/sparsevec.py b/pgvector/psycopg/sparsevec.py index 435fd06..384a0e1 100644 --- a/pgvector/psycopg/sparsevec.py +++ b/pgvector/psycopg/sparsevec.py @@ -1,6 +1,6 @@ from psycopg.adapt import Loader, Dumper from psycopg.pq import Format -from ..utils import SparseVector +from .. import SparseVector class SparseVectorDumper(Dumper): diff --git a/pgvector/psycopg/vector.py b/pgvector/psycopg/vector.py index 0f62ca9..db9e826 100644 --- a/pgvector/psycopg/vector.py +++ b/pgvector/psycopg/vector.py @@ -1,7 +1,7 @@ import psycopg from psycopg.adapt import Loader, Dumper from psycopg.pq import Format -from ..utils import Vector +from .. import Vector class VectorDumper(Dumper): diff --git a/pgvector/psycopg2/__init__.py b/pgvector/psycopg2/__init__.py index f109203..b40c673 100644 --- a/pgvector/psycopg2/__init__.py +++ b/pgvector/psycopg2/__init__.py @@ -1,5 +1,7 @@ from .register import register_vector -from ..utils import HalfVector, SparseVector, Vector + +# TODO remove +from .. import HalfVector, SparseVector, Vector __all__ = [ 'register_vector', diff --git a/pgvector/psycopg2/halfvec.py b/pgvector/psycopg2/halfvec.py index b50e89b..0a4c736 100644 --- a/pgvector/psycopg2/halfvec.py +++ b/pgvector/psycopg2/halfvec.py @@ -1,5 +1,5 @@ from psycopg2.extensions import adapt, new_array_type, new_type, register_adapter, register_type -from ..utils import HalfVector +from .. import HalfVector class HalfvecAdapter: diff --git a/pgvector/psycopg2/sparsevec.py b/pgvector/psycopg2/sparsevec.py index a542807..148eff2 100644 --- a/pgvector/psycopg2/sparsevec.py +++ b/pgvector/psycopg2/sparsevec.py @@ -1,5 +1,5 @@ from psycopg2.extensions import adapt, new_array_type, new_type, register_adapter, register_type -from ..utils import SparseVector +from .. 
import SparseVector class SparsevecAdapter: diff --git a/pgvector/psycopg2/vector.py b/pgvector/psycopg2/vector.py index 5bd00bb..562de18 100644 --- a/pgvector/psycopg2/vector.py +++ b/pgvector/psycopg2/vector.py @@ -1,6 +1,6 @@ import numpy as np from psycopg2.extensions import adapt, new_array_type, new_type, register_adapter, register_type -from ..utils import Vector +from .. import Vector class VectorAdapter: diff --git a/pgvector/sqlalchemy/__init__.py b/pgvector/sqlalchemy/__init__.py index 4955eeb..52adf88 100644 --- a/pgvector/sqlalchemy/__init__.py +++ b/pgvector/sqlalchemy/__init__.py @@ -4,7 +4,9 @@ from .sparsevec import SPARSEVEC from .vector import VECTOR from .vector import VECTOR as Vector -from ..utils import HalfVector, SparseVector + +# TODO remove +from .. import HalfVector, SparseVector __all__ = [ 'Vector', diff --git a/pgvector/sqlalchemy/halfvec.py b/pgvector/sqlalchemy/halfvec.py index 639f77b..10688b5 100644 --- a/pgvector/sqlalchemy/halfvec.py +++ b/pgvector/sqlalchemy/halfvec.py @@ -1,6 +1,6 @@ from sqlalchemy.dialects.postgresql.base import ischema_names from sqlalchemy.types import UserDefinedType, Float, String -from ..utils import HalfVector +from .. import HalfVector class HALFVEC(UserDefinedType): diff --git a/pgvector/sqlalchemy/sparsevec.py b/pgvector/sqlalchemy/sparsevec.py index 370f5d1..0058679 100644 --- a/pgvector/sqlalchemy/sparsevec.py +++ b/pgvector/sqlalchemy/sparsevec.py @@ -1,6 +1,6 @@ from sqlalchemy.dialects.postgresql.base import ischema_names from sqlalchemy.types import UserDefinedType, Float, String -from ..utils import SparseVector +from .. import SparseVector class SPARSEVEC(UserDefinedType): diff --git a/pgvector/sqlalchemy/vector.py b/pgvector/sqlalchemy/vector.py index f57a045..5a1e11f 100644 --- a/pgvector/sqlalchemy/vector.py +++ b/pgvector/sqlalchemy/vector.py @@ -1,6 +1,6 @@ from sqlalchemy.dialects.postgresql.base import ischema_names from sqlalchemy.types import UserDefinedType, Float, String -from ..utils import Vector +from .. import Vector class VECTOR(UserDefinedType): From 08e29e1acdcdf03965f7ffb4e1e552688df51785 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sun, 9 Feb 2025 21:04:40 -0800 Subject: [PATCH 082/123] Added todo [skip ci] --- pgvector/utils/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pgvector/utils/__init__.py b/pgvector/utils/__init__.py index 1dcc240..8cdb5d6 100644 --- a/pgvector/utils/__init__.py +++ b/pgvector/utils/__init__.py @@ -1,3 +1,4 @@ +# TODO remove from .. 
import Bit, HalfVector, SparseVector, Vector __all__ = [ From 1c0ff62b65718899915cd51466c63b9b60c3787f Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sun, 9 Feb 2025 21:50:36 -0800 Subject: [PATCH 083/123] Updated changelog [skip ci] --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1788ff4..df60740 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,6 @@ ## 0.4.0 (unreleased) +- Fixed equality for `Vector`, `HalfVector`, `Bit`, and `SparseVector` classes - Dropped support for Python < 3.9 ## 0.3.6 (2024-10-26) From f618edb43395795451d3079b1def7f6c8cbb76ba Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sun, 9 Feb 2025 21:55:07 -0800 Subject: [PATCH 084/123] Updated changelog [skip ci] --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index df60740..24aebf9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ ## 0.4.0 (unreleased) - Fixed equality for `Vector`, `HalfVector`, `Bit`, and `SparseVector` classes +- Fixed `indices` and `values` methods of `SparseVector` returning tuple instead of list in some cases - Dropped support for Python < 3.9 ## 0.3.6 (2024-10-26) From 537f3ba72519314b2bc5b65f7d625594a496345e Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sun, 9 Feb 2025 22:01:16 -0800 Subject: [PATCH 085/123] Updated changelog [skip ci] --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 24aebf9..42e9bff 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,6 @@ ## 0.4.0 (unreleased) +- Added top-level `pgvector` package - Fixed equality for `Vector`, `HalfVector`, `Bit`, and `SparseVector` classes - Fixed `indices` and `values` methods of `SparseVector` returning tuple instead of list in some cases - Dropped support for Python < 3.9 From 571bf4287cfe8a2371f477250c02efa3f62f67a1 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sun, 9 Feb 2025 22:03:52 -0800 Subject: [PATCH 086/123] Updated changelog [skip ci] --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 42e9bff..f53a2ce 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,8 @@ ## 0.4.0 (unreleased) - Added top-level `pgvector` package +- Changed `globally` option to default to `False` for Psycopg 2 +- Changed `arrays` option to default to `True` for Psycopg 2 - Fixed equality for `Vector`, `HalfVector`, `Bit`, and `SparseVector` classes - Fixed `indices` and `values` methods of `SparseVector` returning tuple instead of list in some cases - Dropped support for Python < 3.9 From 1676e3ead391493375ff6958a5b80c78080cf01e Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Mon, 10 Feb 2025 17:13:55 -0800 Subject: [PATCH 087/123] Test SQLAlchemy 1 on CI --- .github/workflows/build.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index dc53dfe..4d4e8ed 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -24,3 +24,6 @@ jobs: make sudo make install - run: pytest + + - run: pip install "SQLAlchemy<2" -U + - run: pytest tests/test_sqlalchemy.py From ac9fd532f77c1497df250e519238f7f5d627f645 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Mon, 10 Feb 2025 17:49:10 -0800 Subject: [PATCH 088/123] Improved Bit constructor for uint8 NumPy arrays --- pgvector/bit.py | 11 ++++------- tests/test_bit.py | 4 +--- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/pgvector/bit.py b/pgvector/bit.py index 227edc1..36da723 100644 --- 
a/pgvector/bit.py +++ b/pgvector/bit.py @@ -7,14 +7,11 @@ def __init__(self, value): if isinstance(value, str): self._value = self.from_text(value)._value else: - # TODO change in 0.4.0 # TODO raise if dtype not bool or uint8 - # if isinstance(value, np.ndarray) and value.dtype == np.uint8: - # value = np.unpackbits(value) - # else: - # value = np.asarray(value, dtype=bool) - - value = np.asarray(value, dtype=bool) + if isinstance(value, np.ndarray) and value.dtype == np.uint8: + value = np.unpackbits(value) + else: + value = np.asarray(value, dtype=bool) if value.ndim != 1: raise ValueError('expected ndim to be 1') diff --git a/tests/test_bit.py b/tests/test_bit.py index e0dcfe6..1d771ca 100644 --- a/tests/test_bit.py +++ b/tests/test_bit.py @@ -15,9 +15,7 @@ def test_str(self): def test_ndarray_uint8(self): arr = np.array([254, 7, 0], dtype=np.uint8) - # TODO change in 0.4.0 - # assert Bit(arr).to_text() == '111111100000011100000000' - assert Bit(arr).to_text() == '110' + assert Bit(arr).to_text() == '111111100000011100000000' def test_ndarray_same_object(self): arr = np.array([True, False, True]) From 1b25460e6184bb744b9c71c9c5b95852bdf7c63f Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Mon, 10 Feb 2025 18:02:23 -0800 Subject: [PATCH 089/123] Raise error for unexpected dtype for Bit constructor [skip ci] --- pgvector/bit.py | 8 +++++--- tests/test_bit.py | 6 ++++++ 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/pgvector/bit.py b/pgvector/bit.py index 36da723..4be7385 100644 --- a/pgvector/bit.py +++ b/pgvector/bit.py @@ -7,9 +7,11 @@ def __init__(self, value): if isinstance(value, str): self._value = self.from_text(value)._value else: - # TODO raise if dtype not bool or uint8 - if isinstance(value, np.ndarray) and value.dtype == np.uint8: - value = np.unpackbits(value) + if isinstance(value, np.ndarray): + if value.dtype == np.uint8: + value = np.unpackbits(value).astype(bool) + elif value.dtype != np.bool: + raise ValueError('expected dtype to be bool or uint8') else: value = np.asarray(value, dtype=bool) diff --git a/tests/test_bit.py b/tests/test_bit.py index 1d771ca..5e1bff2 100644 --- a/tests/test_bit.py +++ b/tests/test_bit.py @@ -17,6 +17,12 @@ def test_ndarray_uint8(self): arr = np.array([254, 7, 0], dtype=np.uint8) assert Bit(arr).to_text() == '111111100000011100000000' + def test_ndarray_uint16(self): + arr = np.array([254, 7, 0], dtype=np.uint16) + with pytest.raises(ValueError) as error: + Bit(arr) + assert str(error.value) == 'expected dtype to be bool or uint8' + def test_ndarray_same_object(self): arr = np.array([True, False, True]) assert Bit(arr).to_list() == [True, False, True] From 8b927161f7856415436159d0b2c804280261a759 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Mon, 10 Feb 2025 18:11:57 -0800 Subject: [PATCH 090/123] Improved asyncpg tests [skip ci] --- tests/test_asyncpg.py | 45 ++++++++++++++++++++++++++----------------- 1 file changed, 27 insertions(+), 18 deletions(-) diff --git a/tests/test_asyncpg.py b/tests/test_asyncpg.py index 3c36048..982ea8d 100644 --- a/tests/test_asyncpg.py +++ b/tests/test_asyncpg.py @@ -1,6 +1,6 @@ import asyncpg import numpy as np -from pgvector import SparseVector +from pgvector import HalfVector, SparseVector, Vector from pgvector.asyncpg import register_vector import pytest @@ -15,13 +15,15 @@ async def test_vector(self): await register_vector(conn) - embedding = np.array([1.5, 2, 3]) - await conn.execute("INSERT INTO asyncpg_items (embedding) VALUES ($1), (NULL)", embedding) + embedding = Vector([1.5, 2, 
3]) + embedding2 = np.array([4.5, 5, 6]) + await conn.execute("INSERT INTO asyncpg_items (embedding) VALUES ($1), ($2), (NULL)", embedding, embedding2) res = await conn.fetch("SELECT * FROM asyncpg_items ORDER BY id") - assert np.array_equal(res[0]['embedding'], embedding) + assert np.array_equal(res[0]['embedding'], embedding.to_numpy()) assert res[0]['embedding'].dtype == np.float32 - assert res[1]['embedding'] is None + assert np.array_equal(res[1]['embedding'], embedding2) + assert res[2]['embedding'] is None # ensures binary format is correct text_res = await conn.fetch("SELECT embedding::text FROM asyncpg_items ORDER BY id LIMIT 1") @@ -38,12 +40,14 @@ async def test_halfvec(self): await register_vector(conn) - embedding = [1.5, 2, 3] - await conn.execute("INSERT INTO asyncpg_items (embedding) VALUES ($1), (NULL)", embedding) + embedding = HalfVector([1.5, 2, 3]) + embedding2 = [4.5, 5, 6] + await conn.execute("INSERT INTO asyncpg_items (embedding) VALUES ($1), ($2), (NULL)", embedding, embedding2) res = await conn.fetch("SELECT * FROM asyncpg_items ORDER BY id") - assert res[0]['embedding'].to_list() == [1.5, 2, 3] - assert res[1]['embedding'] is None + assert res[0]['embedding'] == embedding + assert res[1]['embedding'] == HalfVector(embedding2) + assert res[2]['embedding'] is None # ensures binary format is correct text_res = await conn.fetch("SELECT embedding::text FROM asyncpg_items ORDER BY id LIMIT 1") @@ -87,7 +91,7 @@ async def test_sparsevec(self): await conn.execute("INSERT INTO asyncpg_items (embedding) VALUES ($1), (NULL)", embedding) res = await conn.fetch("SELECT * FROM asyncpg_items ORDER BY id") - assert res[0]['embedding'].to_list() == [1.5, 2, 3] + assert res[0]['embedding'] == embedding assert res[1]['embedding'] is None # ensures binary format is correct @@ -105,12 +109,15 @@ async def test_vector_array(self): await register_vector(conn) - embeddings = [np.array([1.5, 2, 3]), np.array([4.5, 5, 6])] - await conn.execute("INSERT INTO asyncpg_items (embeddings) VALUES (ARRAY[$1, $2]::vector[])", embeddings[0], embeddings[1]) + embeddings = [Vector([1.5, 2, 3]), Vector([4.5, 5, 6])] + embeddings2 = [np.array([1.5, 2, 3]), np.array([4.5, 5, 6])] + await conn.execute("INSERT INTO asyncpg_items (embeddings) VALUES (ARRAY[$1, $2]::vector[]), (ARRAY[$3, $4]::vector[])", embeddings[0], embeddings[1], embeddings2[0], embeddings2[1]) res = await conn.fetch("SELECT * FROM asyncpg_items ORDER BY id") - assert np.array_equal(res[0]['embeddings'][0], embeddings[0]) - assert np.array_equal(res[0]['embeddings'][1], embeddings[1]) + assert np.array_equal(res[0]['embeddings'][0], embeddings[0].to_numpy()) + assert np.array_equal(res[0]['embeddings'][1], embeddings[1].to_numpy()) + assert np.array_equal(res[1]['embeddings'][0], embeddings2[0]) + assert np.array_equal(res[1]['embeddings'][1], embeddings2[1]) await conn.close() @@ -126,10 +133,12 @@ async def init(conn): await conn.execute('DROP TABLE IF EXISTS asyncpg_items') await conn.execute('CREATE TABLE asyncpg_items (id bigserial PRIMARY KEY, embedding vector(3))') - embedding = np.array([1.5, 2, 3]) - await conn.execute("INSERT INTO asyncpg_items (embedding) VALUES ($1), (NULL)", embedding) + embedding = Vector([1.5, 2, 3]) + embedding2 = np.array([1.5, 2, 3]) + await conn.execute("INSERT INTO asyncpg_items (embedding) VALUES ($1), ($2), (NULL)", embedding, embedding2) res = await conn.fetch("SELECT * FROM asyncpg_items ORDER BY id") - assert np.array_equal(res[0]['embedding'], embedding) + assert 
np.array_equal(res[0]['embedding'], embedding.to_numpy()) assert res[0]['embedding'].dtype == np.float32 - assert res[1]['embedding'] is None + assert np.array_equal(res[1]['embedding'], embedding2) + assert res[2]['embedding'] is None From 9f825f2e8360a4f6ec8af0341584817e5191008c Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Mon, 10 Feb 2025 18:14:09 -0800 Subject: [PATCH 091/123] Improved asyncpg test [skip ci] --- tests/test_asyncpg.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/test_asyncpg.py b/tests/test_asyncpg.py index 982ea8d..34d66a1 100644 --- a/tests/test_asyncpg.py +++ b/tests/test_asyncpg.py @@ -110,8 +110,10 @@ async def test_vector_array(self): await register_vector(conn) embeddings = [Vector([1.5, 2, 3]), Vector([4.5, 5, 6])] + await conn.execute("INSERT INTO asyncpg_items (embeddings) VALUES ($1)", embeddings) + embeddings2 = [np.array([1.5, 2, 3]), np.array([4.5, 5, 6])] - await conn.execute("INSERT INTO asyncpg_items (embeddings) VALUES (ARRAY[$1, $2]::vector[]), (ARRAY[$3, $4]::vector[])", embeddings[0], embeddings[1], embeddings2[0], embeddings2[1]) + await conn.execute("INSERT INTO asyncpg_items (embeddings) VALUES (ARRAY[$1, $2]::vector[])", embeddings2[0], embeddings2[1]) res = await conn.fetch("SELECT * FROM asyncpg_items ORDER BY id") assert np.array_equal(res[0]['embeddings'][0], embeddings[0].to_numpy()) From bb3b32ccf9718c3675767de3e226d3638c1f82ea Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Mon, 10 Feb 2025 18:20:20 -0800 Subject: [PATCH 092/123] Improved tests [skip ci] --- tests/test_peewee.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/test_peewee.py b/tests/test_peewee.py index 670d880..d7028c3 100644 --- a/tests/test_peewee.py +++ b/tests/test_peewee.py @@ -1,7 +1,7 @@ from math import sqrt import numpy as np from peewee import Model, PostgresqlDatabase, fn -from pgvector import SparseVector +from pgvector import HalfVector, SparseVector from pgvector.peewee import VectorField, HalfVectorField, FixedBitField, SparseVectorField db = PostgresqlDatabase('pgvector_python_test') @@ -77,7 +77,7 @@ def test_vector_l1_distance(self): def test_halfvec(self): Item.create(id=1, half_embedding=[1, 2, 3]) item = Item.get_by_id(1) - assert item.half_embedding.to_list() == [1, 2, 3] + assert item.half_embedding == HalfVector([1, 2, 3]) def test_halfvec_l2_distance(self): create_items() @@ -129,7 +129,7 @@ def test_bit_jaccard_distance(self): def test_sparsevec(self): Item.create(id=1, sparse_embedding=[1, 2, 3]) item = Item.get_by_id(1) - assert item.sparse_embedding.to_list() == [1, 2, 3] + assert item.sparse_embedding == SparseVector([1, 2, 3]) def test_sparsevec_l2_distance(self): create_items() @@ -186,7 +186,7 @@ def test_halfvec_avg(self): Item.create(half_embedding=[1, 2, 3]) Item.create(half_embedding=[4, 5, 6]) avg = Item.select(fn.avg(Item.half_embedding).coerce(True)).scalar() - assert avg.to_list() == [2.5, 3.5, 4.5] + assert avg == HalfVector([2.5, 3.5, 4.5]) def test_halfvec_sum(self): sum = Item.select(fn.sum(Item.half_embedding).coerce(True)).scalar() @@ -194,7 +194,7 @@ def test_halfvec_sum(self): Item.create(half_embedding=[1, 2, 3]) Item.create(half_embedding=[4, 5, 6]) sum = Item.select(fn.sum(Item.half_embedding).coerce(True)).scalar() - assert sum.to_list() == [5, 7, 9] + assert sum == HalfVector([5, 7, 9]) def test_get_or_create(self): Item.get_or_create(id=1, defaults={'embedding': [1, 2, 3]}) From c7cd058ea3145fd7cdcb45f712c0f4450ddbe16e Mon Sep 17 00:00:00 2001 From: 
Andrew Kane Date: Mon, 10 Feb 2025 18:24:06 -0800 Subject: [PATCH 093/123] Improved tests [skip ci] --- tests/test_django.py | 12 ++++++------ tests/test_psycopg.py | 8 ++++++-- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/tests/test_django.py b/tests/test_django.py index 65082a3..f187ad4 100644 --- a/tests/test_django.py +++ b/tests/test_django.py @@ -199,7 +199,7 @@ def test_vector_l1_distance(self): def test_halfvec(self): Item(id=1, half_embedding=[1, 2, 3]).save() item = Item.objects.get(pk=1) - assert item.half_embedding.to_list() == [1, 2, 3] + assert item.half_embedding == HalfVector([1, 2, 3]) def test_halfvec_l2_distance(self): create_items() @@ -251,7 +251,7 @@ def test_bit_jaccard_distance(self): def test_sparsevec(self): Item(id=1, sparse_embedding=SparseVector([1, 2, 3])).save() item = Item.objects.get(pk=1) - assert item.sparse_embedding.to_list() == [1, 2, 3] + assert item.sparse_embedding == SparseVector([1, 2, 3]) def test_sparsevec_l2_distance(self): create_items() @@ -309,7 +309,7 @@ def test_halfvec_avg(self): Item(half_embedding=[1, 2, 3]).save() Item(half_embedding=[4, 5, 6]).save() avg = Item.objects.aggregate(Avg('half_embedding'))['half_embedding__avg'] - assert avg.to_list() == [2.5, 3.5, 4.5] + assert avg == HalfVector([2.5, 3.5, 4.5]) def test_halfvec_sum(self): sum = Item.objects.aggregate(Sum('half_embedding'))['half_embedding__sum'] @@ -317,7 +317,7 @@ def test_halfvec_sum(self): Item(half_embedding=[1, 2, 3]).save() Item(half_embedding=[4, 5, 6]).save() sum = Item.objects.aggregate(Sum('half_embedding'))['half_embedding__sum'] - assert sum.to_list() == [5, 7, 9] + assert sum == HalfVector([5, 7, 9]) def test_serialization(self): create_items() @@ -375,7 +375,7 @@ def test_halfvec_form_save(self): assert form.has_changed() assert form.is_valid() assert form.save() - assert [4, 5, 6] == Item.objects.get(pk=1).half_embedding.to_list() + assert Item.objects.get(pk=1).half_embedding == HalfVector([4, 5, 6]) def test_halfvec_form_save_missing(self): Item(id=1).save() @@ -432,7 +432,7 @@ def test_sparsevec_form_save(self): assert form.has_changed() assert form.is_valid() assert form.save() - assert [4, 5, 6] == Item.objects.get(pk=1).sparse_embedding.to_list() + assert Item.objects.get(pk=1).sparse_embedding == SparseVector([4, 5, 6]) def test_sparesevec_form_save_missing(self): Item(id=1).save() diff --git a/tests/test_psycopg.py b/tests/test_psycopg.py index 6a9d0b7..e2a40b2 100644 --- a/tests/test_psycopg.py +++ b/tests/test_psycopg.py @@ -69,17 +69,19 @@ def test_halfvec(self): conn.execute('INSERT INTO psycopg_items (half_embedding) VALUES (%s)', (embedding,)) res = conn.execute('SELECT half_embedding FROM psycopg_items ORDER BY id').fetchone()[0] - assert res.to_list() == [1.5, 2, 3] + assert res == HalfVector([1.5, 2, 3]) def test_halfvec_binary_format(self): embedding = HalfVector([1.5, 2, 3]) res = conn.execute('SELECT %b::halfvec', (embedding,), binary=True).fetchone()[0] + assert res == HalfVector([1.5, 2, 3]) assert res.to_list() == [1.5, 2, 3] assert np.array_equal(res.to_numpy(), np.array([1.5, 2, 3])) def test_halfvec_text_format(self): embedding = HalfVector([1.5, 2, 3]) res = conn.execute('SELECT %t::halfvec', (embedding,)).fetchone()[0] + assert res == HalfVector([1.5, 2, 3]) assert res.to_list() == [1.5, 2, 3] assert np.array_equal(res.to_numpy(), np.array([1.5, 2, 3])) @@ -106,11 +108,12 @@ def test_sparsevec(self): conn.execute('INSERT INTO psycopg_items (sparse_embedding) VALUES (%s)', (embedding,)) res = 
conn.execute('SELECT sparse_embedding FROM psycopg_items ORDER BY id').fetchone()[0] - assert res.to_list() == [1.5, 2, 3] + assert res == SparseVector([1.5, 2, 3]) def test_sparsevec_binary_format(self): embedding = SparseVector([1.5, 0, 2, 0, 3, 0]) res = conn.execute('SELECT %b::sparsevec', (embedding,), binary=True).fetchone()[0] + assert res == embedding assert res.dimensions() == 6 assert res.indices() == [0, 2, 4] assert res.values() == [1.5, 2, 3] @@ -120,6 +123,7 @@ def test_sparsevec_binary_format(self): def test_sparsevec_text_format(self): embedding = SparseVector([1.5, 0, 2, 0, 3, 0]) res = conn.execute('SELECT %t::sparsevec', (embedding,)).fetchone()[0] + assert res == embedding assert res.dimensions() == 6 assert res.indices() == [0, 2, 4] assert res.values() == [1.5, 2, 3] From 8441b463ccc8738a951dd8fd2c9ac8b8b292c774 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Mon, 10 Feb 2025 18:26:05 -0800 Subject: [PATCH 094/123] Improved tests [skip ci] --- tests/test_psycopg2.py | 12 ++++++------ tests/test_sparse_vector.py | 1 + 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/tests/test_psycopg2.py b/tests/test_psycopg2.py index 1994c87..71e0015 100644 --- a/tests/test_psycopg2.py +++ b/tests/test_psycopg2.py @@ -46,7 +46,7 @@ def test_halfvec(self): cur.execute('SELECT half_embedding FROM psycopg2_items ORDER BY id') res = cur.fetchall() - assert res[0][0].to_list() == [1.5, 2, 3] + assert res[0][0] == HalfVector([1.5, 2, 3]) assert res[1][0] is None def test_bit(self): @@ -64,7 +64,7 @@ def test_sparsevec(self): cur.execute('SELECT sparse_embedding FROM psycopg2_items ORDER BY id') res = cur.fetchall() - assert res[0][0].to_list() == [1.5, 2, 3] + assert res[0][0] == SparseVector([1.5, 2, 3]) assert res[1][0] is None def test_vector_array(self): @@ -82,8 +82,8 @@ def test_halfvec_array(self): cur.execute('SELECT half_embeddings FROM psycopg2_items ORDER BY id') res = cur.fetchone() - assert res[0][0].to_list() == [1.5, 2, 3] - assert res[0][1].to_list() == [4.5, 5, 6] + assert res[0][0] == HalfVector([1.5, 2, 3]) + assert res[0][1] == HalfVector([4.5, 5, 6]) def test_sparsevec_array(self): embeddings = [SparseVector([1.5, 2, 3]), SparseVector([4.5, 5, 6])] @@ -91,8 +91,8 @@ def test_sparsevec_array(self): cur.execute('SELECT sparse_embeddings FROM psycopg2_items ORDER BY id') res = cur.fetchone() - assert res[0][0].to_list() == [1.5, 2, 3] - assert res[0][1].to_list() == [4.5, 5, 6] + assert res[0][0] == SparseVector([1.5, 2, 3]) + assert res[0][1] == SparseVector([4.5, 5, 6]) def test_cursor_factory(self): for cursor_factory in [DictCursor, RealDictCursor, NamedTupleCursor]: diff --git a/tests/test_sparse_vector.py b/tests/test_sparse_vector.py index b5e7fe8..29c3ea7 100644 --- a/tests/test_sparse_vector.py +++ b/tests/test_sparse_vector.py @@ -56,6 +56,7 @@ def test_equality(self): assert SparseVector([1, 0, 2, 0, 3, 0]) == SparseVector([1, 0, 2, 0, 3, 0]) assert SparseVector([1, 0, 2, 0, 3, 0]) != SparseVector([1, 0, 2, 0, 3, 1]) assert SparseVector([1, 0, 2, 0, 3, 0]) == SparseVector({2: 2, 4: 3, 0: 1, 3: 0}, 6) + assert SparseVector({}, 1) != SparseVector({}, 2) def test_dimensions(self): assert SparseVector([1, 0, 2, 0, 3, 0]).dimensions() == 6 From 6b8857a3146cf581bebcf32eb81a37135aa2fc15 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Mon, 10 Feb 2025 18:28:20 -0800 Subject: [PATCH 095/123] Improved tests [skip ci] --- tests/test_sqlalchemy.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/test_sqlalchemy.py 
b/tests/test_sqlalchemy.py index 052edd7..d791bd6 100644 --- a/tests/test_sqlalchemy.py +++ b/tests/test_sqlalchemy.py @@ -1,7 +1,7 @@ import asyncpg import numpy as np import os -from pgvector import SparseVector +from pgvector import HalfVector, SparseVector from pgvector.sqlalchemy import VECTOR, HALFVEC, BIT, SPARSEVEC, avg, sum import pytest from sqlalchemy import create_engine, event, insert, inspect, select, text, MetaData, Table, Column, Index, Integer, ARRAY @@ -256,7 +256,7 @@ def test_halfvec(self, engine): session.add(Item(id=1, half_embedding=[1, 2, 3])) session.commit() item = session.get(Item, 1) - assert item.half_embedding.to_list() == [1, 2, 3] + assert item.half_embedding == HalfVector([1, 2, 3]) def test_halfvec_l2_distance(self, engine): create_items() @@ -348,7 +348,7 @@ def test_sparsevec(self, engine): session.add(Item(id=1, sparse_embedding=[1, 2, 3])) session.commit() item = session.get(Item, 1) - assert item.sparse_embedding.to_list() == [1, 2, 3] + assert item.sparse_embedding == SparseVector([1, 2, 3]) def test_sparsevec_l2_distance(self, engine): create_items() @@ -551,8 +551,8 @@ def test_halfvec_array(self, engine): # this fails if the driver does not cast arrays item = session.get(Item, 1) - assert item.half_embeddings[0].to_list() == [1, 2, 3] - assert item.half_embeddings[1].to_list() == [4, 5, 6] + assert item.half_embeddings[0] == HalfVector([1, 2, 3]) + assert item.half_embeddings[1] == HalfVector([4, 5, 6]) @pytest.mark.parametrize('engine', async_engines) @@ -582,7 +582,7 @@ async def test_halfvec(self, engine): embedding = [1, 2, 3] session.add(Item(id=1, half_embedding=embedding)) item = await session.get(Item, 1) - assert item.half_embedding.to_list() == embedding + assert item.half_embedding == HalfVector(embedding) await engine.dispose() @@ -608,7 +608,7 @@ async def test_sparsevec(self, engine): embedding = [1, 2, 3] session.add(Item(id=1, sparse_embedding=embedding)) item = await session.get(Item, 1) - assert item.sparse_embedding.to_list() == embedding + assert item.sparse_embedding == SparseVector(embedding) await engine.dispose() From 022dd061b4ebdfb7e39b23abe782bc2d89ec9e98 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Mon, 10 Feb 2025 18:29:57 -0800 Subject: [PATCH 096/123] Improved tests [skip ci] --- tests/test_sqlalchemy.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tests/test_sqlalchemy.py b/tests/test_sqlalchemy.py index d791bd6..4b1e516 100644 --- a/tests/test_sqlalchemy.py +++ b/tests/test_sqlalchemy.py @@ -1,7 +1,7 @@ import asyncpg import numpy as np import os -from pgvector import HalfVector, SparseVector +from pgvector import HalfVector, SparseVector, Vector from pgvector.sqlalchemy import VECTOR, HALFVEC, BIT, SPARSEVEC, avg, sum import pytest from sqlalchemy import create_engine, event, insert, inspect, select, text, MetaData, Table, Column, Index, Integer, ARRAY @@ -637,9 +637,14 @@ async def test_vector_array(self, engine): async with async_session() as session: async with session.begin(): - session.add(Item(id=1, embeddings=[np.array([1, 2, 3]), np.array([4, 5, 6])])) + session.add(Item(id=1, embeddings=[Vector([1, 2, 3]), Vector([4, 5, 6])])) item = await session.get(Item, 1) assert item.embeddings[0].tolist() == [1, 2, 3] assert item.embeddings[1].tolist() == [4, 5, 6] + session.add(Item(id=2, embeddings=[np.array([1, 2, 3]), np.array([4, 5, 6])])) + item = await session.get(Item, 2) + assert item.embeddings[0].tolist() == [1, 2, 3] + assert item.embeddings[1].tolist() == [4, 5, 6] + 
await engine.dispose() From bb02ee2742714cb4b566b95deb71a82539977dd4 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Mon, 10 Feb 2025 18:48:17 -0800 Subject: [PATCH 097/123] Improved tests [skip ci] --- tests/test_sqlmodel.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/test_sqlmodel.py b/tests/test_sqlmodel.py index b0e8ccd..8a472b1 100644 --- a/tests/test_sqlmodel.py +++ b/tests/test_sqlmodel.py @@ -1,5 +1,5 @@ import numpy as np -from pgvector import SparseVector +from pgvector import HalfVector, SparseVector from pgvector.sqlalchemy import VECTOR, HALFVEC, BIT, SPARSEVEC, avg, sum import pytest from sqlalchemy.exc import StatementError @@ -107,7 +107,7 @@ def test_halfvec(self): session.add(Item(id=1, half_embedding=[1, 2, 3])) session.commit() item = session.get(Item, 1) - assert item.half_embedding.to_list() == [1, 2, 3] + assert item.half_embedding == HalfVector([1, 2, 3]) def test_halfvec_l2_distance(self): create_items() @@ -157,7 +157,7 @@ def test_sparsevec(self): session.add(Item(id=1, sparse_embedding=[1, 2, 3])) session.commit() item = session.get(Item, 1) - assert item.sparse_embedding.to_list() == [1, 2, 3] + assert item.sparse_embedding == SparseVector([1, 2, 3]) def test_sparsevec_l2_distance(self): create_items() @@ -220,7 +220,7 @@ def test_halfvec_avg(self): session.add(Item(half_embedding=[1, 2, 3])) session.add(Item(half_embedding=[4, 5, 6])) res = session.exec(select(avg(Item.half_embedding))).first() - assert res.to_list() == [2.5, 3.5, 4.5] + assert res == HalfVector([2.5, 3.5, 4.5]) def test_halfvec_sum(self): with Session(engine) as session: @@ -229,7 +229,7 @@ def test_halfvec_sum(self): session.add(Item(half_embedding=[1, 2, 3])) session.add(Item(half_embedding=[4, 5, 6])) res = session.exec(select(sum(Item.half_embedding))).first() - assert res.to_list() == [5, 7, 9] + assert res == HalfVector([5, 7, 9]) def test_bad_dimensions(self): item = Item(embedding=[1, 2]) From 340caa58195fc5e7b99eed8ab7fe4e4e912fd73c Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Mon, 10 Feb 2025 18:53:17 -0800 Subject: [PATCH 098/123] Improved tests [skip ci] --- tests/test_psycopg.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tests/test_psycopg.py b/tests/test_psycopg.py index e2a40b2..0859be7 100644 --- a/tests/test_psycopg.py +++ b/tests/test_psycopg.py @@ -75,6 +75,7 @@ def test_halfvec_binary_format(self): embedding = HalfVector([1.5, 2, 3]) res = conn.execute('SELECT %b::halfvec', (embedding,), binary=True).fetchone()[0] assert res == HalfVector([1.5, 2, 3]) + # TODO move assert res.to_list() == [1.5, 2, 3] assert np.array_equal(res.to_numpy(), np.array([1.5, 2, 3])) @@ -82,6 +83,7 @@ def test_halfvec_text_format(self): embedding = HalfVector([1.5, 2, 3]) res = conn.execute('SELECT %t::halfvec', (embedding,)).fetchone()[0] assert res == HalfVector([1.5, 2, 3]) + # TODO move assert res.to_list() == [1.5, 2, 3] assert np.array_equal(res.to_numpy(), np.array([1.5, 2, 3])) @@ -114,6 +116,7 @@ def test_sparsevec_binary_format(self): embedding = SparseVector([1.5, 0, 2, 0, 3, 0]) res = conn.execute('SELECT %b::sparsevec', (embedding,), binary=True).fetchone()[0] assert res == embedding + # TODO move assert res.dimensions() == 6 assert res.indices() == [0, 2, 4] assert res.values() == [1.5, 2, 3] @@ -124,6 +127,7 @@ def test_sparsevec_text_format(self): embedding = SparseVector([1.5, 0, 2, 0, 3, 0]) res = conn.execute('SELECT %t::sparsevec', (embedding,)).fetchone()[0] assert res == embedding + # TODO move 
assert res.dimensions() == 6 assert res.indices() == [0, 2, 4] assert res.values() == [1.5, 2, 3] @@ -166,8 +170,8 @@ def test_binary_copy_to(self): cur = conn.cursor() with cur.copy("COPY psycopg_items (embedding, half_embedding) TO STDOUT WITH (FORMAT BINARY)") as copy: for row in copy.rows(): - assert Vector.from_binary(row[0]).to_list() == [1.5, 2, 3] - assert HalfVector.from_binary(row[1]).to_list() == [1.5, 2, 3] + assert np.array_equal(Vector.from_binary(row[0]).to_numpy(), embedding) + assert HalfVector.from_binary(row[1]) == half_embedding def test_binary_copy_to_set_types(self): embedding = np.array([1.5, 2, 3]) @@ -178,7 +182,7 @@ def test_binary_copy_to_set_types(self): copy.set_types(['vector', 'halfvec']) for row in copy.rows(): assert np.array_equal(row[0], embedding) - assert row[1].to_list() == [1.5, 2, 3] + assert row[1] == half_embedding def test_vector_array(self): embeddings = [np.array([1.5, 2, 3]), np.array([4.5, 5, 6])] From e6edb2a68f1a93df94c725c6d5ba29654694feab Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Mon, 10 Feb 2025 18:56:52 -0800 Subject: [PATCH 099/123] Improved tests [skip ci] --- tests/test_half_vector.py | 5 +++++ tests/test_psycopg.py | 9 --------- tests/test_sparse_vector.py | 8 ++++++++ 3 files changed, 13 insertions(+), 9 deletions(-) diff --git a/tests/test_half_vector.py b/tests/test_half_vector.py index 6a94c2e..a17699a 100644 --- a/tests/test_half_vector.py +++ b/tests/test_half_vector.py @@ -44,3 +44,8 @@ def test_equality(self): def test_dimensions(self): assert HalfVector([1, 2, 3]).dimensions() == 3 + + def test_from_text(self): + vec = HalfVector.from_text('[1.5,2,3]') + assert vec.to_list() == [1.5, 2, 3] + assert np.array_equal(vec.to_numpy(), np.array([1.5, 2, 3])) diff --git a/tests/test_psycopg.py b/tests/test_psycopg.py index 0859be7..24ab321 100644 --- a/tests/test_psycopg.py +++ b/tests/test_psycopg.py @@ -83,9 +83,6 @@ def test_halfvec_text_format(self): embedding = HalfVector([1.5, 2, 3]) res = conn.execute('SELECT %t::halfvec', (embedding,)).fetchone()[0] assert res == HalfVector([1.5, 2, 3]) - # TODO move - assert res.to_list() == [1.5, 2, 3] - assert np.array_equal(res.to_numpy(), np.array([1.5, 2, 3])) def test_bit(self): embedding = Bit([True, False, True]) @@ -127,12 +124,6 @@ def test_sparsevec_text_format(self): embedding = SparseVector([1.5, 0, 2, 0, 3, 0]) res = conn.execute('SELECT %t::sparsevec', (embedding,)).fetchone()[0] assert res == embedding - # TODO move - assert res.dimensions() == 6 - assert res.indices() == [0, 2, 4] - assert res.values() == [1.5, 2, 3] - assert res.to_list() == [1.5, 0, 2, 0, 3, 0] - assert np.array_equal(res.to_numpy(), np.array([1.5, 0, 2, 0, 3, 0])) def test_text_copy_from(self): embedding = np.array([1.5, 2, 3]) diff --git a/tests/test_sparse_vector.py b/tests/test_sparse_vector.py index 29c3ea7..fb01b5e 100644 --- a/tests/test_sparse_vector.py +++ b/tests/test_sparse_vector.py @@ -73,3 +73,11 @@ def test_to_coo(self): def test_zero_vector_text(self): vec = SparseVector({}, 3) assert vec.to_list() == SparseVector.from_text(vec.to_text()).to_list() + + def test_from_text(self): + vec = SparseVector.from_text('{1:1.5,3:2,5:3}/6') + assert vec.dimensions() == 6 + assert vec.indices() == [0, 2, 4] + assert vec.values() == [1.5, 2, 3] + assert vec.to_list() == [1.5, 0, 2, 0, 3, 0] + assert np.array_equal(vec.to_numpy(), np.array([1.5, 0, 2, 0, 3, 0])) From b57a2e9ed35c68eaa22afe27ce93401b0190adc2 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Mon, 10 Feb 2025 19:06:11 -0800 Subject: 
[PATCH 100/123] Improved tests [skip ci] --- tests/test_half_vector.py | 8 ++++++++ tests/test_psycopg.py | 9 --------- tests/test_sparse_vector.py | 11 +++++++++++ tests/test_vector.py | 13 +++++++++++++ 4 files changed, 32 insertions(+), 9 deletions(-) diff --git a/tests/test_half_vector.py b/tests/test_half_vector.py index a17699a..9c0b041 100644 --- a/tests/test_half_vector.py +++ b/tests/test_half_vector.py @@ -1,6 +1,7 @@ import numpy as np from pgvector import HalfVector import pytest +from struct import pack class TestHalfVector: @@ -49,3 +50,10 @@ def test_from_text(self): vec = HalfVector.from_text('[1.5,2,3]') assert vec.to_list() == [1.5, 2, 3] assert np.array_equal(vec.to_numpy(), np.array([1.5, 2, 3])) + + def test_from_binary(self): + data = pack('>HH3e', 3, 0, *[1.5, 2, 3]) + vec = HalfVector.from_binary(data) + assert vec.to_list() == [1.5, 2, 3] + assert np.array_equal(vec.to_numpy(), np.array([1.5, 2, 3])) + assert vec.to_binary() == data diff --git a/tests/test_psycopg.py b/tests/test_psycopg.py index 24ab321..f61b4e3 100644 --- a/tests/test_psycopg.py +++ b/tests/test_psycopg.py @@ -75,9 +75,6 @@ def test_halfvec_binary_format(self): embedding = HalfVector([1.5, 2, 3]) res = conn.execute('SELECT %b::halfvec', (embedding,), binary=True).fetchone()[0] assert res == HalfVector([1.5, 2, 3]) - # TODO move - assert res.to_list() == [1.5, 2, 3] - assert np.array_equal(res.to_numpy(), np.array([1.5, 2, 3])) def test_halfvec_text_format(self): embedding = HalfVector([1.5, 2, 3]) @@ -113,12 +110,6 @@ def test_sparsevec_binary_format(self): embedding = SparseVector([1.5, 0, 2, 0, 3, 0]) res = conn.execute('SELECT %b::sparsevec', (embedding,), binary=True).fetchone()[0] assert res == embedding - # TODO move - assert res.dimensions() == 6 - assert res.indices() == [0, 2, 4] - assert res.values() == [1.5, 2, 3] - assert res.to_list() == [1.5, 0, 2, 0, 3, 0] - assert np.array_equal(res.to_numpy(), np.array([1.5, 0, 2, 0, 3, 0])) def test_sparsevec_text_format(self): embedding = SparseVector([1.5, 0, 2, 0, 3, 0]) diff --git a/tests/test_sparse_vector.py b/tests/test_sparse_vector.py index fb01b5e..fb51db9 100644 --- a/tests/test_sparse_vector.py +++ b/tests/test_sparse_vector.py @@ -2,6 +2,7 @@ from pgvector import SparseVector import pytest from scipy.sparse import coo_array +from struct import pack class TestSparseVector: @@ -81,3 +82,13 @@ def test_from_text(self): assert vec.values() == [1.5, 2, 3] assert vec.to_list() == [1.5, 0, 2, 0, 3, 0] assert np.array_equal(vec.to_numpy(), np.array([1.5, 0, 2, 0, 3, 0])) + + def test_from_binary(self): + data = pack('>iii3i3f', 6, 3, 0, *[0, 2, 4], *[1.5, 2, 3]) + vec = SparseVector.from_binary(data) + assert vec.dimensions() == 6 + assert vec.indices() == [0, 2, 4] + assert vec.values() == [1.5, 2, 3] + assert vec.to_list() == [1.5, 0, 2, 0, 3, 0] + assert np.array_equal(vec.to_numpy(), np.array([1.5, 0, 2, 0, 3, 0])) + assert vec.to_binary() == data diff --git a/tests/test_vector.py b/tests/test_vector.py index 406637f..094dd34 100644 --- a/tests/test_vector.py +++ b/tests/test_vector.py @@ -1,6 +1,7 @@ import numpy as np from pgvector import Vector import pytest +from struct import pack class TestVector: @@ -44,3 +45,15 @@ def test_equality(self): def test_dimensions(self): assert Vector([1, 2, 3]).dimensions() == 3 + + def test_from_text(self): + vec = Vector.from_text('[1.5,2,3]') + assert vec.to_list() == [1.5, 2, 3] + assert np.array_equal(vec.to_numpy(), np.array([1.5, 2, 3])) + + def test_from_binary(self): + data = pack('>HH3f', 
3, 0, *[1.5, 2, 3]) + vec = Vector.from_binary(data) + assert vec.to_list() == [1.5, 2, 3] + assert np.array_equal(vec.to_numpy(), np.array([1.5, 2, 3])) + assert vec.to_binary() == data From b6ccb3043fb1dac552b4dcdf6ecb947434d3b234 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Mon, 10 Feb 2025 19:06:51 -0800 Subject: [PATCH 101/123] Improved tests [skip ci] --- tests/test_half_vector.py | 2 +- tests/test_sparse_vector.py | 2 +- tests/test_vector.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_half_vector.py b/tests/test_half_vector.py index 9c0b041..756adc2 100644 --- a/tests/test_half_vector.py +++ b/tests/test_half_vector.py @@ -52,7 +52,7 @@ def test_from_text(self): assert np.array_equal(vec.to_numpy(), np.array([1.5, 2, 3])) def test_from_binary(self): - data = pack('>HH3e', 3, 0, *[1.5, 2, 3]) + data = pack('>HH3e', 3, 0, 1.5, 2, 3) vec = HalfVector.from_binary(data) assert vec.to_list() == [1.5, 2, 3] assert np.array_equal(vec.to_numpy(), np.array([1.5, 2, 3])) diff --git a/tests/test_sparse_vector.py b/tests/test_sparse_vector.py index fb51db9..cf5b016 100644 --- a/tests/test_sparse_vector.py +++ b/tests/test_sparse_vector.py @@ -84,7 +84,7 @@ def test_from_text(self): assert np.array_equal(vec.to_numpy(), np.array([1.5, 0, 2, 0, 3, 0])) def test_from_binary(self): - data = pack('>iii3i3f', 6, 3, 0, *[0, 2, 4], *[1.5, 2, 3]) + data = pack('>iii3i3f', 6, 3, 0, 0, 2, 4, 1.5, 2, 3) vec = SparseVector.from_binary(data) assert vec.dimensions() == 6 assert vec.indices() == [0, 2, 4] diff --git a/tests/test_vector.py b/tests/test_vector.py index 094dd34..c367a7a 100644 --- a/tests/test_vector.py +++ b/tests/test_vector.py @@ -52,7 +52,7 @@ def test_from_text(self): assert np.array_equal(vec.to_numpy(), np.array([1.5, 2, 3])) def test_from_binary(self): - data = pack('>HH3f', 3, 0, *[1.5, 2, 3]) + data = pack('>HH3f', 3, 0, 1.5, 2, 3) vec = Vector.from_binary(data) assert vec.to_list() == [1.5, 2, 3] assert np.array_equal(vec.to_numpy(), np.array([1.5, 2, 3])) From e566d4c9b4968b232c2348e9e608d06ee90b6253 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Mon, 10 Feb 2025 19:20:13 -0800 Subject: [PATCH 102/123] Improved tests [skip ci] --- tests/test_django.py | 12 ++++++------ tests/test_half_vector.py | 4 ++-- tests/test_peewee.py | 10 +++++----- tests/test_psycopg.py | 12 ++++++------ tests/test_psycopg2.py | 2 +- tests/test_sparse_vector.py | 8 ++++---- tests/test_sqlalchemy.py | 18 +++++++++--------- tests/test_sqlmodel.py | 2 +- tests/test_vector.py | 4 ++-- 9 files changed, 36 insertions(+), 36 deletions(-) diff --git a/tests/test_django.py b/tests/test_django.py index f187ad4..7a8a6eb 100644 --- a/tests/test_django.py +++ b/tests/test_django.py @@ -165,7 +165,7 @@ def setup_method(self): def test_vector(self): Item(id=1, embedding=[1, 2, 3]).save() item = Item.objects.get(pk=1) - assert np.array_equal(item.embedding, np.array([1, 2, 3])) + assert np.array_equal(item.embedding, [1, 2, 3]) assert item.embedding.dtype == np.float32 def test_vector_l2_distance(self): @@ -293,7 +293,7 @@ def test_vector_avg(self): Item(embedding=[1, 2, 3]).save() Item(embedding=[4, 5, 6]).save() avg = Item.objects.aggregate(Avg('embedding'))['embedding__avg'] - assert np.array_equal(avg, np.array([2.5, 3.5, 4.5])) + assert np.array_equal(avg, [2.5, 3.5, 4.5]) def test_vector_sum(self): sum = Item.objects.aggregate(Sum('embedding'))['embedding__sum'] @@ -301,7 +301,7 @@ def test_vector_sum(self): Item(embedding=[1, 2, 3]).save() Item(embedding=[4, 5, 6]).save() sum = 
Item.objects.aggregate(Sum('embedding'))['embedding__sum'] - assert np.array_equal(sum, np.array([5, 7, 9])) + assert np.array_equal(sum, [5, 7, 9]) def test_halfvec_avg(self): avg = Item.objects.aggregate(Avg('half_embedding'))['half_embedding__avg'] @@ -347,7 +347,7 @@ def test_vector_form_save(self): assert form.has_changed() assert form.is_valid() assert form.save() - assert [4, 5, 6] == Item.objects.get(pk=1).embedding.tolist() + assert np.array_equal(Item.objects.get(pk=1).embedding, [4, 5, 6]) def test_vector_form_save_missing(self): Item(id=1).save() @@ -465,8 +465,8 @@ def test_vector_array(self): # this fails if the driver does not cast arrays item = Item.objects.get(pk=1) - assert item.embeddings[0].tolist() == [1, 2, 3] - assert item.embeddings[1].tolist() == [4, 5, 6] + assert np.array_equal(item.embeddings[0], [1, 2, 3]) + assert np.array_equal(item.embeddings[1], [4, 5, 6]) def test_double_array(self): Item(id=1, double_embedding=[1, 1, 1]).save() diff --git a/tests/test_half_vector.py b/tests/test_half_vector.py index 756adc2..78b4977 100644 --- a/tests/test_half_vector.py +++ b/tests/test_half_vector.py @@ -49,11 +49,11 @@ def test_dimensions(self): def test_from_text(self): vec = HalfVector.from_text('[1.5,2,3]') assert vec.to_list() == [1.5, 2, 3] - assert np.array_equal(vec.to_numpy(), np.array([1.5, 2, 3])) + assert np.array_equal(vec.to_numpy(), [1.5, 2, 3]) def test_from_binary(self): data = pack('>HH3e', 3, 0, 1.5, 2, 3) vec = HalfVector.from_binary(data) assert vec.to_list() == [1.5, 2, 3] - assert np.array_equal(vec.to_numpy(), np.array([1.5, 2, 3])) + assert np.array_equal(vec.to_numpy(), [1.5, 2, 3]) assert vec.to_binary() == data diff --git a/tests/test_peewee.py b/tests/test_peewee.py index d7028c3..64fc009 100644 --- a/tests/test_peewee.py +++ b/tests/test_peewee.py @@ -43,7 +43,7 @@ def setup_method(self): def test_vector(self): Item.create(id=1, embedding=[1, 2, 3]) item = Item.get_by_id(1) - assert np.array_equal(item.embedding, np.array([1, 2, 3])) + assert np.array_equal(item.embedding, [1, 2, 3]) assert item.embedding.dtype == np.float32 def test_vector_l2_distance(self): @@ -170,7 +170,7 @@ def test_vector_avg(self): Item.create(embedding=[1, 2, 3]) Item.create(embedding=[4, 5, 6]) avg = Item.select(fn.avg(Item.embedding).coerce(True)).scalar() - assert np.array_equal(avg, np.array([2.5, 3.5, 4.5])) + assert np.array_equal(avg, [2.5, 3.5, 4.5]) def test_vector_sum(self): sum = Item.select(fn.sum(Item.embedding).coerce(True)).scalar() @@ -178,7 +178,7 @@ def test_vector_sum(self): Item.create(embedding=[1, 2, 3]) Item.create(embedding=[4, 5, 6]) sum = Item.select(fn.sum(Item.embedding).coerce(True)).scalar() - assert np.array_equal(sum, np.array([5, 7, 9])) + assert np.array_equal(sum, [5, 7, 9]) def test_halfvec_avg(self): avg = Item.select(fn.avg(Item.half_embedding).coerce(True)).scalar() @@ -220,5 +220,5 @@ class Meta: # fails with column "embeddings" is of type vector[] but expression is of type text[] # ExtItem.create(id=1, embeddings=[np.array([1, 2, 3]), np.array([4, 5, 6])]) # item = ExtItem.get_by_id(1) - # assert np.array_equal(item.embeddings[0], np.array([1, 2, 3])) - # assert np.array_equal(item.embeddings[1], np.array([4, 5, 6])) + # assert np.array_equal(item.embeddings[0], [1, 2, 3]) + # assert np.array_equal(item.embeddings[1], [4, 5, 6]) diff --git a/tests/test_psycopg.py b/tests/test_psycopg.py index f61b4e3..698b34f 100644 --- a/tests/test_psycopg.py +++ b/tests/test_psycopg.py @@ -46,23 +46,23 @@ def 
test_vector_text_format_non_contiguous(self): embedding = np.flipud(np.array([1.5, 2, 3])) assert not embedding.data.contiguous res = conn.execute('SELECT %t::vector', (embedding,)).fetchone()[0] - assert np.array_equal(res, np.array([3, 2, 1.5])) + assert np.array_equal(res, [3, 2, 1.5]) def test_vector_binary_format_non_contiguous(self): embedding = np.flipud(np.array([1.5, 2, 3])) assert not embedding.data.contiguous res = conn.execute('SELECT %b::vector', (embedding,)).fetchone()[0] - assert np.array_equal(res, np.array([3, 2, 1.5])) + assert np.array_equal(res, [3, 2, 1.5]) def test_vector_class_binary_format(self): embedding = Vector([1.5, 2, 3]) res = conn.execute('SELECT %b::vector', (embedding,), binary=True).fetchone()[0] - assert np.array_equal(res, np.array([1.5, 2, 3])) + assert np.array_equal(res, [1.5, 2, 3]) def test_vector_class_text_format(self): embedding = Vector([1.5, 2, 3]) res = conn.execute('SELECT %t::vector', (embedding,)).fetchone()[0] - assert np.array_equal(res, np.array([1.5, 2, 3])) + assert np.array_equal(res, [1.5, 2, 3]) def test_halfvec(self): embedding = HalfVector([1.5, 2, 3]) @@ -182,7 +182,7 @@ def configure(conn): with pool.connection() as conn: res = conn.execute("SELECT '[1,2,3]'::vector").fetchone() - assert np.array_equal(res[0], np.array([1, 2, 3])) + assert np.array_equal(res[0], [1, 2, 3]) pool.close() @@ -218,6 +218,6 @@ async def configure(conn): async with conn.cursor() as cur: await cur.execute("SELECT '[1,2,3]'::vector") res = await cur.fetchone() - assert np.array_equal(res[0], np.array([1, 2, 3])) + assert np.array_equal(res[0], [1, 2, 3]) await pool.close() diff --git a/tests/test_psycopg2.py b/tests/test_psycopg2.py index 71e0015..8f56ef5 100644 --- a/tests/test_psycopg2.py +++ b/tests/test_psycopg2.py @@ -122,7 +122,7 @@ def test_pool(self): cur = conn.cursor() cur.execute("SELECT '[1,2,3]'::vector") res = cur.fetchone() - assert np.array_equal(res[0], np.array([1, 2, 3])) + assert np.array_equal(res[0], [1, 2, 3]) finally: pool.putconn(conn) diff --git a/tests/test_sparse_vector.py b/tests/test_sparse_vector.py index cf5b016..dff03dd 100644 --- a/tests/test_sparse_vector.py +++ b/tests/test_sparse_vector.py @@ -9,7 +9,7 @@ class TestSparseVector: def test_list(self): vec = SparseVector([1, 0, 2, 0, 3, 0]) assert vec.to_list() == [1, 0, 2, 0, 3, 0] - assert vec.to_numpy().tolist() == [1, 0, 2, 0, 3, 0] + assert np.array_equal(vec.to_numpy(), [1, 0, 2, 0, 3, 0]) assert vec.indices() == [0, 2, 4] def test_list_dimensions(self): @@ -69,7 +69,7 @@ def test_values(self): assert SparseVector([1, 0, 2, 0, 3, 0]).values() == [1, 2, 3] def test_to_coo(self): - assert SparseVector([1, 0, 2, 0, 3, 0]).to_coo().toarray().tolist() == [[1, 0, 2, 0, 3, 0]] + assert np.array_equal(SparseVector([1, 0, 2, 0, 3, 0]).to_coo().toarray(), [[1, 0, 2, 0, 3, 0]]) def test_zero_vector_text(self): vec = SparseVector({}, 3) @@ -81,7 +81,7 @@ def test_from_text(self): assert vec.indices() == [0, 2, 4] assert vec.values() == [1.5, 2, 3] assert vec.to_list() == [1.5, 0, 2, 0, 3, 0] - assert np.array_equal(vec.to_numpy(), np.array([1.5, 0, 2, 0, 3, 0])) + assert np.array_equal(vec.to_numpy(), [1.5, 0, 2, 0, 3, 0]) def test_from_binary(self): data = pack('>iii3i3f', 6, 3, 0, 0, 2, 4, 1.5, 2, 3) @@ -90,5 +90,5 @@ def test_from_binary(self): assert vec.indices() == [0, 2, 4] assert vec.values() == [1.5, 2, 3] assert vec.to_list() == [1.5, 0, 2, 0, 3, 0] - assert np.array_equal(vec.to_numpy(), np.array([1.5, 0, 2, 0, 3, 0])) + assert np.array_equal(vec.to_numpy(), 
[1.5, 0, 2, 0, 3, 0]) assert vec.to_binary() == data diff --git a/tests/test_sqlalchemy.py b/tests/test_sqlalchemy.py index 4b1e516..41c309f 100644 --- a/tests/test_sqlalchemy.py +++ b/tests/test_sqlalchemy.py @@ -201,7 +201,7 @@ def test_vector(self, engine): session.add(Item(id=1, embedding=[1, 2, 3])) session.commit() item = session.get(Item, 1) - assert item.embedding.tolist() == [1, 2, 3] + assert np.array_equal(item.embedding, [1, 2, 3]) def test_vector_l2_distance(self, engine): create_items() @@ -509,7 +509,7 @@ def test_automap(self, engine): with Session(engine) as session: session.execute(insert(AutoItem), [{'embedding': np.array([1, 2, 3])}]) item = session.query(AutoItem).first() - assert item.embedding.tolist() == [1, 2, 3] + assert np.array_equal(item.embedding, [1, 2, 3]) def test_half_precision(self, engine): create_items() @@ -541,8 +541,8 @@ def test_vector_array(self, engine): # this fails if the driver does not cast arrays item = session.get(Item, 1) - assert item.embeddings[0].tolist() == [1, 2, 3] - assert item.embeddings[1].tolist() == [4, 5, 6] + assert np.array_equal(item.embeddings[0], [1, 2, 3]) + assert np.array_equal(item.embeddings[1], [4, 5, 6]) def test_halfvec_array(self, engine): with Session(engine) as session: @@ -621,7 +621,7 @@ async def test_avg(self, engine): session.add(Item(embedding=[1, 2, 3])) session.add(Item(embedding=[4, 5, 6])) res = await session.scalars(select(avg(Item.embedding))) - assert res.first().tolist() == [2.5, 3.5, 4.5] + assert np.array_equal(res.first(), [2.5, 3.5, 4.5]) await engine.dispose() @@ -639,12 +639,12 @@ async def test_vector_array(self, engine): async with session.begin(): session.add(Item(id=1, embeddings=[Vector([1, 2, 3]), Vector([4, 5, 6])])) item = await session.get(Item, 1) - assert item.embeddings[0].tolist() == [1, 2, 3] - assert item.embeddings[1].tolist() == [4, 5, 6] + assert np.array_equal(item.embeddings[0], [1, 2, 3]) + assert np.array_equal(item.embeddings[1], [4, 5, 6]) session.add(Item(id=2, embeddings=[np.array([1, 2, 3]), np.array([4, 5, 6])])) item = await session.get(Item, 2) - assert item.embeddings[0].tolist() == [1, 2, 3] - assert item.embeddings[1].tolist() == [4, 5, 6] + assert np.array_equal(item.embeddings[0], [1, 2, 3]) + assert np.array_equal(item.embeddings[1], [4, 5, 6]) await engine.dispose() diff --git a/tests/test_sqlmodel.py b/tests/test_sqlmodel.py index 8a472b1..f4994f4 100644 --- a/tests/test_sqlmodel.py +++ b/tests/test_sqlmodel.py @@ -76,7 +76,7 @@ def test_vector(self): session.add(Item(id=1, embedding=[1, 2, 3])) session.commit() item = session.get(Item, 1) - assert item.embedding.tolist() == [1, 2, 3] + assert np.array_equal(item.embedding, np.array([1, 2, 3])) def test_vector_l2_distance(self): create_items() diff --git a/tests/test_vector.py b/tests/test_vector.py index c367a7a..e5a16fe 100644 --- a/tests/test_vector.py +++ b/tests/test_vector.py @@ -49,11 +49,11 @@ def test_dimensions(self): def test_from_text(self): vec = Vector.from_text('[1.5,2,3]') assert vec.to_list() == [1.5, 2, 3] - assert np.array_equal(vec.to_numpy(), np.array([1.5, 2, 3])) + assert np.array_equal(vec.to_numpy(), [1.5, 2, 3]) def test_from_binary(self): data = pack('>HH3f', 3, 0, 1.5, 2, 3) vec = Vector.from_binary(data) assert vec.to_list() == [1.5, 2, 3] - assert np.array_equal(vec.to_numpy(), np.array([1.5, 2, 3])) + assert np.array_equal(vec.to_numpy(), [1.5, 2, 3]) assert vec.to_binary() == data From 057eff226bdb992ebdd952628bf3d54996d9437d Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: 
Mon, 10 Feb 2025 19:38:31 -0800 Subject: [PATCH 103/123] Improved tests [skip ci] --- tests/test_psycopg2.py | 6 ++---- tests/test_sqlalchemy.py | 3 +-- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/tests/test_psycopg2.py b/tests/test_psycopg2.py index 8f56ef5..3e5c8c3 100644 --- a/tests/test_psycopg2.py +++ b/tests/test_psycopg2.py @@ -82,8 +82,7 @@ def test_halfvec_array(self): cur.execute('SELECT half_embeddings FROM psycopg2_items ORDER BY id') res = cur.fetchone() - assert res[0][0] == HalfVector([1.5, 2, 3]) - assert res[0][1] == HalfVector([4.5, 5, 6]) + assert res[0] == [HalfVector([1.5, 2, 3]), HalfVector([4.5, 5, 6])] def test_sparsevec_array(self): embeddings = [SparseVector([1.5, 2, 3]), SparseVector([4.5, 5, 6])] @@ -91,8 +90,7 @@ def test_sparsevec_array(self): cur.execute('SELECT sparse_embeddings FROM psycopg2_items ORDER BY id') res = cur.fetchone() - assert res[0][0] == SparseVector([1.5, 2, 3]) - assert res[0][1] == SparseVector([4.5, 5, 6]) + assert res[0] == [SparseVector([1.5, 2, 3]), SparseVector([4.5, 5, 6])] def test_cursor_factory(self): for cursor_factory in [DictCursor, RealDictCursor, NamedTupleCursor]: diff --git a/tests/test_sqlalchemy.py b/tests/test_sqlalchemy.py index 41c309f..0d8d1ca 100644 --- a/tests/test_sqlalchemy.py +++ b/tests/test_sqlalchemy.py @@ -551,8 +551,7 @@ def test_halfvec_array(self, engine): # this fails if the driver does not cast arrays item = session.get(Item, 1) - assert item.half_embeddings[0] == HalfVector([1, 2, 3]) - assert item.half_embeddings[1] == HalfVector([4, 5, 6]) + assert item.half_embeddings == [HalfVector([1, 2, 3]), HalfVector([4, 5, 6])] @pytest.mark.parametrize('engine', async_engines) From 8443ff519ac39a9f0b9b2c7233b33accbe6f63ae Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Mon, 10 Feb 2025 20:05:01 -0800 Subject: [PATCH 104/123] Added missing dependency for example [skip ci] --- examples/implicit/requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/implicit/requirements.txt b/examples/implicit/requirements.txt index 8f04b58..424abbd 100644 --- a/examples/implicit/requirements.txt +++ b/examples/implicit/requirements.txt @@ -1,3 +1,4 @@ +h5py implicit pgvector psycopg[binary] From 2496340bc5e91a0b5cad2462f276c7b488f2e36a Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Fri, 14 Feb 2025 16:45:35 -0800 Subject: [PATCH 105/123] Added support for pg8000 --- CHANGELOG.md | 1 + README.md | 48 ++++++++++++++++++++++++++++- pgvector/pg8000/__init__.py | 5 ++++ pgvector/pg8000/register.py | 23 ++++++++++++++ tests/test_pg8000.py | 60 +++++++++++++++++++++++++++++++++++++ 5 files changed, 136 insertions(+), 1 deletion(-) create mode 100644 pgvector/pg8000/__init__.py create mode 100644 pgvector/pg8000/register.py create mode 100644 tests/test_pg8000.py diff --git a/CHANGELOG.md b/CHANGELOG.md index f53a2ce..ebc165a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ ## 0.4.0 (unreleased) - Added top-level `pgvector` package +- Added support for pg8000 - Changed `globally` option to default to `False` for Psycopg 2 - Changed `arrays` option to default to `True` for Psycopg 2 - Fixed equality for `Vector`, `HalfVector`, `Bit`, and `SparseVector` classes diff --git a/README.md b/README.md index 5a59c9d..7f980bd 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ [pgvector](https://github.com/pgvector/pgvector) support for Python -Supports [Django](https://github.com/django/django), [SQLAlchemy](https://github.com/sqlalchemy/sqlalchemy), 
[SQLModel](https://github.com/tiangolo/sqlmodel), [Psycopg 3](https://github.com/psycopg/psycopg), [Psycopg 2](https://github.com/psycopg/psycopg2), [asyncpg](https://github.com/MagicStack/asyncpg), and [Peewee](https://github.com/coleifer/peewee) +Supports [Django](https://github.com/django/django), [SQLAlchemy](https://github.com/sqlalchemy/sqlalchemy), [SQLModel](https://github.com/tiangolo/sqlmodel), [Psycopg 3](https://github.com/psycopg/psycopg), [Psycopg 2](https://github.com/psycopg/psycopg2), [asyncpg](https://github.com/MagicStack/asyncpg), [pg8000](https://github.com/tlocke/pg8000), and [Peewee](https://github.com/coleifer/peewee) [![Build Status](https://github.com/pgvector/pgvector-python/actions/workflows/build.yml/badge.svg)](https://github.com/pgvector/pgvector-python/actions) @@ -22,6 +22,7 @@ And follow the instructions for your database library: - [Psycopg 3](#psycopg-3) - [Psycopg 2](#psycopg-2) - [asyncpg](#asyncpg) +- [pg8000](#pg8000) [unreleased] - [Peewee](#peewee) Or check out some examples: @@ -562,6 +563,51 @@ await conn.execute('CREATE INDEX ON items USING ivfflat (embedding vector_l2_ops Use `vector_ip_ops` for inner product and `vector_cosine_ops` for cosine distance +## pg8000 + +Enable the extension + +```python +conn.run('CREATE EXTENSION IF NOT EXISTS vector') +``` + +Register the vector type with your connection + +```python +from pgvector.pg8000 import register_vector + +register_vector(conn) +``` + +Create a table + +```python +conn.run('CREATE TABLE items (id bigserial PRIMARY KEY, embedding vector(3))') +``` + +Insert a vector + +```python +embedding = np.array([1, 2, 3]) +conn.run('INSERT INTO items (embedding) VALUES (:embedding)', embedding=embedding) +``` + +Get the nearest neighbors to a vector + +```python +conn.run('SELECT * FROM items ORDER BY embedding <-> :embedding LIMIT 5', embedding=embedding) +``` + +Add an approximate index + +```python +conn.run('CREATE INDEX ON items USING hnsw (embedding vector_l2_ops)') +# or +conn.run('CREATE INDEX ON items USING ivfflat (embedding vector_l2_ops) WITH (lists = 100)') +``` + +Use `vector_ip_ops` for inner product and `vector_cosine_ops` for cosine distance + ## Peewee Add a vector column diff --git a/pgvector/pg8000/__init__.py b/pgvector/pg8000/__init__.py new file mode 100644 index 0000000..b3b4440 --- /dev/null +++ b/pgvector/pg8000/__init__.py @@ -0,0 +1,5 @@ +from .register import register_vector + +__all__ = [ + 'register_vector' +] diff --git a/pgvector/pg8000/register.py b/pgvector/pg8000/register.py new file mode 100644 index 0000000..15ee219 --- /dev/null +++ b/pgvector/pg8000/register.py @@ -0,0 +1,23 @@ +import numpy as np +from .. 
import Vector, HalfVector, SparseVector + + +def register_vector(conn): + # use to_regtype to get first matching type in search path + res = conn.run("SELECT typname, oid FROM pg_type WHERE oid IN (to_regtype('vector'), to_regtype('halfvec'), to_regtype('sparsevec'))") + type_info = dict(res) + + if 'vector' not in type_info: + raise RuntimeError('vector type not found in the database') + + conn.register_out_adapter(Vector, Vector._to_db) + conn.register_out_adapter(np.ndarray, Vector._to_db) + conn.register_in_adapter(type_info['vector'], Vector._from_db) + + if 'halfvec' in type_info: + conn.register_out_adapter(HalfVector, HalfVector._to_db) + conn.register_in_adapter(type_info['halfvec'], HalfVector._from_db) + + if 'sparsevec' in type_info: + conn.register_out_adapter(SparseVector, SparseVector._to_db) + conn.register_in_adapter(type_info['sparsevec'], SparseVector._from_db) diff --git a/tests/test_pg8000.py b/tests/test_pg8000.py new file mode 100644 index 0000000..86c0fb1 --- /dev/null +++ b/tests/test_pg8000.py @@ -0,0 +1,60 @@ +import numpy as np +import os +from pgvector import HalfVector, SparseVector, Vector +from pgvector.pg8000 import register_vector +from pg8000.native import Connection + +conn = Connection(os.environ["USER"], database='pgvector_python_test') + +conn.run('CREATE EXTENSION IF NOT EXISTS vector') +conn.run('DROP TABLE IF EXISTS pg8000_items') +conn.run('CREATE TABLE pg8000_items (id bigserial PRIMARY KEY, embedding vector(3), half_embedding halfvec(3), binary_embedding bit(3), sparse_embedding sparsevec(3), embeddings vector[], half_embeddings halfvec[], sparse_embeddings sparsevec[])') + +register_vector(conn) + + +class TestPg8000: + def setup_method(self): + conn.run('DELETE FROM pg8000_items') + + def test_vector(self): + embedding = np.array([1.5, 2, 3]) + conn.run('INSERT INTO pg8000_items (embedding) VALUES (:embedding), (NULL)', embedding=embedding) + + res = conn.run('SELECT embedding FROM pg8000_items ORDER BY id') + assert np.array_equal(res[0][0], embedding) + assert res[0][0].dtype == np.float32 + assert res[1][0] is None + + def test_vector_class(self): + embedding = Vector([1.5, 2, 3]) + conn.run('INSERT INTO pg8000_items (embedding) VALUES (:embedding), (NULL)', embedding=embedding) + + res = conn.run('SELECT embedding FROM pg8000_items ORDER BY id') + assert np.array_equal(res[0][0], embedding.to_numpy()) + assert res[0][0].dtype == np.float32 + assert res[1][0] is None + + def test_halfvec(self): + embedding = HalfVector([1.5, 2, 3]) + conn.run('INSERT INTO pg8000_items (half_embedding) VALUES (:embedding), (NULL)', embedding=embedding) + + res = conn.run('SELECT half_embedding FROM pg8000_items ORDER BY id') + assert res[0][0] == embedding + assert res[1][0] is None + + def test_bit(self): + embedding = '101' + conn.run('INSERT INTO pg8000_items (binary_embedding) VALUES (:embedding), (NULL)', embedding=embedding) + + res = conn.run('SELECT binary_embedding FROM pg8000_items ORDER BY id') + assert res[0][0] == '101' + assert res[1][0] is None + + def test_sparsevec(self): + embedding = SparseVector([1.5, 2, 3]) + conn.run('INSERT INTO pg8000_items (sparse_embedding) VALUES (:embedding), (NULL)', embedding=embedding) + + res = conn.run('SELECT sparse_embedding FROM pg8000_items ORDER BY id') + assert res[0][0] == embedding + assert res[1][0] is None From df1766b7f9ed6320958c04caf7f1b832d5320e4b Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Fri, 14 Feb 2025 16:59:42 -0800 Subject: [PATCH 106/123] Simplified test code [skip ci] --- 
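Beyond the README's `vector` example, the adapters registered above also cover `halfvec` and `sparsevec` when those types exist in the database. A minimal round-trip sketch, mirroring the test setup (current OS user, local `pgvector_python_test` database) rather than a definitive API:

```python
# Sketch only: assumes a local pgvector_python_test database, as in the tests
import os
from pg8000.native import Connection
from pgvector import HalfVector, SparseVector
from pgvector.pg8000 import register_vector

conn = Connection(os.environ['USER'], database='pgvector_python_test')
conn.run('CREATE EXTENSION IF NOT EXISTS vector')
register_vector(conn)

# parameters go out through the text adapters; results come back as pgvector classes
row = conn.run(
    'SELECT cast(:h as halfvec), cast(:s as sparsevec)',
    h=HalfVector([1.5, 2, 3]),
    s=SparseVector([1.5, 0, 2, 0, 3, 0]),
)[0]
assert row[0] == HalfVector([1.5, 2, 3])
assert row[1] == SparseVector([1.5, 0, 2, 0, 3, 0])
```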
tests/test_pg8000.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_pg8000.py b/tests/test_pg8000.py index 86c0fb1..4d3e474 100644 --- a/tests/test_pg8000.py +++ b/tests/test_pg8000.py @@ -8,7 +8,7 @@ conn.run('CREATE EXTENSION IF NOT EXISTS vector') conn.run('DROP TABLE IF EXISTS pg8000_items') -conn.run('CREATE TABLE pg8000_items (id bigserial PRIMARY KEY, embedding vector(3), half_embedding halfvec(3), binary_embedding bit(3), sparse_embedding sparsevec(3), embeddings vector[], half_embeddings halfvec[], sparse_embeddings sparsevec[])') +conn.run('CREATE TABLE pg8000_items (id bigserial PRIMARY KEY, embedding vector(3), half_embedding halfvec(3), binary_embedding bit(3), sparse_embedding sparsevec(3))') register_vector(conn) From 70ff5d4765bb156a45d806d3cd171b3a38f03fca Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Fri, 14 Feb 2025 17:00:58 -0800 Subject: [PATCH 107/123] Improved tests [skip ci] --- tests/test_psycopg2.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/test_psycopg2.py b/tests/test_psycopg2.py index 3e5c8c3..7f4932d 100644 --- a/tests/test_psycopg2.py +++ b/tests/test_psycopg2.py @@ -49,6 +49,15 @@ def test_halfvec(self): assert res[0][0] == HalfVector([1.5, 2, 3]) assert res[1][0] is None + def test_halfvec_class(self): + embedding = HalfVector([1.5, 2, 3]) + cur.execute('INSERT INTO psycopg2_items (half_embedding) VALUES (%s), (NULL)', (embedding,)) + + cur.execute('SELECT half_embedding FROM psycopg2_items ORDER BY id') + res = cur.fetchall() + assert res[0][0] == embedding + assert res[1][0] is None + def test_bit(self): embedding = '101' cur.execute('INSERT INTO psycopg2_items (binary_embedding) VALUES (%s), (NULL)', (embedding,)) From ac9e398f511ca65f11f62f4296e94f2106367936 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sun, 16 Feb 2025 13:15:19 -0800 Subject: [PATCH 108/123] Improved examples [skip ci] --- examples/cohere/example.py | 6 +++--- examples/openai/example.py | 25 ++++++++++++++--------- examples/sentence_transformers/example.py | 14 ++++++------- examples/sparse_search/example.py | 6 +++--- 4 files changed, 28 insertions(+), 23 deletions(-) diff --git a/examples/cohere/example.py b/examples/cohere/example.py index 780352a..393d1e0 100644 --- a/examples/cohere/example.py +++ b/examples/cohere/example.py @@ -12,7 +12,7 @@ conn.execute('CREATE TABLE documents (id bigserial PRIMARY KEY, content text, embedding bit(1024))') -def fetch_embeddings(input, input_type): +def embed(input, input_type): co = cohere.Client() response = co.embed(texts=input, model='embed-english-v3.0', input_type=input_type, embedding_types=['ubinary']) return [np.unpackbits(np.array(embedding, dtype=np.uint8)) for embedding in response.embeddings.ubinary] @@ -23,12 +23,12 @@ def fetch_embeddings(input, input_type): 'The cat is purring', 'The bear is growling' ] -embeddings = fetch_embeddings(input, 'search_document') +embeddings = embed(input, 'search_document') for content, embedding in zip(input, embeddings): conn.execute('INSERT INTO documents (content, embedding) VALUES (%s, %s)', (content, Bit(embedding))) query = 'forest' -query_embedding = fetch_embeddings([query], 'search_query')[0] +query_embedding = embed([query], 'search_query')[0] result = conn.execute('SELECT content FROM documents ORDER BY embedding <~> %s LIMIT 5', (Bit(query_embedding),)).fetchall() for row in result: print(row[0]) diff --git a/examples/openai/example.py b/examples/openai/example.py index ebed3d0..b9a078c 100644 --- a/examples/openai/example.py 
+++ b/examples/openai/example.py @@ -1,3 +1,4 @@ +import numpy as np from openai import OpenAI from pgvector.psycopg import register_vector import psycopg @@ -10,20 +11,24 @@ conn.execute('DROP TABLE IF EXISTS documents') conn.execute('CREATE TABLE documents (id bigserial PRIMARY KEY, content text, embedding vector(1536))') + +def embed(input): + client = OpenAI() + response = client.embeddings.create(input=input, model='text-embedding-3-small') + return [v.embedding for v in response.data] + + input = [ 'The dog is barking', 'The cat is purring', 'The bear is growling' ] - -client = OpenAI() -response = client.embeddings.create(input=input, model='text-embedding-3-small') -embeddings = [v.embedding for v in response.data] - +embeddings = embed(input) for content, embedding in zip(input, embeddings): - conn.execute('INSERT INTO documents (content, embedding) VALUES (%s, %s)', (content, embedding)) + conn.execute('INSERT INTO documents (content, embedding) VALUES (%s, %s)', (content, np.array(embedding))) -document_id = 1 -neighbors = conn.execute('SELECT content FROM documents WHERE id != %(id)s ORDER BY embedding <=> (SELECT embedding FROM documents WHERE id = %(id)s) LIMIT 5', {'id': document_id}).fetchall() -for neighbor in neighbors: - print(neighbor[0]) +query = 'forest' +query_embedding = embed([query])[0] +result = conn.execute('SELECT content FROM documents ORDER BY embedding <=> %s LIMIT 5', (np.array(query_embedding),)).fetchall() +for row in result: + print(row[0]) diff --git a/examples/sentence_transformers/example.py b/examples/sentence_transformers/example.py index d4e7f96..3a7dca5 100644 --- a/examples/sentence_transformers/example.py +++ b/examples/sentence_transformers/example.py @@ -10,19 +10,19 @@ conn.execute('DROP TABLE IF EXISTS documents') conn.execute('CREATE TABLE documents (id bigserial PRIMARY KEY, content text, embedding vector(384))') +model = SentenceTransformer('all-MiniLM-L6-v2') + input = [ 'The dog is barking', 'The cat is purring', 'The bear is growling' ] - -model = SentenceTransformer('all-MiniLM-L6-v2') embeddings = model.encode(input) - for content, embedding in zip(input, embeddings): conn.execute('INSERT INTO documents (content, embedding) VALUES (%s, %s)', (content, embedding)) -document_id = 1 -neighbors = conn.execute('SELECT content FROM documents WHERE id != %(id)s ORDER BY embedding <=> (SELECT embedding FROM documents WHERE id = %(id)s) LIMIT 5', {'id': document_id}).fetchall() -for neighbor in neighbors: - print(neighbor[0]) +query = 'forest' +query_embedding = model.encode(query) +result = conn.execute('SELECT content FROM documents ORDER BY embedding <=> %s LIMIT 5', (query_embedding,)).fetchall() +for row in result: + print(row[0]) diff --git a/examples/sparse_search/example.py b/examples/sparse_search/example.py index fa6074e..2b5daea 100644 --- a/examples/sparse_search/example.py +++ b/examples/sparse_search/example.py @@ -20,7 +20,7 @@ special_token_ids = [tokenizer.vocab[token] for token in tokenizer.special_tokens_map.values()] -def fetch_embeddings(input): +def embed(input): feature = tokenizer( input, padding=True, @@ -42,12 +42,12 @@ def fetch_embeddings(input): 'The cat is purring', 'The bear is growling' ] -embeddings = fetch_embeddings(input) +embeddings = embed(input) for content, embedding in zip(input, embeddings): conn.execute('INSERT INTO documents (content, embedding) VALUES (%s, %s)', (content, SparseVector(embedding))) query = 'forest' -query_embedding = fetch_embeddings([query])[0] +query_embedding = embed([query])[0] 
result = conn.execute('SELECT content FROM documents ORDER BY embedding <#> %s LIMIT 5', (SparseVector(query_embedding),)).fetchall() for row in result: print(row[0]) From 1443c3c3ca11b9efadb07612758c2ba62fb4ec65 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sun, 16 Feb 2025 13:50:50 -0800 Subject: [PATCH 109/123] Added halfvec example for OpenAI [skip ci] --- examples/openai/halfvec.py | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 examples/openai/halfvec.py diff --git a/examples/openai/halfvec.py b/examples/openai/halfvec.py new file mode 100644 index 0000000..185c785 --- /dev/null +++ b/examples/openai/halfvec.py @@ -0,0 +1,34 @@ +from openai import OpenAI +from pgvector.psycopg import register_vector, HalfVector +import psycopg + +conn = psycopg.connect(dbname='pgvector_example', autocommit=True) + +conn.execute('CREATE EXTENSION IF NOT EXISTS vector') +register_vector(conn) + +conn.execute('DROP TABLE IF EXISTS documents') +conn.execute('CREATE TABLE documents (id bigserial PRIMARY KEY, content text, embedding halfvec(3072))') +conn.execute('CREATE INDEX ON documents USING hnsw (embedding halfvec_cosine_ops)') + + +def embed(input): + client = OpenAI() + response = client.embeddings.create(input=input, model='text-embedding-3-large') + return [v.embedding for v in response.data] + + +input = [ + 'The dog is barking', + 'The cat is purring', + 'The bear is growling' +] +embeddings = embed(input) +for content, embedding in zip(input, embeddings): + conn.execute('INSERT INTO documents (content, embedding) VALUES (%s, %s)', (content, HalfVector(embedding))) + +query = 'forest' +query_embedding = embed([query])[0] +result = conn.execute('SELECT content FROM documents ORDER BY embedding <=> %s LIMIT 5', (HalfVector(query_embedding),)).fetchall() +for row in result: + print(row[0]) From 12146d74db24514831138b43ec69273e289cde1a Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sun, 16 Feb 2025 18:34:41 -0800 Subject: [PATCH 110/123] Improved example [skip ci] --- examples/sentence_transformers/example.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/sentence_transformers/example.py b/examples/sentence_transformers/example.py index 3a7dca5..50997d9 100644 --- a/examples/sentence_transformers/example.py +++ b/examples/sentence_transformers/example.py @@ -10,7 +10,7 @@ conn.execute('DROP TABLE IF EXISTS documents') conn.execute('CREATE TABLE documents (id bigserial PRIMARY KEY, content text, embedding vector(384))') -model = SentenceTransformer('all-MiniLM-L6-v2') +model = SentenceTransformer('multi-qa-MiniLM-L6-cos-v1') input = [ 'The dog is barking', From 78466224ec95a38441240753f090625056b87b1e Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Wed, 19 Feb 2025 15:54:25 -0800 Subject: [PATCH 111/123] Added reference section to readme [skip ci] --- README.md | 93 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 93 insertions(+) diff --git a/README.md b/README.md index 7f980bd..299753e 100644 --- a/README.md +++ b/README.md @@ -665,6 +665,99 @@ Item.add_index('embedding vector_l2_ops', using='hnsw') Use `vector_ip_ops` for inner product and `vector_cosine_ops` for cosine distance +## Reference + +### Half Vectors + +Create a half vector from a list + +```python +vec = HalfVector([1, 2, 3]) +``` + +Or a NumPy array + +```python +vec = HalfVector(np.array([1, 2, 3])) +``` + +Get a list + +```python +lst = vec.to_list() +``` + +Get a NumPy array + +```python +arr = vec.to_numpy() +``` + +### Sparse 
Vectors + +Create a sparse vector from a list + +```python +vec = SparseVector([1, 0, 2, 0, 3, 0]) +``` + +Or a NumPy array + +```python +vec = SparseVector(np.array([1, 0, 2, 0, 3, 0])) +``` + +Or a SciPy sparse array + +```python +arr = coo_array(([1, 2, 3], ([0, 2, 4],)), shape=(6,)) +vec = SparseVector(arr) +``` + +Or a dictionary of non-zero elements + +```python +vec = SparseVector({0: 1, 2: 2, 4: 3}, 6) +``` + +Note: Indices start at 0 + +Get the number of dimensions + +```python +dim = vec.dimensions() +``` + +Get the indices of non-zero elements + +```python +indices = vec.indices() +``` + +Get the values of non-zero elements + +```python +values = vec.values() +``` + +Get a list + +```python +lst = vec.to_list() +``` + +Get a NumPy array + +```python +arr = vec.to_numpy() +``` + +Get a SciPy sparse array + +```python +arr = vec.to_coo() +``` + ## History View the [changelog](https://github.com/pgvector/pgvector-python/blob/master/CHANGELOG.md) From ac1a543ab33a09efa2758f0179cea6a89257b601 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sat, 15 Mar 2025 14:07:29 -0700 Subject: [PATCH 112/123] Improved validation for Bit constructor --- pgvector/bit.py | 4 +++- tests/test_bit.py | 5 +++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/pgvector/bit.py b/pgvector/bit.py index 4be7385..9a890a1 100644 --- a/pgvector/bit.py +++ b/pgvector/bit.py @@ -13,7 +13,9 @@ def __init__(self, value): elif value.dtype != np.bool: raise ValueError('expected dtype to be bool or uint8') else: - value = np.asarray(value, dtype=bool) + value = np.asarray(value) + if value.dtype != np.bool: + raise ValueError('expected dtype to be bool') if value.ndim != 1: raise ValueError('expected ndim to be 1') diff --git a/tests/test_bit.py b/tests/test_bit.py index 5e1bff2..0c661d0 100644 --- a/tests/test_bit.py +++ b/tests/test_bit.py @@ -7,6 +7,11 @@ class TestBit: def test_list(self): assert Bit([True, False, True]).to_list() == [True, False, True] + def test_list_int(self): + with pytest.raises(ValueError) as error: + Bit([254, 7, 0]) + assert str(error.value) == 'expected dtype to be bool' + def test_tuple(self): assert Bit((True, False, True)).to_list() == [True, False, True] From 900cbb38370eebfeebdd519482cfd1a30cf6e937 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sat, 15 Mar 2025 14:14:46 -0700 Subject: [PATCH 113/123] Improved error message --- pgvector/bit.py | 2 +- tests/test_bit.py | 7 ++++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/pgvector/bit.py b/pgvector/bit.py index 9a890a1..a8feb55 100644 --- a/pgvector/bit.py +++ b/pgvector/bit.py @@ -15,7 +15,7 @@ def __init__(self, value): else: value = np.asarray(value) if value.dtype != np.bool: - raise ValueError('expected dtype to be bool') + raise ValueError('expected all elements to be boolean') if value.ndim != 1: raise ValueError('expected ndim to be 1') diff --git a/tests/test_bit.py b/tests/test_bit.py index 0c661d0..ae27359 100644 --- a/tests/test_bit.py +++ b/tests/test_bit.py @@ -7,10 +7,15 @@ class TestBit: def test_list(self): assert Bit([True, False, True]).to_list() == [True, False, True] + def test_list_none(self): + with pytest.raises(ValueError) as error: + Bit([True, None, True]) + assert str(error.value) == 'expected all elements to be boolean' + def test_list_int(self): with pytest.raises(ValueError) as error: Bit([254, 7, 0]) - assert str(error.value) == 'expected dtype to be bool' + assert str(error.value) == 'expected all elements to be boolean' def test_tuple(self): assert Bit((True, False, 
True)).to_list() == [True, False, True] From 534ec18683d4c5e3058ba14d7810d0d5df7d8c55 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sat, 15 Mar 2025 14:34:18 -0700 Subject: [PATCH 114/123] Added support for bytes to Bit constructor --- CHANGELOG.md | 1 + pgvector/bit.py | 2 ++ tests/test_bit.py | 4 ++++ 3 files changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index ebc165a..89e955a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ - Added top-level `pgvector` package - Added support for pg8000 +- Added support for `bytes` to `Bit` constructor - Changed `globally` option to default to `False` for Psycopg 2 - Changed `arrays` option to default to `True` for Psycopg 2 - Fixed equality for `Vector`, `HalfVector`, `Bit`, and `SparseVector` classes diff --git a/pgvector/bit.py b/pgvector/bit.py index a8feb55..8766f65 100644 --- a/pgvector/bit.py +++ b/pgvector/bit.py @@ -6,6 +6,8 @@ class Bit: def __init__(self, value): if isinstance(value, str): self._value = self.from_text(value)._value + elif isinstance(value, bytes): + self._value = np.unpackbits(np.frombuffer(value, dtype=np.uint8)).astype(bool) else: if isinstance(value, np.ndarray): if value.dtype == np.uint8: diff --git a/tests/test_bit.py b/tests/test_bit.py index ae27359..571205f 100644 --- a/tests/test_bit.py +++ b/tests/test_bit.py @@ -23,6 +23,10 @@ def test_tuple(self): def test_str(self): assert Bit('101').to_list() == [True, False, True] + def test_bytes(self): + assert Bit(b'\xff\x00').to_list() == [True, True, True, True, True, True, True, True, False, False, False, False, False, False, False, False] + assert Bit(b'\xfe\x07').to_list() == [True, True, True, True, True, True, True, False, False, False, False, False, False, True, True, True] + def test_ndarray_uint8(self): arr = np.array([254, 7, 0], dtype=np.uint8) assert Bit(arr).to_text() == '111111100000011100000000' From 2d1b754773f8c4f41970b3f61b93b20460961f98 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sat, 15 Mar 2025 14:54:26 -0700 Subject: [PATCH 115/123] Restored backwards compatibility of Bit constructor --- pgvector/bit.py | 15 ++++++--------- tests/test_bit.py | 18 ++++++++---------- 2 files changed, 14 insertions(+), 19 deletions(-) diff --git a/pgvector/bit.py b/pgvector/bit.py index 8766f65..935f0f0 100644 --- a/pgvector/bit.py +++ b/pgvector/bit.py @@ -1,5 +1,6 @@ import numpy as np from struct import pack, unpack_from +from warnings import warn class Bit: @@ -9,15 +10,11 @@ def __init__(self, value): elif isinstance(value, bytes): self._value = np.unpackbits(np.frombuffer(value, dtype=np.uint8)).astype(bool) else: - if isinstance(value, np.ndarray): - if value.dtype == np.uint8: - value = np.unpackbits(value).astype(bool) - elif value.dtype != np.bool: - raise ValueError('expected dtype to be bool or uint8') - else: - value = np.asarray(value) - if value.dtype != np.bool: - raise ValueError('expected all elements to be boolean') + value = np.asarray(value) + + if value.dtype != np.bool: + warn('expected elements to be boolean', stacklevel=2) + value = value.astype(bool) if value.ndim != 1: raise ValueError('expected ndim to be 1') diff --git a/tests/test_bit.py b/tests/test_bit.py index 571205f..a13f476 100644 --- a/tests/test_bit.py +++ b/tests/test_bit.py @@ -8,14 +8,12 @@ def test_list(self): assert Bit([True, False, True]).to_list() == [True, False, True] def test_list_none(self): - with pytest.raises(ValueError) as error: - Bit([True, None, True]) - assert str(error.value) == 'expected all elements to be boolean' + with 
pytest.warns(UserWarning, match='expected elements to be boolean'): + assert Bit([True, None, True]).to_text() == '101' def test_list_int(self): - with pytest.raises(ValueError) as error: - Bit([254, 7, 0]) - assert str(error.value) == 'expected all elements to be boolean' + with pytest.warns(UserWarning, match='expected elements to be boolean'): + assert Bit([254, 7, 0]).to_text() == '110' def test_tuple(self): assert Bit((True, False, True)).to_list() == [True, False, True] @@ -29,13 +27,13 @@ def test_bytes(self): def test_ndarray_uint8(self): arr = np.array([254, 7, 0], dtype=np.uint8) - assert Bit(arr).to_text() == '111111100000011100000000' + with pytest.warns(UserWarning, match='expected elements to be boolean'): + assert Bit(arr).to_text() == '110' def test_ndarray_uint16(self): arr = np.array([254, 7, 0], dtype=np.uint16) - with pytest.raises(ValueError) as error: - Bit(arr) - assert str(error.value) == 'expected dtype to be bool or uint8' + with pytest.warns(UserWarning, match='expected elements to be boolean'): + assert Bit(arr).to_text() == '110' def test_ndarray_same_object(self): arr = np.array([True, False, True]) From 2ce3f43e6693fec29e92fa84f7d46fefb96f98f0 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sat, 15 Mar 2025 15:35:09 -0700 Subject: [PATCH 116/123] Improved internal representation of Bit class --- pgvector/bit.py | 47 +++++++++++++++++++++++++++-------------------- tests/test_bit.py | 10 +++++----- 2 files changed, 32 insertions(+), 25 deletions(-) diff --git a/pgvector/bit.py b/pgvector/bit.py index 935f0f0..72b8052 100644 --- a/pgvector/bit.py +++ b/pgvector/bit.py @@ -5,51 +5,58 @@ class Bit: def __init__(self, value): - if isinstance(value, str): - self._value = self.from_text(value)._value - elif isinstance(value, bytes): - self._value = np.unpackbits(np.frombuffer(value, dtype=np.uint8)).astype(bool) + if isinstance(value, bytes): + self._len = 8 * len(value) + self._data = value else: - value = np.asarray(value) + if isinstance(value, str): + value = [v != '0' for v in value] + else: + value = np.asarray(value) - if value.dtype != np.bool: - warn('expected elements to be boolean', stacklevel=2) - value = value.astype(bool) + if value.dtype != np.bool: + warn('expected elements to be boolean', stacklevel=2) + value = value.astype(bool) - if value.ndim != 1: - raise ValueError('expected ndim to be 1') + if value.ndim != 1: + raise ValueError('expected ndim to be 1') - self._value = value + self._len = len(value) + self._data = np.packbits(value).tobytes() def __repr__(self): return f'Bit({self.to_text()})' def __eq__(self, other): if isinstance(other, self.__class__): - return np.array_equal(self.to_numpy(), other.to_numpy()) + return self._len == other._len and self._data == other._data return False def to_list(self): - return self._value.tolist() + return self.to_numpy().tolist() def to_numpy(self): - return self._value + return np.unpackbits(np.frombuffer(self._data, dtype=np.uint8), count=self._len).astype(bool) def to_text(self): - return ''.join(self._value.astype(np.uint8).astype(str)) + return ''.join(format(v, '08b') for v in self._data)[:self._len] def to_binary(self): - return pack('>i', len(self._value)) + np.packbits(self._value).tobytes() + return pack('>i', self._len) + self._data @classmethod def from_text(cls, value): - return cls(np.asarray([v != '0' for v in value], dtype=bool)) + return cls(str(value)) @classmethod def from_binary(cls, value): - count = unpack_from('>i', value)[0] - buf = np.frombuffer(value, dtype=np.uint8, offset=4) 
- return cls(np.unpackbits(buf, count=count).astype(bool)) + if not isinstance(value, bytes): + raise ValueError('expected bytes') + + bit = cls.__new__(cls) + bit._len = unpack_from('>i', value)[0] + bit._data = value[4:] + return bit @classmethod def _to_db(cls, value): diff --git a/tests/test_bit.py b/tests/test_bit.py index a13f476..cf1275e 100644 --- a/tests/test_bit.py +++ b/tests/test_bit.py @@ -25,6 +25,11 @@ def test_bytes(self): assert Bit(b'\xff\x00').to_list() == [True, True, True, True, True, True, True, True, False, False, False, False, False, False, False, False] assert Bit(b'\xfe\x07').to_list() == [True, True, True, True, True, True, True, False, False, False, False, False, False, True, True, True] + def test_ndarray(self): + arr = np.array([True, False, True]) + assert Bit(arr).to_list() == [True, False, True] + assert np.array_equal(Bit(arr).to_numpy(), arr) + def test_ndarray_uint8(self): arr = np.array([254, 7, 0], dtype=np.uint8) with pytest.warns(UserWarning, match='expected elements to be boolean'): @@ -35,11 +40,6 @@ def test_ndarray_uint16(self): with pytest.warns(UserWarning, match='expected elements to be boolean'): assert Bit(arr).to_text() == '110' - def test_ndarray_same_object(self): - arr = np.array([True, False, True]) - assert Bit(arr).to_list() == [True, False, True] - assert Bit(arr).to_numpy() is arr - def test_ndim_two(self): with pytest.raises(ValueError) as error: Bit([[True, False], [True, False]]) From c2c17c2ab6365e55677bde47d1d13c63b4e87642 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sat, 15 Mar 2025 16:02:46 -0700 Subject: [PATCH 117/123] Removed warning for result of np.unpackbits --- pgvector/bit.py | 4 +++- tests/test_bit.py | 4 ++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/pgvector/bit.py b/pgvector/bit.py index 72b8052..edfaec6 100644 --- a/pgvector/bit.py +++ b/pgvector/bit.py @@ -15,7 +15,9 @@ def __init__(self, value): value = np.asarray(value) if value.dtype != np.bool: - warn('expected elements to be boolean', stacklevel=2) + # allow result of np.unpackbits + if value.dtype != np.uint8 or np.any(value > 1): + warn('expected elements to be boolean', stacklevel=2) value = value.astype(bool) if value.ndim != 1: diff --git a/tests/test_bit.py b/tests/test_bit.py index cf1275e..ef049c7 100644 --- a/tests/test_bit.py +++ b/tests/test_bit.py @@ -30,6 +30,10 @@ def test_ndarray(self): assert Bit(arr).to_list() == [True, False, True] assert np.array_equal(Bit(arr).to_numpy(), arr) + def test_ndarray_unpackbits(self): + arr = np.unpackbits(np.array([254, 7, 0], dtype=np.uint8)) + assert Bit(arr).to_text() == '111111100000011100000000' + def test_ndarray_uint8(self): arr = np.array([254, 7, 0], dtype=np.uint8) with pytest.warns(UserWarning, match='expected elements to be boolean'): From 50fac76f7959a155444e46d9e11be42403b09b26 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sat, 15 Mar 2025 16:04:10 -0700 Subject: [PATCH 118/123] Improved test --- tests/test_bit.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_bit.py b/tests/test_bit.py index ef049c7..5a71642 100644 --- a/tests/test_bit.py +++ b/tests/test_bit.py @@ -22,8 +22,8 @@ def test_str(self): assert Bit('101').to_list() == [True, False, True] def test_bytes(self): - assert Bit(b'\xff\x00').to_list() == [True, True, True, True, True, True, True, True, False, False, False, False, False, False, False, False] - assert Bit(b'\xfe\x07').to_list() == [True, True, True, True, True, True, True, False, False, False, False, False, False, 
True, True, True] + assert Bit(b'\xff\x00\xf0').to_text() == '111111110000000011110000' + assert Bit(b'\xfe\x07\x00').to_text() == '111111100000011100000000' def test_ndarray(self): arr = np.array([True, False, True]) From 92bb02a531fc012369ee20f065028aec230d5dcf Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sat, 15 Mar 2025 16:05:17 -0700 Subject: [PATCH 119/123] Updated comment [skip ci] --- pgvector/bit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pgvector/bit.py b/pgvector/bit.py index edfaec6..26a9d8d 100644 --- a/pgvector/bit.py +++ b/pgvector/bit.py @@ -15,7 +15,7 @@ def __init__(self, value): value = np.asarray(value) if value.dtype != np.bool: - # allow result of np.unpackbits + # skip warning for result of np.unpackbits if value.dtype != np.uint8 or np.any(value > 1): warn('expected elements to be boolean', stacklevel=2) value = value.astype(bool) From 4e22f9b26545f1b871cfba0fde21812ebc88ca84 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sat, 15 Mar 2025 16:16:01 -0700 Subject: [PATCH 120/123] Updated warning message --- pgvector/bit.py | 2 +- tests/test_bit.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pgvector/bit.py b/pgvector/bit.py index 26a9d8d..e82b325 100644 --- a/pgvector/bit.py +++ b/pgvector/bit.py @@ -17,7 +17,7 @@ def __init__(self, value): if value.dtype != np.bool: # skip warning for result of np.unpackbits if value.dtype != np.uint8 or np.any(value > 1): - warn('expected elements to be boolean', stacklevel=2) + warn('elements should be boolean', stacklevel=2) value = value.astype(bool) if value.ndim != 1: diff --git a/tests/test_bit.py b/tests/test_bit.py index 5a71642..e920228 100644 --- a/tests/test_bit.py +++ b/tests/test_bit.py @@ -8,11 +8,11 @@ def test_list(self): assert Bit([True, False, True]).to_list() == [True, False, True] def test_list_none(self): - with pytest.warns(UserWarning, match='expected elements to be boolean'): + with pytest.warns(UserWarning, match='elements should be boolean'): assert Bit([True, None, True]).to_text() == '101' def test_list_int(self): - with pytest.warns(UserWarning, match='expected elements to be boolean'): + with pytest.warns(UserWarning, match='elements should be boolean'): assert Bit([254, 7, 0]).to_text() == '110' def test_tuple(self): @@ -36,12 +36,12 @@ def test_ndarray_unpackbits(self): def test_ndarray_uint8(self): arr = np.array([254, 7, 0], dtype=np.uint8) - with pytest.warns(UserWarning, match='expected elements to be boolean'): + with pytest.warns(UserWarning, match='elements should be boolean'): assert Bit(arr).to_text() == '110' def test_ndarray_uint16(self): arr = np.array([254, 7, 0], dtype=np.uint16) - with pytest.warns(UserWarning, match='expected elements to be boolean'): + with pytest.warns(UserWarning, match='elements should be boolean'): assert Bit(arr).to_text() == '110' def test_ndim_two(self): From 7a2dd806e79ad82960cc1a89159ca61f9a12a373 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sat, 15 Mar 2025 16:20:17 -0700 Subject: [PATCH 121/123] Revert "Updated warning message" This reverts commit 4e22f9b26545f1b871cfba0fde21812ebc88ca84. 
--- pgvector/bit.py | 2 +- tests/test_bit.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pgvector/bit.py b/pgvector/bit.py index e82b325..26a9d8d 100644 --- a/pgvector/bit.py +++ b/pgvector/bit.py @@ -17,7 +17,7 @@ def __init__(self, value): if value.dtype != np.bool: # skip warning for result of np.unpackbits if value.dtype != np.uint8 or np.any(value > 1): - warn('elements should be boolean', stacklevel=2) + warn('expected elements to be boolean', stacklevel=2) value = value.astype(bool) if value.ndim != 1: diff --git a/tests/test_bit.py b/tests/test_bit.py index e920228..5a71642 100644 --- a/tests/test_bit.py +++ b/tests/test_bit.py @@ -8,11 +8,11 @@ def test_list(self): assert Bit([True, False, True]).to_list() == [True, False, True] def test_list_none(self): - with pytest.warns(UserWarning, match='elements should be boolean'): + with pytest.warns(UserWarning, match='expected elements to be boolean'): assert Bit([True, None, True]).to_text() == '101' def test_list_int(self): - with pytest.warns(UserWarning, match='elements should be boolean'): + with pytest.warns(UserWarning, match='expected elements to be boolean'): assert Bit([254, 7, 0]).to_text() == '110' def test_tuple(self): @@ -36,12 +36,12 @@ def test_ndarray_unpackbits(self): def test_ndarray_uint8(self): arr = np.array([254, 7, 0], dtype=np.uint8) - with pytest.warns(UserWarning, match='elements should be boolean'): + with pytest.warns(UserWarning, match='expected elements to be boolean'): assert Bit(arr).to_text() == '110' def test_ndarray_uint16(self): arr = np.array([254, 7, 0], dtype=np.uint16) - with pytest.warns(UserWarning, match='elements should be boolean'): + with pytest.warns(UserWarning, match='expected elements to be boolean'): assert Bit(arr).to_text() == '110' def test_ndim_two(self): From 6bb6df8cce6d5b03e1a8a9b683ae37faaf12db7a Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sat, 15 Mar 2025 16:35:04 -0700 Subject: [PATCH 122/123] Removed unreleased import --- pgvector/psycopg2/__init__.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pgvector/psycopg2/__init__.py b/pgvector/psycopg2/__init__.py index b40c673..33e5124 100644 --- a/pgvector/psycopg2/__init__.py +++ b/pgvector/psycopg2/__init__.py @@ -1,11 +1,10 @@ from .register import register_vector # TODO remove -from .. import HalfVector, SparseVector, Vector +from .. 
import HalfVector, SparseVector __all__ = [ 'register_vector', - 'Vector', 'HalfVector', 'SparseVector' ] From a8f2a5f8428ae10d79be53c0367fc007eca4ab78 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sat, 15 Mar 2025 17:53:02 -0700 Subject: [PATCH 123/123] Version bump to 0.4.0 [skip ci] --- CHANGELOG.md | 2 +- README.md | 2 +- pyproject.toml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 89e955a..d0e2730 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,4 @@ -## 0.4.0 (unreleased) +## 0.4.0 (2025-03-15) - Added top-level `pgvector` package - Added support for pg8000 diff --git a/README.md b/README.md index 299753e..b6bc055 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,7 @@ And follow the instructions for your database library: - [Psycopg 3](#psycopg-3) - [Psycopg 2](#psycopg-2) - [asyncpg](#asyncpg) -- [pg8000](#pg8000) [unreleased] +- [pg8000](#pg8000) - [Peewee](#peewee) Or check out some examples: diff --git a/pyproject.toml b/pyproject.toml index 0f291f5..b889f4b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "pgvector" -version = "0.3.6" +version = "0.4.0" description = "pgvector support for Python" readme = "README.md" authors = [