From 40cf29622b29be1f32595e26876593bb5f1e03be Mon Sep 17 00:00:00 2001 From: Casey Clements Date: Mon, 30 Sep 2024 17:57:46 -0400 Subject: [PATCH 1/7] MongoDB Atlas: Two line change to make our CI builds green (#1910) --- docarray/index/backends/mongodb_atlas.py | 6 ++++-- tests/index/mongo_atlas/__init__.py | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/docarray/index/backends/mongodb_atlas.py b/docarray/index/backends/mongodb_atlas.py index f2bbc04983..f1ccdec02d 100644 --- a/docarray/index/backends/mongodb_atlas.py +++ b/docarray/index/backends/mongodb_atlas.py @@ -563,16 +563,18 @@ def _vector_search_stage( max_candidates = self._get_max_candidates(search_field) query = query.astype(np.float64).tolist() - return { + stage = { '$vectorSearch': { 'index': search_index_name, 'path': search_field, 'queryVector': query, 'numCandidates': min(limit * oversampling_factor, max_candidates), 'limit': limit, - 'filter': {"$and": filters} if filters else None, } } + if filters: + stage['$vectorSearch']['filter'] = {"$and": filters} + return stage def _text_search_stage( self, diff --git a/tests/index/mongo_atlas/__init__.py b/tests/index/mongo_atlas/__init__.py index 360ba6ee1c..305bebe1ed 100644 --- a/tests/index/mongo_atlas/__init__.py +++ b/tests/index/mongo_atlas/__init__.py @@ -29,7 +29,7 @@ class FlatSchema(BaseDoc): embedding2: NdArray = Field(dim=N_DIM, index_name="vector_index_2") -def assert_when_ready(callable: Callable, tries: int = 5, interval: float = 2): +def assert_when_ready(callable: Callable, tries: int = 10, interval: float = 2): """ Retry callable to account for time taken to change data on the cluster """ From 83ebef6087e868517681e59877008f80f1e7f113 Mon Sep 17 00:00:00 2001 From: Emmanuel Ferdman Date: Tue, 1 Oct 2024 17:01:07 +0300 Subject: [PATCH 2/7] fix: update license location (#1911) Signed-off-by: Emmanuel Ferdman --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 06acc4f516..1c4e27f989 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ > The README you're currently viewing is for DocArray>0.30, which introduces some significant changes from DocArray 0.21. If you wish to continue using the older DocArray <=0.21, ensure you install it via `pip install docarray==0.21`. Refer to its [codebase](https://github.com/docarray/docarray/tree/v0.21.0), [documentation](https://docarray.jina.ai), and [its hot-fixes branch](https://github.com/docarray/docarray/tree/docarray-v1-fixes) for more information. -DocArray is a Python library expertly crafted for the [representation](#represent), [transmission](#send), [storage](#store), and [retrieval](#retrieve) of multimodal data. Tailored for the development of multimodal AI applications, its design guarantees seamless integration with the extensive Python and machine learning ecosystems. As of January 2022, DocArray is openly distributed under the [Apache License 2.0](https://github.com/docarray/docarray/blob/main/LICENSE) and currently enjoys the status of a sandbox project within the [LF AI & Data Foundation](https://lfaidata.foundation/). +DocArray is a Python library expertly crafted for the [representation](#represent), [transmission](#send), [storage](#store), and [retrieval](#retrieve) of multimodal data. Tailored for the development of multimodal AI applications, its design guarantees seamless integration with the extensive Python and machine learning ecosystems. 
As of January 2022, DocArray is openly distributed under the [Apache License 2.0](https://github.com/docarray/docarray/blob/main/LICENSE.md) and currently enjoys the status of a sandbox project within the [LF AI & Data Foundation](https://lfaidata.foundation/). From d98acb716e0c336a817f65b62d428ab13cf8ac42 Mon Sep 17 00:00:00 2001 From: Joan Fontanals Date: Fri, 21 Mar 2025 09:02:38 +0100 Subject: [PATCH 3/7] fix: fix DocList schema when using Pydantic V2 (#1876) --- .github/workflows/cd.yml | 18 +- .github/workflows/ci.yml | 20 +- .github/workflows/ci_only_pr.yml | 2 +- docarray/__init__.py | 54 ++++ docarray/array/any_array.py | 40 ++- docarray/array/doc_list/doc_list.py | 25 +- docarray/array/doc_list/io.py | 1 - docarray/array/doc_vec/doc_vec.py | 6 +- docarray/base_doc/doc.py | 10 +- docarray/base_doc/mixins/update.py | 4 +- docarray/index/backends/elastic.py | 8 +- docarray/index/backends/epsilla.py | 4 +- docarray/typing/bytes/base_bytes.py | 2 +- docarray/typing/id.py | 2 +- docarray/typing/tensor/abstract_tensor.py | 4 +- docarray/typing/url/any_url.py | 2 +- docarray/utils/_internal/_typing.py | 8 +- docarray/utils/create_dynamic_doc_class.py | 56 +++- tests/benchmark_tests/test_map.py | 6 +- .../index/base_classes/test_base_doc_store.py | 68 +++-- .../array/test_optional_doc_vec.py | 3 +- tests/integrations/externals/test_fastapi.py | 261 +++++++++++++++++- .../torch/data/test_torch_dataset.py | 8 +- .../units/array/stack/storage/test_storage.py | 3 +- tests/units/array/stack/test_array_stacked.py | 12 +- tests/units/array/stack/test_proto.py | 2 + tests/units/array/test_array.py | 4 +- tests/units/array/test_array_from_to_bytes.py | 38 ++- tests/units/array/test_doclist_schema.py | 22 ++ tests/units/document/test_doc_wo_id.py | 7 +- tests/units/typing/da/test_relations.py | 11 + .../util/test_create_dynamic_code_class.py | 35 ++- tests/units/util/test_map.py | 4 +- 33 files changed, 624 insertions(+), 126 deletions(-) create mode 100644 tests/units/array/test_doclist_schema.py diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml index a1aae08ec9..e0a14b5252 100644 --- a/.github/workflows/cd.yml +++ b/.github/workflows/cd.yml @@ -21,7 +21,7 @@ jobs: - name: Pre-release (.devN) run: | git fetch --depth=1 origin +refs/tags/*:refs/tags/* - pip install poetry + pip install poetry==1.7.1 ./scripts/release.sh env: PYPI_USERNAME: ${{ secrets.TWINE_USERNAME }} @@ -35,20 +35,16 @@ jobs: steps: - uses: actions/checkout@v3 with: - fetch-depth: 0 - - - name: Get changed files - id: changed-files-specific - uses: tj-actions/changed-files@v41 - with: - files: | - README.md + fetch-depth: 2 - name: Check if README is modified id: step_output - if: steps.changed-files-specific.outputs.any_changed == 'true' run: | - echo "readme_changed=true" >> $GITHUB_OUTPUT + if git diff --name-only HEAD^ HEAD | grep -q "README.md"; then + echo "readme_changed=true" >> $GITHUB_OUTPUT + else + echo "readme_changed=false" >> $GITHUB_OUTPUT + fi publish-docarray-org: needs: check-readme-modification diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0e98f9ce7b..07c32d0b87 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -25,7 +25,7 @@ jobs: - name: Lint with ruff run: | python -m pip install --upgrade pip - python -m pip install poetry + python -m pip install poetry==1.7.1 poetry install # stop the build if there are Python syntax errors or undefined names @@ -44,7 +44,7 @@ jobs: - name: check black run: | python -m pip install --upgrade pip - python -m pip 
install poetry + python -m pip install poetry==1.7.1 poetry install --only dev poetry run black --check . @@ -62,7 +62,7 @@ jobs: - name: Prepare environment run: | python -m pip install --upgrade pip - python -m pip install poetry + python -m pip install poetry==1.7.1 poetry install --without dev poetry run pip install tensorflow==2.12.0 poetry run pip install jax @@ -106,7 +106,7 @@ jobs: - name: Prepare environment run: | python -m pip install --upgrade pip - python -m pip install poetry + python -m pip install poetry==1.7.1 poetry install --all-extras poetry run pip install elasticsearch==8.6.2 ./scripts/install_pydantic_v2.sh ${{ matrix.pydantic-version }} @@ -156,7 +156,7 @@ jobs: - name: Prepare environment run: | python -m pip install --upgrade pip - python -m pip install poetry + python -m pip install poetry==1.7.1 poetry install --all-extras ./scripts/install_pydantic_v2.sh ${{ matrix.pydantic-version }} poetry run pip install protobuf==3.20.0 # we check that we support 3.19 @@ -204,7 +204,7 @@ jobs: - name: Prepare environment run: | python -m pip install --upgrade pip - python -m pip install poetry + python -m pip install poetry==1.7.1 poetry install --all-extras ./scripts/install_pydantic_v2.sh ${{ matrix.pydantic-version }} poetry run pip install protobuf==3.20.0 @@ -253,7 +253,7 @@ jobs: - name: Prepare environment run: | python -m pip install --upgrade pip - python -m pip install poetry + python -m pip install poetry==1.7.1 poetry install --all-extras ./scripts/install_pydantic_v2.sh ${{ matrix.pydantic-version }} poetry run pip install protobuf==3.20.0 @@ -302,7 +302,7 @@ jobs: - name: Prepare environment run: | python -m pip install --upgrade pip - python -m pip install poetry + python -m pip install poetry==1.7.1 poetry install --all-extras ./scripts/install_pydantic_v2.sh ${{ matrix.pydantic-version }} poetry run pip install protobuf==3.20.0 @@ -351,7 +351,7 @@ jobs: - name: Prepare environment run: | python -m pip install --upgrade pip - python -m pip install poetry + python -m pip install poetry==1.7.1 poetry install --all-extras ./scripts/install_pydantic_v2.sh ${{ matrix.pydantic-version }} poetry run pip uninstall -y torch @@ -398,7 +398,7 @@ jobs: - name: Prepare environment run: | python -m pip install --upgrade pip - python -m pip install poetry + python -m pip install poetry==1.7.1 poetry install --all-extras poetry run pip uninstall -y torch poetry run pip install torch diff --git a/.github/workflows/ci_only_pr.yml b/.github/workflows/ci_only_pr.yml index 1e8d3f9694..9d040e72b6 100644 --- a/.github/workflows/ci_only_pr.yml +++ b/.github/workflows/ci_only_pr.yml @@ -43,7 +43,7 @@ jobs: run: | npm i -g netlify-cli python -m pip install --upgrade pip - python -m pip install poetry + python -m pip install poetry==1.7.1 python -m poetry config virtualenvs.create false && python -m poetry install --no-interaction --no-ansi --all-extras cd docs diff --git a/docarray/__init__.py b/docarray/__init__.py index 6ce3f9eb90..5a18bb9588 100644 --- a/docarray/__init__.py +++ b/docarray/__init__.py @@ -20,6 +20,60 @@ from docarray.array import DocList, DocVec from docarray.base_doc.doc import BaseDoc from docarray.utils._internal.misc import _get_path_from_docarray_root_level +from docarray.utils._internal.pydantic import is_pydantic_v2 + + +def unpickle_doclist(doc_type, b): + return DocList[doc_type].from_bytes(b, protocol="protobuf") + + +def unpickle_docvec(doc_type, tensor_type, b): + return DocVec[doc_type].from_bytes(b, protocol="protobuf", tensor_type=tensor_type) + + 
+if is_pydantic_v2: + # Register the pickle functions + def register_serializers(): + import copyreg + from functools import partial + + unpickle_doc_fn = partial(BaseDoc.from_bytes, protocol="protobuf") + + def pickle_doc(doc): + b = doc.to_bytes(protocol='protobuf') + return unpickle_doc_fn, (doc.__class__, b) + + # Register BaseDoc serialization + copyreg.pickle(BaseDoc, pickle_doc) + + # For DocList, we need to hook into __reduce__ since it's a generic + + def pickle_doclist(doc_list): + b = doc_list.to_bytes(protocol='protobuf') + doc_type = doc_list.doc_type + return unpickle_doclist, (doc_type, b) + + # Replace DocList.__reduce__ with a method that returns the correct format + def doclist_reduce(self): + return pickle_doclist(self) + + DocList.__reduce__ = doclist_reduce + + # For DocVec, we need to hook into __reduce__ since it's a generic + + def pickle_docvec(doc_vec): + b = doc_vec.to_bytes(protocol='protobuf') + doc_type = doc_vec.doc_type + tensor_type = doc_vec.tensor_type + return unpickle_docvec, (doc_type, tensor_type, b) + + # Replace DocList.__reduce__ with a method that returns the correct format + def docvec_reduce(self): + return pickle_docvec(self) + + DocVec.__reduce__ = docvec_reduce + + register_serializers() __all__ = ['BaseDoc', 'DocList', 'DocVec'] diff --git a/docarray/array/any_array.py b/docarray/array/any_array.py index 50c47cf4ec..0c29e54ae8 100644 --- a/docarray/array/any_array.py +++ b/docarray/array/any_array.py @@ -25,6 +25,7 @@ from docarray.exceptions.exceptions import UnusableObjectError from docarray.typing.abstract_type import AbstractType from docarray.utils._internal._typing import change_cls_name, safe_issubclass +from docarray.utils._internal.pydantic import is_pydantic_v2 if TYPE_CHECKING: from docarray.proto import DocListProto, NodeProto @@ -73,8 +74,19 @@ def __class_getitem__(cls, item: Union[Type[BaseDocWithoutId], TypeVar, str]): # Promote to global scope so multiprocessing can pickle it global _DocArrayTyped - class _DocArrayTyped(cls): # type: ignore - doc_type: Type[BaseDocWithoutId] = cast(Type[BaseDocWithoutId], item) + if not is_pydantic_v2: + + class _DocArrayTyped(cls): # type: ignore + doc_type: Type[BaseDocWithoutId] = cast( + Type[BaseDocWithoutId], item + ) + + else: + + class _DocArrayTyped(cls, Generic[T_doc]): # type: ignore + doc_type: Type[BaseDocWithoutId] = cast( + Type[BaseDocWithoutId], item + ) for field in _DocArrayTyped.doc_type._docarray_fields().keys(): @@ -99,14 +111,24 @@ def _setter(self, value): setattr(_DocArrayTyped, field, _property_generator(field)) # this generates property on the fly based on the schema of the item - # The global scope and qualname need to refer to this class a unique name. - # Otherwise, creating another _DocArrayTyped will overwrite this one. - change_cls_name( - _DocArrayTyped, f'{cls.__name__}[{item.__name__}]', globals() - ) - - cls.__typed_da__[cls][item] = _DocArrayTyped + # # The global scope and qualname need to refer to this class a unique name. + # # Otherwise, creating another _DocArrayTyped will overwrite this one. 
+ if not is_pydantic_v2: + change_cls_name( + _DocArrayTyped, f'{cls.__name__}[{item.__name__}]', globals() + ) + cls.__typed_da__[cls][item] = _DocArrayTyped + else: + change_cls_name(_DocArrayTyped, f'{cls.__name__}', globals()) + if sys.version_info < (3, 12): + cls.__typed_da__[cls][item] = Generic.__class_getitem__.__func__( + _DocArrayTyped, item + ) # type: ignore + # this do nothing that checking that item is valid type var or str + # Keep the approach in #1147 to be compatible with lower versions of Python. + else: + cls.__typed_da__[cls][item] = GenericAlias(_DocArrayTyped, item) # type: ignore return cls.__typed_da__[cls][item] @overload diff --git a/docarray/array/doc_list/doc_list.py b/docarray/array/doc_list/doc_list.py index c21cf93413..4923619915 100644 --- a/docarray/array/doc_list/doc_list.py +++ b/docarray/array/doc_list/doc_list.py @@ -12,6 +12,7 @@ Union, cast, overload, + Callable, ) from pydantic import parse_obj_as @@ -28,7 +29,6 @@ from docarray.utils._internal.pydantic import is_pydantic_v2 if is_pydantic_v2: - from pydantic import GetCoreSchemaHandler from pydantic_core import core_schema from docarray.utils._internal._typing import safe_issubclass @@ -45,10 +45,7 @@ class DocList( - ListAdvancedIndexing[T_doc], - PushPullMixin, - IOMixinDocList, - AnyDocArray[T_doc], + ListAdvancedIndexing[T_doc], PushPullMixin, IOMixinDocList, AnyDocArray[T_doc] ): """ DocList is a container of Documents. @@ -357,8 +354,20 @@ def __repr__(self): @classmethod def __get_pydantic_core_schema__( - cls, _source_type: Any, _handler: GetCoreSchemaHandler + cls, source: Any, handler: Callable[[Any], core_schema.CoreSchema] ) -> core_schema.CoreSchema: - return core_schema.general_plain_validator_function( - cls.validate, + instance_schema = core_schema.is_instance_schema(cls) + args = getattr(source, '__args__', None) + if args: + sequence_t_schema = handler(Sequence[args[0]]) + else: + sequence_t_schema = handler(Sequence) + + def validate_fn(v, info): + # input has already been validated + return cls(v, validate_input_docs=False) + + non_instance_schema = core_schema.with_info_after_validator_function( + validate_fn, sequence_t_schema ) + return core_schema.union_schema([instance_schema, non_instance_schema]) diff --git a/docarray/array/doc_list/io.py b/docarray/array/doc_list/io.py index 82d00197e2..3acb66bf6e 100644 --- a/docarray/array/doc_list/io.py +++ b/docarray/array/doc_list/io.py @@ -256,7 +256,6 @@ def to_bytes( :param show_progress: show progress bar, only works when protocol is `pickle` or `protobuf` :return: the binary serialization in bytes or None if file_ctx is passed where to store """ - with file_ctx or io.BytesIO() as bf: self._write_bytes( bf=bf, diff --git a/docarray/array/doc_vec/doc_vec.py b/docarray/array/doc_vec/doc_vec.py index 9d515cfd96..0cc462f173 100644 --- a/docarray/array/doc_vec/doc_vec.py +++ b/docarray/array/doc_vec/doc_vec.py @@ -198,7 +198,7 @@ def _check_doc_field_not_none(field_name, doc): if safe_issubclass(tensor.__class__, tensor_type): field_type = tensor_type - if isinstance(field_type, type): + if isinstance(field_type, type) or safe_issubclass(field_type, AnyDocArray): if tf_available and safe_issubclass(field_type, TensorFlowTensor): # tf.Tensor does not allow item assignment, therefore the # optimized way @@ -335,7 +335,9 @@ def _docarray_validate( return cast(T, value.to_doc_vec()) else: raise ValueError(f'DocVec[value.doc_type] is not compatible with {cls}') - elif isinstance(value, DocList.__class_getitem__(cls.doc_type)): + elif not 
is_pydantic_v2 and isinstance( + value, DocList.__class_getitem__(cls.doc_type) + ): return cast(T, value.to_doc_vec()) elif isinstance(value, Sequence): return cls(value) diff --git a/docarray/base_doc/doc.py b/docarray/base_doc/doc.py index 48fb3076cd..e880504bc0 100644 --- a/docarray/base_doc/doc.py +++ b/docarray/base_doc/doc.py @@ -326,8 +326,13 @@ def _exclude_doclist( from docarray.array.any_array import AnyDocArray type_ = self._get_field_annotation(field) - if isinstance(type_, type) and safe_issubclass(type_, AnyDocArray): - doclist_exclude_fields.append(field) + if is_pydantic_v2: + # Conservative when touching pydantic v1 logic + if safe_issubclass(type_, AnyDocArray): + doclist_exclude_fields.append(field) + else: + if isinstance(type_, type) and safe_issubclass(type_, AnyDocArray): + doclist_exclude_fields.append(field) original_exclude = exclude if exclude is None: @@ -480,7 +485,6 @@ def model_dump( # type: ignore warnings: bool = True, ) -> Dict[str, Any]: def _model_dump(doc): - ( exclude_, original_exclude, diff --git a/docarray/base_doc/mixins/update.py b/docarray/base_doc/mixins/update.py index 721f8225eb..7ce596ce1a 100644 --- a/docarray/base_doc/mixins/update.py +++ b/docarray/base_doc/mixins/update.py @@ -110,9 +110,7 @@ def _group_fields(doc: 'UpdateMixin') -> _FieldGroups: if field_name not in FORBIDDEN_FIELDS_TO_UPDATE: field_type = doc._get_field_annotation(field_name) - if isinstance(field_type, type) and safe_issubclass( - field_type, DocList - ): + if safe_issubclass(field_type, DocList): nested_docarray_fields.append(field_name) else: origin = get_origin(field_type) diff --git a/docarray/index/backends/elastic.py b/docarray/index/backends/elastic.py index c008fa29de..a335f85e32 100644 --- a/docarray/index/backends/elastic.py +++ b/docarray/index/backends/elastic.py @@ -352,12 +352,12 @@ def python_type_to_db_type(self, python_type: Type) -> Any: dict: 'object', } - for type in elastic_py_types.keys(): - if safe_issubclass(python_type, type): + for t in elastic_py_types.keys(): + if safe_issubclass(python_type, t): self._logger.info( - f'Mapped Python type {python_type} to database type "{elastic_py_types[type]}"' + f'Mapped Python type {python_type} to database type "{elastic_py_types[t]}"' ) - return elastic_py_types[type] + return elastic_py_types[t] err_msg = f'Unsupported column type for {type(self)}: {python_type}' self._logger.error(err_msg) diff --git a/docarray/index/backends/epsilla.py b/docarray/index/backends/epsilla.py index 83c171daed..0392e9d010 100644 --- a/docarray/index/backends/epsilla.py +++ b/docarray/index/backends/epsilla.py @@ -100,8 +100,8 @@ def __init__(self, db_config=None, **kwargs): def _validate_column_info(self): vector_columns = [] for info in self._column_infos.values(): - for type in [list, np.ndarray, AbstractTensor]: - if safe_issubclass(info.docarray_type, type) and info.config.get( + for t in [list, np.ndarray, AbstractTensor]: + if safe_issubclass(info.docarray_type, t) and info.config.get( 'is_embedding', False ): # check that dimension is present diff --git a/docarray/typing/bytes/base_bytes.py b/docarray/typing/bytes/base_bytes.py index 4c336ae694..8a944031b4 100644 --- a/docarray/typing/bytes/base_bytes.py +++ b/docarray/typing/bytes/base_bytes.py @@ -62,7 +62,7 @@ def _to_node_protobuf(self: T) -> 'NodeProto': def __get_pydantic_core_schema__( cls, _source_type: Any, _handler: 'GetCoreSchemaHandler' ) -> 'core_schema.CoreSchema': - return core_schema.general_after_validator_function( + return 
core_schema.with_info_after_validator_function( cls.validate, core_schema.bytes_schema(), ) diff --git a/docarray/typing/id.py b/docarray/typing/id.py index c06951eaef..3e3fdd37ae 100644 --- a/docarray/typing/id.py +++ b/docarray/typing/id.py @@ -77,7 +77,7 @@ def from_protobuf(cls: Type[T], pb_msg: 'str') -> T: def __get_pydantic_core_schema__( cls, source: Type[Any], handler: 'GetCoreSchemaHandler' ) -> core_schema.CoreSchema: - return core_schema.general_plain_validator_function( + return core_schema.with_info_plain_validator_function( cls.validate, ) diff --git a/docarray/typing/tensor/abstract_tensor.py b/docarray/typing/tensor/abstract_tensor.py index 994fe42cc8..e7e4fbe705 100644 --- a/docarray/typing/tensor/abstract_tensor.py +++ b/docarray/typing/tensor/abstract_tensor.py @@ -395,10 +395,10 @@ def _docarray_to_ndarray(self) -> np.ndarray: def __get_pydantic_core_schema__( cls, _source_type: Any, handler: GetCoreSchemaHandler ) -> core_schema.CoreSchema: - return core_schema.general_plain_validator_function( + return core_schema.with_info_plain_validator_function( cls.validate, serialization=core_schema.plain_serializer_function_ser_schema( - function=orjson_dumps, + function=lambda x: x._docarray_to_ndarray().tolist(), return_schema=handler.generate_schema(bytes), when_used="json-unless-none", ), diff --git a/docarray/typing/url/any_url.py b/docarray/typing/url/any_url.py index ddd1791513..b7c5d71f83 100644 --- a/docarray/typing/url/any_url.py +++ b/docarray/typing/url/any_url.py @@ -56,7 +56,7 @@ def _docarray_validate( def __get_pydantic_core_schema__( cls, source: Type[Any], handler: Optional['GetCoreSchemaHandler'] = None ) -> core_schema.CoreSchema: - return core_schema.general_after_validator_function( + return core_schema.with_info_after_validator_function( cls._docarray_validate, core_schema.str_schema(), ) diff --git a/docarray/utils/_internal/_typing.py b/docarray/utils/_internal/_typing.py index 83e350a060..3c2bd89a8e 100644 --- a/docarray/utils/_internal/_typing.py +++ b/docarray/utils/_internal/_typing.py @@ -61,11 +61,15 @@ def safe_issubclass(x: type, a_tuple: type) -> bool: :return: A boolean value - 'True' if 'x' is a subclass of 'A_tuple', 'False' otherwise. Note that if the origin of 'x' is a list or tuple, the function immediately returns 'False'. 
""" + origin = get_origin(x) + if origin: # If x is a generic type like DocList[SomeDoc], get its origin + x = origin if ( - (get_origin(x) in (list, tuple, dict, set, Union)) + (origin in (list, tuple, dict, set, Union)) or is_typevar(x) or (type(x) == ForwardRef) or is_typevar(x) ): return False - return issubclass(x, a_tuple) + + return isinstance(x, type) and issubclass(x, a_tuple) diff --git a/docarray/utils/create_dynamic_doc_class.py b/docarray/utils/create_dynamic_doc_class.py index 744fea58c3..c82a7c8948 100644 --- a/docarray/utils/create_dynamic_doc_class.py +++ b/docarray/utils/create_dynamic_doc_class.py @@ -54,8 +54,9 @@ class MyDoc(BaseDoc): fields: Dict[str, Any] = {} import copy - fields_copy = copy.deepcopy(model.__fields__) - annotations_copy = copy.deepcopy(model.__annotations__) + copy_model = copy.deepcopy(model) + fields_copy = copy_model.__fields__ + annotations_copy = copy_model.__annotations__ for field_name, field in annotations_copy.items(): if field_name not in fields_copy: continue @@ -65,7 +66,7 @@ class MyDoc(BaseDoc): else: field_info = fields_copy[field_name].field_info try: - if safe_issubclass(field, DocList): + if safe_issubclass(field, DocList) and not is_pydantic_v2: t: Any = field.doc_type t_aux = create_pure_python_type_model(t) fields[field_name] = (List[t_aux], field_info) @@ -74,13 +75,14 @@ class MyDoc(BaseDoc): except TypeError: fields[field_name] = (field, field_info) - return create_model(model.__name__, __base__=model, __doc__=model.__doc__, **fields) + return create_model( + copy_model.__name__, __base__=copy_model, __doc__=copy_model.__doc__, **fields + ) def _get_field_annotation_from_schema( field_schema: Dict[str, Any], field_name: str, - root_schema: Dict[str, Any], cached_models: Dict[str, Any], is_tensor: bool = False, num_recursions: int = 0, @@ -90,7 +92,6 @@ def _get_field_annotation_from_schema( Private method used to extract the corresponding field type from the schema. :param field_schema: The schema from which to extract the type :param field_name: The name of the field to be created - :param root_schema: The schema of the root object, important to get references :param cached_models: Parameter used when this method is called recursively to reuse partial nested classes. :param is_tensor: Boolean used to tell between tensor and list :param num_recursions: Number of recursions to properly handle nested types (Dict, List, etc ..) 
@@ -110,7 +111,7 @@ def _get_field_annotation_from_schema( ref_name = obj_ref.split('/')[-1] any_of_types.append( create_base_doc_from_schema( - root_schema['definitions'][ref_name], + definitions[ref_name], ref_name, cached_models=cached_models, definitions=definitions, @@ -121,7 +122,6 @@ def _get_field_annotation_from_schema( _get_field_annotation_from_schema( any_of_schema, field_name, - root_schema=root_schema, cached_models=cached_models, is_tensor=tensor_shape is not None, num_recursions=0, @@ -160,7 +160,10 @@ def _get_field_annotation_from_schema( doc_type: Any if 'additionalProperties' in field_schema: # handle Dictionaries additional_props = field_schema['additionalProperties'] - if additional_props.get('type') == 'object': + if ( + isinstance(additional_props, dict) + and additional_props.get('type') == 'object' + ): doc_type = create_base_doc_from_schema( additional_props, field_name, cached_models=cached_models ) @@ -201,7 +204,6 @@ def _get_field_annotation_from_schema( ret = _get_field_annotation_from_schema( field_schema=field_schema.get('items', {}), field_name=field_name, - root_schema=root_schema, cached_models=cached_models, is_tensor=tensor_shape is not None, num_recursions=num_recursions + 1, @@ -262,6 +264,24 @@ class MyDoc(BaseDoc): :param definitions: Parameter used when this method is called recursively to reuse root definitions of other schemas. :return: A BaseDoc class dynamically created following the `schema`. """ + + def clean_refs(value): + """Recursively remove $ref keys and #/$defs values from a data structure.""" + if isinstance(value, dict): + # Create a new dictionary without $ref keys and without values containing #/$defs + cleaned_dict = {} + for k, v in value.items(): + if k == '$ref': + continue + cleaned_dict[k] = clean_refs(v) + return cleaned_dict + elif isinstance(value, list): + # Process each item in the list + return [clean_refs(item) for item in value] + else: + # Return primitive values as-is + return value + if not definitions: definitions = ( schema.get('definitions', {}) if not is_pydantic_v2 else schema.get('$defs') @@ -275,10 +295,10 @@ class MyDoc(BaseDoc): for field_name, field_schema in schema.get('properties', {}).items(): if field_name == 'id': has_id = True + # Get the field type field_type = _get_field_annotation_from_schema( field_schema=field_schema, field_name=field_name, - root_schema=schema, cached_models=cached_models, is_tensor=False, num_recursions=0, @@ -294,10 +314,22 @@ class MyDoc(BaseDoc): field_kwargs = {} field_json_schema_extra = {} for k, v in field_schema.items(): + if field_name == 'id': + # Skip default_factory for Optional fields and use None + field_kwargs['default'] = None if k in FieldInfo.__slots__: field_kwargs[k] = v else: - field_json_schema_extra[k] = v + if k != '$ref': + if isinstance(v, dict): + cleaned_v = clean_refs(v) + if ( + cleaned_v + ): # Only add if there's something left after cleaning + field_json_schema_extra[k] = cleaned_v + else: + field_json_schema_extra[k] = v + fields[field_name] = ( field_type, FieldInfo( diff --git a/tests/benchmark_tests/test_map.py b/tests/benchmark_tests/test_map.py index e5c664a408..2fc7b09496 100644 --- a/tests/benchmark_tests/test_map.py +++ b/tests/benchmark_tests/test_map.py @@ -29,9 +29,9 @@ def test_map_docs_multiprocessing(): if os.cpu_count() > 1: def time_multiprocessing(num_workers: int) -> float: - n_docs = 5 + n_docs = 10 rng = np.random.RandomState(0) - matrices = [rng.random(size=(1000, 1000)) for _ in range(n_docs)] + matrices = 
[rng.random(size=(100, 100)) for _ in range(n_docs)] da = DocList[MyMatrix]([MyMatrix(matrix=m) for m in matrices]) start_time = time() list( @@ -65,7 +65,7 @@ def test_map_docs_batched_multiprocessing(): def time_multiprocessing(num_workers: int) -> float: n_docs = 16 rng = np.random.RandomState(0) - matrices = [rng.random(size=(1000, 1000)) for _ in range(n_docs)] + matrices = [rng.random(size=(100, 100)) for _ in range(n_docs)] da = DocList[MyMatrix]([MyMatrix(matrix=m) for m in matrices]) start_time = time() list( diff --git a/tests/index/base_classes/test_base_doc_store.py b/tests/index/base_classes/test_base_doc_store.py index faf146df6f..7337969428 100644 --- a/tests/index/base_classes/test_base_doc_store.py +++ b/tests/index/base_classes/test_base_doc_store.py @@ -13,6 +13,7 @@ from docarray.typing import ID, ImageBytes, ImageUrl, NdArray from docarray.typing.tensor.abstract_tensor import AbstractTensor from docarray.utils._internal.misc import torch_imported +from docarray.utils._internal._typing import safe_issubclass pytestmark = pytest.mark.index @@ -54,7 +55,7 @@ class DummyDocIndex(BaseDocIndex): def __init__(self, db_config=None, **kwargs): super().__init__(db_config=db_config, **kwargs) for col_name, col in self._column_infos.items(): - if issubclass(col.docarray_type, AnyDocArray): + if safe_issubclass(col.docarray_type, AnyDocArray): sub_db_config = copy.deepcopy(self._db_config) self._subindices[col_name] = self.__class__[col.docarray_type.doc_type]( db_config=sub_db_config, subindex=True @@ -159,7 +160,7 @@ def test_create_columns(): assert index._column_infos['id'].n_dim is None assert index._column_infos['id'].config['hi'] == 'there' - assert issubclass(index._column_infos['tens'].docarray_type, AbstractTensor) + assert safe_issubclass(index._column_infos['tens'].docarray_type, AbstractTensor) assert index._column_infos['tens'].db_type == str assert index._column_infos['tens'].n_dim == 10 assert index._column_infos['tens'].config == {'dim': 1000, 'hi': 'there'} @@ -173,12 +174,16 @@ def test_create_columns(): assert index._column_infos['id'].n_dim is None assert index._column_infos['id'].config['hi'] == 'there' - assert issubclass(index._column_infos['tens_one'].docarray_type, AbstractTensor) + assert safe_issubclass( + index._column_infos['tens_one'].docarray_type, AbstractTensor + ) assert index._column_infos['tens_one'].db_type == str assert index._column_infos['tens_one'].n_dim is None assert index._column_infos['tens_one'].config == {'dim': 10, 'hi': 'there'} - assert issubclass(index._column_infos['tens_two'].docarray_type, AbstractTensor) + assert safe_issubclass( + index._column_infos['tens_two'].docarray_type, AbstractTensor + ) assert index._column_infos['tens_two'].db_type == str assert index._column_infos['tens_two'].n_dim is None assert index._column_infos['tens_two'].config == {'dim': 50, 'hi': 'there'} @@ -192,7 +197,7 @@ def test_create_columns(): assert index._column_infos['id'].n_dim is None assert index._column_infos['id'].config['hi'] == 'there' - assert issubclass(index._column_infos['d__tens'].docarray_type, AbstractTensor) + assert safe_issubclass(index._column_infos['d__tens'].docarray_type, AbstractTensor) assert index._column_infos['d__tens'].db_type == str assert index._column_infos['d__tens'].n_dim == 10 assert index._column_infos['d__tens'].config == {'dim': 1000, 'hi': 'there'} @@ -206,7 +211,7 @@ def test_create_columns(): 'parent_id', ] - assert issubclass(index._column_infos['d'].docarray_type, AnyDocArray) + assert 
safe_issubclass(index._column_infos['d'].docarray_type, AnyDocArray) assert index._column_infos['d'].db_type is None assert index._column_infos['d'].n_dim is None assert index._column_infos['d'].config == {} @@ -216,7 +221,7 @@ def test_create_columns(): assert index._subindices['d']._column_infos['id'].n_dim is None assert index._subindices['d']._column_infos['id'].config['hi'] == 'there' - assert issubclass( + assert safe_issubclass( index._subindices['d']._column_infos['tens'].docarray_type, AbstractTensor ) assert index._subindices['d']._column_infos['tens'].db_type == str @@ -245,7 +250,7 @@ def test_create_columns(): 'parent_id', ] - assert issubclass( + assert safe_issubclass( index._subindices['d_root']._column_infos['d'].docarray_type, AnyDocArray ) assert index._subindices['d_root']._column_infos['d'].db_type is None @@ -266,7 +271,7 @@ def test_create_columns(): index._subindices['d_root']._subindices['d']._column_infos['id'].config['hi'] == 'there' ) - assert issubclass( + assert safe_issubclass( index._subindices['d_root'] ._subindices['d'] ._column_infos['tens'] @@ -461,11 +466,16 @@ class OtherNestedDoc(NestedDoc): # SIMPLE index = DummyDocIndex[SimpleDoc]() in_list = [SimpleDoc(tens=np.random.random((10,)))] - assert isinstance(index._validate_docs(in_list), DocList[BaseDoc]) + assert isinstance(index._validate_docs(in_list), DocList) + for d in index._validate_docs(in_list): + assert isinstance(d, BaseDoc) + in_da = DocList[SimpleDoc](in_list) assert index._validate_docs(in_da) == in_da in_other_list = [OtherSimpleDoc(tens=np.random.random((10,)))] - assert isinstance(index._validate_docs(in_other_list), DocList[BaseDoc]) + assert isinstance(index._validate_docs(in_other_list), DocList) + for d in index._validate_docs(in_other_list): + assert isinstance(d, BaseDoc) in_other_da = DocList[OtherSimpleDoc](in_other_list) assert index._validate_docs(in_other_da) == in_other_da @@ -494,7 +504,9 @@ class OtherNestedDoc(NestedDoc): in_list = [ FlatDoc(tens_one=np.random.random((10,)), tens_two=np.random.random((50,))) ] - assert isinstance(index._validate_docs(in_list), DocList[BaseDoc]) + assert isinstance(index._validate_docs(in_list), DocList) + for d in index._validate_docs(in_list): + assert isinstance(d, BaseDoc) in_da = DocList[FlatDoc]( [FlatDoc(tens_one=np.random.random((10,)), tens_two=np.random.random((50,)))] ) @@ -502,7 +514,9 @@ class OtherNestedDoc(NestedDoc): in_other_list = [ OtherFlatDoc(tens_one=np.random.random((10,)), tens_two=np.random.random((50,))) ] - assert isinstance(index._validate_docs(in_other_list), DocList[BaseDoc]) + assert isinstance(index._validate_docs(in_other_list), DocList) + for d in index._validate_docs(in_other_list): + assert isinstance(d, BaseDoc) in_other_da = DocList[OtherFlatDoc]( [ OtherFlatDoc( @@ -521,11 +535,15 @@ class OtherNestedDoc(NestedDoc): # NESTED index = DummyDocIndex[NestedDoc]() in_list = [NestedDoc(d=SimpleDoc(tens=np.random.random((10,))))] - assert isinstance(index._validate_docs(in_list), DocList[BaseDoc]) + assert isinstance(index._validate_docs(in_list), DocList) + for d in index._validate_docs(in_list): + assert isinstance(d, BaseDoc) in_da = DocList[NestedDoc]([NestedDoc(d=SimpleDoc(tens=np.random.random((10,))))]) assert index._validate_docs(in_da) == in_da in_other_list = [OtherNestedDoc(d=OtherSimpleDoc(tens=np.random.random((10,))))] - assert isinstance(index._validate_docs(in_other_list), DocList[BaseDoc]) + assert isinstance(index._validate_docs(in_other_list), DocList) + for d in 
index._validate_docs(in_other_list): + assert isinstance(d, BaseDoc) in_other_da = DocList[OtherNestedDoc]( [OtherNestedDoc(d=OtherSimpleDoc(tens=np.random.random((10,))))] ) @@ -552,7 +570,9 @@ class TensorUnionDoc(BaseDoc): # OPTIONAL index = DummyDocIndex[SimpleDoc]() in_list = [OptionalDoc(tens=np.random.random((10,)))] - assert isinstance(index._validate_docs(in_list), DocList[BaseDoc]) + assert isinstance(index._validate_docs(in_list), DocList) + for d in index._validate_docs(in_list): + assert isinstance(d, BaseDoc) in_da = DocList[OptionalDoc](in_list) assert index._validate_docs(in_da) == in_da @@ -562,9 +582,13 @@ class TensorUnionDoc(BaseDoc): # MIXED UNION index = DummyDocIndex[SimpleDoc]() in_list = [MixedUnionDoc(tens=np.random.random((10,)))] - assert isinstance(index._validate_docs(in_list), DocList[BaseDoc]) + assert isinstance(index._validate_docs(in_list), DocList) + for d in index._validate_docs(in_list): + assert isinstance(d, BaseDoc) in_da = DocList[MixedUnionDoc](in_list) - assert isinstance(index._validate_docs(in_da), DocList[BaseDoc]) + assert isinstance(index._validate_docs(in_da), DocList) + for d in index._validate_docs(in_da): + assert isinstance(d, BaseDoc) with pytest.raises(ValueError): index._validate_docs([MixedUnionDoc(tens='hello')]) @@ -572,13 +596,17 @@ class TensorUnionDoc(BaseDoc): # TENSOR UNION index = DummyDocIndex[TensorUnionDoc]() in_list = [SimpleDoc(tens=np.random.random((10,)))] - assert isinstance(index._validate_docs(in_list), DocList[BaseDoc]) + assert isinstance(index._validate_docs(in_list), DocList) + for d in index._validate_docs(in_list): + assert isinstance(d, BaseDoc) in_da = DocList[SimpleDoc](in_list) assert index._validate_docs(in_da) == in_da index = DummyDocIndex[SimpleDoc]() in_list = [TensorUnionDoc(tens=np.random.random((10,)))] - assert isinstance(index._validate_docs(in_list), DocList[BaseDoc]) + assert isinstance(index._validate_docs(in_list), DocList) + for d in index._validate_docs(in_list): + assert isinstance(d, BaseDoc) in_da = DocList[TensorUnionDoc](in_list) assert index._validate_docs(in_da) == in_da diff --git a/tests/integrations/array/test_optional_doc_vec.py b/tests/integrations/array/test_optional_doc_vec.py index bb793152d3..dd77c66762 100644 --- a/tests/integrations/array/test_optional_doc_vec.py +++ b/tests/integrations/array/test_optional_doc_vec.py @@ -20,7 +20,8 @@ class Image(BaseDoc): docs.features = [Features(tensor=np.random.random([100])) for _ in range(10)] print(docs.features) # - assert isinstance(docs.features, DocVec[Features]) + assert isinstance(docs.features, DocVec) + assert isinstance(docs.features[0], Features) docs.features.tensor = np.ones((10, 100)) diff --git a/tests/integrations/externals/test_fastapi.py b/tests/integrations/externals/test_fastapi.py index 02967a07cd..c5ef186821 100644 --- a/tests/integrations/externals/test_fastapi.py +++ b/tests/integrations/externals/test_fastapi.py @@ -1,5 +1,5 @@ -from typing import List - +from typing import Any, Dict, List, Optional, Union, ClassVar +import json import numpy as np import pytest from fastapi import FastAPI @@ -8,7 +8,9 @@ from docarray import BaseDoc, DocList from docarray.base_doc import DocArrayResponse from docarray.documents import ImageDoc, TextDoc -from docarray.typing import NdArray +from docarray.typing import NdArray, AnyTensor, ImageUrl + +from docarray.utils._internal.pydantic import is_pydantic_v2 @pytest.mark.asyncio @@ -135,3 +137,256 @@ async def func(fastapi_docs: List[ImageDoc]) -> List[ImageDoc]: docs = 
DocList[ImageDoc].from_json(response.content.decode()) assert len(docs) == 2 assert docs[0].tensor.shape == (3, 224, 224) + + +@pytest.mark.asyncio +@pytest.mark.skipif( + not is_pydantic_v2, reason='Behavior is only available for Pydantic V2' +) +async def test_doclist_directly(): + from fastapi import Body + + doc = ImageDoc(tensor=np.zeros((3, 224, 224)), url='url') + docs = DocList[ImageDoc]([doc, doc]) + + app = FastAPI() + + @app.post("/doc/", response_class=DocArrayResponse) + async def func_embed_false( + fastapi_docs: DocList[ImageDoc] = Body(embed=False), + ) -> DocList[ImageDoc]: + return fastapi_docs + + @app.post("/doc_default/", response_class=DocArrayResponse) + async def func_default(fastapi_docs: DocList[ImageDoc]) -> DocList[ImageDoc]: + return fastapi_docs + + @app.post("/doc_embed/", response_class=DocArrayResponse) + async def func_embed_true( + fastapi_docs: DocList[ImageDoc] = Body(embed=True), + ) -> DocList[ImageDoc]: + return fastapi_docs + + async with AsyncClient(app=app, base_url="http://test") as ac: + response = await ac.post("/doc/", data=docs.to_json()) + response_default = await ac.post("/doc_default/", data=docs.to_json()) + embed_content_json = {'fastapi_docs': json.loads(docs.to_json())} + response_embed = await ac.post( + "/doc_embed/", + json=embed_content_json, + ) + resp_doc = await ac.get("/docs") + resp_redoc = await ac.get("/redoc") + + assert response.status_code == 200 + assert response_default.status_code == 200 + assert response_embed.status_code == 200 + assert resp_doc.status_code == 200 + assert resp_redoc.status_code == 200 + + docs = DocList[ImageDoc].from_json(response.content.decode()) + assert len(docs) == 2 + assert docs[0].tensor.shape == (3, 224, 224) + + docs_default = DocList[ImageDoc].from_json(response_default.content.decode()) + assert len(docs_default) == 2 + assert docs_default[0].tensor.shape == (3, 224, 224) + + docs_embed = DocList[ImageDoc].from_json(response_embed.content.decode()) + assert len(docs_embed) == 2 + assert docs_embed[0].tensor.shape == (3, 224, 224) + + +@pytest.mark.asyncio +@pytest.mark.skipif( + not is_pydantic_v2, reason='Behavior is only available for Pydantic V2' +) +async def test_doclist_complex_schema(): + from fastapi import Body + + class Nested2Doc(BaseDoc): + value: str + classvar: ClassVar[str] = 'classvar2' + + class Nested1Doc(BaseDoc): + nested: Nested2Doc + classvar: ClassVar[str] = 'classvar1' + + class CustomDoc(BaseDoc): + tensor: Optional[AnyTensor] = None + url: ImageUrl + num: float = 0.5 + num_num: List[float] = [1.5, 2.5] + lll: List[List[List[int]]] = [[[5]]] + fff: List[List[List[float]]] = [[[5.2]]] + single_text: TextDoc + texts: DocList[TextDoc] + d: Dict[str, str] = {'a': 'b'} + di: Optional[Dict[str, int]] = None + u: Union[str, int] + lu: List[Union[str, int]] = [0, 1, 2] + tags: Optional[Dict[str, Any]] = None + nested: Nested1Doc + embedding: NdArray + classvar: ClassVar[str] = 'classvar' + + docs = DocList[CustomDoc]( + [ + CustomDoc( + num=3.5, + num_num=[4.5, 5.5], + url='photo.jpg', + lll=[[[40]]], + fff=[[[40.2]]], + d={'b': 'a'}, + texts=DocList[TextDoc]([TextDoc(text='hey ha', embedding=np.zeros(3))]), + single_text=TextDoc(text='single hey ha', embedding=np.zeros(2)), + u='a', + lu=[3, 4], + embedding=np.random.random((1, 4)), + nested=Nested1Doc(nested=Nested2Doc(value='hello world')), + ) + ] + ) + + app = FastAPI() + + @app.post("/doc/", response_class=DocArrayResponse) + async def func_embed_false( + fastapi_docs: DocList[CustomDoc] = Body(embed=False), + ) 
-> DocList[CustomDoc]: + for doc in fastapi_docs: + doc.tensor = np.zeros((10, 10, 10)) + doc.di = {'a': 2} + + return fastapi_docs + + @app.post("/doc_default/", response_class=DocArrayResponse) + async def func_default(fastapi_docs: DocList[CustomDoc]) -> DocList[CustomDoc]: + for doc in fastapi_docs: + doc.tensor = np.zeros((10, 10, 10)) + doc.di = {'a': 2} + return fastapi_docs + + @app.post("/doc_embed/", response_class=DocArrayResponse) + async def func_embed_true( + fastapi_docs: DocList[CustomDoc] = Body(embed=True), + ) -> DocList[CustomDoc]: + for doc in fastapi_docs: + doc.tensor = np.zeros((10, 10, 10)) + doc.di = {'a': 2} + return fastapi_docs + + async with AsyncClient(app=app, base_url="http://test") as ac: + response = await ac.post("/doc/", data=docs.to_json()) + response_default = await ac.post("/doc_default/", data=docs.to_json()) + embed_content_json = {'fastapi_docs': json.loads(docs.to_json())} + response_embed = await ac.post( + "/doc_embed/", + json=embed_content_json, + ) + resp_doc = await ac.get("/docs") + resp_redoc = await ac.get("/redoc") + + assert response.status_code == 200 + assert response_default.status_code == 200 + assert response_embed.status_code == 200 + assert resp_doc.status_code == 200 + assert resp_redoc.status_code == 200 + + resp_json = json.loads(response_default.content.decode()) + assert isinstance(resp_json[0]["tensor"], list) + assert isinstance(resp_json[0]["embedding"], list) + assert isinstance(resp_json[0]["texts"][0]["embedding"], list) + + docs_response = DocList[CustomDoc].from_json(response.content.decode()) + assert len(docs_response) == 1 + assert docs_response[0].url == 'photo.jpg' + assert docs_response[0].num == 3.5 + assert docs_response[0].num_num == [4.5, 5.5] + assert docs_response[0].lll == [[[40]]] + assert docs_response[0].lu == [3, 4] + assert docs_response[0].fff == [[[40.2]]] + assert docs_response[0].di == {'a': 2} + assert docs_response[0].d == {'b': 'a'} + assert len(docs_response[0].texts) == 1 + assert docs_response[0].texts[0].text == 'hey ha' + assert docs_response[0].texts[0].embedding.shape == (3,) + assert docs_response[0].tensor.shape == (10, 10, 10) + assert docs_response[0].u == 'a' + assert docs_response[0].single_text.text == 'single hey ha' + assert docs_response[0].single_text.embedding.shape == (2,) + + docs_default = DocList[CustomDoc].from_json(response_default.content.decode()) + assert len(docs_default) == 1 + assert docs_default[0].url == 'photo.jpg' + assert docs_default[0].num == 3.5 + assert docs_default[0].num_num == [4.5, 5.5] + assert docs_default[0].lll == [[[40]]] + assert docs_default[0].lu == [3, 4] + assert docs_default[0].fff == [[[40.2]]] + assert docs_default[0].di == {'a': 2} + assert docs_default[0].d == {'b': 'a'} + assert len(docs_default[0].texts) == 1 + assert docs_default[0].texts[0].text == 'hey ha' + assert docs_default[0].texts[0].embedding.shape == (3,) + assert docs_default[0].tensor.shape == (10, 10, 10) + assert docs_default[0].u == 'a' + assert docs_default[0].single_text.text == 'single hey ha' + assert docs_default[0].single_text.embedding.shape == (2,) + + docs_embed = DocList[CustomDoc].from_json(response_embed.content.decode()) + assert len(docs_embed) == 1 + assert docs_embed[0].url == 'photo.jpg' + assert docs_embed[0].num == 3.5 + assert docs_embed[0].num_num == [4.5, 5.5] + assert docs_embed[0].lll == [[[40]]] + assert docs_embed[0].lu == [3, 4] + assert docs_embed[0].fff == [[[40.2]]] + assert docs_embed[0].di == {'a': 2} + assert docs_embed[0].d == {'b': 
'a'} + assert len(docs_embed[0].texts) == 1 + assert docs_embed[0].texts[0].text == 'hey ha' + assert docs_embed[0].texts[0].embedding.shape == (3,) + assert docs_embed[0].tensor.shape == (10, 10, 10) + assert docs_embed[0].u == 'a' + assert docs_embed[0].single_text.text == 'single hey ha' + assert docs_embed[0].single_text.embedding.shape == (2,) + + +@pytest.mark.asyncio +@pytest.mark.skipif( + not is_pydantic_v2, reason='Behavior is only available for Pydantic V2' +) +async def test_simple_directly(): + app = FastAPI() + + @app.post("/doc_list/", response_class=DocArrayResponse) + async def func_doc_list(fastapi_docs: DocList[TextDoc]) -> DocList[TextDoc]: + return fastapi_docs + + @app.post("/doc_single/", response_class=DocArrayResponse) + async def func_doc_single(fastapi_doc: TextDoc) -> TextDoc: + return fastapi_doc + + async with AsyncClient(app=app, base_url="http://test") as ac: + response_doc_list = await ac.post( + "/doc_list/", data=json.dumps([{"text": "text"}]) + ) + response_single = await ac.post( + "/doc_single/", data=json.dumps({"text": "text"}) + ) + resp_doc = await ac.get("/docs") + resp_redoc = await ac.get("/redoc") + + assert response_doc_list.status_code == 200 + assert response_single.status_code == 200 + assert resp_doc.status_code == 200 + assert resp_redoc.status_code == 200 + + docs = DocList[TextDoc].from_json(response_doc_list.content.decode()) + assert len(docs) == 1 + assert docs[0].text == 'text' + + doc = TextDoc.from_json(response_single.content.decode()) + assert doc == 'text' diff --git a/tests/integrations/torch/data/test_torch_dataset.py b/tests/integrations/torch/data/test_torch_dataset.py index f358f1c16b..5d8236a70b 100644 --- a/tests/integrations/torch/data/test_torch_dataset.py +++ b/tests/integrations/torch/data/test_torch_dataset.py @@ -60,7 +60,9 @@ def test_torch_dataset(captions_da: DocList[PairTextImage]): batch_lens = [] for batch in loader: - assert isinstance(batch, DocVec[PairTextImage]) + assert isinstance(batch, DocVec) + for d in batch: + assert isinstance(d, PairTextImage) batch_lens.append(len(batch)) assert all(x == BATCH_SIZE for x in batch_lens[:-1]) @@ -140,7 +142,9 @@ def test_torch_dl_multiprocessing(captions_da: DocList[PairTextImage]): batch_lens = [] for batch in loader: - assert isinstance(batch, DocVec[PairTextImage]) + assert isinstance(batch, DocVec) + for d in batch: + assert isinstance(d, PairTextImage) batch_lens.append(len(batch)) assert all(x == BATCH_SIZE for x in batch_lens[:-1]) diff --git a/tests/units/array/stack/storage/test_storage.py b/tests/units/array/stack/storage/test_storage.py index 01c1b68a16..b91585d373 100644 --- a/tests/units/array/stack/storage/test_storage.py +++ b/tests/units/array/stack/storage/test_storage.py @@ -26,8 +26,9 @@ class MyDoc(BaseDoc): for name in storage.any_columns['name']: assert name == 'hello' inner_docs = storage.doc_columns['doc'] - assert isinstance(inner_docs, DocVec[InnerDoc]) + assert isinstance(inner_docs, DocVec) for i, doc in enumerate(inner_docs): + assert isinstance(doc, InnerDoc) assert doc.price == i diff --git a/tests/units/array/stack/test_array_stacked.py b/tests/units/array/stack/test_array_stacked.py index 2a3790da1d..b1b385840d 100644 --- a/tests/units/array/stack/test_array_stacked.py +++ b/tests/units/array/stack/test_array_stacked.py @@ -504,7 +504,9 @@ class ImageDoc(BaseDoc): da = parse_obj_as(DocVec[ImageDoc], batch) - assert isinstance(da, DocVec[ImageDoc]) + assert isinstance(da, DocVec) + for d in da: + assert isinstance(d, ImageDoc) def 
test_validation_column_tensor(batch): @@ -536,14 +538,18 @@ def test_validation_column_doc(batch_nested_doc): batch, Doc, Inner = batch_nested_doc batch.inner = DocList[Inner]([Inner(hello='hello') for _ in range(10)]) - assert isinstance(batch.inner, DocVec[Inner]) + assert isinstance(batch.inner, DocVec) + for d in batch.inner: + assert isinstance(d, Inner) def test_validation_list_doc(batch_nested_doc): batch, Doc, Inner = batch_nested_doc batch.inner = [Inner(hello='hello') for _ in range(10)] - assert isinstance(batch.inner, DocVec[Inner]) + assert isinstance(batch.inner, DocVec) + for d in batch.inner: + assert isinstance(d, Inner) def test_validation_col_doc_fail(batch_nested_doc): diff --git a/tests/units/array/stack/test_proto.py b/tests/units/array/stack/test_proto.py index 8c559826b8..d46766cde3 100644 --- a/tests/units/array/stack/test_proto.py +++ b/tests/units/array/stack/test_proto.py @@ -13,6 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import os from typing import Dict, Optional, Union import numpy as np @@ -245,6 +246,7 @@ class MyDoc(BaseDoc): assert da_after._storage.any_columns['d'] == [None, None] +@pytest.mark.skipif('GITHUB_WORKFLOW' in os.environ, reason='Flaky in Github') @pytest.mark.proto @pytest.mark.parametrize('tensor_type', [NdArray, TorchTensor]) def test_proto_tensor_type(tensor_type): diff --git a/tests/units/array/test_array.py b/tests/units/array/test_array.py index 1d93fb6b78..8e51cc1c37 100644 --- a/tests/units/array/test_array.py +++ b/tests/units/array/test_array.py @@ -486,6 +486,8 @@ def test_validate_list_dict(): dict(url=f'http://url.com/foo_{i}.png', tensor=NdArray(i)) for i in [2, 0, 1] ] + # docs = DocList[Image]([Image(url=image['url'], tensor=image['tensor']) for image in images]) + docs = parse_obj_as(DocList[Image], images) assert docs.url == [ @@ -520,5 +522,3 @@ def test_not_double_subcriptable(): with pytest.raises(TypeError) as excinfo: da = DocList[TextDoc][TextDoc] assert da is None - - assert 'not subscriptable' in str(excinfo.value) diff --git a/tests/units/array/test_array_from_to_bytes.py b/tests/units/array/test_array_from_to_bytes.py index abc31cb4ac..0ab952ce4a 100644 --- a/tests/units/array/test_array_from_to_bytes.py +++ b/tests/units/array/test_array_from_to_bytes.py @@ -43,11 +43,11 @@ def test_from_to_bytes(protocol, compress, show_progress, array_cls): @pytest.mark.parametrize( - 'protocol', ['protobuf'] # ['pickle-array', 'protobuf-array', 'protobuf', 'pickle'] + 'protocol', ['pickle-array', 'protobuf-array', 'protobuf', 'pickle'] ) -@pytest.mark.parametrize('compress', ['lz4']) # , 'bz2', 'lzma', 'zlib', 'gzip', None]) -@pytest.mark.parametrize('show_progress', [False]) # [False, True]) -@pytest.mark.parametrize('array_cls', [DocVec]) # [DocList, DocVec]) +@pytest.mark.parametrize('compress', ['lz4', 'bz2', 'lzma', 'zlib', 'gzip', None]) +@pytest.mark.parametrize('show_progress', [False, True]) # [False, True]) +@pytest.mark.parametrize('array_cls', [DocList, DocVec]) def test_from_to_base64(protocol, compress, show_progress, array_cls): da = array_cls[MyDoc]( [ @@ -75,27 +75,35 @@ def test_from_to_base64(protocol, compress, show_progress, array_cls): # test_from_to_base64('protobuf', 'lz4', False, DocVec) +class MyTensorTypeDocNdArray(BaseDoc): + embedding: NdArray + text: str + image: ImageDoc -@pytest.mark.parametrize('tensor_type', [NdArray, TorchTensor]) 
-@pytest.mark.parametrize('protocol', ['protobuf-array', 'pickle-array']) -def test_from_to_base64_tensor_type(tensor_type, protocol): - class MyDoc(BaseDoc): - embedding: tensor_type - text: str - image: ImageDoc +class MyTensorTypeDocTorchTensor(BaseDoc): + embedding: TorchTensor + text: str + image: ImageDoc - da = DocVec[MyDoc]( + +@pytest.mark.parametrize( + 'doc_type, tensor_type', + [(MyTensorTypeDocNdArray, NdArray), (MyTensorTypeDocTorchTensor, TorchTensor)], +) +@pytest.mark.parametrize('protocol', ['protobuf-array', 'pickle-array']) +def test_from_to_base64_tensor_type(doc_type, tensor_type, protocol): + da = DocVec[doc_type]( [ - MyDoc( + doc_type( embedding=[1, 2, 3, 4, 5], text='hello', image=ImageDoc(url='aux.png') ), - MyDoc(embedding=[5, 4, 3, 2, 1], text='hello world', image=ImageDoc()), + doc_type(embedding=[5, 4, 3, 2, 1], text='hello world', image=ImageDoc()), ], tensor_type=tensor_type, ) bytes_da = da.to_base64(protocol=protocol) - da2 = DocVec[MyDoc].from_base64( + da2 = DocVec[doc_type].from_base64( bytes_da, tensor_type=tensor_type, protocol=protocol ) assert da2.tensor_type == tensor_type diff --git a/tests/units/array/test_doclist_schema.py b/tests/units/array/test_doclist_schema.py new file mode 100644 index 0000000000..02a5f56280 --- /dev/null +++ b/tests/units/array/test_doclist_schema.py @@ -0,0 +1,22 @@ +import pytest +from docarray import BaseDoc, DocList +from docarray.utils._internal.pydantic import is_pydantic_v2 + + +@pytest.mark.skipif(not is_pydantic_v2, reason='Feature only available for Pydantic V2') +def test_schema_nested(): + # check issue https://github.com/docarray/docarray/issues/1521 + + class Doc1Test(BaseDoc): + aux: str + + class DocDocTest(BaseDoc): + docs: DocList[Doc1Test] + + assert 'Doc1Test' in DocDocTest.schema()['$defs'] + d = DocDocTest(docs=DocList[Doc1Test]([Doc1Test(aux='aux')])) + + assert isinstance(d.docs, DocList) + for dd in d.docs: + assert isinstance(dd, Doc1Test) + assert d.docs.aux == ['aux'] diff --git a/tests/units/document/test_doc_wo_id.py b/tests/units/document/test_doc_wo_id.py index ffda3ceec4..4e2a8bba11 100644 --- a/tests/units/document/test_doc_wo_id.py +++ b/tests/units/document/test_doc_wo_id.py @@ -23,4 +23,9 @@ class A(BaseDocWithoutId): cls_doc_list = DocList[A] - assert isinstance(cls_doc_list, type) + da = cls_doc_list([A(text='hey here')]) + + assert isinstance(da, DocList) + for d in da: + assert isinstance(d, A) + assert not hasattr(d, 'id') diff --git a/tests/units/typing/da/test_relations.py b/tests/units/typing/da/test_relations.py index f583abef2e..cadac712f5 100644 --- a/tests/units/typing/da/test_relations.py +++ b/tests/units/typing/da/test_relations.py @@ -13,9 +13,16 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+ +import pytest from docarray import BaseDoc, DocList +from docarray.utils._internal.pydantic import is_pydantic_v2 +@pytest.mark.skipif( + is_pydantic_v2, + reason="Subscripted generics cannot be used with class and instance checks", +) def test_instance_and_equivalence(): class MyDoc(BaseDoc): text: str @@ -28,6 +35,10 @@ class MyDoc(BaseDoc): assert isinstance(docs, DocList[MyDoc]) +@pytest.mark.skipif( + is_pydantic_v2, + reason="Subscripted generics cannot be used with class and instance checks", +) def test_subclassing(): class MyDoc(BaseDoc): text: str diff --git a/tests/units/util/test_create_dynamic_code_class.py b/tests/units/util/test_create_dynamic_code_class.py index eba25911c4..b7df497816 100644 --- a/tests/units/util/test_create_dynamic_code_class.py +++ b/tests/units/util/test_create_dynamic_code_class.py @@ -45,6 +45,7 @@ class CustomDoc(BaseDoc): new_custom_doc_model = create_base_doc_from_schema( CustomDocCopy.schema(), 'CustomDoc', {} ) + print(f'new_custom_doc_model {new_custom_doc_model.schema()}') original_custom_docs = DocList[CustomDoc]( [ @@ -131,6 +132,7 @@ class TextDocWithId(BaseDoc): new_textdoc_with_id_model = create_base_doc_from_schema( TextDocWithIdCopy.schema(), 'TextDocWithId', {} ) + print(f'new_textdoc_with_id_model {new_textdoc_with_id_model.schema()}') original_text_doc_with_id = DocList[TextDocWithId]( [TextDocWithId(ia=f'ID {i}') for i in range(10)] @@ -207,6 +209,7 @@ class CustomDoc(BaseDoc): new_custom_doc_model = create_base_doc_from_schema( CustomDocCopy.schema(), 'CustomDoc' ) + print(f'new_custom_doc_model {new_custom_doc_model.schema()}') original_custom_docs = DocList[CustomDoc]() if transformation == 'proto': @@ -232,6 +235,7 @@ class TextDocWithId(BaseDoc): new_textdoc_with_id_model = create_base_doc_from_schema( TextDocWithIdCopy.schema(), 'TextDocWithId', {} ) + print(f'new_textdoc_with_id_model {new_textdoc_with_id_model.schema()}') original_text_doc_with_id = DocList[TextDocWithId]() if transformation == 'proto': @@ -255,6 +259,9 @@ class ResultTestDoc(BaseDoc): new_result_test_doc_with_id_model = create_base_doc_from_schema( ResultTestDocCopy.schema(), 'ResultTestDoc', {} ) + print( + f'new_result_test_doc_with_id_model {new_result_test_doc_with_id_model.schema()}' + ) result_test_docs = DocList[ResultTestDoc]() if transformation == 'proto': @@ -309,9 +316,10 @@ class SearchResult(BaseDoc): models_created_by_name = {} SearchResult_aux = create_pure_python_type_model(SearchResult) - _ = create_base_doc_from_schema( + m = create_base_doc_from_schema( SearchResult_aux.schema(), 'SearchResult', models_created_by_name ) + print(f'm {m.schema()}') QuoteFile_reconstructed_in_gateway_from_Search_results = models_created_by_name[ 'QuoteFile' ] @@ -323,3 +331,28 @@ class SearchResult(BaseDoc): QuoteFile_reconstructed_in_gateway_from_Search_results(id='0', texts=textlist) ) assert reconstructed_in_gateway_from_Search_results.texts[0].text == 'hey' + + +def test_id_optional(): + from docarray import BaseDoc + import json + + class MyTextDoc(BaseDoc): + text: str + opt: Optional[str] = None + + MyTextDoc_aux = create_pure_python_type_model(MyTextDoc) + td = create_base_doc_from_schema(MyTextDoc_aux.schema(), 'MyTextDoc') + print(f'{td.schema()}') + direct = MyTextDoc.from_json(json.dumps({"text": "text"})) + aux = MyTextDoc_aux.from_json(json.dumps({"text": "text"})) + indirect = td.from_json(json.dumps({"text": "text"})) + assert direct.text == 'text' + assert aux.text == 'text' + assert indirect.text == 'text' + direct = MyTextDoc(text='hey') 
+    aux = MyTextDoc_aux(text='hey')
+    indirect = td(text='hey')
+    assert direct.text == 'hey'
+    assert aux.text == 'hey'
+    assert indirect.text == 'hey'
diff --git a/tests/units/util/test_map.py b/tests/units/util/test_map.py
index 3b9f102d92..65dd3c1738 100644
--- a/tests/units/util/test_map.py
+++ b/tests/units/util/test_map.py
@@ -96,4 +96,6 @@ def test_map_docs_batched(n_docs, batch_size, backend):
 
     assert isinstance(it, Generator)
     for batch in it:
-        assert isinstance(batch, DocList[MyImage])
+        assert isinstance(batch, DocList)
+        for d in batch:
+            assert isinstance(d, MyImage)

From d3358105db645418c3cebfc6acb0f353127364aa Mon Sep 17 00:00:00 2001
From: Joan Fontanals
Date: Fri, 21 Mar 2025 09:04:06 +0100
Subject: [PATCH 4/7] chore: update pyproject version (#1919)

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index c908917161..efbfcb4fbb 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "docarray"
-version = '0.40.0'
+version = '0.41.0'
 description='The data structure for multimodal data'
 readme = 'README.md'
 authors=['DocArray']

From b5696b227161f087fa32834dcd6c2d212cf82c0e Mon Sep 17 00:00:00 2001
From: Joan Fontanals
Date: Fri, 21 Mar 2025 09:07:50 +0100
Subject: [PATCH 5/7] chore: fix poetry in ci (#1921)

---
 .github/workflows/force-release.yml | 2 +-
 .github/workflows/uncaped.yml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/force-release.yml b/.github/workflows/force-release.yml
index 3037e79108..3ad1af18ce 100644
--- a/.github/workflows/force-release.yml
+++ b/.github/workflows/force-release.yml
@@ -40,7 +40,7 @@ jobs:
       - run: |
           git fetch --depth=1 origin +refs/tags/*:refs/tags/*
           npm install git-release-notes
-          pip install poetry
+          python -m pip install poetry==1.7.1
           ./scripts/release.sh final "${{ github.event.inputs.release_reason }}" "${{github.actor}}"
         env:
           TWINE_USERNAME: __token__
diff --git a/.github/workflows/uncaped.yml b/.github/workflows/uncaped.yml
index e1cbafb6d4..ccb56bc249 100644
--- a/.github/workflows/uncaped.yml
+++ b/.github/workflows/uncaped.yml
@@ -21,7 +21,7 @@ jobs:
       - name: Prepare environment
        run: |
          python -m pip install --upgrade pip
-         python -m pip install poetry
+         python -m pip install poetry==1.7.1
          rm poetry.lock
          poetry install --all-extras
          poetry run pip install elasticsearch==8.6.2

From a162a4b09f4ad8e8c5c117c0c0101541af4c00a1 Mon Sep 17 00:00:00 2001
From: Joan Fontanals
Date: Fri, 21 Mar 2025 09:32:00 +0100
Subject: [PATCH 6/7] ci: fix release procedure (#1922)

---
 scripts/release.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/release.sh b/scripts/release.sh
index 03f492674b..f63e07282f 100755
--- a/scripts/release.sh
+++ b/scripts/release.sh
@@ -46,7 +46,7 @@ function clean_build {
 
 function pub_pypi {
     clean_build
-    poetry config http-basic.pypi $PYPI_USERNAME $PYPI_PASSWORD
+    poetry config http-basic.pypi $TWINE_USERNAME $TWINE_PASSWORD
     poetry publish --build
     clean_build
 }

From f5fc0f6d5f3dcb0201dc735262ef3256bdf054b9 Mon Sep 17 00:00:00 2001
From: Jina Dev Bot
Date: Fri, 21 Mar 2025 08:34:45 +0000
Subject: [PATCH 7/7] chore(version): the next version will be 0.40.2

build(JoanFM): release 0.41.0
---
 CHANGELOG.md | 52 ++++++++++++++++++++++++++++++++++++++++++++
 docarray/__init__.py | 2 +-
 docs/_versions.json | 2 +-
 3 files changed, 54 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index f062072288..48f2dedcd9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -15,6 +15,7 @@
 
 
 
+
 
 ## Release Note (`0.30.0`)
 
@@ -746,3 +747,54 @@
 - [[```8de3e175```](https://github.com/jina-ai/docarray/commit/8de3e1757bdb23b509ad2630219c3c26605308f0)] __-__ refactor test of the torchtensor (#1837) (*Naymul Islam*)
 - [[```d5d928b8```](https://github.com/jina-ai/docarray/commit/d5d928b82f36a3279277c07bed44fd22bb0bba34)] __-__ __version__: the next version will be 0.39.2 (*Jina Dev Bot*)
+
+## Release Note (`0.40.1`)
+
+> Release time: 2025-03-21 08:34:40
+
+
+
+🙇 We'd like to thank all contributors for this new release! In particular, Joan Fontanals, Emmanuel Ferdman, Casey Clements, YuXuan Tay, dependabot[bot], James Brown, Jina Dev Bot, 🙇
+
+
+### 🐞 Bug fixes
+
+ - [[```d98acb71```](https://github.com/jina-ai/docarray/commit/d98acb716e0c336a817f65b62d428ab13cf8ac42)] __-__ fix DocList schema when using Pydantic V2 (#1876) (*Joan Fontanals*)
+ - [[```83ebef60```](https://github.com/jina-ai/docarray/commit/83ebef6087e868517681e59877008f80f1e7f113)] __-__ update license location (#1911) (*Emmanuel Ferdman*)
+ - [[```8f4ba7cd```](https://github.com/jina-ai/docarray/commit/8f4ba7cdf177f3e4ecc838eef659496d6038af03)] __-__ use docker compose (#1905) (*YuXuan Tay*)
+ - [[```febbdc42```](https://github.com/jina-ai/docarray/commit/febbdc4291c4af7ad2058d7feebf6a3169de93e9)] __-__ fix float in dynamic Document creation (#1877) (*Joan Fontanals*)
+ - [[```7c1e18ef```](https://github.com/jina-ai/docarray/commit/7c1e18ef01b09ef3d864b200248c875d0d9ced29)] __-__ fix create pure python class iteratively (#1867) (*Joan Fontanals*)
+
+### 📗 Documentation
+
+ - [[```e4665e91```](https://github.com/jina-ai/docarray/commit/e4665e91b37f97a4a18a80399431d624db8ca453)] __-__ move hint about schemas to common docindex section (#1868) (*Joan Fontanals*)
+ - [[```8da50c92```](https://github.com/jina-ai/docarray/commit/8da50c927c24b981867650399f64d4930bd7c574)] __-__ add code review to contributing.md (#1853) (*Joan Fontanals*)
+
+### 🏁 Unit Test and CICD
+
+ - [[```a162a4b0```](https://github.com/jina-ai/docarray/commit/a162a4b09f4ad8e8c5c117c0c0101541af4c00a1)] __-__ fix release procedure (#1922) (*Joan Fontanals*)
+ - [[```82d7cee7```](https://github.com/jina-ai/docarray/commit/82d7cee71ccdd4d5874985aef0567631424b5bfd)] __-__ fix some ci (#1893) (*Joan Fontanals*)
+ - [[```791e4a04```](https://github.com/jina-ai/docarray/commit/791e4a0473afe9d9bde87733074eef0ce217d198)] __-__ update release procedure (#1869) (*Joan Fontanals*)
+ - [[```aa15b9ef```](https://github.com/jina-ai/docarray/commit/aa15b9eff2f5293849e83291d79bf519994c3503)] __-__ add license (#1861) (*Joan Fontanals*)
+
+### 🍹 Other Improvements
+
+ - [[```b5696b22```](https://github.com/jina-ai/docarray/commit/b5696b227161f087fa32834dcd6c2d212cf82c0e)] __-__ fix poetry in ci (#1921) (*Joan Fontanals*)
+ - [[```d3358105```](https://github.com/jina-ai/docarray/commit/d3358105db645418c3cebfc6acb0f353127364aa)] __-__ update pyproject version (#1919) (*Joan Fontanals*)
+ - [[```40cf2962```](https://github.com/jina-ai/docarray/commit/40cf29622b29be1f32595e26876593bb5f1e03be)] __-__ MongoDB Atlas: Two line change to make our CI builds green (#1910) (*Casey Clements*)
+ - [[```75e0033a```](https://github.com/jina-ai/docarray/commit/75e0033a361a31280709899e94d6f5e14ff4b8ae)] __-__ __deps__: bump setuptools from 65.5.1 to 70.0.0 (#1899) (*dependabot[bot]*)
+ - [[```75a743c9```](https://github.com/jina-ai/docarray/commit/75a743c99dc549eaf4c3ffe01086d09a8f3f3e44)] __-__ __deps-dev__: bump tornado from 6.2 to 6.4.1 (#1894) (*dependabot[bot]*)
+ - [[```f3fa7c23```](https://github.com/jina-ai/docarray/commit/f3fa7c2376da2449e98aff159167bf41467d610c)] __-__ __deps__: bump pydantic from 1.10.8 to 1.10.13 (#1884) (*dependabot[bot]*)
+ - [[```46d50828```](https://github.com/jina-ai/docarray/commit/46d5082844602689de97c904af7c8139980711ed)] __-__ __deps__: bump urllib3 from 1.26.14 to 1.26.19 (#1896) (*dependabot[bot]*)
+ - [[```f0f4236e```](https://github.com/jina-ai/docarray/commit/f0f4236ebf75528e6c5344dc75328ce9cf56cae9)] __-__ __deps__: bump zipp from 3.10.0 to 3.19.1 (#1898) (*dependabot[bot]*)
+ - [[```d65d27ce```](https://github.com/jina-ai/docarray/commit/d65d27ce37f5e7c930b7792fd665ac4da9c6398d)] __-__ __deps__: bump certifi from 2022.9.24 to 2024.7.4 (#1897) (*dependabot[bot]*)
+ - [[```b8b62173```](https://github.com/jina-ai/docarray/commit/b8b621735dbe16c188bf8c1c03cb3f1a22076ae8)] __-__ __deps__: bump authlib from 1.2.0 to 1.3.1 (#1895) (*dependabot[bot]*)
+ - [[```6a972d1c```](https://github.com/jina-ai/docarray/commit/6a972d1c0dcf6d0c2816dea14df37e0039945542)] __-__ __deps__: bump qdrant-client from 1.4.0 to 1.9.0 (#1892) (*dependabot[bot]*)
+ - [[```f71a5e6a```](https://github.com/jina-ai/docarray/commit/f71a5e6af58b77fdeb15ba27abd0b7d40b84fd09)] __-__ __deps__: bump cryptography from 40.0.1 to 42.0.4 (#1872) (*dependabot[bot]*)
+ - [[```065aab44```](https://github.com/jina-ai/docarray/commit/065aab441cd71635ee3711ad862240e967ca3da6)] __-__ __deps__: bump orjson from 3.8.2 to 3.9.15 (#1873) (*dependabot[bot]*)
+ - [[```caf97135```](https://github.com/jina-ai/docarray/commit/caf9713502791a8fbbf0aa53b3ca2db126f18df7)] __-__ add license notice to every file (#1860) (*Joan Fontanals*)
+ - [[```50376358```](https://github.com/jina-ai/docarray/commit/50376358163005e66a76cd0cb40217fd7a4f1252)] __-__ __deps-dev__: bump jupyterlab from 3.5.0 to 3.6.7 (#1848) (*dependabot[bot]*)
+ - [[```104b403b```](https://github.com/jina-ai/docarray/commit/104b403b2b61a485e2cc032a357f46e7dc8044fe)] __-__ __deps__: bump tj-actions/changed-files from 34 to 41 in /.github/workflows (#1844) (*dependabot[bot]*)
+ - [[```f9426a29```](https://github.com/jina-ai/docarray/commit/f9426a29b29580beae8805d2556b4a94ff493edc)] __-__ __version__: the next version will be 0.40.1 (*Jina Dev Bot*)
+
diff --git a/docarray/__init__.py b/docarray/__init__.py
index 5a18bb9588..20b08ba173 100644
--- a/docarray/__init__.py
+++ b/docarray/__init__.py
@@ -13,7 +13,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-__version__ = '0.40.1'
+__version__ = '0.40.2'
 
 import logging
 
diff --git a/docs/_versions.json b/docs/_versions.json
index b7c4791e91..f318a2796a 100644
--- a/docs/_versions.json
+++ b/docs/_versions.json
@@ -1 +1 @@
-[{"version": "v0.40.0"}, {"version": "v0.39.1"}, {"version": "v0.39.0"}, {"version": "v0.38.0"}, {"version": "v0.37.1"}, {"version": "v0.37.0"}, {"version": "v0.36.0"}, {"version": "v0.35.0"}, {"version": "v0.34.0"}, {"version": "v0.33.0"}, {"version": "v0.32.1"}, {"version": "v0.32.0"}, {"version": "v0.31.1"}, {"version": "v0.31.0"}, {"version": "v0.30.0"}, {"version": "v0.21.0"}, {"version": "v0.20.1"}, {"version": "v0.20.0"}, {"version": "v0.19.0"}, {"version": "v0.18.1"}, {"version": "v0.18.0"}, {"version": "v0.17.0"}, {"version": "v0.16.5"}, {"version": "v0.16.4"}, {"version": "v0.16.3"}, {"version": "v0.16.2"}, {"version": "v0.16.1"}, {"version": "v0.16.0"}, {"version": "v0.15.4"}, {"version": "v0.15.3"}, {"version": "v0.15.2"}, {"version": "v0.15.1"}, {"version": "v0.15.0"}, {"version": "v0.14.11"}, {"version": "v0.14.10"}, {"version": "v0.14.9"}, {"version": "v0.14.8"}, {"version": "v0.14.7"}, {"version": "v0.14.6"}, {"version": "v0.14.5"}, {"version": "v0.14.4"}, {"version": "v0.14.3"}, {"version": "v0.14.2"}, {"version": "v0.14.1"}, {"version": "v0.14.0"}, {"version": "v0.13.33"}, {"version": "v0.13.0"}, {"version": "v0.12.9"}, {"version": "v0.12.0"}, {"version": "v0.11.3"}, {"version": "v0.11.2"}, {"version": "v0.11.1"}, {"version": "v0.11.0"}, {"version": "v0.10.5"}, {"version": "v0.10.4"}, {"version": "v0.10.3"}, {"version": "v0.10.2"}, {"version": "v0.10.1"}, {"version": "v0.10.0"}]
\ No newline at end of file
+[{"version": "v0.40.1"}, {"version": "v0.40.0"}, {"version": "v0.39.1"}, {"version": "v0.39.0"}, {"version": "v0.38.0"}, {"version": "v0.37.1"}, {"version": "v0.37.0"}, {"version": "v0.36.0"}, {"version": "v0.35.0"}, {"version": "v0.34.0"}, {"version": "v0.33.0"}, {"version": "v0.32.1"}, {"version": "v0.32.0"}, {"version": "v0.31.1"}, {"version": "v0.31.0"}, {"version": "v0.30.0"}, {"version": "v0.21.0"}, {"version": "v0.20.1"}, {"version": "v0.20.0"}, {"version": "v0.19.0"}, {"version": "v0.18.1"}, {"version": "v0.18.0"}, {"version": "v0.17.0"}, {"version": "v0.16.5"}, {"version": "v0.16.4"}, {"version": "v0.16.3"}, {"version": "v0.16.2"}, {"version": "v0.16.1"}, {"version": "v0.16.0"}, {"version": "v0.15.4"}, {"version": "v0.15.3"}, {"version": "v0.15.2"}, {"version": "v0.15.1"}, {"version": "v0.15.0"}, {"version": "v0.14.11"}, {"version": "v0.14.10"}, {"version": "v0.14.9"}, {"version": "v0.14.8"}, {"version": "v0.14.7"}, {"version": "v0.14.6"}, {"version": "v0.14.5"}, {"version": "v0.14.4"}, {"version": "v0.14.3"}, {"version": "v0.14.2"}, {"version": "v0.14.1"}, {"version": "v0.14.0"}, {"version": "v0.13.33"}, {"version": "v0.13.0"}, {"version": "v0.12.9"}, {"version": "v0.12.0"}, {"version": "v0.11.3"}, {"version": "v0.11.2"}, {"version": "v0.11.1"}, {"version": "v0.11.0"}, {"version": "v0.10.5"}, {"version": "v0.10.4"}, {"version": "v0.10.3"}, {"version": "v0.10.2"}, {"version": "v0.10.1"}, {"version": "v0.10.0"}]
\ No newline at end of file
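
For readers who want to see the behaviour behind the "fix DocList schema when using Pydantic V2" change above, the following is a minimal, self-contained sketch of the dynamic-class round trip that the new `test_id_optional` test exercises. It assumes the helper functions live in `docarray.utils.create_dynamic_doc_class` (the module touched by that patch); treat it as an illustration distilled from the test code, not as additional reference documentation.

```python
from typing import Optional

from docarray import BaseDoc
from docarray.utils.create_dynamic_doc_class import (
    create_base_doc_from_schema,
    create_pure_python_type_model,
)


class MyTextDoc(BaseDoc):
    text: str
    opt: Optional[str] = None


# Flatten the doc type into plain Python field types, then rebuild an
# equivalent BaseDoc class from its JSON schema, as a gateway would do.
MyTextDoc_aux = create_pure_python_type_model(MyTextDoc)
Rebuilt = create_base_doc_from_schema(MyTextDoc_aux.schema(), 'MyTextDoc')

# The rebuilt class accepts the same payloads as the original one.
doc = Rebuilt.from_json('{"text": "hello"}')
assert doc.text == 'hello'
```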