diff --git a/integration/test_collection_hfresh.py b/integration/test_collection_hfresh.py new file mode 100644 index 000000000..9c93fb0a5 --- /dev/null +++ b/integration/test_collection_hfresh.py @@ -0,0 +1,124 @@ +import pytest +import weaviate +from integration.conftest import CollectionFactory +from weaviate.collections.classes.config import ( + Configure, + Reconfigure, + VectorDistances, + VectorIndexType, + Vectorizers, + _VectorIndexConfigHFresh, +) + + +def test_collection_config_hfresh(collection_factory: CollectionFactory) -> None: + collection_dummy = collection_factory("dummy") + if collection_dummy._connection._weaviate_version.is_lower_than(1, 36, 0): + pytest.skip("Hfresh index is not supported in Weaviate versions lower than 1.36.0") + + collection = collection_factory( + vector_index_config=Configure.VectorIndex.hfresh( + distance_metric=VectorDistances.COSINE, + max_posting_size_kb=1024, + replicas=2, + search_probe=50, + ) + ) + + config = collection.config.get() + + assert config.vector_index_type == VectorIndexType.HFRESH + assert isinstance(config.vector_index_config, _VectorIndexConfigHFresh) + assert config.vector_index_config.distance_metric == VectorDistances.COSINE + assert config.vector_index_config.max_posting_size_kb == 1024 + assert config.vector_index_config.replicas == 2 + assert config.vector_index_config.search_probe == 50 + + +def test_collection_named_vectors_hfresh(collection_factory: CollectionFactory) -> None: + collection_dummy = collection_factory("dummy") + if collection_dummy._connection._weaviate_version.is_lower_than(1, 36, 0): + pytest.skip("Hfresh index is not supported in Weaviate versions lower than 1.36.0") + + collection = collection_factory( + vector_config=[ + Configure.Vectors.self_provided( + name="title_vec", + vector_index_config=Configure.VectorIndex.hfresh( + distance_metric=VectorDistances.COSINE, + max_posting_size_kb=512, + replicas=1, + search_probe=25, + ), + ), + ], + ) + + config = 
collection.config.get() + + assert config.vector_config is not None + assert "title_vec" in config.vector_config + + title_config = config.vector_config["title_vec"] + assert title_config.vectorizer.vectorizer == Vectorizers.NONE + assert isinstance(title_config.vector_index_config, _VectorIndexConfigHFresh) + assert title_config.vector_index_config.distance_metric == VectorDistances.COSINE + assert title_config.vector_index_config.max_posting_size_kb == 512 + assert title_config.vector_index_config.replicas == 1 + assert title_config.vector_index_config.search_probe == 25 + + +def test_collection_update_hfresh(collection_factory: CollectionFactory) -> None: + collection_dummy = collection_factory("dummy") + if collection_dummy._connection._weaviate_version.is_lower_than(1, 36, 0): + pytest.skip("Hfresh index is not supported in Weaviate versions lower than 1.36.0") + + collection = collection_factory( + vector_index_config=Configure.VectorIndex.hfresh( + distance_metric=VectorDistances.COSINE, + max_posting_size_kb=512, + replicas=1, + search_probe=25, + ) + ) + + config = collection.config.get() + assert isinstance(config.vector_index_config, _VectorIndexConfigHFresh) + assert config.vector_index_config.max_posting_size_kb == 512 + assert config.vector_index_config.replicas == 1 + assert config.vector_index_config.search_probe == 25 + + collection.config.update(vector_index_config=Reconfigure.VectorIndex.hfresh(search_probe=100)) + + config = collection.config.get() + assert isinstance(config.vector_index_config, _VectorIndexConfigHFresh) + assert config.vector_index_config.max_posting_size_kb == 512 + assert config.vector_index_config.replicas == 1 + assert config.vector_index_config.search_probe == 100 + + +def test_collection_hfresh_export_and_reimport(collection_factory: CollectionFactory) -> None: + collection_dummy = collection_factory("dummy") + if collection_dummy._connection._weaviate_version.is_lower_than(1, 36, 0): + pytest.skip("Hfresh index is not 
supported in Weaviate versions lower than 1.36.0") + + collection = collection_factory( + vector_index_config=Configure.VectorIndex.hfresh( + distance_metric=VectorDistances.COSINE, + max_posting_size_kb=1024, + replicas=2, + search_probe=50, + ) + ) + + config = collection.config.get() + + name = f"TestHFreshExportAndReimport_{collection.name}" + config.name = name + with weaviate.connect_to_local() as client: + client.collections.delete(name) + client.collections.create_from_dict(config.to_dict()) + new = client.collections.use(name).config.get() + assert config == new + assert config.to_dict() == new.to_dict() + client.collections.delete(name) diff --git a/weaviate/collections/classes/config.py b/weaviate/collections/classes/config.py index 61375ed7d..ccb131ce0 100644 --- a/weaviate/collections/classes/config.py +++ b/weaviate/collections/classes/config.py @@ -52,6 +52,7 @@ _VectorIndexConfigDynamicUpdate, _VectorIndexConfigFlatUpdate, _VectorIndexConfigHNSWUpdate, + _VectorIndexConfigHFreshUpdate, _VectorIndexConfigUpdate, ) from weaviate.collections.classes.config_vector_index import ( @@ -1887,6 +1888,27 @@ def vector_index_type() -> str: VectorIndexConfigHNSW = _VectorIndexConfigHNSW +@dataclass +class _VectorIndexConfigHFresh(_VectorIndexConfig): + distance_metric: VectorDistances + max_posting_size_kb: int + replicas: int + search_probe: int + + @staticmethod + def vector_index_type() -> str: + return VectorIndexType.HFRESH.value + + def to_dict(self) -> Dict[str, Any]: + out = super().to_dict() + if "maxPostingSizeKb" in out: + out["maxPostingSizeKB"] = out.pop("maxPostingSizeKb") + return out + + +VectorIndexConfigHFresh = _VectorIndexConfigHFresh + + @dataclass class _VectorIndexConfigFlat(_VectorIndexConfig): distance_metric: VectorDistances @@ -1960,7 +1982,10 @@ def to_dict(self) -> Dict[str, Any]: class _NamedVectorConfig(_ConfigBase): vectorizer: _NamedVectorizerConfig vector_index_config: Union[ - VectorIndexConfigHNSW, VectorIndexConfigFlat, 
VectorIndexConfigDynamic + VectorIndexConfigHNSW, + VectorIndexConfigFlat, + VectorIndexConfigDynamic, + VectorIndexConfigHFresh, ] def to_dict(self) -> Dict: @@ -1997,7 +2022,11 @@ class _CollectionConfig(_ConfigBase): reranker_config: Optional[RerankerConfig] sharding_config: Optional[ShardingConfig] vector_index_config: Union[ - VectorIndexConfigHNSW, VectorIndexConfigFlat, VectorIndexConfigDynamic, None + VectorIndexConfigHNSW, + VectorIndexConfigFlat, + VectorIndexConfigDynamic, + VectorIndexConfigHFresh, + None, ] vector_index_type: Optional[VectorIndexType] vectorizer_config: Optional[VectorizerConfig] @@ -2749,6 +2778,25 @@ def dynamic( quantizer=quantizer, ) + @staticmethod + def hfresh( + max_posting_size_kb: Optional[int] = None, + search_probe: Optional[int] = None, + quantizer: Optional[_RQConfigUpdate] = None, + ) -> _VectorIndexConfigHFreshUpdate: + """Create a `_VectorIndexConfigHFreshUpdate` object to update the configuration of the HFresh vector index. + + Use this method when defining the `vector_index_config` argument in `collection.config.update()`. + + Args: + See [the docs](https://weaviate.io/developers/weaviate/configuration/indexes#how-to-configure-hfresh) for a more detailed view! + """ # noqa: D417 (missing argument descriptions in the docstring) + return _VectorIndexConfigHFreshUpdate( + maxPostingSizeKB=max_posting_size_kb, + searchProbe=search_probe, + quantizer=quantizer, + ) + class Reconfigure: """Use this factory class to generate the correct `xxxConfig` object for use when using the `collection.update()` method. 
diff --git a/weaviate/collections/classes/config_methods.py b/weaviate/collections/classes/config_methods.py index 15e82b9a0..26f94e2e3 100644 --- a/weaviate/collections/classes/config_methods.py +++ b/weaviate/collections/classes/config_methods.py @@ -41,6 +41,7 @@ _VectorIndexConfigDynamic, _VectorIndexConfigFlat, _VectorIndexConfigHNSW, + _VectorIndexConfigHFresh, _VectorizerConfig, ) @@ -212,6 +213,18 @@ def __get_hnsw_config(config: Dict[str, Any]) -> _VectorIndexConfigHNSW: ) +def __get_hfresh_config(config: Dict[str, Any]) -> _VectorIndexConfigHFresh: + quantizer = __get_quantizer_config(config) + return _VectorIndexConfigHFresh( + distance_metric=VectorDistances(config.get("distance")), + max_posting_size_kb=config["maxPostingSizeKB"], + replicas=config["replicas"], + search_probe=config["searchProbe"], + quantizer=quantizer, + multi_vector=None, + ) + + def __get_flat_config(config: Dict[str, Any]) -> _VectorIndexConfigFlat: quantizer = __get_quantizer_config(config) return _VectorIndexConfigFlat( @@ -224,7 +237,13 @@ def __get_flat_config(config: Dict[str, Any]) -> _VectorIndexConfigFlat: def __get_vector_index_config( schema: Dict[str, Any], -) -> Union[_VectorIndexConfigHNSW, _VectorIndexConfigFlat, _VectorIndexConfigDynamic, None]: +) -> Union[ + _VectorIndexConfigHNSW, + _VectorIndexConfigFlat, + _VectorIndexConfigDynamic, + _VectorIndexConfigHFresh, + None, +]: if "vectorIndexConfig" not in schema: return None if schema["vectorIndexType"] == "hnsw": @@ -238,6 +257,8 @@ def __get_vector_index_config( hnsw=__get_hnsw_config(schema["vectorIndexConfig"]["hnsw"]), flat=__get_flat_config(schema["vectorIndexConfig"]["flat"]), ) + elif schema["vectorIndexType"] == "hfresh": + return __get_hfresh_config(schema["vectorIndexConfig"]) else: return None diff --git a/weaviate/collections/classes/config_named_vectors.py b/weaviate/collections/classes/config_named_vectors.py index de2f5577d..e7b600325 100644 --- a/weaviate/collections/classes/config_named_vectors.py 
+++ b/weaviate/collections/classes/config_named_vectors.py @@ -15,6 +15,7 @@ _VectorIndexConfigDynamicUpdate, _VectorIndexConfigFlatUpdate, _VectorIndexConfigHNSWUpdate, + _VectorIndexConfigHFreshUpdate, _VectorIndexConfigUpdate, ) from weaviate.collections.classes.config_vectorizers import ( @@ -1340,6 +1341,7 @@ def update( *, vector_index_config: Union[ _VectorIndexConfigHNSWUpdate, + _VectorIndexConfigHFreshUpdate, _VectorIndexConfigFlatUpdate, _VectorIndexConfigDynamicUpdate, ], diff --git a/weaviate/collections/classes/config_vector_index.py b/weaviate/collections/classes/config_vector_index.py index 596cf0585..ff6a0ba40 100644 --- a/weaviate/collections/classes/config_vector_index.py +++ b/weaviate/collections/classes/config_vector_index.py @@ -34,11 +34,14 @@ class VectorIndexType(str, Enum): Attributes: HNSW: Hierarchical Navigable Small World (HNSW) index. FLAT: Flat index. + DYNAMIC: Dynamic index. + HFRESH: HFresh index. """ HNSW = "hnsw" FLAT = "flat" DYNAMIC = "dynamic" + HFRESH = "hfresh" class _MultiVectorConfigCreateBase(_ConfigCreateModel): @@ -127,6 +130,16 @@ def vector_index_type() -> VectorIndexType: return VectorIndexType.HNSW +class _VectorIndexConfigHFreshCreate(_VectorIndexConfigCreate): + maxPostingSizeKB: Optional[int] + replicas: Optional[int] + searchProbe: Optional[int] + + @staticmethod + def vector_index_type() -> VectorIndexType: + return VectorIndexType.HFRESH + + class _VectorIndexConfigFlatCreate(_VectorIndexConfigCreate): vectorCacheMaxObjects: Optional[int] @@ -149,6 +162,15 @@ def vector_index_type() -> VectorIndexType: return VectorIndexType.HNSW +class _VectorIndexConfigHFreshUpdate(_VectorIndexConfigUpdate): + maxPostingSizeKB: Optional[int] + searchProbe: Optional[int] + + @staticmethod + def vector_index_type() -> VectorIndexType: + return VectorIndexType.HFRESH + + class _VectorIndexConfigFlatUpdate(_VectorIndexConfigUpdate): vectorCacheMaxObjects: Optional[int] @@ -581,6 +603,31 @@ def hnsw( multivector=multi_vector, ) 
+ @staticmethod + def hfresh( + distance_metric: Optional[VectorDistances] = None, + max_posting_size_kb: Optional[int] = None, + replicas: Optional[int] = None, + search_probe: Optional[int] = None, + quantizer: Optional[_QuantizerConfigCreate] = None, + multi_vector: Optional[_MultiVectorConfigCreate] = None, + ) -> _VectorIndexConfigHFreshCreate: + """Create a `_VectorIndexConfigHFreshCreate` object to be used when defining the HFresh vector index configuration of Weaviate. + + Use this method when defining the `vector_index_config` argument in `collections.create()`. + + Args: + See [the docs](https://weaviate.io/developers/weaviate/configuration/indexes#how-to-configure-hfresh) for a more detailed view! + """ # noqa: D417 (missing argument descriptions in the docstring) + return _VectorIndexConfigHFreshCreate( + distance=distance_metric, + maxPostingSizeKB=max_posting_size_kb, + replicas=replicas, + searchProbe=search_probe, + quantizer=quantizer, + multivector=multi_vector, + ) + @staticmethod def flat( distance_metric: Optional[VectorDistances] = None, diff --git a/weaviate/collections/classes/config_vectors.py b/weaviate/collections/classes/config_vectors.py index cbfe5c8cd..7f9d0d492 100644 --- a/weaviate/collections/classes/config_vectors.py +++ b/weaviate/collections/classes/config_vectors.py @@ -20,6 +20,8 @@ _VectorIndexConfigFlatUpdate, _VectorIndexConfigHNSWCreate, _VectorIndexConfigHNSWUpdate, + _VectorIndexConfigHFreshCreate, + _VectorIndexConfigHFreshUpdate, _VectorIndexConfigUpdate, ) from weaviate.collections.classes.config_vectorizers import ( @@ -128,6 +130,17 @@ def __hnsw( multivector=multivector, ) + @staticmethod + def __hfresh(*, quantizer: Optional[_QuantizerConfigCreate]) -> _VectorIndexConfigHFreshCreate: + return _VectorIndexConfigHFreshCreate( + maxPostingSizeKB=None, + replicas=None, + searchProbe=None, + quantizer=quantizer, + multivector=None, + distance=None, + ) + @staticmethod def __flat(*, quantizer: 
Optional[_QuantizerConfigCreate]) -> _VectorIndexConfigFlatCreate: return _VectorIndexConfigFlatCreate( @@ -1804,6 +1817,7 @@ def update( name: Optional[str] = None, vector_index_config: Union[ _VectorIndexConfigHNSWUpdate, + _VectorIndexConfigHFreshUpdate, _VectorIndexConfigFlatUpdate, _VectorIndexConfigDynamicUpdate, ], diff --git a/weaviate/collections/config/async_.pyi b/weaviate/collections/config/async_.pyi index 61ee09fdd..3ea5b83fb 100644 --- a/weaviate/collections/config/async_.pyi +++ b/weaviate/collections/config/async_.pyi @@ -22,6 +22,7 @@ from weaviate.collections.classes.config import ( _VectorConfigUpdate, _VectorIndexConfigFlatUpdate, _VectorIndexConfigHNSWUpdate, + _VectorIndexConfigHFreshUpdate, ) from weaviate.collections.classes.config_object_ttl import _ObjectTTLConfigUpdate from weaviate.collections.classes.config_vector_index import _VectorIndexConfigDynamicUpdate @@ -48,13 +49,18 @@ class _ConfigCollectionAsync(_ConfigCollectionExecutor[ConnectionAsync]): object_ttl_config: Optional[_ObjectTTLConfigUpdate] = None, replication_config: Optional[_ReplicationConfigUpdate] = None, vector_index_config: Optional[ - Union[_VectorIndexConfigHNSWUpdate, _VectorIndexConfigFlatUpdate] + Union[ + _VectorIndexConfigHNSWUpdate, + _VectorIndexConfigFlatUpdate, + _VectorIndexConfigHFreshUpdate, + ] ] = None, vectorizer_config: Optional[ Union[ _VectorIndexConfigHNSWUpdate, _VectorIndexConfigFlatUpdate, _VectorIndexConfigDynamicUpdate, + _VectorIndexConfigHFreshUpdate, List[_NamedVectorConfigUpdate], ] ] = None, diff --git a/weaviate/collections/config/executor.py b/weaviate/collections/config/executor.py index 0a7bf1a49..e28899c29 100644 --- a/weaviate/collections/config/executor.py +++ b/weaviate/collections/config/executor.py @@ -39,6 +39,7 @@ _VectorConfigUpdate, _VectorIndexConfigFlatUpdate, _VectorIndexConfigHNSWUpdate, + _VectorIndexConfigHFreshUpdate, ) from weaviate.collections.classes.config_methods import ( _collection_config_from_json, @@ -141,6 
+142,7 @@ def update( Union[ _VectorIndexConfigHNSWUpdate, _VectorIndexConfigFlatUpdate, + _VectorIndexConfigHFreshUpdate, ] ] = None, vectorizer_config: Optional[ @@ -148,6 +150,7 @@ def update( _VectorIndexConfigHNSWUpdate, _VectorIndexConfigFlatUpdate, _VectorIndexConfigDynamicUpdate, + _VectorIndexConfigHFreshUpdate, List[_NamedVectorConfigUpdate], ] ] = None, @@ -192,6 +195,7 @@ def update( _VectorIndexConfigHNSWUpdate, _VectorIndexConfigFlatUpdate, _VectorIndexConfigDynamicUpdate, + _VectorIndexConfigHFreshUpdate, ), ): _Warnings.vectorizer_config_in_config_update() diff --git a/weaviate/collections/config/sync.pyi b/weaviate/collections/config/sync.pyi index 8aafc32a2..466482a8b 100644 --- a/weaviate/collections/config/sync.pyi +++ b/weaviate/collections/config/sync.pyi @@ -22,6 +22,7 @@ from weaviate.collections.classes.config import ( _VectorConfigUpdate, _VectorIndexConfigFlatUpdate, _VectorIndexConfigHNSWUpdate, + _VectorIndexConfigHFreshUpdate, ) from weaviate.collections.classes.config_object_ttl import _ObjectTTLConfigUpdate from weaviate.collections.classes.config_vector_index import _VectorIndexConfigDynamicUpdate @@ -46,13 +47,18 @@ class _ConfigCollection(_ConfigCollectionExecutor[ConnectionSync]): object_ttl_config: Optional[_ObjectTTLConfigUpdate] = None, replication_config: Optional[_ReplicationConfigUpdate] = None, vector_index_config: Optional[ - Union[_VectorIndexConfigHNSWUpdate, _VectorIndexConfigFlatUpdate] + Union[ + _VectorIndexConfigHNSWUpdate, + _VectorIndexConfigFlatUpdate, + _VectorIndexConfigHFreshUpdate, + ] ] = None, vectorizer_config: Optional[ Union[ _VectorIndexConfigHNSWUpdate, _VectorIndexConfigFlatUpdate, _VectorIndexConfigDynamicUpdate, + _VectorIndexConfigHFreshUpdate, List[_NamedVectorConfigUpdate], ] ] = None, diff --git a/weaviate/outputs/config.py b/weaviate/outputs/config.py index d6c8ed230..d6c3b4965 100644 --- a/weaviate/outputs/config.py +++ b/weaviate/outputs/config.py @@ -23,6 +23,7 @@ VectorDistances, 
VectorIndexConfigFlat, VectorIndexConfigHNSW, + VectorIndexConfigHFresh, VectorIndexType, VectorizerConfig, Vectorizers, @@ -52,6 +53,7 @@ "ShardTypes", "VectorDistances", "VectorIndexConfigHNSW", + "VectorIndexConfigHFresh", "VectorIndexConfigFlat", "VectorIndexType", "Vectorizers",