Skip to content

Commit 4e0ccb9

Browse files
damieng, ajcvickers, markwallace-microsoft, westey-m, rogerbarreto
authored
.Net: Switch MEVD MongoDB Driver to v3.5.2 (#13370)
### Motivation and Context

Switches the MongoDB Driver to the latest 3.5 release, which introduced a number of breaking changes. This affects both the MongoDB vector data project and the CosmosMongoDB project. This fixes #11652, likely addresses #12707, and partly addresses #10291.

### Description

Switches the MongoDB driver to 3.5. One of the breaking changes is that GUIDs in BSON no longer have a default storage format, because the driver is moving from the C#-only format to the standard format shared by all MongoDB drivers. This PR sets the format explicitly in two ways: a convention for the registry-based mode, and an alternative to BsonValue.Create for scenarios where conventions/serialization are not available, such as key creation and the dynamic mapper.

### Contribution Checklist

- [x] The code builds clean without any errors or warnings
- [x] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations
- [x] All unit tests pass, and I have added new tests where possible
- [x] I didn't break anyone 😄

cc @roji

---------

Co-authored-by: Arthur Vickers <ajcvickers@hotmail.com>
Co-authored-by: Mark Wallace <127216156+markwallace-microsoft@users.noreply.github.com>
Co-authored-by: westey <164392973+westey-m@users.noreply.github.com>
Co-authored-by: Roger Barreto <19890735+rogerbarreto@users.noreply.github.com>
Co-authored-by: Shay Rojansky <roji@roji.org>
1 parent 43da976 commit 4e0ccb9

20 files changed

+177
-38
lines changed

dotnet/Directory.Packages.props

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,7 @@
161161
<PackageVersion Include="Microsoft.Data.Sqlite" Version="9.0.10" />
162162
<PackageVersion Include="DuckDB.NET.Data.Full" Version="1.2.0" />
163163
<PackageVersion Include="DuckDB.NET.Data" Version="1.1.3" />
164-
<PackageVersion Include="MongoDB.Driver" Version="2.30.0" />
164+
<PackageVersion Include="MongoDB.Driver" Version="3.5.2" />
165165
<PackageVersion Include="Microsoft.Graph" Version="5.94.0" />
166166
<PackageVersion Include="Microsoft.OpenApi" Version="1.6.24" />
167167
<PackageVersion Include="Microsoft.OpenApi.Readers" Version="1.6.24" />

dotnet/src/IntegrationTests/Connectors/Memory/CosmosMongoDB/CosmosMongoVectorStoreFixture.cs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,9 @@ public CosmosMongoVectorStoreFixture()
4040
.Build();
4141

4242
var connectionString = GetConnectionString(configuration);
43+
#pragma warning disable CA2000 // Dispose objects before losing scope
4344
var client = new MongoClient(connectionString);
45+
#pragma warning restore CA2000
4446

4547
this.MongoDatabase = client.GetDatabase("test");
4648

dotnet/src/IntegrationTests/Connectors/Memory/MongoDB/MongoDBVectorStoreFixture.cs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,11 +37,13 @@ public async Task InitializeAsync()
3737
cts.CancelAfter(TimeSpan.FromSeconds(60));
3838
await this._container.StartAsync(cts.Token);
3939

40+
#pragma warning disable CA2000 // Dispose objects before losing scope
4041
var mongoClient = new MongoClient(new MongoClientSettings
4142
{
4243
Server = new MongoServerAddress(this._container.Hostname, this._container.GetMappedPublicPort(MongoDbBuilder.MongoDbPort)),
4344
DirectConnection = true,
4445
});
46+
#pragma warning restore CA2000
4547

4648
this.MongoDatabase = mongoClient.GetDatabase("test");
4749

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
// Copyright (c) Microsoft. All rights reserved.
2+
3+
using System;
4+
using System.Collections.Generic;
5+
using System.Linq;
6+
using MongoDB.Bson;
7+
8+
namespace Microsoft.SemanticKernel.Connectors.MongoDB;
9+
/// <summary>
/// Builds the <see cref="BsonValue"/> for a CLR value, always storing <see cref="Guid"/>
/// values with <see cref="GuidRepresentation.Standard"/>.
/// </summary>
/// <remarks>
/// Used as an alternative to <see cref="BsonValue.Create(object)"/> on code paths that have no
/// access to conventions or serializers (key creation, the dynamic mapper), where the GUID
/// representation therefore has to be chosen explicitly.
/// </remarks>
internal static class BsonValueFactory
{
    /// <summary>
    /// Create a BsonValue for the given CLR value.
    /// </summary>
    /// <param name="value">The CLR object to create a BSON value for. May be <see langword="null"/>.</param>
    /// <returns>
    /// <see cref="BsonNull.Value"/> for <see langword="null"/>; a <see cref="BsonBinaryData"/> with the
    /// standard representation for a <see cref="Guid"/>; a <see cref="BsonArray"/> whose elements are
    /// converted recursively for arrays and object/Guid sequences; otherwise whatever
    /// <see cref="BsonValue.Create(object)"/> produces.
    /// </returns>
    public static BsonValue Create(object? value)
        => value switch
        {
            null => BsonNull.Value,
            Guid guid => new BsonBinaryData(guid, GuidRepresentation.Standard),
            object[] objectArray => new BsonArray(Array.ConvertAll(objectArray, Create)),

            // Value-type arrays (Guid[], Guid?[], int[], ...) are not object[]. Convert them element by
            // element through Create so that Guid elements also get the standard representation
            // instead of being handed to the driver's default mapping.
            Array array => new BsonArray(array.Cast<object?>().Select(Create)),

            // Value-type sequences are not IEnumerable<object> (no variance for value types), so
            // Guid sequences such as List<Guid> need their own arms to reach the Guid case above.
            IEnumerable<Guid> guids => new BsonArray(guids.Select(g => Create(g))),
            IEnumerable<Guid?> nullableGuids => new BsonArray(nullableGuids.Select(g => Create(g))),

            IEnumerable<object> enumerable => new BsonArray(enumerable.Select(Create)),
            _ => BsonValue.Create(value)
        };
}

dotnet/src/InternalUtilities/connectors/Memory/MongoDB/MongoDynamicMapper.cs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,21 +30,21 @@ public BsonDocument MapFromDataToStorageModel(Dictionary<string, object?> dataMo
3030
: keyValue switch
3131
{
3232
string s => s,
33-
Guid g => BsonValue.Create(g),
33+
Guid g => new BsonBinaryData(g, GuidRepresentation.Standard),
3434
ObjectId o => o,
3535
long i => i,
3636
int i => i,
3737

3838
null => throw new InvalidOperationException($"Key property '{model.KeyProperty.ModelName}' is null."),
39-
_ => throw new InvalidCastException($"Key property '{model.KeyProperty.ModelName}' must be a string.")
39+
_ => throw new InvalidCastException($"Key property '{model.KeyProperty.ModelName}' must be a string, Guid, ObjectID, long or int.")
4040
}
4141
};
4242

4343
foreach (var property in model.DataProperties)
4444
{
4545
if (dataModel.TryGetValue(property.ModelName, out var dataValue))
4646
{
47-
document[property.StorageName] = BsonValue.Create(dataValue);
47+
document[property.StorageName] = BsonValueFactory.Create(dataValue);
4848
}
4949
}
5050

dotnet/src/InternalUtilities/connectors/Memory/MongoDB/MongoMapper.cs

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
using MongoDB.Bson.Serialization;
1212
using MongoDB.Bson.Serialization.Attributes;
1313
using MongoDB.Bson.Serialization.Conventions;
14+
using MongoDB.Bson.Serialization.Serializers;
1415

1516
namespace Microsoft.SemanticKernel.Connectors.MongoDB;
1617

@@ -40,7 +41,8 @@ public MongoMapper(CollectionModel model)
4041

4142
var conventionPack = new ConventionPack
4243
{
43-
new IgnoreExtraElementsConvention(ignoreExtraElements: true)
44+
new IgnoreExtraElementsConvention(ignoreExtraElements: true),
45+
new GuidStandardRepresentationConvention()
4446
};
4547

4648
ConventionRegistry.Register(
@@ -139,4 +141,15 @@ public TRecord MapFromStorageToDataModel(BsonDocument storageModel, bool include
139141

140142
return BsonSerializer.Deserialize<TRecord>(storageModel);
141143
}
144+
/// <summary>
/// Member-map convention that serializes <see cref="Guid"/> and nullable <see cref="Guid"/> members
/// using <see cref="GuidRepresentation.Standard"/>, unless the member declares its own
/// <see cref="BsonRepresentationAttribute"/>.
/// </summary>
private sealed class GuidStandardRepresentationConvention : ConventionBase, IMemberMapConvention
{
    /// <summary>Assigns a standard-representation GUID serializer to eligible members.</summary>
    /// <param name="memberMap">The member map being configured.</param>
    public void Apply(BsonMemberMap memberMap)
    {
        var memberType = memberMap.MemberType;
        if (memberType != typeof(Guid) && memberType != typeof(Guid?))
        {
            return;
        }

        // An explicit [BsonRepresentation] on the member takes precedence over this convention.
        if (memberMap.MemberInfo.GetCustomAttribute<BsonRepresentationAttribute>() is not null)
        {
            return;
        }

        var guidSerializer = new GuidSerializer(GuidRepresentation.Standard);

        if (memberType == typeof(Guid))
        {
            memberMap.SetSerializer(guidSerializer);
        }
        else
        {
            // Guid? members need the nullable wrapper around the same standard-representation serializer.
            memberMap.SetSerializer(new NullableSerializer<Guid>(guidSerializer));
        }
    }
}
142155
}

dotnet/src/InternalUtilities/src/Diagnostics/NullableAttributes.cs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,10 @@
77
// This was copied from https://github.com/dotnet/runtime/blob/39b9607807f29e48cae4652cd74735182b31182e/src/libraries/System.Private.CoreLib/src/System/Diagnostics/CodeAnalysis/NullableAttributes.cs
88
// and updated to have the scope of the attributes be internal.
99

10-
#if !NETCOREAPP
1110
namespace System.Diagnostics.CodeAnalysis;
1211

12+
#if !NETCOREAPP && !NETSTANDARD2_1
13+
1314
/// <summary>Specifies that null is allowed as an input even if the corresponding type disallows it.</summary>
1415
[AttributeUsage(AttributeTargets.Field | AttributeTargets.Parameter | AttributeTargets.Property, Inherited = false)]
1516
internal sealed class AllowNullAttribute : Attribute

dotnet/src/InternalUtilities/src/Http/HttpClientProvider.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ private static SocketsHttpHandler CreateHandler()
8888
},
8989
};
9090
}
91-
#elif NETSTANDARD2_0
91+
#elif NETSTANDARD2_0_OR_GREATER
9292
private static HttpClientHandler CreateHandler()
9393
{
9494
var handler = new HttpClientHandler();
@@ -99,7 +99,7 @@ private static HttpClientHandler CreateHandler()
9999
catch (PlatformNotSupportedException) { } // not supported on older frameworks
100100
return handler;
101101
}
102-
#elif NET462
102+
#elif NETFRAMEWORK
103103
private static HttpClientHandler CreateHandler()
104104
=> new();
105105
#endif

dotnet/src/VectorData/CosmosMongoDB/CosmosMongoCollection.cs

Lines changed: 52 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
using Microsoft.Extensions.VectorData.ProviderServices;
1616
using Microsoft.SemanticKernel.Connectors.MongoDB;
1717
using MongoDB.Bson;
18+
using MongoDB.Bson.Serialization;
1819
using MongoDB.Driver;
1920
using MEVD = Microsoft.Extensions.VectorData;
2021

@@ -67,6 +68,9 @@ public class CosmosMongoCollection<TKey, TRecord> : VectorStoreCollection<TKey,
6768
/// <summary>The size of the dynamic candidate list for search.</summary>
6869
private readonly int _efSearch;
6970

71+
/// <summary><see cref="BsonSerializationInfo"/> to use for serializing key values.</summary>
72+
private readonly BsonSerializationInfo? _keySerializationInfo;
73+
7074
private static readonly Type[] s_validKeyTypes = [typeof(string), typeof(Guid), typeof(ObjectId), typeof(int), typeof(long)];
7175

7276
/// <summary>
@@ -123,6 +127,11 @@ internal CosmosMongoCollection(IMongoDatabase mongoDatabase, string name, Func<C
123127
VectorStoreName = mongoDatabase.DatabaseNamespace?.DatabaseName,
124128
CollectionName = name
125129
};
130+
131+
// Cache the key serialization info if possible
132+
this._keySerializationInfo = typeof(TKey) == typeof(object)
133+
? null
134+
: this.GetKeySerializationInfo();
126135
}
127136

128137
/// <inheritdoc />
@@ -142,9 +151,9 @@ await this.RunOperationAsync("CreateIndexes",
142151
/// <inheritdoc />
143152
public override async Task DeleteAsync(TKey key, CancellationToken cancellationToken = default)
144153
{
145-
var stringKey = this.GetStringKey(key);
154+
Verify.NotNull(key);
146155

147-
await this.RunOperationAsync("DeleteOne", () => this._mongoCollection.DeleteOneAsync(this.GetFilterById(stringKey), cancellationToken))
156+
await this.RunOperationAsync("DeleteOne", () => this._mongoCollection.DeleteOneAsync(this.GetFilterById(key), cancellationToken))
148157
.ConfigureAwait(false);
149158
}
150159

@@ -153,9 +162,7 @@ public override async Task DeleteAsync(IEnumerable<TKey> keys, CancellationToken
153162
{
154163
Verify.NotNull(keys);
155164

156-
var stringKeys = keys is IEnumerable<string> k ? k : keys.Cast<string>();
157-
158-
await this.RunOperationAsync("DeleteMany", () => this._mongoCollection.DeleteManyAsync(this.GetFilterByIds(stringKeys), cancellationToken))
165+
await this.RunOperationAsync("DeleteMany", () => this._mongoCollection.DeleteManyAsync(this.GetFilterByIds(keys), cancellationToken))
159166
.ConfigureAwait(false);
160167
}
161168

@@ -166,7 +173,7 @@ public override Task EnsureCollectionDeletedAsync(CancellationToken cancellation
166173
/// <inheritdoc />
167174
public override async Task<TRecord?> GetAsync(TKey key, RecordRetrievalOptions? options = null, CancellationToken cancellationToken = default)
168175
{
169-
var stringKey = this.GetStringKey(key);
176+
Verify.NotNull(key);
170177

171178
var includeVectors = options?.IncludeVectors ?? false;
172179
if (includeVectors && this._model.EmbeddingGenerationRequired)
@@ -175,7 +182,7 @@ public override Task EnsureCollectionDeletedAsync(CancellationToken cancellation
175182
}
176183

177184
using var cursor = await this
178-
.FindAsync(this.GetFilterById(stringKey), top: 1, skip: null, includeVectors, sortDefinition: null, cancellationToken)
185+
.FindAsync(this.GetFilterById(key), top: 1, skip: null, includeVectors, sortDefinition: null, cancellationToken)
179186
.ConfigureAwait(false);
180187

181188
var record = await cursor.SingleOrDefaultAsync(cancellationToken).ConfigureAwait(false);
@@ -202,10 +209,8 @@ public override async IAsyncEnumerable<TRecord> GetAsync(
202209
throw new NotSupportedException(VectorDataStrings.IncludeVectorsNotSupportedWithEmbeddingGeneration);
203210
}
204211

205-
var stringKeys = keys is IEnumerable<string> k ? k : keys.Cast<string>();
206-
207212
using var cursor = await this
208-
.FindAsync(this.GetFilterByIds(stringKeys), top: null, skip: null, includeVectors, sortDefinition: null, cancellationToken)
213+
.FindAsync(this.GetFilterByIds(keys), top: null, skip: null, includeVectors, sortDefinition: null, cancellationToken)
209214
.ConfigureAwait(false);
210215

211216
while (await cursor.MoveNextAsync(cancellationToken).ConfigureAwait(false))
@@ -252,14 +257,17 @@ private async Task UpsertCoreAsync(TRecord record, int recordIndex, IReadOnlyLis
252257
var replaceOptions = new ReplaceOptions { IsUpsert = true };
253258
var storageModel = this._mapper.MapFromDataToStorageModel(record, recordIndex, generatedEmbeddings);
254259

255-
var key = storageModel[MongoConstants.MongoReservedKeyPropertyName].AsString;
260+
var key = GetStorageKey(storageModel);
256261

257262
await this.RunOperationAsync(OperationName, async () =>
258263
await this._mongoCollection
259264
.ReplaceOneAsync(this.GetFilterById(key), storageModel, replaceOptions, cancellationToken)
260265
.ConfigureAwait(false)).ConfigureAwait(false);
261266
}
262267

/// <summary>
/// Reads the reserved key field from a storage document and converts it to the collection's key type.
/// </summary>
/// <param name="document">The storage document produced by the mapper.</param>
/// <returns>The key value, mapped to its .NET representation and cast to <typeparamref name="TKey"/>.</returns>
private static TKey GetStorageKey(BsonDocument document)
{
    var storedKey = document[MongoConstants.MongoReservedKeyPropertyName];
    var clrKey = BsonTypeMapper.MapToDotNetValue(storedKey);

    return (TKey)clrKey;
}
270+
263271
private static async ValueTask<(IEnumerable<TRecord> records, IReadOnlyList<Embedding>?[]?)> ProcessEmbeddingsAsync(
264272
CollectionModel model,
265273
IEnumerable<TRecord> records,
@@ -562,11 +570,40 @@ private async IAsyncEnumerable<VectorSearchResult<TRecord>> EnumerateAndMapSearc
562570
}
563571
}
564572

565-
private FilterDefinition<BsonDocument> GetFilterById(string id)
566-
=> Builders<BsonDocument>.Filter.Eq(document => document[MongoConstants.MongoReservedKeyPropertyName], id);
/// <summary>Builds an equality filter on the reserved key field for a single key.</summary>
/// <param name="id">The key of the record to match.</param>
/// <returns>A filter matching the document whose key field equals the serialized <paramref name="id"/>.</returns>
private FilterDefinition<BsonDocument> GetFilterById(TKey id)
{
    // Prefer the cached key serialization info; when there is none (it is not cached for
    // object keys) convert the key with BsonValueFactory instead.
    var keyValue = this._keySerializationInfo?.SerializeValue(id);
    keyValue ??= BsonValueFactory.Create(id);

    var filterBuilder = Builders<BsonDocument>.Filter;
    return filterBuilder.Eq(MongoConstants.MongoReservedKeyPropertyName, keyValue);
}
579+
/// <summary>Builds an "in" filter on the reserved key field for a set of keys.</summary>
/// <param name="ids">The keys of the records to match.</param>
/// <returns>A filter matching documents whose key field is one of the serialized <paramref name="ids"/>.</returns>
private FilterDefinition<BsonDocument> GetFilterByIds(IEnumerable<TKey> ids)
{
    // Prefer the cached key serialization info; when there is none (it is not cached for
    // object keys) convert the keys with BsonValueFactory instead.
    var keyValues = this._keySerializationInfo?.SerializeValues(ids);
    if (keyValues is null)
    {
        keyValues = (BsonArray)BsonValueFactory.Create(ids);
    }

    var filterBuilder = Builders<BsonDocument>.Filter;
    return filterBuilder.In(MongoConstants.MongoReservedKeyPropertyName, keyValues);
}
567586

568-
private FilterDefinition<BsonDocument> GetFilterByIds(IEnumerable<string> ids)
569-
=> Builders<BsonDocument>.Filter.In(document => document[MongoConstants.MongoReservedKeyPropertyName].AsString, ids);
/// <summary>
/// Resolves the <see cref="BsonSerializationInfo"/> for the record's key property from the
/// serializer registered for <typeparamref name="TRecord"/>.
/// </summary>
/// <returns>The serialization info describing how the key member is written to BSON.</returns>
/// <exception cref="InvalidOperationException">
/// No serializer is found for the record type, the serializer is not an
/// <see cref="IBsonDocumentSerializer"/>, or it does not recognize the key property.
/// </exception>
private BsonSerializationInfo GetKeySerializationInfo()
{
    switch (BsonSerializer.LookupSerializer<TRecord>())
    {
        case null:
            throw new InvalidOperationException($"BsonSerializer not found for type '{typeof(TRecord)}'");

        case IBsonDocumentSerializer recordSerializer:
            if (recordSerializer.TryGetMemberSerializationInfo(this._model.KeyProperty.ModelName, out var keyInfo))
            {
                return keyInfo;
            }

            throw new InvalidOperationException($"BsonSerializer for type '{typeof(TRecord)}' does not recognize key property {this._model.KeyProperty.ModelName}");

        default:
            throw new InvalidOperationException($"BsonSerializer for type '{typeof(TRecord)}' does not implement IBsonDocumentSerializer");
    }
}
570607

571608
private async Task<bool> InternalCollectionExistsAsync(CancellationToken cancellationToken)
572609
{

dotnet/src/VectorData/CosmosMongoDB/CosmosMongoCollectionCreateMapping.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ private static string GetIndexKind(string? indexKind, string vectorPropertyName)
112112
{
113113
IndexKind.Hnsw => "vector-hnsw",
114114
IndexKind.IvfFlat => "vector-ivf",
115-
_ => throw new InvalidOperationException($"Index kind '{indexKind}' on {nameof(VectorStoreVectorProperty)} '{vectorPropertyName}' is not supported by the Azure CosmosDB for MongoDB VectorStore.")
115+
_ => throw new NotSupportedException($"Index kind '{indexKind}' on {nameof(VectorStoreVectorProperty)} '{vectorPropertyName}' is not supported by the Azure CosmosDB for MongoDB VectorStore.")
116116
};
117117

118118
/// <summary>
@@ -124,6 +124,6 @@ private static string GetDistanceFunction(string? distanceFunction, string vecto
124124
DistanceFunction.CosineDistance => "COS",
125125
DistanceFunction.DotProductSimilarity => "IP",
126126
DistanceFunction.EuclideanDistance => "L2",
127-
_ => throw new InvalidOperationException($"Distance function '{distanceFunction}' for {nameof(VectorStoreVectorProperty)} '{vectorPropertyName}' is not supported by the Azure CosmosDB for MongoDB VectorStore.")
127+
_ => throw new NotSupportedException($"Distance function '{distanceFunction}' for {nameof(VectorStoreVectorProperty)} '{vectorPropertyName}' is not supported by the Azure CosmosDB for MongoDB VectorStore.")
128128
};
129129
}

0 commit comments

Comments
 (0)