Skip to content

Commit ae09e23

Browse files
[8.19] Add custom vector data converters for high performance ingest scenarios (#8794) (#8795)
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
1 parent 53ee577 commit ae09e23

File tree

6 files changed

+588
-41
lines changed

6 files changed

+588
-41
lines changed

src/Elastic.Clients.Elasticsearch/_Shared/Core/Configuration/ElasticsearchClientSettings.cs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,8 @@ public abstract class ElasticsearchClientSettingsBase<TConnectionSettings> :
115115
private readonly Serializer _sourceSerializer;
116116
private BeforeRequestEvent? _onBeforeRequest;
117117
private bool _experimentalEnableSerializeNullInferredValues;
118+
private FloatVectorDataEncoding _floatVectorDataEncoding = Serialization.FloatVectorDataEncoding.Base64;
119+
private ByteVectorDataEncoding _byteVectorDataEncoding = Serialization.ByteVectorDataEncoding.Base64;
118120
private ExperimentalSettings _experimentalSettings = new();
119121

120122
private bool _defaultDisableAllInference;
@@ -165,6 +167,8 @@ protected ElasticsearchClientSettingsBase(
165167
FluentDictionary<Type, string> IElasticsearchClientSettings.RouteProperties => _routeProperties;
166168
Serializer IElasticsearchClientSettings.SourceSerializer => _sourceSerializer;
167169
BeforeRequestEvent? IElasticsearchClientSettings.OnBeforeRequest => _onBeforeRequest;
170+
FloatVectorDataEncoding IElasticsearchClientSettings.FloatVectorDataEncoding => _floatVectorDataEncoding;
171+
ByteVectorDataEncoding IElasticsearchClientSettings.ByteVectorDataEncoding => _byteVectorDataEncoding;
168172
ExperimentalSettings IElasticsearchClientSettings.Experimental => _experimentalSettings;
169173

170174
bool IElasticsearchClientSettings.ExperimentalEnableSerializeNullInferredValues => _experimentalEnableSerializeNullInferredValues;
@@ -198,6 +202,18 @@ public TConnectionSettings DefaultFieldNameInferrer(Func<string, string> fieldNa
198202
public TConnectionSettings ExperimentalEnableSerializeNullInferredValues(bool enabled = true) =>
199203
Assign(enabled, (a, v) => a._experimentalEnableSerializeNullInferredValues = v);
200204

205+
/// <inheritdoc cref="IElasticsearchClientSettings.FloatVectorDataEncoding"/>
206+
/// <param name="encoding">The default vector data encoding to use.</param>
207+
/// <returns>This settings instance for chaining.</returns>
208+
public TConnectionSettings FloatVectorDataEncoding(FloatVectorDataEncoding encoding) =>
209+
Assign(encoding, (a, v) => a._floatVectorDataEncoding = v);
210+
211+
/// <inheritdoc cref="IElasticsearchClientSettings.ByteVectorDataEncoding"/>
212+
/// <param name="encoding">The default vector data encoding to use.</param>
213+
/// <returns>This settings instance for chaining.</returns>
214+
public TConnectionSettings ByteVectorDataEncoding(ByteVectorDataEncoding encoding) =>
215+
Assign(encoding, (a, v) => a._byteVectorDataEncoding = v);
216+
201217
public TConnectionSettings Experimental(ExperimentalSettings settings) =>
202218
Assign(settings, (a, v) => a._experimentalSettings = v);
203219

src/Elastic.Clients.Elasticsearch/_Shared/Core/Configuration/IElasticsearchClientSettings.cs

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,10 @@
55
using System;
66
using System.Collections.Generic;
77
using System.Reflection;
8+
89
using Elastic.Clients.Elasticsearch.Requests;
10+
using Elastic.Clients.Elasticsearch.Serialization;
11+
912
using Elastic.Transport;
1013

1114
namespace Elastic.Clients.Elasticsearch;
@@ -116,14 +119,37 @@ public interface IElasticsearchClientSettings : ITransportConfiguration
116119
BeforeRequestEvent? OnBeforeRequest { get; }
117120

118121
/// <summary>
119-
/// This is an advanced setting which controls serialization behaviour for inferred properies such as ID, routing and index name.
120-
/// <para>When enabled, it may reduce allocations on serialisation paths where the cost can be more significant, such as in bulk operations.</para>
122+
/// This is an advanced setting which controls serialization behaviour for inferred properties such as ID, routing and index name.
123+
/// <para>When enabled, it may reduce allocations on serialization paths where the cost can be more significant, such as in bulk operations.</para>
121124
/// <para>As a by-product it may cause null values to be included in the serialized data and impact payload size. This will only be a concern should some
122-
/// typed not have inferrence mappings defined for the required properties.</para>
125+
/// types not have inference mappings defined for the required properties.</para>
123126
/// </summary>
124-
/// <remarks>This is marked as experiemental and may be removed or renamed in the future once its impact is evaluated.</remarks>
127+
/// <remarks>This is marked as experimental and may be removed or renamed in the future once its impact is evaluated.</remarks>
125128
bool ExperimentalEnableSerializeNullInferredValues { get; }
126129

130+
/// <summary>
131+
/// Controls the vector data encoding to use for <see cref="ReadOnlyMemory{T}"/> properties
132+
/// in documents during ingestion when the <see cref="FloatVectorDataConverter"/> is used.
133+
/// </summary>
134+
/// <remarks>
135+
/// Setting this value to <see cref="FloatVectorDataEncoding.Legacy"/> provides backwards
136+
/// compatibility when talking to Elasticsearch servers with a version older than 9.3.0
137+
/// (required for <see cref="FloatVectorDataEncoding.Base64"/>).
138+
/// </remarks>
139+
FloatVectorDataEncoding FloatVectorDataEncoding { get; }
140+
141+
/// <summary>
142+
/// Controls the vector data encoding to use for <see cref="ReadOnlyMemory{T}"/> properties
143+
/// in documents during ingestion when the <see cref="ByteVectorDataConverter"/> is used.
144+
/// </summary>
145+
/// <remarks>
146+
/// Setting this value to <see cref="ByteVectorDataEncoding.Legacy"/> provides backwards
147+
/// compatibility when talking to Elasticsearch servers with a version older than 8.14.0
148+
/// (required for <see cref="ByteVectorDataEncoding.Hex"/>) or older than 9.3.0 (required
149+
/// for <see cref="ByteVectorDataEncoding.Base64"/>).
150+
/// </remarks>
151+
ByteVectorDataEncoding ByteVectorDataEncoding { get; }
152+
127153
/// <summary>
128154
/// Experimental settings.
129155
/// </summary>

src/Elastic.Clients.Elasticsearch/_Shared/Core/LazyJsonConverter.cs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
// See the LICENSE file in the project root for more information.
44

55
using System;
6+
using System.Diagnostics.CodeAnalysis;
67
using System.Text.Json;
78
using System.Text.Json.Serialization;
89

@@ -14,13 +15,13 @@ public sealed class LazyJsonConverter : JsonConverter<LazyJson>
1415
{
1516
private IElasticsearchClientSettings? _settings;
1617

18+
[UnconditionalSuppressMessage("AOT", "IL3050:Calling members annotated with 'RequiresDynamicCodeAttribute'", Justification = "Always using explicit TypeInfoResolver")]
19+
[UnconditionalSuppressMessage("Trimming", "IL2026:Members annotated with 'RequiresUnreferencedCodeAttribute'", Justification = "Always using explicit TypeInfoResolver")]
1720
public override LazyJson Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options)
1821
{
1922
InitializeSettings(options);
2023

21-
#pragma warning disable IL2026, IL3050 // The `TypeInfoResolver` for `RequestResponseConverter` knows how to handle `JsonElement`.
2224
return new LazyJson(JsonSerializer.Deserialize<JsonElement>(ref reader, options), _settings!);
23-
#pragma warning restore IL2026, IL3050
2425
}
2526

2627
private void InitializeSettings(JsonSerializerOptions options)

src/Elastic.Clients.Elasticsearch/_Shared/Next/JsonWriterExtensions.cs

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,36 @@ public static void WriteUnionValue<T1, T2>(this Utf8JsonWriter writer, JsonSeria
249249
);
250250
}
251251

252+
/// <summary>
/// Writes the elements of <paramref name="memory"/> as a JSON array, one array item per element.
/// </summary>
/// <typeparam name="T">The element type stored in <paramref name="memory"/>.</typeparam>
/// <param name="writer">The writer that receives the array.</param>
/// <param name="options">The serializer options forwarded to every element write.</param>
/// <param name="memory">The elements to write; an empty memory yields an empty array.</param>
/// <param name="writeElement">
/// Optional delegate used to write each element. When <see langword="null"/>, a delegate backed by
/// the converter obtained from <paramref name="options"/> is used instead.
/// </param>
public static void WriteMemoryValue<T>(this Utf8JsonWriter writer, JsonSerializerOptions options, ReadOnlyMemory<T> memory,
253+
JsonWriteFunc<T>? writeElement)
254+
{
255+
if (writeElement is null)
256+
{
257+
// Look the converter up once, outside the per-element delegate.
// NOTE(review): GetConverter<T>(null) is a project extension not visible here — presumably it resolves the converter for T from the options; confirm.
var converter = options.GetConverter<T>(null);
258+
259+
writeElement = (w, o, v) =>
260+
{
261+
// Emit a JSON null directly unless the converter declares that it handles null itself.
if ((v is null) && !converter.HandleNull)
262+
{
263+
w.WriteNullValue();
264+
return;
265+
}
266+
267+
converter.Write(w, v, o);
268+
};
269+
}
270+
271+
writer.WriteStartArray();
272+
273+
// Iterate over the span view of the memory.
var span = memory.Span;
274+
foreach (var element in span)
275+
{
276+
writeElement(writer, options, element);
277+
}
278+
279+
writer.WriteEndArray();
280+
}
281+
252282
#endregion Delegate Based Write Methods
253283

254284
#region Specialized Write Methods

0 commit comments

Comments
 (0)