feat: 新增向量嵌入服务支持
新增SiliconFlow向量嵌入服务实现,支持文本向量化功能: - 新增ITextEmbeddingService接口和SiliconFlowTextEmbeddingService实现 - 新增EmbeddingCreateRequest/Response等向量相关DTO - 在AiGateWayManager中新增EmbeddingForStatisticsAsync方法 - 在OpenApiService中新增向量生成API接口 - 扩展ModelTypeEnum枚举支持Embedding类型 - 优化ThorChatMessage的Content属性处理逻辑
This commit is contained in:
@@ -0,0 +1,79 @@
|
||||
using System.ComponentModel.DataAnnotations;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Yi.Framework.AiHub.Application.Contracts.Dtos.OpenAi.Embeddings;
|
||||
|
||||
//TODO add model validation
|
||||
//TODO check which fields accept either a string or an array (e.g. input, prompt, ...)
|
||||
/// <summary>
/// Request payload for creating embeddings. Assign either <see cref="Input" /> (one text) or
/// <see cref="InputAsList" /> (several texts), never both; whichever is set is serialized under the JSON
/// name <c>input</c> through <see cref="InputCalculated" />.
/// </summary>
public record EmbeddingCreateRequest
{
    /// <summary>
    /// Input text to get embeddings for, encoded as a string or array of tokens. To get embeddings for multiple inputs
    /// in a single request, pass an array of strings or array of token arrays. Each input must not exceed 2048 tokens in
    /// length.
    /// Unless you are embedding code, we suggest replacing newlines (`\n`) in your input with a single space, as we have
    /// observed inferior results when newlines are present.
    /// </summary>
    /// <see href="https://platform.openai.com/docs/api-reference/embeddings/create#embeddings/create-input" />
    [JsonIgnore]
    public List<string>? InputAsList { get; set; }

    /// <summary>
    /// Input text to get embeddings for, encoded as a string or array of tokens. To get embeddings for multiple inputs
    /// in a single request, pass an array of strings or array of token arrays. Each input must not exceed 2048 tokens in
    /// length.
    /// Unless you are embedding code, we suggest replacing newlines (`\n`) in your input with a single space, as we have
    /// observed inferior results when newlines are present.
    /// </summary>
    /// <see href="https://platform.openai.com/docs/api-reference/embeddings/create#embeddings/create-input" />
    [JsonIgnore]
    public string? Input { get; set; }

    /// <summary>
    /// The value written as <c>input</c>: a one-element list wrapping <see cref="Input" /> when it is set,
    /// otherwise <see cref="InputAsList" /> (which may be <c>null</c>).
    /// </summary>
    /// <exception cref="ValidationException">
    /// Both <see cref="Input" /> and <see cref="InputAsList" /> are non-null.
    /// </exception>
    [JsonPropertyName("input")]
    public IList<string>? InputCalculated
    {
        get
        {
            if (Input != null && InputAsList != null)
            {
                throw new ValidationException(
                    "Input and InputAsList can not be assigned at the same time. One of them is should be null.");
            }

            if (Input != null)
            {
                return new List<string> { Input };
            }

            return InputAsList;
        }
    }

    /// <summary>
    /// ID of the model to use. You can use the [List models](/docs/api-reference/models/list) API to see all of your
    /// available models, or see our [Model overview](/docs/models/overview) for descriptions of them.
    /// </summary>
    /// <see href="https://platform.openai.com/docs/api-reference/embeddings/create#embeddings/create-model" />
    [JsonPropertyName("model")]
    public string? Model { get; set; }

    /// <summary>
    /// The number of dimensions the resulting output embeddings should have. Only supported in text-embedding-3 and later models.
    /// </summary>
    /// <see href="https://platform.openai.com/docs/api-reference/embeddings/create#embeddings-create-dimensions" />
    [JsonPropertyName("dimensions")]
    public int? Dimensions { get; set; }

    /// <summary>
    /// The format to return the embeddings in. Can be either float or base64.
    /// </summary>
    [JsonPropertyName("encoding_format")]
    public string? EncodingFormat { get; set; }

    /// <summary>
    /// Checks the request for problems without throwing.
    /// </summary>
    /// <returns>
    /// One <see cref="ValidationResult" /> per problem found; an empty sequence when the request looks valid.
    /// The sequence is produced lazily, so the checks run when it is enumerated.
    /// </returns>
    public IEnumerable<ValidationResult> Validate()
    {
        // Same invariant the InputCalculated getter enforces, reported here instead of thrown.
        if (Input is not null && InputAsList is not null)
        {
            yield return new ValidationResult(
                "Input and InputAsList can not be assigned at the same time.",
                new[] { nameof(Input), nameof(InputAsList) });
        }

        var hasSingleInput = !string.IsNullOrEmpty(Input);
        var hasListInput = InputAsList is { Count: > 0 };
        if (!hasSingleInput && !hasListInput)
        {
            yield return new ValidationResult(
                "Either Input or InputAsList must be provided.",
                new[] { nameof(Input), nameof(InputAsList) });
        }

        if (string.IsNullOrWhiteSpace(Model))
        {
            yield return new ValidationResult("Model is required.", new[] { nameof(Model) });
        }

        // The property documentation names "float" and "base64" as the only formats; null means "not specified".
        if (EncodingFormat is not (null or "float" or "base64"))
        {
            yield return new ValidationResult(
                "EncodingFormat must be either 'float' or 'base64'.",
                new[] { nameof(EncodingFormat) });
        }
    }
}
|
||||
@@ -0,0 +1,111 @@
|
||||
using System.Buffers;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Yi.Framework.AiHub.Application.Contracts.Dtos.OpenAi.Embeddings;
|
||||
|
||||
/// <summary>
/// Response body of an embeddings request: the model name, one <see cref="EmbeddingResponse" /> per input,
/// and optional usage information.
/// </summary>
public record EmbeddingCreateResponse : ThorBaseResponse
{
    /// <summary>Model name, serialized as <c>model</c>.</summary>
    [JsonPropertyName("model")] public string Model { get; set; }

    /// <summary>Embedding entries, serialized as <c>data</c>. Defaults to an empty list.</summary>
    [JsonPropertyName("data")] public List<EmbeddingResponse> Data { get; set; } = [];

    /// <summary>
    /// Converts every entry of <see cref="Data" /> to the requested encoding. Entries that already have the
    /// requested representation, or whose payload type is not recognised, are left untouched.
    /// </summary>
    /// <param name="encodingFormat">
    /// <c>null</c> or <c>"float"</c>: base64 payloads (a <see cref="string" /> or a JSON string element) are
    /// decoded into <c>float[]</c>. <c>"base64"</c>: numeric payloads (a JSON array element, a
    /// <c>float</c> sequence or an <c>IList&lt;double&gt;</c>) are encoded as a base64 string.
    /// Any other value leaves the data unchanged.
    /// </param>
    /// <exception cref="FormatException">A base64 payload is malformed or does not hold whole 32-bit floats.</exception>
    public void ConvertEmbeddingData(string? encodingFormat)
    {
        if (Data.Count == 0)
        {
            return;
        }

        switch (encodingFormat)
        {
            // Caller wants numbers: decode any base64 payload into float[].
            case null or "float":
            {
                foreach (var embeddingResponse in Data)
                {
                    var encoded = embeddingResponse.Embedding switch
                    {
                        string text => text,
                        // System.Text.Json materialises object-typed properties as JsonElement.
                        JsonElement { ValueKind: JsonValueKind.String } element => element.GetString(),
                        _ => null
                    };

                    if (encoded is not null)
                    {
                        embeddingResponse.Embedding = ConvertBase64ToFloatArray(encoded);
                    }
                }

                return;
            }
            // Caller wants base64: encode any numeric payload.
            case "base64":
            {
                foreach (var embeddingResponse in Data)
                {
                    switch (embeddingResponse.Embedding)
                    {
                        case JsonElement { ValueKind: JsonValueKind.Array } element:
                            embeddingResponse.Embedding = ConvertFloatArrayToBase64(
                                element.EnumerateArray().Select(value => value.GetSingle()).ToArray());
                            break;
                        case IEnumerable<float> singles:
                            embeddingResponse.Embedding =
                                ConvertFloatArrayToBase64(singles as float[] ?? singles.ToArray());
                            break;
                        case IList<double> doubles:
                            embeddingResponse.Embedding = ConvertFloatArrayToBase64(doubles.ToArray());
                            break;
                    }
                }

                break;
            }
        }
    }

    /// <summary>
    /// Narrows each value to a 32-bit float and base64-encodes the result, so the output has the same
    /// layout as the <c>float[]</c> overload.
    /// </summary>
    /// <param name="floatArray">Values to encode.</param>
    /// <returns>Base64 text of the float32 bytes; an empty string for an empty array.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="floatArray" /> is <c>null</c>.</exception>
    public static string ConvertFloatArrayToBase64(double[] floatArray)
    {
        ArgumentNullException.ThrowIfNull(floatArray);

        // Copying the raw 8-byte doubles would not yield float32 data; narrow first.
        return ConvertFloatArrayToBase64(Array.ConvertAll(floatArray, value => (float)value));
    }

    /// <summary>
    /// Base64-encodes the raw bytes of a 32-bit float array.
    /// </summary>
    /// <param name="floatArray">Values to encode.</param>
    /// <returns>Base64 text of the float32 bytes; an empty string for an empty array.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="floatArray" /> is <c>null</c>.</exception>
    public static string ConvertFloatArrayToBase64(float[] floatArray)
    {
        ArgumentNullException.ThrowIfNull(floatArray);

        var byteCount = floatArray.Length * sizeof(float);
        byte[] byteArray = ArrayPool<byte>.Shared.Rent(byteCount);
        try
        {
            // Buffer.BlockCopy counts bytes, not elements.
            // NOTE(review): bytes are written in platform byte order; consumers presumably expect little-endian.
            Buffer.BlockCopy(floatArray, 0, byteArray, 0, byteCount);

            // The rented buffer may be longer than requested, so encode only the bytes that were written.
            return Convert.ToBase64String(byteArray, 0, byteCount);
        }
        finally
        {
            ArrayPool<byte>.Shared.Return(byteArray);
        }
    }

    /// <summary>Decodes base64 text holding raw 32-bit floats back into a <c>float[]</c>.</summary>
    private static float[] ConvertBase64ToFloatArray(string base64)
    {
        var bytes = Convert.FromBase64String(base64);
        if (bytes.Length % sizeof(float) != 0)
        {
            throw new FormatException(
                "The base64 embedding does not contain a whole number of 32-bit floats.");
        }

        var floats = new float[bytes.Length / sizeof(float)];
        Buffer.BlockCopy(bytes, 0, floats, 0, bytes.Length);
        return floats;
    }

    /// <summary>Usage information, serialized as <c>usage</c>; may be <c>null</c>.</summary>
    [JsonPropertyName("usage")] public ThorUsageResponse? Usage { get; set; }
}
|
||||
|
||||
/// <summary>
/// One entry of an embeddings response.
/// </summary>
public record EmbeddingResponse
{
    /// <summary>
    /// Serialized as <c>index</c>; may be <c>null</c>.
    /// </summary>
    [JsonPropertyName("index")]
    public int? Index { get; set; }

    /// <summary>
    /// The embedding payload, serialized as <c>embedding</c>. It is typed as <see cref="object" /> so it can
    /// carry more than one representation (presumably a numeric array or a base64 string, depending on the
    /// encoding format - confirm against the producer).
    /// </summary>
    [JsonPropertyName("embedding")]
    public object Embedding { get; set; }
}
|
||||
@@ -0,0 +1,22 @@
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Yi.Framework.AiHub.Application.Contracts.Dtos.OpenAi.Embeddings;
|
||||
|
||||
/// <summary>
/// Embedding request DTO; each property maps to the JSON name given by its
/// <see cref="JsonPropertyNameAttribute" />.
/// </summary>
public sealed class ThorEmbeddingInput
{
    /// <summary>Serialized as <c>model</c>.</summary>
    [JsonPropertyName("model")] public string Model { get; set; }

    /// <summary>
    /// Serialized as <c>input</c>. Typed as <see cref="object" />, presumably so that either a single string
    /// or an array can be supplied - confirm against callers.
    /// </summary>
    [JsonPropertyName("input")] public object Input { get; set; }

    /// <summary>Serialized as <c>encoding_format</c>.</summary>
    [JsonPropertyName("encoding_format")] public string EncodingFormat { get; set; }

    /// <summary>Serialized as <c>dimensions</c>; optional.</summary>
    [JsonPropertyName("dimensions")] public int? Dimensions { get; set; }

    /// <summary>Serialized as <c>user</c>; optional.</summary>
    [JsonPropertyName("user")] public string? User { get; set; }
}
|
||||
|
||||
@@ -14,7 +14,6 @@ public class ThorChatMessage
|
||||
/// </summary>
|
||||
public ThorChatMessage()
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
@@ -74,20 +73,19 @@ public class ThorChatMessage
|
||||
{
|
||||
if (value is JsonElement str)
|
||||
{
|
||||
if (str.ValueKind == JsonValueKind.String)
|
||||
{
|
||||
Content = value?.ToString();
|
||||
}
|
||||
else if (str.ValueKind == JsonValueKind.Array)
|
||||
if (str.ValueKind == JsonValueKind.Array)
|
||||
{
|
||||
Contents = JsonSerializer.Deserialize<IList<ThorChatMessageContent>>(value?.ToString());
|
||||
}
|
||||
}
|
||||
else if (value is string strInput)
|
||||
{
|
||||
Content = strInput;
|
||||
}
|
||||
else
|
||||
{
|
||||
Content = value?.ToString();
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@@ -108,15 +106,14 @@ public class ThorChatMessage
|
||||
/// </summary>
|
||||
[JsonPropertyName("function_call")]
|
||||
public ThorChatMessageFunction? FunctionCall { get; set; }
|
||||
|
||||
|
||||
/// <summary>
|
||||
/// 【可选】推理内容
|
||||
/// </summary>
|
||||
[JsonPropertyName("reasoning_content")]
|
||||
public string? ReasoningContent { get; set; }
|
||||
|
||||
[JsonPropertyName("id")]
|
||||
public string? Id { get; set; }
|
||||
|
||||
[JsonPropertyName("id")] public string? Id { get; set; }
|
||||
|
||||
/// <summary>
|
||||
/// 工具调用列表,模型生成的工具调用,例如函数调用。<br/>
|
||||
@@ -164,14 +161,15 @@ public class ThorChatMessage
|
||||
/// <param name="name">参与者的可选名称。提供模型信息以区分同一角色的参与者。</param>
|
||||
/// <param name="toolCalls">工具调用参数列表</param>
|
||||
/// <returns></returns>
|
||||
public static ThorChatMessage CreateAssistantMessage(string content, string? name = null, List<ThorToolCall> toolCalls = null)
|
||||
public static ThorChatMessage CreateAssistantMessage(string content, string? name = null,
|
||||
List<ThorToolCall> toolCalls = null)
|
||||
{
|
||||
return new()
|
||||
{
|
||||
Role = ThorChatMessageRoleConst.Assistant,
|
||||
Content = content,
|
||||
Name = name,
|
||||
ToolCalls=toolCalls,
|
||||
ToolCalls = toolCalls,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -187,7 +185,7 @@ public class ThorChatMessage
|
||||
{
|
||||
Role = ThorChatMessageRoleConst.Tool,
|
||||
Content = content,
|
||||
ToolCallId= toolCallId
|
||||
ToolCallId = toolCallId
|
||||
};
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user