
Commit 6ed0e1b

Merge pull request #60 from wisedev-code/feat/vl-integration
Feat/vl integration
2 parents (b06232f + e3e5715), commit 6ed0e1b

19 files changed: +114, -64 lines

Examples/Examples/Agents/AgentsWithRedirectImageExample.cs

Lines changed: 1 addition & 1 deletion

@@ -51,6 +51,6 @@ Generate image based on given prompt
             }]
         });
 
-        ImagePreview.ShowImage(result.Message.Images);
+        ImagePreview.ShowImage(result.Message.Image);
     }
 }

Examples/Examples/Chat/ChatWithImageGenExample.cs

Lines changed: 1 addition & 1 deletion

@@ -14,6 +14,6 @@ public async Task Start()
             .WithMessage("Generate cyberpunk godzilla cat warrior")
             .CompleteAsync();
 
-        ImagePreview.ShowImage(result.Message.Images);
+        ImagePreview.ShowImage(result.Message.Image);
     }
 }

Examples/Examples/Chat/ChatWithImageGenGeminiExample.cs

Lines changed: 1 addition & 1 deletion

@@ -15,6 +15,6 @@ public async Task Start()
             .WithMessage("Generate hamster as a astronaut on the moon")
             .CompleteAsync();
 
-        ImagePreview.ShowImage(result.Message.Images);
+        ImagePreview.ShowImage(result.Message.Image);
     }
 }

Examples/Examples/Chat/ChatWithImageGenOpenAiExample.cs

Lines changed: 1 addition & 1 deletion

@@ -15,6 +15,6 @@ public async Task Start()
             .WithMessage("Generate rock style cow playing guitar")
             .CompleteAsync();
 
-        ImagePreview.ShowImage(result.Message.Images);
+        ImagePreview.ShowImage(result.Message.Image);
     }
 }
Lines changed: 10 additions & 13 deletions

@@ -1,25 +1,22 @@
-using System.Reflection;
 using MaIN.Core.Hub;
 
 namespace Examples;
 
 public class ChatWithVisionExample : IExample
 {
-    /// <summary>
-    /// Vision via Multimodal models as Llava is not supported yet
-    /// </summary>
     public async Task Start()
     {
-        Console.WriteLine("ChatExample with files is running!");
+        //https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf - Tried with this model
+        Console.WriteLine("ChatExample with vision model is running!");
 
-        List<string> images = [Path.Combine(AppContext.BaseDirectory, "Files", "gamex.jpg")];
+        var image = await File.ReadAllBytesAsync(
+            Path.Combine(AppContext.BaseDirectory, "Files", "gamex.jpg"));
 
-        var result = await AIHub.Chat()
-            .WithModel("llama3.2:3b")
-            .WithMessage("What is the title of game?")
-            .WithFiles(images)
-            .CompleteAsync();
-
-        Console.WriteLine(result.Message.Content);
+        await AIHub.Chat()
+            .WithCustomModel("Llava1.6-Mistral",
+                path: "<path_to_model>.gguf",
+                mmProject: "<path_to_mmproj>.gguf")
+            .WithMessage("What can you see on the image?", image)
+            .CompleteAsync(interactive: true);
     }
 }
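For comparison, a minimal non-interactive sketch of the same call is shown below. It assumes that CompleteAsync() without the interactive flag still returns a ChatResult whose Message.Content carries the reply, as in the previous revision of this example; the model paths are placeholders.

    // Sketch only: same vision chat, but capturing the reply instead of streaming interactively.
    var image = await File.ReadAllBytesAsync(
        Path.Combine(AppContext.BaseDirectory, "Files", "gamex.jpg"));

    var result = await AIHub.Chat()
        .WithCustomModel("Llava1.6-Mistral",
            path: "<path_to_model>.gguf",          // main LLaVA GGUF weights
            mmProject: "<path_to_mmproj>.gguf")    // multimodal projection (mmproj) GGUF
        .WithMessage("What can you see on the image?", image)
        .CompleteAsync();                          // assumed to return ChatResult, as in the earlier revision

    Console.WriteLine(result.Message.Content);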

MaIN.Core.IntegrationTests/ChatTests.cs

Lines changed: 1 addition & 1 deletion

@@ -98,7 +98,7 @@ public async Task Should_GenerateImage_BasedOnPrompt()
             throw new ArgumentException("Invalid file extension");
 
         Assert.True(result.Done);
-        Assert.NotNull(result.Message.Images);
+        Assert.NotNull(result.Message.Image);
     }
 
     [Fact]

Releases/0.2.5.md

Lines changed: 3 additions & 0 deletions

@@ -0,0 +1,3 @@
+# 0.2.5 release
+
+- Integrate with vision llava models

src/MaIN.Core/.nuspec

Lines changed: 1 addition & 1 deletion

@@ -2,7 +2,7 @@
 <package>
   <metadata>
     <id>MaIN.NET</id>
-    <version>0.2.4</version>
+    <version>0.2.5</version>
     <authors>Wisedev</authors>
     <owners>Wisedev</owners>
     <icon>favicon.png</icon>

src/MaIN.Core/Hub/Contexts/AgentContext.cs

Lines changed: 2 additions & 2 deletions

@@ -114,9 +114,9 @@ public AgentContext WithMemoryParams(MemoryParams memoryParams)
         return this;
     }
 
-    public AgentContext WithCustomModel(string model, string path)
+    public AgentContext WithCustomModel(string model, string path, string? mmProject = null)
     {
-        KnownModels.AddModel(model, path);
+        KnownModels.AddModel(model, path, mmProject);
         _agent.Model = model;
         return this;
     }
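The agent builder gains the same optional mmProject parameter. A hypothetical sketch of registering a local vision model on an agent follows; AIHub.Agent() as the entry point that yields an AgentContext is an assumption, only the WithCustomModel overload itself appears in this diff.

    // Hypothetical: AIHub.Agent() as the AgentContext entry point is assumed.
    var agent = AIHub.Agent()
        .WithCustomModel("Llava1.6-Mistral",
            path: "<path_to_model>.gguf",
            mmProject: "<path_to_mmproj>.gguf");   // omit mmProject for text-only models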

src/MaIN.Core/Hub/Contexts/ChatContext.cs

Lines changed: 17 additions & 2 deletions

@@ -54,9 +54,9 @@ public ChatContext WithMemoryParams(MemoryParams memoryParams)
         return this;
     }
 
-    public ChatContext WithCustomModel(string model, string path)
+    public ChatContext WithCustomModel(string model, string path, string? mmProject = null)
     {
-        KnownModels.AddModel(model, path);
+        KnownModels.AddModel(model, path, mmProject);
         _chat.Model = model;
         return this;
     }
@@ -79,6 +79,21 @@ public ChatContext WithMessage(string content)
         _chat.Messages.Add(message);
         return this;
     }
+
+
+    public ChatContext WithMessage(string content, byte[] image)
+    {
+        var message = new Message
+        {
+            Role = "User",
+            Content = content,
+            Time = DateTime.Now,
+            Image = image
+        };
+
+        _chat.Messages.Add(message);
+        return this;
+    }
 
     public ChatContext WithSystemPrompt(string systemPrompt)
     {

src/MaIN.Domain/Entities/Message.cs

Lines changed: 1 addition & 1 deletion

@@ -10,7 +10,7 @@ public class Message
     public List<LLMTokenValue> Tokens { get; set; } = [];
     public bool Tool { get; init; }
     public DateTime Time { get; set; }
-    public byte[]? Images { get; init; }
+    public byte[]? Image { get; init; }
     public List<FileInfo>? Files { get; set; }
     public Dictionary<string, string> Properties { get; set; } = [];
 }
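Because Image is a plain byte[]?, callers are not limited to ImagePreview.ShowImage; the bytes can be persisted directly. A small sketch follows (the file name is illustrative; the InferPage UI in this changeset treats generated images as PNG).

    // result.Message.Image is byte[]? after the rename; it is null for text-only replies.
    if (result.Message.Image is { } imageBytes)
    {
        await File.WriteAllBytesAsync("generated.png", imageBytes);
    }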

src/MaIN.Domain/Models/SupportedModels.cs

Lines changed: 3 additions & 1 deletion

@@ -4,6 +4,7 @@ public class Model
 {
     public required string Name { get; init; }
     public required string FileName { get; init; }
+    public string? MMProject { get; set; }
     public string? DownloadUrl { get; set; }
     public string? AdditionalPrompt { get; set; }
     public string? Description { get; set; }
@@ -231,12 +232,13 @@ public static Model GetModel(string path, string? name)
         throw new Exception($"Model {fileName} is not downloaded");
     }
 
-    public static void AddModel(string model, string path)
+    public static void AddModel(string model, string path, string? mmProject)
     {
         Models.Add(new Model()
         {
             Description = string.Empty,
             DownloadUrl = string.Empty,
+            MMProject = mmProject,
             Name = model,
             FileName = $"{Path.GetFileName(path)}",
             Path = path
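MMProject holds the path to the multimodal projection (mmproj) GGUF that LLaVA-style models require alongside the main weights; WithCustomModel forwards it into AddModel. A sketch of the equivalent direct registration with placeholder paths (the fluent WithCustomModel call is the intended entry point):

    // Registers a vision-capable custom model; pass null as mmProject for text-only models.
    KnownModels.AddModel(
        "Llava1.6-Mistral",
        "<path_to_model>.gguf",
        "<path_to_mmproj>.gguf");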

src/MaIN.InferPage/Components/Pages/Home.razor

Lines changed: 2 additions & 2 deletions

@@ -38,9 +38,9 @@
                 <FluentCard class="message-card-img bot-message"
                             Style="height: 30rem !important; width: 30rem !important; ">
                     <div>
-                        <a href="data:image/png;base64,@Convert.ToBase64String(conversation.Message.Images!)"
+                        <a href="data:image/png;base64,@Convert.ToBase64String(conversation.Message.Image!)"
                            style="cursor: -webkit-zoom-in; cursor: zoom-in;" target="_blank">
-                            <img src="data:image/png;base64,@Convert.ToBase64String(conversation.Message.Images!)"
+                            <img src="data:image/png;base64,@Convert.ToBase64String(conversation.Message.Image!)"
                                  style="object-fit: fill; width:100%; height:100%;"
                                  alt="imageResponse"/>
                         </a>

src/MaIN.Services/Mappers/ChatMapper.cs

Lines changed: 4 additions & 4 deletions

@@ -28,7 +28,7 @@ private static MessageDto ToDto(this Message message)
         {
             Content = message.Content,
             Role = message.Tool ? "System" : message.Role,
-            Images = message.Images,
+            Images = message.Image,
             Time = message.Time,
             Properties = message.Properties,
             Files = message.Files?.Select(x => new FileInfoDto()
@@ -57,7 +57,7 @@ private static Message ToDomain(this MessageDto message)
         {
             Content = message.Content,
             Role = message.Role,
-            Images = message.Images,
+            Image = message.Images,
             Time = message.Time,
             Properties = message.Properties,
             Files = message.Files?.Select(x => new FileInfo()
@@ -74,7 +74,7 @@ private static MessageDocument ToDocument(this Message message)
             Content = message.Content,
             Role = message.Role,
             Time = message.Time,
-            Images = message.Images,
+            Images = message.Image,
             Tokens = message.Tokens.Select(x => x.ToDocument()).ToList(),
             Properties = message.Properties,
             Tool = message.Tool,
@@ -125,7 +125,7 @@ private static Message ToDomain(this MessageDocument message)
             Time = message.Time,
             Tokens = message.Tokens.Select(x => x.ToDomain()).ToList(),
             Role = message.Role,
-            Images = message.Images,
+            Image = message.Images,
             Properties = message.Properties,
         };
 
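Note that only the domain entity was renamed: MessageDto and MessageDocument keep the plural Images property, so the mapper now bridges the singular domain Image to the plural Images on the DTO/document side and back.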

src/MaIN.Services/Services/ImageGenServices/GeminiImageGenService.cs

Lines changed: 1 addition & 1 deletion

@@ -64,7 +64,7 @@ private static ChatResult CreateChatResult(byte[] imageBytes)
         {
             Content = ServiceConstants.Messages.GeneratedImageContent,
             Role = ServiceConstants.Roles.Assistant,
-            Images = imageBytes
+            Image = imageBytes
         },
         Model = Models.IMAGEN_GENERATE,
         CreatedAt = DateTime.UtcNow

src/MaIN.Services/Services/ImageGenServices/ImageGenDalleService.cs

Lines changed: 1 addition & 1 deletion

@@ -70,7 +70,7 @@ private static ChatResult CreateChatResult(byte[] imageBytes)
         {
             Content = ServiceConstants.Messages.GeneratedImageContent,
             Role = ServiceConstants.Roles.Assistant,
-            Images = imageBytes
+            Image = imageBytes
         },
         Model = Models.DALLE,
         CreatedAt = DateTime.UtcNow

src/MaIN.Services/Services/ImageGenServices/ImageGenService.cs

Lines changed: 1 addition & 1 deletion

@@ -49,7 +49,7 @@ private static ChatResult CreateChatResult(byte[] imageBytes)
         {
             Content = ServiceConstants.Messages.GeneratedImageContent,
             Role = ServiceConstants.Roles.Assistant,
-            Images = imageBytes
+            Image = imageBytes
         },
         Model = LocalImageModels.FLUX,
         CreatedAt = DateTime.UtcNow

src/MaIN.Services/Services/LLMService/LLMService.cs

Lines changed: 62 additions & 29 deletions

@@ -120,7 +120,7 @@ public Task CleanSessionCache(string? id)
         {
             llmModel.Dispose();
         }
-        
+
         memory.TextGenerationContext.Dispose();
         memory.EmbeddingGenerator.Dispose();
 
@@ -154,10 +154,15 @@ private async Task<List<LLMTokenValue>> ProcessChatRequest(
             ? await LLamaWeights.LoadFromFileAsync(parameters, cancellationToken)
             : await ModelLoader.GetOrLoadModelAsync(modelsPath, modelKey);
 
+        var llavaWeights = model.MMProject != null
+            ? await LLavaWeights.LoadFromFileAsync(model.MMProject, cancellationToken)
+            : null;
+
+
         using var executor = new BatchedExecutor(llmModel, parameters);
 
-        var (conversation, isComplete, hasFailed) = InitializeConversation(
-            chat, lastMsg, model, llmModel, executor, cancellationToken);
+        var (conversation, isComplete, hasFailed) = await InitializeConversation(
+            chat, lastMsg, model, llmModel, llavaWeights, executor, cancellationToken);
 
         if (!isComplete)
         {
@@ -196,49 +201,77 @@ private ModelParams CreateModelParameters(Chat chat, string modelKey)
         };
     }
 
-    private (Conversation Conversation, bool IsComplete, bool HasFailed) InitializeConversation(
-        Chat chat,
+    private async Task<(Conversation Conversation, bool IsComplete, bool HasFailed)> InitializeConversation(Chat chat,
         Message lastMsg,
         Model model,
         LLamaWeights llmModel,
+        LLavaWeights? llavaWeights,
        BatchedExecutor executor,
        CancellationToken cancellationToken)
     {
         var isNewConversation = chat.ConversationState == null;
-        Conversation conversation;
+        var conversation = isNewConversation
+            ? executor.Create()
+            : executor.Load(chat.ConversationState!);
+
+        if (lastMsg.Image != null)
+        {
+            await ProcessImageMessage(conversation, lastMsg, llmModel, llavaWeights, executor, cancellationToken);
+        }
+        else
+        {
+            ProcessTextMessage(conversation, chat, lastMsg, model, llmModel, executor, isNewConversation);
+        }
+
+        return (conversation, false, false);
+    }
+
+    private static async Task ProcessImageMessage(Conversation conversation,
+        Message lastMsg,
+        LLamaWeights llmModel,
+        LLavaWeights? llavaWeights,
+        BatchedExecutor executor,
+        CancellationToken cancellationToken)
+    {
+        var imageEmbeddings = llavaWeights?.CreateImageEmbeddings(lastMsg.Image!);
+        conversation.Prompt(imageEmbeddings!);
+
+        while (executor.BatchedTokenCount > 0)
+            await executor.Infer(cancellationToken);
+
+        var prompt = llmModel.Tokenize($"USER: {lastMsg.Content}\nASSISTANT:", true, false, Encoding.UTF8);
+        conversation.Prompt(prompt);
+    }
+
+    private static void ProcessTextMessage(Conversation conversation,
+        Chat chat,
+        Message lastMsg,
+        Model model,
+        LLamaWeights llmModel,
+        BatchedExecutor executor,
+        bool isNewConversation)
+    {
+        var template = new LLamaTemplate(llmModel);
+        var finalPrompt = ChatHelper.GetFinalPrompt(lastMsg, model, isNewConversation);
 
         if (isNewConversation)
         {
             var systemMsg = chat.Messages.FirstOrDefault(x => x.Role == nameof(AuthorRole.System));
-            var template = new LLamaTemplate(llmModel);
-            var finalPrompt = ChatHelper.GetFinalPrompt(lastMsg, model, true);
-
             if (systemMsg != null)
             {
                 template.Add(systemMsg.Role, systemMsg.Content);
             }
-
-            template.Add(ServiceConstants.Roles.User, finalPrompt);
-            template.AddAssistant = true;
-
-            var templatedMessage = Encoding.UTF8.GetString(template.Apply());
-            conversation = executor.Create();
-            conversation.Prompt(executor.Context.Tokenize(templatedMessage, addBos: true, special: true));
         }
-        else
-        {
-            conversation = executor.Load(chat.ConversationState!);
-            var template = new LLamaTemplate(llmModel);
-            var finalPrompt = ChatHelper.GetFinalPrompt(lastMsg, model, false);
 
-            template.Add(ServiceConstants.Roles.User, finalPrompt);
-            template.AddAssistant = true;
+        template.Add(ServiceConstants.Roles.User, finalPrompt);
+        template.AddAssistant = true;
 
-            var templatedMessage = Encoding.UTF8.GetString(template.Apply());
-            conversation.Prompt(executor.Context.Tokenize(templatedMessage));
-        }
+        var templatedMessage = Encoding.UTF8.GetString(template.Apply());
+        var tokens = isNewConversation
+            ? executor.Context.Tokenize(templatedMessage, addBos: true, special: true)
+            : executor.Context.Tokenize(templatedMessage);
 
-        return (conversation, false, false);
+        conversation.Prompt(tokens);
     }
 
     private async Task<(List<LLMTokenValue> Tokens, bool IsComplete, bool HasFailed)> ProcessTokens(
@@ -340,7 +373,7 @@ private string GetModelsPath()
 
         return path;
     }
-    
+
     private async Task<ChatResult> CreateChatResult(Chat chat, List<LLMTokenValue> tokens,
        ChatRequestOptions requestOptions)
     {
@@ -375,4 +408,4 @@ await notificationService.DispatchNotification(
             NotificationMessageBuilder.CreateChatCompletion(chatId, token, isComplete),
             ServiceConstants.Notifications.ReceiveMessageUpdate);
     }
-}
+}
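Distilled, the new image path prompts the conversation with LLaVA image embeddings first, drains the batched executor, and only then prompts the tokenized text turn. The sketch below condenses that sequence using the same LLamaSharp calls as the diff; the method name, parameter names, and surrounding setup are illustrative assumptions, not part of this change.

    using System.Text;
    using LLama;
    using LLama.Batched;
    using LLama.Common;

    // Sketch under assumptions: modelParams, mmprojPath, imageBytes and userText are supplied by the caller.
    static async Task PromptImageThenTextAsync(
        ModelParams modelParams, string mmprojPath, byte[] imageBytes, string userText)
    {
        using var llm = await LLamaWeights.LoadFromFileAsync(modelParams);
        using var llava = await LLavaWeights.LoadFromFileAsync(mmprojPath);
        using var executor = new BatchedExecutor(llm, modelParams);

        var conversation = executor.Create();

        // 1) Prompt with the LLaVA image embeddings, then infer until the batch is drained.
        conversation.Prompt(llava.CreateImageEmbeddings(imageBytes));
        while (executor.BatchedTokenCount > 0)
            await executor.Infer(CancellationToken.None);

        // 2) Prompt the tokenized text turn in the USER/ASSISTANT format; decoding then proceeds as usual.
        var tokens = llm.Tokenize($"USER: {userText}\nASSISTANT:", true, false, Encoding.UTF8);
        conversation.Prompt(tokens);
    }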
