From be6cd1a4bf4fbea26fe4df52a140e6ea27316873 Mon Sep 17 00:00:00 2001
From: leejet
Date: Sat, 1 Jun 2024 13:44:09 +0800
Subject: [PATCH] sync: update ggml

---
 CMakeLists.txt     |  2 +-
 clip.hpp           |  8 ++++----
 esrgan.hpp         |  8 ++++----
 ggml               |  2 +-
 ggml_extend.hpp    |  2 +-
 model.cpp          |  6 ++----
 pmid.hpp           | 12 ++++++------
 stable-diffusion.h | 21 ++++++++++++---------
 unet.hpp           |  4 ++--
 9 files changed, 33 insertions(+), 32 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4f28343..28a03fb 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -34,7 +34,7 @@ option(SD_BUILD_SHARED_LIBS "sd: build shared libs" OFF)
 
 if(SD_CUBLAS)
     message("Use CUBLAS as backend stable-diffusion")
-    set(GGML_CUBLAS ON)
+    set(GGML_CUDA ON)
     add_definitions(-DSD_USE_CUBLAS)
 endif()
 
diff --git a/clip.hpp b/clip.hpp
index bf562e3..cf82fdf 100644
--- a/clip.hpp
+++ b/clip.hpp
@@ -679,8 +679,8 @@ public:
         class_embedding = ggml_repeat(ctx, class_embed_weight, class_embedding);      // [N, embed_dim]
         class_embedding = ggml_reshape_4d(ctx, class_embedding, 1, embed_dim, 1, N);  // [N, 1, embed_dim, 1]
 
-        struct ggml_tensor* x = ggml_concat(ctx, class_embedding, patch_embedding);  // [N, num_positions, embed_dim, 1]
-        x                     = ggml_reshape_3d(ctx, x, embed_dim, num_positions, N);  // [N, num_positions, embed_dim]
+        struct ggml_tensor* x = ggml_concat(ctx, class_embedding, patch_embedding, 2);  // [N, num_positions, embed_dim, 1]
+        x                     = ggml_reshape_3d(ctx, x, embed_dim, num_positions, N);   // [N, num_positions, embed_dim]
         x = ggml_add(ctx, x, position_embed_weight);
         return x;  // [N, num_positions, embed_dim]
     }
@@ -1036,7 +1036,7 @@ struct FrozenCLIPEmbedderWithCustomWords : public GGMLModule {
                                            hidden_states2->ne[3]);
             hidden_states2 = ggml_cont(ctx, ggml_permute(ctx, hidden_states2, 2, 0, 1, 3));
 
-            hidden_states = ggml_concat(ctx, hidden_states, hidden_states2);  // [N, n_token, hidden_size + hidden_size2]
+            hidden_states = ggml_concat(ctx, hidden_states, hidden_states2, 2);  // [N, n_token, hidden_size + hidden_size2]
             hidden_states = ggml_cont(ctx, ggml_permute(ctx, hidden_states, 1, 2, 0, 3));
         }
 
@@ -1069,7 +1069,7 @@ struct FrozenCLIPEmbedderWithCustomWords : public GGMLModule {
             auto token_embed_weight = text_model.get_token_embed_weight();
             token_embed_weight      = ggml_reshape_3d(compute_ctx, token_embed_weight, token_embed_weight->ne[0], 1, token_embed_weight->ne[1]);
             // concatenate custom embeddings
-            embeddings = ggml_concat(compute_ctx, token_embed_weight, custom_embeddings);
+            embeddings = ggml_concat(compute_ctx, token_embed_weight, custom_embeddings, 2);
             embeddings = ggml_reshape_2d(compute_ctx, embeddings, embeddings->ne[0], embeddings->ne[2]);
         }
 
diff --git a/esrgan.hpp b/esrgan.hpp
index 982dd2a..234de9e 100644
--- a/esrgan.hpp
+++ b/esrgan.hpp
@@ -42,13 +42,13 @@ public:
         auto conv5 = std::dynamic_pointer_cast<Conv2d>(blocks["conv5"]);
 
         auto x1    = lrelu(ctx, conv1->forward(ctx, x));
-        auto x_cat = ggml_concat(ctx, x, x1);
+        auto x_cat = ggml_concat(ctx, x, x1, 2);
         auto x2    = lrelu(ctx, conv2->forward(ctx, x_cat));
-        x_cat      = ggml_concat(ctx, x_cat, x2);
+        x_cat      = ggml_concat(ctx, x_cat, x2, 2);
         auto x3    = lrelu(ctx, conv3->forward(ctx, x_cat));
-        x_cat      = ggml_concat(ctx, x_cat, x3);
+        x_cat      = ggml_concat(ctx, x_cat, x3, 2);
         auto x4    = lrelu(ctx, conv4->forward(ctx, x_cat));
-        x_cat      = ggml_concat(ctx, x_cat, x4);
+        x_cat      = ggml_concat(ctx, x_cat, x4, 2);
         auto x5    = conv5->forward(ctx, x_cat);
 
         x5 = ggml_add(ctx, ggml_scale(ctx, x5, 0.2f), x);
diff --git a/ggml b/ggml
index 57869ad..2aae01f 160000
--- a/ggml
+++ b/ggml
@@ -1 +1 @@
-Subproject commit 57869ad3b7b1f49ae18e3238b0d69a9467a8f068
+Subproject commit 2aae01fd9b8f9399f343cf18f46f38996ef52e2c
diff --git a/ggml_extend.hpp b/ggml_extend.hpp
index f206066..dbe9303 100644
--- a/ggml_extend.hpp
+++ b/ggml_extend.hpp
@@ -917,7 +917,7 @@ public:
             return NULL;
         }
         // it's performing a compute, check if backend isn't cpu
-        if (!ggml_backend_is_cpu(backend) && tensor->backend == GGML_BACKEND_TYPE_CPU) {
+        if (!ggml_backend_is_cpu(backend) && (tensor->buffer == NULL || ggml_backend_buffer_is_host(tensor->buffer))) {
            // pass input tensors to gpu memory
            auto backend_tensor = ggml_dup_tensor(compute_ctx, tensor);
 
diff --git a/model.cpp b/model.cpp
index 7241ff3..c4556a9 100644
--- a/model.cpp
+++ b/model.cpp
@@ -571,10 +571,9 @@ void convert_tensor(void* src,
         if (dst_type == GGML_TYPE_F16) {
             ggml_fp32_to_fp16_row((float*)src, (ggml_fp16_t*)dst, n);
         } else {
-            int64_t hist[16];
             std::vector<float> imatrix(n_per_row, 1.0f);  // dummy importance matrix
             const float* im = imatrix.data();
-            ggml_quantize_chunk(dst_type, (float*)src, dst, 0, nrows, n_per_row, hist, im);
+            ggml_quantize_chunk(dst_type, (float*)src, dst, 0, nrows, n_per_row, im);
         }
     } else if (dst_type == GGML_TYPE_F32) {
         if (src_type == GGML_TYPE_F16) {
@@ -602,10 +601,9 @@ void convert_tensor(void* src,
         if (dst_type == GGML_TYPE_F16) {
             ggml_fp32_to_fp16_row((float*)src_data_f32, (ggml_fp16_t*)dst, n);
         } else {
-            int64_t hist[16];
             std::vector<float> imatrix(n_per_row, 1.0f);  // dummy importance matrix
             const float* im = imatrix.data();
-            ggml_quantize_chunk(dst_type, (float*)src_data_f32, dst, 0, nrows, n_per_row, hist, im);
+            ggml_quantize_chunk(dst_type, (float*)src_data_f32, dst, 0, nrows, n_per_row, im);
         }
     }
 }
diff --git a/pmid.hpp b/pmid.hpp
index 153f562..0a7acd0 100644
--- a/pmid.hpp
+++ b/pmid.hpp
@@ -64,7 +64,7 @@ public:
         auto prompt_embeds0 = ggml_cont(ctx, ggml_permute(ctx, prompt_embeds, 2, 0, 1, 3));
         auto id_embeds0     = ggml_cont(ctx, ggml_permute(ctx, id_embeds, 2, 0, 1, 3));
         // concat is along dim 2
-        auto stacked_id_embeds = ggml_concat(ctx, prompt_embeds0, id_embeds0);
+        auto stacked_id_embeds = ggml_concat(ctx, prompt_embeds0, id_embeds0, 2);
         stacked_id_embeds      = ggml_cont(ctx, ggml_permute(ctx, stacked_id_embeds, 1, 2, 0, 3));
 
         // stacked_id_embeds = mlp1.forward(ctx, stacked_id_embeds);
@@ -102,12 +102,12 @@ public:
         stacked_id_embeds = ggml_cont(ctx, ggml_permute(ctx, stacked_id_embeds, 0, 2, 1, 3));
         if (left && right) {
-            stacked_id_embeds = ggml_concat(ctx, left, stacked_id_embeds);
-            stacked_id_embeds = ggml_concat(ctx, stacked_id_embeds, right);
+            stacked_id_embeds = ggml_concat(ctx, left, stacked_id_embeds, 2);
+            stacked_id_embeds = ggml_concat(ctx, stacked_id_embeds, right, 2);
         } else if (left) {
-            stacked_id_embeds = ggml_concat(ctx, left, stacked_id_embeds);
+            stacked_id_embeds = ggml_concat(ctx, left, stacked_id_embeds, 2);
         } else if (right) {
-            stacked_id_embeds = ggml_concat(ctx, stacked_id_embeds, right);
+            stacked_id_embeds = ggml_concat(ctx, stacked_id_embeds, right, 2);
         }
         stacked_id_embeds = ggml_cont(ctx, ggml_permute(ctx, stacked_id_embeds, 0, 2, 1, 3));
         class_tokens_mask = ggml_cont(ctx, ggml_transpose(ctx, class_tokens_mask));
@@ -146,7 +146,7 @@ struct PhotoMakerIDEncoderBlock : public CLIPVisionModelProjection {
         id_embeds   = ggml_cont(ctx, ggml_permute(ctx, id_embeds, 2, 0, 1, 3));
         id_embeds_2 = ggml_cont(ctx, ggml_permute(ctx, id_embeds_2, 2, 0, 1, 3));
 
-        id_embeds = ggml_concat(ctx, id_embeds, id_embeds_2);  // [batch_size, seq_length, 1, 2048] check whether concat at dim 2 is right
+        id_embeds = ggml_concat(ctx, id_embeds, id_embeds_2, 2);  // [batch_size, seq_length, 1, 2048] check whether concat at dim 2 is right
         id_embeds = ggml_cont(ctx, ggml_permute(ctx, id_embeds, 1, 2, 0, 3));
 
         struct ggml_tensor* updated_prompt_embeds = fuse_module->forward(ctx,
diff --git a/stable-diffusion.h b/stable-diffusion.h
index 4031a09..052e648 100644
--- a/stable-diffusion.h
+++ b/stable-diffusion.h
@@ -60,12 +60,11 @@ enum sd_type_t {
     SD_TYPE_Q4_0 = 2,
     SD_TYPE_Q4_1 = 3,
     // SD_TYPE_Q4_2 = 4, support has been removed
-    // SD_TYPE_Q4_3 (5) support has been removed
-    SD_TYPE_Q5_0 = 6,
-    SD_TYPE_Q5_1 = 7,
-    SD_TYPE_Q8_0 = 8,
-    SD_TYPE_Q8_1 = 9,
-    // k-quantizations
+    // SD_TYPE_Q4_3 = 5, support has been removed
+    SD_TYPE_Q5_0 = 6,
+    SD_TYPE_Q5_1 = 7,
+    SD_TYPE_Q8_0 = 8,
+    SD_TYPE_Q8_1 = 9,
     SD_TYPE_Q2_K = 10,
     SD_TYPE_Q3_K = 11,
     SD_TYPE_Q4_K = 12,
@@ -80,9 +79,13 @@ enum sd_type_t {
     SD_TYPE_IQ3_S  = 21,
     SD_TYPE_IQ2_S  = 22,
     SD_TYPE_IQ4_XS = 23,
-    SD_TYPE_I8,
-    SD_TYPE_I16,
-    SD_TYPE_I32,
+    SD_TYPE_I8     = 24,
+    SD_TYPE_I16    = 25,
+    SD_TYPE_I32    = 26,
+    SD_TYPE_I64    = 27,
+    SD_TYPE_F64    = 28,
+    SD_TYPE_IQ1_M  = 29,
+    SD_TYPE_BF16   = 30,
     SD_TYPE_COUNT,
 };
 
diff --git a/unet.hpp b/unet.hpp
index 33c63af..e6f9fb1 100644
--- a/unet.hpp
+++ b/unet.hpp
@@ -396,7 +396,7 @@ public:
             if (c_concat->ne[3] != x->ne[3]) {
                 c_concat = ggml_repeat(ctx, c_concat, x);
             }
-            x = ggml_concat(ctx, x, c_concat);
+            x = ggml_concat(ctx, x, c_concat, 2);
         }
 
         if (y != NULL) {
@@ -491,7 +491,7 @@ public:
                 control_offset--;
             }
 
-            h = ggml_concat(ctx, h, h_skip);
+            h = ggml_concat(ctx, h, h_skip, 2);
 
             std::string name = "output_blocks." + std::to_string(output_block_idx) + ".0";
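
Every ggml_concat call site in this patch gains a trailing 2 because the updated ggml API takes an explicit concat dimension, where the old API hard-coded dim 2. A minimal sketch of the new signature, assuming a small CPU-only context and illustrative shapes (none of these tensors come from this repository):

    #include "ggml.h"

    static void concat_dim_sketch() {
        struct ggml_init_params params;
        params.mem_size   = 16 * 1024 * 1024;  // plenty for two small tensors
        params.mem_buffer = NULL;
        params.no_alloc   = false;
        struct ggml_context* ctx = ggml_init(params);

        // shapes must match on every dim except the concat dim
        struct ggml_tensor* a = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, 8, 4, 3);  // ne = {8, 4, 3, 1}
        struct ggml_tensor* b = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, 8, 4, 5);  // ne = {8, 4, 5, 1}

        // new API: the concat dim is explicit; passing 2 reproduces the
        // old behavior, which is why every call site above appends ", 2"
        struct ggml_tensor* c = ggml_concat(ctx, a, b, 2);  // ne = {8, 4, 8, 1}

        ggml_free(ctx);
    }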
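The model.cpp hunks track the other signature change in this sync: ggml_quantize_chunk dropped the int64_t hist[16] histogram argument and now takes the importance matrix as its final parameter. A self-contained sketch, assuming Q8_0 as the target type and dummy data:

    #include "ggml.h"

    #include <cstdint>
    #include <vector>

    static void quantize_chunk_sketch() {
        const int64_t nrows     = 4;
        const int64_t n_per_row = 64;  // must be a multiple of the Q8_0 block size (32)

        std::vector<float> src(nrows * n_per_row, 0.5f);
        std::vector<uint8_t> dst(nrows * ggml_row_size(GGML_TYPE_Q8_0, n_per_row));
        std::vector<float> imatrix(n_per_row, 1.0f);  // dummy importance matrix, as in model.cpp

        // no more int64_t hist[16]: the histogram argument is gone and
        // the importance matrix comes last
        size_t n_bytes = ggml_quantize_chunk(GGML_TYPE_Q8_0, src.data(), dst.data(),
                                             0, nrows, n_per_row, imatrix.data());
        (void)n_bytes;  // bytes written; equals dst.size() here
    }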
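The ggml_extend.hpp hunk replaces the deprecated per-tensor backend field with a buffer-based check: an input tensor has to be copied to device memory when the compute backend is not the CPU but the tensor's data is still unallocated or sitting in a host-visible buffer. The same predicate as a standalone helper (the function name is assumed, not from the repository):

    #include "ggml.h"
    #include "ggml-backend.h"

    // true when an input tensor still has to be uploaded: the compute
    // backend is not the CPU, but the tensor's data is either
    // unallocated or lives in a host (CPU-visible) buffer
    static bool input_needs_upload(ggml_backend_t backend, const struct ggml_tensor* tensor) {
        return !ggml_backend_is_cpu(backend) &&
               (tensor->buffer == NULL || ggml_backend_buffer_is_host(tensor->buffer));
    }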
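The stable-diffusion.h hunk renumbers sd_type_t to stay in lockstep with ggml_type, which gained I64, F64, IQ1_M, and BF16. Since the two enums must agree value-for-value, a compile-time check (assumed, not present in the repository) in any translation unit that sees both headers would catch the next renumbering:

    #include "ggml.h"
    #include "stable-diffusion.h"

    // pin the newly added values and the overall enum length
    static_assert((int)SD_TYPE_IQ1_M == (int)GGML_TYPE_IQ1_M, "sd_type_t out of sync with ggml_type");
    static_assert((int)SD_TYPE_BF16  == (int)GGML_TYPE_BF16,  "sd_type_t out of sync with ggml_type");
    static_assert((int)SD_TYPE_COUNT == (int)GGML_TYPE_COUNT, "sd_type_t out of sync with ggml_type");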