sync: update ggml

leejet 2024-06-01 13:44:09 +08:00
parent e1384defca
commit be6cd1a4bf
9 changed files with 33 additions and 32 deletions

@@ -34,7 +34,7 @@ option(SD_BUILD_SHARED_LIBS "sd: build shared libs" OFF)
 if(SD_CUBLAS)
     message("Use CUBLAS as backend stable-diffusion")
-    set(GGML_CUBLAS ON)
+    set(GGML_CUDA ON)
     add_definitions(-DSD_USE_CUBLAS)
 endif()

@@ -679,7 +679,7 @@ public:
         class_embedding = ggml_repeat(ctx, class_embed_weight, class_embedding);  // [N, embed_dim]
         class_embedding = ggml_reshape_4d(ctx, class_embedding, 1, embed_dim, 1, N);  // [N, 1, embed_dim, 1]
-        struct ggml_tensor* x = ggml_concat(ctx, class_embedding, patch_embedding);  // [N, num_positions, embed_dim, 1]
+        struct ggml_tensor* x = ggml_concat(ctx, class_embedding, patch_embedding, 2);  // [N, num_positions, embed_dim, 1]
         x = ggml_reshape_3d(ctx, x, embed_dim, num_positions, N);  // [N, num_positions, embed_dim]
         x = ggml_add(ctx, x, position_embed_weight);
         return x;  // [N, num_positions, embed_dim]
@@ -1036,7 +1036,7 @@ struct FrozenCLIPEmbedderWithCustomWords : public GGMLModule {
                                              hidden_states2->ne[3]);
             hidden_states2 = ggml_cont(ctx, ggml_permute(ctx, hidden_states2, 2, 0, 1, 3));
-            hidden_states = ggml_concat(ctx, hidden_states, hidden_states2);  // [N, n_token, hidden_size + hidden_size2]
+            hidden_states = ggml_concat(ctx, hidden_states, hidden_states2, 2);  // [N, n_token, hidden_size + hidden_size2]
             hidden_states = ggml_cont(ctx, ggml_permute(ctx, hidden_states, 1, 2, 0, 3));
         }
@@ -1069,7 +1069,7 @@ struct FrozenCLIPEmbedderWithCustomWords : public GGMLModule {
             auto token_embed_weight = text_model.get_token_embed_weight();
             token_embed_weight = ggml_reshape_3d(compute_ctx, token_embed_weight, token_embed_weight->ne[0], 1, token_embed_weight->ne[1]);
             // concatenate custom embeddings
-            embeddings = ggml_concat(compute_ctx, token_embed_weight, custom_embeddings);
+            embeddings = ggml_concat(compute_ctx, token_embed_weight, custom_embeddings, 2);
             embeddings = ggml_reshape_2d(compute_ctx, embeddings, embeddings->ne[0], embeddings->ne[2]);
         }
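
The recurring change in this sync is that ggml_concat now takes an explicit dimension argument instead of always joining along dim 2, so every existing call site passes 2 to keep its old behavior. A minimal sketch of the new call pattern, assuming an existing ggml_context and made-up shapes (this is not code from the commit):

#include "ggml.h"

// Sketch only: post-sync ggml_concat with an explicit concat dimension.
// a: ne = [64,  1, 4, 1], b: ne = [64, 77, 4, 1] (illustrative shapes);
// all dimensions other than the chosen one must match.
static struct ggml_tensor* concat_example(struct ggml_context* ctx,
                                          struct ggml_tensor* a,
                                          struct ggml_tensor* b) {
    // Old API: ggml_concat(ctx, a, b) always concatenated along dim 2.
    // New API: the dimension is explicit; joining along dim 1 here yields
    // ne = [64, 78, 4, 1]. Passing 2 reproduces the old behavior.
    return ggml_concat(ctx, a, b, 1);
}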

@@ -42,13 +42,13 @@ public:
         auto conv5 = std::dynamic_pointer_cast<Conv2d>(blocks["conv5"]);
         auto x1    = lrelu(ctx, conv1->forward(ctx, x));
-        auto x_cat = ggml_concat(ctx, x, x1);
+        auto x_cat = ggml_concat(ctx, x, x1, 2);
         auto x2    = lrelu(ctx, conv2->forward(ctx, x_cat));
-        x_cat      = ggml_concat(ctx, x_cat, x2);
+        x_cat      = ggml_concat(ctx, x_cat, x2, 2);
         auto x3    = lrelu(ctx, conv3->forward(ctx, x_cat));
-        x_cat      = ggml_concat(ctx, x_cat, x3);
+        x_cat      = ggml_concat(ctx, x_cat, x3, 2);
         auto x4    = lrelu(ctx, conv4->forward(ctx, x_cat));
-        x_cat      = ggml_concat(ctx, x_cat, x4);
+        x_cat      = ggml_concat(ctx, x_cat, x4, 2);
         auto x5 = conv5->forward(ctx, x_cat);
         x5 = ggml_add(ctx, ggml_scale(ctx, x5, 0.2f), x);

ggml

@@ -1 +1 @@
-Subproject commit 57869ad3b7b1f49ae18e3238b0d69a9467a8f068
+Subproject commit 2aae01fd9b8f9399f343cf18f46f38996ef52e2c

@@ -917,7 +917,7 @@ public:
                 return NULL;
             }
             // it's performing a compute, check if backend isn't cpu
-            if (!ggml_backend_is_cpu(backend) && tensor->backend == GGML_BACKEND_TYPE_CPU) {
+            if (!ggml_backend_is_cpu(backend) && (tensor->buffer == NULL || ggml_backend_buffer_is_host(tensor->buffer))) {
                 // pass input tensors to gpu memory
                 auto backend_tensor = ggml_dup_tensor(compute_ctx, tensor);
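
This hunk tracks a ggml change where tensors no longer carry a per-tensor backend field; whether an input still lives in host memory is now determined from its buffer. A hedged sketch of that check, with an assumed helper name (needs_upload is not from the code above):

#include "ggml.h"
#include "ggml-backend.h"

// Sketch only: report whether a tensor's data is still host-resident and
// therefore needs to be copied into a non-CPU compute backend.
static bool needs_upload(ggml_backend_t backend, const struct ggml_tensor* t) {
    // No buffer yet, or a host-visible buffer, means the data has not been
    // placed on the (non-CPU) compute backend.
    return !ggml_backend_is_cpu(backend) &&
           (t->buffer == NULL || ggml_backend_buffer_is_host(t->buffer));
}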

@@ -571,10 +571,9 @@ void convert_tensor(void* src,
         if (dst_type == GGML_TYPE_F16) {
             ggml_fp32_to_fp16_row((float*)src, (ggml_fp16_t*)dst, n);
         } else {
-            int64_t hist[16];
             std::vector<float> imatrix(n_per_row, 1.0f);  // dummy importance matrix
             const float* im = imatrix.data();
-            ggml_quantize_chunk(dst_type, (float*)src, dst, 0, nrows, n_per_row, hist, im);
+            ggml_quantize_chunk(dst_type, (float*)src, dst, 0, nrows, n_per_row, im);
         }
     } else if (dst_type == GGML_TYPE_F32) {
         if (src_type == GGML_TYPE_F16) {
@@ -602,10 +601,9 @@ void convert_tensor(void* src,
             if (dst_type == GGML_TYPE_F16) {
                 ggml_fp32_to_fp16_row((float*)src_data_f32, (ggml_fp16_t*)dst, n);
             } else {
-                int64_t hist[16];
                 std::vector<float> imatrix(n_per_row, 1.0f);  // dummy importance matrix
                 const float* im = imatrix.data();
-                ggml_quantize_chunk(dst_type, (float*)src_data_f32, dst, 0, nrows, n_per_row, hist, im);
+                ggml_quantize_chunk(dst_type, (float*)src_data_f32, dst, 0, nrows, n_per_row, im);
             }
         }
     }
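
ggml_quantize_chunk dropped its int64_t hist[16] output parameter in this ggml revision; callers now pass only an optional importance matrix. A minimal sketch of quantizing F32 rows with the new signature (the helper name and the flat importance matrix mirror the pattern above and are illustrative only):

#include <cstdint>
#include <vector>
#include "ggml.h"

// Sketch only: quantize `nrows` rows of F32 data into `dst` using the
// post-sync ggml_quantize_chunk (no histogram argument anymore).
static size_t quantize_rows(ggml_type dst_type, const float* src, void* dst,
                            int64_t nrows, int64_t n_per_row) {
    // Flat importance matrix, as in convert_tensor above; some IQ types
    // require a real imatrix, most other types also accept a null pointer.
    std::vector<float> imatrix(n_per_row, 1.0f);
    return ggml_quantize_chunk(dst_type, src, dst, /*start=*/0,
                               nrows, n_per_row, imatrix.data());
}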

@@ -64,7 +64,7 @@ public:
         auto prompt_embeds0 = ggml_cont(ctx, ggml_permute(ctx, prompt_embeds, 2, 0, 1, 3));
         auto id_embeds0     = ggml_cont(ctx, ggml_permute(ctx, id_embeds, 2, 0, 1, 3));
         // concat is along dim 2
-        auto stacked_id_embeds = ggml_concat(ctx, prompt_embeds0, id_embeds0);
+        auto stacked_id_embeds = ggml_concat(ctx, prompt_embeds0, id_embeds0, 2);
         stacked_id_embeds = ggml_cont(ctx, ggml_permute(ctx, stacked_id_embeds, 1, 2, 0, 3));
         // stacked_id_embeds = mlp1.forward(ctx, stacked_id_embeds);
@@ -102,12 +102,12 @@ public:
         stacked_id_embeds = ggml_cont(ctx, ggml_permute(ctx, stacked_id_embeds, 0, 2, 1, 3));
         if (left && right) {
-            stacked_id_embeds = ggml_concat(ctx, left, stacked_id_embeds);
-            stacked_id_embeds = ggml_concat(ctx, stacked_id_embeds, right);
+            stacked_id_embeds = ggml_concat(ctx, left, stacked_id_embeds, 2);
+            stacked_id_embeds = ggml_concat(ctx, stacked_id_embeds, right, 2);
         } else if (left) {
-            stacked_id_embeds = ggml_concat(ctx, left, stacked_id_embeds);
+            stacked_id_embeds = ggml_concat(ctx, left, stacked_id_embeds, 2);
         } else if (right) {
-            stacked_id_embeds = ggml_concat(ctx, stacked_id_embeds, right);
+            stacked_id_embeds = ggml_concat(ctx, stacked_id_embeds, right, 2);
         }
         stacked_id_embeds = ggml_cont(ctx, ggml_permute(ctx, stacked_id_embeds, 0, 2, 1, 3));
         class_tokens_mask = ggml_cont(ctx, ggml_transpose(ctx, class_tokens_mask));
@@ -146,7 +146,7 @@ struct PhotoMakerIDEncoderBlock : public CLIPVisionModelProjection {
         id_embeds   = ggml_cont(ctx, ggml_permute(ctx, id_embeds, 2, 0, 1, 3));
         id_embeds_2 = ggml_cont(ctx, ggml_permute(ctx, id_embeds_2, 2, 0, 1, 3));
-        id_embeds = ggml_concat(ctx, id_embeds, id_embeds_2);  // [batch_size, seq_length, 1, 2048] check whether concat at dim 2 is right
+        id_embeds = ggml_concat(ctx, id_embeds, id_embeds_2, 2);  // [batch_size, seq_length, 1, 2048] check whether concat at dim 2 is right
         id_embeds = ggml_cont(ctx, ggml_permute(ctx, id_embeds, 1, 2, 0, 3));
         struct ggml_tensor* updated_prompt_embeds = fuse_module->forward(ctx,

@@ -60,12 +60,11 @@ enum sd_type_t {
     SD_TYPE_Q4_0 = 2,
     SD_TYPE_Q4_1 = 3,
     // SD_TYPE_Q4_2 = 4, support has been removed
-    // SD_TYPE_Q4_3 (5) support has been removed
+    // SD_TYPE_Q4_3 = 5, support has been removed
     SD_TYPE_Q5_0 = 6,
     SD_TYPE_Q5_1 = 7,
     SD_TYPE_Q8_0 = 8,
     SD_TYPE_Q8_1 = 9,
-    // k-quantizations
     SD_TYPE_Q2_K = 10,
     SD_TYPE_Q3_K = 11,
     SD_TYPE_Q4_K = 12,
@@ -80,9 +79,13 @@ enum sd_type_t {
     SD_TYPE_IQ3_S  = 21,
     SD_TYPE_IQ2_S  = 22,
     SD_TYPE_IQ4_XS = 23,
-    SD_TYPE_I8,
-    SD_TYPE_I16,
-    SD_TYPE_I32,
+    SD_TYPE_I8     = 24,
+    SD_TYPE_I16    = 25,
+    SD_TYPE_I32    = 26,
+    SD_TYPE_I64    = 27,
+    SD_TYPE_F64    = 28,
+    SD_TYPE_IQ1_M  = 29,
+    SD_TYPE_BF16   = 30,
     SD_TYPE_COUNT,
 };
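
The sd_type_t values mirror ggml_type one-to-one, which is why the new entries are added with explicit values up through SD_TYPE_BF16 = 30. A small sketch of relying on that mapping (the static_asserts and helper are illustrative, not part of this commit):

#include "ggml.h"
#include "stable-diffusion.h"

// Sketch only: sd_type_t is kept value-compatible with ggml_type, so a plain
// cast converts between them as long as the two enums stay in sync.
static_assert((int)SD_TYPE_F64 == (int)GGML_TYPE_F64, "sd_type_t out of sync with ggml_type");
static_assert((int)SD_TYPE_BF16 == (int)GGML_TYPE_BF16, "sd_type_t out of sync with ggml_type");

static ggml_type to_ggml_type(sd_type_t type) {
    return (ggml_type)type;
}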

@@ -396,7 +396,7 @@ public:
             if (c_concat->ne[3] != x->ne[3]) {
                 c_concat = ggml_repeat(ctx, c_concat, x);
             }
-            x = ggml_concat(ctx, x, c_concat);
+            x = ggml_concat(ctx, x, c_concat, 2);
         }
         if (y != NULL) {
@@ -491,7 +491,7 @@ public:
                 control_offset--;
             }
-            h = ggml_concat(ctx, h, h_skip);
+            h = ggml_concat(ctx, h, h_skip, 2);
             std::string name = "output_blocks." + std::to_string(output_block_idx) + ".0";