sync: update ggml (#134)

parent db382348cc
commit 2b6ec97fe2

clip.hpp (9 changed lines)
@@ -443,8 +443,6 @@ struct ResidualAttentionBlock {
     struct ggml_tensor* ln2_w;  // [hidden_size, ]
     struct ggml_tensor* ln2_b;  // [hidden_size, ]
 
-    struct ggml_tensor* attn_scale;  // [hidden_size, ]
-
     size_t calculate_mem_size(ggml_type wtype) {
         double mem_size = 0;
         mem_size += 4 * hidden_size * hidden_size * ggml_type_sizef(wtype);  // q_w/k_w/v_w/out_w
@@ -452,7 +450,6 @@ struct ResidualAttentionBlock {
         mem_size += 2 * hidden_size * intermediate_size * ggml_type_sizef(wtype);  // fc1_w/fc2_w
         mem_size += intermediate_size * ggml_type_sizef(GGML_TYPE_F32);            // fc1_b
         mem_size += hidden_size * ggml_type_sizef(GGML_TYPE_F32);                  // fc2_b
-        mem_size += ggml_type_sizef(GGML_TYPE_F32);                                // attn_scale
         return static_cast<size_t>(mem_size);
     }
 
@@ -479,10 +476,6 @@ struct ResidualAttentionBlock {
         ln2_w = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, hidden_size);
         ln2_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, hidden_size);
 
-        attn_scale = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1);
-        ggml_allocr_alloc(alloc, attn_scale);
-        float scale = 1.0f / sqrt((float)d_model);
-        ggml_backend_tensor_set(attn_scale, &scale, 0, sizeof(scale));
     }
 
     void map_by_name(std::map<std::string, struct ggml_tensor*>& tensors, const std::string prefix) {
@@ -521,7 +514,7 @@ struct ResidualAttentionBlock {
         // self-attention
         {
             struct ggml_tensor* q = ggml_nn_linear(ctx, x, q_w, q_b);
-            q = ggml_scale_inplace(ctx, q, attn_scale);
+            q = ggml_scale_inplace(ctx, q, 1.0f / sqrt((float)d_model));
             q = ggml_reshape_4d(ctx, q, d_model, n_head, n_token, N);   // [N, n_token, n_head, d_model]
             q = ggml_cont(ctx, ggml_permute(ctx, q, 0, 2, 1, 3));       // [N, n_head, n_token, d_model]
             q = ggml_reshape_3d(ctx, q, d_model, n_token, n_head * N);  // [N * n_head, n_token, d_model]
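Both attn_scale changes above follow the same upstream ggml API change: ggml_scale and ggml_scale_inplace now take the scale factor as a plain float instead of a 1-element F32 tensor, so the pre-allocated scale tensors (and their memory-size accounting) can be dropped. A minimal before/after sketch, using only the names and calls that appear in the hunks above:

    // Before the sync: the scalar had to live in a [1] tensor set on the backend.
    struct ggml_tensor* attn_scale = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1);
    ggml_allocr_alloc(alloc, attn_scale);
    float scale = 1.0f / sqrt((float)d_model);
    ggml_backend_tensor_set(attn_scale, &scale, 0, sizeof(scale));
    q = ggml_scale_inplace(ctx, q, attn_scale);

    // After the sync: the value is passed directly.
    q = ggml_scale_inplace(ctx, q, 1.0f / sqrt((float)d_model));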
esrgan.hpp (15 changed lines)
@@ -91,7 +91,7 @@ struct ResidualDenseBlock {
         tensors[prefix + "conv5.bias"] = conv5_b;
     }
 
-    ggml_tensor* forward(ggml_context* ctx, ggml_tensor* out_scale, ggml_tensor* x /* feat */) {
+    ggml_tensor* forward(ggml_context* ctx, float out_scale, ggml_tensor* x /* feat */) {
         // x1 = self.lrelu(self.conv1(x))
         ggml_tensor* x1 = ggml_nn_conv_2d(ctx, x, conv1_w, conv1_b, 1, 1, 1, 1);
         x1 = ggml_leaky_relu(ctx, x1, 0.2f, true);
@@ -161,7 +161,7 @@ struct EsrganBlock {
         }
     }
 
-    ggml_tensor* forward(ggml_context* ctx, ggml_tensor* out_scale, ggml_tensor* x) {
+    ggml_tensor* forward(ggml_context* ctx, float out_scale, ggml_tensor* x) {
         ggml_tensor* out = x;
         for (int i = 0; i < num_residual_blocks; i++) {
             // out = self.rdb...(x)
@@ -325,7 +325,7 @@ struct ESRGAN : public GGMLModule {
         tensors["conv_last.bias"] = conv_last_b;
     }
 
-    ggml_tensor* forward(ggml_context* ctx0, ggml_tensor* out_scale, ggml_tensor* x /* feat */) {
+    ggml_tensor* forward(ggml_context* ctx0, float out_scale, ggml_tensor* x /* feat */) {
         // feat = self.conv_first(feat)
         auto h = ggml_nn_conv_2d(ctx0, x, conv_first_w, conv_first_b, 1, 1, 1, 1);
 
@@ -376,12 +376,7 @@ struct ESRGAN : public GGMLModule {
         struct ggml_cgraph* gf = ggml_new_graph(ctx0);
 
         struct ggml_tensor* x_ = NULL;
-        struct ggml_tensor* os = ggml_new_tensor_1d(ctx0, GGML_TYPE_F32, 1);
-        ggml_allocr_alloc(compute_allocr, os);
-        if (!ggml_allocr_is_measure(compute_allocr)) {
-            float scale = 0.2f;
-            ggml_backend_tensor_set(os, &scale, 0, sizeof(scale));
-        }
+        float out_scale = 0.2f;
 
         // it's performing a compute, check if backend isn't cpu
         if (!ggml_backend_is_cpu(backend)) {
@@ -397,7 +392,7 @@ struct ESRGAN : public GGMLModule {
             x_ = x;
         }
 
-        struct ggml_tensor* out = forward(ctx0, os, x);
+        struct ggml_tensor* out = forward(ctx0, out_scale, x);
 
         ggml_build_forward_expand(gf, out);
         ggml_free(ctx0);
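With out_scale demoted to a plain float, the ESRGAN graph setup no longer needs an allocator round-trip before the constant is usable; it is simply threaded through the forward calls. Restating the two hunks above as a sketch (no new behavior):

    float out_scale = 0.2f;                                  // residual scaling used by the RRDB blocks
    struct ggml_tensor* out = forward(ctx0, out_scale, x);   // was: forward(ctx0, os, x) with os a [1] tensor

The removed ggml_allocr_is_measure guard existed because, under the old API, the tensor's data could not be written during the measure pass.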
ggml (submodule, 2 changed lines)
@@ -1 +1 @@
-Subproject commit 9ab842f210f02cdb8ac7a13d02da10cdda683cfc
+Subproject commit 5e449697f0e9e4c3dff7e66e31bcce37a7517a1b
ggml_extend.hpp
@@ -449,7 +449,7 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_nn_group_norm(struct ggml_context* ct
                                                          struct ggml_tensor* w,
                                                          struct ggml_tensor* b,
                                                          int num_groups = 32) {
-    if (x->n_dims == 4) {
+    if (ggml_n_dims(x) >= 3) {
         w = ggml_reshape_4d(ctx, w, 1, 1, w->ne[0], 1);
         b = ggml_reshape_4d(ctx, b, 1, 1, b->ne[0], 1);
     }
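The n_dims change is the other migration that recurs through this commit: ggml_tensor no longer exposes n_dims as a struct field, and rank is queried with ggml_n_dims(). The two are not interchangeable, which is why the group-norm condition becomes >= 3 rather than == 4: ggml_n_dims() derives the rank from ne[], and trailing size-1 dimensions do not count, so a [W, H, C, 1] tensor reports 3. A short sketch, with t standing in for any ggml tensor:

    // Before: rank was a stored field.
    int64_t rows = t->ne[t->n_dims - 1];

    // After: ggml_n_dims() computes the rank from ne[] on demand.
    int64_t rows = t->ne[ggml_n_dims(t) - 1];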
lora.hpp (15 changed lines)
@@ -113,7 +113,7 @@ struct LoraModel : public GGMLModule {
             applied_lora_tensors.insert(scale_name);
 
             // calc_cale
-            int64_t dim = lora_down->ne[lora_down->n_dims - 1];
+            int64_t dim = lora_down->ne[ggml_n_dims(lora_down) - 1];
             float scale_value = 1.0f;
             if (lora_tensors.find(scale_name) != lora_tensors.end()) {
                 scale_value = ggml_backend_tensor_get_f32(lora_tensors[scale_name]);
@@ -123,17 +123,10 @@ struct LoraModel : public GGMLModule {
             }
             scale_value *= multiplier;
 
-            ggml_tensor* lora_scale = ggml_new_tensor_1d(ctx0, GGML_TYPE_F32, 1);
-
-            ggml_allocr_alloc(compute_allocr, lora_scale);
-            if (!ggml_allocr_is_measure(compute_allocr)) {
-                ggml_backend_tensor_set(lora_scale, &scale_value, 0, ggml_nbytes(lora_scale));
-            }
-
             // flat lora tensors to multiply it
-            int64_t lora_up_rows = lora_up->ne[lora_up->n_dims - 1];
+            int64_t lora_up_rows = lora_up->ne[ggml_n_dims(lora_up) - 1];
             lora_up = ggml_reshape_2d(ctx0, lora_up, ggml_nelements(lora_up) / lora_up_rows, lora_up_rows);
-            int64_t lora_down_rows = lora_down->ne[lora_down->n_dims - 1];
+            int64_t lora_down_rows = lora_down->ne[ggml_n_dims(lora_down) - 1];
             lora_down = ggml_reshape_2d(ctx0, lora_down, ggml_nelements(lora_down) / lora_down_rows, lora_down_rows);
 
             // ggml_mul_mat requires tensor b transposed
@@ -142,7 +135,7 @@ struct LoraModel : public GGMLModule {
             updown = ggml_cont(ctx0, ggml_transpose(ctx0, updown));
             updown = ggml_reshape(ctx0, updown, weight);
             GGML_ASSERT(ggml_nelements(updown) == ggml_nelements(weight));
-            updown = ggml_scale_inplace(ctx0, updown, lora_scale);
+            updown = ggml_scale_inplace(ctx0, updown, scale_value);
             ggml_tensor* final_weight;
             // if (weight->type != GGML_TYPE_F32 && weight->type != GGML_TYPE_F16) {
             //     final_weight = ggml_new_tensor(ctx0, GGML_TYPE_F32, weight->n_dims, weight->ne);
model.cpp
@@ -673,7 +673,7 @@ bool ModelLoader::init_from_gguf_file(const std::string& file_path, const std::s
 
         // LOG_DEBUG("%s", name.c_str());
 
-        TensorStorage tensor_storage(prefix + name, dummy->type, dummy->ne, dummy->n_dims, file_index, offset);
+        TensorStorage tensor_storage(prefix + name, dummy->type, dummy->ne, ggml_n_dims(dummy), file_index, offset);
 
         GGML_ASSERT(ggml_nbytes(dummy) == tensor_storage.nbytes());
 
@@ -1417,6 +1417,9 @@ bool ModelLoader::load_tensors(std::map<std::string, struct ggml_tensor*>& tenso
             if (pair.first.find("cond_stage_model.transformer.text_model.encoder.layers.23") != std::string::npos) {
                 continue;
             }
+            if (pair.first.find("alphas_cumprod") != std::string::npos) {
+                continue;
+            }
 
             if (pair.first.find("alphas_cumprod") != std::string::npos) {
                 continue;
tae.hpp (17 changed lines)
@@ -278,9 +278,6 @@ struct TinyDecoder {
     ggml_tensor* conv_final_w;  // [output_channels, channels, 3, 3]
     ggml_tensor* conv_final_b;  // [output_channels]
 
-    ggml_tensor* in_scale_1d3;  // [1]
-    ggml_tensor* in_scale_3;    // [1]
-
     TinyDecoder() {
         for (int i = 0; i < num_blocks; i++) {
             input_blocks[i].in_channels = channels;
@@ -351,16 +348,6 @@ struct TinyDecoder {
         }
 
         final_block.init_params(ctx);
-
-        // initialize constants scales
-        in_scale_1d3 = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1);
-        in_scale_3 = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1);
-        ggml_allocr_alloc(alloc, in_scale_1d3);
-        float scale_1d3 = 1.0f / 3.0f;
-        ggml_backend_tensor_set(in_scale_1d3, &scale_1d3, 0, sizeof(scale_1d3));
-        ggml_allocr_alloc(alloc, in_scale_3);
-        float scale_3 = 3.0f;
-        ggml_backend_tensor_set(in_scale_3, &scale_3, 0, sizeof(scale_3));
     }
 
     void map_by_name(std::map<std::string, ggml_tensor*>& tensors, std::string prefix) {
@@ -391,9 +378,9 @@ struct TinyDecoder {
 
     ggml_tensor* forward(ggml_context* ctx, ggml_tensor* z) {
         // torch.tanh(x / 3) * 3
-        auto h = ggml_scale(ctx, z, in_scale_1d3);
+        auto h = ggml_scale(ctx, z, 1.0f / 3.0f);
         h = ggml_tanh_inplace(ctx, h);
-        h = ggml_scale(ctx, h, in_scale_3);
+        h = ggml_scale(ctx, h, 3.0f);
 
         // conv(4, 64)
         h = ggml_nn_conv_2d(ctx, h, conv_input_w, conv_input_b, 1, 1, 1, 1);
unet.hpp (12 changed lines)
@@ -182,8 +182,6 @@ struct SpatialTransformer {
 
     std::vector<Transformer> transformers;
 
-    struct ggml_tensor* attn_scale;
-
     // proj_out
     struct ggml_tensor* proj_out_w;  // [in_channels, in_channels, 1, 1]
     struct ggml_tensor* proj_out_b;  // [in_channels,]
@@ -202,7 +200,6 @@ struct SpatialTransformer {
         mem_size += 2 * in_channels * ggml_type_sizef(GGML_TYPE_F32);                        // norm_w/norm_b
         mem_size += 2 * in_channels * in_channels * 1 * 1 * ggml_type_sizef(GGML_TYPE_F16);  // proj_in_w/proj_out_w
         mem_size += 2 * in_channels * ggml_type_sizef(GGML_TYPE_F32);                        // proj_in_b/proj_out_b
-        mem_size += 1 * ggml_type_sizef(GGML_TYPE_F32);                                      // attn_scale
 
         // transformer
         for (auto& transformer : transformers) {
@@ -226,11 +223,6 @@ struct SpatialTransformer {
         proj_out_w = ggml_new_tensor_4d(ctx, GGML_TYPE_F16, 1, 1, in_channels, in_channels);
         proj_out_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, in_channels);
 
-        attn_scale = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1);
-        ggml_allocr_alloc(alloc, attn_scale);
-        float scale = 1.0f / sqrt((float)d_head);
-        ggml_backend_tensor_set(attn_scale, &scale, 0, sizeof(scale));
-
         // transformer
         for (auto& transformer : transformers) {
             transformer.norm1_w = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, in_channels);
@@ -332,7 +324,7 @@ struct SpatialTransformer {
                 x = ggml_reshape_2d(ctx, x, c, h * w * n);                            // [N * h * w, in_channels]
                 struct ggml_tensor* q = ggml_mul_mat(ctx, transformer.attn1_q_w, x);  // [N * h * w, in_channels]
 #if !defined(SD_USE_FLASH_ATTENTION) || defined(SD_USE_CUBLAS) || defined(SD_USE_METAL)
-                q = ggml_scale_inplace(ctx, q, attn_scale);
+                q = ggml_scale_inplace(ctx, q, 1.0f / sqrt((float)d_head));
 #endif
                 q = ggml_reshape_4d(ctx, q, d_head, n_head, h * w, n);  // [N, h * w, n_head, d_head]
                 q = ggml_cont(ctx, ggml_permute(ctx, q, 0, 2, 1, 3));   // [N, n_head, h * w, d_head]
@@ -380,7 +372,7 @@ struct SpatialTransformer {
                 context = ggml_reshape_2d(ctx, context, context->ne[0], context->ne[1] * context->ne[2]);  // [N * max_position, hidden_size]
                 struct ggml_tensor* q = ggml_mul_mat(ctx, transformer.attn2_q_w, x);                       // [N * h * w, in_channels]
 #if !defined(SD_USE_FLASH_ATTENTION) || defined(SD_USE_CUBLAS) || defined(SD_USE_METAL)
-                q = ggml_scale_inplace(ctx, q, attn_scale);
+                q = ggml_scale_inplace(ctx, q, 1.0f / sqrt((float)d_head));
 #endif
                 q = ggml_reshape_4d(ctx, q, d_head, n_head, h * w, n);  // [N, h * w, n_head, d_head]
                 q = ggml_cont(ctx, ggml_permute(ctx, q, 0, 2, 1, 3));   // [N, n_head, h * w, d_head]
vae.hpp (9 changed lines)
@@ -118,8 +118,6 @@ struct AttnBlock {
    struct ggml_tensor* proj_out_w;  // [in_channels, in_channels, 1, 1]
    struct ggml_tensor* proj_out_b;  // [in_channels,]
 
-    struct ggml_tensor* attn_scale;
-
     size_t calculate_mem_size(ggml_type wtype) {
         double mem_size = 0;
         mem_size += 6 * in_channels * ggml_type_sizef(GGML_TYPE_F32);  // norm_w/norm_b/q_b/k_v/v_b/proj_out_b
@@ -140,11 +138,6 @@ struct AttnBlock {
 
         proj_out_w = ggml_new_tensor_4d(ctx, GGML_TYPE_F16, 1, 1, in_channels, in_channels);
         proj_out_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, in_channels);
-
-        attn_scale = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1);
-        ggml_allocr_alloc(alloc, attn_scale);
-        float scale = 1.0f / sqrt((float)in_channels);
-        ggml_backend_tensor_set(attn_scale, &scale, 0, sizeof(scale));
     }
 
     void map_by_name(std::map<std::string, struct ggml_tensor*>& tensors, const std::string prefix) {
@@ -181,7 +174,7 @@ struct AttnBlock {
         k = ggml_reshape_3d(ctx, k, c, h * w, n);  // [N, h * w, in_channels]
 
         auto w_ = ggml_mul_mat(ctx, k, q);  // [N, h * w, h * w]
-        w_ = ggml_scale_inplace(ctx, w_, attn_scale);
+        w_ = ggml_scale_inplace(ctx, w_, 1.0f / sqrt((float)in_channels));
         w_ = ggml_soft_max_inplace(ctx, w_);
 
         v = ggml_reshape_3d(ctx, v, h * w, c, n);  // [N, in_channels, h * w]
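For reference, every scale inlined in this commit's attention paths is the standard scaled-dot-product normalizer 1/sqrt(d): d_model per head in clip.hpp, d_head in unet.hpp, and the full in_channels for this single-head VAE block. The core of the AttnBlock forward path after the change, restated from the hunk above:

    auto w_ = ggml_mul_mat(ctx, k, q);                                  // [N, h * w, h * w]
    w_ = ggml_scale_inplace(ctx, w_, 1.0f / sqrt((float)in_channels));  // 1/sqrt(d), d = in_channels
    w_ = ggml_soft_max_inplace(ctx, w_);                                // softmax(q k^T / sqrt(d))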