style: format code
commit 349439f239 (parent 36ec16ac99)

.clang-format
@@ -3,7 +3,6 @@ UseTab: Never
 IndentWidth: 4
 TabWidth: 4
 AllowShortIfStatementsOnASingleLine: false
-IndentCaseLabels: false
 ColumnLimit: 0
 AccessModifierOffset: -4
 NamespaceIndentation: All
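
The hunks that follow are mechanical clang-format output under the configuration above. As a quick illustration, here is a made-up C++ snippet (not code from this commit; the names and values are purely illustrative) written the way the formatter rewrites the files below: a space after control-flow keywords, no spaces just inside brace initializers, and `&` attached to the type name.

    #include <cstddef>
    #include <vector>

    // Hypothetical example only; names and values are illustrative.
    static int sum_positive(const std::vector<int>& values) {  // '&' attaches to the type
        int total = 0;
        for (size_t i = 0; i < values.size(); i++) {  // "for (" rather than "for("
            if (values[i] > 0) {                      // "if (" rather than "if("
                total += values[i];
            }
        }
        return total;
    }

    int main() {
        std::vector<int> feat_channels = {16, 32, 96, 256};  // "{16," rather than "{ 16,"
        return sum_positive(feat_channels) == 400 ? 0 : 1;
    }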

clip.hpp (27 lines changed)
@@ -486,7 +486,6 @@ struct ResidualAttentionBlock {

         ln2_w = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, hidden_size);
         ln2_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, hidden_size);
-
     }

     void map_by_name(std::map<std::string, struct ggml_tensor*>& tensors, const std::string prefix) {
@@ -661,7 +660,7 @@ struct CLIPTextModel {
         mem_size += ggml_row_size(GGML_TYPE_I32, hidden_size * max_position_embeddings); // position_ids
         mem_size += ggml_row_size(wtype, hidden_size * vocab_size); // token_embed_weight
         mem_size += ggml_row_size(wtype, hidden_size * max_position_embeddings); // position_embed_weight
-        if(version == OPENAI_CLIP_VIT_L_14) {
+        if (version == OPENAI_CLIP_VIT_L_14) {
             mem_size += ggml_row_size(wtype, hidden_size * max_position_embeddings); // token_embed_custom
         }
         for (int i = 0; i < num_hidden_layers; i++) {
@@ -688,10 +687,10 @@ struct CLIPTextModel {
             }
         }

-    bool load_embedding(std::string embd_name, std::string embd_path, std::vector<int32_t> &bpe_tokens) {
+    bool load_embedding(std::string embd_name, std::string embd_path, std::vector<int32_t>& bpe_tokens) {
         // the order matters
         ModelLoader model_loader;
-        if(!model_loader.init_from_file(embd_path)) {
+        if (!model_loader.init_from_file(embd_path)) {
             LOG_ERROR("embedding '%s' failed", embd_name.c_str());
             return false;
         }
@@ -702,7 +701,7 @@ struct CLIPTextModel {
         struct ggml_context* embd_ctx = ggml_init(params);
         struct ggml_tensor* embd = NULL;
         auto on_load = [&](const TensorStorage& tensor_storage, ggml_tensor** dst_tensor) {
-            if(tensor_storage.ne[0] != hidden_size) {
+            if (tensor_storage.ne[0] != hidden_size) {
                 LOG_DEBUG("embedding wrong hidden size, got %i, expected %i", tensor_storage.ne[0], hidden_size);
                 return false;
             }
@@ -713,7 +712,7 @@ struct CLIPTextModel {
         model_loader.load_tensors(on_load, NULL);
         ggml_backend_tensor_set(token_embed_custom, embd->data, num_custom_embeddings * hidden_size * ggml_type_size(token_embed_custom->type), ggml_nbytes(embd));
         readed_embeddings.push_back(embd_name);
-        for(int i = 0; i < embd->ne[1]; i++) {
+        for (int i = 0; i < embd->ne[1]; i++) {
             bpe_tokens.push_back(vocab_size + num_custom_embeddings);
             // LOG_DEBUG("new custom token: %i", vocab_size + num_custom_embeddings);
             num_custom_embeddings++;
@@ -775,7 +774,7 @@ struct CLIPTextModel {

         final_ln_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, hidden_size);

-        if(version == OPENAI_CLIP_VIT_L_14) {
+        if (version == OPENAI_CLIP_VIT_L_14) {
             token_embed_custom = ggml_new_tensor_2d(ctx, wtype, hidden_size, max_position_embeddings);
         }

@@ -913,20 +912,20 @@ struct FrozenCLIPEmbedderWithCustomWords : public GGMLModule {
             LOG_DEBUG("parse '%s' to %s", text.c_str(), ss.str().c_str());
         }

-        auto on_new_token_cb = [&] (std::string& str, std::vector<int32_t> &bpe_tokens) -> bool {
+        auto on_new_token_cb = [&](std::string& str, std::vector<int32_t>& bpe_tokens) -> bool {
             size_t word_end = str.find(",");
             std::string embd_name = word_end == std::string::npos ? str : str.substr(0, word_end);
             embd_name = trim(embd_name);
             std::string embd_path = get_full_path(text_model.embd_dir, embd_name + ".pt");
-            if(embd_path.size() == 0) {
+            if (embd_path.size() == 0) {
                 embd_path = get_full_path(text_model.embd_dir, embd_name + ".ckpt");
             }
-            if(embd_path.size() == 0) {
+            if (embd_path.size() == 0) {
                 embd_path = get_full_path(text_model.embd_dir, embd_name + ".safetensors");
             }
-            if(embd_path.size() > 0) {
-                if(text_model.load_embedding(embd_name, embd_path, bpe_tokens)) {
-                    if(word_end != std::string::npos) {
+            if (embd_path.size() > 0) {
+                if (text_model.load_embedding(embd_name, embd_path, bpe_tokens)) {
+                    if (word_end != std::string::npos) {
                         str = str.substr(word_end);
                     } else {
                         str = "";
@@ -1033,7 +1032,7 @@ struct FrozenCLIPEmbedderWithCustomWords : public GGMLModule {

         struct ggml_tensor* embeddings = NULL;

-        if(text_model.num_custom_embeddings > 0 && version != VERSION_XL) {
+        if (text_model.num_custom_embeddings > 0 && version != VERSION_XL) {
             embeddings = ggml_new_tensor_2d(ctx0, wtype, text_model.hidden_size, text_model.vocab_size + text_model.num_custom_embeddings /* custom placeholder */);
             ggml_allocr_alloc(allocr, embeddings);
             if (!ggml_allocr_is_measure(allocr)) {

common.hpp
@@ -281,7 +281,7 @@ struct SpatialTransformer {
         mem_size += 6 * ggml_row_size(GGML_TYPE_F32, in_channels); // norm1-3_w/b
         mem_size += 6 * ggml_row_size(wtype, in_channels * in_channels); // attn1_q/k/v/out_w attn2_q/out_w
         mem_size += 2 * ggml_row_size(wtype, in_channels * context_dim); // attn2_k/v_w
-        mem_size += ggml_row_size(wtype, in_channels * 4 * 2 * in_channels ); // ff_0_proj_w
+        mem_size += ggml_row_size(wtype, in_channels * 4 * 2 * in_channels); // ff_0_proj_w
         mem_size += ggml_row_size(GGML_TYPE_F32, in_channels * 4 * 2); // ff_0_proj_b
         mem_size += ggml_row_size(wtype, in_channels * 4 * in_channels); // ff_2_w
         mem_size += ggml_row_size(GGML_TYPE_F32, in_channels); // ff_2_b

control.hpp (30 lines changed)
@@ -1,8 +1,8 @@
 #ifndef __CONTROL_HPP__
 #define __CONTROL_HPP__

-#include "ggml_extend.hpp"
 #include "common.hpp"
+#include "ggml_extend.hpp"
 #include "model.h"

 #define CONTROL_NET_GRAPH_SIZE 1536
@@ -16,7 +16,7 @@
 struct CNHintBlock {
     int hint_channels = 3;
     int model_channels = 320; // SD 1.5
-    int feat_channels[4] = { 16, 32, 96, 256 };
+    int feat_channels[4] = {16, 32, 96, 256};
     int num_blocks = 3;
     ggml_tensor* conv_first_w; // [feat_channels[0], hint_channels, 3, 3]
     ggml_tensor* conv_first_b; // [feat_channels[0]]
@@ -67,11 +67,11 @@ struct CNHintBlock {
         tensors[prefix + "input_hint_block.0.bias"] = conv_first_b;
         int index = 2;
         for (int i = 0; i < num_blocks; i++) {
-            tensors[prefix + "input_hint_block." + std::to_string(index) +".weight"] = blocks[i].conv_0_w;
-            tensors[prefix + "input_hint_block." + std::to_string(index) +".bias"] = blocks[i].conv_0_b;
+            tensors[prefix + "input_hint_block." + std::to_string(index) + ".weight"] = blocks[i].conv_0_w;
+            tensors[prefix + "input_hint_block." + std::to_string(index) + ".bias"] = blocks[i].conv_0_b;
             index += 2;
-            tensors[prefix + "input_hint_block." + std::to_string(index) +".weight"] = blocks[i].conv_1_w;
-            tensors[prefix + "input_hint_block." + std::to_string(index) +".bias"] = blocks[i].conv_1_b;
+            tensors[prefix + "input_hint_block." + std::to_string(index) + ".weight"] = blocks[i].conv_1_w;
+            tensors[prefix + "input_hint_block." + std::to_string(index) + ".bias"] = blocks[i].conv_1_b;
             index += 2;
         }
         tensors[prefix + "input_hint_block.14.weight"] = conv_final_w;
@@ -83,7 +83,7 @@ struct CNHintBlock {
         h = ggml_silu_inplace(ctx, h);

         auto body_h = h;
-        for(int i = 0; i < num_blocks; i++) {
+        for (int i = 0; i < num_blocks; i++) {
             // operations.conv_nd(dims, 16, 16, 3, padding=1)
             body_h = ggml_nn_conv_2d(ctx, body_h, blocks[i].conv_0_w, blocks[i].conv_0_b, 1, 1, 1, 1);
             body_h = ggml_silu_inplace(ctx, body_h);
@@ -104,7 +104,7 @@ struct CNZeroConv {
     ggml_tensor* conv_b; // [channels]

     void init_params(struct ggml_context* ctx) {
-        conv_w = ggml_new_tensor_4d(ctx, GGML_TYPE_F16, 1, 1, channels,channels);
+        conv_w = ggml_new_tensor_4d(ctx, GGML_TYPE_F16, 1, 1, channels, channels);
         conv_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, channels);
     }
 };
@@ -229,7 +229,7 @@ struct ControlNet : public GGMLModule {
         mem_size += ggml_row_size(wtype, time_embed_dim * model_channels); // time_embed_0_w
         mem_size += ggml_row_size(GGML_TYPE_F32, time_embed_dim); // time_embed_0_b
         mem_size += ggml_row_size(wtype, time_embed_dim * time_embed_dim); // time_embed_2_w
-        mem_size += ggml_row_size(GGML_TYPE_F32,time_embed_dim); // time_embed_2_b
+        mem_size += ggml_row_size(GGML_TYPE_F32, time_embed_dim); // time_embed_2_b

         mem_size += ggml_row_size(GGML_TYPE_F16, model_channels * in_channels * 3 * 3); // input_block_0_w
         mem_size += ggml_row_size(GGML_TYPE_F32, model_channels); // input_block_0_b
@@ -449,8 +449,8 @@ struct ControlNet : public GGMLModule {
         }

         for (int i = 0; i < num_zero_convs; i++) {
-            tensors[prefix + "zero_convs."+ std::to_string(i) + ".0.weight"] = zero_convs[i].conv_w;
-            tensors[prefix + "zero_convs."+ std::to_string(i) + ".0.bias"] = zero_convs[i].conv_b;
+            tensors[prefix + "zero_convs." + std::to_string(i) + ".0.weight"] = zero_convs[i].conv_w;
+            tensors[prefix + "zero_convs." + std::to_string(i) + ".0.bias"] = zero_convs[i].conv_b;
         }

         // middle_blocks
@@ -655,11 +655,13 @@ struct ControlNet : public GGMLModule {
         control_ctx = ggml_init(params);
         size_t control_buffer_size = 0;
         int w = x->ne[0], h = x->ne[1], steps = 0;
-        for(int i = 0; i < (num_zero_convs + 1); i++) {
+        for (int i = 0; i < (num_zero_convs + 1); i++) {
             bool last = i == num_zero_convs;
             int c = last ? middle_out_channel : zero_convs[i].channels;
-            if(!last && steps == 3) {
-                w /= 2; h /= 2; steps = 0;
+            if (!last && steps == 3) {
+                w /= 2;
+                h /= 2;
+                steps = 0;
             }
             controls.push_back(ggml_new_tensor_4d(control_ctx, GGML_TYPE_F32, w, h, c, 1));
             control_buffer_size += ggml_nbytes(controls[i]);

main.cpp
@@ -6,8 +6,8 @@
 #include <string>
 #include <vector>

-#include "stable-diffusion.h"
 #include "preprocessing.hpp"
+#include "stable-diffusion.h"

 #define STB_IMAGE_IMPLEMENTATION
 #include "stb_image.h"
@@ -590,10 +590,10 @@ int main(int argc, const char* argv[]) {
     sd_image_t* results;
     if (params.mode == TXT2IMG) {
         sd_image_t* control_image = NULL;
-        if(params.controlnet_path.size() > 0 && params.control_image_path.size() > 0) {
+        if (params.controlnet_path.size() > 0 && params.control_image_path.size() > 0) {
             int c = 0;
             input_image_buffer = stbi_load(params.control_image_path.c_str(), &params.width, &params.height, &c, 3);
-            if(input_image_buffer == NULL) {
+            if (input_image_buffer == NULL) {
                 fprintf(stderr, "load image from '%s' failed\n", params.control_image_path.c_str());
                 return 1;
             }
@@ -601,7 +601,7 @@ int main(int argc, const char* argv[]) {
                                   (uint32_t)params.height,
                                   3,
                                   input_image_buffer};
-            if(params.canny_preprocess) { // apply preprocessor
+            if (params.canny_preprocess) { // apply preprocessor
                 LOG_INFO("Applying canny preprocessor");
                 control_image->data = preprocess_canny(control_image->data, control_image->width, control_image->height);
             }

ggml_extend.hpp
@@ -462,7 +462,7 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_nn_group_norm(struct ggml_context* ct

 __STATIC_INLINE__ void ggml_backend_tensor_get_and_sync(ggml_backend_t backend, const struct ggml_tensor* tensor, void* data, size_t offset, size_t size) {
 #ifdef SD_USE_CUBLAS
-    if(!ggml_backend_is_cpu(backend)) {
+    if (!ggml_backend_is_cpu(backend)) {
         ggml_backend_tensor_get_async(backend, tensor, data, offset, size);
         ggml_backend_synchronize(backend);
     } else {

preprocessing.hpp
@@ -11,7 +11,7 @@ void convolve(struct ggml_tensor* input, struct ggml_tensor* output, struct ggml
     params.no_alloc = false;
     struct ggml_context* ctx0 = ggml_init(params);
     struct ggml_tensor* kernel_fp16 = ggml_new_tensor_4d(ctx0, GGML_TYPE_F16, kernel->ne[0], kernel->ne[1], 1, 1);
-    ggml_fp32_to_fp16_row((float*)kernel->data, (ggml_fp16_t*) kernel_fp16->data, ggml_nelements(kernel));
+    ggml_fp32_to_fp16_row((float*)kernel->data, (ggml_fp16_t*)kernel_fp16->data, ggml_nelements(kernel));
     ggml_tensor* h = ggml_conv_2d(ctx0, kernel_fp16, input, 1, 1, padding, padding, 1, 1);
     ggml_cgraph* gf = ggml_new_graph(ctx0);
     ggml_build_forward_expand(gf, ggml_cpy(ctx0, h, output));
@@ -23,11 +23,11 @@ void gaussian_kernel(struct ggml_tensor* kernel) {
     int ks_mid = kernel->ne[0] / 2;
     float sigma = 1.4f;
     float normal = 1.f / (2.0f * M_PI_ * powf(sigma, 2.0f));
-    for(int y = 0; y < kernel->ne[0]; y++) {
+    for (int y = 0; y < kernel->ne[0]; y++) {
         float gx = -ks_mid + y;
-        for(int x = 0; x < kernel->ne[1]; x++) {
+        for (int x = 0; x < kernel->ne[1]; x++) {
             float gy = -ks_mid + x;
-            float k_ = expf(-((gx*gx + gy*gy) / (2.0f * powf(sigma, 2.0f)))) * normal;
+            float k_ = expf(-((gx * gx + gy * gy) / (2.0f * powf(sigma, 2.0f)))) * normal;
             ggml_tensor_set_f32(kernel, k_, x, y);
         }
     }
@@ -50,7 +50,7 @@ void prop_hypot(struct ggml_tensor* x, struct ggml_tensor* y, struct ggml_tensor
     float* dx = (float*)x->data;
     float* dy = (float*)y->data;
     float* dh = (float*)h->data;
-    for (int i = 0; i <n_elements; i++) {
+    for (int i = 0; i < n_elements; i++) {
         dh[i] = sqrtf(dx[i] * dx[i] + dy[i] * dy[i]);
     }
 }
@@ -69,11 +69,11 @@ void normalize_tensor(struct ggml_tensor* g) {
     int n_elements = ggml_nelements(g);
     float* dg = (float*)g->data;
     float max = -INFINITY;
-    for (int i = 0; i <n_elements; i++) {
+    for (int i = 0; i < n_elements; i++) {
         max = dg[i] > max ? dg[i] : max;
     }
     max = 1.0f / max;
-    for (int i = 0; i <n_elements; i++) {
+    for (int i = 0; i < n_elements; i++) {
         dg[i] *= max;
     }
 }
@@ -87,7 +87,7 @@ void non_max_supression(struct ggml_tensor* result, struct ggml_tensor* G, struc
             float r = 1.0f;

             // angle 0
-            if((0 >= angle && angle < 22.5f) || (157.5f >= angle && angle <= 180)){
+            if ((0 >= angle && angle < 22.5f) || (157.5f >= angle && angle <= 180)) {
                 q = ggml_tensor_get_f32(G, ix, iy + 1);
                 r = ggml_tensor_get_f32(G, ix, iy - 1);
             }
@@ -128,16 +128,16 @@ void threshold_hystersis(struct ggml_tensor* img, float highThreshold, float low
     float lt = ht * lowThreshold;
     for (int i = 0; i < n_elements; i++) {
         float img_v = imd[i];
-        if(img_v >= ht) { // strong pixel
+        if (img_v >= ht) { // strong pixel
             imd[i] = strong;
-        } else if(img_v <= ht && img_v >= lt) { // strong pixel
+        } else if (img_v <= ht && img_v >= lt) { // strong pixel
             imd[i] = weak;
         }
     }

     for (int iy = 0; iy < img->ne[1]; iy++) {
         for (int ix = 0; ix < img->ne[0]; ix++) {
-            if(ix >= 3 && ix <= img->ne[0] - 3 && iy >= 3 && iy <= img->ne[1] - 3) {
+            if (ix >= 3 && ix <= img->ne[0] - 3 && iy >= 3 && iy <= img->ne[1] - 3) {
                 ggml_tensor_set_f32(img, ggml_tensor_get_f32(img, ix, iy), ix, iy);
             } else {
                 ggml_tensor_set_f32(img, 0.0f, ix, iy);
@@ -149,8 +149,8 @@ void threshold_hystersis(struct ggml_tensor* img, float highThreshold, float low
     for (int iy = 1; iy < img->ne[1] - 1; iy++) {
         for (int ix = 1; ix < img->ne[0] - 1; ix++) {
             float imd_v = ggml_tensor_get_f32(img, ix, iy);
-            if(imd_v == weak) {
-                if(ggml_tensor_get_f32(img, ix + 1, iy - 1) == strong || ggml_tensor_get_f32(img, ix + 1, iy) == strong ||
+            if (imd_v == weak) {
+                if (ggml_tensor_get_f32(img, ix + 1, iy - 1) == strong || ggml_tensor_get_f32(img, ix + 1, iy) == strong ||
                     ggml_tensor_get_f32(img, ix, iy - 1) == strong || ggml_tensor_get_f32(img, ix, iy + 1) == strong ||
                     ggml_tensor_get_f32(img, ix - 1, iy - 1) == strong || ggml_tensor_get_f32(img, ix - 1, iy) == strong) {
                     ggml_tensor_set_f32(img, strong, ix, iy);
@@ -177,14 +177,12 @@ uint8_t* preprocess_canny(uint8_t* img, int width, int height, float highThresho
     float kX[9] = {
         -1, 0, 1,
         -2, 0, 2,
-        -1, 0, 1
-    };
+        -1, 0, 1};

     float kY[9] = {
         1, 2, 1,
         0, 0, 0,
-        -1, -2, -1
-    };
+        -1, -2, -1};

     // generate kernel
     int kernel_size = 5;

stable-diffusion.cpp
@@ -7,8 +7,8 @@
 #include "util.h"

 #include "clip.hpp"
-#include "denoiser.hpp"
 #include "control.hpp"
+#include "denoiser.hpp"
 #include "esrgan.hpp"
 #include "lora.hpp"
 #include "tae.hpp"
@@ -320,15 +320,15 @@ public:
         LOG_DEBUG("finished loaded file");
         ggml_free(ctx);

-        if(control_net_path.size() > 0) {
+        if (control_net_path.size() > 0) {
             ggml_backend_t cn_backend = NULL;
-            if(control_net_cpu && !ggml_backend_is_cpu(backend)) {
+            if (control_net_cpu && !ggml_backend_is_cpu(backend)) {
                 LOG_DEBUG("ControlNet: Using CPU backend");
                 cn_backend = ggml_backend_cpu_init();
             } else {
                 cn_backend = backend;
             }
-            if(!control_net.load_from_file(control_net_path, cn_backend, GGML_TYPE_F16 /* just f16 controlnet models */)) {
+            if (!control_net.load_from_file(control_net_path, cn_backend, GGML_TYPE_F16 /* just f16 controlnet models */)) {
                 return false;
             }
         }
@@ -550,7 +550,7 @@ public:
         struct ggml_tensor* timesteps = ggml_new_tensor_1d(work_ctx, GGML_TYPE_F32, 1); // [N, ]
         struct ggml_tensor* t_emb = new_timestep_embedding(work_ctx, NULL, timesteps, diffusion_model.model_channels); // [N, model_channels]
         struct ggml_tensor* guided_hint = NULL;
-        if(control_hint != NULL) {
+        if (control_hint != NULL) {
             guided_hint = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, noised_input->ne[0], noised_input->ne[1], diffusion_model.model_channels, 1);
             control_net.process_hint(guided_hint, n_threads, control_hint);
             control_net.alloc_compute_buffer(noised_input, guided_hint, c, t_emb);
@@ -606,7 +606,7 @@ public:
             ggml_tensor_scale(noised_input, c_in);

             // cond
-            if(control_hint != NULL) {
+            if (control_hint != NULL) {
                 control_net.compute(n_threads, noised_input, guided_hint, c, t_emb);
             }
             diffusion_model.compute(out_cond, n_threads, noised_input, NULL, c, control_net.controls, control_strength, t_emb, c_vector);
@@ -614,7 +614,7 @@ public:
             float* negative_data = NULL;
             if (has_unconditioned) {
                 // uncond
-                if(control_hint != NULL) {
+                if (control_hint != NULL) {
                     control_net.compute(n_threads, noised_input, guided_hint, uc, t_emb);
                 }

@@ -1276,7 +1276,7 @@ sd_image_t* txt2img(sd_ctx_t* sd_ctx,
     }

     struct ggml_tensor* image_hint = NULL;
-    if(control_cond != NULL) {
+    if (control_cond != NULL) {
         image_hint = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, width, height, 3, 1);
         sd_image_to_tensor(control_cond->data, image_hint);
     }

unet.hpp (10 lines changed)
@@ -495,7 +495,7 @@ struct UNetModel : public GGMLModule {
         h = middle_block_1.forward(ctx0, h, context); // [N, 4*model_channels, h/8, w/8]
         h = middle_block_2.forward(ctx0, h, emb); // [N, 4*model_channels, h/8, w/8]

-        if(control.size() > 0) {
+        if (control.size() > 0) {
             auto cs = ggml_scale_inplace(ctx0, control[control.size() - 1], control_net_strength);
             h = ggml_add(ctx0, h, cs); // middle control
         }
@@ -507,7 +507,7 @@ struct UNetModel : public GGMLModule {
             auto h_skip = hs.back();
             hs.pop_back();

-            if(control.size() > 0) {
+            if (control.size() > 0) {
                 auto cs = ggml_scale_inplace(ctx0, control[control_offset], control_net_strength);
                 h_skip = ggml_add(ctx0, h_skip, cs); // control net condition
                 control_offset--;
@@ -611,12 +611,12 @@ struct UNetModel : public GGMLModule {
         }

         // offload all controls tensors to gpu
-        if(control.size() > 0 && !ggml_backend_is_cpu(backend) && control[0]->backend != GGML_BACKEND_GPU) {
-            for(int i = 0; i < control.size(); i++) {
+        if (control.size() > 0 && !ggml_backend_is_cpu(backend) && control[0]->backend != GGML_BACKEND_GPU) {
+            for (int i = 0; i < control.size(); i++) {
                 ggml_tensor* cntl_t = ggml_dup_tensor(ctx0, control[i]);
                 control_t.push_back(cntl_t);
                 ggml_allocr_alloc(compute_allocr, cntl_t);
-                if(!ggml_allocr_is_measure(compute_allocr)) {
+                if (!ggml_allocr_is_measure(compute_allocr)) {
                     ggml_backend_tensor_copy(control[i], control_t[i]);
                     ggml_backend_synchronize(backend);
                 }