style: format code

This commit is contained in:
leejet 2024-08-25 00:19:37 +08:00
parent d08d7fa632
commit c837c5d9cc
10 changed files with 937 additions and 947 deletions

View File

@ -10,8 +10,8 @@ struct SDCondition {
struct ggml_tensor* c_concat = NULL; struct ggml_tensor* c_concat = NULL;
SDCondition() = default; SDCondition() = default;
SDCondition(struct ggml_tensor* c_crossattn, struct ggml_tensor* c_vector, struct ggml_tensor* c_concat) : SDCondition(struct ggml_tensor* c_crossattn, struct ggml_tensor* c_vector, struct ggml_tensor* c_concat)
c_crossattn(c_crossattn), c_vector(c_vector), c_concat(c_concat) {} : c_crossattn(c_crossattn), c_vector(c_vector), c_concat(c_concat) {}
}; };
struct Conditioner { struct Conditioner {
@ -978,7 +978,6 @@ struct SD3CLIPEmbedder : public Conditioner {
} }
}; };
struct FluxCLIPEmbedder : public Conditioner { struct FluxCLIPEmbedder : public Conditioner {
ggml_type wtype; ggml_type wtype;
CLIPTokenizer clip_l_tokenizer; CLIPTokenizer clip_l_tokenizer;

View File

@ -351,7 +351,6 @@ struct DiscreteFlowDenoiser : public Denoiser {
} }
}; };
/**
 * Time-shift mapping: exp(mu) / (exp(mu) + (1/t - 1)^sigma).
 *
 * The `1.0` literals make the (1/t - 1) term and the power evaluate in double
 * precision; the result is narrowed back to float on return.
 *
 * @param mu     shift parameter; enters only through exp(mu).
 * @param sigma  exponent applied to (1/t - 1).
 * @param t      input time value.
 * @return       the shifted value as a float.
 */
float flux_time_shift(float mu, float sigma, float t) {
    const float exp_mu      = std::exp(mu);
    const double odds_term  = 1.0 / t - 1.0;
    const double powered    = std::pow(odds_term, sigma);
    const double shifted    = exp_mu / (exp_mu + powered);
    return shifted;
}
@ -369,7 +368,7 @@ struct FluxFlowDenoiser : public Denoiser {
void set_parameters(float shift = 1.15f) { void set_parameters(float shift = 1.15f) {
this->shift = shift; this->shift = shift;
for (int i = 1; i < TIMESTEPS + 1; i++) { for (int i = 1; i < TIMESTEPS + 1; i++) {
sigmas[i - 1] = t_to_sigma(i/TIMESTEPS * TIMESTEPS); sigmas[i - 1] = t_to_sigma(i / TIMESTEPS * TIMESTEPS);
} }
} }

View File

@ -1,9 +1,9 @@
#ifndef __DIFFUSION_MODEL_H__ #ifndef __DIFFUSION_MODEL_H__
#define __DIFFUSION_MODEL_H__ #define __DIFFUSION_MODEL_H__
#include "flux.hpp"
#include "mmdit.hpp" #include "mmdit.hpp"
#include "unet.hpp" #include "unet.hpp"
#include "flux.hpp"
struct DiffusionModel { struct DiffusionModel {
virtual void compute(int n_threads, virtual void compute(int n_threads,
@ -124,7 +124,6 @@ struct MMDiTModel : public DiffusionModel {
} }
}; };
struct FluxModel : public DiffusionModel { struct FluxModel : public DiffusionModel {
Flux::FluxRunner flux; Flux::FluxRunner flux;

130
flux.hpp
View File

@ -10,8 +10,8 @@
namespace Flux { namespace Flux {
struct MLPEmbedder : public UnaryBlock { struct MLPEmbedder : public UnaryBlock {
public: public:
MLPEmbedder(int64_t in_dim, int64_t hidden_dim) { MLPEmbedder(int64_t in_dim, int64_t hidden_dim) {
blocks["in_layer"] = std::shared_ptr<GGMLBlock>(new Linear(in_dim, hidden_dim, true)); blocks["in_layer"] = std::shared_ptr<GGMLBlock>(new Linear(in_dim, hidden_dim, true));
blocks["out_layer"] = std::shared_ptr<GGMLBlock>(new Linear(hidden_dim, hidden_dim, true)); blocks["out_layer"] = std::shared_ptr<GGMLBlock>(new Linear(hidden_dim, hidden_dim, true));
@ -28,10 +28,10 @@ public:
x = out_layer->forward(ctx, x); x = out_layer->forward(ctx, x);
return x; return x;
} }
}; };
class RMSNorm : public UnaryBlock { class RMSNorm : public UnaryBlock {
protected: protected:
int64_t hidden_size; int64_t hidden_size;
float eps; float eps;
@ -39,7 +39,7 @@ protected:
params["scale"] = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, hidden_size); params["scale"] = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, hidden_size);
} }
public: public:
RMSNorm(int64_t hidden_size, RMSNorm(int64_t hidden_size,
float eps = 1e-06f) float eps = 1e-06f)
: hidden_size(hidden_size), : hidden_size(hidden_size),
@ -51,11 +51,10 @@ public:
x = ggml_mul(ctx, x, w); x = ggml_mul(ctx, x, w);
return x; return x;
} }
}; };
struct QKNorm : public GGMLBlock {
struct QKNorm : public GGMLBlock { public:
public:
QKNorm(int64_t dim) { QKNorm(int64_t dim) {
blocks["query_norm"] = std::shared_ptr<GGMLBlock>(new RMSNorm(dim)); blocks["query_norm"] = std::shared_ptr<GGMLBlock>(new RMSNorm(dim));
blocks["key_norm"] = std::shared_ptr<GGMLBlock>(new RMSNorm(dim)); blocks["key_norm"] = std::shared_ptr<GGMLBlock>(new RMSNorm(dim));
@ -78,9 +77,9 @@ public:
x = norm->forward(ctx, x); x = norm->forward(ctx, x);
return x; return x;
} }
}; };
__STATIC_INLINE__ struct ggml_tensor* apply_rope(struct ggml_context* ctx, __STATIC_INLINE__ struct ggml_tensor* apply_rope(struct ggml_context* ctx,
struct ggml_tensor* x, struct ggml_tensor* x,
struct ggml_tensor* pe) { struct ggml_tensor* pe) {
// x: [N, L, n_head, d_head] // x: [N, L, n_head, d_head]
@ -90,7 +89,7 @@ __STATIC_INLINE__ struct ggml_tensor* apply_rope(struct ggml_context* ctx,
int64_t L = x->ne[2]; int64_t L = x->ne[2];
int64_t N = x->ne[3]; int64_t N = x->ne[3];
x = ggml_cont(ctx, ggml_permute(ctx, x, 0, 2, 1, 3)); // [N, n_head, L, d_head] x = ggml_cont(ctx, ggml_permute(ctx, x, 0, 2, 1, 3)); // [N, n_head, L, d_head]
x = ggml_reshape_4d(ctx, x, 2, d_head/2, L, n_head * N); // [N * n_head, L, d_head/2, 2] x = ggml_reshape_4d(ctx, x, 2, d_head / 2, L, n_head * N); // [N * n_head, L, d_head/2, 2]
x = ggml_cont(ctx, ggml_permute(ctx, x, 3, 0, 1, 2)); // [2, N * n_head, L, d_head/2] x = ggml_cont(ctx, ggml_permute(ctx, x, 3, 0, 1, 2)); // [2, N * n_head, L, d_head/2]
int64_t offset = x->nb[2] * x->ne[2]; int64_t offset = x->nb[2] * x->ne[2];
@ -108,11 +107,11 @@ __STATIC_INLINE__ struct ggml_tensor* apply_rope(struct ggml_context* ctx,
auto pe_1 = ggml_view_3d(ctx, pe, pe->ne[0], pe->ne[1], pe->ne[2], pe->nb[1], pe->nb[2], offset * 1); // [L, d_head/2, 2] auto pe_1 = ggml_view_3d(ctx, pe, pe->ne[0], pe->ne[1], pe->ne[2], pe->nb[1], pe->nb[2], offset * 1); // [L, d_head/2, 2]
auto x_out = ggml_add_inplace(ctx, ggml_mul(ctx, x_0, pe_0), ggml_mul(ctx, x_1, pe_1)); // [N * n_head, L, d_head/2, 2] auto x_out = ggml_add_inplace(ctx, ggml_mul(ctx, x_0, pe_0), ggml_mul(ctx, x_1, pe_1)); // [N * n_head, L, d_head/2, 2]
x_out = ggml_reshape_3d(ctx, x_out, d_head, L, n_head*N); // [N*n_head, L, d_head] x_out = ggml_reshape_3d(ctx, x_out, d_head, L, n_head * N); // [N*n_head, L, d_head]
return x_out; return x_out;
} }
__STATIC_INLINE__ struct ggml_tensor* attention(struct ggml_context* ctx, __STATIC_INLINE__ struct ggml_tensor* attention(struct ggml_context* ctx,
struct ggml_tensor* q, struct ggml_tensor* q,
struct ggml_tensor* k, struct ggml_tensor* k,
struct ggml_tensor* v, struct ggml_tensor* v,
@ -125,13 +124,13 @@ __STATIC_INLINE__ struct ggml_tensor* attention(struct ggml_context* ctx,
auto x = ggml_nn_attention_ext(ctx, q, k, v, v->ne[1], NULL, false, true); // [N, L, n_head*d_head] auto x = ggml_nn_attention_ext(ctx, q, k, v, v->ne[1], NULL, false, true); // [N, L, n_head*d_head]
return x; return x;
} }
struct SelfAttention : public GGMLBlock { struct SelfAttention : public GGMLBlock {
public: public:
int64_t num_heads; int64_t num_heads;
public: public:
SelfAttention(int64_t dim, SelfAttention(int64_t dim,
int64_t num_heads = 8, int64_t num_heads = 8,
bool qkv_bias = false) bool qkv_bias = false)
@ -146,7 +145,6 @@ public:
auto qkv_proj = std::dynamic_pointer_cast<Linear>(blocks["qkv"]); auto qkv_proj = std::dynamic_pointer_cast<Linear>(blocks["qkv"]);
auto norm = std::dynamic_pointer_cast<QKNorm>(blocks["norm"]); auto norm = std::dynamic_pointer_cast<QKNorm>(blocks["norm"]);
auto qkv = qkv_proj->forward(ctx, x); auto qkv = qkv_proj->forward(ctx, x);
auto qkv_vec = split_qkv(ctx, qkv); auto qkv_vec = split_qkv(ctx, qkv);
int64_t head_dim = qkv_vec[0]->ne[0] / num_heads; int64_t head_dim = qkv_vec[0]->ne[0] / num_heads;
@ -174,25 +172,26 @@ public:
x = post_attention(ctx, x); // [N, n_token, dim] x = post_attention(ctx, x); // [N, n_token, dim]
return x; return x;
} }
}; };
struct ModulationOut {
struct ModulationOut {
ggml_tensor* shift = NULL; ggml_tensor* shift = NULL;
ggml_tensor* scale = NULL; ggml_tensor* scale = NULL;
ggml_tensor* gate = NULL; ggml_tensor* gate = NULL;
ModulationOut(ggml_tensor* shift = NULL, ggml_tensor* scale = NULL, ggml_tensor* gate = NULL) ModulationOut(ggml_tensor* shift = NULL, ggml_tensor* scale = NULL, ggml_tensor* gate = NULL)
: shift(shift), scale(scale), gate(gate) {} : shift(shift), scale(scale), gate(gate) {}
}; };
struct Modulation : public GGMLBlock { struct Modulation : public GGMLBlock {
public: public:
bool is_double; bool is_double;
int multiplier; int multiplier;
public:
Modulation(int64_t dim, bool is_double): is_double(is_double) { public:
multiplier = is_double? 6 : 3; Modulation(int64_t dim, bool is_double)
: is_double(is_double) {
multiplier = is_double ? 6 : 3;
blocks["lin"] = std::shared_ptr<GGMLBlock>(new Linear(dim, dim * multiplier)); blocks["lin"] = std::shared_ptr<GGMLBlock>(new Linear(dim, dim * multiplier));
} }
@ -221,9 +220,9 @@ public:
return {ModulationOut(shift_0, scale_0, gate_0), ModulationOut()}; return {ModulationOut(shift_0, scale_0, gate_0), ModulationOut()};
} }
}; };
__STATIC_INLINE__ struct ggml_tensor* modulate(struct ggml_context* ctx, __STATIC_INLINE__ struct ggml_tensor* modulate(struct ggml_context* ctx,
struct ggml_tensor* x, struct ggml_tensor* x,
struct ggml_tensor* shift, struct ggml_tensor* shift,
struct ggml_tensor* scale) { struct ggml_tensor* scale) {
@ -235,10 +234,10 @@ __STATIC_INLINE__ struct ggml_tensor* modulate(struct ggml_context* ctx,
x = ggml_add(ctx, x, ggml_mul(ctx, x, scale)); x = ggml_add(ctx, x, ggml_mul(ctx, x, scale));
x = ggml_add(ctx, x, shift); x = ggml_add(ctx, x, shift);
return x; return x;
} }
struct DoubleStreamBlock : public GGMLBlock { struct DoubleStreamBlock : public GGMLBlock {
public: public:
DoubleStreamBlock(int64_t hidden_size, DoubleStreamBlock(int64_t hidden_size,
int64_t num_heads, int64_t num_heads,
float mlp_ratio, float mlp_ratio,
@ -289,7 +288,6 @@ public:
auto txt_mlp_0 = std::dynamic_pointer_cast<Linear>(blocks["txt_mlp.0"]); auto txt_mlp_0 = std::dynamic_pointer_cast<Linear>(blocks["txt_mlp.0"]);
auto txt_mlp_2 = std::dynamic_pointer_cast<Linear>(blocks["txt_mlp.2"]); auto txt_mlp_2 = std::dynamic_pointer_cast<Linear>(blocks["txt_mlp.2"]);
auto img_mods = img_mod->forward(ctx, vec); auto img_mods = img_mod->forward(ctx, vec);
ModulationOut img_mod1 = img_mods[0]; ModulationOut img_mod1 = img_mods[0];
ModulationOut img_mod2 = img_mods[1]; ModulationOut img_mod2 = img_mods[1];
@ -359,20 +357,20 @@ public:
return {img, txt}; return {img, txt};
} }
}; };
struct SingleStreamBlock : public GGMLBlock {
struct SingleStreamBlock : public GGMLBlock { public:
public:
int64_t num_heads; int64_t num_heads;
int64_t hidden_size; int64_t hidden_size;
int64_t mlp_hidden_dim; int64_t mlp_hidden_dim;
public:
public:
SingleStreamBlock(int64_t hidden_size, SingleStreamBlock(int64_t hidden_size,
int64_t num_heads, int64_t num_heads,
float mlp_ratio = 4.0f, float mlp_ratio = 4.0f,
float qk_scale = 0.f) : float qk_scale = 0.f)
hidden_size(hidden_size), num_heads(num_heads) { : hidden_size(hidden_size), num_heads(num_heads) {
int64_t head_dim = hidden_size / num_heads; int64_t head_dim = hidden_size / num_heads;
float scale = qk_scale; float scale = qk_scale;
if (scale <= 0.f) { if (scale <= 0.f) {
@ -443,11 +441,10 @@ public:
output = ggml_add(ctx, x, ggml_mul(ctx, output, mod.gate)); output = ggml_add(ctx, x, ggml_mul(ctx, output, mod.gate));
return output; return output;
} }
}; };
struct LastLayer : public GGMLBlock {
struct LastLayer : public GGMLBlock { public:
public:
LastLayer(int64_t hidden_size, LastLayer(int64_t hidden_size,
int64_t patch_size, int64_t patch_size,
int64_t out_channels) { int64_t out_channels) {
@ -479,11 +476,11 @@ public:
return x; return x;
} }
}; };
struct FluxParams { struct FluxParams {
int64_t in_channels = 64; int64_t in_channels = 64;
int64_t vec_in_dim=768; int64_t vec_in_dim = 768;
int64_t context_in_dim = 4096; int64_t context_in_dim = 4096;
int64_t hidden_size = 3072; int64_t hidden_size = 3072;
float mlp_ratio = 4.0f; float mlp_ratio = 4.0f;
@ -495,11 +492,10 @@ struct FluxParams {
int theta = 10000; int theta = 10000;
bool qkv_bias = true; bool qkv_bias = true;
bool guidance_embed = true; bool guidance_embed = true;
}; };
struct Flux : public GGMLBlock {
struct Flux : public GGMLBlock { public:
public:
std::vector<float> linspace(float start, float end, int num) { std::vector<float> linspace(float start, float end, int num) {
std::vector<float> result(num); std::vector<float> result(num);
float step = (end - start) / (num - 1); float step = (end - start) / (num - 1);
@ -609,9 +605,9 @@ public:
// std::cout << trans_ids[0][i] << " " << trans_ids[1][i] << " " << trans_ids[2][i] << std::endl; // std::cout << trans_ids[0][i] << " " << trans_ids[1][i] << " " << trans_ids[2][i] << std::endl;
} }
int emb_dim = 0; int emb_dim = 0;
for (int d : axes_dim) emb_dim += d / 2; for (int d : axes_dim)
emb_dim += d / 2;
std::vector<std::vector<float>> emb(bs * pos_len, std::vector<float>(emb_dim * 2 * 2, 0.0)); std::vector<std::vector<float>> emb(bs * pos_len, std::vector<float>(emb_dim * 2 * 2, 0.0));
int offset = 0; int offset = 0;
@ -629,10 +625,12 @@ public:
return flatten(emb); return flatten(emb);
} }
public:
public:
FluxParams params; FluxParams params;
Flux() {} Flux() {}
Flux(FluxParams params) : params(params) { Flux(FluxParams params)
: params(params) {
int64_t out_channels = params.in_channels; int64_t out_channels = params.in_channels;
int64_t pe_dim = params.hidden_size / params.num_heads; int64_t pe_dim = params.hidden_size / params.num_heads;
@ -675,11 +673,11 @@ public:
GGML_ASSERT(h * p == H && w * p == W); GGML_ASSERT(h * p == H && w * p == W);
x = ggml_reshape_4d(ctx, x, p, w, p, h*C*N); // [N*C*h, p, w, p] x = ggml_reshape_4d(ctx, x, p, w, p, h * C * N); // [N*C*h, p, w, p]
x = ggml_cont(ctx, ggml_permute(ctx, x, 0, 2, 1, 3)); // [N*C*h, w, p, p] x = ggml_cont(ctx, ggml_permute(ctx, x, 0, 2, 1, 3)); // [N*C*h, w, p, p]
x = ggml_reshape_4d(ctx, x, p * p, w * h, C, N); // [N, C, h*w, p*p] x = ggml_reshape_4d(ctx, x, p * p, w * h, C, N); // [N, C, h*w, p*p]
x = ggml_cont(ctx, ggml_permute(ctx, x, 0, 2, 1, 3)); // [N, h*w, C, p*p] x = ggml_cont(ctx, ggml_permute(ctx, x, 0, 2, 1, 3)); // [N, h*w, C, p*p]
x = ggml_reshape_3d(ctx, x, p*p*C, w*h, N); // [N, h*w, C*p*p] x = ggml_reshape_3d(ctx, x, p * p * C, w * h, N); // [N, h*w, C*p*p]
return x; return x;
} }
@ -800,11 +798,10 @@ public:
return out; return out;
} }
}; };
struct FluxRunner : public GGMLRunner {
struct FluxRunner : public GGMLRunner { public:
public:
FluxParams flux_params; FluxParams flux_params;
Flux flux; Flux flux;
std::vector<float> pe_vec; // for cache std::vector<float> pe_vec; // for cache
@ -847,13 +844,12 @@ public:
pe_vec = flux.gen_pe(x->ne[1], x->ne[0], 2, x->ne[3], context->ne[1], flux_params.theta, flux_params.axes_dim); pe_vec = flux.gen_pe(x->ne[1], x->ne[0], 2, x->ne[3], context->ne[1], flux_params.theta, flux_params.axes_dim);
int pos_len = pe_vec.size() / flux_params.axes_dim_sum / 2; int pos_len = pe_vec.size() / flux_params.axes_dim_sum / 2;
// LOG_DEBUG("pos_len %d", pos_len); // LOG_DEBUG("pos_len %d", pos_len);
auto pe = ggml_new_tensor_4d(compute_ctx, GGML_TYPE_F32, 2, 2, flux_params.axes_dim_sum/2, pos_len); auto pe = ggml_new_tensor_4d(compute_ctx, GGML_TYPE_F32, 2, 2, flux_params.axes_dim_sum / 2, pos_len);
// pe->data = pe_vec.data(); // pe->data = pe_vec.data();
// print_ggml_tensor(pe); // print_ggml_tensor(pe);
// pe->data = NULL; // pe->data = NULL;
set_backend_tensor_data(pe, pe_vec.data()); set_backend_tensor_data(pe, pe_vec.data());
struct ggml_tensor* out = flux.forward(compute_ctx, struct ggml_tensor* out = flux.forward(compute_ctx,
x, x,
timesteps, timesteps,
@ -958,7 +954,7 @@ public:
} }
flux->test(); flux->test();
} }
}; };
} // namespace Flux } // namespace Flux

View File

@ -686,8 +686,8 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_nn_attention_ext(struct ggml_context*
bool skip_reshape = false) { bool skip_reshape = false) {
int64_t L_q; int64_t L_q;
int64_t L_k; int64_t L_k;
int64_t C ; int64_t C;
int64_t N ; int64_t N;
int64_t d_head; int64_t d_head;
if (!skip_reshape) { if (!skip_reshape) {
L_q = q->ne[1]; L_q = q->ne[1];
@ -781,7 +781,7 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_nn_group_norm(struct ggml_context* ct
} }
__STATIC_INLINE__ void ggml_backend_tensor_get_and_sync(ggml_backend_t backend, const struct ggml_tensor* tensor, void* data, size_t offset, size_t size) { __STATIC_INLINE__ void ggml_backend_tensor_get_and_sync(ggml_backend_t backend, const struct ggml_tensor* tensor, void* data, size_t offset, size_t size) {
#if defined (SD_USE_CUBLAS) || defined (SD_USE_SYCL) #if defined(SD_USE_CUBLAS) || defined(SD_USE_SYCL)
if (!ggml_backend_is_cpu(backend)) { if (!ggml_backend_is_cpu(backend)) {
ggml_backend_tensor_get_async(backend, tensor, data, offset, size); ggml_backend_tensor_get_async(backend, tensor, data, offset, size);
ggml_backend_synchronize(backend); ggml_backend_synchronize(backend);

View File

@ -1427,7 +1427,6 @@ ggml_type ModelLoader::get_conditioner_wtype() {
return GGML_TYPE_COUNT; return GGML_TYPE_COUNT;
} }
ggml_type ModelLoader::get_diffusion_model_wtype() { ggml_type ModelLoader::get_diffusion_model_wtype() {
for (auto& tensor_storage : tensor_storages) { for (auto& tensor_storage : tensor_storages) {
if (is_unused_tensor(tensor_storage.name)) { if (is_unused_tensor(tensor_storage.name)) {

View File

@ -165,4 +165,3 @@ public:
}; };
#endif // __MODEL_H__ #endif // __MODEL_H__

View File

@ -74,7 +74,6 @@ public:
ggml_type diffusion_model_wtype = GGML_TYPE_COUNT; ggml_type diffusion_model_wtype = GGML_TYPE_COUNT;
ggml_type vae_wtype = GGML_TYPE_COUNT; ggml_type vae_wtype = GGML_TYPE_COUNT;
SDVersion version; SDVersion version;
bool vae_decode_only = false; bool vae_decode_only = false;
bool free_params_immediately = false; bool free_params_immediately = false;
@ -171,7 +170,7 @@ public:
backend = ggml_backend_cpu_init(); backend = ggml_backend_cpu_init();
} }
#ifdef SD_USE_FLASH_ATTENTION #ifdef SD_USE_FLASH_ATTENTION
#if defined(SD_USE_CUBLAS) || defined(SD_USE_METAL) || defined (SD_USE_SYCL) #if defined(SD_USE_CUBLAS) || defined(SD_USE_METAL) || defined(SD_USE_SYCL)
LOG_WARN("Flash Attention not supported with GPU Backend"); LOG_WARN("Flash Attention not supported with GPU Backend");
#else #else
LOG_INFO("Flash Attention enabled"); LOG_INFO("Flash Attention enabled");