style: format code
This commit is contained in:
parent
d08d7fa632
commit
c837c5d9cc
@ -10,8 +10,8 @@ struct SDCondition {
|
||||
struct ggml_tensor* c_concat = NULL;
|
||||
|
||||
SDCondition() = default;
|
||||
SDCondition(struct ggml_tensor* c_crossattn, struct ggml_tensor* c_vector, struct ggml_tensor* c_concat) :
|
||||
c_crossattn(c_crossattn), c_vector(c_vector), c_concat(c_concat) {}
|
||||
SDCondition(struct ggml_tensor* c_crossattn, struct ggml_tensor* c_vector, struct ggml_tensor* c_concat)
|
||||
: c_crossattn(c_crossattn), c_vector(c_vector), c_concat(c_concat) {}
|
||||
};
|
||||
|
||||
struct Conditioner {
|
||||
@ -978,7 +978,6 @@ struct SD3CLIPEmbedder : public Conditioner {
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
struct FluxCLIPEmbedder : public Conditioner {
|
||||
ggml_type wtype;
|
||||
CLIPTokenizer clip_l_tokenizer;
|
||||
|
@ -351,7 +351,6 @@ struct DiscreteFlowDenoiser : public Denoiser {
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
// Time-shift mapping: returns exp(mu) / (exp(mu) + (1/t - 1)^sigma).
//
// mu    - shift parameter; enters only through exp(mu).
// sigma - exponent applied to the (1/t - 1) term.
// t     - time value being remapped.
//
// The `1.0` literals are doubles, so (1/t - 1) and the std::pow call are
// evaluated in double precision and the quotient is narrowed back to float
// on return.
// NOTE(review): there is no guard for t == 0 (division by zero in `1.0 / t`)
// in this function; confirm callers never pass it or that the resulting
// limit value is acceptable.
float flux_time_shift(float mu, float sigma, float t) {
|
||||
return std::exp(mu) / (std::exp(mu) + std::pow((1.0 / t - 1.0), sigma));
|
||||
}
|
||||
@ -369,7 +368,7 @@ struct FluxFlowDenoiser : public Denoiser {
|
||||
// Stores `shift` on the object and then fills sigmas[0 .. TIMESTEPS-1],
// writing sigmas[i - 1] = t_to_sigma(...) for i = 1 .. TIMESTEPS.
//
// shift - value copied into this->shift before the table is rebuilt
//         (defaults to 1.15f).
void set_parameters(float shift = 1.15f) {
|
||||
this->shift = shift;
|
||||
for (int i = 1; i < TIMESTEPS + 1; i++) {
|
||||
// NOTE(review): `i` is an int. If TIMESTEPS is an integer constant (its
// definition is not visible here), `i / TIMESTEPS` is integer division and
// evaluates to 0 for every i < TIMESTEPS, so all but the last entry would be
// t_to_sigma(0). Confirm the type of TIMESTEPS and the argument range that
// t_to_sigma expects before changing this; a floating-point division may be
// what was intended.
// The assignment below appears twice; the two lines differ only in
// whitespace (before/after formatting).
sigmas[i - 1] = t_to_sigma(i/TIMESTEPS * TIMESTEPS);
|
||||
sigmas[i - 1] = t_to_sigma(i / TIMESTEPS * TIMESTEPS);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1,9 +1,9 @@
|
||||
#ifndef __DIFFUSION_MODEL_H__
|
||||
#define __DIFFUSION_MODEL_H__
|
||||
|
||||
#include "flux.hpp"
|
||||
#include "mmdit.hpp"
|
||||
#include "unet.hpp"
|
||||
#include "flux.hpp"
|
||||
|
||||
struct DiffusionModel {
|
||||
virtual void compute(int n_threads,
|
||||
@ -124,7 +124,6 @@ struct MMDiTModel : public DiffusionModel {
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
struct FluxModel : public DiffusionModel {
|
||||
Flux::FluxRunner flux;
|
||||
|
||||
|
130
flux.hpp
130
flux.hpp
@ -10,8 +10,8 @@
|
||||
|
||||
namespace Flux {
|
||||
|
||||
struct MLPEmbedder : public UnaryBlock {
|
||||
public:
|
||||
struct MLPEmbedder : public UnaryBlock {
|
||||
public:
|
||||
MLPEmbedder(int64_t in_dim, int64_t hidden_dim) {
|
||||
blocks["in_layer"] = std::shared_ptr<GGMLBlock>(new Linear(in_dim, hidden_dim, true));
|
||||
blocks["out_layer"] = std::shared_ptr<GGMLBlock>(new Linear(hidden_dim, hidden_dim, true));
|
||||
@ -28,10 +28,10 @@ public:
|
||||
x = out_layer->forward(ctx, x);
|
||||
return x;
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
class RMSNorm : public UnaryBlock {
|
||||
protected:
|
||||
class RMSNorm : public UnaryBlock {
|
||||
protected:
|
||||
int64_t hidden_size;
|
||||
float eps;
|
||||
|
||||
@ -39,7 +39,7 @@ protected:
|
||||
params["scale"] = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, hidden_size);
|
||||
}
|
||||
|
||||
public:
|
||||
public:
|
||||
RMSNorm(int64_t hidden_size,
|
||||
float eps = 1e-06f)
|
||||
: hidden_size(hidden_size),
|
||||
@ -51,11 +51,10 @@ public:
|
||||
x = ggml_mul(ctx, x, w);
|
||||
return x;
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
struct QKNorm : public GGMLBlock {
|
||||
public:
|
||||
struct QKNorm : public GGMLBlock {
|
||||
public:
|
||||
QKNorm(int64_t dim) {
|
||||
blocks["query_norm"] = std::shared_ptr<GGMLBlock>(new RMSNorm(dim));
|
||||
blocks["key_norm"] = std::shared_ptr<GGMLBlock>(new RMSNorm(dim));
|
||||
@ -78,9 +77,9 @@ public:
|
||||
x = norm->forward(ctx, x);
|
||||
return x;
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
__STATIC_INLINE__ struct ggml_tensor* apply_rope(struct ggml_context* ctx,
|
||||
__STATIC_INLINE__ struct ggml_tensor* apply_rope(struct ggml_context* ctx,
|
||||
struct ggml_tensor* x,
|
||||
struct ggml_tensor* pe) {
|
||||
// x: [N, L, n_head, d_head]
|
||||
@ -90,7 +89,7 @@ __STATIC_INLINE__ struct ggml_tensor* apply_rope(struct ggml_context* ctx,
|
||||
int64_t L = x->ne[2];
|
||||
int64_t N = x->ne[3];
|
||||
x = ggml_cont(ctx, ggml_permute(ctx, x, 0, 2, 1, 3)); // [N, n_head, L, d_head]
|
||||
x = ggml_reshape_4d(ctx, x, 2, d_head/2, L, n_head * N); // [N * n_head, L, d_head/2, 2]
|
||||
x = ggml_reshape_4d(ctx, x, 2, d_head / 2, L, n_head * N); // [N * n_head, L, d_head/2, 2]
|
||||
x = ggml_cont(ctx, ggml_permute(ctx, x, 3, 0, 1, 2)); // [2, N * n_head, L, d_head/2]
|
||||
|
||||
int64_t offset = x->nb[2] * x->ne[2];
|
||||
@ -108,11 +107,11 @@ __STATIC_INLINE__ struct ggml_tensor* apply_rope(struct ggml_context* ctx,
|
||||
auto pe_1 = ggml_view_3d(ctx, pe, pe->ne[0], pe->ne[1], pe->ne[2], pe->nb[1], pe->nb[2], offset * 1); // [L, d_head/2, 2]
|
||||
|
||||
auto x_out = ggml_add_inplace(ctx, ggml_mul(ctx, x_0, pe_0), ggml_mul(ctx, x_1, pe_1)); // [N * n_head, L, d_head/2, 2]
|
||||
x_out = ggml_reshape_3d(ctx, x_out, d_head, L, n_head*N); // [N*n_head, L, d_head]
|
||||
x_out = ggml_reshape_3d(ctx, x_out, d_head, L, n_head * N); // [N*n_head, L, d_head]
|
||||
return x_out;
|
||||
}
|
||||
}
|
||||
|
||||
__STATIC_INLINE__ struct ggml_tensor* attention(struct ggml_context* ctx,
|
||||
__STATIC_INLINE__ struct ggml_tensor* attention(struct ggml_context* ctx,
|
||||
struct ggml_tensor* q,
|
||||
struct ggml_tensor* k,
|
||||
struct ggml_tensor* v,
|
||||
@ -125,13 +124,13 @@ __STATIC_INLINE__ struct ggml_tensor* attention(struct ggml_context* ctx,
|
||||
|
||||
auto x = ggml_nn_attention_ext(ctx, q, k, v, v->ne[1], NULL, false, true); // [N, L, n_head*d_head]
|
||||
return x;
|
||||
}
|
||||
}
|
||||
|
||||
struct SelfAttention : public GGMLBlock {
|
||||
public:
|
||||
struct SelfAttention : public GGMLBlock {
|
||||
public:
|
||||
int64_t num_heads;
|
||||
|
||||
public:
|
||||
public:
|
||||
SelfAttention(int64_t dim,
|
||||
int64_t num_heads = 8,
|
||||
bool qkv_bias = false)
|
||||
@ -146,7 +145,6 @@ public:
|
||||
auto qkv_proj = std::dynamic_pointer_cast<Linear>(blocks["qkv"]);
|
||||
auto norm = std::dynamic_pointer_cast<QKNorm>(blocks["norm"]);
|
||||
|
||||
|
||||
auto qkv = qkv_proj->forward(ctx, x);
|
||||
auto qkv_vec = split_qkv(ctx, qkv);
|
||||
int64_t head_dim = qkv_vec[0]->ne[0] / num_heads;
|
||||
@ -174,25 +172,26 @@ public:
|
||||
x = post_attention(ctx, x); // [N, n_token, dim]
|
||||
return x;
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
struct ModulationOut {
|
||||
struct ModulationOut {
|
||||
ggml_tensor* shift = NULL;
|
||||
ggml_tensor* scale = NULL;
|
||||
ggml_tensor* gate = NULL;
|
||||
|
||||
ModulationOut(ggml_tensor* shift = NULL, ggml_tensor* scale = NULL, ggml_tensor* gate = NULL)
|
||||
: shift(shift), scale(scale), gate(gate) {}
|
||||
};
|
||||
};
|
||||
|
||||
struct Modulation : public GGMLBlock {
|
||||
public:
|
||||
struct Modulation : public GGMLBlock {
|
||||
public:
|
||||
bool is_double;
|
||||
int multiplier;
|
||||
public:
|
||||
Modulation(int64_t dim, bool is_double): is_double(is_double) {
|
||||
multiplier = is_double? 6 : 3;
|
||||
|
||||
public:
|
||||
Modulation(int64_t dim, bool is_double)
|
||||
: is_double(is_double) {
|
||||
multiplier = is_double ? 6 : 3;
|
||||
blocks["lin"] = std::shared_ptr<GGMLBlock>(new Linear(dim, dim * multiplier));
|
||||
}
|
||||
|
||||
@ -221,9 +220,9 @@ public:
|
||||
|
||||
return {ModulationOut(shift_0, scale_0, gate_0), ModulationOut()};
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
__STATIC_INLINE__ struct ggml_tensor* modulate(struct ggml_context* ctx,
|
||||
__STATIC_INLINE__ struct ggml_tensor* modulate(struct ggml_context* ctx,
|
||||
struct ggml_tensor* x,
|
||||
struct ggml_tensor* shift,
|
||||
struct ggml_tensor* scale) {
|
||||
@ -235,10 +234,10 @@ __STATIC_INLINE__ struct ggml_tensor* modulate(struct ggml_context* ctx,
|
||||
x = ggml_add(ctx, x, ggml_mul(ctx, x, scale));
|
||||
x = ggml_add(ctx, x, shift);
|
||||
return x;
|
||||
}
|
||||
}
|
||||
|
||||
struct DoubleStreamBlock : public GGMLBlock {
|
||||
public:
|
||||
struct DoubleStreamBlock : public GGMLBlock {
|
||||
public:
|
||||
DoubleStreamBlock(int64_t hidden_size,
|
||||
int64_t num_heads,
|
||||
float mlp_ratio,
|
||||
@ -289,7 +288,6 @@ public:
|
||||
auto txt_mlp_0 = std::dynamic_pointer_cast<Linear>(blocks["txt_mlp.0"]);
|
||||
auto txt_mlp_2 = std::dynamic_pointer_cast<Linear>(blocks["txt_mlp.2"]);
|
||||
|
||||
|
||||
auto img_mods = img_mod->forward(ctx, vec);
|
||||
ModulationOut img_mod1 = img_mods[0];
|
||||
ModulationOut img_mod2 = img_mods[1];
|
||||
@ -359,20 +357,20 @@ public:
|
||||
|
||||
return {img, txt};
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
struct SingleStreamBlock : public GGMLBlock {
|
||||
public:
|
||||
struct SingleStreamBlock : public GGMLBlock {
|
||||
public:
|
||||
int64_t num_heads;
|
||||
int64_t hidden_size;
|
||||
int64_t mlp_hidden_dim;
|
||||
public:
|
||||
|
||||
public:
|
||||
SingleStreamBlock(int64_t hidden_size,
|
||||
int64_t num_heads,
|
||||
float mlp_ratio = 4.0f,
|
||||
float qk_scale = 0.f) :
|
||||
hidden_size(hidden_size), num_heads(num_heads) {
|
||||
float qk_scale = 0.f)
|
||||
: hidden_size(hidden_size), num_heads(num_heads) {
|
||||
int64_t head_dim = hidden_size / num_heads;
|
||||
float scale = qk_scale;
|
||||
if (scale <= 0.f) {
|
||||
@ -443,11 +441,10 @@ public:
|
||||
output = ggml_add(ctx, x, ggml_mul(ctx, output, mod.gate));
|
||||
return output;
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
struct LastLayer : public GGMLBlock {
|
||||
public:
|
||||
struct LastLayer : public GGMLBlock {
|
||||
public:
|
||||
LastLayer(int64_t hidden_size,
|
||||
int64_t patch_size,
|
||||
int64_t out_channels) {
|
||||
@ -479,11 +476,11 @@ public:
|
||||
|
||||
return x;
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
struct FluxParams {
|
||||
struct FluxParams {
|
||||
int64_t in_channels = 64;
|
||||
int64_t vec_in_dim=768;
|
||||
int64_t vec_in_dim = 768;
|
||||
int64_t context_in_dim = 4096;
|
||||
int64_t hidden_size = 3072;
|
||||
float mlp_ratio = 4.0f;
|
||||
@ -495,11 +492,10 @@ struct FluxParams {
|
||||
int theta = 10000;
|
||||
bool qkv_bias = true;
|
||||
bool guidance_embed = true;
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
struct Flux : public GGMLBlock {
|
||||
public:
|
||||
struct Flux : public GGMLBlock {
|
||||
public:
|
||||
std::vector<float> linspace(float start, float end, int num) {
|
||||
std::vector<float> result(num);
|
||||
float step = (end - start) / (num - 1);
|
||||
@ -609,9 +605,9 @@ public:
|
||||
// std::cout << trans_ids[0][i] << " " << trans_ids[1][i] << " " << trans_ids[2][i] << std::endl;
|
||||
}
|
||||
|
||||
|
||||
int emb_dim = 0;
|
||||
for (int d : axes_dim) emb_dim += d / 2;
|
||||
for (int d : axes_dim)
|
||||
emb_dim += d / 2;
|
||||
|
||||
std::vector<std::vector<float>> emb(bs * pos_len, std::vector<float>(emb_dim * 2 * 2, 0.0));
|
||||
int offset = 0;
|
||||
@ -629,10 +625,12 @@ public:
|
||||
|
||||
return flatten(emb);
|
||||
}
|
||||
public:
|
||||
|
||||
public:
|
||||
FluxParams params;
|
||||
Flux() {}
|
||||
Flux(FluxParams params) : params(params) {
|
||||
Flux(FluxParams params)
|
||||
: params(params) {
|
||||
int64_t out_channels = params.in_channels;
|
||||
int64_t pe_dim = params.hidden_size / params.num_heads;
|
||||
|
||||
@ -675,11 +673,11 @@ public:
|
||||
|
||||
GGML_ASSERT(h * p == H && w * p == W);
|
||||
|
||||
x = ggml_reshape_4d(ctx, x, p, w, p, h*C*N); // [N*C*h, p, w, p]
|
||||
x = ggml_reshape_4d(ctx, x, p, w, p, h * C * N); // [N*C*h, p, w, p]
|
||||
x = ggml_cont(ctx, ggml_permute(ctx, x, 0, 2, 1, 3)); // [N*C*h, w, p, p]
|
||||
x = ggml_reshape_4d(ctx, x, p * p, w * h, C, N); // [N, C, h*w, p*p]
|
||||
x = ggml_cont(ctx, ggml_permute(ctx, x, 0, 2, 1, 3)); // [N, h*w, C, p*p]
|
||||
x = ggml_reshape_3d(ctx, x, p*p*C, w*h, N); // [N, h*w, C*p*p]
|
||||
x = ggml_reshape_3d(ctx, x, p * p * C, w * h, N); // [N, h*w, C*p*p]
|
||||
return x;
|
||||
}
|
||||
|
||||
@ -800,11 +798,10 @@ public:
|
||||
|
||||
return out;
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
struct FluxRunner : public GGMLRunner {
|
||||
public:
|
||||
struct FluxRunner : public GGMLRunner {
|
||||
public:
|
||||
FluxParams flux_params;
|
||||
Flux flux;
|
||||
std::vector<float> pe_vec; // for cache
|
||||
@ -847,13 +844,12 @@ public:
|
||||
pe_vec = flux.gen_pe(x->ne[1], x->ne[0], 2, x->ne[3], context->ne[1], flux_params.theta, flux_params.axes_dim);
|
||||
int pos_len = pe_vec.size() / flux_params.axes_dim_sum / 2;
|
||||
// LOG_DEBUG("pos_len %d", pos_len);
|
||||
auto pe = ggml_new_tensor_4d(compute_ctx, GGML_TYPE_F32, 2, 2, flux_params.axes_dim_sum/2, pos_len);
|
||||
auto pe = ggml_new_tensor_4d(compute_ctx, GGML_TYPE_F32, 2, 2, flux_params.axes_dim_sum / 2, pos_len);
|
||||
// pe->data = pe_vec.data();
|
||||
// print_ggml_tensor(pe);
|
||||
// pe->data = NULL;
|
||||
set_backend_tensor_data(pe, pe_vec.data());
|
||||
|
||||
|
||||
struct ggml_tensor* out = flux.forward(compute_ctx,
|
||||
x,
|
||||
timesteps,
|
||||
@ -958,7 +954,7 @@ public:
|
||||
}
|
||||
flux->test();
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
} // namespace Flux
|
||||
|
||||
|
@ -686,8 +686,8 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_nn_attention_ext(struct ggml_context*
|
||||
bool skip_reshape = false) {
|
||||
int64_t L_q;
|
||||
int64_t L_k;
|
||||
int64_t C ;
|
||||
int64_t N ;
|
||||
int64_t C;
|
||||
int64_t N;
|
||||
int64_t d_head;
|
||||
if (!skip_reshape) {
|
||||
L_q = q->ne[1];
|
||||
@ -781,7 +781,7 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_nn_group_norm(struct ggml_context* ct
|
||||
}
|
||||
|
||||
__STATIC_INLINE__ void ggml_backend_tensor_get_and_sync(ggml_backend_t backend, const struct ggml_tensor* tensor, void* data, size_t offset, size_t size) {
|
||||
#if defined (SD_USE_CUBLAS) || defined (SD_USE_SYCL)
|
||||
#if defined(SD_USE_CUBLAS) || defined(SD_USE_SYCL)
|
||||
if (!ggml_backend_is_cpu(backend)) {
|
||||
ggml_backend_tensor_get_async(backend, tensor, data, offset, size);
|
||||
ggml_backend_synchronize(backend);
|
||||
|
@ -1427,7 +1427,6 @@ ggml_type ModelLoader::get_conditioner_wtype() {
|
||||
return GGML_TYPE_COUNT;
|
||||
}
|
||||
|
||||
|
||||
ggml_type ModelLoader::get_diffusion_model_wtype() {
|
||||
for (auto& tensor_storage : tensor_storages) {
|
||||
if (is_unused_tensor(tensor_storage.name)) {
|
||||
|
@ -74,7 +74,6 @@ public:
|
||||
ggml_type diffusion_model_wtype = GGML_TYPE_COUNT;
|
||||
ggml_type vae_wtype = GGML_TYPE_COUNT;
|
||||
|
||||
|
||||
SDVersion version;
|
||||
bool vae_decode_only = false;
|
||||
bool free_params_immediately = false;
|
||||
@ -171,7 +170,7 @@ public:
|
||||
backend = ggml_backend_cpu_init();
|
||||
}
|
||||
#ifdef SD_USE_FLASH_ATTENTION
|
||||
#if defined(SD_USE_CUBLAS) || defined(SD_USE_METAL) || defined (SD_USE_SYCL)
|
||||
#if defined(SD_USE_CUBLAS) || defined(SD_USE_METAL) || defined(SD_USE_SYCL)
|
||||
LOG_WARN("Flash Attention not supported with GPU Backend");
|
||||
#else
|
||||
LOG_INFO("Flash Attention enabled");
|
||||
|
Loading…
Reference in New Issue
Block a user