style: format code
parent d08d7fa632
commit c837c5d9cc
conditioner.hpp

@@ -10,8 +10,8 @@ struct SDCondition {
     struct ggml_tensor* c_concat = NULL;

     SDCondition() = default;
-    SDCondition(struct ggml_tensor* c_crossattn, struct ggml_tensor* c_vector, struct ggml_tensor* c_concat) :
-        c_crossattn(c_crossattn), c_vector(c_vector), c_concat(c_concat) {}
+    SDCondition(struct ggml_tensor* c_crossattn, struct ggml_tensor* c_vector, struct ggml_tensor* c_concat)
+        : c_crossattn(c_crossattn), c_vector(c_vector), c_concat(c_concat) {}
 };

 struct Conditioner {
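Note: the recurring change in this commit moves the constructor's member-initializer colon from the end of the signature line to the start of the initializer line. A minimal sketch with a hypothetical Foo type (not from this diff); both forms compile identically, only the line break moves:

    struct Foo {
        int a, b;
        // before:  Foo(int a, int b) :
        //              a(a), b(b) {}
        Foo(int a, int b)
            : a(a), b(b) {}  // after: the colon leads the continuation line
    };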
@@ -978,7 +978,6 @@ struct SD3CLIPEmbedder : public Conditioner {
     }
 };

-
 struct FluxCLIPEmbedder : public Conditioner {
     ggml_type wtype;
     CLIPTokenizer clip_l_tokenizer;
denoiser.hpp

@@ -351,7 +351,6 @@ struct DiscreteFlowDenoiser : public Denoiser {
     }
 };

-
 float flux_time_shift(float mu, float sigma, float t) {
     return std::exp(mu) / (std::exp(mu) + std::pow((1.0 / t - 1.0), sigma));
 }

@@ -369,7 +368,7 @@ struct FluxFlowDenoiser : public Denoiser {
     void set_parameters(float shift = 1.15f) {
         this->shift = shift;
         for (int i = 1; i < TIMESTEPS + 1; i++) {
-            sigmas[i - 1] = t_to_sigma(i/TIMESTEPS * TIMESTEPS);
+            sigmas[i - 1] = t_to_sigma(i / TIMESTEPS * TIMESTEPS);
         }
     }

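Note: flux_time_shift above remaps a sampling time t in (0, 1] through a shifted logistic curve; at t = 0.5 the power term collapses to 1 and the value reduces to sigmoid(mu) for any sigma. A standalone sketch checking that identity, reusing the formula verbatim:

    #include <cassert>
    #include <cmath>

    // Same formula as flux_time_shift in denoiser.hpp, copied for a standalone test.
    static float flux_time_shift(float mu, float sigma, float t) {
        return std::exp(mu) / (std::exp(mu) + std::pow((1.0 / t - 1.0), sigma));
    }

    int main() {
        // At t = 0.5, (1/t - 1) == 1, so the result is exp(mu) / (exp(mu) + 1),
        // i.e. sigmoid(mu), independent of sigma.
        float y = flux_time_shift(1.15f, 3.0f, 0.5f);
        assert(std::fabs(y - 1.0f / (1.0f + std::exp(-1.15f))) < 1e-6f);
        return 0;
    }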
diffusion_model.hpp

@@ -1,9 +1,9 @@
 #ifndef __DIFFUSION_MODEL_H__
 #define __DIFFUSION_MODEL_H__

+#include "flux.hpp"
 #include "mmdit.hpp"
 #include "unet.hpp"
-#include "flux.hpp"

 struct DiffusionModel {
     virtual void compute(int n_threads,

@@ -124,7 +124,6 @@ struct MMDiTModel : public DiffusionModel {
     }
 };

-
 struct FluxModel : public DiffusionModel {
     Flux::FluxRunner flux;

flux.hpp (130 lines changed)
@@ -10,8 +10,8 @@

 namespace Flux {

 struct MLPEmbedder : public UnaryBlock {
 public:
     MLPEmbedder(int64_t in_dim, int64_t hidden_dim) {
         blocks["in_layer"] = std::shared_ptr<GGMLBlock>(new Linear(in_dim, hidden_dim, true));
         blocks["out_layer"] = std::shared_ptr<GGMLBlock>(new Linear(hidden_dim, hidden_dim, true));
@@ -28,10 +28,10 @@ public:
         x = out_layer->forward(ctx, x);
         return x;
     }
 };

 class RMSNorm : public UnaryBlock {
 protected:
     int64_t hidden_size;
     float eps;

@@ -39,7 +39,7 @@ protected:
         params["scale"] = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, hidden_size);
     }

 public:
     RMSNorm(int64_t hidden_size,
             float eps = 1e-06f)
         : hidden_size(hidden_size),
@@ -51,11 +51,10 @@ public:
         x = ggml_mul(ctx, x, w);
         return x;
     }
 };

-
 struct QKNorm : public GGMLBlock {
 public:
     QKNorm(int64_t dim) {
         blocks["query_norm"] = std::shared_ptr<GGMLBlock>(new RMSNorm(dim));
         blocks["key_norm"] = std::shared_ptr<GGMLBlock>(new RMSNorm(dim));
@@ -78,9 +77,9 @@ public:
         x = norm->forward(ctx, x);
         return x;
     }
 };

 __STATIC_INLINE__ struct ggml_tensor* apply_rope(struct ggml_context* ctx,
                                                  struct ggml_tensor* x,
                                                  struct ggml_tensor* pe) {
     // x: [N, L, n_head, d_head]
@@ -90,7 +89,7 @@ __STATIC_INLINE__ struct ggml_tensor* apply_rope(struct ggml_context* ctx,
     int64_t L = x->ne[2];
     int64_t N = x->ne[3];
     x = ggml_cont(ctx, ggml_permute(ctx, x, 0, 2, 1, 3));  // [N, n_head, L, d_head]
-    x = ggml_reshape_4d(ctx, x, 2, d_head/2, L, n_head * N);  // [N * n_head, L, d_head/2, 2]
+    x = ggml_reshape_4d(ctx, x, 2, d_head / 2, L, n_head * N);  // [N * n_head, L, d_head/2, 2]
     x = ggml_cont(ctx, ggml_permute(ctx, x, 3, 0, 1, 2));  // [2, N * n_head, L, d_head/2]

     int64_t offset = x->nb[2] * x->ne[2];

@@ -108,11 +107,11 @@ __STATIC_INLINE__ struct ggml_tensor* apply_rope(struct ggml_context* ctx,
     auto pe_1 = ggml_view_3d(ctx, pe, pe->ne[0], pe->ne[1], pe->ne[2], pe->nb[1], pe->nb[2], offset * 1);  // [L, d_head/2, 2]

     auto x_out = ggml_add_inplace(ctx, ggml_mul(ctx, x_0, pe_0), ggml_mul(ctx, x_1, pe_1));  // [N * n_head, L, d_head/2, 2]
-    x_out = ggml_reshape_3d(ctx, x_out, d_head, L, n_head*N);  // [N*n_head, L, d_head]
+    x_out = ggml_reshape_3d(ctx, x_out, d_head, L, n_head * N);  // [N*n_head, L, d_head]
     return x_out;
 }

 __STATIC_INLINE__ struct ggml_tensor* attention(struct ggml_context* ctx,
                                                 struct ggml_tensor* q,
                                                 struct ggml_tensor* k,
                                                 struct ggml_tensor* v,
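Note: apply_rope above reshapes the head dimension into (d_head/2, 2) pairs and multiplies them against the two slices of the precomputed pe tensor, then sums — a 2x2 rotation per frequency. A scalar sketch of what one pair undergoes, assuming pe packs the usual cos/sin rotation rows:

    #include <cmath>

    // One RoPE pair: (x0, x1) rotated by angle theta. The tensor expression
    // x_out = x_0 * pe_0 + x_1 * pe_1 computes exactly this when pe_0 holds
    // the (cos, sin) row and pe_1 the (-sin, cos) row for the two output slots.
    static void rope_pair(float x0, float x1, float theta, float* y0, float* y1) {
        *y0 = x0 * std::cos(theta) - x1 * std::sin(theta);
        *y1 = x0 * std::sin(theta) + x1 * std::cos(theta);
    }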
@@ -125,13 +124,13 @@ __STATIC_INLINE__ struct ggml_tensor* attention(struct ggml_context* ctx,

     auto x = ggml_nn_attention_ext(ctx, q, k, v, v->ne[1], NULL, false, true);  // [N, L, n_head*d_head]
     return x;
 }

 struct SelfAttention : public GGMLBlock {
 public:
     int64_t num_heads;

 public:
     SelfAttention(int64_t dim,
                   int64_t num_heads = 8,
                   bool qkv_bias = false)
@@ -146,7 +145,6 @@ public:
         auto qkv_proj = std::dynamic_pointer_cast<Linear>(blocks["qkv"]);
         auto norm = std::dynamic_pointer_cast<QKNorm>(blocks["norm"]);

-
         auto qkv = qkv_proj->forward(ctx, x);
         auto qkv_vec = split_qkv(ctx, qkv);
         int64_t head_dim = qkv_vec[0]->ne[0] / num_heads;
@@ -174,25 +172,26 @@ public:
         x = post_attention(ctx, x);  // [N, n_token, dim]
         return x;
     }
 };

-
 struct ModulationOut {
     ggml_tensor* shift = NULL;
     ggml_tensor* scale = NULL;
     ggml_tensor* gate = NULL;

     ModulationOut(ggml_tensor* shift = NULL, ggml_tensor* scale = NULL, ggml_tensor* gate = NULL)
         : shift(shift), scale(scale), gate(gate) {}
 };

 struct Modulation : public GGMLBlock {
 public:
     bool is_double;
     int multiplier;
+
 public:
-    Modulation(int64_t dim, bool is_double): is_double(is_double) {
-        multiplier = is_double? 6 : 3;
+    Modulation(int64_t dim, bool is_double)
+        : is_double(is_double) {
+        multiplier = is_double ? 6 : 3;
         blocks["lin"] = std::shared_ptr<GGMLBlock>(new Linear(dim, dim * multiplier));
     }

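Note: the multiplier in Modulation sets how many dim-sized chunks the single lin projection must produce: one (shift, scale, gate) triple for single modulation, two triples when is_double. A tiny sketch of that bookkeeping (hypothetical helper, not in the diff):

    // 3 chunks per ModulationOut triple; double modulation emits two triples.
    static int modulation_chunks(bool is_double) {
        return (is_double ? 2 : 1) * 3;  // matches multiplier = is_double ? 6 : 3
    }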
@@ -221,9 +220,9 @@ public:

         return {ModulationOut(shift_0, scale_0, gate_0), ModulationOut()};
     }
 };

 __STATIC_INLINE__ struct ggml_tensor* modulate(struct ggml_context* ctx,
                                                struct ggml_tensor* x,
                                                struct ggml_tensor* shift,
                                                struct ggml_tensor* scale) {
@@ -235,10 +234,10 @@ __STATIC_INLINE__ struct ggml_tensor* modulate(struct ggml_context* ctx,
     x = ggml_add(ctx, x, ggml_mul(ctx, x, scale));
     x = ggml_add(ctx, x, shift);
     return x;
 }

 struct DoubleStreamBlock : public GGMLBlock {
 public:
     DoubleStreamBlock(int64_t hidden_size,
                       int64_t num_heads,
                       float mlp_ratio,
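Note: modulate above is the adaLN-style affine map; the two ggml_add calls compute x + x*scale + shift, i.e. x * (1 + scale) + shift. A scalar equivalent for reference:

    // Scalar version of modulate(): x = x + x * scale; x = x + shift;
    static float modulate_scalar(float x, float shift, float scale) {
        return x * (1.0f + scale) + shift;
    }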
@@ -289,7 +288,6 @@ public:
         auto txt_mlp_0 = std::dynamic_pointer_cast<Linear>(blocks["txt_mlp.0"]);
         auto txt_mlp_2 = std::dynamic_pointer_cast<Linear>(blocks["txt_mlp.2"]);

-
         auto img_mods = img_mod->forward(ctx, vec);
         ModulationOut img_mod1 = img_mods[0];
         ModulationOut img_mod2 = img_mods[1];
@@ -359,20 +357,20 @@ public:

         return {img, txt};
     }
 };

-
 struct SingleStreamBlock : public GGMLBlock {
 public:
     int64_t num_heads;
     int64_t hidden_size;
     int64_t mlp_hidden_dim;
+
 public:
     SingleStreamBlock(int64_t hidden_size,
                       int64_t num_heads,
                       float mlp_ratio = 4.0f,
-                      float qk_scale = 0.f) :
-        hidden_size(hidden_size), num_heads(num_heads) {
+                      float qk_scale = 0.f)
+        : hidden_size(hidden_size), num_heads(num_heads) {
         int64_t head_dim = hidden_size / num_heads;
         float scale = qk_scale;
         if (scale <= 0.f) {
@@ -443,11 +441,10 @@ public:
         output = ggml_add(ctx, x, ggml_mul(ctx, output, mod.gate));
         return output;
     }
 };

-
 struct LastLayer : public GGMLBlock {
 public:
     LastLayer(int64_t hidden_size,
               int64_t patch_size,
               int64_t out_channels) {
@@ -479,11 +476,11 @@ public:

         return x;
     }
 };

 struct FluxParams {
     int64_t in_channels = 64;
-    int64_t vec_in_dim=768;
+    int64_t vec_in_dim = 768;
     int64_t context_in_dim = 4096;
     int64_t hidden_size = 3072;
     float mlp_ratio = 4.0f;
@@ -495,11 +492,10 @@ struct FluxParams {
     int theta = 10000;
     bool qkv_bias = true;
     bool guidance_embed = true;
 };

-
 struct Flux : public GGMLBlock {
 public:
     std::vector<float> linspace(float start, float end, int num) {
         std::vector<float> result(num);
         float step = (end - start) / (num - 1);
@@ -609,9 +605,9 @@ public:
             // std::cout << trans_ids[0][i] << " " << trans_ids[1][i] << " " << trans_ids[2][i] << std::endl;
         }

-
         int emb_dim = 0;
-        for (int d : axes_dim) emb_dim += d / 2;
+        for (int d : axes_dim)
+            emb_dim += d / 2;

         std::vector<std::vector<float>> emb(bs * pos_len, std::vector<float>(emb_dim * 2 * 2, 0.0));
         int offset = 0;
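Note: each rotary axis contributes d / 2 frequency slots to the position embedding, so emb_dim is half of axes_dim_sum. A worked check, assuming the FluxParams defaults axes_dim = {16, 56, 56} (axes_dim_sum = 128):

    #include <vector>

    int main() {
        std::vector<int> axes_dim = {16, 56, 56};  // assumed Flux defaults
        int emb_dim = 0;
        for (int d : axes_dim)
            emb_dim += d / 2;  // 8 + 28 + 28
        // Each slot later stores a 2 x 2 block, hence the emb_dim * 2 * 2 row width.
        return emb_dim == 64 ? 0 : 1;
    }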
@@ -629,10 +625,12 @@ public:

         return flatten(emb);
     }
+
 public:
     FluxParams params;
     Flux() {}
-    Flux(FluxParams params) : params(params) {
+    Flux(FluxParams params)
+        : params(params) {
         int64_t out_channels = params.in_channels;
         int64_t pe_dim = params.hidden_size / params.num_heads;

@@ -675,11 +673,11 @@ public:

         GGML_ASSERT(h * p == H && w * p == W);

-        x = ggml_reshape_4d(ctx, x, p, w, p, h*C*N);  // [N*C*h, p, w, p]
+        x = ggml_reshape_4d(ctx, x, p, w, p, h * C * N);  // [N*C*h, p, w, p]
         x = ggml_cont(ctx, ggml_permute(ctx, x, 0, 2, 1, 3));  // [N*C*h, w, p, p]
         x = ggml_reshape_4d(ctx, x, p * p, w * h, C, N);  // [N, C, h*w, p*p]
         x = ggml_cont(ctx, ggml_permute(ctx, x, 0, 2, 1, 3));  // [N, h*w, C, p*p]
-        x = ggml_reshape_3d(ctx, x, p*p*C, w*h, N);  // [N, h*w, C*p*p]
+        x = ggml_reshape_3d(ctx, x, p * p * C, w * h, N);  // [N, h*w, C*p*p]
         return x;
     }

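Note: the reshape/permute chain above is the patchify step: [N, C, H, W] becomes [N, h*w, C*p*p] token rows, with h = H/p and w = W/p. Worked numbers, assuming a typical latent with C = 16 and patch size p = 2 (so C*p*p matches in_channels = 64):

    #include <cstdio>

    int main() {
        int N = 1, C = 16, H = 64, W = 64, p = 2;  // assumed example shapes
        int h = H / p, w = W / p;
        std::printf("tokens = %d, channels = %d\n", h * w, C * p * p);  // 1024, 64
        return 0;
    }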
@@ -800,11 +798,10 @@ public:

         return out;
     }
 };

-
 struct FluxRunner : public GGMLRunner {
 public:
     FluxParams flux_params;
     Flux flux;
     std::vector<float> pe_vec;  // for cache
@@ -847,13 +844,12 @@ public:
         pe_vec = flux.gen_pe(x->ne[1], x->ne[0], 2, x->ne[3], context->ne[1], flux_params.theta, flux_params.axes_dim);
         int pos_len = pe_vec.size() / flux_params.axes_dim_sum / 2;
         // LOG_DEBUG("pos_len %d", pos_len);
-        auto pe = ggml_new_tensor_4d(compute_ctx, GGML_TYPE_F32, 2, 2, flux_params.axes_dim_sum/2, pos_len);
+        auto pe = ggml_new_tensor_4d(compute_ctx, GGML_TYPE_F32, 2, 2, flux_params.axes_dim_sum / 2, pos_len);
         // pe->data = pe_vec.data();
         // print_ggml_tensor(pe);
         // pe->data = NULL;
         set_backend_tensor_data(pe, pe_vec.data());

-
         struct ggml_tensor* out = flux.forward(compute_ctx,
                                                x,
                                                timesteps,
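Note: gen_pe emits, per position, axes_dim_sum / 2 frequency slots stored as 2 x 2 blocks, matching the [2, 2, axes_dim_sum / 2, pos_len] pe tensor above; that is why pos_len divides the buffer size by axes_dim_sum and then by 2. A sketch of the arithmetic:

    #include <cstddef>

    // pe_vec.size() == pos_len * (axes_dim_sum / 2) * 2 * 2
    //               == pos_len * axes_dim_sum * 2
    static size_t pos_len_from(size_t pe_vec_size, int axes_dim_sum) {
        return pe_vec_size / axes_dim_sum / 2;
    }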
@@ -958,7 +954,7 @@ public:
         }
         flux->test();
     }
 };

 }  // namespace Flux

ggml_extend.hpp

@@ -686,8 +686,8 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_nn_attention_ext(struct ggml_context*
                                                             bool skip_reshape = false) {
     int64_t L_q;
     int64_t L_k;
-    int64_t C ;
-    int64_t N ;
+    int64_t C;
+    int64_t N;
     int64_t d_head;
     if (!skip_reshape) {
         L_q = q->ne[1];
@@ -781,7 +781,7 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_nn_group_norm(struct ggml_context* ct
 }

 __STATIC_INLINE__ void ggml_backend_tensor_get_and_sync(ggml_backend_t backend, const struct ggml_tensor* tensor, void* data, size_t offset, size_t size) {
-#if defined (SD_USE_CUBLAS) || defined (SD_USE_SYCL)
+#if defined(SD_USE_CUBLAS) || defined(SD_USE_SYCL)
     if (!ggml_backend_is_cpu(backend)) {
         ggml_backend_tensor_get_async(backend, tensor, data, offset, size);
         ggml_backend_synchronize(backend);
model.cpp

@@ -1427,7 +1427,6 @@ ggml_type ModelLoader::get_conditioner_wtype() {
     return GGML_TYPE_COUNT;
 }

-
 ggml_type ModelLoader::get_diffusion_model_wtype() {
     for (auto& tensor_storage : tensor_storages) {
         if (is_unused_tensor(tensor_storage.name)) {
stable-diffusion.cpp

@@ -74,7 +74,6 @@ public:
     ggml_type diffusion_model_wtype = GGML_TYPE_COUNT;
     ggml_type vae_wtype = GGML_TYPE_COUNT;

-
     SDVersion version;
     bool vae_decode_only = false;
     bool free_params_immediately = false;
@@ -171,7 +170,7 @@ public:
             backend = ggml_backend_cpu_init();
         }
 #ifdef SD_USE_FLASH_ATTENTION
-#if defined(SD_USE_CUBLAS) || defined(SD_USE_METAL) || defined (SD_USE_SYCL)
+#if defined(SD_USE_CUBLAS) || defined(SD_USE_METAL) || defined(SD_USE_SYCL)
         LOG_WARN("Flash Attention not supported with GPU Backend");
 #else
         LOG_INFO("Flash Attention enabled");