feat: force using f32 for some layers

parent 79c9fe9556
commit 1bdc767aaf

flux.hpp (8 lines changed)
@@ -13,7 +13,7 @@ namespace Flux {
     struct MLPEmbedder : public UnaryBlock {
     public:
         MLPEmbedder(int64_t in_dim, int64_t hidden_dim) {
-            blocks["in_layer"] = std::shared_ptr<GGMLBlock>(new Linear(in_dim, hidden_dim, true));
+            blocks["in_layer"] = std::shared_ptr<GGMLBlock>(new Linear(in_dim, hidden_dim, true, true));
             blocks["out_layer"] = std::shared_ptr<GGMLBlock>(new Linear(hidden_dim, hidden_dim, true));
         }

@@ -449,7 +449,7 @@ namespace Flux {
                    int64_t patch_size,
                    int64_t out_channels) {
             blocks["norm_final"] = std::shared_ptr<GGMLBlock>(new LayerNorm(hidden_size, 1e-06f, false));
-            blocks["linear"] = std::shared_ptr<GGMLBlock>(new Linear(hidden_size, patch_size * patch_size * out_channels));
+            blocks["linear"] = std::shared_ptr<GGMLBlock>(new Linear(hidden_size, patch_size * patch_size * out_channels, true, true));
             blocks["adaLN_modulation.1"] = std::shared_ptr<GGMLBlock>(new Linear(hidden_size, 2 * hidden_size));
         }

@@ -634,13 +634,13 @@ namespace Flux {
             int64_t out_channels = params.in_channels;
             int64_t pe_dim = params.hidden_size / params.num_heads;

-            blocks["img_in"] = std::shared_ptr<GGMLBlock>(new Linear(params.in_channels, params.hidden_size));
+            blocks["img_in"] = std::shared_ptr<GGMLBlock>(new Linear(params.in_channels, params.hidden_size, true, true));
             blocks["time_in"] = std::shared_ptr<GGMLBlock>(new MLPEmbedder(256, params.hidden_size));
             blocks["vector_in"] = std::shared_ptr<GGMLBlock>(new MLPEmbedder(params.vec_in_dim, params.hidden_size));
             if (params.guidance_embed) {
                 blocks["guidance_in"] = std::shared_ptr<GGMLBlock>(new MLPEmbedder(256, params.hidden_size));
             }
-            blocks["txt_in"] = std::shared_ptr<GGMLBlock>(new Linear(params.context_in_dim, params.hidden_size));
+            blocks["txt_in"] = std::shared_ptr<GGMLBlock>(new Linear(params.context_in_dim, params.hidden_size, true, true));

             for (int i = 0; i < params.depth; i++) {
                 blocks["double_blocks." + std::to_string(i)] = std::shared_ptr<GGMLBlock>(new DoubleStreamBlock(params.hidden_size,
@@ -1187,9 +1187,10 @@ protected:
     int64_t in_features;
     int64_t out_features;
     bool bias;
+    bool force_f32;

     void init_params(struct ggml_context* ctx, ggml_type wtype) {
-        if (in_features % ggml_blck_size(wtype) != 0) {
+        if (in_features % ggml_blck_size(wtype) != 0 || force_f32) {
             wtype = GGML_TYPE_F32;
         }
         params["weight"] = ggml_new_tensor_2d(ctx, wtype, in_features, out_features);
@@ -1201,10 +1202,12 @@ protected:
 public:
     Linear(int64_t in_features,
            int64_t out_features,
-           bool bias = true)
+           bool bias = true,
+           bool force_f32 = false)
         : in_features(in_features),
           out_features(out_features),
-          bias(bias) {}
+          bias(bias),
+          force_f32(force_f32) {}

     struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) {
         struct ggml_tensor* w = params["weight"];
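The two hunks above thread the new force_f32 flag through the Linear block: the constructor stores it, and init_params() falls back to GGML_TYPE_F32 when the flag is set (or when in_features is not a multiple of the quantization block size). A minimal, standalone sketch of that weight-type decision follows; WType, blck_size(), and pick_weight_type() are simplified stand-ins for the ggml types and helpers, not part of the actual code.

#include <cstdint>
#include <cstdio>

enum class WType { F32, Q4_0 };  // stand-ins for GGML_TYPE_F32 / a quantized ggml type

static int blck_size(WType t) { return t == WType::Q4_0 ? 32 : 1; }

// Mirrors the patched init_params(): fall back to f32 when the input dimension
// is not a multiple of the quant block size, or when force_f32 is set.
static WType pick_weight_type(int64_t in_features, WType requested, bool force_f32) {
    if (in_features % blck_size(requested) != 0 || force_f32) {
        return WType::F32;
    }
    return requested;
}

int main() {
    // An img_in-style layer: the input dim divides the block size, but it is now forced to f32.
    std::printf("forced  -> %s\n", pick_weight_type(64, WType::Q4_0, true) == WType::F32 ? "f32" : "q4_0");
    // A regular layer keeps the requested quantized type.
    std::printf("regular -> %s\n", pick_weight_type(64, WType::Q4_0, false) == WType::F32 ? "f32" : "q4_0");
    return 0;
}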
mmdit.hpp (12 lines changed)
@@ -101,8 +101,8 @@ public:
     TimestepEmbedder(int64_t hidden_size,
                      int64_t frequency_embedding_size = 256)
         : frequency_embedding_size(frequency_embedding_size) {
-        blocks["mlp.0"] = std::shared_ptr<GGMLBlock>(new Linear(frequency_embedding_size, hidden_size));
-        blocks["mlp.2"] = std::shared_ptr<GGMLBlock>(new Linear(hidden_size, hidden_size));
+        blocks["mlp.0"] = std::shared_ptr<GGMLBlock>(new Linear(frequency_embedding_size, hidden_size, true, true));
+        blocks["mlp.2"] = std::shared_ptr<GGMLBlock>(new Linear(hidden_size, hidden_size, true, true));
     }

     struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* t) {
@@ -125,8 +125,8 @@ struct VectorEmbedder : public GGMLBlock {
 public:
     VectorEmbedder(int64_t input_dim,
                    int64_t hidden_size) {
-        blocks["mlp.0"] = std::shared_ptr<GGMLBlock>(new Linear(input_dim, hidden_size));
-        blocks["mlp.2"] = std::shared_ptr<GGMLBlock>(new Linear(hidden_size, hidden_size));
+        blocks["mlp.0"] = std::shared_ptr<GGMLBlock>(new Linear(input_dim, hidden_size, true, true));
+        blocks["mlp.2"] = std::shared_ptr<GGMLBlock>(new Linear(hidden_size, hidden_size, true, true));
     }

     struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) {
@@ -423,7 +423,7 @@ public:
                int64_t out_channels) {
         // total_out_channels is always None
         blocks["norm_final"] = std::shared_ptr<GGMLBlock>(new LayerNorm(hidden_size, 1e-06f, false));
-        blocks["linear"] = std::shared_ptr<GGMLBlock>(new Linear(hidden_size, patch_size * patch_size * out_channels));
+        blocks["linear"] = std::shared_ptr<GGMLBlock>(new Linear(hidden_size, patch_size * patch_size * out_channels, true, true));
         blocks["adaLN_modulation.1"] = std::shared_ptr<GGMLBlock>(new Linear(hidden_size, 2 * hidden_size));
     }

@@ -510,7 +510,7 @@ public:
             blocks["y_embedder"] = std::shared_ptr<GGMLBlock>(new VectorEmbedder(adm_in_channels, hidden_size));
         }

-        blocks["context_embedder"] = std::shared_ptr<GGMLBlock>(new Linear(4096, 1536));
+        blocks["context_embedder"] = std::shared_ptr<GGMLBlock>(new Linear(4096, 1536, true, true));

         for (int i = 0; i < depth; i++) {
             blocks["joint_blocks." + std::to_string(i)] = std::shared_ptr<GGMLBlock>(new JointBlock(hidden_size,
model.cpp (12 lines changed)
@@ -1740,9 +1740,17 @@ bool ModelLoader::tensor_should_be_converted(const TensorStorage& tensor_storage
         // Pass, do not convert
     } else if (ends_with(name, ".bias")) {
         // Pass, do not convert
-    } else if (contains(name, "img_in.") || contains(name, "time_in.in_layer.") || contains(name, "vector_in.in_layer.") || contains(name, "guidance_in.in_layer.") || contains(name, "final_layer.linear.")) {
+    } else if (contains(name, "img_in.") ||
+               contains(name, "time_in.in_layer.") ||
+               contains(name, "vector_in.in_layer.") ||
+               contains(name, "guidance_in.in_layer.") ||
+               contains(name, "final_layer.linear.")) {
         // Pass, do not convert. For FLUX
-    } else if (contains(name, "x_embedder.") || contains(name, "t_embedder.") || contains(name, "y_embedder.") || contains(name, "context_embedder.")) {
+    } else if (contains(name, "x_embedder.") ||
+               contains(name, "t_embedder.") ||
+               contains(name, "y_embedder.") ||
+               contains(name, "pos_embed") ||
+               contains(name, "context_embedder.")) {
         // Pass, do not convert. For MMDiT
     } else if (contains(name, "time_embed.") || contains(name, "label_emb.")) {
         // Pass, do not convert. For Unet
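The model.cpp hunk above complements the header changes: tensor names matching these patterns are skipped by tensor_should_be_converted(), so the weights of the layers now constructed with force_f32 stay in f32 instead of being quantized at load time. A small, standalone sketch of that name filter follows; keep_unconverted() and the sample tensor names are illustrative stand-ins, only the pattern list is taken from the diff.

#include <cstdio>
#include <string>

static bool contains(const std::string& s, const std::string& sub) {
    return s.find(sub) != std::string::npos;
}

// Tensors whose names match these patterns are left unconverted (kept in f32).
static bool keep_unconverted(const std::string& name) {
    // FLUX input / final-layer Linear blocks (forced to f32 by this commit)
    if (contains(name, "img_in.") || contains(name, "time_in.in_layer.") ||
        contains(name, "vector_in.in_layer.") || contains(name, "guidance_in.in_layer.") ||
        contains(name, "final_layer.linear.")) {
        return true;
    }
    // MMDiT embedders (note the newly added "pos_embed" pattern)
    if (contains(name, "x_embedder.") || contains(name, "t_embedder.") ||
        contains(name, "y_embedder.") || contains(name, "pos_embed") ||
        contains(name, "context_embedder.")) {
        return true;
    }
    return false;
}

int main() {
    const char* names[] = {
        "model.diffusion_model.img_in.weight",                        // illustrative name, kept f32
        "model.diffusion_model.double_blocks.0.img_attn.qkv.weight",  // illustrative name, may be quantized
    };
    for (const char* n : names) {
        std::printf("%s -> %s\n", n, keep_unconverted(n) ? "keep f32" : "may convert");
    }
    return 0;
}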