style: format code

leejet, 2024-08-25 00:19:37 +08:00
commit c837c5d9cc (parent d08d7fa632)
10 changed files with 937 additions and 947 deletions

conditioner.hpp

@@ -10,8 +10,8 @@ struct SDCondition {
     struct ggml_tensor* c_concat = NULL;

     SDCondition() = default;
-    SDCondition(struct ggml_tensor* c_crossattn, struct ggml_tensor* c_vector, struct ggml_tensor* c_concat) :
-        c_crossattn(c_crossattn), c_vector(c_vector), c_concat(c_concat) {}
+    SDCondition(struct ggml_tensor* c_crossattn, struct ggml_tensor* c_vector, struct ggml_tensor* c_concat)
+        : c_crossattn(c_crossattn), c_vector(c_vector), c_concat(c_concat) {}
 };

 struct Conditioner {
@@ -978,7 +978,6 @@ struct SD3CLIPEmbedder : public Conditioner {
     }
 };

-
 struct FluxCLIPEmbedder : public Conditioner {
     ggml_type wtype;
     CLIPTokenizer clip_l_tokenizer;
@@ -987,8 +986,8 @@ struct FluxCLIPEmbedder : public Conditioner {
     std::shared_ptr<T5Runner> t5;

     FluxCLIPEmbedder(ggml_backend_t backend,
-                    ggml_type wtype,
-                    int clip_skip = -1)
+                     ggml_type wtype,
+                     int clip_skip = -1)
         : wtype(wtype) {
         if (clip_skip <= 0) {
             clip_skip = 2;
@@ -1085,10 +1084,10 @@ struct FluxCLIPEmbedder : public Conditioner {
         auto& t5_tokens  = token_and_weights[1].first;
         auto& t5_weights = token_and_weights[1].second;

-        int64_t t0 = ggml_time_ms();
-        struct ggml_tensor* hidden_states = NULL; // [N, n_token, 4096]
-        struct ggml_tensor* chunk_hidden_states = NULL; // [n_token, 4096]
-        struct ggml_tensor* pooled = NULL; // [768,]
+        int64_t t0                               = ggml_time_ms();
+        struct ggml_tensor* hidden_states        = NULL;  // [N, n_token, 4096]
+        struct ggml_tensor* chunk_hidden_states  = NULL;  // [n_token, 4096]
+        struct ggml_tensor* pooled               = NULL;  // [768,]

         std::vector<float> hidden_states_vec;

         size_t chunk_len = 256;

denoiser.hpp

@@ -351,7 +351,6 @@ struct DiscreteFlowDenoiser : public Denoiser {
     }
 };

-
 float flux_time_shift(float mu, float sigma, float t) {
     return std::exp(mu) / (std::exp(mu) + std::pow((1.0 / t - 1.0), sigma));
 }
@@ -369,7 +368,7 @@ struct FluxFlowDenoiser : public Denoiser {
     void set_parameters(float shift = 1.15f) {
         this->shift = shift;
         for (int i = 1; i < TIMESTEPS + 1; i++) {
-            sigmas[i - 1] = t_to_sigma(i/TIMESTEPS * TIMESTEPS);
+            sigmas[i - 1] = t_to_sigma(i / TIMESTEPS * TIMESTEPS);
         }
     }

diffusion_model.hpp

@@ -1,9 +1,9 @@
 #ifndef __DIFFUSION_MODEL_H__
 #define __DIFFUSION_MODEL_H__

+#include "flux.hpp"
 #include "mmdit.hpp"
 #include "unet.hpp"
-#include "flux.hpp"

 struct DiffusionModel {
     virtual void compute(int n_threads,
@@ -124,13 +124,12 @@ struct MMDiTModel : public DiffusionModel {
     }
 };

-
 struct FluxModel : public DiffusionModel {
     Flux::FluxRunner flux;

     FluxModel(ggml_backend_t backend,
-             ggml_type wtype,
-             SDVersion version = VERSION_FLUX_DEV)
+              ggml_type wtype,
+              SDVersion version = VERSION_FLUX_DEV)
         : flux(backend, wtype, version) {
     }

flux.hpp
(diff suppressed because it is too large: 1712 lines changed)

ggml_extend.hpp

@@ -541,7 +541,7 @@ __STATIC_INLINE__ void sd_tiling(ggml_tensor* input, ggml_tensor* output, const
 __STATIC_INLINE__ struct ggml_tensor* ggml_group_norm_32(struct ggml_context* ctx,
                                                          struct ggml_tensor* a) {
-    const float eps = 1e-6f; // default eps parameter
+    const float eps = 1e-6f;  // default eps parameter
     return ggml_group_norm(ctx, a, 32, eps);
 }
@@ -683,27 +683,27 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_nn_attention_ext(struct ggml_context*
                                                             int64_t n_head,
                                                             struct ggml_tensor* mask = NULL,
                                                             bool diag_mask_inf       = false,
-                                                            bool skip_reshape = false) {
+                                                            bool skip_reshape        = false) {
     int64_t L_q;
     int64_t L_k;
-    int64_t C ;
-    int64_t N ;
+    int64_t C;
+    int64_t N;
     int64_t d_head;
     if (!skip_reshape) {
-        L_q = q->ne[1];
-        L_k = k->ne[1];
-        C = q->ne[0];
-        N = q->ne[2];
+        L_q    = q->ne[1];
+        L_k    = k->ne[1];
+        C      = q->ne[0];
+        N      = q->ne[2];
         d_head = C / n_head;
-        q = ggml_reshape_4d(ctx, q, d_head, n_head, L_q, N); // [N, L_q, n_head, d_head]
-        q = ggml_cont(ctx, ggml_permute(ctx, q, 0, 2, 1, 3)); // [N, n_head, L_q, d_head]
-        q = ggml_reshape_3d(ctx, q, d_head, L_q, n_head * N); // [N * n_head, L_q, d_head]
+        q      = ggml_reshape_4d(ctx, q, d_head, n_head, L_q, N);  // [N, L_q, n_head, d_head]
+        q      = ggml_cont(ctx, ggml_permute(ctx, q, 0, 2, 1, 3));  // [N, n_head, L_q, d_head]
+        q      = ggml_reshape_3d(ctx, q, d_head, L_q, n_head * N);  // [N * n_head, L_q, d_head]

         k = ggml_reshape_4d(ctx, k, d_head, n_head, L_k, N);  // [N, L_k, n_head, d_head]
         k = ggml_cont(ctx, ggml_permute(ctx, k, 0, 2, 1, 3));  // [N, n_head, L_k, d_head]
         k = ggml_reshape_3d(ctx, k, d_head, L_k, n_head * N);  // [N * n_head, L_k, d_head]

-        v = ggml_reshape_4d(ctx, v, d_head, n_head, L_k, N); // [N, L_k, n_head, d_head]
+        v = ggml_reshape_4d(ctx, v, d_head, n_head, L_k, N);  // [N, L_k, n_head, d_head]
     } else {
         L_q    = q->ne[1];
         L_k    = k->ne[1];
@@ -712,10 +712,10 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_nn_attention_ext(struct ggml_context*
         C      = d_head * n_head;
     }

-    float scale = (1.0f / sqrt((float)d_head));
+    float scale         = (1.0f / sqrt((float)d_head));
     bool use_flash_attn = false;
-    ggml_tensor* kqv = NULL;
+    ggml_tensor* kqv    = NULL;
     if (use_flash_attn) {
         v = ggml_cont(ctx, ggml_permute(ctx, v, 0, 2, 1, 3));  // [N, n_head, L_k, d_head]
         v = ggml_reshape_3d(ctx, v, d_head, L_k, n_head * N);  // [N * n_head, L_k, d_head]
@@ -770,8 +770,8 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_nn_group_norm(struct ggml_context* ct
         b = ggml_reshape_4d(ctx, b, 1, 1, b->ne[0], 1);
     }

-    const float eps = 1e-6f; // default eps parameter
-    x = ggml_group_norm(ctx, x, num_groups, eps);
+    const float eps = 1e-6f;  // default eps parameter
+    x               = ggml_group_norm(ctx, x, num_groups, eps);
     if (w != NULL && b != NULL) {
         x = ggml_mul(ctx, x, w);
         // b = ggml_repeat(ctx, b, x);
@@ -781,7 +781,7 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_nn_group_norm(struct ggml_context* ct
 }

 __STATIC_INLINE__ void ggml_backend_tensor_get_and_sync(ggml_backend_t backend, const struct ggml_tensor* tensor, void* data, size_t offset, size_t size) {
-#if defined (SD_USE_CUBLAS) || defined (SD_USE_SYCL)
+#if defined(SD_USE_CUBLAS) || defined(SD_USE_SYCL)
     if (!ggml_backend_is_cpu(backend)) {
         ggml_backend_tensor_get_async(backend, tensor, data, offset, size);
         ggml_backend_synchronize(backend);
@@ -889,7 +889,7 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_nn_timestep_embedding(
     struct ggml_context* ctx,
     struct ggml_tensor* timesteps,
     int dim,
-    int max_period = 10000,
+    int max_period    = 10000,
     float time_factor = 1.0f) {
     timesteps = ggml_scale(ctx, timesteps, time_factor);
     return ggml_timestep_embedding(ctx, timesteps, dim, max_period);

lora.hpp

@@ -10,10 +10,10 @@ struct LoraModel : public GGMLRunner {
     std::map<std::string, struct ggml_tensor*> lora_tensors;
     std::string file_path;
     ModelLoader model_loader;
-    bool load_failed = false;
-    bool applied = false;
+    bool load_failed                = false;
+    bool applied                    = false;
     std::vector<int> zero_index_vec = {0};
-    ggml_tensor* zero_index = NULL;
+    ggml_tensor* zero_index         = NULL;

     LoraModel(ggml_backend_t backend,
               ggml_type wtype,
@@ -72,8 +72,8 @@ struct LoraModel : public GGMLRunner {
     ggml_tensor* to_f32(ggml_context* ctx, ggml_tensor* a) {
         auto out = ggml_reshape_1d(ctx, a, ggml_nelements(a));
-        out = ggml_get_rows(ctx, out, zero_index);
-        out = ggml_reshape(ctx, out, a);
+        out      = ggml_get_rows(ctx, out, zero_index);
+        out      = ggml_reshape(ctx, out, a);
         return out;
     }

model.cpp

@@ -567,10 +567,10 @@ uint16_t f8_e4m3_to_f16(uint8_t f8) {
         return ggml_fp32_to_fp16(NAN);
     }

-    uint32_t sign = f8 & 0x80;
+    uint32_t sign     = f8 & 0x80;
     uint32_t exponent = (f8 & 0x78) >> 3;
     uint32_t mantissa = f8 & 0x07;
-    uint32_t result = sign << 24;
+    uint32_t result   = sign << 24;
     if (exponent == 0) {
         if (mantissa > 0) {
             exponent = 0x7f - exponent_bias;
@@ -1399,8 +1399,8 @@ ggml_type ModelLoader::get_sd_wtype() {
         if (tensor_storage.name.find(".weight") != std::string::npos &&
             (tensor_storage.name.find("time_embed") != std::string::npos ||
-                tensor_storage.name.find("context_embedder") != std::string::npos ||
-                tensor_storage.name.find("time_in") != std::string::npos)) {
+             tensor_storage.name.find("context_embedder") != std::string::npos ||
+             tensor_storage.name.find("time_in") != std::string::npos)) {
             return tensor_storage.type;
         }
     }
@@ -1414,9 +1414,9 @@ ggml_type ModelLoader::get_conditioner_wtype() {
         }
         if ((tensor_storage.name.find("text_encoders") == std::string::npos &&
-                tensor_storage.name.find("cond_stage_model") == std::string::npos &&
-                tensor_storage.name.find("te.text_model.") == std::string::npos &&
-                tensor_storage.name.find("conditioner") == std::string::npos)) {
+             tensor_storage.name.find("cond_stage_model") == std::string::npos &&
+             tensor_storage.name.find("te.text_model.") == std::string::npos &&
+             tensor_storage.name.find("conditioner") == std::string::npos)) {
             continue;
         }
@@ -1427,7 +1427,6 @@ ggml_type ModelLoader::get_conditioner_wtype() {
     return GGML_TYPE_COUNT;
 }

-
 ggml_type ModelLoader::get_diffusion_model_wtype() {
     for (auto& tensor_storage : tensor_storages) {
         if (is_unused_tensor(tensor_storage.name)) {
@@ -1440,8 +1439,8 @@ ggml_type ModelLoader::get_diffusion_model_wtype() {
         if (tensor_storage.name.find(".weight") != std::string::npos &&
             (tensor_storage.name.find("time_embed") != std::string::npos ||
-                tensor_storage.name.find("context_embedder") != std::string::npos ||
-                tensor_storage.name.find("time_in") != std::string::npos)) {
+             tensor_storage.name.find("context_embedder") != std::string::npos ||
+             tensor_storage.name.find("time_in") != std::string::npos)) {
             return tensor_storage.type;
         }
     }

model.h

@@ -165,4 +165,3 @@ public:
 };

-
 #endif // __MODEL_H__

stable-diffusion.cpp

@@ -69,11 +69,10 @@ public:
     ggml_backend_t clip_backend        = NULL;
     ggml_backend_t control_net_backend = NULL;
     ggml_backend_t vae_backend         = NULL;
-
-    ggml_type model_wtype = GGML_TYPE_COUNT;
-    ggml_type conditioner_wtype = GGML_TYPE_COUNT;
-    ggml_type diffusion_model_wtype = GGML_TYPE_COUNT;
-    ggml_type vae_wtype = GGML_TYPE_COUNT;
+    ggml_type model_wtype              = GGML_TYPE_COUNT;
+    ggml_type conditioner_wtype        = GGML_TYPE_COUNT;
+    ggml_type diffusion_model_wtype    = GGML_TYPE_COUNT;
+    ggml_type vae_wtype                = GGML_TYPE_COUNT;

     SDVersion version;
     bool vae_decode_only = false;
@@ -171,7 +170,7 @@ public:
             backend = ggml_backend_cpu_init();
         }
 #ifdef SD_USE_FLASH_ATTENTION
-#if defined(SD_USE_CUBLAS) || defined(SD_USE_METAL) || defined (SD_USE_SYCL)
+#if defined(SD_USE_CUBLAS) || defined(SD_USE_METAL) || defined(SD_USE_SYCL)
         LOG_WARN("Flash Attention not supported with GPU Backend");
 #else
         LOG_INFO("Flash Attention enabled");
@@ -243,10 +242,10 @@ public:
             vae_wtype = wtype;
         }
     } else {
-        model_wtype = wtype;
-        conditioner_wtype = wtype;
+        model_wtype           = wtype;
+        conditioner_wtype     = wtype;
         diffusion_model_wtype = wtype;
-        vae_wtype = wtype;
+        vae_wtype             = wtype;
     }

     if (version == VERSION_SDXL) {
@@ -290,7 +289,7 @@ public:
         first_stage_model->alloc_params_buffer();
         first_stage_model->get_param_tensors(tensors, "first_stage_model");
     } else {
-        clip_backend = backend;
+        clip_backend   = backend;
         bool use_t5xxl = false;
         if (version == VERSION_SD3_2B || version == VERSION_FLUX_DEV || version == VERSION_FLUX_SCHNELL) {
             use_t5xxl = true;
@@ -508,7 +507,7 @@ public:
         LOG_INFO("running in Flux FLOW mode");
         float shift = 1.15f;
         if (version == VERSION_FLUX_SCHNELL) {
-            shift = 1.0f; // TODO: validate
+            shift = 1.0f;  // TODO: validate
         }
         denoiser = std::make_shared<FluxFlowDenoiser>(shift);
     } else if (is_using_v_parameterization) {

stable-diffusion.h

@@ -55,37 +55,37 @@ enum schedule_t {
 // same as enum ggml_type
 enum sd_type_t {
-    SD_TYPE_F32     = 0,
-    SD_TYPE_F16     = 1,
-    SD_TYPE_Q4_0    = 2,
-    SD_TYPE_Q4_1    = 3,
+    SD_TYPE_F32  = 0,
+    SD_TYPE_F16  = 1,
+    SD_TYPE_Q4_0 = 2,
+    SD_TYPE_Q4_1 = 3,
     // SD_TYPE_Q4_2 = 4, support has been removed
     // SD_TYPE_Q4_3 = 5, support has been removed
-    SD_TYPE_Q5_0    = 6,
-    SD_TYPE_Q5_1    = 7,
-    SD_TYPE_Q8_0    = 8,
-    SD_TYPE_Q8_1    = 9,
-    SD_TYPE_Q2_K    = 10,
-    SD_TYPE_Q3_K    = 11,
-    SD_TYPE_Q4_K    = 12,
-    SD_TYPE_Q5_K    = 13,
-    SD_TYPE_Q6_K    = 14,
-    SD_TYPE_Q8_K    = 15,
-    SD_TYPE_IQ2_XXS = 16,
-    SD_TYPE_IQ2_XS  = 17,
-    SD_TYPE_IQ3_XXS = 18,
-    SD_TYPE_IQ1_S   = 19,
-    SD_TYPE_IQ4_NL  = 20,
-    SD_TYPE_IQ3_S   = 21,
-    SD_TYPE_IQ2_S   = 22,
-    SD_TYPE_IQ4_XS  = 23,
-    SD_TYPE_I8      = 24,
-    SD_TYPE_I16     = 25,
-    SD_TYPE_I32     = 26,
-    SD_TYPE_I64     = 27,
-    SD_TYPE_F64     = 28,
-    SD_TYPE_IQ1_M   = 29,
-    SD_TYPE_BF16    = 30,
+    SD_TYPE_Q5_0     = 6,
+    SD_TYPE_Q5_1     = 7,
+    SD_TYPE_Q8_0     = 8,
+    SD_TYPE_Q8_1     = 9,
+    SD_TYPE_Q2_K     = 10,
+    SD_TYPE_Q3_K     = 11,
+    SD_TYPE_Q4_K     = 12,
+    SD_TYPE_Q5_K     = 13,
+    SD_TYPE_Q6_K     = 14,
+    SD_TYPE_Q8_K     = 15,
+    SD_TYPE_IQ2_XXS  = 16,
+    SD_TYPE_IQ2_XS   = 17,
+    SD_TYPE_IQ3_XXS  = 18,
+    SD_TYPE_IQ1_S    = 19,
+    SD_TYPE_IQ4_NL   = 20,
+    SD_TYPE_IQ3_S    = 21,
+    SD_TYPE_IQ2_S    = 22,
+    SD_TYPE_IQ4_XS   = 23,
+    SD_TYPE_I8       = 24,
+    SD_TYPE_I16      = 25,
+    SD_TYPE_I32      = 26,
+    SD_TYPE_I64      = 27,
+    SD_TYPE_F64      = 28,
+    SD_TYPE_IQ1_M    = 29,
+    SD_TYPE_BF16     = 30,
     SD_TYPE_Q4_0_4_4 = 31,
     SD_TYPE_Q4_0_4_8 = 32,
     SD_TYPE_Q4_0_8_8 = 33,