style: format code

parent d08d7fa632
commit c837c5d9cc
@@ -10,8 +10,8 @@ struct SDCondition {
     struct ggml_tensor* c_concat = NULL;

     SDCondition() = default;
-    SDCondition(struct ggml_tensor* c_crossattn, struct ggml_tensor* c_vector, struct ggml_tensor* c_concat) :
-        c_crossattn(c_crossattn), c_vector(c_vector), c_concat(c_concat) {}
+    SDCondition(struct ggml_tensor* c_crossattn, struct ggml_tensor* c_vector, struct ggml_tensor* c_concat)
+        : c_crossattn(c_crossattn), c_vector(c_vector), c_concat(c_concat) {}
 };

 struct Conditioner {

@@ -978,7 +978,6 @@ struct SD3CLIPEmbedder : public Conditioner {
     }
 };

-
 struct FluxCLIPEmbedder : public Conditioner {
     ggml_type wtype;
     CLIPTokenizer clip_l_tokenizer;

@@ -987,8 +986,8 @@ struct FluxCLIPEmbedder : public Conditioner {
     std::shared_ptr<T5Runner> t5;

     FluxCLIPEmbedder(ggml_backend_t backend,
-                     ggml_type wtype,
-                     int clip_skip = -1)
+                     ggml_type wtype,
+                     int clip_skip = -1)
         : wtype(wtype) {
         if (clip_skip <= 0) {
             clip_skip = 2;

@@ -1085,10 +1084,10 @@ struct FluxCLIPEmbedder : public Conditioner {
         auto& t5_tokens = token_and_weights[1].first;
         auto& t5_weights = token_and_weights[1].second;

-        int64_t t0 = ggml_time_ms();
-        struct ggml_tensor* hidden_states = NULL; // [N, n_token, 4096]
-        struct ggml_tensor* chunk_hidden_states = NULL; // [n_token, 4096]
-        struct ggml_tensor* pooled = NULL; // [768,]
+        int64_t t0 = ggml_time_ms();
+        struct ggml_tensor* hidden_states = NULL; // [N, n_token, 4096]
+        struct ggml_tensor* chunk_hidden_states = NULL; // [n_token, 4096]
+        struct ggml_tensor* pooled = NULL; // [768,]
         std::vector<float> hidden_states_vec;

         size_t chunk_len = 256;

@@ -351,7 +351,6 @@ struct DiscreteFlowDenoiser : public Denoiser {
     }
 };

-
 float flux_time_shift(float mu, float sigma, float t) {
     return std::exp(mu) / (std::exp(mu) + std::pow((1.0 / t - 1.0), sigma));
 }

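Side note, not part of the commit: flux_time_shift above is the sigmoid-style time warp behind the Flux flow schedule (the FluxFlowDenoiser in the next hunk presumably feeds its shift value in as mu). With sigma = 1 it reduces to t*e^mu / (t*e^mu + 1 - t), so mu = 0 is the identity map and larger mu pushes t toward 1. A minimal standalone check of the formula exactly as shown, with arbitrarily chosen sample values:

// Standalone check of the flux_time_shift formula from the hunk above.
// Illustration only; the sample t and mu values are made up.
#include <cmath>
#include <cstdio>
#include <initializer_list>

static float flux_time_shift(float mu, float sigma, float t) {
    return std::exp(mu) / (std::exp(mu) + std::pow((1.0 / t - 1.0), sigma));
}

int main() {
    for (float t : {0.25f, 0.5f, 0.75f, 1.0f}) {
        // With sigma = 1: mu = 0 returns t unchanged, mu = 1.15 shifts t toward 1.
        std::printf("t=%.2f -> mu=0: %.4f, mu=1.15: %.4f\n",
                    t, flux_time_shift(0.0f, 1.0f, t), flux_time_shift(1.15f, 1.0f, t));
    }
    return 0;
}
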
@@ -369,7 +368,7 @@ struct FluxFlowDenoiser : public Denoiser {
     void set_parameters(float shift = 1.15f) {
         this->shift = shift;
         for (int i = 1; i < TIMESTEPS + 1; i++) {
-            sigmas[i - 1] = t_to_sigma(i/TIMESTEPS * TIMESTEPS);
+            sigmas[i - 1] = t_to_sigma(i / TIMESTEPS * TIMESTEPS);
         }
     }

@@ -1,9 +1,9 @@
 #ifndef __DIFFUSION_MODEL_H__
 #define __DIFFUSION_MODEL_H__

+#include "flux.hpp"
 #include "mmdit.hpp"
 #include "unet.hpp"
-#include "flux.hpp"

 struct DiffusionModel {
     virtual void compute(int n_threads,

@@ -124,13 +124,12 @@ struct MMDiTModel : public DiffusionModel {
     }
 };

-
 struct FluxModel : public DiffusionModel {
     Flux::FluxRunner flux;

     FluxModel(ggml_backend_t backend,
-              ggml_type wtype,
-              SDVersion version = VERSION_FLUX_DEV)
+              ggml_type wtype,
+              SDVersion version = VERSION_FLUX_DEV)
         : flux(backend, wtype, version) {
     }

@@ -541,7 +541,7 @@ __STATIC_INLINE__ void sd_tiling(ggml_tensor* input, ggml_tensor* output, const

 __STATIC_INLINE__ struct ggml_tensor* ggml_group_norm_32(struct ggml_context* ctx,
                                                          struct ggml_tensor* a) {
-    const float eps = 1e-6f; // default eps parameter
+    const float eps = 1e-6f; // default eps parameter
     return ggml_group_norm(ctx, a, 32, eps);
 }

@@ -683,27 +683,27 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_nn_attention_ext(struct ggml_context*
                                                             int64_t n_head,
                                                             struct ggml_tensor* mask = NULL,
                                                             bool diag_mask_inf = false,
-                                                            bool skip_reshape = false) {
+                                                            bool skip_reshape = false) {
     int64_t L_q;
     int64_t L_k;
-    int64_t C ;
-    int64_t N ;
+    int64_t C;
+    int64_t N;
     int64_t d_head;
     if (!skip_reshape) {
-        L_q = q->ne[1];
-        L_k = k->ne[1];
-        C = q->ne[0];
-        N = q->ne[2];
+        L_q = q->ne[1];
+        L_k = k->ne[1];
+        C = q->ne[0];
+        N = q->ne[2];
         d_head = C / n_head;
-        q = ggml_reshape_4d(ctx, q, d_head, n_head, L_q, N); // [N, L_q, n_head, d_head]
-        q = ggml_cont(ctx, ggml_permute(ctx, q, 0, 2, 1, 3)); // [N, n_head, L_q, d_head]
-        q = ggml_reshape_3d(ctx, q, d_head, L_q, n_head * N); // [N * n_head, L_q, d_head]
+        q = ggml_reshape_4d(ctx, q, d_head, n_head, L_q, N); // [N, L_q, n_head, d_head]
+        q = ggml_cont(ctx, ggml_permute(ctx, q, 0, 2, 1, 3)); // [N, n_head, L_q, d_head]
+        q = ggml_reshape_3d(ctx, q, d_head, L_q, n_head * N); // [N * n_head, L_q, d_head]

         k = ggml_reshape_4d(ctx, k, d_head, n_head, L_k, N); // [N, L_k, n_head, d_head]
         k = ggml_cont(ctx, ggml_permute(ctx, k, 0, 2, 1, 3)); // [N, n_head, L_k, d_head]
         k = ggml_reshape_3d(ctx, k, d_head, L_k, n_head * N); // [N * n_head, L_k, d_head]

-        v = ggml_reshape_4d(ctx, v, d_head, n_head, L_k, N); // [N, L_k, n_head, d_head]
+        v = ggml_reshape_4d(ctx, v, d_head, n_head, L_k, N); // [N, L_k, n_head, d_head]
     } else {
         L_q = q->ne[1];
         L_k = k->ne[1];

@@ -712,10 +712,10 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_nn_attention_ext(struct ggml_context*
         C = d_head * n_head;
     }

-    float scale = (1.0f / sqrt((float)d_head));
+    float scale = (1.0f / sqrt((float)d_head));

     bool use_flash_attn = false;
-    ggml_tensor* kqv = NULL;
+    ggml_tensor* kqv = NULL;
     if (use_flash_attn) {
         v = ggml_cont(ctx, ggml_permute(ctx, v, 0, 2, 1, 3)); // [N, n_head, L_k, d_head]
         v = ggml_reshape_3d(ctx, v, d_head, L_k, n_head * N); // [N * n_head, L_k, d_head]

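Side note, not part of the commit: the scale in this hunk is the standard 1/sqrt(d_head) factor of scaled dot-product attention. When use_flash_attn is false, the rest of ggml_nn_attention_ext (not shown in this hunk) presumably assembles the usual softmax(Q K^T * scale) V graph from ggml ops; the sketch below is a plain scalar reference of that per-head computation, assuming row-major [L, d_head] buffers. The helper name attention_one_head is made up for the example; this is illustration only, not the ggml graph code.

// Scalar reference for one attention head: out = softmax(q . k^T * scale) v.
#include <algorithm>
#include <cmath>
#include <vector>

std::vector<float> attention_one_head(const std::vector<float>& q,  // [L_q * d_head]
                                      const std::vector<float>& k,  // [L_k * d_head]
                                      const std::vector<float>& v,  // [L_k * d_head]
                                      int L_q, int L_k, int d_head) {
    float scale = 1.0f / std::sqrt((float)d_head);
    std::vector<float> out(L_q * d_head, 0.0f);
    for (int i = 0; i < L_q; i++) {
        // scores[j] = scale * <q_i, k_j>
        std::vector<float> scores(L_k);
        float max_score = -INFINITY;
        for (int j = 0; j < L_k; j++) {
            float s = 0.0f;
            for (int d = 0; d < d_head; d++)
                s += q[i * d_head + d] * k[j * d_head + d];
            scores[j] = s * scale;
            max_score = std::max(max_score, scores[j]);
        }
        // softmax over j, max-subtracted for numerical stability
        float sum = 0.0f;
        for (int j = 0; j < L_k; j++) {
            scores[j] = std::exp(scores[j] - max_score);
            sum += scores[j];
        }
        // out_i = weighted sum of v rows
        for (int j = 0; j < L_k; j++)
            for (int d = 0; d < d_head; d++)
                out[i * d_head + d] += (scores[j] / sum) * v[j * d_head + d];
    }
    return out;
}
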
@@ -770,8 +770,8 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_nn_group_norm(struct ggml_context* ct
         b = ggml_reshape_4d(ctx, b, 1, 1, b->ne[0], 1);
     }

-    const float eps = 1e-6f; // default eps parameter
-    x = ggml_group_norm(ctx, x, num_groups, eps);
+    const float eps = 1e-6f; // default eps parameter
+    x = ggml_group_norm(ctx, x, num_groups, eps);
     if (w != NULL && b != NULL) {
         x = ggml_mul(ctx, x, w);
         // b = ggml_repeat(ctx, b, x);

@@ -781,7 +781,7 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_nn_group_norm(struct ggml_context* ct
 }

 __STATIC_INLINE__ void ggml_backend_tensor_get_and_sync(ggml_backend_t backend, const struct ggml_tensor* tensor, void* data, size_t offset, size_t size) {
-#if defined (SD_USE_CUBLAS) || defined (SD_USE_SYCL)
+#if defined(SD_USE_CUBLAS) || defined(SD_USE_SYCL)
     if (!ggml_backend_is_cpu(backend)) {
         ggml_backend_tensor_get_async(backend, tensor, data, offset, size);
         ggml_backend_synchronize(backend);

@@ -889,7 +889,7 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_nn_timestep_embedding(
     struct ggml_context* ctx,
     struct ggml_tensor* timesteps,
     int dim,
-    int max_period = 10000,
+    int max_period = 10000,
     float time_factor = 1.0f) {
     timesteps = ggml_scale(ctx, timesteps, time_factor);
     return ggml_timestep_embedding(ctx, timesteps, dim, max_period);

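Side note, not part of the commit: ggml_nn_timestep_embedding only scales the timesteps and defers to ggml's ggml_timestep_embedding, which implements the usual sinusoidal embedding. The sketch below is a reference formulation of that construction; the helper name timestep_embedding is made up for the example, and the exact ordering of the cos/sin halves inside ggml is an assumption here.

// Reference sketch of a sinusoidal timestep embedding of size dim.
#include <cmath>
#include <vector>

std::vector<float> timestep_embedding(float t, int dim, int max_period = 10000) {
    int half = dim / 2;
    std::vector<float> out(dim, 0.0f);
    for (int i = 0; i < half; i++) {
        // Geometrically spaced frequencies from 1 down to 1/max_period.
        float freq = std::exp(-std::log((float)max_period) * i / half);
        out[i]        = std::cos(t * freq);
        out[half + i] = std::sin(t * freq);
    }
    return out;
}
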
lora.hpp

@@ -10,10 +10,10 @@ struct LoraModel : public GGMLRunner {
     std::map<std::string, struct ggml_tensor*> lora_tensors;
     std::string file_path;
     ModelLoader model_loader;
-    bool load_failed = false;
-    bool applied = false;
+    bool load_failed = false;
+    bool applied = false;
     std::vector<int> zero_index_vec = {0};
-    ggml_tensor* zero_index = NULL;
+    ggml_tensor* zero_index = NULL;

     LoraModel(ggml_backend_t backend,
               ggml_type wtype,

@@ -72,8 +72,8 @@ struct LoraModel : public GGMLRunner {

     ggml_tensor* to_f32(ggml_context* ctx, ggml_tensor* a) {
         auto out = ggml_reshape_1d(ctx, a, ggml_nelements(a));
-        out = ggml_get_rows(ctx, out, zero_index);
-        out = ggml_reshape(ctx, out, a);
+        out = ggml_get_rows(ctx, out, zero_index);
+        out = ggml_reshape(ctx, out, a);
         return out;
     }

model.cpp

@@ -567,10 +567,10 @@ uint16_t f8_e4m3_to_f16(uint8_t f8) {
         return ggml_fp32_to_fp16(NAN);
     }

-    uint32_t sign = f8 & 0x80;
+    uint32_t sign = f8 & 0x80;
     uint32_t exponent = (f8 & 0x78) >> 3;
     uint32_t mantissa = f8 & 0x07;
-    uint32_t result = sign << 24;
+    uint32_t result = sign << 24;
     if (exponent == 0) {
         if (mantissa > 0) {
             exponent = 0x7f - exponent_bias;

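Side note, not part of the commit: the masks in this hunk split the FP8 byte into 1 sign bit, 4 exponent bits and 3 mantissa bits. Assuming the standard E4M3 exponent bias of 7, a normal value (exponent field non-zero, not NaN) decodes as (-1)^sign * 2^(exponent-7) * (1 + mantissa/8). The sketch below covers only that normal-value path and uses a made-up helper name; the real f8_e4m3_to_f16 in model.cpp also handles subnormals and NaN.

// Decode a normal E4M3 byte to float, assuming the standard bias of 7.
#include <cmath>
#include <cstdint>
#include <cstdio>

float f8_e4m3_to_f32_normal(uint8_t f8) {
    uint32_t sign     = f8 & 0x80;         // 1 sign bit
    uint32_t exponent = (f8 & 0x78) >> 3;  // 4 exponent bits
    uint32_t mantissa = f8 & 0x07;         // 3 mantissa bits
    float v = std::ldexp(1.0f + mantissa / 8.0f, (int)exponent - 7);
    return sign ? -v : v;
}

int main() {
    std::printf("%f\n", f8_e4m3_to_f32_normal(0x40));  // exponent 8, mantissa 0 -> 2.0
    std::printf("%f\n", f8_e4m3_to_f32_normal(0x38));  // exponent 7, mantissa 0 -> 1.0
    return 0;
}
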
@@ -1399,8 +1399,8 @@ ggml_type ModelLoader::get_sd_wtype() {

         if (tensor_storage.name.find(".weight") != std::string::npos &&
             (tensor_storage.name.find("time_embed") != std::string::npos ||
-             tensor_storage.name.find("context_embedder") != std::string::npos ||
-             tensor_storage.name.find("time_in") != std::string::npos)) {
+             tensor_storage.name.find("context_embedder") != std::string::npos ||
+             tensor_storage.name.find("time_in") != std::string::npos)) {
             return tensor_storage.type;
         }
     }

@@ -1414,9 +1414,9 @@ ggml_type ModelLoader::get_conditioner_wtype() {
         }

         if ((tensor_storage.name.find("text_encoders") == std::string::npos &&
-             tensor_storage.name.find("cond_stage_model") == std::string::npos &&
-             tensor_storage.name.find("te.text_model.") == std::string::npos &&
-             tensor_storage.name.find("conditioner") == std::string::npos)) {
+             tensor_storage.name.find("cond_stage_model") == std::string::npos &&
+             tensor_storage.name.find("te.text_model.") == std::string::npos &&
+             tensor_storage.name.find("conditioner") == std::string::npos)) {
             continue;
         }

@@ -1427,7 +1427,6 @@ ggml_type ModelLoader::get_conditioner_wtype() {
     return GGML_TYPE_COUNT;
 }

-
 ggml_type ModelLoader::get_diffusion_model_wtype() {
     for (auto& tensor_storage : tensor_storages) {
         if (is_unused_tensor(tensor_storage.name)) {

@@ -1440,8 +1439,8 @@ ggml_type ModelLoader::get_diffusion_model_wtype() {

         if (tensor_storage.name.find(".weight") != std::string::npos &&
             (tensor_storage.name.find("time_embed") != std::string::npos ||
-             tensor_storage.name.find("context_embedder") != std::string::npos ||
-             tensor_storage.name.find("time_in") != std::string::npos)) {
+             tensor_storage.name.find("context_embedder") != std::string::npos ||
+             tensor_storage.name.find("time_in") != std::string::npos)) {
             return tensor_storage.type;
         }
     }

@@ -69,11 +69,10 @@ public:
     ggml_backend_t clip_backend = NULL;
     ggml_backend_t control_net_backend = NULL;
     ggml_backend_t vae_backend = NULL;
-    ggml_type model_wtype = GGML_TYPE_COUNT;
-    ggml_type conditioner_wtype = GGML_TYPE_COUNT;
-    ggml_type diffusion_model_wtype = GGML_TYPE_COUNT;
-    ggml_type vae_wtype = GGML_TYPE_COUNT;
-
+    ggml_type model_wtype = GGML_TYPE_COUNT;
+    ggml_type conditioner_wtype = GGML_TYPE_COUNT;
+    ggml_type diffusion_model_wtype = GGML_TYPE_COUNT;
+    ggml_type vae_wtype = GGML_TYPE_COUNT;

     SDVersion version;
     bool vae_decode_only = false;

@@ -171,7 +170,7 @@ public:
             backend = ggml_backend_cpu_init();
         }
 #ifdef SD_USE_FLASH_ATTENTION
-#if defined(SD_USE_CUBLAS) || defined(SD_USE_METAL) || defined (SD_USE_SYCL)
+#if defined(SD_USE_CUBLAS) || defined(SD_USE_METAL) || defined(SD_USE_SYCL)
         LOG_WARN("Flash Attention not supported with GPU Backend");
 #else
         LOG_INFO("Flash Attention enabled");

@@ -243,10 +242,10 @@ public:
                 vae_wtype = wtype;
             }
         } else {
-            model_wtype = wtype;
-            conditioner_wtype = wtype;
+            model_wtype = wtype;
+            conditioner_wtype = wtype;
             diffusion_model_wtype = wtype;
-            vae_wtype = wtype;
+            vae_wtype = wtype;
         }

         if (version == VERSION_SDXL) {

@@ -290,7 +289,7 @@ public:
             first_stage_model->alloc_params_buffer();
             first_stage_model->get_param_tensors(tensors, "first_stage_model");
         } else {
-            clip_backend = backend;
+            clip_backend = backend;
             bool use_t5xxl = false;
             if (version == VERSION_SD3_2B || version == VERSION_FLUX_DEV || version == VERSION_FLUX_SCHNELL) {
                 use_t5xxl = true;

@@ -508,7 +507,7 @@ public:
             LOG_INFO("running in Flux FLOW mode");
             float shift = 1.15f;
             if (version == VERSION_FLUX_SCHNELL) {
-                shift = 1.0f; // TODO: validate
+                shift = 1.0f; // TODO: validate
             }
             denoiser = std::make_shared<FluxFlowDenoiser>(shift);
         } else if (is_using_v_parameterization) {

@@ -55,37 +55,37 @@ enum schedule_t {

 // same as enum ggml_type
 enum sd_type_t {
-    SD_TYPE_F32 = 0,
-    SD_TYPE_F16 = 1,
-    SD_TYPE_Q4_0 = 2,
-    SD_TYPE_Q4_1 = 3,
+    SD_TYPE_F32 = 0,
+    SD_TYPE_F16 = 1,
+    SD_TYPE_Q4_0 = 2,
+    SD_TYPE_Q4_1 = 3,
     // SD_TYPE_Q4_2 = 4, support has been removed
     // SD_TYPE_Q4_3 = 5, support has been removed
-    SD_TYPE_Q5_0 = 6,
-    SD_TYPE_Q5_1 = 7,
-    SD_TYPE_Q8_0 = 8,
-    SD_TYPE_Q8_1 = 9,
-    SD_TYPE_Q2_K = 10,
-    SD_TYPE_Q3_K = 11,
-    SD_TYPE_Q4_K = 12,
-    SD_TYPE_Q5_K = 13,
-    SD_TYPE_Q6_K = 14,
-    SD_TYPE_Q8_K = 15,
-    SD_TYPE_IQ2_XXS = 16,
-    SD_TYPE_IQ2_XS = 17,
-    SD_TYPE_IQ3_XXS = 18,
-    SD_TYPE_IQ1_S = 19,
-    SD_TYPE_IQ4_NL = 20,
-    SD_TYPE_IQ3_S = 21,
-    SD_TYPE_IQ2_S = 22,
-    SD_TYPE_IQ4_XS = 23,
-    SD_TYPE_I8 = 24,
-    SD_TYPE_I16 = 25,
-    SD_TYPE_I32 = 26,
-    SD_TYPE_I64 = 27,
-    SD_TYPE_F64 = 28,
-    SD_TYPE_IQ1_M = 29,
-    SD_TYPE_BF16 = 30,
+    SD_TYPE_Q5_0 = 6,
+    SD_TYPE_Q5_1 = 7,
+    SD_TYPE_Q8_0 = 8,
+    SD_TYPE_Q8_1 = 9,
+    SD_TYPE_Q2_K = 10,
+    SD_TYPE_Q3_K = 11,
+    SD_TYPE_Q4_K = 12,
+    SD_TYPE_Q5_K = 13,
+    SD_TYPE_Q6_K = 14,
+    SD_TYPE_Q8_K = 15,
+    SD_TYPE_IQ2_XXS = 16,
+    SD_TYPE_IQ2_XS = 17,
+    SD_TYPE_IQ3_XXS = 18,
+    SD_TYPE_IQ1_S = 19,
+    SD_TYPE_IQ4_NL = 20,
+    SD_TYPE_IQ3_S = 21,
+    SD_TYPE_IQ2_S = 22,
+    SD_TYPE_IQ4_XS = 23,
+    SD_TYPE_I8 = 24,
+    SD_TYPE_I16 = 25,
+    SD_TYPE_I32 = 26,
+    SD_TYPE_I64 = 27,
+    SD_TYPE_F64 = 28,
+    SD_TYPE_IQ1_M = 29,
+    SD_TYPE_BF16 = 30,
     SD_TYPE_Q4_0_4_4 = 31,
     SD_TYPE_Q4_0_4_8 = 32,
     SD_TYPE_Q4_0_8_8 = 33,