style: format code

parent d08d7fa632
commit c837c5d9cc
@@ -10,8 +10,8 @@ struct SDCondition {
     struct ggml_tensor* c_concat = NULL;

     SDCondition() = default;
-    SDCondition(struct ggml_tensor* c_crossattn, struct ggml_tensor* c_vector, struct ggml_tensor* c_concat) :
-        c_crossattn(c_crossattn), c_vector(c_vector), c_concat(c_concat) {}
+    SDCondition(struct ggml_tensor* c_crossattn, struct ggml_tensor* c_vector, struct ggml_tensor* c_concat)
+        : c_crossattn(c_crossattn), c_vector(c_vector), c_concat(c_concat) {}
 };

 struct Conditioner {

@@ -978,7 +978,6 @@ struct SD3CLIPEmbedder : public Conditioner {
     }
 };

-
 struct FluxCLIPEmbedder : public Conditioner {
     ggml_type wtype;
     CLIPTokenizer clip_l_tokenizer;

@@ -987,8 +986,8 @@ struct FluxCLIPEmbedder : public Conditioner {
     std::shared_ptr<T5Runner> t5;

     FluxCLIPEmbedder(ggml_backend_t backend,
-                     ggml_type wtype,
-                     int clip_skip = -1)
+                     ggml_type wtype,
+                     int clip_skip = -1)
         : wtype(wtype) {
         if (clip_skip <= 0) {
             clip_skip = 2;

@@ -1085,10 +1084,10 @@ struct FluxCLIPEmbedder : public Conditioner {
         auto& t5_tokens = token_and_weights[1].first;
         auto& t5_weights = token_and_weights[1].second;

-        int64_t t0 = ggml_time_ms();
-        struct ggml_tensor* hidden_states = NULL; // [N, n_token, 4096]
-        struct ggml_tensor* chunk_hidden_states = NULL; // [n_token, 4096]
-        struct ggml_tensor* pooled = NULL; // [768,]
+        int64_t t0 = ggml_time_ms();
+        struct ggml_tensor* hidden_states = NULL; // [N, n_token, 4096]
+        struct ggml_tensor* chunk_hidden_states = NULL; // [n_token, 4096]
+        struct ggml_tensor* pooled = NULL; // [768,]
         std::vector<float> hidden_states_vec;

         size_t chunk_len = 256;

@@ -351,7 +351,6 @@ struct DiscreteFlowDenoiser : public Denoiser {
     }
 };

-
 float flux_time_shift(float mu, float sigma, float t) {
     return std::exp(mu) / (std::exp(mu) + std::pow((1.0 / t - 1.0), sigma));
 }

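Side note, not part of the commit: flux_time_shift above is the sigmoid-style time warp behind the Flux flow schedule (the FluxFlowDenoiser in the next hunk presumably feeds its shift value in as mu). With sigma = 1 it reduces to t*e^mu / (t*e^mu + 1 - t), so mu = 0 is the identity map and larger mu pushes t toward 1. A minimal standalone check of the formula exactly as shown, with arbitrarily chosen sample values:

// Standalone check of the flux_time_shift formula from the hunk above.
// Illustration only; the sample t and mu values are made up.
#include <cmath>
#include <cstdio>
#include <initializer_list>

static float flux_time_shift(float mu, float sigma, float t) {
    return std::exp(mu) / (std::exp(mu) + std::pow((1.0 / t - 1.0), sigma));
}

int main() {
    for (float t : {0.25f, 0.5f, 0.75f, 1.0f}) {
        // With sigma = 1: mu = 0 returns t unchanged, mu = 1.15 shifts t toward 1.
        std::printf("t=%.2f -> mu=0: %.4f, mu=1.15: %.4f\n",
                    t, flux_time_shift(0.0f, 1.0f, t), flux_time_shift(1.15f, 1.0f, t));
    }
    return 0;
}
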
@@ -369,7 +368,7 @@ struct FluxFlowDenoiser : public Denoiser {
     void set_parameters(float shift = 1.15f) {
         this->shift = shift;
         for (int i = 1; i < TIMESTEPS + 1; i++) {
-            sigmas[i - 1] = t_to_sigma(i/TIMESTEPS * TIMESTEPS);
+            sigmas[i - 1] = t_to_sigma(i / TIMESTEPS * TIMESTEPS);
         }
     }

@@ -1,9 +1,9 @@
 #ifndef __DIFFUSION_MODEL_H__
 #define __DIFFUSION_MODEL_H__

+#include "flux.hpp"
 #include "mmdit.hpp"
 #include "unet.hpp"
-#include "flux.hpp"

 struct DiffusionModel {
     virtual void compute(int n_threads,

@@ -124,13 +124,12 @@ struct MMDiTModel : public DiffusionModel {
     }
 };

-
 struct FluxModel : public DiffusionModel {
     Flux::FluxRunner flux;

     FluxModel(ggml_backend_t backend,
-              ggml_type wtype,
-              SDVersion version = VERSION_FLUX_DEV)
+              ggml_type wtype,
+              SDVersion version = VERSION_FLUX_DEV)
         : flux(backend, wtype, version) {
     }

@@ -541,7 +541,7 @@ __STATIC_INLINE__ void sd_tiling(ggml_tensor* input, ggml_tensor* output, const

 __STATIC_INLINE__ struct ggml_tensor* ggml_group_norm_32(struct ggml_context* ctx,
                                                          struct ggml_tensor* a) {
-    const float eps = 1e-6f; // default eps parameter
+    const float eps = 1e-6f; // default eps parameter
     return ggml_group_norm(ctx, a, 32, eps);
 }

@@ -683,27 +683,27 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_nn_attention_ext(struct ggml_context*
                                                             int64_t n_head,
                                                             struct ggml_tensor* mask = NULL,
                                                             bool diag_mask_inf = false,
-                                                            bool skip_reshape = false) {
+                                                            bool skip_reshape = false) {
     int64_t L_q;
     int64_t L_k;
-    int64_t C ;
-    int64_t N ;
+    int64_t C;
+    int64_t N;
     int64_t d_head;
     if (!skip_reshape) {
-        L_q = q->ne[1];
-        L_k = k->ne[1];
-        C = q->ne[0];
-        N = q->ne[2];
+        L_q = q->ne[1];
+        L_k = k->ne[1];
+        C = q->ne[0];
+        N = q->ne[2];
         d_head = C / n_head;
-        q = ggml_reshape_4d(ctx, q, d_head, n_head, L_q, N); // [N, L_q, n_head, d_head]
-        q = ggml_cont(ctx, ggml_permute(ctx, q, 0, 2, 1, 3)); // [N, n_head, L_q, d_head]
-        q = ggml_reshape_3d(ctx, q, d_head, L_q, n_head * N); // [N * n_head, L_q, d_head]
+        q = ggml_reshape_4d(ctx, q, d_head, n_head, L_q, N); // [N, L_q, n_head, d_head]
+        q = ggml_cont(ctx, ggml_permute(ctx, q, 0, 2, 1, 3)); // [N, n_head, L_q, d_head]
+        q = ggml_reshape_3d(ctx, q, d_head, L_q, n_head * N); // [N * n_head, L_q, d_head]

         k = ggml_reshape_4d(ctx, k, d_head, n_head, L_k, N); // [N, L_k, n_head, d_head]
         k = ggml_cont(ctx, ggml_permute(ctx, k, 0, 2, 1, 3)); // [N, n_head, L_k, d_head]
         k = ggml_reshape_3d(ctx, k, d_head, L_k, n_head * N); // [N * n_head, L_k, d_head]

-        v = ggml_reshape_4d(ctx, v, d_head, n_head, L_k, N); // [N, L_k, n_head, d_head]
+        v = ggml_reshape_4d(ctx, v, d_head, n_head, L_k, N); // [N, L_k, n_head, d_head]
     } else {
         L_q = q->ne[1];
         L_k = k->ne[1];

@@ -712,10 +712,10 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_nn_attention_ext(struct ggml_context*
         C = d_head * n_head;
     }

-    float scale = (1.0f / sqrt((float)d_head));
+    float scale = (1.0f / sqrt((float)d_head));

     bool use_flash_attn = false;
-    ggml_tensor* kqv = NULL;
+    ggml_tensor* kqv = NULL;
     if (use_flash_attn) {
         v = ggml_cont(ctx, ggml_permute(ctx, v, 0, 2, 1, 3)); // [N, n_head, L_k, d_head]
         v = ggml_reshape_3d(ctx, v, d_head, L_k, n_head * N); // [N * n_head, L_k, d_head]

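Side note, not part of the commit: the scale in this hunk is the standard 1/sqrt(d_head) factor of scaled dot-product attention. When use_flash_attn is false, the rest of ggml_nn_attention_ext (not shown in this hunk) presumably assembles the usual softmax(Q K^T * scale) V graph from ggml ops; the sketch below is a plain scalar reference of that per-head computation, assuming row-major [L, d_head] buffers. The helper name attention_one_head is made up for the example; this is illustration only, not the ggml graph code.

// Scalar reference for one attention head: out = softmax(q . k^T * scale) v.
#include <algorithm>
#include <cmath>
#include <vector>

std::vector<float> attention_one_head(const std::vector<float>& q,  // [L_q * d_head]
                                      const std::vector<float>& k,  // [L_k * d_head]
                                      const std::vector<float>& v,  // [L_k * d_head]
                                      int L_q, int L_k, int d_head) {
    float scale = 1.0f / std::sqrt((float)d_head);
    std::vector<float> out(L_q * d_head, 0.0f);
    for (int i = 0; i < L_q; i++) {
        // scores[j] = scale * <q_i, k_j>
        std::vector<float> scores(L_k);
        float max_score = -INFINITY;
        for (int j = 0; j < L_k; j++) {
            float s = 0.0f;
            for (int d = 0; d < d_head; d++)
                s += q[i * d_head + d] * k[j * d_head + d];
            scores[j] = s * scale;
            max_score = std::max(max_score, scores[j]);
        }
        // softmax over j, max-subtracted for numerical stability
        float sum = 0.0f;
        for (int j = 0; j < L_k; j++) {
            scores[j] = std::exp(scores[j] - max_score);
            sum += scores[j];
        }
        // out_i = weighted sum of v rows
        for (int j = 0; j < L_k; j++)
            for (int d = 0; d < d_head; d++)
                out[i * d_head + d] += (scores[j] / sum) * v[j * d_head + d];
    }
    return out;
}
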
@@ -770,8 +770,8 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_nn_group_norm(struct ggml_context* ct
         b = ggml_reshape_4d(ctx, b, 1, 1, b->ne[0], 1);
     }

-    const float eps = 1e-6f; // default eps parameter
-    x = ggml_group_norm(ctx, x, num_groups, eps);
+    const float eps = 1e-6f; // default eps parameter
+    x = ggml_group_norm(ctx, x, num_groups, eps);
     if (w != NULL && b != NULL) {
         x = ggml_mul(ctx, x, w);
         // b = ggml_repeat(ctx, b, x);

@@ -781,7 +781,7 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_nn_group_norm(struct ggml_context* ct
 }

 __STATIC_INLINE__ void ggml_backend_tensor_get_and_sync(ggml_backend_t backend, const struct ggml_tensor* tensor, void* data, size_t offset, size_t size) {
-#if defined (SD_USE_CUBLAS) || defined (SD_USE_SYCL)
+#if defined(SD_USE_CUBLAS) || defined(SD_USE_SYCL)
     if (!ggml_backend_is_cpu(backend)) {
         ggml_backend_tensor_get_async(backend, tensor, data, offset, size);
         ggml_backend_synchronize(backend);

@@ -889,7 +889,7 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_nn_timestep_embedding(
     struct ggml_context* ctx,
     struct ggml_tensor* timesteps,
     int dim,
-    int max_period = 10000,
+    int max_period = 10000,
     float time_factor = 1.0f) {
     timesteps = ggml_scale(ctx, timesteps, time_factor);
     return ggml_timestep_embedding(ctx, timesteps, dim, max_period);

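Side note, not part of the commit: ggml_nn_timestep_embedding only scales the timesteps and defers to ggml's ggml_timestep_embedding, which implements the usual sinusoidal embedding. The sketch below is a reference formulation of that construction; the helper name timestep_embedding is made up for the example, and the exact ordering of the cos/sin halves inside ggml is an assumption here.

// Reference sketch of a sinusoidal timestep embedding of size dim.
#include <cmath>
#include <vector>

std::vector<float> timestep_embedding(float t, int dim, int max_period = 10000) {
    int half = dim / 2;
    std::vector<float> out(dim, 0.0f);
    for (int i = 0; i < half; i++) {
        // Geometrically spaced frequencies from 1 down to 1/max_period.
        float freq = std::exp(-std::log((float)max_period) * i / half);
        out[i]        = std::cos(t * freq);
        out[half + i] = std::sin(t * freq);
    }
    return out;
}
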
lora.hpp

@@ -10,10 +10,10 @@ struct LoraModel : public GGMLRunner {
     std::map<std::string, struct ggml_tensor*> lora_tensors;
     std::string file_path;
     ModelLoader model_loader;
-    bool load_failed = false;
-    bool applied = false;
+    bool load_failed = false;
+    bool applied = false;
     std::vector<int> zero_index_vec = {0};
-    ggml_tensor* zero_index = NULL;
+    ggml_tensor* zero_index = NULL;

     LoraModel(ggml_backend_t backend,
               ggml_type wtype,

@@ -72,8 +72,8 @@ struct LoraModel : public GGMLRunner {

     ggml_tensor* to_f32(ggml_context* ctx, ggml_tensor* a) {
         auto out = ggml_reshape_1d(ctx, a, ggml_nelements(a));
-        out = ggml_get_rows(ctx, out, zero_index);
-        out = ggml_reshape(ctx, out, a);
+        out = ggml_get_rows(ctx, out, zero_index);
+        out = ggml_reshape(ctx, out, a);
         return out;
     }

model.cpp

@@ -567,10 +567,10 @@ uint16_t f8_e4m3_to_f16(uint8_t f8) {
         return ggml_fp32_to_fp16(NAN);
     }

-    uint32_t sign = f8 & 0x80;
+    uint32_t sign = f8 & 0x80;
     uint32_t exponent = (f8 & 0x78) >> 3;
     uint32_t mantissa = f8 & 0x07;
-    uint32_t result = sign << 24;
+    uint32_t result = sign << 24;
     if (exponent == 0) {
         if (mantissa > 0) {
             exponent = 0x7f - exponent_bias;

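Side note, not part of the commit: the masks in this hunk split the FP8 byte into 1 sign bit, 4 exponent bits and 3 mantissa bits. Assuming the standard E4M3 exponent bias of 7, a normal value (exponent field non-zero, not NaN) decodes as (-1)^sign * 2^(exponent-7) * (1 + mantissa/8). The sketch below covers only that normal-value path and uses a made-up helper name; the real f8_e4m3_to_f16 in model.cpp also handles subnormals and NaN.

// Decode a normal E4M3 byte to float, assuming the standard bias of 7.
#include <cmath>
#include <cstdint>
#include <cstdio>

float f8_e4m3_to_f32_normal(uint8_t f8) {
    uint32_t sign     = f8 & 0x80;         // 1 sign bit
    uint32_t exponent = (f8 & 0x78) >> 3;  // 4 exponent bits
    uint32_t mantissa = f8 & 0x07;         // 3 mantissa bits
    float v = std::ldexp(1.0f + mantissa / 8.0f, (int)exponent - 7);
    return sign ? -v : v;
}

int main() {
    std::printf("%f\n", f8_e4m3_to_f32_normal(0x40));  // exponent 8, mantissa 0 -> 2.0
    std::printf("%f\n", f8_e4m3_to_f32_normal(0x38));  // exponent 7, mantissa 0 -> 1.0
    return 0;
}
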
@@ -1399,8 +1399,8 @@ ggml_type ModelLoader::get_sd_wtype() {

         if (tensor_storage.name.find(".weight") != std::string::npos &&
             (tensor_storage.name.find("time_embed") != std::string::npos ||
-             tensor_storage.name.find("context_embedder") != std::string::npos ||
-             tensor_storage.name.find("time_in") != std::string::npos)) {
+             tensor_storage.name.find("context_embedder") != std::string::npos ||
+             tensor_storage.name.find("time_in") != std::string::npos)) {
             return tensor_storage.type;
         }
     }

@@ -1414,9 +1414,9 @@ ggml_type ModelLoader::get_conditioner_wtype() {
         }

         if ((tensor_storage.name.find("text_encoders") == std::string::npos &&
-             tensor_storage.name.find("cond_stage_model") == std::string::npos &&
-             tensor_storage.name.find("te.text_model.") == std::string::npos &&
-             tensor_storage.name.find("conditioner") == std::string::npos)) {
+             tensor_storage.name.find("cond_stage_model") == std::string::npos &&
+             tensor_storage.name.find("te.text_model.") == std::string::npos &&
+             tensor_storage.name.find("conditioner") == std::string::npos)) {
             continue;
         }

@@ -1427,7 +1427,6 @@ ggml_type ModelLoader::get_conditioner_wtype() {
     return GGML_TYPE_COUNT;
 }

-
 ggml_type ModelLoader::get_diffusion_model_wtype() {
     for (auto& tensor_storage : tensor_storages) {
         if (is_unused_tensor(tensor_storage.name)) {

@@ -1440,8 +1439,8 @@ ggml_type ModelLoader::get_diffusion_model_wtype() {

         if (tensor_storage.name.find(".weight") != std::string::npos &&
             (tensor_storage.name.find("time_embed") != std::string::npos ||
-             tensor_storage.name.find("context_embedder") != std::string::npos ||
-             tensor_storage.name.find("time_in") != std::string::npos)) {
+             tensor_storage.name.find("context_embedder") != std::string::npos ||
+             tensor_storage.name.find("time_in") != std::string::npos)) {
             return tensor_storage.type;
         }
     }

@@ -69,11 +69,10 @@ public:
     ggml_backend_t clip_backend = NULL;
     ggml_backend_t control_net_backend = NULL;
     ggml_backend_t vae_backend = NULL;
-    ggml_type model_wtype = GGML_TYPE_COUNT;
-    ggml_type conditioner_wtype = GGML_TYPE_COUNT;
-    ggml_type diffusion_model_wtype = GGML_TYPE_COUNT;
-    ggml_type vae_wtype = GGML_TYPE_COUNT;
-
+    ggml_type model_wtype = GGML_TYPE_COUNT;
+    ggml_type conditioner_wtype = GGML_TYPE_COUNT;
+    ggml_type diffusion_model_wtype = GGML_TYPE_COUNT;
+    ggml_type vae_wtype = GGML_TYPE_COUNT;

     SDVersion version;
     bool vae_decode_only = false;

@@ -171,7 +170,7 @@ public:
             backend = ggml_backend_cpu_init();
         }
 #ifdef SD_USE_FLASH_ATTENTION
-#if defined(SD_USE_CUBLAS) || defined(SD_USE_METAL) || defined (SD_USE_SYCL)
+#if defined(SD_USE_CUBLAS) || defined(SD_USE_METAL) || defined(SD_USE_SYCL)
         LOG_WARN("Flash Attention not supported with GPU Backend");
 #else
         LOG_INFO("Flash Attention enabled");

@@ -243,10 +242,10 @@ public:
                 vae_wtype = wtype;
             }
         } else {
-            model_wtype = wtype;
-            conditioner_wtype = wtype;
+            model_wtype = wtype;
+            conditioner_wtype = wtype;
             diffusion_model_wtype = wtype;
-            vae_wtype = wtype;
+            vae_wtype = wtype;
         }

         if (version == VERSION_SDXL) {

@@ -290,7 +289,7 @@ public:
             first_stage_model->alloc_params_buffer();
             first_stage_model->get_param_tensors(tensors, "first_stage_model");
         } else {
-            clip_backend = backend;
+            clip_backend = backend;
             bool use_t5xxl = false;
             if (version == VERSION_SD3_2B || version == VERSION_FLUX_DEV || version == VERSION_FLUX_SCHNELL) {
                 use_t5xxl = true;

@@ -508,7 +507,7 @@ public:
             LOG_INFO("running in Flux FLOW mode");
             float shift = 1.15f;
             if (version == VERSION_FLUX_SCHNELL) {
-                shift = 1.0f; // TODO: validate
+                shift = 1.0f; // TODO: validate
             }
             denoiser = std::make_shared<FluxFlowDenoiser>(shift);
         } else if (is_using_v_parameterization) {

@@ -55,37 +55,37 @@ enum schedule_t {

 // same as enum ggml_type
 enum sd_type_t {
-    SD_TYPE_F32 = 0,
-    SD_TYPE_F16 = 1,
-    SD_TYPE_Q4_0 = 2,
-    SD_TYPE_Q4_1 = 3,
+    SD_TYPE_F32 = 0,
+    SD_TYPE_F16 = 1,
+    SD_TYPE_Q4_0 = 2,
+    SD_TYPE_Q4_1 = 3,
     // SD_TYPE_Q4_2 = 4, support has been removed
     // SD_TYPE_Q4_3 = 5, support has been removed
-    SD_TYPE_Q5_0 = 6,
-    SD_TYPE_Q5_1 = 7,
-    SD_TYPE_Q8_0 = 8,
-    SD_TYPE_Q8_1 = 9,
-    SD_TYPE_Q2_K = 10,
-    SD_TYPE_Q3_K = 11,
-    SD_TYPE_Q4_K = 12,
-    SD_TYPE_Q5_K = 13,
-    SD_TYPE_Q6_K = 14,
-    SD_TYPE_Q8_K = 15,
-    SD_TYPE_IQ2_XXS = 16,
-    SD_TYPE_IQ2_XS = 17,
-    SD_TYPE_IQ3_XXS = 18,
-    SD_TYPE_IQ1_S = 19,
-    SD_TYPE_IQ4_NL = 20,
-    SD_TYPE_IQ3_S = 21,
-    SD_TYPE_IQ2_S = 22,
-    SD_TYPE_IQ4_XS = 23,
-    SD_TYPE_I8 = 24,
-    SD_TYPE_I16 = 25,
-    SD_TYPE_I32 = 26,
-    SD_TYPE_I64 = 27,
-    SD_TYPE_F64 = 28,
-    SD_TYPE_IQ1_M = 29,
-    SD_TYPE_BF16 = 30,
+    SD_TYPE_Q5_0 = 6,
+    SD_TYPE_Q5_1 = 7,
+    SD_TYPE_Q8_0 = 8,
+    SD_TYPE_Q8_1 = 9,
+    SD_TYPE_Q2_K = 10,
+    SD_TYPE_Q3_K = 11,
+    SD_TYPE_Q4_K = 12,
+    SD_TYPE_Q5_K = 13,
+    SD_TYPE_Q6_K = 14,
+    SD_TYPE_Q8_K = 15,
+    SD_TYPE_IQ2_XXS = 16,
+    SD_TYPE_IQ2_XS = 17,
+    SD_TYPE_IQ3_XXS = 18,
+    SD_TYPE_IQ1_S = 19,
+    SD_TYPE_IQ4_NL = 20,
+    SD_TYPE_IQ3_S = 21,
+    SD_TYPE_IQ2_S = 22,
+    SD_TYPE_IQ4_XS = 23,
+    SD_TYPE_I8 = 24,
+    SD_TYPE_I16 = 25,
+    SD_TYPE_I32 = 26,
+    SD_TYPE_I64 = 27,
+    SD_TYPE_F64 = 28,
+    SD_TYPE_IQ1_M = 29,
+    SD_TYPE_BF16 = 30,
     SD_TYPE_Q4_0_4_4 = 31,
     SD_TYPE_Q4_0_4_8 = 32,
     SD_TYPE_Q4_0_8_8 = 33,