style: format code

2024-08-25 00:19:37 +08:00
parent d08d7fa632
commit c837c5d9cc
10 changed files with 937 additions and 947 deletions
--- a/conditioner.hpp
+++ b/conditioner.hpp
@@ -10,8 +10,8 @@ struct SDCondition {
    struct ggml_tensor* c_concat    = NULL;

    SDCondition() = default;
-    SDCondition(struct ggml_tensor* c_crossattn, struct ggml_tensor* c_vector, struct ggml_tensor* c_concat) :
-    c_crossattn(c_crossattn), c_vector(c_vector), c_concat(c_concat) {}
+    SDCondition(struct ggml_tensor* c_crossattn, struct ggml_tensor* c_vector, struct ggml_tensor* c_concat)
+        : c_crossattn(c_crossattn), c_vector(c_vector), c_concat(c_concat) {}
 };

 struct Conditioner {
@@ -978,7 +978,6 @@ struct SD3CLIPEmbedder : public Conditioner {
    }
 };

-
 struct FluxCLIPEmbedder : public Conditioner {
    ggml_type wtype;
    CLIPTokenizer clip_l_tokenizer;
--- a/denoiser.hpp
+++ b/denoiser.hpp
@@ -351,7 +351,6 @@ struct DiscreteFlowDenoiser : public Denoiser {
    }
 };

-
 float flux_time_shift(float mu, float sigma, float t) {
    return std::exp(mu) / (std::exp(mu) + std::pow((1.0 / t - 1.0), sigma));
 }
@@ -369,7 +368,7 @@ struct FluxFlowDenoiser : public Denoiser {
    void set_parameters(float shift = 1.15f) {
        this->shift = shift;
        for (int i = 1; i < TIMESTEPS + 1; i++) {
-            sigmas[i - 1] = t_to_sigma(i/TIMESTEPS * TIMESTEPS);
+            sigmas[i - 1] = t_to_sigma(static_cast<float>(i) / TIMESTEPS * TIMESTEPS);  // cast first: assuming TIMESTEPS is an integer constant, int `i / TIMESTEPS` truncates to 0 for every i < TIMESTEPS
        }
    }

--- a/diffusion_model.hpp
+++ b/diffusion_model.hpp
@@ -1,9 +1,9 @@
 #ifndef __DIFFUSION_MODEL_H__
 #define __DIFFUSION_MODEL_H__

+#include "flux.hpp"
 #include "mmdit.hpp"
 #include "unet.hpp"
-#include "flux.hpp"

 struct DiffusionModel {
    virtual void compute(int n_threads,
@@ -124,7 +124,6 @@ struct MMDiTModel : public DiffusionModel {
    }
 };

-
 struct FluxModel : public DiffusionModel {
    Flux::FluxRunner flux;

--- a/flux.hpp
+++ b/flux.hpp
@@ -10,8 +10,8 @@

 namespace Flux {

-struct MLPEmbedder : public UnaryBlock {
-public:
+    struct MLPEmbedder : public UnaryBlock {
+    public:
        MLPEmbedder(int64_t in_dim, int64_t hidden_dim) {
            blocks["in_layer"]  = std::shared_ptr<GGMLBlock>(new Linear(in_dim, hidden_dim, true));
            blocks["out_layer"] = std::shared_ptr<GGMLBlock>(new Linear(hidden_dim, hidden_dim, true));
@@ -28,10 +28,10 @@ public:
            x = out_layer->forward(ctx, x);
            return x;
        }
-};
+    };

-class RMSNorm : public UnaryBlock {
-protected:
+    class RMSNorm : public UnaryBlock {
+    protected:
        int64_t hidden_size;
        float eps;

@@ -39,7 +39,7 @@ protected:
            params["scale"] = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, hidden_size);
        }

-public:
+    public:
        RMSNorm(int64_t hidden_size,
                float eps = 1e-06f)
            : hidden_size(hidden_size),
@@ -51,11 +51,10 @@ public:
            x                     = ggml_mul(ctx, x, w);
            return x;
        }
-};
+    };

-
-struct QKNorm : public GGMLBlock {
-public:
+    struct QKNorm : public GGMLBlock {
+    public:
        QKNorm(int64_t dim) {
            blocks["query_norm"] = std::shared_ptr<GGMLBlock>(new RMSNorm(dim));
            blocks["key_norm"]   = std::shared_ptr<GGMLBlock>(new RMSNorm(dim));
@@ -78,9 +77,9 @@ public:
            x = norm->forward(ctx, x);
            return x;
        }
-};
+    };

-__STATIC_INLINE__ struct ggml_tensor* apply_rope(struct ggml_context* ctx,
+    __STATIC_INLINE__ struct ggml_tensor* apply_rope(struct ggml_context* ctx,
                                                     struct ggml_tensor* x,
                                                     struct ggml_tensor* pe) {
        // x: [N, L, n_head, d_head]
@@ -90,7 +89,7 @@ __STATIC_INLINE__ struct ggml_tensor* apply_rope(struct ggml_context* ctx,
        int64_t L      = x->ne[2];
        int64_t N      = x->ne[3];
        x              = ggml_cont(ctx, ggml_permute(ctx, x, 0, 2, 1, 3));       // [N, n_head, L, d_head]
-    x = ggml_reshape_4d(ctx, x, 2, d_head/2, L, n_head * N);  // [N * n_head, L, d_head/2, 2]
+        x              = ggml_reshape_4d(ctx, x, 2, d_head / 2, L, n_head * N);  // [N * n_head, L, d_head/2, 2]
        x              = ggml_cont(ctx, ggml_permute(ctx, x, 3, 0, 1, 2));       // [2, N * n_head, L, d_head/2]

        int64_t offset = x->nb[2] * x->ne[2];
@@ -108,11 +107,11 @@ __STATIC_INLINE__ struct ggml_tensor* apply_rope(struct ggml_context* ctx,
        auto pe_1 = ggml_view_3d(ctx, pe, pe->ne[0], pe->ne[1], pe->ne[2], pe->nb[1], pe->nb[2], offset * 1);  // [L, d_head/2, 2]

        auto x_out = ggml_add_inplace(ctx, ggml_mul(ctx, x_0, pe_0), ggml_mul(ctx, x_1, pe_1));  // [N * n_head, L, d_head/2, 2]
-    x_out = ggml_reshape_3d(ctx, x_out, d_head, L, n_head*N); // [N*n_head, L, d_head]
+        x_out      = ggml_reshape_3d(ctx, x_out, d_head, L, n_head * N);                         // [N*n_head, L, d_head]
        return x_out;
-}
+    }

-__STATIC_INLINE__ struct ggml_tensor* attention(struct ggml_context* ctx,
+    __STATIC_INLINE__ struct ggml_tensor* attention(struct ggml_context* ctx,
                                                    struct ggml_tensor* q,
                                                    struct ggml_tensor* k,
                                                    struct ggml_tensor* v,
@@ -125,13 +124,13 @@ __STATIC_INLINE__ struct ggml_tensor* attention(struct ggml_context* ctx,

        auto x = ggml_nn_attention_ext(ctx, q, k, v, v->ne[1], NULL, false, true);  // [N, L, n_head*d_head]
        return x;
-}
+    }

-struct SelfAttention : public GGMLBlock {
-public:
+    struct SelfAttention : public GGMLBlock {
+    public:
        int64_t num_heads;

-public:
+    public:
        SelfAttention(int64_t dim,
                      int64_t num_heads = 8,
                      bool qkv_bias     = false)
@@ -146,7 +145,6 @@ public:
            auto qkv_proj = std::dynamic_pointer_cast<Linear>(blocks["qkv"]);
            auto norm     = std::dynamic_pointer_cast<QKNorm>(blocks["norm"]);

-
            auto qkv         = qkv_proj->forward(ctx, x);
            auto qkv_vec     = split_qkv(ctx, qkv);
            int64_t head_dim = qkv_vec[0]->ne[0] / num_heads;
@@ -174,25 +172,26 @@ public:
            x        = post_attention(ctx, x);                      // [N, n_token, dim]
            return x;
        }
-};
+    };

-
-struct ModulationOut {
+    struct ModulationOut {
        ggml_tensor* shift = NULL;
        ggml_tensor* scale = NULL;
        ggml_tensor* gate  = NULL;

        ModulationOut(ggml_tensor* shift = NULL, ggml_tensor* scale = NULL, ggml_tensor* gate = NULL)
            : shift(shift), scale(scale), gate(gate) {}
-};
+    };

-struct Modulation : public GGMLBlock {
-public:
+    struct Modulation : public GGMLBlock {
+    public:
        bool is_double;
        int multiplier;
-public:
-    Modulation(int64_t dim, bool is_double): is_double(is_double) {
-        multiplier = is_double? 6 : 3;
+
+    public:
+        Modulation(int64_t dim, bool is_double)
+            : is_double(is_double) {
+            multiplier    = is_double ? 6 : 3;
            blocks["lin"] = std::shared_ptr<GGMLBlock>(new Linear(dim, dim * multiplier));
        }

@@ -221,9 +220,9 @@ public:

            return {ModulationOut(shift_0, scale_0, gate_0), ModulationOut()};
        }
-};
+    };

-__STATIC_INLINE__ struct ggml_tensor* modulate(struct ggml_context* ctx,
+    __STATIC_INLINE__ struct ggml_tensor* modulate(struct ggml_context* ctx,
                                                   struct ggml_tensor* x,
                                                   struct ggml_tensor* shift,
                                                   struct ggml_tensor* scale) {
@@ -235,10 +234,10 @@ __STATIC_INLINE__ struct ggml_tensor* modulate(struct ggml_context* ctx,
        x     = ggml_add(ctx, x, ggml_mul(ctx, x, scale));
        x     = ggml_add(ctx, x, shift);
        return x;
-}
+    }

-struct DoubleStreamBlock : public GGMLBlock {
-public:
+    struct DoubleStreamBlock : public GGMLBlock {
+    public:
        DoubleStreamBlock(int64_t hidden_size,
                          int64_t num_heads,
                          float mlp_ratio,
@@ -289,7 +288,6 @@ public:
            auto txt_mlp_0 = std::dynamic_pointer_cast<Linear>(blocks["txt_mlp.0"]);
            auto txt_mlp_2 = std::dynamic_pointer_cast<Linear>(blocks["txt_mlp.2"]);

-
            auto img_mods          = img_mod->forward(ctx, vec);
            ModulationOut img_mod1 = img_mods[0];
            ModulationOut img_mod2 = img_mods[1];
@@ -359,20 +357,20 @@ public:

            return {img, txt};
        }
-};
+    };

-
-struct SingleStreamBlock : public GGMLBlock {
-public:
+    struct SingleStreamBlock : public GGMLBlock {
+    public:
        int64_t num_heads;
        int64_t hidden_size;
        int64_t mlp_hidden_dim;
-public:
+
+    public:
        SingleStreamBlock(int64_t hidden_size,
                          int64_t num_heads,
                          float mlp_ratio = 4.0f,
-                  float qk_scale     = 0.f) :
-                  hidden_size(hidden_size), num_heads(num_heads) {
+                          float qk_scale  = 0.f)
+            : hidden_size(hidden_size), num_heads(num_heads) {
            int64_t head_dim = hidden_size / num_heads;
            float scale      = qk_scale;
            if (scale <= 0.f) {
@@ -443,11 +441,10 @@ public:
            output = ggml_add(ctx, x, ggml_mul(ctx, output, mod.gate));
            return output;
        }
-};
+    };

-
-struct LastLayer : public GGMLBlock {
-public:
+    struct LastLayer : public GGMLBlock {
+    public:
        LastLayer(int64_t hidden_size,
                  int64_t patch_size,
                  int64_t out_channels) {
@@ -479,11 +476,11 @@ public:

            return x;
        }
-};
+    };

-struct FluxParams {
+    struct FluxParams {
        int64_t in_channels         = 64;
-    int64_t vec_in_dim=768;
+        int64_t vec_in_dim          = 768;
        int64_t context_in_dim      = 4096;
        int64_t hidden_size         = 3072;
        float mlp_ratio             = 4.0f;
@@ -495,11 +492,10 @@ struct FluxParams {
        int theta                   = 10000;
        bool qkv_bias               = true;
        bool guidance_embed         = true;
-};
+    };

-
-struct Flux : public GGMLBlock {
-public:
+    struct Flux : public GGMLBlock {
+    public:
        std::vector<float> linspace(float start, float end, int num) {
            std::vector<float> result(num);
            float step = (end - start) / (num - 1);
@@ -609,9 +605,9 @@ public:
                // std::cout << trans_ids[0][i] << " " << trans_ids[1][i] << " " << trans_ids[2][i] << std::endl;
            }

-        
            int emb_dim = 0;
-        for (int d : axes_dim) emb_dim += d / 2;
+            for (int d : axes_dim)
+                emb_dim += d / 2;

            std::vector<std::vector<float>> emb(bs * pos_len, std::vector<float>(emb_dim * 2 * 2, 0.0));
            int offset = 0;
@@ -629,10 +625,12 @@ public:

            return flatten(emb);
        }
-public:
+
+    public:
        FluxParams params;
        Flux() {}
-    Flux(FluxParams params) : params(params) {
+        Flux(FluxParams params)
+            : params(params) {
            int64_t out_channels = params.in_channels;
            int64_t pe_dim       = params.hidden_size / params.num_heads;

@@ -675,11 +673,11 @@ public:

            GGML_ASSERT(h * p == H && w * p == W);

-        x = ggml_reshape_4d(ctx, x, p, w, p, h*C*N);           // [N*C*h, p, w, p]
+            x = ggml_reshape_4d(ctx, x, p, w, p, h * C * N);       // [N*C*h, p, w, p]
            x = ggml_cont(ctx, ggml_permute(ctx, x, 0, 2, 1, 3));  // [N*C*h, w, p, p]
            x = ggml_reshape_4d(ctx, x, p * p, w * h, C, N);       // [N, C, h*w, p*p]
            x = ggml_cont(ctx, ggml_permute(ctx, x, 0, 2, 1, 3));  // [N, h*w, C, p*p]
-        x = ggml_reshape_3d(ctx, x, p*p*C, w*h, N);            // [N, h*w, C*p*p]
+            x = ggml_reshape_3d(ctx, x, p * p * C, w * h, N);      // [N, h*w, C*p*p]
            return x;
        }

@@ -800,11 +798,10 @@ public:

            return out;
        }
-};
+    };

-
-struct FluxRunner : public GGMLRunner {
-public:
+    struct FluxRunner : public GGMLRunner {
+    public:
        FluxParams flux_params;
        Flux flux;
        std::vector<float> pe_vec;  // for cache
@@ -847,13 +844,12 @@ public:
            pe_vec      = flux.gen_pe(x->ne[1], x->ne[0], 2, x->ne[3], context->ne[1], flux_params.theta, flux_params.axes_dim);
            int pos_len = pe_vec.size() / flux_params.axes_dim_sum / 2;
            // LOG_DEBUG("pos_len %d", pos_len);
-        auto pe = ggml_new_tensor_4d(compute_ctx, GGML_TYPE_F32, 2, 2, flux_params.axes_dim_sum/2, pos_len);
+            auto pe = ggml_new_tensor_4d(compute_ctx, GGML_TYPE_F32, 2, 2, flux_params.axes_dim_sum / 2, pos_len);
            // pe->data = pe_vec.data();
            // print_ggml_tensor(pe);
            // pe->data = NULL;
            set_backend_tensor_data(pe, pe_vec.data());

-
            struct ggml_tensor* out = flux.forward(compute_ctx,
                                                   x,
                                                   timesteps,
@@ -958,7 +954,7 @@ public:
            }
            flux->test();
        }
-};
+    };

 }  // namespace Flux

--- a/ggml_extend.hpp
+++ b/ggml_extend.hpp
@@ -686,8 +686,8 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_nn_attention_ext(struct ggml_context*
                                                            bool skip_reshape        = false) {
    int64_t L_q;
    int64_t L_k;
-    int64_t C  ;
-    int64_t N  ;
+    int64_t C;
+    int64_t N;
    int64_t d_head;
    if (!skip_reshape) {
        L_q    = q->ne[1];
@@ -781,7 +781,7 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_nn_group_norm(struct ggml_context* ct
 }

 __STATIC_INLINE__ void ggml_backend_tensor_get_and_sync(ggml_backend_t backend, const struct ggml_tensor* tensor, void* data, size_t offset, size_t size) {
-#if defined (SD_USE_CUBLAS) || defined (SD_USE_SYCL)
+#if defined(SD_USE_CUBLAS) || defined(SD_USE_SYCL)
    if (!ggml_backend_is_cpu(backend)) {
        ggml_backend_tensor_get_async(backend, tensor, data, offset, size);
        ggml_backend_synchronize(backend);
--- a/model.cpp
+++ b/model.cpp
@@ -1427,7 +1427,6 @@ ggml_type ModelLoader::get_conditioner_wtype() {
    return GGML_TYPE_COUNT;
 }

-
 ggml_type ModelLoader::get_diffusion_model_wtype() {
    for (auto& tensor_storage : tensor_storages) {
        if (is_unused_tensor(tensor_storage.name)) {
--- a/model.h
+++ b/model.h
@@ -165,4 +165,3 @@ public:
 };

 #endif  // __MODEL_H__
-
--- a/stable-diffusion.cpp
+++ b/stable-diffusion.cpp
@@ -74,7 +74,6 @@ public:
    ggml_type diffusion_model_wtype    = GGML_TYPE_COUNT;
    ggml_type vae_wtype                = GGML_TYPE_COUNT;

-
    SDVersion version;
    bool vae_decode_only         = false;
    bool free_params_immediately = false;
@@ -171,7 +170,7 @@ public:
            backend = ggml_backend_cpu_init();
        }
 #ifdef SD_USE_FLASH_ATTENTION
-#if defined(SD_USE_CUBLAS) || defined(SD_USE_METAL) || defined (SD_USE_SYCL)
+#if defined(SD_USE_CUBLAS) || defined(SD_USE_METAL) || defined(SD_USE_SYCL)
        LOG_WARN("Flash Attention not supported with GPU Backend");
 #else
        LOG_INFO("Flash Attention enabled");