fix: avoid stack overflow on MSVC

2023-10-23 21:10:46 +08:00
parent 09cab2a2ae
commit fbd18e1059
1 changed file with 14 additions and 8 deletions
--- a/stable-diffusion.cpp
+++ b/stable-diffusion.cpp
@@ -3155,6 +3155,8 @@ class StableDiffusionGGML {
        struct ggml_tensor* c = ggml_new_tensor_4d(res_ctx, GGML_TYPE_F32, 1024, 2, 1, 1);
        ggml_set_f32(c, 0.5);

+        struct ggml_cplan cplan;
+
        size_t ctx_size = 10 * 1024 * 1024;  // 10MB
        // calculate the amount of memory required
        {
@@ -3179,7 +3181,7 @@ class StableDiffusionGGML {
            ctx_size += ggml_used_mem(ctx) + ggml_used_mem_of_data(ctx);

            struct ggml_cgraph* diffusion_graph = ggml_build_forward_ctx(ctx, out);
-            struct ggml_cplan cplan = ggml_graph_plan(diffusion_graph, n_threads);
+            cplan = ggml_graph_plan(diffusion_graph, n_threads);

            ctx_size += cplan.work_size;
            LOG_DEBUG("diffusion context need %.2fMB static memory, with work_size needing %.2fMB",
@@ -3212,7 +3214,7 @@ class StableDiffusionGGML {
        ggml_hold_dynamic_tensor(out);

        struct ggml_cgraph* diffusion_graph = ggml_build_forward_ctx(ctx, out);
-        struct ggml_cplan cplan = ggml_graph_plan(diffusion_graph, n_threads);
+        cplan = ggml_graph_plan(diffusion_graph, n_threads);

        ggml_set_dynamic(ctx, false);
        struct ggml_tensor* buf = ggml_new_tensor_1d(ctx, GGML_TYPE_I8, cplan.work_size);
@@ -3257,6 +3259,7 @@ class StableDiffusionGGML {
                                                            true);
        std::vector<int>& tokens = tokens_and_weights.first;
        std::vector<float>& weights = tokens_and_weights.second;
+        struct ggml_cplan cplan;
        size_t ctx_size = 10 * 1024 * 1024;  // 10MB
        // calculate the amount of memory required
        {
@@ -3278,8 +3281,8 @@ class StableDiffusionGGML {

            struct ggml_tensor* hidden_states = cond_stage_model.text_model.forward(ctx, input_ids);

-            struct ggml_cgraph cond_graph = ggml_build_forward(hidden_states);
-            struct ggml_cplan cplan = ggml_graph_plan(&cond_graph, n_threads);
+            struct ggml_cgraph* cond_graph = ggml_build_forward_ctx(ctx, hidden_states);
+            cplan = ggml_graph_plan(cond_graph, n_threads);
            ctx_size += cplan.work_size;

            ctx_size += ggml_used_mem(ctx) + ggml_used_mem_of_data(ctx);
@@ -3390,6 +3393,7 @@ class StableDiffusionGGML {
        // print_ggml_tensor(x_t);
        struct ggml_tensor* x = ggml_dup_tensor(res_ctx, x_t);
        copy_ggml_tensor(x, x_t);
+        struct ggml_cplan cplan;

        size_t ctx_size = 10 * 1024 * 1024;  // 10MB
        // calculate the amount of memory required
@@ -3417,7 +3421,7 @@ class StableDiffusionGGML {
            ctx_size += ggml_used_mem(ctx) + ggml_used_mem_of_data(ctx);

            struct ggml_cgraph* diffusion_graph = ggml_build_forward_ctx(ctx, out);
-            struct ggml_cplan cplan = ggml_graph_plan(diffusion_graph, n_threads);
+            cplan = ggml_graph_plan(diffusion_graph, n_threads);

            ctx_size += cplan.work_size;
            LOG_DEBUG("diffusion context need %.2fMB static memory, with work_size needing %.2fMB",
@@ -3450,7 +3454,7 @@ class StableDiffusionGGML {
        ggml_hold_dynamic_tensor(out);

        struct ggml_cgraph* diffusion_graph = ggml_build_forward_ctx(ctx, out);
-        struct ggml_cplan cplan = ggml_graph_plan(diffusion_graph, n_threads);
+        cplan = ggml_graph_plan(diffusion_graph, n_threads);

        ggml_set_dynamic(ctx, false);
        struct ggml_tensor* buf = ggml_new_tensor_1d(ctx, GGML_TYPE_I8, cplan.work_size);
@@ -3961,6 +3965,7 @@ class StableDiffusionGGML {
        int64_t W = x->ne[0];
        int64_t H = x->ne[1];
        struct ggml_tensor* result = NULL;
+        struct ggml_cplan cplan;

        // calculate the amount of memory required
        size_t ctx_size = 10 * 1024 * 1024;  // 10MB
@@ -3981,7 +3986,7 @@ class StableDiffusionGGML {
            ctx_size += ggml_used_mem(ctx) + ggml_used_mem_of_data(ctx);

            struct ggml_cgraph* vae_graph = ggml_build_forward_ctx(ctx, moments);
-            struct ggml_cplan cplan = ggml_graph_plan(vae_graph, n_threads);
+            cplan = ggml_graph_plan(vae_graph, n_threads);

            ctx_size += cplan.work_size;
            LOG_DEBUG("vae context need %.2fMB static memory, with work_size needing %.2fMB",
@@ -4083,6 +4088,7 @@ class StableDiffusionGGML {
        int64_t W = z->ne[0];
        int64_t H = z->ne[1];
        struct ggml_tensor* result_img = NULL;
+        struct ggml_cplan cplan;

        {
            float* vec = (float*)z->data;
@@ -4110,7 +4116,7 @@ class StableDiffusionGGML {
            ctx_size += ggml_used_mem(ctx) + ggml_used_mem_of_data(ctx);

            struct ggml_cgraph* vae_graph = ggml_build_forward_ctx(ctx, img);
-            struct ggml_cplan cplan = ggml_graph_plan(vae_graph, n_threads);
+            cplan = ggml_graph_plan(vae_graph, n_threads);

            ctx_size += cplan.work_size;
            LOG_DEBUG("vae context need %.2fMB static memory, with work_size needing %.2fMB",