From fbd18e10593fc71f3825d151bd5d8b0a29f8f8bd Mon Sep 17 00:00:00 2001
From: leejet <leejet714@gmail.com>
Date: Mon, 23 Oct 2023 21:10:46 +0800
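Subject: [PATCH] fix: avoid stack overflow on MSVC

Declare a single method-level ggml_cplan in each affected method and
assign to it at every ggml_graph_plan() call site, instead of declaring
a new ggml_cplan local at each call site. Also build cond_graph with
ggml_build_forward_ctx(), which yields a ggml_cgraph pointer, instead of
keeping a ggml_cgraph by value as a local variable.
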
---
 stable-diffusion.cpp | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)
diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp
index 57c4cb0..c3da036 100644
--- a/stable-diffusion.cpp
+++ b/stable-diffusion.cpp
@@ -3155,6 +3155,8 @@ class StableDiffusionGGML {
         struct ggml_tensor* c = ggml_new_tensor_4d(res_ctx, GGML_TYPE_F32, 1024, 2, 1, 1);
         ggml_set_f32(c, 0.5);
 
+        struct ggml_cplan cplan;
+
         size_t ctx_size = 10 * 1024 * 1024;  // 10MB
         // calculate the amount of memory required
         {
@@ -3179,7 +3181,7 @@ class StableDiffusionGGML {
             ctx_size += ggml_used_mem(ctx) + ggml_used_mem_of_data(ctx);
 
             struct ggml_cgraph* diffusion_graph = ggml_build_forward_ctx(ctx, out);
-            struct ggml_cplan cplan = ggml_graph_plan(diffusion_graph, n_threads);
+            cplan = ggml_graph_plan(diffusion_graph, n_threads);
 
             ctx_size += cplan.work_size;
             LOG_DEBUG("diffusion context need %.2fMB static memory, with work_size needing %.2fMB",
@@ -3212,7 +3214,7 @@ class StableDiffusionGGML {
         ggml_hold_dynamic_tensor(out);
 
         struct ggml_cgraph* diffusion_graph = ggml_build_forward_ctx(ctx, out);
-        struct ggml_cplan cplan = ggml_graph_plan(diffusion_graph, n_threads);
+        cplan = ggml_graph_plan(diffusion_graph, n_threads);
 
         ggml_set_dynamic(ctx, false);
         struct ggml_tensor* buf = ggml_new_tensor_1d(ctx, GGML_TYPE_I8, cplan.work_size);
@@ -3257,6 +3259,7 @@ class StableDiffusionGGML {
                                                             true);
         std::vector<int>& tokens = tokens_and_weights.first;
         std::vector<float>& weights = tokens_and_weights.second;
+        struct ggml_cplan cplan;
         size_t ctx_size = 10 * 1024 * 1024;  // 10MB
         // calculate the amount of memory required
         {
@@ -3278,8 +3281,8 @@ class StableDiffusionGGML {
 
             struct ggml_tensor* hidden_states = cond_stage_model.text_model.forward(ctx, input_ids);
 
-            struct ggml_cgraph cond_graph = ggml_build_forward(hidden_states);
-            struct ggml_cplan cplan = ggml_graph_plan(&cond_graph, n_threads);
+            struct ggml_cgraph* cond_graph = ggml_build_forward_ctx(ctx, hidden_states);
+            cplan = ggml_graph_plan(cond_graph, n_threads);
             ctx_size += cplan.work_size;
 
             ctx_size += ggml_used_mem(ctx) + ggml_used_mem_of_data(ctx);
@@ -3390,6 +3393,7 @@ class StableDiffusionGGML {
         // print_ggml_tensor(x_t);
         struct ggml_tensor* x = ggml_dup_tensor(res_ctx, x_t);
         copy_ggml_tensor(x, x_t);
+        struct ggml_cplan cplan;
 
         size_t ctx_size = 10 * 1024 * 1024;  // 10MB
         // calculate the amount of memory required
@@ -3417,7 +3421,7 @@ class StableDiffusionGGML {
             ctx_size += ggml_used_mem(ctx) + ggml_used_mem_of_data(ctx);
 
             struct ggml_cgraph* diffusion_graph = ggml_build_forward_ctx(ctx, out);
-            struct ggml_cplan cplan = ggml_graph_plan(diffusion_graph, n_threads);
+            cplan = ggml_graph_plan(diffusion_graph, n_threads);
 
             ctx_size += cplan.work_size;
             LOG_DEBUG("diffusion context need %.2fMB static memory, with work_size needing %.2fMB",
@@ -3450,7 +3454,7 @@ class StableDiffusionGGML {
         ggml_hold_dynamic_tensor(out);
 
         struct ggml_cgraph* diffusion_graph = ggml_build_forward_ctx(ctx, out);
-        struct ggml_cplan cplan = ggml_graph_plan(diffusion_graph, n_threads);
+        cplan = ggml_graph_plan(diffusion_graph, n_threads);
 
         ggml_set_dynamic(ctx, false);
         struct ggml_tensor* buf = ggml_new_tensor_1d(ctx, GGML_TYPE_I8, cplan.work_size);
@@ -3961,6 +3965,7 @@ class StableDiffusionGGML {
         int64_t W = x->ne[0];
         int64_t H = x->ne[1];
         struct ggml_tensor* result = NULL;
+        struct ggml_cplan cplan;
 
         // calculate the amount of memory required
         size_t ctx_size = 10 * 1024 * 1024;  // 10MB
@@ -3981,7 +3986,7 @@ class StableDiffusionGGML {
             ctx_size += ggml_used_mem(ctx) + ggml_used_mem_of_data(ctx);
 
             struct ggml_cgraph* vae_graph = ggml_build_forward_ctx(ctx, moments);
-            struct ggml_cplan cplan = ggml_graph_plan(vae_graph, n_threads);
+            cplan = ggml_graph_plan(vae_graph, n_threads);
 
             ctx_size += cplan.work_size;
             LOG_DEBUG("vae context need %.2fMB static memory, with work_size needing %.2fMB",
@@ -4083,6 +4088,7 @@ class StableDiffusionGGML {
         int64_t W = z->ne[0];
         int64_t H = z->ne[1];
         struct ggml_tensor* result_img = NULL;
+        struct ggml_cplan cplan;
 
         {
             float* vec = (float*)z->data;
@@ -4110,7 +4116,7 @@ class StableDiffusionGGML {
             ctx_size += ggml_used_mem(ctx) + ggml_used_mem_of_data(ctx);
 
             struct ggml_cgraph* vae_graph = ggml_build_forward_ctx(ctx, img);
-            struct ggml_cplan cplan = ggml_graph_plan(vae_graph, n_threads);
+            cplan = ggml_graph_plan(vae_graph, n_threads);
 
             ctx_size += cplan.work_size;
             LOG_DEBUG("vae context need %.2fMB static memory, with work_size needing %.2fMB",