From fbd18e10593fc71f3825d151bd5d8b0a29f8f8bd Mon Sep 17 00:00:00 2001 From: leejet Date: Mon, 23 Oct 2023 21:10:46 +0800 Subject: [PATCH] fix: avoid stack overflow on MSVC --- stable-diffusion.cpp | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp index 57c4cb0..c3da036 100644 --- a/stable-diffusion.cpp +++ b/stable-diffusion.cpp @@ -3155,6 +3155,8 @@ class StableDiffusionGGML { struct ggml_tensor* c = ggml_new_tensor_4d(res_ctx, GGML_TYPE_F32, 1024, 2, 1, 1); ggml_set_f32(c, 0.5); + struct ggml_cplan cplan; + size_t ctx_size = 10 * 1024 * 1024; // 10MB // calculate the amount of memory required { @@ -3179,7 +3181,7 @@ class StableDiffusionGGML { ctx_size += ggml_used_mem(ctx) + ggml_used_mem_of_data(ctx); struct ggml_cgraph* diffusion_graph = ggml_build_forward_ctx(ctx, out); - struct ggml_cplan cplan = ggml_graph_plan(diffusion_graph, n_threads); + cplan = ggml_graph_plan(diffusion_graph, n_threads); ctx_size += cplan.work_size; LOG_DEBUG("diffusion context need %.2fMB static memory, with work_size needing %.2fMB", @@ -3212,7 +3214,7 @@ class StableDiffusionGGML { ggml_hold_dynamic_tensor(out); struct ggml_cgraph* diffusion_graph = ggml_build_forward_ctx(ctx, out); - struct ggml_cplan cplan = ggml_graph_plan(diffusion_graph, n_threads); + cplan = ggml_graph_plan(diffusion_graph, n_threads); ggml_set_dynamic(ctx, false); struct ggml_tensor* buf = ggml_new_tensor_1d(ctx, GGML_TYPE_I8, cplan.work_size); @@ -3257,6 +3259,7 @@ class StableDiffusionGGML { true); std::vector<int>& tokens = tokens_and_weights.first; std::vector<float>& weights = tokens_and_weights.second; + struct ggml_cplan cplan; size_t ctx_size = 10 * 1024 * 1024; // 10MB // calculate the amount of memory required { @@ -3278,8 +3281,8 @@ class StableDiffusionGGML { struct ggml_tensor* hidden_states = cond_stage_model.text_model.forward(ctx, input_ids); - struct ggml_cgraph cond_graph = ggml_build_forward(hidden_states); - struct 
ggml_cplan cplan = ggml_graph_plan(&cond_graph, n_threads); + struct ggml_cgraph* cond_graph = ggml_build_forward_ctx(ctx, hidden_states); + cplan = ggml_graph_plan(cond_graph, n_threads); ctx_size += cplan.work_size; ctx_size += ggml_used_mem(ctx) + ggml_used_mem_of_data(ctx); @@ -3390,6 +3393,7 @@ class StableDiffusionGGML { // print_ggml_tensor(x_t); struct ggml_tensor* x = ggml_dup_tensor(res_ctx, x_t); copy_ggml_tensor(x, x_t); + struct ggml_cplan cplan; size_t ctx_size = 10 * 1024 * 1024; // 10MB // calculate the amount of memory required @@ -3417,7 +3421,7 @@ class StableDiffusionGGML { ctx_size += ggml_used_mem(ctx) + ggml_used_mem_of_data(ctx); struct ggml_cgraph* diffusion_graph = ggml_build_forward_ctx(ctx, out); - struct ggml_cplan cplan = ggml_graph_plan(diffusion_graph, n_threads); + cplan = ggml_graph_plan(diffusion_graph, n_threads); ctx_size += cplan.work_size; LOG_DEBUG("diffusion context need %.2fMB static memory, with work_size needing %.2fMB", @@ -3450,7 +3454,7 @@ class StableDiffusionGGML { ggml_hold_dynamic_tensor(out); struct ggml_cgraph* diffusion_graph = ggml_build_forward_ctx(ctx, out); - struct ggml_cplan cplan = ggml_graph_plan(diffusion_graph, n_threads); + cplan = ggml_graph_plan(diffusion_graph, n_threads); ggml_set_dynamic(ctx, false); struct ggml_tensor* buf = ggml_new_tensor_1d(ctx, GGML_TYPE_I8, cplan.work_size); @@ -3961,6 +3965,7 @@ class StableDiffusionGGML { int64_t W = x->ne[0]; int64_t H = x->ne[1]; struct ggml_tensor* result = NULL; + struct ggml_cplan cplan; // calculate the amount of memory required size_t ctx_size = 10 * 1024 * 1024; // 10MB @@ -3981,7 +3986,7 @@ class StableDiffusionGGML { ctx_size += ggml_used_mem(ctx) + ggml_used_mem_of_data(ctx); struct ggml_cgraph* vae_graph = ggml_build_forward_ctx(ctx, moments); - struct ggml_cplan cplan = ggml_graph_plan(vae_graph, n_threads); + cplan = ggml_graph_plan(vae_graph, n_threads); ctx_size += cplan.work_size; LOG_DEBUG("vae context need %.2fMB static memory, with 
work_size needing %.2fMB", @@ -4083,6 +4088,7 @@ class StableDiffusionGGML { int64_t W = z->ne[0]; int64_t H = z->ne[1]; struct ggml_tensor* result_img = NULL; + struct ggml_cplan cplan; { float* vec = (float*)z->data; @@ -4110,7 +4116,7 @@ class StableDiffusionGGML { ctx_size += ggml_used_mem(ctx) + ggml_used_mem_of_data(ctx); struct ggml_cgraph* vae_graph = ggml_build_forward_ctx(ctx, img); - struct ggml_cplan cplan = ggml_graph_plan(vae_graph, n_threads); + cplan = ggml_graph_plan(vae_graph, n_threads); ctx_size += cplan.work_size; LOG_DEBUG("vae context need %.2fMB static memory, with work_size needing %.2fMB",