From ce1bcc74a6bf1f2c187d4d8ea14ee247cf562af2 Mon Sep 17 00:00:00 2001 From: Grauho <124803507+grauho@users.noreply.github.com> Date: Mon, 29 Apr 2024 11:21:32 -0400 Subject: [PATCH] feat: add AYS(Align Your Steps) scheduler (#241) Added NVIDIA's new "Align Your Steps" style scheduler in accordance with their quick start guide. Currently has handling for SD1.5, SDXL, and SVD, using the noise levels from their paper to generate the sigma values. Can be selected using the --schedule ays command line switch. Updates the main.cpp help message and README to reflect this option; they now also inform the user of the --color switch. --------- Co-authored-by: leejet --- README.md | 3 +- denoiser.hpp | 141 +++++++++++++++++++++++++++++++++++++++++- examples/cli/main.cpp | 4 +- model.cpp | 1 + stable-diffusion.cpp | 5 ++ stable-diffusion.h | 1 + 6 files changed, 152 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index b5920e6..8f4a5f3 100644 --- a/README.md +++ b/README.md @@ -190,12 +190,13 @@ arguments: --rng {std_default, cuda} RNG (default: cuda) -s SEED, --seed SEED RNG seed (default: 42, use random seed for < 0) -b, --batch-count COUNT number of images to generate. 
- --schedule {discrete, karras} Denoiser sigma schedule (default: discrete) + --schedule {discrete, karras, ays} Denoiser sigma schedule (default: discrete) --clip-skip N ignore last layers of CLIP network; 1 ignores none, 2 ignores one layer (default: -1) <= 0 represents unspecified, will be 1 for SD1.x, 2 for SD2.x --vae-tiling process vae in tiles to reduce memory usage --control-net-cpu keep controlnet in cpu (for low vram) --canny apply canny preprocessor (edge detection) + --color colors the logging tags according to level -v, --verbose print extra info ``` diff --git a/denoiser.hpp b/denoiser.hpp index fd93454..255167c 100644 --- a/denoiser.hpp +++ b/denoiser.hpp @@ -13,6 +13,7 @@ struct SigmaSchedule { float alphas_cumprod[TIMESTEPS]; float sigmas[TIMESTEPS]; float log_sigmas[TIMESTEPS]; + int version = 0; virtual std::vector get_sigmas(uint32_t n) = 0; @@ -75,6 +76,144 @@ struct DiscreteSchedule : SigmaSchedule { } }; +/* +https://research.nvidia.com/labs/toronto-ai/AlignYourSteps/howto.html +*/ +struct AYSSchedule : SigmaSchedule { + /* interp and linear_interp adapted from dpilger26's NumCpp library: + * https://github.com/dpilger26/NumCpp/tree/5e40aab74d14e257d65d3dc385c9ff9e2120c60e */ + constexpr double interp(double left, double right, double perc) noexcept { + return (left * (1. 
- perc)) + (right * perc); + } + + /* This will make the assumption that the reference x and y values are + * already sorted in ascending order because they are being generated as + * such in the calling function */ + std::vector linear_interp(std::vector new_x, + const std::vector ref_x, + const std::vector ref_y) { + const size_t len_x = new_x.size(); + size_t i = 0; + size_t j = 0; + std::vector new_y(len_x); + + if (ref_x.size() != ref_y.size()) { + LOG_ERROR("Linear Interoplation Failed: length mismatch"); + return new_y; + } + + /* serves as the bounds checking for the below while loop */ + if ((new_x[0] < ref_x[0]) || (new_x[new_x.size() - 1] > ref_x[ref_x.size() - 1])) { + LOG_ERROR("Linear Interpolation Failed: bad bounds"); + return new_y; + } + + while (i < len_x) { + if ((ref_x[j] > new_x[i]) || (new_x[i] > ref_x[j + 1])) { + j++; + continue; + } + + const double perc = static_cast(new_x[i] - ref_x[j]) / static_cast(ref_x[j + 1] - ref_x[j]); + + new_y[i] = interp(ref_y[j], ref_y[j + 1], perc); + i++; + } + + return new_y; + } + + std::vector linear_space(const float start, const float end, const size_t num_points) { + std::vector result(num_points); + const float inc = (end - start) / (static_cast(num_points - 1)); + + if (num_points > 0) { + result[0] = start; + + for (size_t i = 1; i < num_points; i++) { + result[i] = result[i - 1] + inc; + } + } + + return result; + } + + std::vector log_linear_interpolation(std::vector sigma_in, + const size_t new_len) { + const size_t s_len = sigma_in.size(); + std::vector x_vals = linear_space(0.f, 1.f, s_len); + std::vector y_vals(s_len); + + /* Reverses the input array to be ascending instead of descending, + * also hits it with a log, it is log-linear interpolation after all */ + for (size_t i = 0; i < s_len; i++) { + y_vals[i] = std::log(sigma_in[s_len - i - 1]); + } + + std::vector new_x_vals = linear_space(0.f, 1.f, new_len); + std::vector new_y_vals = linear_interp(new_x_vals, x_vals, y_vals); + std::vector 
results(new_len); + + for (size_t i = 0; i < new_len; i++) { + results[i] = static_cast(std::exp(new_y_vals[new_len - i - 1])); + } + + return results; + } + + std::vector get_sigmas(uint32_t len) { + const std::vector noise_levels[] = { + /* SD1.5 */ + {14.6146412293f, 6.4745760956f, 3.8636745985f, 2.6946151520f, + 1.8841921177f, 1.3943805092f, 0.9642583904f, 0.6523686016f, + 0.3977456272f, 0.1515232662f, 0.0291671582f}, + /* SDXL */ + {14.6146412293f, 6.3184485287f, 3.7681790315f, 2.1811480769f, + 1.3405244945f, 0.8620721141f, 0.5550693289f, 0.3798540708f, + 0.2332364134f, 0.1114188177f, 0.0291671582f}, + /* SVD */ + {700.00f, 54.5f, 15.886f, 7.977f, 4.248f, 1.789f, 0.981f, 0.403f, + 0.173f, 0.034f, 0.002f}, + }; + + std::vector inputs; + std::vector results(len + 1); + + switch (version) { + case VERSION_2_x: /* fallthrough */ + LOG_WARN("AYS not designed for SD2.X models"); + case VERSION_1_x: + LOG_INFO("AYS using SD1.5 noise levels"); + inputs = noise_levels[0]; + break; + case VERSION_XL: + LOG_INFO("AYS using SDXL noise levels"); + inputs = noise_levels[1]; + break; + case VERSION_SVD: + LOG_INFO("AYS using SVD noise levels"); + inputs = noise_levels[2]; + break; + default: + LOG_ERROR("Version not compatable with AYS scheduler"); + return results; + } + + /* Stretches those pre-calculated reference levels out to the desired + * size using log-linear interpolation */ + if ((len + 1) != inputs.size()) { + results = log_linear_interpolation(inputs, len + 1); + } else { + results = inputs; + } + + /* Not sure if this is strictly neccessary */ + results[len] = 0.0f; + + return results; + } +}; + struct KarrasSchedule : SigmaSchedule { std::vector get_sigmas(uint32_t n) { // These *COULD* be function arguments here, @@ -122,4 +261,4 @@ struct CompVisVDenoiser : public Denoiser { } }; -#endif // __DENOISER_HPP__ \ No newline at end of file +#endif // __DENOISER_HPP__ diff --git a/examples/cli/main.cpp b/examples/cli/main.cpp index 0f26644..565af74 100644 --- 
a/examples/cli/main.cpp +++ b/examples/cli/main.cpp @@ -43,6 +43,7 @@ const char* schedule_str[] = { "default", "discrete", "karras", + "ays", }; const char* modes_str[] = { @@ -190,12 +191,13 @@ void print_usage(int argc, const char* argv[]) { printf(" --rng {std_default, cuda} RNG (default: cuda)\n"); printf(" -s SEED, --seed SEED RNG seed (default: 42, use random seed for < 0)\n"); printf(" -b, --batch-count COUNT number of images to generate.\n"); - printf(" --schedule {discrete, karras} Denoiser sigma schedule (default: discrete)\n"); + printf(" --schedule {discrete, karras, ays} Denoiser sigma schedule (default: discrete)\n"); printf(" --clip-skip N ignore last layers of CLIP network; 1 ignores none, 2 ignores one layer (default: -1)\n"); printf(" <= 0 represents unspecified, will be 1 for SD1.x, 2 for SD2.x\n"); printf(" --vae-tiling process vae in tiles to reduce memory usage\n"); printf(" --control-net-cpu keep controlnet in cpu (for low vram)\n"); printf(" --canny apply canny preprocessor (edge detection)\n"); + printf(" --color Colors the logging tags according to level\n"); printf(" -v, --verbose print extra info\n"); } diff --git a/model.cpp b/model.cpp index 3db919b..684317d 100644 --- a/model.cpp +++ b/model.cpp @@ -890,6 +890,7 @@ bool ModelLoader::init_from_safetensors_file(const std::string& file_path, const // ggml/src/ggml.c:2745 if (n_dims < 1 || n_dims > GGML_MAX_DIMS) { + LOG_ERROR("skip tensor '%s' with n_dims %d", name.c_str(), n_dims); continue; } diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp index abaae69..e4eb56e 100644 --- a/stable-diffusion.cpp +++ b/stable-diffusion.cpp @@ -450,6 +450,11 @@ public: LOG_INFO("running with Karras schedule"); denoiser->schedule = std::make_shared(); break; + case AYS: + LOG_INFO("Running with Align-Your-Steps schedule"); + denoiser->schedule = std::make_shared(); + denoiser->schedule->version = version; + break; case DEFAULT: // Don't touch anything. 
break; diff --git a/stable-diffusion.h b/stable-diffusion.h index 0de17ae..4031a09 100644 --- a/stable-diffusion.h +++ b/stable-diffusion.h @@ -49,6 +49,7 @@ enum schedule_t { DEFAULT, DISCRETE, KARRAS, + AYS, N_SCHEDULES };