#ifndef __DENOISER_HPP__ #define __DENOISER_HPP__ #include "ggml_extend.hpp" /*================================================= CompVisDenoiser ==================================================*/ // Ref: https://github.com/crowsonkb/k-diffusion/blob/master/k_diffusion/external.py #define TIMESTEPS 1000 struct SigmaSchedule { float alphas_cumprod[TIMESTEPS]; float sigmas[TIMESTEPS]; float log_sigmas[TIMESTEPS]; virtual std::vector get_sigmas(uint32_t n) = 0; float sigma_to_t(float sigma) { float log_sigma = std::log(sigma); std::vector dists; dists.reserve(TIMESTEPS); for (float log_sigma_val : log_sigmas) { dists.push_back(log_sigma - log_sigma_val); } int low_idx = 0; for (size_t i = 0; i < TIMESTEPS; i++) { if (dists[i] >= 0) { low_idx++; } } low_idx = std::min(std::max(low_idx - 1, 0), TIMESTEPS - 2); int high_idx = low_idx + 1; float low = log_sigmas[low_idx]; float high = log_sigmas[high_idx]; float w = (low - log_sigma) / (low - high); w = std::max(0.f, std::min(1.f, w)); float t = (1.0f - w) * low_idx + w * high_idx; return t; } float t_to_sigma(float t) { int low_idx = static_cast(std::floor(t)); int high_idx = static_cast(std::ceil(t)); float w = t - static_cast(low_idx); float log_sigma = (1.0f - w) * log_sigmas[low_idx] + w * log_sigmas[high_idx]; return std::exp(log_sigma); } }; struct DiscreteSchedule : SigmaSchedule { std::vector get_sigmas(uint32_t n) { std::vector result; int t_max = TIMESTEPS - 1; if (n == 0) { return result; } else if (n == 1) { result.push_back(t_to_sigma((float)t_max)); result.push_back(0); return result; } float step = static_cast(t_max) / static_cast(n - 1); for (uint32_t i = 0; i < n; ++i) { float t = t_max - step * i; result.push_back(t_to_sigma(t)); } result.push_back(0); return result; } }; struct KarrasSchedule : SigmaSchedule { std::vector get_sigmas(uint32_t n) { // These *COULD* be function arguments here, // but does anybody ever bother to touch them? float sigma_min = 0.1f; float sigma_max = 10.f; float rho = 7.f; std::vector result(n + 1); float min_inv_rho = pow(sigma_min, (1.f / rho)); float max_inv_rho = pow(sigma_max, (1.f / rho)); for (uint32_t i = 0; i < n; i++) { // Eq. (5) from Karras et al 2022 result[i] = pow(max_inv_rho + (float)i / ((float)n - 1.f) * (min_inv_rho - max_inv_rho), rho); } result[n] = 0.; return result; } }; struct Denoiser { std::shared_ptr schedule = std::make_shared(); virtual std::vector get_scalings(float sigma) = 0; }; struct CompVisDenoiser : public Denoiser { float sigma_data = 1.0f; std::vector get_scalings(float sigma) { float c_out = -sigma; float c_in = 1.0f / std::sqrt(sigma * sigma + sigma_data * sigma_data); return {c_out, c_in}; } }; struct CompVisVDenoiser : public Denoiser { float sigma_data = 1.0f; std::vector get_scalings(float sigma) { float c_skip = sigma_data * sigma_data / (sigma * sigma + sigma_data * sigma_data); float c_out = -sigma * sigma_data / std::sqrt(sigma * sigma + sigma_data * sigma_data); float c_in = 1.0f / std::sqrt(sigma * sigma + sigma_data * sigma_data); return {c_skip, c_out, c_in}; } }; #endif // __DENOISER_HPP__