feat: add Euler, Heun and DPM++ (2M) samplers (#50)
* Add Euler sampler * Add Heun sampler * Add DPM++ (2M) sampler * Add modified DPM++ (2M) "v2" sampler. This was proposed in an issue discussion of the stable diffusion webui, at https://github.com/AUTOMATIC1111/stable-diffusion-webui/discussions/8457 and apparently works around overstepping of the DPM++ (2M) method with small step counts. The parameter is called dpmpp2mv2 here. * match code style --------- Co-authored-by: Urs Ganse <urs@nerd2nerd.org> Co-authored-by: leejet <leejet714@gmail.com>
This commit is contained in:
parent
b85b236b13
commit
b6899e8fc2
@ -20,6 +20,10 @@ Inference of [Stable Diffusion](https://github.com/CompVis/stable-diffusion) in
|
||||
- [stable-diffusion-webui](https://github.com/AUTOMATIC1111/stable-diffusion-webui) style tokenizer (not all the features, only token weighting for now)
|
||||
- Sampling method
|
||||
- `Euler A`
|
||||
- `Euler`
|
||||
- `Heun`
|
||||
- `DPM++ 2M`
|
||||
- [`DPM++ 2M v2`](https://github.com/AUTOMATIC1111/stable-diffusion-webui/discussions/8457)
|
||||
- Cross-platform reproducibility (`--rng cuda`, consistent with the `stable-diffusion-webui GPU RNG`)
|
||||
- Supported platforms
|
||||
- Linux
|
||||
@ -125,8 +129,10 @@ arguments:
|
||||
1.0 corresponds to full destruction of information in init image
|
||||
-H, --height H image height, in pixel space (default: 512)
|
||||
-W, --width W image width, in pixel space (default: 512)
|
||||
--sample-method SAMPLE_METHOD sample method (default: "eular a")
|
||||
--sampling-method {euler, euler_a, heun, dpm++2m, dpm++2mv2}
|
||||
sampling method (default: "euler_a")
|
||||
--steps STEPS number of sample steps (default: 20)
|
||||
--rng {std_default, cuda} RNG (default: cuda)
|
||||
-s SEED, --seed SEED RNG seed (default: 42, use random seed for < 0)
|
||||
-v, --verbose print extra info
|
||||
```
|
||||
|
@ -72,6 +72,14 @@ const char* rng_type_to_str[] = {
|
||||
"cuda",
|
||||
};
|
||||
|
||||
// Names of the sampling methods, in the same order as enum SampleMethod in stable-diffusion.h
|
||||
const char* sample_method_str[] = {
|
||||
"euler_a",
|
||||
"euler",
|
||||
"heun",
|
||||
"dpm++2m",
|
||||
"dpm++2mv2"};
|
||||
|
||||
struct Option {
|
||||
int n_threads = -1;
|
||||
std::string mode = TXT2IMG;
|
||||
@ -83,7 +91,7 @@ struct Option {
|
||||
float cfg_scale = 7.0f;
|
||||
int w = 512;
|
||||
int h = 512;
|
||||
SampleMethod sample_method = EULAR_A;
|
||||
SampleMethod sample_method = EULER_A;
|
||||
int sample_steps = 20;
|
||||
float strength = 0.75f;
|
||||
RNGType rng_type = CUDA_RNG;
|
||||
@ -102,7 +110,7 @@ struct Option {
|
||||
printf(" cfg_scale: %.2f\n", cfg_scale);
|
||||
printf(" width: %d\n", w);
|
||||
printf(" height: %d\n", h);
|
||||
printf(" sample_method: %s\n", "eular a");
|
||||
printf(" sample_method: %s\n", sample_method_str[sample_method]);
|
||||
printf(" sample_steps: %d\n", sample_steps);
|
||||
printf(" strength: %.2f\n", strength);
|
||||
printf(" rng: %s\n", rng_type_to_str[rng_type]);
|
||||
@ -128,7 +136,8 @@ void print_usage(int argc, const char* argv[]) {
|
||||
printf(" 1.0 corresponds to full destruction of information in init image\n");
|
||||
printf(" -H, --height H image height, in pixel space (default: 512)\n");
|
||||
printf(" -W, --width W image width, in pixel space (default: 512)\n");
|
||||
printf(" --sample-method SAMPLE_METHOD sample method (default: \"eular a\")\n");
|
||||
printf(" --sampling-method {euler, euler_a, heun, dpm++2m, dpm++2mv2}\n");
|
||||
printf(" sampling method (default: \"euler_a\")\n");
|
||||
printf(" --steps STEPS number of sample steps (default: 20)\n");
|
||||
printf(" --rng {std_default, cuda} RNG (default: cuda)\n");
|
||||
printf(" -s SEED, --seed SEED RNG seed (default: 42, use random seed for < 0)\n");
|
||||
@ -234,6 +243,23 @@ void parse_args(int argc, const char* argv[], Option* opt) {
|
||||
break;
|
||||
}
|
||||
opt->seed = std::stoll(argv[i]);
|
||||
} else if (arg == "--sampling-method") {
|
||||
if (++i >= argc) {
|
||||
invalid_arg = true;
|
||||
break;
|
||||
}
|
||||
const char* sample_method_selected = argv[i];
|
||||
int sample_method_found = -1;
|
||||
for (int m = 0; m < N_SAMPLE_METHODS; m++) {
|
||||
if (!strcmp(sample_method_selected, sample_method_str[m])) {
|
||||
sample_method_found = m;
|
||||
}
|
||||
}
|
||||
if (sample_method_found == -1) {
|
||||
invalid_arg = true;
|
||||
break;
|
||||
}
|
||||
opt->sample_method = (SampleMethod)sample_method_found;
|
||||
} else if (arg == "-h" || arg == "--help") {
|
||||
print_usage(argc, argv);
|
||||
exit(0);
|
||||
|
@ -3502,69 +3502,255 @@ class StableDiffusionGGML {
|
||||
ggml_graph_print(&diffusion_graph);
|
||||
#endif
|
||||
int64_t t1 = ggml_time_ms();
|
||||
LOG_INFO("step %d sampling completed, taking %.2fs", step, (t1 - t0) * 1.0f / 1000);
|
||||
LOG_DEBUG("diffusion graph use %.2fMB runtime memory: static %.2fMB, dynamic %.2fMB",
|
||||
(ctx_size + ggml_curr_max_dynamic_size()) * 1.0f / 1024 / 1024,
|
||||
ctx_size * 1.0f / 1024 / 1024,
|
||||
ggml_curr_max_dynamic_size() * 1.0f / 1024 / 1024);
|
||||
LOG_DEBUG("%zu bytes of dynamic memory has not been released yet", ggml_dynamic_size());
|
||||
if (step > 0) {
|
||||
LOG_INFO("step %d sampling completed, taking %.2fs", step, (t1 - t0) * 1.0f / 1000);
|
||||
LOG_DEBUG("diffusion graph use %.2fMB runtime memory: static %.2fMB, dynamic %.2fMB",
|
||||
(ctx_size + ggml_curr_max_dynamic_size()) * 1.0f / 1024 / 1024,
|
||||
ctx_size * 1.0f / 1024 / 1024,
|
||||
ggml_curr_max_dynamic_size() * 1.0f / 1024 / 1024);
|
||||
LOG_DEBUG("%zu bytes of dynamic memory has not been released yet", ggml_dynamic_size());
|
||||
}
|
||||
};
|
||||
|
||||
// sample_euler_ancestral
|
||||
{
|
||||
ggml_set_dynamic(ctx, false);
|
||||
struct ggml_tensor* noise = ggml_dup_tensor(ctx, x);
|
||||
struct ggml_tensor* d = ggml_dup_tensor(ctx, x);
|
||||
ggml_set_dynamic(ctx, params.dynamic);
|
||||
switch (method) {
|
||||
case EULER_A: {
|
||||
LOG_INFO("sampling using Euler A method");
|
||||
ggml_set_dynamic(ctx, false);
|
||||
struct ggml_tensor* noise = ggml_dup_tensor(ctx, x);
|
||||
struct ggml_tensor* d = ggml_dup_tensor(ctx, x);
|
||||
ggml_set_dynamic(ctx, params.dynamic);
|
||||
|
||||
for (int i = 0; i < steps; i++) {
|
||||
float sigma = sigmas[i];
|
||||
for (int i = 0; i < steps; i++) {
|
||||
float sigma = sigmas[i];
|
||||
|
||||
// denoise
|
||||
denoise(x, sigma, i + 1);
|
||||
// denoise
|
||||
denoise(x, sigma, i + 1);
|
||||
|
||||
// d = (x - denoised) / sigma
|
||||
{
|
||||
float* vec_d = (float*)d->data;
|
||||
float* vec_x = (float*)x->data;
|
||||
float* vec_denoised = (float*)denoised->data;
|
||||
|
||||
for (int i = 0; i < ggml_nelements(d); i++) {
|
||||
vec_d[i] = (vec_x[i] - vec_denoised[i]) / sigma;
|
||||
}
|
||||
}
|
||||
|
||||
// get_ancestral_step
|
||||
float sigma_up = std::min(sigmas[i + 1],
|
||||
std::sqrt(sigmas[i + 1] * sigmas[i + 1] * (sigmas[i] * sigmas[i] - sigmas[i + 1] * sigmas[i + 1]) / (sigmas[i] * sigmas[i])));
|
||||
float sigma_down = std::sqrt(sigmas[i + 1] * sigmas[i + 1] - sigma_up * sigma_up);
|
||||
|
||||
// Euler method
|
||||
float dt = sigma_down - sigmas[i];
|
||||
// x = x + d * dt
|
||||
{
|
||||
float* vec_d = (float*)d->data;
|
||||
float* vec_x = (float*)x->data;
|
||||
|
||||
for (int i = 0; i < ggml_nelements(x); i++) {
|
||||
vec_x[i] = vec_x[i] + vec_d[i] * dt;
|
||||
}
|
||||
}
|
||||
|
||||
if (sigmas[i + 1] > 0) {
|
||||
// x = x + noise_sampler(sigmas[i], sigmas[i + 1]) * s_noise * sigma_up
|
||||
ggml_tensor_set_f32_randn(noise, rng);
|
||||
// noise = load_tensor_from_file(res_ctx, "./rand" + std::to_string(i+1) + ".bin");
|
||||
// d = (x - denoised) / sigma
|
||||
{
|
||||
float* vec_d = (float*)d->data;
|
||||
float* vec_x = (float*)x->data;
|
||||
float* vec_denoised = (float*)denoised->data;
|
||||
|
||||
for (int i = 0; i < ggml_nelements(d); i++) {
|
||||
vec_d[i] = (vec_x[i] - vec_denoised[i]) / sigma;
|
||||
}
|
||||
}
|
||||
|
||||
// get_ancestral_step
|
||||
float sigma_up = std::min(sigmas[i + 1],
|
||||
std::sqrt(sigmas[i + 1] * sigmas[i + 1] * (sigmas[i] * sigmas[i] - sigmas[i + 1] * sigmas[i + 1]) / (sigmas[i] * sigmas[i])));
|
||||
float sigma_down = std::sqrt(sigmas[i + 1] * sigmas[i + 1] - sigma_up * sigma_up);
|
||||
|
||||
// Euler method
|
||||
float dt = sigma_down - sigmas[i];
|
||||
// x = x + d * dt
|
||||
{
|
||||
float* vec_d = (float*)d->data;
|
||||
float* vec_x = (float*)x->data;
|
||||
float* vec_noise = (float*)noise->data;
|
||||
|
||||
for (int i = 0; i < ggml_nelements(x); i++) {
|
||||
vec_x[i] = vec_x[i] + vec_noise[i] * sigma_up;
|
||||
vec_x[i] = vec_x[i] + vec_d[i] * dt;
|
||||
}
|
||||
}
|
||||
|
||||
if (sigmas[i + 1] > 0) {
|
||||
// x = x + noise_sampler(sigmas[i], sigmas[i + 1]) * s_noise * sigma_up
|
||||
ggml_tensor_set_f32_randn(noise, rng);
|
||||
// noise = load_tensor_from_file(res_ctx, "./rand" + std::to_string(i+1) + ".bin");
|
||||
{
|
||||
float* vec_x = (float*)x->data;
|
||||
float* vec_noise = (float*)noise->data;
|
||||
|
||||
for (int i = 0; i < ggml_nelements(x); i++) {
|
||||
vec_x[i] = vec_x[i] + vec_noise[i] * sigma_up;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} break;
|
||||
case EULER: // Implemented without any sigma churn
|
||||
{
|
||||
LOG_INFO("sampling using Euler method");
|
||||
ggml_set_dynamic(ctx, false);
|
||||
struct ggml_tensor* d = ggml_dup_tensor(ctx, x);
|
||||
ggml_set_dynamic(ctx, params.dynamic);
|
||||
|
||||
for (int i = 0; i < steps; i++) {
|
||||
float sigma = sigmas[i];
|
||||
|
||||
// denoise
|
||||
denoise(x, sigma, i + 1);
|
||||
|
||||
// d = (x - denoised) / sigma
|
||||
{
|
||||
float* vec_d = (float*)d->data;
|
||||
float* vec_x = (float*)x->data;
|
||||
float* vec_denoised = (float*)denoised->data;
|
||||
|
||||
for (int j = 0; j < ggml_nelements(d); j++) {
|
||||
vec_d[j] = (vec_x[j] - vec_denoised[j]) / sigma;
|
||||
}
|
||||
}
|
||||
|
||||
float dt = sigmas[i + 1] - sigma;
|
||||
// x = x + d * dt
|
||||
{
|
||||
float* vec_d = (float*)d->data;
|
||||
float* vec_x = (float*)x->data;
|
||||
|
||||
for (int j = 0; j < ggml_nelements(x); j++) {
|
||||
vec_x[j] = vec_x[j] + vec_d[j] * dt;
|
||||
}
|
||||
}
|
||||
}
|
||||
} break;
|
||||
case HEUN: {
|
||||
LOG_INFO("sampling using Heun method");
|
||||
ggml_set_dynamic(ctx, false);
|
||||
struct ggml_tensor* d = ggml_dup_tensor(ctx, x);
|
||||
struct ggml_tensor* x2 = ggml_dup_tensor(ctx, x);
|
||||
ggml_set_dynamic(ctx, params.dynamic);
|
||||
|
||||
for (int i = 0; i < steps; i++) {
|
||||
// denoise
|
||||
denoise(x, sigmas[i], -(i + 1));
|
||||
|
||||
// d = (x - denoised) / sigma
|
||||
{
|
||||
float* vec_d = (float*)d->data;
|
||||
float* vec_x = (float*)x->data;
|
||||
float* vec_denoised = (float*)denoised->data;
|
||||
|
||||
for (int j = 0; j < ggml_nelements(x); j++) {
|
||||
vec_d[j] = (vec_x[j] - vec_denoised[j]) / sigmas[i];
|
||||
}
|
||||
}
|
||||
|
||||
float dt = sigmas[i + 1] - sigmas[i];
|
||||
if (sigmas[i + 1] == 0) {
|
||||
// Euler step
|
||||
// x = x + d * dt
|
||||
float* vec_d = (float*)d->data;
|
||||
float* vec_x = (float*)x->data;
|
||||
|
||||
for (int j = 0; j < ggml_nelements(x); j++) {
|
||||
vec_x[j] = vec_x[j] + vec_d[j] * dt;
|
||||
}
|
||||
} else {
|
||||
// Heun step
|
||||
float* vec_d = (float*)d->data;
|
||||
float* vec_d2 = (float*)d->data;
|
||||
float* vec_x = (float*)x->data;
|
||||
float* vec_x2 = (float*)x2->data;
|
||||
|
||||
for (int j = 0; j < ggml_nelements(x); j++) {
|
||||
vec_x2[j] = vec_x[j] + vec_d[j] * dt;
|
||||
}
|
||||
|
||||
denoise(x2, sigmas[i + 1], i + 1);
|
||||
float* vec_denoised = (float*)denoised->data;
|
||||
for (int j = 0; j < ggml_nelements(x); j++) {
|
||||
float d2 = (vec_x2[j] - vec_denoised[j]) / sigmas[i + 1];
|
||||
vec_d[j] = (vec_d[j] + d2) / 2;
|
||||
vec_x[j] = vec_x[j] + vec_d[j] * dt;
|
||||
}
|
||||
}
|
||||
}
|
||||
} break;
|
||||
case DPMPP2M: // DPM++ (2M) from Karras et al (2022)
|
||||
{
|
||||
LOG_INFO("sampling using DPM++ (2M) method");
|
||||
ggml_set_dynamic(ctx, false);
|
||||
struct ggml_tensor* old_denoised = ggml_dup_tensor(ctx, x);
|
||||
ggml_set_dynamic(ctx, params.dynamic);
|
||||
|
||||
auto t_fn = [](float sigma) -> float { return -log(sigma); };
|
||||
|
||||
for (int i = 0; i < steps; i++) {
|
||||
// denoise
|
||||
denoise(x, sigmas[i], i + 1);
|
||||
|
||||
float t = t_fn(sigmas[i]);
|
||||
float t_next = t_fn(sigmas[i + 1]);
|
||||
float h = t_next - t;
|
||||
float a = sigmas[i + 1] / sigmas[i];
|
||||
float b = exp(-h) - 1.;
|
||||
float* vec_x = (float*)x->data;
|
||||
float* vec_denoised = (float*)denoised->data;
|
||||
float* vec_old_denoised = (float*)old_denoised->data;
|
||||
|
||||
if (i == 0 || sigmas[i + 1] == 0) {
|
||||
// Simpler step for the edge cases
|
||||
for (int j = 0; j < ggml_nelements(x); j++) {
|
||||
vec_x[j] = a * vec_x[j] - b * vec_denoised[j];
|
||||
}
|
||||
} else {
|
||||
float h_last = t - t_fn(sigmas[i - 1]);
|
||||
float r = h_last / h;
|
||||
for (int j = 0; j < ggml_nelements(x); j++) {
|
||||
float denoised_d = (1. + 1. / (2. * r)) * vec_denoised[j] - (1. / (2. * r)) * vec_old_denoised[j];
|
||||
vec_x[j] = a * vec_x[j] - b * denoised_d;
|
||||
}
|
||||
}
|
||||
|
||||
// old_denoised = denoised
|
||||
for (int j = 0; j < ggml_nelements(x); j++) {
|
||||
vec_old_denoised[j] = vec_denoised[j];
|
||||
}
|
||||
}
|
||||
} break;
|
||||
case DPMPP2Mv2: // Modified DPM++ (2M) from https://github.com/AUTOMATIC1111/stable-diffusion-webui/discussions/8457
|
||||
{
|
||||
LOG_INFO("sampling using modified DPM++ (2M) method");
|
||||
ggml_set_dynamic(ctx, false);
|
||||
struct ggml_tensor* old_denoised = ggml_dup_tensor(ctx, x);
|
||||
ggml_set_dynamic(ctx, params.dynamic);
|
||||
|
||||
auto t_fn = [](float sigma) -> float { return -log(sigma); };
|
||||
|
||||
for (int i = 0; i < steps; i++) {
|
||||
// denoise
|
||||
denoise(x, sigmas[i], i + 1);
|
||||
|
||||
float t = t_fn(sigmas[i]);
|
||||
float t_next = t_fn(sigmas[i + 1]);
|
||||
float h = t_next - t;
|
||||
float a = sigmas[i + 1] / sigmas[i];
|
||||
float* vec_x = (float*)x->data;
|
||||
float* vec_denoised = (float*)denoised->data;
|
||||
float* vec_old_denoised = (float*)old_denoised->data;
|
||||
|
||||
if (i == 0 || sigmas[i + 1] == 0) {
|
||||
// Simpler step for the edge cases
|
||||
float b = exp(-h) - 1.;
|
||||
for (int j = 0; j < ggml_nelements(x); j++) {
|
||||
vec_x[j] = a * vec_x[j] - b * vec_denoised[j];
|
||||
}
|
||||
} else {
|
||||
float h_last = t - t_fn(sigmas[i - 1]);
|
||||
float h_min = std::min(h_last, h);
|
||||
float h_max = std::max(h_last, h);
|
||||
float r = h_max / h_min;
|
||||
float h_d = (h_max + h_min) / 2.;
|
||||
float b = exp(-h_d) - 1.;
|
||||
for (int j = 0; j < ggml_nelements(x); j++) {
|
||||
float denoised_d = (1. + 1. / (2. * r)) * vec_denoised[j] - (1. / (2. * r)) * vec_old_denoised[j];
|
||||
vec_x[j] = a * vec_x[j] - b * denoised_d;
|
||||
}
|
||||
}
|
||||
|
||||
// old_denoised = denoised
|
||||
for (int j = 0; j < ggml_nelements(x); j++) {
|
||||
vec_old_denoised[j] = vec_denoised[j];
|
||||
}
|
||||
}
|
||||
} break;
|
||||
|
||||
default:
|
||||
LOG_ERROR("Attempting to sample with nonexisting sample method %i", method);
|
||||
abort();
|
||||
}
|
||||
|
||||
size_t rt_mem_size = ctx_size + ggml_curr_max_dynamic_size();
|
||||
@ -3599,7 +3785,7 @@ class StableDiffusionGGML {
|
||||
struct ggml_tensor* result = NULL;
|
||||
|
||||
// calculate the amount of memory required
|
||||
size_t ctx_size = 10 * 1024 * 1024; // 10MB
|
||||
size_t ctx_size = 10 * 1024 * 1024; // 10MB
|
||||
{
|
||||
struct ggml_init_params params;
|
||||
params.mem_size = ctx_size;
|
||||
@ -3728,7 +3914,7 @@ class StableDiffusionGGML {
|
||||
}
|
||||
|
||||
// calculate the amount of memory required
|
||||
size_t ctx_size = 10 * 1024 * 1024; // 10MB
|
||||
size_t ctx_size = 10 * 1024 * 1024; // 10MB
|
||||
{
|
||||
struct ggml_init_params params;
|
||||
params.mem_size = ctx_size;
|
||||
|
@ -17,7 +17,12 @@ enum RNGType {
|
||||
};
|
||||
|
||||
enum SampleMethod {
|
||||
EULAR_A,
|
||||
EULER_A,
|
||||
EULER,
|
||||
HEUN,
|
||||
DPMPP2M,
|
||||
DPMPP2Mv2,
|
||||
N_SAMPLE_METHODS
|
||||
};
|
||||
|
||||
class StableDiffusionGGML;
|
||||
|
Loading…
Reference in New Issue
Block a user