stable-diffusion.cpp/util.cpp
Grauho 48bcce493f
fix: avoid double free and fix sdxl lora naming conversion
* Fixed a double free issue when running multiple backends on the CPU, eg: CLIP
and the primary backend, as this would result in the *_backend pointers both
pointing to the same thing resulting in a segfault when calling the
StableDiffusionGGML destructor.

* Improve logging to allow for a color switch on the command line interface.
Changed the base log_printf function to not bake the log level directly into
the log buffer as that information is already passed the logging function via
the level parameter and it's easier to add in there than strip it out.

* Added a fix for certain SDXL LoRAs that don't seem to follow the expected
naming convention, converts over the tensor name during the LoRA model
loading. Added some logging of useful LoRA loading information. Had to
increase the base size of the GGML graph as the existing size results in an
insufficient graph memory error when using SDXL LoRAs.

* small fixes

---------

Co-authored-by: leejet <leejet714@gmail.com>
2024-03-20 22:00:22 +08:00

566 lines
18 KiB
C++

#include "util.h"
#include <stdarg.h>
#include <algorithm>
#include <cmath>
#include <codecvt>
#include <fstream>
#include <locale>
#include <sstream>
#include <string>
#include <thread>
#include <unordered_set>
#include <vector>
#include "preprocessing.hpp"
#if defined(__APPLE__) && defined(__MACH__)
#include <sys/sysctl.h>
#include <sys/types.h>
#endif
#if !defined(_WIN32)
#include <sys/ioctl.h>
#include <unistd.h>
#endif
#include "ggml/ggml.h"
#include "stable-diffusion.h"
#define STB_IMAGE_RESIZE_IMPLEMENTATION
#include "stb_image_resize.h"
bool ends_with(const std::string& str, const std::string& ending) {
if (str.length() >= ending.length()) {
return (str.compare(str.length() - ending.length(), ending.length(), ending) == 0);
} else {
return false;
}
}
bool starts_with(const std::string& str, const std::string& start) {
if (str.find(start) == 0) {
return true;
}
return false;
}
bool contains(const std::string& str, const std::string& substr) {
if (str.find(substr) != std::string::npos) {
return true;
}
return false;
}
void replace_all_chars(std::string& str, char target, char replacement) {
for (size_t i = 0; i < str.length(); ++i) {
if (str[i] == target) {
str[i] = replacement;
}
}
}
std::string format(const char* fmt, ...) {
va_list ap;
va_list ap2;
va_start(ap, fmt);
va_copy(ap2, ap);
int size = vsnprintf(NULL, 0, fmt, ap);
std::vector<char> buf(size + 1);
int size2 = vsnprintf(buf.data(), size + 1, fmt, ap2);
va_end(ap2);
va_end(ap);
return std::string(buf.data(), size);
}
#ifdef _WIN32 // code for windows
#include <windows.h>
bool file_exists(const std::string& filename) {
DWORD attributes = GetFileAttributesA(filename.c_str());
return (attributes != INVALID_FILE_ATTRIBUTES && !(attributes & FILE_ATTRIBUTE_DIRECTORY));
}
bool is_directory(const std::string& path) {
DWORD attributes = GetFileAttributesA(path.c_str());
return (attributes != INVALID_FILE_ATTRIBUTES && (attributes & FILE_ATTRIBUTE_DIRECTORY));
}
std::string get_full_path(const std::string& dir, const std::string& filename) {
std::string full_path = dir + "\\" + filename;
WIN32_FIND_DATA find_file_data;
HANDLE hFind = FindFirstFile(full_path.c_str(), &find_file_data);
if (hFind != INVALID_HANDLE_VALUE) {
FindClose(hFind);
return full_path;
} else {
return "";
}
}
std::vector<std::string> get_files_from_dir(const std::string& dir) {
std::vector<std::string> files;
WIN32_FIND_DATA findFileData;
HANDLE hFind;
char currentDirectory[MAX_PATH];
GetCurrentDirectory(MAX_PATH, currentDirectory);
char directoryPath[MAX_PATH]; // this is absolute path
sprintf(directoryPath, "%s\\%s\\*", currentDirectory, dir.c_str());
// Find the first file in the directory
hFind = FindFirstFile(directoryPath, &findFileData);
// Check if the directory was found
if (hFind == INVALID_HANDLE_VALUE) {
printf("Unable to find directory.\n");
return files;
}
// Loop through all files in the directory
do {
// Check if the found file is a regular file (not a directory)
if (!(findFileData.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)) {
files.push_back(std::string(currentDirectory) + "\\" + dir + "\\" + std::string(findFileData.cFileName));
}
} while (FindNextFile(hFind, &findFileData) != 0);
// Close the handle
FindClose(hFind);
sort(files.begin(), files.end());
return files;
}
#else // Unix
#include <dirent.h>
#include <sys/stat.h>
bool file_exists(const std::string& filename) {
struct stat buffer;
return (stat(filename.c_str(), &buffer) == 0 && S_ISREG(buffer.st_mode));
}
bool is_directory(const std::string& path) {
struct stat buffer;
return (stat(path.c_str(), &buffer) == 0 && S_ISDIR(buffer.st_mode));
}
// TODO: add windows version
std::string get_full_path(const std::string& dir, const std::string& filename) {
DIR* dp = opendir(dir.c_str());
if (dp != nullptr) {
struct dirent* entry;
while ((entry = readdir(dp)) != nullptr) {
if (strcasecmp(entry->d_name, filename.c_str()) == 0) {
closedir(dp);
return dir + "/" + entry->d_name;
}
}
closedir(dp);
}
return "";
}
std::vector<std::string> get_files_from_dir(const std::string& dir) {
std::vector<std::string> files;
DIR* dp = opendir(dir.c_str());
if (dp != nullptr) {
struct dirent* entry;
while ((entry = readdir(dp)) != nullptr) {
std::string fname = dir + "/" + entry->d_name;
if (!is_directory(fname))
files.push_back(fname);
}
closedir(dp);
}
sort(files.begin(), files.end());
return files;
}
#endif
// get_num_physical_cores is copy from
// https://github.com/ggerganov/llama.cpp/blob/master/examples/common.cpp
// LICENSE: https://github.com/ggerganov/llama.cpp/blob/master/LICENSE
int32_t get_num_physical_cores() {
#ifdef __linux__
// enumerate the set of thread siblings, num entries is num cores
std::unordered_set<std::string> siblings;
for (uint32_t cpu = 0; cpu < UINT32_MAX; ++cpu) {
std::ifstream thread_siblings("/sys/devices/system/cpu" + std::to_string(cpu) + "/topology/thread_siblings");
if (!thread_siblings.is_open()) {
break; // no more cpus
}
std::string line;
if (std::getline(thread_siblings, line)) {
siblings.insert(line);
}
}
if (siblings.size() > 0) {
return static_cast<int32_t>(siblings.size());
}
#elif defined(__APPLE__) && defined(__MACH__)
int32_t num_physical_cores;
size_t len = sizeof(num_physical_cores);
int result = sysctlbyname("hw.perflevel0.physicalcpu", &num_physical_cores, &len, NULL, 0);
if (result == 0) {
return num_physical_cores;
}
result = sysctlbyname("hw.physicalcpu", &num_physical_cores, &len, NULL, 0);
if (result == 0) {
return num_physical_cores;
}
#elif defined(_WIN32)
// TODO: Implement
#endif
unsigned int n_threads = std::thread::hardware_concurrency();
return n_threads > 0 ? (n_threads <= 4 ? n_threads : n_threads / 2) : 4;
}
static sd_progress_cb_t sd_progress_cb = NULL;
void* sd_progress_cb_data = NULL;
std::u32string utf8_to_utf32(const std::string& utf8_str) {
std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> converter;
return converter.from_bytes(utf8_str);
}
std::string utf32_to_utf8(const std::u32string& utf32_str) {
std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> converter;
return converter.to_bytes(utf32_str);
}
std::u32string unicode_value_to_utf32(int unicode_value) {
std::u32string utf32_string = {static_cast<char32_t>(unicode_value)};
return utf32_string;
}
static std::string sd_basename(const std::string& path) {
size_t pos = path.find_last_of('/');
if (pos != std::string::npos) {
return path.substr(pos + 1);
}
pos = path.find_last_of('\\');
if (pos != std::string::npos) {
return path.substr(pos + 1);
}
return path;
}
std::string path_join(const std::string& p1, const std::string& p2) {
if (p1.empty()) {
return p2;
}
if (p2.empty()) {
return p1;
}
if (p1[p1.length() - 1] == '/' || p1[p1.length() - 1] == '\\') {
return p1 + p2;
}
return p1 + "/" + p2;
}
sd_image_t* preprocess_id_image(sd_image_t* img) {
int shortest_edge = 224;
int size = shortest_edge;
sd_image_t* resized = NULL;
uint32_t w = img->width;
uint32_t h = img->height;
uint32_t c = img->channel;
// 1. do resize using stb_resize functions
unsigned char* buf = (unsigned char*)malloc(sizeof(unsigned char) * 3 * size * size);
if (!stbir_resize_uint8(img->data, w, h, 0,
buf, size, size, 0,
c)) {
fprintf(stderr, "%s: resize operation failed \n ", __func__);
return resized;
}
// 2. do center crop (likely unnecessary due to step 1)
// 3. do rescale
// 4. do normalize
// 3 and 4 will need to be done in float format.
resized = new sd_image_t{(uint32_t)shortest_edge,
(uint32_t)shortest_edge,
3,
buf};
return resized;
}
void pretty_progress(int step, int steps, float time) {
if (sd_progress_cb) {
sd_progress_cb(step, steps, time, sd_progress_cb_data);
return;
}
if (step == 0) {
return;
}
std::string progress = " |";
int max_progress = 50;
int32_t current = (int32_t)(step * 1.f * max_progress / steps);
for (int i = 0; i < 50; i++) {
if (i > current) {
progress += " ";
} else if (i == current && i != max_progress - 1) {
progress += ">";
} else {
progress += "=";
}
}
progress += "|";
printf(time > 1.0f ? "\r%s %i/%i - %.2fs/it" : "\r%s %i/%i - %.2fit/s",
progress.c_str(), step, steps,
time > 1.0f || time == 0 ? time : (1.0f / time));
fflush(stdout); // for linux
if (step == steps) {
printf("\n");
}
}
std::string ltrim(const std::string& s) {
auto it = std::find_if(s.begin(), s.end(), [](int ch) {
return !std::isspace(ch);
});
return std::string(it, s.end());
}
std::string rtrim(const std::string& s) {
auto it = std::find_if(s.rbegin(), s.rend(), [](int ch) {
return !std::isspace(ch);
});
return std::string(s.begin(), it.base());
}
std::string trim(const std::string& s) {
return rtrim(ltrim(s));
}
static sd_log_cb_t sd_log_cb = NULL;
void* sd_log_cb_data = NULL;
#define LOG_BUFFER_SIZE 1024
void log_printf(sd_log_level_t level, const char* file, int line, const char* format, ...) {
va_list args;
va_start(args, format);
static char log_buffer[LOG_BUFFER_SIZE + 1];
int written = snprintf(log_buffer, LOG_BUFFER_SIZE, "%s:%-4d - ", sd_basename(file).c_str(), line);
if (written >= 0 && written < LOG_BUFFER_SIZE) {
vsnprintf(log_buffer + written, LOG_BUFFER_SIZE - written, format, args);
}
strncat(log_buffer, "\n", LOG_BUFFER_SIZE - strlen(log_buffer));
if (sd_log_cb) {
sd_log_cb(level, log_buffer, sd_log_cb_data);
}
va_end(args);
}
void sd_set_log_callback(sd_log_cb_t cb, void* data) {
sd_log_cb = cb;
sd_log_cb_data = data;
}
void sd_set_progress_callback(sd_progress_cb_t cb, void* data) {
sd_progress_cb = cb;
sd_progress_cb_data = data;
}
const char* sd_get_system_info() {
static char buffer[1024];
std::stringstream ss;
ss << "System Info: \n";
ss << " BLAS = " << ggml_cpu_has_blas() << std::endl;
ss << " SSE3 = " << ggml_cpu_has_sse3() << std::endl;
ss << " AVX = " << ggml_cpu_has_avx() << std::endl;
ss << " AVX2 = " << ggml_cpu_has_avx2() << std::endl;
ss << " AVX512 = " << ggml_cpu_has_avx512() << std::endl;
ss << " AVX512_VBMI = " << ggml_cpu_has_avx512_vbmi() << std::endl;
ss << " AVX512_VNNI = " << ggml_cpu_has_avx512_vnni() << std::endl;
ss << " FMA = " << ggml_cpu_has_fma() << std::endl;
ss << " NEON = " << ggml_cpu_has_neon() << std::endl;
ss << " ARM_FMA = " << ggml_cpu_has_arm_fma() << std::endl;
ss << " F16C = " << ggml_cpu_has_f16c() << std::endl;
ss << " FP16_VA = " << ggml_cpu_has_fp16_va() << std::endl;
ss << " WASM_SIMD = " << ggml_cpu_has_wasm_simd() << std::endl;
ss << " VSX = " << ggml_cpu_has_vsx() << std::endl;
snprintf(buffer, sizeof(buffer), "%s", ss.str().c_str());
return buffer;
}
const char* sd_type_name(enum sd_type_t type) {
return ggml_type_name((ggml_type)type);
}
sd_image_f32_t sd_image_t_to_sd_image_f32_t(sd_image_t image) {
sd_image_f32_t converted_image;
converted_image.width = image.width;
converted_image.height = image.height;
converted_image.channel = image.channel;
// Allocate memory for float data
converted_image.data = (float*)malloc(image.width * image.height * image.channel * sizeof(float));
for (int i = 0; i < image.width * image.height * image.channel; i++) {
// Convert uint8_t to float
converted_image.data[i] = (float)image.data[i];
}
return converted_image;
}
// Function to perform double linear interpolation
float interpolate(float v1, float v2, float v3, float v4, float x_ratio, float y_ratio) {
return v1 * (1 - x_ratio) * (1 - y_ratio) + v2 * x_ratio * (1 - y_ratio) + v3 * (1 - x_ratio) * y_ratio + v4 * x_ratio * y_ratio;
}
sd_image_f32_t resize_sd_image_f32_t(sd_image_f32_t image, int target_width, int target_height) {
sd_image_f32_t resized_image;
resized_image.width = target_width;
resized_image.height = target_height;
resized_image.channel = image.channel;
// Allocate memory for resized float data
resized_image.data = (float*)malloc(target_width * target_height * image.channel * sizeof(float));
for (int y = 0; y < target_height; y++) {
for (int x = 0; x < target_width; x++) {
float original_x = (float)x * image.width / target_width;
float original_y = (float)y * image.height / target_height;
int x1 = (int)original_x;
int y1 = (int)original_y;
int x2 = x1 + 1;
int y2 = y1 + 1;
for (int k = 0; k < image.channel; k++) {
float v1 = *(image.data + y1 * image.width * image.channel + x1 * image.channel + k);
float v2 = *(image.data + y1 * image.width * image.channel + x2 * image.channel + k);
float v3 = *(image.data + y2 * image.width * image.channel + x1 * image.channel + k);
float v4 = *(image.data + y2 * image.width * image.channel + x2 * image.channel + k);
float x_ratio = original_x - x1;
float y_ratio = original_y - y1;
float value = interpolate(v1, v2, v3, v4, x_ratio, y_ratio);
*(resized_image.data + y * target_width * image.channel + x * image.channel + k) = value;
}
}
}
return resized_image;
}
void normalize_sd_image_f32_t(sd_image_f32_t image, float means[3], float stds[3]) {
for (int y = 0; y < image.height; y++) {
for (int x = 0; x < image.width; x++) {
for (int k = 0; k < image.channel; k++) {
int index = (y * image.width + x) * image.channel + k;
image.data[index] = (image.data[index] - means[k]) / stds[k];
}
}
}
}
// Constants for means and std
float means[3] = {0.48145466, 0.4578275, 0.40821073};
float stds[3] = {0.26862954, 0.26130258, 0.27577711};
// Function to clip and preprocess sd_image_f32_t
sd_image_f32_t clip_preprocess(sd_image_f32_t image, int size) {
float scale = (float)size / fmin(image.width, image.height);
// Interpolation
int new_width = (int)(scale * image.width);
int new_height = (int)(scale * image.height);
float* resized_data = (float*)malloc(new_width * new_height * image.channel * sizeof(float));
for (int y = 0; y < new_height; y++) {
for (int x = 0; x < new_width; x++) {
float original_x = (float)x * image.width / new_width;
float original_y = (float)y * image.height / new_height;
int x1 = (int)original_x;
int y1 = (int)original_y;
int x2 = x1 + 1;
int y2 = y1 + 1;
for (int k = 0; k < image.channel; k++) {
float v1 = *(image.data + y1 * image.width * image.channel + x1 * image.channel + k);
float v2 = *(image.data + y1 * image.width * image.channel + x2 * image.channel + k);
float v3 = *(image.data + y2 * image.width * image.channel + x1 * image.channel + k);
float v4 = *(image.data + y2 * image.width * image.channel + x2 * image.channel + k);
float x_ratio = original_x - x1;
float y_ratio = original_y - y1;
float value = interpolate(v1, v2, v3, v4, x_ratio, y_ratio);
*(resized_data + y * new_width * image.channel + x * image.channel + k) = value;
}
}
}
// Clip and preprocess
int h = (new_height - size) / 2;
int w = (new_width - size) / 2;
sd_image_f32_t result;
result.width = size;
result.height = size;
result.channel = image.channel;
result.data = (float*)malloc(size * size * image.channel * sizeof(float));
for (int k = 0; k < image.channel; k++) {
for (int i = 0; i < size; i++) {
for (int j = 0; j < size; j++) {
*(result.data + i * size * image.channel + j * image.channel + k) =
fmin(fmax(*(resized_data + (i + h) * new_width * image.channel + (j + w) * image.channel + k), 0.0f), 255.0f) / 255.0f;
}
}
}
// Free allocated memory
free(resized_data);
// Normalize
for (int k = 0; k < image.channel; k++) {
for (int i = 0; i < size; i++) {
for (int j = 0; j < size; j++) {
// *(result.data + i * size * image.channel + j * image.channel + k) = 0.5f;
int offset = i * size * image.channel + j * image.channel + k;
float value = *(result.data + offset);
value = (value - means[k]) / stds[k];
// value = 0.5f;
*(result.data + offset) = value;
}
}
}
return result;
}