
* Fixed a double free issue when running multiple backends on the CPU, e.g. CLIP and the primary backend, as this would result in the *_backend pointers both pointing to the same thing, resulting in a segfault when calling the StableDiffusionGGML destructor. * Improve logging to allow for a color switch on the command line interface. Changed the base log_printf function to not bake the log level directly into the log buffer, as that information is already passed to the logging function via the level parameter and it's easier to add in there than strip it out. * Added a fix for certain SDXL LoRAs that don't seem to follow the expected naming convention; converts over the tensor name during the LoRA model loading. Added some logging of useful LoRA loading information. Had to increase the base size of the GGML graph as the existing size results in an insufficient graph memory error when using SDXL LoRAs. * small fixes --------- Co-authored-by: leejet <leejet714@gmail.com>
566 lines
18 KiB
C++
566 lines
18 KiB
C++
#include "util.h"
|
|
#include <stdarg.h>
|
|
#include <algorithm>
|
|
#include <cmath>
|
|
#include <codecvt>
|
|
#include <fstream>
|
|
#include <locale>
|
|
#include <sstream>
|
|
#include <string>
|
|
#include <thread>
|
|
#include <unordered_set>
|
|
#include <vector>
|
|
#include "preprocessing.hpp"
|
|
|
|
#if defined(__APPLE__) && defined(__MACH__)
|
|
#include <sys/sysctl.h>
|
|
#include <sys/types.h>
|
|
#endif
|
|
|
|
#if !defined(_WIN32)
|
|
#include <sys/ioctl.h>
|
|
#include <unistd.h>
|
|
#endif
|
|
|
|
#include "ggml/ggml.h"
|
|
#include "stable-diffusion.h"
|
|
|
|
#define STB_IMAGE_RESIZE_IMPLEMENTATION
|
|
#include "stb_image_resize.h"
|
|
|
|
// Reports whether `str` terminates with `ending`.
// An `ending` longer than `str` can never match; an empty `ending` always matches.
bool ends_with(const std::string& str, const std::string& ending) {
    const size_t str_len    = str.length();
    const size_t ending_len = ending.length();
    if (ending_len > str_len) {
        return false;
    }
    // Compare only the trailing `ending_len` characters of `str`.
    return str.compare(str_len - ending_len, ending_len, ending) == 0;
}
|
|
|
|
// Reports whether `str` begins with `start`.
// An empty `start` always matches. The comparison is anchored at position 0,
// so a mismatch is detected without scanning the rest of `str` (the previous
// `find(start) == 0` form searched the whole string before giving up).
bool starts_with(const std::string& str, const std::string& start) {
    // compare() clamps the substring length, so a `start` longer than `str`
    // simply compares unequal.
    return str.compare(0, start.length(), start) == 0;
}
|
|
|
|
// Reports whether `substr` occurs anywhere inside `str`.
// An empty `substr` is found in every string.
bool contains(const std::string& str, const std::string& substr) {
    const size_t hit = str.find(substr);
    return hit != std::string::npos;
}
|
|
|
|
// Rewrites `str` in place, turning every occurrence of `target` into `replacement`.
void replace_all_chars(std::string& str, char target, char replacement) {
    for (char& ch : str) {
        if (ch != target) {
            continue;
        }
        ch = replacement;
    }
}
|
|
|
|
// printf-style formatting into a std::string.
//
// The argument list is consumed twice: once to measure the output and once to
// render it, hence the va_copy. Returns an empty string if vsnprintf reports an
// encoding error (negative result) instead of building a string from a bogus
// length.
std::string format(const char* fmt, ...) {
    va_list ap;
    va_list ap2;
    va_start(ap, fmt);
    va_copy(ap2, ap);

    // First pass: measure only (no output buffer).
    const int size = vsnprintf(NULL, 0, fmt, ap);
    va_end(ap);
    if (size < 0) {
        va_end(ap2);
        return std::string();
    }

    // Second pass: render into a buffer with room for the terminating NUL.
    std::vector<char> buf(static_cast<size_t>(size) + 1);
    const int written = vsnprintf(buf.data(), buf.size(), fmt, ap2);
    va_end(ap2);
    if (written < 0) {
        return std::string();
    }
    return std::string(buf.data(), static_cast<size_t>(size));
}
|
|
|
|
#ifdef _WIN32 // code for windows
|
|
#include <windows.h>
|
|
|
|
bool file_exists(const std::string& filename) {
|
|
DWORD attributes = GetFileAttributesA(filename.c_str());
|
|
return (attributes != INVALID_FILE_ATTRIBUTES && !(attributes & FILE_ATTRIBUTE_DIRECTORY));
|
|
}
|
|
|
|
bool is_directory(const std::string& path) {
|
|
DWORD attributes = GetFileAttributesA(path.c_str());
|
|
return (attributes != INVALID_FILE_ATTRIBUTES && (attributes & FILE_ATTRIBUTE_DIRECTORY));
|
|
}
|
|
|
|
std::string get_full_path(const std::string& dir, const std::string& filename) {
|
|
std::string full_path = dir + "\\" + filename;
|
|
|
|
WIN32_FIND_DATA find_file_data;
|
|
HANDLE hFind = FindFirstFile(full_path.c_str(), &find_file_data);
|
|
|
|
if (hFind != INVALID_HANDLE_VALUE) {
|
|
FindClose(hFind);
|
|
return full_path;
|
|
} else {
|
|
return "";
|
|
}
|
|
}
|
|
|
|
std::vector<std::string> get_files_from_dir(const std::string& dir) {
|
|
std::vector<std::string> files;
|
|
|
|
WIN32_FIND_DATA findFileData;
|
|
HANDLE hFind;
|
|
|
|
char currentDirectory[MAX_PATH];
|
|
GetCurrentDirectory(MAX_PATH, currentDirectory);
|
|
|
|
char directoryPath[MAX_PATH]; // this is absolute path
|
|
sprintf(directoryPath, "%s\\%s\\*", currentDirectory, dir.c_str());
|
|
|
|
// Find the first file in the directory
|
|
hFind = FindFirstFile(directoryPath, &findFileData);
|
|
|
|
// Check if the directory was found
|
|
if (hFind == INVALID_HANDLE_VALUE) {
|
|
printf("Unable to find directory.\n");
|
|
return files;
|
|
}
|
|
|
|
// Loop through all files in the directory
|
|
do {
|
|
// Check if the found file is a regular file (not a directory)
|
|
if (!(findFileData.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)) {
|
|
files.push_back(std::string(currentDirectory) + "\\" + dir + "\\" + std::string(findFileData.cFileName));
|
|
}
|
|
} while (FindNextFile(hFind, &findFileData) != 0);
|
|
|
|
// Close the handle
|
|
FindClose(hFind);
|
|
|
|
sort(files.begin(), files.end());
|
|
|
|
return files;
|
|
}
|
|
|
|
#else // Unix
|
|
#include <dirent.h>
|
|
#include <sys/stat.h>
|
|
|
|
// POSIX: true when `filename` can be stat'ed and refers to a regular file
// (symbolic links are followed by stat).
bool file_exists(const std::string& filename) {
    struct stat info;
    if (stat(filename.c_str(), &info) != 0) {
        return false;  // missing or inaccessible
    }
    return S_ISREG(info.st_mode) != 0;
}
|
|
|
|
// POSIX: true when `path` can be stat'ed and refers to a directory
// (symbolic links are followed by stat).
bool is_directory(const std::string& path) {
    struct stat info;
    if (stat(path.c_str(), &info) != 0) {
        return false;  // missing or inaccessible
    }
    return S_ISDIR(info.st_mode) != 0;
}
|
|
|
|
// POSIX: searches `dir` for an entry whose name equals `filename` ignoring
// case and returns "dir/<actual entry name>", or an empty string when the
// directory cannot be opened or no entry matches.
std::string get_full_path(const std::string& dir, const std::string& filename) {
    DIR* dp = opendir(dir.c_str());
    if (dp == nullptr) {
        return "";
    }

    std::string full_path;
    struct dirent* entry;
    while ((entry = readdir(dp)) != nullptr) {
        if (strcasecmp(entry->d_name, filename.c_str()) == 0) {
            // Copy the name BEFORE closedir(): the dirent storage belongs to
            // the directory stream and must not be read once it is closed.
            full_path = dir + "/" + entry->d_name;
            break;
        }
    }

    closedir(dp);
    return full_path;
}
|
|
|
|
std::vector<std::string> get_files_from_dir(const std::string& dir) {
|
|
std::vector<std::string> files;
|
|
|
|
DIR* dp = opendir(dir.c_str());
|
|
|
|
if (dp != nullptr) {
|
|
struct dirent* entry;
|
|
|
|
while ((entry = readdir(dp)) != nullptr) {
|
|
std::string fname = dir + "/" + entry->d_name;
|
|
if (!is_directory(fname))
|
|
files.push_back(fname);
|
|
}
|
|
closedir(dp);
|
|
}
|
|
|
|
sort(files.begin(), files.end());
|
|
|
|
return files;
|
|
}
|
|
|
|
#endif
|
|
|
|
// get_num_physical_cores is copied from
// https://github.com/ggerganov/llama.cpp/blob/master/examples/common.cpp
// LICENSE: https://github.com/ggerganov/llama.cpp/blob/master/LICENSE
//
// Returns an estimate of the number of physical CPU cores.
//  - Linux: counts the distinct thread-sibling masks exposed through sysfs.
//  - macOS: queries sysctl ("hw.perflevel0.physicalcpu", then "hw.physicalcpu").
//  - Otherwise, or when the platform query yields nothing: derives a value from
//    std::thread::hardware_concurrency() (all threads when <= 4, else half),
//    defaulting to 4 when that is unknown.
int32_t get_num_physical_cores() {
#ifdef __linux__
    // enumerate the set of thread siblings, num entries is num cores
    std::unordered_set<std::string> siblings;
    for (uint32_t cpu = 0; cpu < UINT32_MAX; ++cpu) {
        // Per-CPU topology lives under .../cpu/cpu<N>/; without the second
        // "/cpu" component the file never opens and this branch is skipped.
        std::ifstream thread_siblings("/sys/devices/system/cpu/cpu" + std::to_string(cpu) + "/topology/thread_siblings");
        if (!thread_siblings.is_open()) {
            break;  // no more cpus
        }
        std::string line;
        if (std::getline(thread_siblings, line)) {
            siblings.insert(line);
        }
    }
    if (siblings.size() > 0) {
        return static_cast<int32_t>(siblings.size());
    }
#elif defined(__APPLE__) && defined(__MACH__)
    int32_t num_physical_cores;
    size_t len = sizeof(num_physical_cores);
    int result = sysctlbyname("hw.perflevel0.physicalcpu", &num_physical_cores, &len, NULL, 0);
    if (result == 0) {
        return num_physical_cores;
    }
    result = sysctlbyname("hw.physicalcpu", &num_physical_cores, &len, NULL, 0);
    if (result == 0) {
        return num_physical_cores;
    }
#elif defined(_WIN32)
    // TODO: Implement
#endif
    // Fallback heuristic based on the logical thread count.
    const unsigned int n_threads = std::thread::hardware_concurrency();
    if (n_threads == 0) {
        return 4;
    }
    return static_cast<int32_t>(n_threads <= 4 ? n_threads : n_threads / 2);
}
|
|
|
|
// Progress hook and the opaque pointer handed back to it on every call.
// Both start out unset; sd_set_progress_callback() assigns them.
static sd_progress_cb_t sd_progress_cb = nullptr;
void* sd_progress_cb_data              = nullptr;
|
|
|
|
// Decodes a UTF-8 byte string into UTF-32 code points via std::wstring_convert.
std::u32string utf8_to_utf32(const std::string& utf8_str) {
    using Utf8Codec = std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t>;
    Utf8Codec codec;
    std::u32string decoded = codec.from_bytes(utf8_str);
    return decoded;
}
|
|
|
|
// Encodes UTF-32 code points as a UTF-8 byte string via std::wstring_convert.
std::string utf32_to_utf8(const std::u32string& utf32_str) {
    using Utf8Codec = std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t>;
    Utf8Codec codec;
    std::string encoded = codec.to_bytes(utf32_str);
    return encoded;
}
|
|
|
|
// Wraps a single numeric code point in a one-element UTF-32 string.
std::u32string unicode_value_to_utf32(int unicode_value) {
    const char32_t code_point = static_cast<char32_t>(unicode_value);
    return std::u32string(1, code_point);
}
|
|
|
|
// Returns the component of `path` after the last path separator.
// Both '/' and '\\' count as separators and whichever occurs LAST wins, so
// mixed-separator paths such as "C:\\proj/src\\util.cpp" yield "util.cpp"
// (previously any '/' took priority over a later '\\').
// A path without separators is returned unchanged.
static std::string sd_basename(const std::string& path) {
    const size_t pos = path.find_last_of("/\\");
    if (pos == std::string::npos) {
        return path;
    }
    return path.substr(pos + 1);
}
|
|
|
|
// Joins two path fragments.
// If either fragment is empty the other is returned untouched. Otherwise a '/'
// is inserted between them unless `p1` already ends in '/' or '\\'.
std::string path_join(const std::string& p1, const std::string& p2) {
    if (p1.empty()) {
        return p2;
    }
    if (p2.empty()) {
        return p1;
    }

    const char tail           = p1.back();
    const bool has_separator  = (tail == '/') || (tail == '\\');
    return has_separator ? p1 + p2 : p1 + "/" + p2;
}
|
|
|
|
// Resizes `img` to a 224x224, 3-channel 8-bit image using stb_image_resize.
//
// Returns a newly allocated sd_image_t (created with `new`) whose pixel buffer
// was obtained from malloc, or NULL on failure. Failure cases: missing input,
// an input that is not 3-channel (the output buffer is sized for exactly 3
// channels, so any other count would overrun or under-fill it), allocation
// failure, or a failed resize. The temporary buffer is released on every
// failure path.
sd_image_t* preprocess_id_image(sd_image_t* img) {
    const int shortest_edge = 224;
    const int size          = shortest_edge;
    const uint32_t channels = 3;

    if (img == NULL || img->data == NULL) {
        fprintf(stderr, "%s: invalid input image\n", __func__);
        return NULL;
    }
    if (img->channel != channels) {
        fprintf(stderr, "%s: expected a %u-channel image, got %u channels\n",
                __func__, (unsigned)channels, (unsigned)img->channel);
        return NULL;
    }

    // 1. do resize using stb_resize functions
    unsigned char* buf = (unsigned char*)malloc(sizeof(unsigned char) * channels * size * size);
    if (buf == NULL) {
        fprintf(stderr, "%s: failed to allocate resize buffer\n", __func__);
        return NULL;
    }
    if (!stbir_resize_uint8(img->data, (int)img->width, (int)img->height, 0,
                            buf, size, size, 0,
                            (int)channels)) {
        fprintf(stderr, "%s: resize operation failed \n ", __func__);
        free(buf);
        return NULL;
    }

    // 2. do center crop (likely unnecessary due to step 1)

    // 3. do rescale

    // 4. do normalize

    // 3 and 4 will need to be done in float format.

    return new sd_image_t{(uint32_t)shortest_edge,
                          (uint32_t)shortest_edge,
                          channels,
                          buf};
}
|
|
|
|
void pretty_progress(int step, int steps, float time) {
|
|
if (sd_progress_cb) {
|
|
sd_progress_cb(step, steps, time, sd_progress_cb_data);
|
|
return;
|
|
}
|
|
if (step == 0) {
|
|
return;
|
|
}
|
|
std::string progress = " |";
|
|
int max_progress = 50;
|
|
int32_t current = (int32_t)(step * 1.f * max_progress / steps);
|
|
for (int i = 0; i < 50; i++) {
|
|
if (i > current) {
|
|
progress += " ";
|
|
} else if (i == current && i != max_progress - 1) {
|
|
progress += ">";
|
|
} else {
|
|
progress += "=";
|
|
}
|
|
}
|
|
progress += "|";
|
|
printf(time > 1.0f ? "\r%s %i/%i - %.2fs/it" : "\r%s %i/%i - %.2fit/s",
|
|
progress.c_str(), step, steps,
|
|
time > 1.0f || time == 0 ? time : (1.0f / time));
|
|
fflush(stdout); // for linux
|
|
if (step == steps) {
|
|
printf("\n");
|
|
}
|
|
}
|
|
|
|
// Returns a copy of `s` without its leading whitespace (per std::isspace).
std::string ltrim(const std::string& s) {
    // std::isspace requires a value representable as unsigned char (or EOF);
    // taking the byte as unsigned char keeps non-ASCII bytes from being passed
    // as negative values on platforms where plain char is signed.
    auto it = std::find_if(s.begin(), s.end(), [](unsigned char ch) {
        return !std::isspace(ch);
    });
    return std::string(it, s.end());
}
|
|
|
|
// Returns a copy of `s` without its trailing whitespace (per std::isspace).
std::string rtrim(const std::string& s) {
    // std::isspace requires a value representable as unsigned char (or EOF);
    // taking the byte as unsigned char keeps non-ASCII bytes from being passed
    // as negative values on platforms where plain char is signed.
    auto it = std::find_if(s.rbegin(), s.rend(), [](unsigned char ch) {
        return !std::isspace(ch);
    });
    // base() converts the reverse iterator to the position just past the last
    // non-space character.
    return std::string(s.begin(), it.base());
}
|
|
|
|
std::string trim(const std::string& s) {
|
|
return rtrim(ltrim(s));
|
|
}
|
|
|
|
// Log sink and the opaque pointer handed back to it on every call.
// Both start out unset; sd_set_log_callback() assigns them.
static sd_log_cb_t sd_log_cb = nullptr;
void* sd_log_cb_data         = nullptr;
|
|
|
|
#define LOG_BUFFER_SIZE 1024
|
|
|
|
void log_printf(sd_log_level_t level, const char* file, int line, const char* format, ...) {
|
|
va_list args;
|
|
va_start(args, format);
|
|
|
|
static char log_buffer[LOG_BUFFER_SIZE + 1];
|
|
int written = snprintf(log_buffer, LOG_BUFFER_SIZE, "%s:%-4d - ", sd_basename(file).c_str(), line);
|
|
|
|
if (written >= 0 && written < LOG_BUFFER_SIZE) {
|
|
vsnprintf(log_buffer + written, LOG_BUFFER_SIZE - written, format, args);
|
|
}
|
|
strncat(log_buffer, "\n", LOG_BUFFER_SIZE - strlen(log_buffer));
|
|
|
|
if (sd_log_cb) {
|
|
sd_log_cb(level, log_buffer, sd_log_cb_data);
|
|
}
|
|
|
|
va_end(args);
|
|
}
|
|
|
|
// Installs `cb` as the log sink; `data` is stored alongside it and passed back
// on every log call.
void sd_set_log_callback(sd_log_cb_t cb, void* data) {
    sd_log_cb_data = data;
    sd_log_cb      = cb;
}
|
|
// Installs `cb` as the progress hook; `data` is stored alongside it and passed
// back on every progress report.
void sd_set_progress_callback(sd_progress_cb_t cb, void* data) {
    sd_progress_cb_data = data;
    sd_progress_cb      = cb;
}
|
|
// Builds a multi-line summary of the CPU features reported by the
// ggml_cpu_has_* queries and returns it in a function-local static buffer
// (overwritten by the next call, truncated to the buffer size).
const char* sd_get_system_info() {
    static char buffer[1024];

    std::stringstream report;
    report << "System Info: \n";

    // Appends one "<label><value>" line to the report.
    auto add_line = [&report](const char* label, int value) {
        report << label << value << std::endl;
    };

    add_line(" BLAS = ", ggml_cpu_has_blas());
    add_line(" SSE3 = ", ggml_cpu_has_sse3());
    add_line(" AVX = ", ggml_cpu_has_avx());
    add_line(" AVX2 = ", ggml_cpu_has_avx2());
    add_line(" AVX512 = ", ggml_cpu_has_avx512());
    add_line(" AVX512_VBMI = ", ggml_cpu_has_avx512_vbmi());
    add_line(" AVX512_VNNI = ", ggml_cpu_has_avx512_vnni());
    add_line(" FMA = ", ggml_cpu_has_fma());
    add_line(" NEON = ", ggml_cpu_has_neon());
    add_line(" ARM_FMA = ", ggml_cpu_has_arm_fma());
    add_line(" F16C = ", ggml_cpu_has_f16c());
    add_line(" FP16_VA = ", ggml_cpu_has_fp16_va());
    add_line(" WASM_SIMD = ", ggml_cpu_has_wasm_simd());
    add_line(" VSX = ", ggml_cpu_has_vsx());

    const std::string text = report.str();
    snprintf(buffer, sizeof(buffer), "%s", text.c_str());
    return buffer;
}
|
|
|
|
const char* sd_type_name(enum sd_type_t type) {
|
|
return ggml_type_name((ggml_type)type);
|
|
}
|
|
|
|
// Converts an 8-bit image into a float image with the same width, height and
// channel count; each sample is cast to float without rescaling.
//
// The returned `data` buffer is allocated with malloc and must be released by
// the caller. If the allocation fails, `data` is NULL and an error is printed.
sd_image_f32_t sd_image_t_to_sd_image_f32_t(sd_image_t image) {
    sd_image_f32_t converted_image;
    converted_image.width   = image.width;
    converted_image.height  = image.height;
    converted_image.channel = image.channel;

    // Count elements in size_t so large images cannot overflow 32-bit math and
    // the loop index has the same signedness as its bound.
    const size_t count = (size_t)image.width * image.height * image.channel;

    // Allocate memory for float data
    converted_image.data = (float*)malloc(count * sizeof(float));
    if (converted_image.data == NULL) {
        fprintf(stderr, "%s: failed to allocate float image buffer\n", __func__);
        return converted_image;
    }

    for (size_t i = 0; i < count; i++) {
        // Convert uint8_t to float
        converted_image.data[i] = (float)image.data[i];
    }

    return converted_image;
}
|
|
|
|
// Bilinear interpolation between four corner samples.
// v1/v2 are the corners at y_ratio = 0 (x_ratio = 0 and 1 respectively),
// v3/v4 the corners at y_ratio = 1; each corner is weighted by the product of
// its horizontal and vertical proximity.
float interpolate(float v1, float v2, float v3, float v4, float x_ratio, float y_ratio) {
    const float term1 = v1 * (1 - x_ratio) * (1 - y_ratio);
    const float term2 = v2 * x_ratio * (1 - y_ratio);
    const float term3 = v3 * (1 - x_ratio) * y_ratio;
    const float term4 = v4 * x_ratio * y_ratio;
    return term1 + term2 + term3 + term4;
}
|
|
|
|
// Resizes a float image to target_width x target_height with bilinear
// interpolation; the channel count is preserved.
//
// The returned `data` buffer is allocated with malloc and must be released by
// the caller. `data` is NULL when the input is empty, a target dimension is
// not positive, or the allocation fails.
//
// Neighbour coordinates are clamped to the source bounds: without the clamp,
// samples on the right/bottom edge read one pixel past the row (or past the
// end of the buffer on the last row).
sd_image_f32_t resize_sd_image_f32_t(sd_image_f32_t image, int target_width, int target_height) {
    sd_image_f32_t resized_image;
    resized_image.width   = target_width;
    resized_image.height  = target_height;
    resized_image.channel = image.channel;
    resized_image.data    = NULL;

    const int src_w    = (int)image.width;
    const int src_h    = (int)image.height;
    const int channels = (int)image.channel;
    if (image.data == NULL || src_w <= 0 || src_h <= 0 || channels <= 0 ||
        target_width <= 0 || target_height <= 0) {
        return resized_image;
    }

    // Allocate memory for resized float data
    resized_image.data = (float*)malloc((size_t)target_width * target_height * channels * sizeof(float));
    if (resized_image.data == NULL) {
        fprintf(stderr, "%s: failed to allocate resized image buffer\n", __func__);
        return resized_image;
    }

    const size_t src_stride = (size_t)src_w * channels;         // floats per source row
    const size_t dst_stride = (size_t)target_width * channels;  // floats per output row

    for (int y = 0; y < target_height; y++) {
        // Source row that output row y maps to, and its two clamped neighbours.
        const float original_y = (float)y * image.height / target_height;
        const int y1           = std::min((int)original_y, src_h - 1);
        const int y2           = std::min(y1 + 1, src_h - 1);
        const float y_ratio    = original_y - y1;

        for (int x = 0; x < target_width; x++) {
            const float original_x = (float)x * image.width / target_width;
            const int x1           = std::min((int)original_x, src_w - 1);
            const int x2           = std::min(x1 + 1, src_w - 1);
            const float x_ratio    = original_x - x1;

            for (int k = 0; k < channels; k++) {
                const float v1 = image.data[y1 * src_stride + (size_t)x1 * channels + k];
                const float v2 = image.data[y1 * src_stride + (size_t)x2 * channels + k];
                const float v3 = image.data[y2 * src_stride + (size_t)x1 * channels + k];
                const float v4 = image.data[y2 * src_stride + (size_t)x2 * channels + k];

                resized_image.data[y * dst_stride + (size_t)x * channels + k] =
                    interpolate(v1, v2, v3, v4, x_ratio, y_ratio);
            }
        }
    }

    return resized_image;
}
|
|
|
|
// Normalizes `image` in place: every sample of channel k becomes
// (sample - means[k]) / stds[k].
// NOTE(review): the arrays are declared with 3 entries, so this presumably
// expects image.channel <= 3 — confirm against callers.
void normalize_sd_image_f32_t(sd_image_f32_t image, float means[3], float stds[3]) {
    for (int row = 0; row < image.height; row++) {
        for (int col = 0; col < image.width; col++) {
            for (int ch = 0; ch < image.channel; ch++) {
                // Interleaved layout: channels of one pixel are adjacent.
                const int idx = (row * image.width + col) * image.channel + ch;
                float& sample = image.data[idx];
                sample        = (sample - means[ch]) / stds[ch];
            }
        }
    }
}
|
|
|
|
// Constants for means and std
|
|
float means[3] = {0.48145466, 0.4578275, 0.40821073};
|
|
float stds[3] = {0.26862954, 0.26130258, 0.27577711};
|
|
|
|
// Function to clip and preprocess sd_image_f32_t
|
|
sd_image_f32_t clip_preprocess(sd_image_f32_t image, int size) {
|
|
float scale = (float)size / fmin(image.width, image.height);
|
|
|
|
// Interpolation
|
|
int new_width = (int)(scale * image.width);
|
|
int new_height = (int)(scale * image.height);
|
|
float* resized_data = (float*)malloc(new_width * new_height * image.channel * sizeof(float));
|
|
|
|
for (int y = 0; y < new_height; y++) {
|
|
for (int x = 0; x < new_width; x++) {
|
|
float original_x = (float)x * image.width / new_width;
|
|
float original_y = (float)y * image.height / new_height;
|
|
|
|
int x1 = (int)original_x;
|
|
int y1 = (int)original_y;
|
|
int x2 = x1 + 1;
|
|
int y2 = y1 + 1;
|
|
|
|
for (int k = 0; k < image.channel; k++) {
|
|
float v1 = *(image.data + y1 * image.width * image.channel + x1 * image.channel + k);
|
|
float v2 = *(image.data + y1 * image.width * image.channel + x2 * image.channel + k);
|
|
float v3 = *(image.data + y2 * image.width * image.channel + x1 * image.channel + k);
|
|
float v4 = *(image.data + y2 * image.width * image.channel + x2 * image.channel + k);
|
|
|
|
float x_ratio = original_x - x1;
|
|
float y_ratio = original_y - y1;
|
|
|
|
float value = interpolate(v1, v2, v3, v4, x_ratio, y_ratio);
|
|
|
|
*(resized_data + y * new_width * image.channel + x * image.channel + k) = value;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Clip and preprocess
|
|
int h = (new_height - size) / 2;
|
|
int w = (new_width - size) / 2;
|
|
|
|
sd_image_f32_t result;
|
|
result.width = size;
|
|
result.height = size;
|
|
result.channel = image.channel;
|
|
result.data = (float*)malloc(size * size * image.channel * sizeof(float));
|
|
|
|
for (int k = 0; k < image.channel; k++) {
|
|
for (int i = 0; i < size; i++) {
|
|
for (int j = 0; j < size; j++) {
|
|
*(result.data + i * size * image.channel + j * image.channel + k) =
|
|
fmin(fmax(*(resized_data + (i + h) * new_width * image.channel + (j + w) * image.channel + k), 0.0f), 255.0f) / 255.0f;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Free allocated memory
|
|
free(resized_data);
|
|
|
|
// Normalize
|
|
for (int k = 0; k < image.channel; k++) {
|
|
for (int i = 0; i < size; i++) {
|
|
for (int j = 0; j < size; j++) {
|
|
// *(result.data + i * size * image.channel + j * image.channel + k) = 0.5f;
|
|
int offset = i * size * image.channel + j * image.channel + k;
|
|
float value = *(result.data + offset);
|
|
value = (value - means[k]) / stds[k];
|
|
// value = 0.5f;
|
|
*(result.data + offset) = value;
|
|
}
|
|
}
|
|
}
|
|
|
|
return result;
|
|
}
|