stable-diffusion.cpp/util.cpp

#include "util.h"
#include <stdarg.h>
#include <algorithm>
#include <cmath>
#include <codecvt>
#include <fstream>
#include <locale>
#include <sstream>
#include <string>
#include <thread>
#include <unordered_set>
#include <vector>
#include "preprocessing.hpp"

#if defined(__APPLE__) && defined(__MACH__)
#include <sys/sysctl.h>
#include <sys/types.h>
#endif

#if !defined(_WIN32)
#include <sys/ioctl.h>
#include <unistd.h>
#endif

#include "ggml/ggml.h"
#include "stable-diffusion.h"

#define STB_IMAGE_RESIZE_IMPLEMENTATION
#include "stb_image_resize.h"

// Returns true when `str` finishes with the character sequence `ending`.
// An empty `ending` always matches; an `ending` longer than `str` never does.
bool ends_with(const std::string& str, const std::string& ending) {
    const size_t tail_len = ending.size();
    if (tail_len > str.size()) {
        return false;
    }
    // Walk both strings backwards over the length of `ending`.
    return std::equal(ending.rbegin(), ending.rend(), str.rbegin());
}

// Returns true when `str` begins with `start`. An empty `start` always matches.
// The comparison is anchored at position 0, so a non-matching prefix is rejected
// without searching the rest of `str`.
bool starts_with(const std::string& str, const std::string& start) {
    // compare() clamps the length to what `str` actually holds, so a `start`
    // longer than `str` simply compares unequal.
    return str.compare(0, start.size(), start) == 0;
}

// Returns true when `substr` occurs anywhere inside `str`
// (an empty `substr` is always considered present).
bool contains(const std::string& str, const std::string& substr) {
    const size_t hit = str.find(substr);
    return hit != std::string::npos;
}

// Replaces, in place, every occurrence of `target` in `str` with `replacement`.
void replace_all_chars(std::string& str, char target, char replacement) {
    std::replace(str.begin(), str.end(), target, replacement);
}

// printf-style formatting into a std::string.
//
// The format is evaluated twice: once with a NULL buffer to learn the required
// length, then again into a buffer of exactly that size.
// Returns an empty string if vsnprintf reports an encoding error (negative
// result) instead of building a string from a negative length.
std::string format(const char* fmt, ...) {
    va_list ap;
    va_list ap2;
    va_start(ap, fmt);
    va_copy(ap2, ap);  // a va_list cannot be reused after vsnprintf consumed it

    const int size = vsnprintf(NULL, 0, fmt, ap);
    va_end(ap);
    if (size < 0) {
        va_end(ap2);
        return std::string();
    }

    std::vector<char> buf(static_cast<size_t>(size) + 1);  // +1 for the terminating NUL
    const int written = vsnprintf(buf.data(), buf.size(), fmt, ap2);
    va_end(ap2);
    if (written < 0) {
        return std::string();
    }

    // Both passes should agree; take the smaller to stay inside the buffer regardless.
    const int length = written < size ? written : size;
    return std::string(buf.data(), static_cast<size_t>(length));
}

#ifdef _WIN32  // code for windows
#include <windows.h>

bool file_exists(const std::string& filename) {
    DWORD attributes = GetFileAttributesA(filename.c_str());
    return (attributes != INVALID_FILE_ATTRIBUTES && !(attributes & FILE_ATTRIBUTE_DIRECTORY));
}

bool is_directory(const std::string& path) {
    DWORD attributes = GetFileAttributesA(path.c_str());
    return (attributes != INVALID_FILE_ATTRIBUTES && (attributes & FILE_ATTRIBUTE_DIRECTORY));
}

std::string get_full_path(const std::string& dir, const std::string& filename) {
    std::string full_path = dir + "\\" + filename;

    WIN32_FIND_DATA find_file_data;
    HANDLE hFind = FindFirstFile(full_path.c_str(), &find_file_data);

    if (hFind != INVALID_HANDLE_VALUE) {
        FindClose(hFind);
        return full_path;
    } else {
        return "";
    }
}

std::vector<std::string> get_files_from_dir(const std::string& dir) {
    std::vector<std::string> files;

    WIN32_FIND_DATA findFileData;
    HANDLE hFind;

    char currentDirectory[MAX_PATH];
    GetCurrentDirectory(MAX_PATH, currentDirectory);

    char directoryPath[MAX_PATH];  // this is absolute path
    sprintf(directoryPath, "%s\\%s\\*", currentDirectory, dir.c_str());

    // Find the first file in the directory
    hFind = FindFirstFile(directoryPath, &findFileData);

    // Check if the directory was found
    if (hFind == INVALID_HANDLE_VALUE) {
        printf("Unable to find directory.\n");
        return files;
    }

    // Loop through all files in the directory
    do {
        // Check if the found file is a regular file (not a directory)
        if (!(findFileData.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)) {
            files.push_back(std::string(currentDirectory) + "\\" + dir + "\\" + std::string(findFileData.cFileName));
        }
    } while (FindNextFile(hFind, &findFileData) != 0);

    // Close the handle
    FindClose(hFind);

    sort(files.begin(), files.end());

    return files;
}

#else  // Unix
#include <dirent.h>
#include <sys/stat.h>

// POSIX: true when stat() succeeds on `filename` and reports a regular file.
bool file_exists(const std::string& filename) {
    struct stat info;
    if (stat(filename.c_str(), &info) != 0) {
        return false;
    }
    return S_ISREG(info.st_mode);
}

// POSIX: true when stat() succeeds on `path` and reports a directory.
bool is_directory(const std::string& path) {
    struct stat info;
    if (stat(path.c_str(), &info) != 0) {
        return false;
    }
    return S_ISDIR(info.st_mode);
}

// TODO: add windows version
// POSIX: searches `dir` for an entry whose name equals `filename` ignoring case
// and returns `dir/<entry name as stored on disk>`. Returns an empty string when
// the directory cannot be opened or no entry matches.
std::string get_full_path(const std::string& dir, const std::string& filename) {
    DIR* dp = opendir(dir.c_str());
    if (dp == nullptr) {
        return "";
    }

    std::string match;
    struct dirent* entry;
    while ((entry = readdir(dp)) != nullptr) {
        if (strcasecmp(entry->d_name, filename.c_str()) == 0) {
            // Copy the name out BEFORE closedir(): the dirent storage belongs to
            // the directory stream and must not be read once the stream is closed.
            match = dir + "/" + entry->d_name;
            break;
        }
    }

    closedir(dp);
    return match;
}

// POSIX: collects `dir/<name>` for every entry of `dir` that is_directory()
// rejects, and returns the paths sorted. An unreadable directory yields an
// empty list.
std::vector<std::string> get_files_from_dir(const std::string& dir) {
    std::vector<std::string> result;

    if (DIR* handle = opendir(dir.c_str())) {
        for (struct dirent* item = readdir(handle); item != nullptr; item = readdir(handle)) {
            const std::string candidate = dir + "/" + item->d_name;
            if (is_directory(candidate)) {
                continue;  // directories are skipped
            }
            result.push_back(candidate);
        }
        closedir(handle);
    }

    std::sort(result.begin(), result.end());

    return result;
}

#endif

// get_num_physical_cores is copied from
// https://github.com/ggerganov/llama.cpp/blob/master/examples/common.cpp
// LICENSE: https://github.com/ggerganov/llama.cpp/blob/master/LICENSE
//
// Returns an estimate of the number of physical CPU cores.
//  - Linux: counts the distinct thread-sibling masks exposed under sysfs.
//  - macOS: queries sysctl ("hw.perflevel0.physicalcpu", then "hw.physicalcpu").
//  - Otherwise, or when the platform query yields nothing: derives a value from
//    std::thread::hardware_concurrency() (halved when above 4), defaulting to 4
//    when that is unknown.
int32_t get_num_physical_cores() {
#ifdef __linux__
    // enumerate the set of thread siblings, num entries is num cores
    std::unordered_set<std::string> siblings;
    for (uint32_t cpu = 0; cpu < UINT32_MAX; ++cpu) {
        // Per-CPU topology lives in /sys/devices/system/cpu/cpu<N>/topology/.
        std::ifstream thread_siblings("/sys/devices/system/cpu/cpu" + std::to_string(cpu) + "/topology/thread_siblings");
        if (!thread_siblings.is_open()) {
            break;  // no more cpus
        }
        std::string line;
        if (std::getline(thread_siblings, line)) {
            // Hardware threads of one core report the same mask, so the set
            // ends up with one entry per core.
            siblings.insert(line);
        }
    }
    if (siblings.size() > 0) {
        return static_cast<int32_t>(siblings.size());
    }
#elif defined(__APPLE__) && defined(__MACH__)
    int32_t num_physical_cores;
    size_t len = sizeof(num_physical_cores);
    int result = sysctlbyname("hw.perflevel0.physicalcpu", &num_physical_cores, &len, NULL, 0);
    if (result == 0) {
        return num_physical_cores;
    }
    result = sysctlbyname("hw.physicalcpu", &num_physical_cores, &len, NULL, 0);
    if (result == 0) {
        return num_physical_cores;
    }
#elif defined(_WIN32)
    // TODO: Implement
#endif
    // Fallback heuristic based on the logical thread count.
    unsigned int n_threads = std::thread::hardware_concurrency();
    return n_threads > 0 ? (n_threads <= 4 ? n_threads : n_threads / 2) : 4;
}

// Optional progress callback. When non-NULL, pretty_progress() forwards to it
// instead of printing the console progress bar. Set via sd_set_progress_callback().
static sd_progress_cb_t sd_progress_cb = NULL;
// Opaque pointer handed to sd_progress_cb as its last argument on every call.
void* sd_progress_cb_data              = NULL;

// Decodes a UTF-8 byte string into UTF-32 code points using
// std::wstring_convert with std::codecvt_utf8<char32_t>.
std::u32string utf8_to_utf32(const std::string& utf8_str) {
    using utf8_codec = std::codecvt_utf8<char32_t>;
    std::wstring_convert<utf8_codec, char32_t> codec;
    std::u32string decoded = codec.from_bytes(utf8_str);
    return decoded;
}

// Encodes UTF-32 code points as a UTF-8 byte string using
// std::wstring_convert with std::codecvt_utf8<char32_t>.
std::string utf32_to_utf8(const std::u32string& utf32_str) {
    using utf8_codec = std::codecvt_utf8<char32_t>;
    std::wstring_convert<utf8_codec, char32_t> codec;
    std::string encoded = codec.to_bytes(utf32_str);
    return encoded;
}

// Wraps a single code point value in a one-character UTF-32 string.
std::u32string unicode_value_to_utf32(int unicode_value) {
    const char32_t code_point = static_cast<char32_t>(unicode_value);
    return std::u32string(1, code_point);
}

// Returns the part of `path` after its last directory separator. Both '/' and
// '\\' count as separators, and the LAST one of either kind wins, so mixed
// paths such as "src/sub\\file.cpp" yield "file.cpp".
// A path without any separator is returned unchanged.
static std::string sd_basename(const std::string& path) {
    const size_t pos = path.find_last_of("/\\");
    if (pos == std::string::npos) {
        return path;
    }
    return path.substr(pos + 1);
}

// Joins two path fragments. If either fragment is empty the other is returned
// as-is; otherwise a '/' is inserted unless `p1` already ends in '/' or '\\'.
std::string path_join(const std::string& p1, const std::string& p2) {
    if (p1.empty() || p2.empty()) {
        return p1.empty() ? p2 : p1;
    }

    const char tail              = p1.back();
    const bool already_separated = (tail == '/' || tail == '\\');

    std::string joined = p1;
    if (!already_separated) {
        joined += "/";
    }
    joined += p2;
    return joined;
}

// Resizes `img` to a 224x224 image with stb_image_resize and returns it as a
// newly allocated sd_image_t (created with `new`; its pixel buffer with malloc).
//
// The output buffer is sized from the source channel count, because
// stbir_resize_uint8 writes `channel` values per output pixel; the returned
// image reports that same channel count.
//
// Returns NULL when the input is unusable, allocation fails, or the resize
// fails. No memory is leaked on any failure path.
sd_image_t* preprocess_id_image(sd_image_t* img) {
    const int shortest_edge = 224;
    const int size          = shortest_edge;

    if (img == NULL || img->data == NULL || img->channel == 0) {
        fprintf(stderr, "%s: invalid input image \n ", __func__);
        return NULL;
    }

    const uint32_t w = img->width;
    const uint32_t h = img->height;
    const uint32_t c = img->channel;

    // 1. do resize using stb_resize functions

    unsigned char* buf = (unsigned char*)malloc(sizeof(unsigned char) * c * size * size);
    if (buf == NULL) {
        fprintf(stderr, "%s: failed to allocate resize buffer \n ", __func__);
        return NULL;
    }
    if (!stbir_resize_uint8(img->data, w, h, 0,
                            buf, size, size, 0,
                            c)) {
        fprintf(stderr, "%s: resize operation failed \n ", __func__);
        free(buf);  // do not leak the buffer when the resize fails
        return NULL;
    }

    // 2. do center crop (likely unnecessary due to step 1)

    // 3. do rescale

    // 4. do normalize

    // 3 and 4 will need to be done in float format.

    return new sd_image_t{(uint32_t)shortest_edge,
                          (uint32_t)shortest_edge,
                          c,
                          buf};
}

// Reports sampling progress.
//
// When a progress callback is registered it receives (step, steps, time) and
// nothing is printed. Otherwise a 50-cell text bar is redrawn on the current
// console line, followed by either seconds-per-iteration (time > 1) or
// iterations-per-second; a newline is emitted once `step == steps`.
// Nothing is drawn for step 0, or when `steps` is 0 (the fill ratio would be
// undefined).
void pretty_progress(int step, int steps, float time) {
    if (sd_progress_cb) {
        sd_progress_cb(step, steps, time, sd_progress_cb_data);
        return;
    }
    if (step == 0 || steps == 0) {
        return;
    }

    const int max_progress = 50;
    // Index of the cell holding the '>' head of the bar.
    const int32_t current = (int32_t)(step * 1.f * max_progress / steps);

    std::string progress = "  |";
    for (int i = 0; i < max_progress; i++) {
        if (i > current) {
            progress += " ";
        } else if (i == current && i != max_progress - 1) {
            progress += ">";
        } else {
            progress += "=";
        }
    }
    progress += "|";

    // time == 0 is printed as-is to avoid dividing by zero.
    printf(time > 1.0f ? "\r%s %i/%i - %.2fs/it" : "\r%s %i/%i - %.2fit/s",
           progress.c_str(), step, steps,
           time > 1.0f || time == 0 ? time : (1.0f / time));
    fflush(stdout);  // for linux
    if (step == steps) {
        printf("\n");
    }
}

// Returns a copy of `s` with leading whitespace (as classified by std::isspace)
// removed.
std::string ltrim(const std::string& s) {
    // The predicate takes unsigned char: passing a negative value (any byte
    // >= 0x80 where char is signed) to std::isspace is undefined behaviour.
    auto it = std::find_if(s.begin(), s.end(), [](unsigned char ch) {
        return !std::isspace(ch);
    });
    return std::string(it, s.end());
}

// Returns a copy of `s` with trailing whitespace (as classified by std::isspace)
// removed.
std::string rtrim(const std::string& s) {
    // The predicate takes unsigned char: passing a negative value (any byte
    // >= 0x80 where char is signed) to std::isspace is undefined behaviour.
    auto it = std::find_if(s.rbegin(), s.rend(), [](unsigned char ch) {
        return !std::isspace(ch);
    });
    // base() converts the reverse iterator to the position just past the last kept character.
    return std::string(s.begin(), it.base());
}

// Returns a copy of `s` with whitespace stripped from both ends
// (ltrim first, then rtrim on the result).
std::string trim(const std::string& s) {
    const std::string without_leading = ltrim(s);
    return rtrim(without_leading);
}

// Optional log sink. log_printf() hands every formatted line to it when it is
// non-NULL. Set via sd_set_log_callback().
static sd_log_cb_t sd_log_cb = NULL;
// Opaque pointer handed to sd_log_cb as its last argument on every call.
void* sd_log_cb_data         = NULL;

// Capacity (in characters, excluding the final NUL) of log_printf()'s line buffer.
#define LOG_BUFFER_SIZE 1024

// Formats one log line as "<basename(file)>:<line> - <message>\n" and passes it,
// with `level`, to the registered log callback.
//
// When no callback is registered the call returns immediately, so no formatting
// work is done. Output longer than LOG_BUFFER_SIZE is truncated.
//
// NOTE: the line is assembled in a function-local static buffer, so concurrent
// calls share that storage.
void log_printf(sd_log_level_t level, const char* file, int line, const char* format, ...) {
    if (!sd_log_cb) {
        return;  // nobody to deliver the message to
    }

    static char log_buffer[LOG_BUFFER_SIZE + 1];
    int written = snprintf(log_buffer, LOG_BUFFER_SIZE, "%s:%-4d - ", sd_basename(file).c_str(), line);
    if (written < 0) {
        // Prefix formatting failed: start from an empty line rather than
        // re-emitting whatever a previous call left in the buffer.
        log_buffer[0] = '\0';
        written       = 0;
    }

    if (written < LOG_BUFFER_SIZE) {
        va_list args;
        va_start(args, format);
        vsnprintf(log_buffer + written, LOG_BUFFER_SIZE - written, format, args);
        va_end(args);
    }
    // The buffer has one spare byte beyond LOG_BUFFER_SIZE, so the newline
    // always fits even after truncation.
    strncat(log_buffer, "\n", LOG_BUFFER_SIZE - strlen(log_buffer));

    sd_log_cb(level, log_buffer, sd_log_cb_data);
}

// Registers the log callback and the opaque pointer passed back to it.
// Passing a NULL `cb` leaves no callback registered.
void sd_set_log_callback(sd_log_cb_t cb, void* data) {
    sd_log_cb_data = data;
    sd_log_cb      = cb;
}
// Registers the progress callback and the opaque pointer passed back to it.
// Passing a NULL `cb` leaves no callback registered.
void sd_set_progress_callback(sd_progress_cb_t cb, void* data) {
    sd_progress_cb_data = data;
    sd_progress_cb      = cb;
}
// Builds a multi-line, human-readable list of the ggml_cpu_has_*() results and
// returns it from a function-local static buffer (overwritten on each call and
// truncated to the buffer size).
const char* sd_get_system_info() {
    static char buffer[1024];

    std::stringstream ss;
    // Appends one "    NAME = value" line to the report.
    auto add_feature = [&ss](const char* name, int value) {
        ss << "    " << name << " = " << value << '\n';
    };

    ss << "System Info: \n";
    add_feature("BLAS", ggml_cpu_has_blas());
    add_feature("SSE3", ggml_cpu_has_sse3());
    add_feature("AVX", ggml_cpu_has_avx());
    add_feature("AVX2", ggml_cpu_has_avx2());
    add_feature("AVX512", ggml_cpu_has_avx512());
    add_feature("AVX512_VBMI", ggml_cpu_has_avx512_vbmi());
    add_feature("AVX512_VNNI", ggml_cpu_has_avx512_vnni());
    add_feature("FMA", ggml_cpu_has_fma());
    add_feature("NEON", ggml_cpu_has_neon());
    add_feature("ARM_FMA", ggml_cpu_has_arm_fma());
    add_feature("F16C", ggml_cpu_has_f16c());
    add_feature("FP16_VA", ggml_cpu_has_fp16_va());
    add_feature("WASM_SIMD", ggml_cpu_has_wasm_simd());
    add_feature("VSX", ggml_cpu_has_vsx());

    const std::string report = ss.str();
    snprintf(buffer, sizeof(buffer), "%s", report.c_str());
    return buffer;
}

const char* sd_type_name(enum sd_type_t type) {
    return ggml_type_name((ggml_type)type);
}

// Converts an 8-bit image to float, copying each sample value unchanged
// (no rescaling). The returned image has the same width/height/channel and a
// malloc'ed data buffer.
//
// If the allocation fails, the returned image has data == NULL.
sd_image_f32_t sd_image_t_to_sd_image_f32_t(sd_image_t image) {
    sd_image_f32_t converted_image;
    converted_image.width   = image.width;
    converted_image.height  = image.height;
    converted_image.channel = image.channel;

    // Element count in size_t so the product and the loop index cannot wrap
    // at 32 bits.
    const size_t count = (size_t)image.width * image.height * image.channel;

    // Allocate memory for float data
    converted_image.data = (float*)malloc(count * sizeof(float));
    if (converted_image.data == NULL) {
        return converted_image;
    }

    for (size_t i = 0; i < count; i++) {
        // Convert uint8_t to float
        converted_image.data[i] = (float)image.data[i];
    }

    return converted_image;
}

// Bilinear interpolation between four samples.
// v1..v4 are weighted by (1-x)(1-y), x(1-y), (1-x)y and xy respectively, where
// x = x_ratio and y = y_ratio.
float interpolate(float v1, float v2, float v3, float v4, float x_ratio, float y_ratio) {
    const float term1 = v1 * (1 - x_ratio) * (1 - y_ratio);
    const float term2 = v2 * x_ratio * (1 - y_ratio);
    const float term3 = v3 * (1 - x_ratio) * y_ratio;
    const float term4 = v4 * x_ratio * y_ratio;
    return term1 + term2 + term3 + term4;
}

// Resizes `image` to target_width x target_height with bilinear interpolation
// and returns the result in a newly malloc'ed buffer.
//
// Neighbour coordinates are clamped to the last row/column so samples on the
// right and bottom edges never read outside the source buffer.
//
// If the source is empty, the target size is not positive, or allocation
// fails, the returned image has data == NULL.
sd_image_f32_t resize_sd_image_f32_t(sd_image_f32_t image, int target_width, int target_height) {
    sd_image_f32_t resized_image;
    resized_image.width   = target_width;
    resized_image.height  = target_height;
    resized_image.channel = image.channel;
    resized_image.data    = NULL;

    if (image.data == NULL || image.width == 0 || image.height == 0 || image.channel == 0 ||
        target_width <= 0 || target_height <= 0) {
        return resized_image;
    }

    const int src_w       = (int)image.width;
    const int src_h       = (int)image.height;
    const size_t channels = image.channel;

    // Allocate memory for resized float data
    resized_image.data = (float*)malloc((size_t)target_width * target_height * channels * sizeof(float));
    if (resized_image.data == NULL) {
        return resized_image;
    }

    for (int y = 0; y < target_height; y++) {
        const float original_y = (float)y * image.height / target_height;
        const int y1           = std::min((int)original_y, src_h - 1);
        const int y2           = std::min(y1 + 1, src_h - 1);  // clamp at the bottom edge
        const float y_ratio    = original_y - y1;

        for (int x = 0; x < target_width; x++) {
            const float original_x = (float)x * image.width / target_width;
            const int x1           = std::min((int)original_x, src_w - 1);
            const int x2           = std::min(x1 + 1, src_w - 1);  // clamp at the right edge
            const float x_ratio    = original_x - x1;

            // Offsets of the four neighbouring pixels in the interleaved source buffer.
            const size_t top_left     = ((size_t)y1 * src_w + x1) * channels;
            const size_t top_right    = ((size_t)y1 * src_w + x2) * channels;
            const size_t bottom_left  = ((size_t)y2 * src_w + x1) * channels;
            const size_t bottom_right = ((size_t)y2 * src_w + x2) * channels;
            const size_t dst          = ((size_t)y * target_width + x) * channels;

            for (size_t k = 0; k < channels; k++) {
                resized_image.data[dst + k] = interpolate(image.data[top_left + k],
                                                          image.data[top_right + k],
                                                          image.data[bottom_left + k],
                                                          image.data[bottom_right + k],
                                                          x_ratio, y_ratio);
            }
        }
    }

    return resized_image;
}

// Normalizes `image` in place: every sample of channel k becomes
// (value - means[k]) / stds[k]. The data is walked pixel by pixel in
// row-major, channel-interleaved order.
void normalize_sd_image_f32_t(sd_image_f32_t image, float means[3], float stds[3]) {
    float* pixel = image.data;
    for (int row = 0; row < image.height; row++) {
        for (int col = 0; col < image.width; col++, pixel += image.channel) {
            for (int c = 0; c < image.channel; c++) {
                pixel[c] = (pixel[c] - means[c]) / stds[c];
            }
        }
    }
}

// Per-channel mean and standard deviation used by clip_preprocess() to
// normalize samples after they have been scaled to [0, 1]:
//   value = (value - means[k]) / stds[k]
// NOTE(review): presumably the CLIP image statistics in RGB order — confirm.
float means[3] = {0.48145466, 0.4578275, 0.40821073};
float stds[3]  = {0.26862954, 0.26130258, 0.27577711};

// Prepares `image` as a size x size normalized float image:
//   1. bilinear resize so the shorter side becomes `size`,
//   2. center crop to size x size,
//   3. clamp samples to [0, 255] and scale to [0, 1],
//   4. normalize each channel with the file-level means[] / stds[].
//
// Neighbour coordinates are clamped during the resize, and the resized
// dimensions are never allowed to fall below `size`, so neither the resize nor
// the crop reads outside its source buffer.
//
// Returns an image whose data is malloc'ed; data == NULL if the input is empty,
// `size` is not positive, or allocation fails.
//
// NOTE(review): means[]/stds[] hold 3 entries, so this assumes
// image.channel <= 3 — confirm against callers.
sd_image_f32_t clip_preprocess(sd_image_f32_t image, int size) {
    sd_image_f32_t result;
    result.width   = size;
    result.height  = size;
    result.channel = image.channel;
    result.data    = NULL;

    if (size <= 0 || image.data == NULL || image.width == 0 || image.height == 0 || image.channel == 0) {
        return result;
    }

    const int src_w       = (int)image.width;
    const int src_h       = (int)image.height;
    const size_t channels = image.channel;

    float scale = (float)size / fmin(image.width, image.height);

    // Interpolation
    // Float truncation can land one pixel short of `size`; keep at least
    // `size` so the crop below stays in bounds.
    const int new_width  = std::max(size, (int)(scale * image.width));
    const int new_height = std::max(size, (int)(scale * image.height));
    float* resized_data  = (float*)malloc((size_t)new_width * new_height * channels * sizeof(float));
    if (resized_data == NULL) {
        return result;
    }

    for (int y = 0; y < new_height; y++) {
        const float original_y = (float)y * image.height / new_height;
        const int y1           = std::min((int)original_y, src_h - 1);
        const int y2           = std::min(y1 + 1, src_h - 1);  // clamp at the bottom edge
        const float y_ratio    = original_y - y1;

        for (int x = 0; x < new_width; x++) {
            const float original_x = (float)x * image.width / new_width;
            const int x1           = std::min((int)original_x, src_w - 1);
            const int x2           = std::min(x1 + 1, src_w - 1);  // clamp at the right edge
            const float x_ratio    = original_x - x1;

            const size_t top_left     = ((size_t)y1 * src_w + x1) * channels;
            const size_t top_right    = ((size_t)y1 * src_w + x2) * channels;
            const size_t bottom_left  = ((size_t)y2 * src_w + x1) * channels;
            const size_t bottom_right = ((size_t)y2 * src_w + x2) * channels;
            const size_t dst          = ((size_t)y * new_width + x) * channels;

            for (size_t k = 0; k < channels; k++) {
                resized_data[dst + k] = interpolate(image.data[top_left + k],
                                                    image.data[top_right + k],
                                                    image.data[bottom_left + k],
                                                    image.data[bottom_right + k],
                                                    x_ratio, y_ratio);
            }
        }
    }

    // Clip and preprocess
    // Top-left corner of the centered crop window.
    const int h = (new_height - size) / 2;
    const int w = (new_width - size) / 2;

    result.data = (float*)malloc((size_t)size * size * channels * sizeof(float));
    if (result.data == NULL) {
        free(resized_data);
        return result;
    }

    for (int i = 0; i < size; i++) {
        for (int j = 0; j < size; j++) {
            const size_t src = ((size_t)(i + h) * new_width + (j + w)) * channels;
            const size_t out = ((size_t)i * size + j) * channels;
            for (size_t k = 0; k < channels; k++) {
                // Clamp to [0, 255] and scale to [0, 1] ...
                float value = fmin(fmax(resized_data[src + k], 0.0f), 255.0f) / 255.0f;
                // ... then normalize with the per-channel statistics.
                value                = (value - means[k]) / stds[k];
                result.data[out + k] = value;
            }
        }
    }

    // Free allocated memory
    free(resized_data);

    return result;
}