stable-diffusion.cpp/util.h
bssrdf 2b1bc06477
feat: add PhotoMaker Version 2 support (#358)
* first attempt at updating to photomaker v2

* continue adding photomaker v2 modules

* finishing the last few pieces for photomaker v2; id_embeds need to be done by a manual step and pass as an input file

* added a name converter for Photomaker V2; build ok

* more debugging underway

* failing at cuda mat_mul

* updated chunk_half to be more efficient; redo feedforward

* fixed a bug: carefully using ggml_view_4d to get chunks of a tensor; strides need to be recalculated or set properly; still failing at soft_max cuda op

* redo weight calculation and weight*v

* fixed a bug now Photomaker V2 kinds of working

* add python script for face detection (Photomaker V2 needs)

* updated readme for photomaker

* fixed a bug causing PMV1 crashing; both V1 and V2 work

* fixed clean_input_ids for PMV2

* fixed a double counting bug in tokenize_with_trigger_token

* updated photomaker readme

* removed some commented code

* improved reconstructing class word free prompt

* changed reading id_embed to raw binary using existing load tensor function; this is more efficient than using model load and also makes it easier to work with sd server

* minor clean up

---------

Co-authored-by: bssrdf <bssrdf@gmail.com>
2024-11-23 11:50:14 +08:00

62 lines
2.2 KiB
C++

#ifndef __UTIL_H__
#define __UTIL_H__
#include <cstdint>
#include <string>
#include <vector>
#include "stable-diffusion.h"
bool ends_with(const std::string& str, const std::string& ending);
bool starts_with(const std::string& str, const std::string& start);
bool contains(const std::string& str, const std::string& substr);
std::string format(const char* fmt, ...);
void replace_all_chars(std::string& str, char target, char replacement);
bool file_exists(const std::string& filename);
bool is_directory(const std::string& path);
std::string get_full_path(const std::string& dir, const std::string& filename);
std::vector<std::string> get_files_from_dir(const std::string& dir);
std::u32string utf8_to_utf32(const std::string& utf8_str);
std::string utf32_to_utf8(const std::u32string& utf32_str);
std::u32string unicode_value_to_utf32(int unicode_value);
sd_image_t* preprocess_id_image(sd_image_t* img);
// std::string sd_basename(const std::string& path);
typedef struct {
uint32_t width;
uint32_t height;
uint32_t channel;
float* data;
} sd_image_f32_t;
void normalize_sd_image_f32_t(sd_image_f32_t image, float means[3], float stds[3]);
sd_image_f32_t sd_image_t_to_sd_image_f32_t(sd_image_t image);
sd_image_f32_t resize_sd_image_f32_t(sd_image_f32_t image, int target_width, int target_height);
sd_image_f32_t clip_preprocess(sd_image_f32_t image, int size);
std::string path_join(const std::string& p1, const std::string& p2);
std::vector<std::string> splitString(const std::string& str, char delimiter);
void pretty_progress(int step, int steps, float time);
void log_printf(sd_log_level_t level, const char* file, int line, const char* format, ...);
std::string trim(const std::string& s);
std::vector<std::pair<std::string, float>> parse_prompt_attention(const std::string& text);
#define LOG_DEBUG(format, ...) log_printf(SD_LOG_DEBUG, __FILE__, __LINE__, format, ##__VA_ARGS__)
#define LOG_INFO(format, ...) log_printf(SD_LOG_INFO, __FILE__, __LINE__, format, ##__VA_ARGS__)
#define LOG_WARN(format, ...) log_printf(SD_LOG_WARN, __FILE__, __LINE__, format, ##__VA_ARGS__)
#define LOG_ERROR(format, ...) log_printf(SD_LOG_ERROR, __FILE__, __LINE__, format, ##__VA_ARGS__)
#endif // __UTIL_H__