feat: load weights from safetensors and ckpt (#101)
This commit is contained in:
parent
47dd704198
commit
d7af2c2ba9
3
.gitignore
vendored
3
.gitignore
vendored
@ -8,5 +8,6 @@ test/
|
||||
*.bin
|
||||
*.exe
|
||||
*.gguf
|
||||
*.log
|
||||
output.png
|
||||
models/*
|
||||
models/
|
@ -25,7 +25,7 @@ endif()
|
||||
#option(SD_BUILD_TESTS "sd: build tests" ${SD_STANDALONE})
|
||||
option(SD_BUILD_EXAMPLES "sd: build examples" ${SD_STANDALONE})
|
||||
option(SD_CUBLAS "sd: cuda backend" OFF)
|
||||
option(SD_FLASH_ATTN "sd: use flash attention for x4 less memory usage" OFF)
|
||||
option(SD_FLASH_ATTN "sd: use flash attention for x4 less memory usage" OFF)
|
||||
option(BUILD_SHARED_LIBS "sd: build shared libs" OFF)
|
||||
#option(SD_BUILD_SERVER "sd: build server example" ON)
|
||||
|
||||
@ -45,14 +45,15 @@ set(CMAKE_POLICY_DEFAULT_CMP0077 NEW)
|
||||
# deps
|
||||
add_subdirectory(ggml)
|
||||
|
||||
add_subdirectory(thirdparty)
|
||||
|
||||
set(SD_LIB stable-diffusion)
|
||||
|
||||
add_library(${SD_LIB} stable-diffusion.h stable-diffusion.cpp)
|
||||
target_link_libraries(${SD_LIB} PUBLIC ggml)
|
||||
target_include_directories(${SD_LIB} PUBLIC .)
|
||||
add_library(${SD_LIB} stable-diffusion.h stable-diffusion.cpp model.h model.cpp util.h util.cpp)
|
||||
target_link_libraries(${SD_LIB} PUBLIC ggml zip)
|
||||
target_include_directories(${SD_LIB} PUBLIC . thirdparty)
|
||||
target_compile_features(${SD_LIB} PUBLIC cxx_std_11)
|
||||
|
||||
add_subdirectory(common)
|
||||
|
||||
if (SD_BUILD_EXAMPLES)
|
||||
add_subdirectory(examples)
|
||||
|
63
README.md
63
README.md
@ -10,13 +10,15 @@ Inference of [Stable Diffusion](https://github.com/CompVis/stable-diffusion) in
|
||||
|
||||
- Plain C/C++ implementation based on [ggml](https://github.com/ggerganov/ggml), working in the same way as [llama.cpp](https://github.com/ggerganov/llama.cpp)
|
||||
- Super lightweight and without external dependencies.
|
||||
- SD1.x and SD2.x support
|
||||
- 16-bit, 32-bit float support
|
||||
- 4-bit, 5-bit and 8-bit integer quantization support
|
||||
- Accelerated memory-efficient CPU inference
|
||||
- Only requires ~2.3GB when using txt2img with fp16 precision to generate a 512x512 image, enabling Flash Attention just requires ~1.8GB.
|
||||
- AVX, AVX2 and AVX512 support for x86 architectures
|
||||
- SD1.x and SD2.x support
|
||||
- Full CUDA backend for GPU acceleration, for now just for float16 and float32 models. There are some issues with quantized models and CUDA; it will be fixed in the future.
|
||||
- Can load ckpt, safetensors and diffusers models/checkpoints. Standalone VAEs models.
|
||||
- No need to convert to `.ggml` or `.gguf` anymore!
|
||||
- Flash Attention for memory usage optimization (only cpu for now).
|
||||
- Original `txt2img` and `img2img` mode
|
||||
- Negative prompt
|
||||
@ -68,7 +70,7 @@ git submodule init
|
||||
git submodule update
|
||||
```
|
||||
|
||||
### Convert weights
|
||||
### Download weights
|
||||
|
||||
- download original weights(.ckpt or .safetensors). For example
|
||||
- Stable Diffusion v1.4 from https://huggingface.co/CompVis/stable-diffusion-v-1-4-original
|
||||
@ -81,22 +83,6 @@ git submodule update
|
||||
# curl -L -O https://huggingface.co/stabilityai/stable-diffusion-2-1/blob/main/v2-1_768-nonema-pruned.safetensors
|
||||
```
|
||||
|
||||
- convert weights to gguf model format
|
||||
|
||||
```shell
|
||||
./bin/convert sd-v1-4.ckpt -t f16
|
||||
```
|
||||
|
||||
### Quantization
|
||||
|
||||
You can specify the output model format using the `--type` or `-t` parameter
|
||||
|
||||
- `f16` for 16-bit floating-point
|
||||
- `f32` for 32-bit floating-point
|
||||
- `q8_0` for 8-bit integer quantization
|
||||
- `q5_0` or `q5_1` for 5-bit integer quantization
|
||||
- `q4_0` or `q4_1` for 4-bit integer quantization
|
||||
|
||||
### Build
|
||||
|
||||
#### Build from scratch
|
||||
@ -144,9 +130,11 @@ arguments:
|
||||
-t, --threads N number of threads to use during computation (default: -1).
|
||||
If threads <= 0, then threads will be set to the number of CPU physical cores
|
||||
-m, --model [MODEL] path to model
|
||||
--lora-model-dir [DIR] lora model directory
|
||||
--vae [VAE] path to vae
|
||||
--type [TYPE] weight type (f32, f16, q4_0, q4_1, q5_0, q5_1, q8_0)
|
||||
If not specified, the default is the type of the weight file. --lora-model-dir [DIR] lora model directory
|
||||
-i, --init-img [IMAGE] path to the input image, required by img2img
|
||||
-o, --output OUTPUT path to write result image to (default: .\output.png)
|
||||
-o, --output OUTPUT path to write result image to (default: ./output.png)
|
||||
-p, --prompt [PROMPT] the prompt to render
|
||||
-n, --negative-prompt PROMPT the negative prompt (default: "")
|
||||
--cfg-scale SCALE unconditional guidance scale: (default: 7.0)
|
||||
@ -164,10 +152,21 @@ arguments:
|
||||
-v, --verbose print extra info
|
||||
```
|
||||
|
||||
#### Quantization
|
||||
|
||||
You can specify the model weight type using the `--type` parameter. The weights are automatically converted when loading the model.
|
||||
|
||||
- `f16` for 16-bit floating-point
|
||||
- `f32` for 32-bit floating-point
|
||||
- `q8_0` for 8-bit integer quantization
|
||||
- `q5_0` or `q5_1` for 5-bit integer quantization
|
||||
- `q4_0` or `q4_1` for 4-bit integer quantization
|
||||
|
||||
#### txt2img example
|
||||
|
||||
```
|
||||
./bin/sd -m ../sd-v1-4-f16.gguf -p "a lovely cat"
|
||||
```sh
|
||||
./bin/sd -m ../models/sd-v1-4.ckpt -p "a lovely cat"
|
||||
# ./bin/sd -m ../models/v1-5-pruned-emaonly.safetensors -p "a lovely cat"
|
||||
```
|
||||
|
||||
Using formats of different precisions will yield results of varying quality.
|
||||
@ -182,7 +181,7 @@ Using formats of different precisions will yield results of varying quality.
|
||||
|
||||
|
||||
```
|
||||
./bin/sd --mode img2img -m ../models/sd-v1-4-f16.gguf -p "cat with blue eyes" -i ./output.png -o ./img2img_output.png --strength 0.4
|
||||
./bin/sd --mode img2img -m ../models/sd-v1-4.ckpt -p "cat with blue eyes" -i ./output.png -o ./img2img_output.png --strength 0.4
|
||||
```
|
||||
|
||||
<p align="center">
|
||||
@ -191,13 +190,6 @@ Using formats of different precisions will yield results of varying quality.
|
||||
|
||||
#### with LoRA
|
||||
|
||||
- convert lora weights to gguf model format
|
||||
|
||||
```shell
|
||||
bin/convert [lora path] -t f16
|
||||
# For example, bin/convert marblesh.safetensors -t f16
|
||||
```
|
||||
|
||||
- You can specify the directory where the lora weights are stored via `--lora-model-dir`. If not specified, the default is the current working directory.
|
||||
|
||||
- LoRA is specified via prompt, just like [stable-diffusion-webui](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Features#lora).
|
||||
@ -205,10 +197,10 @@ Using formats of different precisions will yield results of varying quality.
|
||||
Here's a simple example:
|
||||
|
||||
```
|
||||
./bin/sd -m ../models/v1-5-pruned-emaonly-f16.gguf -p "a lovely cat<lora:marblesh:1>" --lora-model-dir ../models
|
||||
./bin/sd -m ../models/v1-5-pruned-emaonly.safetensors -p "a lovely cat<lora:marblesh:1>" --lora-model-dir ../models
|
||||
```
|
||||
|
||||
`../models/marblesh.gguf` will be applied to the model
|
||||
`../models/marblesh.safetensors` or `../models/marblesh.ckpt` will be applied to the model
|
||||
|
||||
#### LCM/LCM-LoRA
|
||||
|
||||
@ -219,7 +211,7 @@ Here's a simple example:
|
||||
Here's a simple example:
|
||||
|
||||
```
|
||||
./bin/sd -m ../models/v1-5-pruned-emaonly-f16.gguf -p "a lovely cat<lora:lcm-lora-sdv1-5:1>" --steps 4 --lora-model-dir ../models -v --cfg-scale 1
|
||||
./bin/sd -m ../models/v1-5-pruned-emaonly.safetensors -p "a lovely cat<lora:lcm-lora-sdv1-5:1>" --steps 4 --lora-model-dir ../models -v --cfg-scale 1
|
||||
```
|
||||
|
||||
| without LCM-LoRA (--cfg-scale 7) | with LCM-LoRA (--cfg-scale 1) |
|
||||
@ -240,14 +232,13 @@ docker build -t sd .
|
||||
```shell
|
||||
docker run -v /path/to/models:/models -v /path/to/output/:/output sd [args...]
|
||||
# For example
|
||||
# docker run -v ./models:/models -v ./build:/output sd -m /models/sd-v1-4-f16.gguf -p "a lovely cat" -v -o /output/output.png
|
||||
# docker run -v ./models:/models -v ./build:/output sd -m /models/sd-v1-4.ckpt -p "a lovely cat" -v -o /output/output.png
|
||||
```
|
||||
|
||||
## Memory/Disk Requirements
|
||||
## Memory Requirements
|
||||
|
||||
| precision | f32 | f16 |q8_0 |q5_0 |q5_1 |q4_0 |q4_1 |
|
||||
| ---- | ---- |---- |---- |---- |---- |---- |---- |
|
||||
| **Disk** | 2.7G | 2.0G | 1.7G | 1.6G | 1.6G | 1.5G | 1.5G |
|
||||
| **Memory** (txt2img - 512 x 512) | ~2.8G | ~2.3G | ~2.1G | ~2.0G | ~2.0G | ~2.0G | ~2.0G |
|
||||
| **Memory** (txt2img - 512 x 512) *with Flash Attention* | ~2.4G | ~1.9G | ~1.6G | ~1.5G | ~1.5G | ~1.5G | ~1.5G |
|
||||
|
||||
|
@ -1,15 +0,0 @@
|
||||
set(TARGET common)
|
||||
|
||||
# json.hpp library from: https://github.com/nlohmann/json
|
||||
|
||||
add_library(${TARGET} OBJECT common.cpp common.h stb_image.h stb_image_write.h json.hpp)
|
||||
|
||||
target_include_directories(${TARGET} PUBLIC .)
|
||||
target_link_libraries(${TARGET} PRIVATE stable-diffusion ${CMAKE_THREAD_LIBS_INIT})
|
||||
target_compile_features(${TARGET} PUBLIC cxx_std_11)
|
||||
|
||||
# ZIP Library from: https://github.com/kuba--/zip
|
||||
|
||||
set(Z_TARGET zip)
|
||||
add_library(${Z_TARGET} OBJECT zip.c zip.h miniz.h)
|
||||
target_include_directories(${Z_TARGET} PUBLIC .)
|
@ -1,391 +0,0 @@
|
||||
#include "common.h"
|
||||
#include <cstring>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <thread>
|
||||
#include <unordered_set>
|
||||
#include <vector>
|
||||
|
||||
#if defined(__APPLE__) && defined(__MACH__)
|
||||
#include <sys/sysctl.h>
|
||||
#include <sys/types.h>
|
||||
#endif
|
||||
|
||||
#if !defined(_WIN32)
|
||||
#include <sys/ioctl.h>
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
// get_num_physical_cores is copy from
|
||||
// https://github.com/ggerganov/llama.cpp/blob/master/examples/common.cpp
|
||||
// LICENSE: https://github.com/ggerganov/llama.cpp/blob/master/LICENSE
|
||||
int32_t get_num_physical_cores() {
|
||||
#ifdef __linux__
|
||||
// enumerate the set of thread siblings, num entries is num cores
|
||||
std::unordered_set<std::string> siblings;
|
||||
for (uint32_t cpu = 0; cpu < UINT32_MAX; ++cpu) {
|
||||
std::ifstream thread_siblings("/sys/devices/system/cpu" + std::to_string(cpu) + "/topology/thread_siblings");
|
||||
if (!thread_siblings.is_open()) {
|
||||
break; // no more cpus
|
||||
}
|
||||
std::string line;
|
||||
if (std::getline(thread_siblings, line)) {
|
||||
siblings.insert(line);
|
||||
}
|
||||
}
|
||||
if (siblings.size() > 0) {
|
||||
return static_cast<int32_t>(siblings.size());
|
||||
}
|
||||
#elif defined(__APPLE__) && defined(__MACH__)
|
||||
int32_t num_physical_cores;
|
||||
size_t len = sizeof(num_physical_cores);
|
||||
int result = sysctlbyname("hw.perflevel0.physicalcpu", &num_physical_cores, &len, NULL, 0);
|
||||
if (result == 0) {
|
||||
return num_physical_cores;
|
||||
}
|
||||
result = sysctlbyname("hw.physicalcpu", &num_physical_cores, &len, NULL, 0);
|
||||
if (result == 0) {
|
||||
return num_physical_cores;
|
||||
}
|
||||
#elif defined(_WIN32)
|
||||
// TODO: Implement
|
||||
#endif
|
||||
unsigned int n_threads = std::thread::hardware_concurrency();
|
||||
return n_threads > 0 ? (n_threads <= 4 ? n_threads : n_threads / 2) : 4;
|
||||
}
|
||||
|
||||
const char* rng_type_to_str[] = {
|
||||
"std_default",
|
||||
"cuda",
|
||||
};
|
||||
|
||||
// Names of the sampler method, same order as enum sample_method in stable-diffusion.h
|
||||
const char* sample_method_str[] = {
|
||||
"euler_a",
|
||||
"euler",
|
||||
"heun",
|
||||
"dpm2",
|
||||
"dpm++2s_a",
|
||||
"dpm++2m",
|
||||
"dpm++2mv2",
|
||||
"lcm",
|
||||
};
|
||||
|
||||
// Names of the sigma schedule overrides, same order as sample_schedule in stable-diffusion.h
|
||||
const char* schedule_str[] = {
|
||||
"default",
|
||||
"discrete",
|
||||
"karras"};
|
||||
|
||||
const char* modes_str[] = {
|
||||
"txt2img",
|
||||
"img2img"};
|
||||
|
||||
void print_params(SDParams params) {
|
||||
printf("Option: \n");
|
||||
printf(" n_threads: %d\n", params.n_threads);
|
||||
printf(" mode: %s\n", modes_str[params.mode]);
|
||||
printf(" model_path: %s\n", params.model_path.c_str());
|
||||
printf(" output_path: %s\n", params.output_path.c_str());
|
||||
printf(" init_img: %s\n", params.input_path.c_str());
|
||||
printf(" prompt: %s\n", params.prompt.c_str());
|
||||
printf(" negative_prompt: %s\n", params.negative_prompt.c_str());
|
||||
printf(" cfg_scale: %.2f\n", params.cfg_scale);
|
||||
printf(" width: %d\n", params.width);
|
||||
printf(" height: %d\n", params.height);
|
||||
printf(" sample_method: %s\n", sample_method_str[params.sample_method]);
|
||||
printf(" schedule: %s\n", schedule_str[params.schedule]);
|
||||
printf(" sample_steps: %d\n", params.sample_steps);
|
||||
printf(" strength: %.2f\n", params.strength);
|
||||
printf(" rng: %s\n", rng_type_to_str[params.rng_type]);
|
||||
printf(" seed: %ld\n", params.seed);
|
||||
printf(" batch_count: %d\n", params.batch_count);
|
||||
}
|
||||
|
||||
void print_usage(int argc, const char* argv[]) {
|
||||
printf("usage: %s [arguments]\n", argv[0]);
|
||||
printf("\n");
|
||||
printf("arguments:\n");
|
||||
printf(" -h, --help show this help message and exit\n");
|
||||
printf(" -M, --mode [txt2img or img2img] generation mode (default: txt2img)\n");
|
||||
printf(" -t, --threads N number of threads to use during computation (default: -1).\n");
|
||||
printf(" If threads <= 0, then threads will be set to the number of CPU physical cores\n");
|
||||
printf(" -m, --model [MODEL] path to model\n");
|
||||
printf(" --lora-model-dir [DIR] lora model directory\n");
|
||||
printf(" -i, --init-img [IMAGE] path to the input image, required by img2img\n");
|
||||
printf(" -o, --output OUTPUT path to write result image to (default: ./output.png)\n");
|
||||
printf(" -p, --prompt [PROMPT] the prompt to render\n");
|
||||
printf(" -n, --negative-prompt PROMPT the negative prompt (default: \"\")\n");
|
||||
printf(" --cfg-scale SCALE unconditional guidance scale: (default: 7.0)\n");
|
||||
printf(" --strength STRENGTH strength for noising/unnoising (default: 0.75)\n");
|
||||
printf(" 1.0 corresponds to full destruction of information in init image\n");
|
||||
printf(" -H, --height H image height, in pixel space (default: 512)\n");
|
||||
printf(" -W, --width W image width, in pixel space (default: 512)\n");
|
||||
printf(" --sampling-method {euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, lcm}\n");
|
||||
printf(" sampling method (default: \"euler_a\")\n");
|
||||
printf(" --steps STEPS number of sample steps (default: 20)\n");
|
||||
printf(" --rng {std_default, cuda} RNG (default: cuda)\n");
|
||||
printf(" -s SEED, --seed SEED RNG seed (default: 42, use random seed for < 0)\n");
|
||||
printf(" -b, --batch-count COUNT number of images to generate.\n");
|
||||
printf(" --schedule {discrete, karras} Denoiser sigma schedule (default: discrete)\n");
|
||||
printf(" -v, --verbose print extra info\n");
|
||||
}
|
||||
|
||||
void parse_args(int argc, const char** argv, SDParams& params) {
|
||||
bool invalid_arg = false;
|
||||
std::string arg;
|
||||
for (int i = 1; i < argc; i++) {
|
||||
arg = argv[i];
|
||||
|
||||
if (arg == "-t" || arg == "--threads") {
|
||||
if (++i >= argc) {
|
||||
invalid_arg = true;
|
||||
break;
|
||||
}
|
||||
params.n_threads = std::stoi(argv[i]);
|
||||
} else if (arg == "-M" || arg == "--mode") {
|
||||
if (++i >= argc) {
|
||||
invalid_arg = true;
|
||||
break;
|
||||
}
|
||||
const char* mode_selected = argv[i];
|
||||
int mode_found = -1;
|
||||
for (int d = 0; d < MODE_COUNT; d++) {
|
||||
if (!strcmp(mode_selected, modes_str[d])) {
|
||||
mode_found = d;
|
||||
}
|
||||
}
|
||||
if (mode_found == -1) {
|
||||
fprintf(stderr, "error: invalid mode %s, must be one of [txt2img, img2img]\n",
|
||||
mode_selected);
|
||||
exit(1);
|
||||
}
|
||||
params.mode = (sd_mode)mode_found;
|
||||
} else if (arg == "-m" || arg == "--model") {
|
||||
if (++i >= argc) {
|
||||
invalid_arg = true;
|
||||
break;
|
||||
}
|
||||
params.model_path = argv[i];
|
||||
} else if (arg == "--lora-model-dir") {
|
||||
if (++i >= argc) {
|
||||
invalid_arg = true;
|
||||
break;
|
||||
}
|
||||
params.lora_model_dir = argv[i];
|
||||
} else if (arg == "-i" || arg == "--init-img") {
|
||||
if (++i >= argc) {
|
||||
invalid_arg = true;
|
||||
break;
|
||||
}
|
||||
params.input_path = argv[i];
|
||||
} else if (arg == "-o" || arg == "--output") {
|
||||
if (++i >= argc) {
|
||||
invalid_arg = true;
|
||||
break;
|
||||
}
|
||||
params.output_path = argv[i];
|
||||
} else if (arg == "-p" || arg == "--prompt") {
|
||||
if (++i >= argc) {
|
||||
invalid_arg = true;
|
||||
break;
|
||||
}
|
||||
params.prompt = argv[i];
|
||||
} else if (arg == "-n" || arg == "--negative-prompt") {
|
||||
if (++i >= argc) {
|
||||
invalid_arg = true;
|
||||
break;
|
||||
}
|
||||
params.negative_prompt = argv[i];
|
||||
} else if (arg == "--cfg-scale") {
|
||||
if (++i >= argc) {
|
||||
invalid_arg = true;
|
||||
break;
|
||||
}
|
||||
params.cfg_scale = std::stof(argv[i]);
|
||||
} else if (arg == "--strength") {
|
||||
if (++i >= argc) {
|
||||
invalid_arg = true;
|
||||
break;
|
||||
}
|
||||
params.strength = std::stof(argv[i]);
|
||||
} else if (arg == "-H" || arg == "--height") {
|
||||
if (++i >= argc) {
|
||||
invalid_arg = true;
|
||||
break;
|
||||
}
|
||||
params.height = std::stoi(argv[i]);
|
||||
} else if (arg == "-W" || arg == "--width") {
|
||||
if (++i >= argc) {
|
||||
invalid_arg = true;
|
||||
break;
|
||||
}
|
||||
params.width = std::stoi(argv[i]);
|
||||
} else if (arg == "--steps") {
|
||||
if (++i >= argc) {
|
||||
invalid_arg = true;
|
||||
break;
|
||||
}
|
||||
params.sample_steps = std::stoi(argv[i]);
|
||||
} else if (arg == "-b" || arg == "--batch-count") {
|
||||
if (++i >= argc) {
|
||||
invalid_arg = true;
|
||||
break;
|
||||
}
|
||||
params.batch_count = std::stoi(argv[i]);
|
||||
} else if (arg == "--rng") {
|
||||
if (++i >= argc) {
|
||||
invalid_arg = true;
|
||||
break;
|
||||
}
|
||||
std::string rng_type_str = argv[i];
|
||||
if (rng_type_str == "std_default") {
|
||||
params.rng_type = STD_DEFAULT_RNG;
|
||||
} else if (rng_type_str == "cuda") {
|
||||
params.rng_type = CUDA_RNG;
|
||||
} else {
|
||||
invalid_arg = true;
|
||||
break;
|
||||
}
|
||||
} else if (arg == "--schedule") {
|
||||
if (++i >= argc) {
|
||||
invalid_arg = true;
|
||||
break;
|
||||
}
|
||||
const char* schedule_selected = argv[i];
|
||||
int schedule_found = -1;
|
||||
for (int d = 0; d < N_SCHEDULES; d++) {
|
||||
if (!strcmp(schedule_selected, schedule_str[d])) {
|
||||
schedule_found = d;
|
||||
}
|
||||
}
|
||||
if (schedule_found == -1) {
|
||||
invalid_arg = true;
|
||||
break;
|
||||
}
|
||||
params.schedule = (Schedule)schedule_found;
|
||||
} else if (arg == "-s" || arg == "--seed") {
|
||||
if (++i >= argc) {
|
||||
invalid_arg = true;
|
||||
break;
|
||||
}
|
||||
params.seed = std::stoll(argv[i]);
|
||||
} else if (arg == "--sampling-method") {
|
||||
if (++i >= argc) {
|
||||
invalid_arg = true;
|
||||
break;
|
||||
}
|
||||
const char* sample_method_selected = argv[i];
|
||||
int sample_method_found = -1;
|
||||
for (int m = 0; m < N_SAMPLE_METHODS; m++) {
|
||||
if (!strcmp(sample_method_selected, sample_method_str[m])) {
|
||||
sample_method_found = m;
|
||||
}
|
||||
}
|
||||
if (sample_method_found == -1) {
|
||||
invalid_arg = true;
|
||||
break;
|
||||
}
|
||||
params.sample_method = (SampleMethod)sample_method_found;
|
||||
} else if (arg == "-h" || arg == "--help") {
|
||||
print_usage(argc, argv);
|
||||
exit(0);
|
||||
} else if (arg == "-v" || arg == "--verbose") {
|
||||
params.verbose = true;
|
||||
} else {
|
||||
fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
|
||||
print_usage(argc, argv);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
if (invalid_arg) {
|
||||
fprintf(stderr, "error: invalid parameter for argument: %s\n", arg.c_str());
|
||||
print_usage(argc, argv);
|
||||
exit(1);
|
||||
}
|
||||
if (params.n_threads <= 0) {
|
||||
params.n_threads = get_num_physical_cores();
|
||||
}
|
||||
|
||||
if (params.prompt.length() == 0) {
|
||||
fprintf(stderr, "error: the following arguments are required: prompt\n");
|
||||
print_usage(argc, argv);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (params.model_path.length() == 0) {
|
||||
fprintf(stderr, "error: the following arguments are required: model_path\n");
|
||||
print_usage(argc, argv);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (params.mode == IMG2IMG && params.input_path.length() == 0) {
|
||||
fprintf(stderr, "error: when using the img2img mode, the following arguments are required: init-img\n");
|
||||
print_usage(argc, argv);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (params.output_path.length() == 0) {
|
||||
fprintf(stderr, "error: the following arguments are required: output_path\n");
|
||||
print_usage(argc, argv);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (params.width <= 0 || params.width % 64 != 0) {
|
||||
fprintf(stderr, "error: the width must be a multiple of 64\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (params.height <= 0 || params.height % 64 != 0) {
|
||||
fprintf(stderr, "error: the height must be a multiple of 64\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (params.sample_steps <= 0) {
|
||||
fprintf(stderr, "error: the sample_steps must be greater than 0\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (params.strength < 0.f || params.strength > 1.f) {
|
||||
fprintf(stderr, "error: can only work with strength in [0.0, 1.0]\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (params.seed < 0) {
|
||||
srand((int)time(NULL));
|
||||
params.seed = rand();
|
||||
}
|
||||
}
|
||||
|
||||
std::string basename(const std::string& path) {
|
||||
size_t pos = path.find_last_of('/');
|
||||
if (pos != std::string::npos) {
|
||||
return path.substr(pos + 1);
|
||||
}
|
||||
pos = path.find_last_of('\\');
|
||||
if (pos != std::string::npos) {
|
||||
return path.substr(pos + 1);
|
||||
}
|
||||
return path;
|
||||
}
|
||||
|
||||
std::string get_image_params(SDParams params, int seed) {
|
||||
std::string parameter_string = params.prompt + "\n";
|
||||
if (params.negative_prompt.size() != 0) {
|
||||
parameter_string += "Negative prompt: " + params.negative_prompt + "\n";
|
||||
}
|
||||
parameter_string += "Steps: " + std::to_string(params.sample_steps) + ", ";
|
||||
parameter_string += "CFG scale: " + std::to_string(params.cfg_scale) + ", ";
|
||||
parameter_string += "Seed: " + std::to_string(seed) + ", ";
|
||||
parameter_string += "Size: " + std::to_string(params.width) + "x" + std::to_string(params.height) + ", ";
|
||||
parameter_string += "Model: " + basename(params.model_path) + ", ";
|
||||
parameter_string += "RNG: " + std::string(rng_type_to_str[params.rng_type]) + ", ";
|
||||
parameter_string += "Sampler: " + std::string(sample_method_str[params.sample_method]);
|
||||
if (params.schedule == KARRAS) {
|
||||
parameter_string += " karras";
|
||||
}
|
||||
parameter_string += ", ";
|
||||
parameter_string += "Version: stable-diffusion.cpp";
|
||||
return parameter_string;
|
||||
}
|
@ -1,43 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include "stable-diffusion.h"
|
||||
|
||||
enum sd_mode {
|
||||
TXT2IMG,
|
||||
IMG2IMG,
|
||||
MODE_COUNT
|
||||
};
|
||||
|
||||
struct SDParams {
|
||||
int n_threads = -1;
|
||||
sd_mode mode = TXT2IMG;
|
||||
|
||||
std::string model_path;
|
||||
std::string lora_model_dir;
|
||||
std::string output_path = "output.png";
|
||||
std::string input_path;
|
||||
|
||||
std::string prompt;
|
||||
std::string negative_prompt;
|
||||
float cfg_scale = 7.0f;
|
||||
int width = 512;
|
||||
int height = 512;
|
||||
int batch_count = 1;
|
||||
|
||||
SampleMethod sample_method = EULER_A;
|
||||
Schedule schedule = DEFAULT;
|
||||
int sample_steps = 20;
|
||||
float strength = 0.75f;
|
||||
RNGType rng_type = CUDA_RNG;
|
||||
int64_t seed = 42;
|
||||
bool verbose = false;
|
||||
};
|
||||
|
||||
void print_params(SDParams params);
|
||||
|
||||
void print_usage(int argc, const char* argv[]);
|
||||
|
||||
void parse_args(int argc, const char** argv, SDParams& params);
|
||||
|
||||
std::string get_image_params(SDParams params, int seed);
|
@ -1,4 +1,3 @@
|
||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
|
||||
|
||||
add_subdirectory(cli)
|
||||
add_subdirectory(convert)
|
||||
add_subdirectory(cli)
|
@ -2,5 +2,5 @@ set(TARGET sd)
|
||||
|
||||
add_executable(${TARGET} main.cpp)
|
||||
install(TARGETS ${TARGET} RUNTIME)
|
||||
target_link_libraries(${TARGET} PRIVATE stable-diffusion common ${CMAKE_THREAD_LIBS_INIT})
|
||||
target_link_libraries(${TARGET} PRIVATE stable-diffusion ${CMAKE_THREAD_LIBS_INIT})
|
||||
target_compile_features(${TARGET} PUBLIC cxx_std_11)
|
@ -1,8 +1,9 @@
|
||||
#include <stdio.h>
|
||||
#include <ctime>
|
||||
#include <random>
|
||||
#include "common.h"
|
||||
#include "ggml/ggml.h"
|
||||
#include "stable-diffusion.h"
|
||||
#include "util.h"
|
||||
|
||||
#define STB_IMAGE_IMPLEMENTATION
|
||||
#include "stb_image.h"
|
||||
@ -11,6 +12,405 @@
|
||||
#define STB_IMAGE_WRITE_STATIC
|
||||
#include "stb_image_write.h"
|
||||
|
||||
#include <cstring>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
const char* rng_type_to_str[] = {
|
||||
"std_default",
|
||||
"cuda",
|
||||
};
|
||||
|
||||
// Names of the sampler method, same order as enum sample_method in stable-diffusion.h
|
||||
const char* sample_method_str[] = {
|
||||
"euler_a",
|
||||
"euler",
|
||||
"heun",
|
||||
"dpm2",
|
||||
"dpm++2s_a",
|
||||
"dpm++2m",
|
||||
"dpm++2mv2",
|
||||
"lcm",
|
||||
};
|
||||
|
||||
// Names of the sigma schedule overrides, same order as sample_schedule in stable-diffusion.h
|
||||
const char* schedule_str[] = {
|
||||
"default",
|
||||
"discrete",
|
||||
"karras",
|
||||
};
|
||||
|
||||
const char* modes_str[] = {
|
||||
"txt2img",
|
||||
"img2img",
|
||||
};
|
||||
|
||||
enum SDMode {
|
||||
TXT2IMG,
|
||||
IMG2IMG,
|
||||
MODE_COUNT
|
||||
};
|
||||
|
||||
struct SDParams {
|
||||
int n_threads = -1;
|
||||
SDMode mode = TXT2IMG;
|
||||
|
||||
std::string model_path;
|
||||
std::string vae_path;
|
||||
ggml_type wtype = GGML_TYPE_COUNT;
|
||||
std::string lora_model_dir;
|
||||
std::string output_path = "output.png";
|
||||
std::string input_path;
|
||||
|
||||
std::string prompt;
|
||||
std::string negative_prompt;
|
||||
float cfg_scale = 7.0f;
|
||||
int width = 512;
|
||||
int height = 512;
|
||||
int batch_count = 1;
|
||||
|
||||
SampleMethod sample_method = EULER_A;
|
||||
Schedule schedule = DEFAULT;
|
||||
int sample_steps = 20;
|
||||
float strength = 0.75f;
|
||||
RNGType rng_type = CUDA_RNG;
|
||||
int64_t seed = 42;
|
||||
bool verbose = false;
|
||||
};
|
||||
|
||||
void print_params(SDParams params) {
|
||||
printf("Option: \n");
|
||||
printf(" n_threads: %d\n", params.n_threads);
|
||||
printf(" mode: %s\n", modes_str[params.mode]);
|
||||
printf(" model_path: %s\n", params.model_path.c_str());
|
||||
printf(" wtype: %s\n", params.wtype < GGML_TYPE_COUNT ? ggml_type_name(params.wtype) : "unspecified");
|
||||
printf(" vae_path: %s\n", params.vae_path.c_str());
|
||||
printf(" output_path: %s\n", params.output_path.c_str());
|
||||
printf(" init_img: %s\n", params.input_path.c_str());
|
||||
printf(" prompt: %s\n", params.prompt.c_str());
|
||||
printf(" negative_prompt: %s\n", params.negative_prompt.c_str());
|
||||
printf(" cfg_scale: %.2f\n", params.cfg_scale);
|
||||
printf(" width: %d\n", params.width);
|
||||
printf(" height: %d\n", params.height);
|
||||
printf(" sample_method: %s\n", sample_method_str[params.sample_method]);
|
||||
printf(" schedule: %s\n", schedule_str[params.schedule]);
|
||||
printf(" sample_steps: %d\n", params.sample_steps);
|
||||
printf(" strength(img2img): %.2f\n", params.strength);
|
||||
printf(" rng: %s\n", rng_type_to_str[params.rng_type]);
|
||||
printf(" seed: %ld\n", params.seed);
|
||||
printf(" batch_count: %d\n", params.batch_count);
|
||||
}
|
||||
|
||||
void print_usage(int argc, const char* argv[]) {
|
||||
printf("usage: %s [arguments]\n", argv[0]);
|
||||
printf("\n");
|
||||
printf("arguments:\n");
|
||||
printf(" -h, --help show this help message and exit\n");
|
||||
printf(" -M, --mode [txt2img or img2img] generation mode (default: txt2img)\n");
|
||||
printf(" -t, --threads N number of threads to use during computation (default: -1).\n");
|
||||
printf(" If threads <= 0, then threads will be set to the number of CPU physical cores\n");
|
||||
printf(" -m, --model [MODEL] path to model\n");
|
||||
printf(" --vae [VAE] path to vae\n");
|
||||
printf(" --type [TYPE] weight type (f32, f16, q4_0, q4_1, q5_0, q5_1, q8_0)\n");
|
||||
printf(" If not specified, the default is the type of the weight file.");
|
||||
printf(" --lora-model-dir [DIR] lora model directory\n");
|
||||
printf(" -i, --init-img [IMAGE] path to the input image, required by img2img\n");
|
||||
printf(" -o, --output OUTPUT path to write result image to (default: ./output.png)\n");
|
||||
printf(" -p, --prompt [PROMPT] the prompt to render\n");
|
||||
printf(" -n, --negative-prompt PROMPT the negative prompt (default: \"\")\n");
|
||||
printf(" --cfg-scale SCALE unconditional guidance scale: (default: 7.0)\n");
|
||||
printf(" --strength STRENGTH strength for noising/unnoising (default: 0.75)\n");
|
||||
printf(" 1.0 corresponds to full destruction of information in init image\n");
|
||||
printf(" -H, --height H image height, in pixel space (default: 512)\n");
|
||||
printf(" -W, --width W image width, in pixel space (default: 512)\n");
|
||||
printf(" --sampling-method {euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, lcm}\n");
|
||||
printf(" sampling method (default: \"euler_a\")\n");
|
||||
printf(" --steps STEPS number of sample steps (default: 20)\n");
|
||||
printf(" --rng {std_default, cuda} RNG (default: cuda)\n");
|
||||
printf(" -s SEED, --seed SEED RNG seed (default: 42, use random seed for < 0)\n");
|
||||
printf(" -b, --batch-count COUNT number of images to generate.\n");
|
||||
printf(" --schedule {discrete, karras} Denoiser sigma schedule (default: discrete)\n");
|
||||
printf(" -v, --verbose print extra info\n");
|
||||
}
|
||||
|
||||
void parse_args(int argc, const char** argv, SDParams& params) {
|
||||
bool invalid_arg = false;
|
||||
std::string arg;
|
||||
for (int i = 1; i < argc; i++) {
|
||||
arg = argv[i];
|
||||
|
||||
if (arg == "-t" || arg == "--threads") {
|
||||
if (++i >= argc) {
|
||||
invalid_arg = true;
|
||||
break;
|
||||
}
|
||||
params.n_threads = std::stoi(argv[i]);
|
||||
} else if (arg == "-M" || arg == "--mode") {
|
||||
if (++i >= argc) {
|
||||
invalid_arg = true;
|
||||
break;
|
||||
}
|
||||
const char* mode_selected = argv[i];
|
||||
int mode_found = -1;
|
||||
for (int d = 0; d < MODE_COUNT; d++) {
|
||||
if (!strcmp(mode_selected, modes_str[d])) {
|
||||
mode_found = d;
|
||||
}
|
||||
}
|
||||
if (mode_found == -1) {
|
||||
fprintf(stderr, "error: invalid mode %s, must be one of [txt2img, img2img]\n",
|
||||
mode_selected);
|
||||
exit(1);
|
||||
}
|
||||
params.mode = (SDMode)mode_found;
|
||||
} else if (arg == "-m" || arg == "--model") {
|
||||
if (++i >= argc) {
|
||||
invalid_arg = true;
|
||||
break;
|
||||
}
|
||||
params.model_path = argv[i];
|
||||
} else if (arg == "--vae") {
|
||||
if (++i >= argc) {
|
||||
invalid_arg = true;
|
||||
break;
|
||||
}
|
||||
params.vae_path = argv[i];
|
||||
} else if (arg == "--type") {
|
||||
if (++i >= argc) {
|
||||
invalid_arg = true;
|
||||
break;
|
||||
}
|
||||
std::string type = argv[i];
|
||||
if (type == "f32") {
|
||||
params.wtype = GGML_TYPE_F32;
|
||||
} else if (type == "f16") {
|
||||
params.wtype = GGML_TYPE_F16;
|
||||
} else if (type == "q4_0") {
|
||||
params.wtype = GGML_TYPE_Q4_0;
|
||||
} else if (type == "q4_1") {
|
||||
params.wtype = GGML_TYPE_Q4_1;
|
||||
} else if (type == "q5_0") {
|
||||
params.wtype = GGML_TYPE_Q5_0;
|
||||
} else if (type == "q5_1") {
|
||||
params.wtype = GGML_TYPE_Q5_1;
|
||||
} else if (type == "q8_0") {
|
||||
params.wtype = GGML_TYPE_Q8_0;
|
||||
} else {
|
||||
fprintf(stderr, "error: invalid weight format %s, must be one of [f32, f16, q4_0, q4_1, q5_0, q5_1, q8_0]\n",
|
||||
type.c_str());
|
||||
exit(1);
|
||||
}
|
||||
} else if (arg == "--lora-model-dir") {
|
||||
if (++i >= argc) {
|
||||
invalid_arg = true;
|
||||
break;
|
||||
}
|
||||
params.lora_model_dir = argv[i];
|
||||
} else if (arg == "-i" || arg == "--init-img") {
|
||||
if (++i >= argc) {
|
||||
invalid_arg = true;
|
||||
break;
|
||||
}
|
||||
params.input_path = argv[i];
|
||||
} else if (arg == "-o" || arg == "--output") {
|
||||
if (++i >= argc) {
|
||||
invalid_arg = true;
|
||||
break;
|
||||
}
|
||||
params.output_path = argv[i];
|
||||
} else if (arg == "-p" || arg == "--prompt") {
|
||||
if (++i >= argc) {
|
||||
invalid_arg = true;
|
||||
break;
|
||||
}
|
||||
params.prompt = argv[i];
|
||||
} else if (arg == "-n" || arg == "--negative-prompt") {
|
||||
if (++i >= argc) {
|
||||
invalid_arg = true;
|
||||
break;
|
||||
}
|
||||
params.negative_prompt = argv[i];
|
||||
} else if (arg == "--cfg-scale") {
|
||||
if (++i >= argc) {
|
||||
invalid_arg = true;
|
||||
break;
|
||||
}
|
||||
params.cfg_scale = std::stof(argv[i]);
|
||||
} else if (arg == "--strength") {
|
||||
if (++i >= argc) {
|
||||
invalid_arg = true;
|
||||
break;
|
||||
}
|
||||
params.strength = std::stof(argv[i]);
|
||||
} else if (arg == "-H" || arg == "--height") {
|
||||
if (++i >= argc) {
|
||||
invalid_arg = true;
|
||||
break;
|
||||
}
|
||||
params.height = std::stoi(argv[i]);
|
||||
} else if (arg == "-W" || arg == "--width") {
|
||||
if (++i >= argc) {
|
||||
invalid_arg = true;
|
||||
break;
|
||||
}
|
||||
params.width = std::stoi(argv[i]);
|
||||
} else if (arg == "--steps") {
|
||||
if (++i >= argc) {
|
||||
invalid_arg = true;
|
||||
break;
|
||||
}
|
||||
params.sample_steps = std::stoi(argv[i]);
|
||||
} else if (arg == "-b" || arg == "--batch-count") {
|
||||
if (++i >= argc) {
|
||||
invalid_arg = true;
|
||||
break;
|
||||
}
|
||||
params.batch_count = std::stoi(argv[i]);
|
||||
} else if (arg == "--rng") {
|
||||
if (++i >= argc) {
|
||||
invalid_arg = true;
|
||||
break;
|
||||
}
|
||||
std::string rng_type_str = argv[i];
|
||||
if (rng_type_str == "std_default") {
|
||||
params.rng_type = STD_DEFAULT_RNG;
|
||||
} else if (rng_type_str == "cuda") {
|
||||
params.rng_type = CUDA_RNG;
|
||||
} else {
|
||||
invalid_arg = true;
|
||||
break;
|
||||
}
|
||||
} else if (arg == "--schedule") {
|
||||
if (++i >= argc) {
|
||||
invalid_arg = true;
|
||||
break;
|
||||
}
|
||||
const char* schedule_selected = argv[i];
|
||||
int schedule_found = -1;
|
||||
for (int d = 0; d < N_SCHEDULES; d++) {
|
||||
if (!strcmp(schedule_selected, schedule_str[d])) {
|
||||
schedule_found = d;
|
||||
}
|
||||
}
|
||||
if (schedule_found == -1) {
|
||||
invalid_arg = true;
|
||||
break;
|
||||
}
|
||||
params.schedule = (Schedule)schedule_found;
|
||||
} else if (arg == "-s" || arg == "--seed") {
|
||||
if (++i >= argc) {
|
||||
invalid_arg = true;
|
||||
break;
|
||||
}
|
||||
params.seed = std::stoll(argv[i]);
|
||||
} else if (arg == "--sampling-method") {
|
||||
if (++i >= argc) {
|
||||
invalid_arg = true;
|
||||
break;
|
||||
}
|
||||
const char* sample_method_selected = argv[i];
|
||||
int sample_method_found = -1;
|
||||
for (int m = 0; m < N_SAMPLE_METHODS; m++) {
|
||||
if (!strcmp(sample_method_selected, sample_method_str[m])) {
|
||||
sample_method_found = m;
|
||||
}
|
||||
}
|
||||
if (sample_method_found == -1) {
|
||||
invalid_arg = true;
|
||||
break;
|
||||
}
|
||||
params.sample_method = (SampleMethod)sample_method_found;
|
||||
} else if (arg == "-h" || arg == "--help") {
|
||||
print_usage(argc, argv);
|
||||
exit(0);
|
||||
} else if (arg == "-v" || arg == "--verbose") {
|
||||
params.verbose = true;
|
||||
} else {
|
||||
fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
|
||||
print_usage(argc, argv);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
if (invalid_arg) {
|
||||
fprintf(stderr, "error: invalid parameter for argument: %s\n", arg.c_str());
|
||||
print_usage(argc, argv);
|
||||
exit(1);
|
||||
}
|
||||
if (params.n_threads <= 0) {
|
||||
params.n_threads = get_num_physical_cores();
|
||||
}
|
||||
|
||||
if (params.prompt.length() == 0) {
|
||||
fprintf(stderr, "error: the following arguments are required: prompt\n");
|
||||
print_usage(argc, argv);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (params.model_path.length() == 0) {
|
||||
fprintf(stderr, "error: the following arguments are required: model_path\n");
|
||||
print_usage(argc, argv);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (params.mode == IMG2IMG && params.input_path.length() == 0) {
|
||||
fprintf(stderr, "error: when using the img2img mode, the following arguments are required: init-img\n");
|
||||
print_usage(argc, argv);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (params.output_path.length() == 0) {
|
||||
fprintf(stderr, "error: the following arguments are required: output_path\n");
|
||||
print_usage(argc, argv);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (params.width <= 0 || params.width % 64 != 0) {
|
||||
fprintf(stderr, "error: the width must be a multiple of 64\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (params.height <= 0 || params.height % 64 != 0) {
|
||||
fprintf(stderr, "error: the height must be a multiple of 64\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (params.sample_steps <= 0) {
|
||||
fprintf(stderr, "error: the sample_steps must be greater than 0\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (params.strength < 0.f || params.strength > 1.f) {
|
||||
fprintf(stderr, "error: can only work with strength in [0.0, 1.0]\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (params.seed < 0) {
|
||||
srand((int)time(NULL));
|
||||
params.seed = rand();
|
||||
}
|
||||
}
|
||||
|
||||
std::string get_image_params(SDParams params, int64_t seed) {
|
||||
std::string parameter_string = params.prompt + "\n";
|
||||
if (params.negative_prompt.size() != 0) {
|
||||
parameter_string += "Negative prompt: " + params.negative_prompt + "\n";
|
||||
}
|
||||
parameter_string += "Steps: " + std::to_string(params.sample_steps) + ", ";
|
||||
parameter_string += "CFG scale: " + std::to_string(params.cfg_scale) + ", ";
|
||||
parameter_string += "Seed: " + std::to_string(seed) + ", ";
|
||||
parameter_string += "Size: " + std::to_string(params.width) + "x" + std::to_string(params.height) + ", ";
|
||||
parameter_string += "Model: " + basename(params.model_path) + ", ";
|
||||
parameter_string += "RNG: " + std::string(rng_type_to_str[params.rng_type]) + ", ";
|
||||
parameter_string += "Sampler: " + std::string(sample_method_str[params.sample_method]);
|
||||
if (params.schedule == KARRAS) {
|
||||
parameter_string += " karras";
|
||||
}
|
||||
parameter_string += ", ";
|
||||
parameter_string += "Version: stable-diffusion.cpp";
|
||||
return parameter_string;
|
||||
}
|
||||
|
||||
int main(int argc, const char* argv[]) {
|
||||
SDParams params;
|
||||
parse_args(argc, argv, params);
|
||||
@ -50,7 +450,7 @@ int main(int argc, const char* argv[]) {
|
||||
}
|
||||
|
||||
StableDiffusion sd(params.n_threads, vae_decode_only, true, params.lora_model_dir, params.rng_type);
|
||||
if (!sd.load_from_file(params.model_path, params.schedule)) {
|
||||
if (!sd.load_from_file(params.model_path, params.vae_path, params.wtype, params.schedule)) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
@ -79,7 +479,7 @@ int main(int argc, const char* argv[]) {
|
||||
}
|
||||
|
||||
if (results.size() == 0 || results.size() != params.batch_count) {
|
||||
fprintf(stderr, "generate failed\n");
|
||||
LOG_ERROR("generate failed");
|
||||
return 1;
|
||||
}
|
||||
|
||||
@ -88,7 +488,7 @@ int main(int argc, const char* argv[]) {
|
||||
for (int i = 0; i < params.batch_count; i++) {
|
||||
std::string final_image_path = i > 0 ? dummy_name + "_" + std::to_string(i + 1) + ".png" : dummy_name + ".png";
|
||||
stbi_write_png(final_image_path.c_str(), params.width, params.height, 3, results[i], 0, get_image_params(params, params.seed + i).c_str());
|
||||
printf("save result image to '%s'\n", final_image_path.c_str());
|
||||
LOG_INFO("save result image to '%s'", final_image_path.c_str());
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
@ -1,5 +0,0 @@
|
||||
set(TARGET convert)
|
||||
|
||||
add_executable(${TARGET} convert.cpp vocab.hpp)
|
||||
target_link_libraries(${TARGET} PRIVATE ggml zip ${CMAKE_THREAD_LIBS_INIT})
|
||||
target_compile_features(${TARGET} PUBLIC cxx_std_11)
|
@ -1,16 +0,0 @@
|
||||
# Model Convert
|
||||
|
||||
## Usage
|
||||
```
|
||||
usage: convert.exe [MODEL_PATH] --type [OUT_TYPE] [arguments]
|
||||
Model supported for conversion: .safetensors models or .ckpt checkpoints models
|
||||
|
||||
arguments:
|
||||
-h, --help show this help message and exit
|
||||
-o, --out [FILENAME] path or name to converted model
|
||||
--vocab [FILENAME] path to custom vocab.json (usually unnecessary)
|
||||
-v, --verbose print processing info - dev info
|
||||
-l, --lora force read the model as a LoRA
|
||||
--vae [FILENAME] merge a custom VAE
|
||||
-t, --type [OUT_TYPE] output format (f32, f16, q4_0, q4_1, q5_0, q5_1, q8_0)
|
||||
```
|
File diff suppressed because it is too large
Load Diff
2
format-code.sh
Normal file
2
format-code.sh
Normal file
@ -0,0 +1,2 @@
|
||||
clang-format -style=file -i *.cpp *.h
|
||||
clang-format -style=file -i examples/cli/*.cpp
|
142
model.h
Normal file
142
model.h
Normal file
@ -0,0 +1,142 @@
|
||||
#ifndef __MODEL_H__
|
||||
#define __MODEL_H__
|
||||
|
||||
#include <functional>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "ggml/ggml.h"
|
||||
#include "json.hpp"
|
||||
#include "zip.h"
|
||||
|
||||
enum SDVersion {
|
||||
VERSION_1_x,
|
||||
VERSION_2_x,
|
||||
VERSION_XL,
|
||||
VERSION_COUNT,
|
||||
};
|
||||
|
||||
struct TensorStorage {
|
||||
std::string name;
|
||||
ggml_type type = GGML_TYPE_F32;
|
||||
bool is_bf16 = false;
|
||||
int64_t ne[4] = {1, 1, 1, 1};
|
||||
int n_dims = 0;
|
||||
|
||||
size_t file_index = 0;
|
||||
int index_in_zip = -1; // >= means stored in a zip file
|
||||
size_t offset = 0; // offset in file
|
||||
|
||||
TensorStorage() = default;
|
||||
|
||||
TensorStorage(const std::string& name, ggml_type type, int64_t* ne, int n_dims, size_t file_index, size_t offset = 0)
|
||||
: name(name), type(type), n_dims(n_dims), file_index(file_index), offset(offset) {
|
||||
for (int i = 0; i < n_dims; i++) {
|
||||
this->ne[i] = ne[i];
|
||||
}
|
||||
}
|
||||
|
||||
int64_t nelements() const {
|
||||
return ne[0] * ne[1] * ne[2] * ne[3];
|
||||
}
|
||||
|
||||
int64_t nbytes() const {
|
||||
return nelements() * ggml_type_size(type) / ggml_blck_size(type);
|
||||
}
|
||||
|
||||
int64_t nbytes_to_read() const {
|
||||
if (is_bf16) {
|
||||
return nbytes() / 2;
|
||||
} else {
|
||||
return nbytes();
|
||||
}
|
||||
}
|
||||
|
||||
void unsqueeze() {
|
||||
if (n_dims == 2) {
|
||||
n_dims = 4;
|
||||
ne[3] = ne[1];
|
||||
ne[2] = ne[0];
|
||||
ne[1] = 1;
|
||||
ne[0] = 1;
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<TensorStorage> chunk(size_t n) {
|
||||
std::vector<TensorStorage> chunks;
|
||||
size_t chunk_size = nbytes_to_read() / n;
|
||||
reverse_ne();
|
||||
for (int i = 0; i < n; i++) {
|
||||
TensorStorage chunk_i = *this;
|
||||
chunk_i.ne[0] = ne[0] / n;
|
||||
chunk_i.offset = offset + i * chunk_size;
|
||||
chunk_i.reverse_ne();
|
||||
chunks.push_back(chunk_i);
|
||||
}
|
||||
reverse_ne();
|
||||
return chunks;
|
||||
}
|
||||
|
||||
void reverse_ne() {
|
||||
int64_t new_ne[4] = {1, 1, 1, 1};
|
||||
for (int i = 0; i < n_dims; i++) {
|
||||
new_ne[i] = ne[n_dims - 1 - i];
|
||||
}
|
||||
for (int i = 0; i < n_dims; i++) {
|
||||
ne[i] = new_ne[i];
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
typedef std::function<bool(const TensorStorage&, ggml_tensor**)> on_new_tensor_cb_t;
|
||||
typedef std::function<void(const std::string&, int32_t)> on_new_token_cb_t;
|
||||
|
||||
class ModelLoader {
|
||||
protected:
|
||||
std::vector<std::string> file_paths_;
|
||||
std::vector<TensorStorage> tensor_storages;
|
||||
|
||||
public:
|
||||
virtual bool init_from_file(const std::string& file_path, const std::string& prefix = "");
|
||||
virtual bool init_from_files(const std::vector<std::string>& file_paths);
|
||||
virtual SDVersion get_sd_version();
|
||||
virtual ggml_type get_sd_wtype();
|
||||
virtual bool load_vocab(on_new_token_cb_t on_new_token_cb);
|
||||
virtual bool load_tensors(on_new_tensor_cb_t on_new_tensor_cb);
|
||||
virtual int64_t cal_mem_size();
|
||||
virtual ~ModelLoader() = default;
|
||||
};
|
||||
|
||||
class GGUFModelLoader : public ModelLoader {
|
||||
public:
|
||||
bool init_from_file(const std::string& file_path, const std::string& prefix = "");
|
||||
};
|
||||
|
||||
class SafeTensorsModelLoader : public ModelLoader {
|
||||
public:
|
||||
bool init_from_file(const std::string& file_path, const std::string& prefix = "");
|
||||
};
|
||||
|
||||
class CkptModelLoader : public ModelLoader {
|
||||
private:
|
||||
bool parse_data_pkl(uint8_t* buffer,
|
||||
size_t buffer_size,
|
||||
zip_t* zip,
|
||||
std::string dir,
|
||||
size_t file_index,
|
||||
const std::string& prefix);
|
||||
|
||||
public:
|
||||
bool init_from_file(const std::string& file_path, const std::string& prefix = "");
|
||||
};
|
||||
|
||||
class DiffusersModelLoader : public SafeTensorsModelLoader {
|
||||
public:
|
||||
bool init_from_file(const std::string& file_path, const std::string& prefix = "");
|
||||
};
|
||||
|
||||
ModelLoader* init_model_loader_from_file(const std::string& file_path);
|
||||
|
||||
#endif // __MODEL_H__
|
@ -1,5 +1,6 @@
|
||||
#include <assert.h>
|
||||
#include <inttypes.h>
|
||||
#include <stdarg.h>
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
#include <fstream>
|
||||
@ -22,61 +23,24 @@
|
||||
#include "ggml-cuda.h"
|
||||
#endif
|
||||
|
||||
#include "model.h"
|
||||
#include "rng.h"
|
||||
#include "rng_philox.h"
|
||||
#include "stable-diffusion.h"
|
||||
#include "util.h"
|
||||
|
||||
#define EPS 1e-05f
|
||||
|
||||
static SDLogLevel log_level = SDLogLevel::INFO;
|
||||
|
||||
#define UNET_GRAPH_SIZE 3328
|
||||
#define LORA_GRAPH_SIZE 4096
|
||||
|
||||
#define __FILENAME__ "stable-diffusion.cpp"
|
||||
#define SD_LOG(level, format, ...) \
|
||||
do { \
|
||||
if (level < log_level) { \
|
||||
break; \
|
||||
} \
|
||||
if (level == SDLogLevel::DEBUG) { \
|
||||
printf("[DEBUG] %s:%-4d - " format "\n", __FILENAME__, __LINE__, ##__VA_ARGS__); \
|
||||
fflush(stdout); \
|
||||
} else if (level == SDLogLevel::INFO) { \
|
||||
printf("[INFO] %s:%-4d - " format "\n", __FILENAME__, __LINE__, ##__VA_ARGS__); \
|
||||
fflush(stdout); \
|
||||
} else if (level == SDLogLevel::WARN) { \
|
||||
fprintf(stderr, "[WARN] %s:%-4d - " format "\n", __FILENAME__, __LINE__, ##__VA_ARGS__); \
|
||||
fflush(stdout); \
|
||||
} else if (level == SDLogLevel::ERROR) { \
|
||||
fprintf(stderr, "[ERROR] %s:%-4d - " format "\n", __FILENAME__, __LINE__, ##__VA_ARGS__); \
|
||||
fflush(stdout); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define LOG_DEBUG(format, ...) SD_LOG(SDLogLevel::DEBUG, format, ##__VA_ARGS__)
|
||||
#define LOG_INFO(format, ...) SD_LOG(SDLogLevel::INFO, format, ##__VA_ARGS__)
|
||||
#define LOG_WARN(format, ...) SD_LOG(SDLogLevel::WARN, format, ##__VA_ARGS__)
|
||||
#define LOG_ERROR(format, ...) SD_LOG(SDLogLevel::ERROR, format, ##__VA_ARGS__)
|
||||
|
||||
#define TIMESTEPS 1000
|
||||
|
||||
enum SDVersion {
|
||||
VERSION_1_x,
|
||||
VERSION_2_x,
|
||||
VERSION_XL,
|
||||
VERSION_COUNT,
|
||||
};
|
||||
|
||||
const char* model_version_to_str[] = {
|
||||
"1.x",
|
||||
"2.x",
|
||||
"XL"};
|
||||
|
||||
const char* lora_type_to_str[] = {
|
||||
"regular",
|
||||
"diffusers",
|
||||
"transformers"};
|
||||
"XL",
|
||||
};
|
||||
|
||||
const char* sampling_methods_str[] = {
|
||||
"Euler A",
|
||||
@ -86,14 +50,11 @@ const char* sampling_methods_str[] = {
|
||||
"DPM++ (2s)",
|
||||
"DPM++ (2M)",
|
||||
"modified DPM++ (2M)",
|
||||
"LCM"};
|
||||
"LCM",
|
||||
};
|
||||
|
||||
/*================================================== Helper Functions ================================================*/
|
||||
|
||||
void set_sd_log_level(SDLogLevel level) {
|
||||
log_level = level;
|
||||
}
|
||||
|
||||
std::string sd_get_system_info() {
|
||||
std::stringstream ss;
|
||||
ss << "System Info: \n";
|
||||
@ -188,7 +149,7 @@ void print_ggml_tensor(struct ggml_tensor* tensor, bool shape_only = false) {
|
||||
if (shape_only) {
|
||||
return;
|
||||
}
|
||||
int range = 3;
|
||||
int range = 1000;
|
||||
for (int i = 0; i < tensor->ne[3]; i++) {
|
||||
if (i >= range && i + range < tensor->ne[3]) {
|
||||
continue;
|
||||
@ -277,15 +238,46 @@ void sd_fread(void* ptr, size_t size, size_t count, FILE* stream) {
|
||||
}
|
||||
}
|
||||
|
||||
void copy_ggml_tensor(
|
||||
struct ggml_tensor* dst,
|
||||
const struct ggml_tensor* src) {
|
||||
dst->nb[0] = src->nb[0];
|
||||
dst->nb[1] = src->nb[1];
|
||||
dst->nb[2] = src->nb[2];
|
||||
dst->nb[3] = src->nb[3];
|
||||
void copy_ggml_tensor(struct ggml_tensor* dst, struct ggml_tensor* src) {
|
||||
if (dst->type == src->type) {
|
||||
dst->nb[0] = src->nb[0];
|
||||
dst->nb[1] = src->nb[1];
|
||||
dst->nb[2] = src->nb[2];
|
||||
dst->nb[3] = src->nb[3];
|
||||
|
||||
memcpy(((char*)dst->data), ((char*)src->data), ggml_nbytes(dst));
|
||||
memcpy(((char*)dst->data), ((char*)src->data), ggml_nbytes(dst));
|
||||
return;
|
||||
}
|
||||
struct ggml_init_params params;
|
||||
params.mem_size = 10 * 1024 * 1024; // for padding
|
||||
params.mem_buffer = NULL;
|
||||
params.no_alloc = false;
|
||||
struct ggml_context* ctx = ggml_init(params);
|
||||
if (!ctx) {
|
||||
LOG_ERROR("ggml_init() failed");
|
||||
return;
|
||||
}
|
||||
ggml_tensor* final = ggml_cpy_inplace(ctx, src, dst);
|
||||
|
||||
struct ggml_cgraph* graph = ggml_new_graph(ctx);
|
||||
ggml_build_forward_expand(graph, final);
|
||||
ggml_graph_compute_with_ctx(ctx, graph, 1);
|
||||
ggml_free(ctx);
|
||||
}
|
||||
|
||||
void calculate_alphas_cumprod(float* alphas_cumprod,
|
||||
float linear_start = 0.00085f,
|
||||
float linear_end = 0.0120,
|
||||
int timesteps = TIMESTEPS) {
|
||||
float ls_sqrt = sqrtf(linear_start);
|
||||
float le_sqrt = sqrtf(linear_end);
|
||||
float amount = le_sqrt - ls_sqrt;
|
||||
float product = 1.0f;
|
||||
for (int i = 0; i < timesteps; i++) {
|
||||
float beta = ls_sqrt + amount * ((float)i / (timesteps - 1));
|
||||
product *= 1.0f - powf(beta, 2.0f);
|
||||
alphas_cumprod[i] = product;
|
||||
}
|
||||
}
|
||||
|
||||
// Ref: https://github.com/CompVis/stable-diffusion/blob/main/ldm/modules/diffusionmodules/util.py#L151
|
||||
@ -396,22 +388,6 @@ std::pair<std::unordered_map<std::string, float>, std::string> extract_and_remov
|
||||
return std::make_pair(filename2multiplier, text);
|
||||
}
|
||||
|
||||
bool ends_with(const std::string& str, const std::string& ending) {
|
||||
if (str.length() >= ending.length()) {
|
||||
return (str.compare(str.length() - ending.length(), ending.length(), ending) == 0);
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
void replace_all_chars(std::string& str, char target, char replacement) {
|
||||
for (size_t i = 0; i < str.length(); ++i) {
|
||||
if (str[i] == target) {
|
||||
str[i] = replacement;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*================================================== CLIPTokenizer ===================================================*/
|
||||
|
||||
const std::string UNK_TOKEN = "<|endoftext|>";
|
||||
@ -3244,7 +3220,7 @@ struct AutoEncoderKL {
|
||||
struct ggml_cgraph* gf = build_graph(x, decode);
|
||||
|
||||
// compute the required memory
|
||||
size_t compute_memory_buffer_size = ggml_allocr_alloc_graph(compute_alloc, gf);
|
||||
size_t compute_memory_buffer_size = ggml_allocr_alloc_graph(compute_alloc, gf) + 10 * 1024 * 1024;
|
||||
|
||||
// recreate the allocator with the required memory
|
||||
ggml_allocr_free(compute_alloc);
|
||||
@ -3281,9 +3257,21 @@ struct AutoEncoderKL {
|
||||
}
|
||||
};
|
||||
|
||||
float ggml_backend_tensor_get_f32(ggml_tensor* tensor) {
|
||||
GGML_ASSERT(tensor->type == GGML_TYPE_F32 || tensor->type == GGML_TYPE_F16);
|
||||
float value;
|
||||
if (tensor->type == GGML_TYPE_F32) {
|
||||
ggml_backend_tensor_get(tensor, &value, 0, sizeof(value));
|
||||
} else { // GGML_TYPE_F16
|
||||
ggml_fp16_t f16_value;
|
||||
ggml_backend_tensor_get(tensor, &f16_value, 0, sizeof(f16_value));
|
||||
value = ggml_fp16_to_fp32(f16_value);
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
struct LoraModel {
|
||||
float strength = 1.0f;
|
||||
std::map<std::string, float> lora_alphas;
|
||||
float multiplier = 1.0f;
|
||||
std::map<std::string, struct ggml_tensor*> lora_tensors;
|
||||
|
||||
struct ggml_context* ctx;
|
||||
@ -3293,37 +3281,15 @@ struct LoraModel {
|
||||
bool load(ggml_backend_t backend_, std::string file_path) {
|
||||
backend = backend_;
|
||||
LOG_INFO("loading LoRA from '%s'", file_path.c_str());
|
||||
ggml_context* ctx_meta = NULL;
|
||||
gguf_context* ctx_gguf = gguf_init_from_file(file_path.c_str(), {true, &ctx_meta});
|
||||
std::shared_ptr<ModelLoader> model_loader = std::shared_ptr<ModelLoader>(init_model_loader_from_file(file_path));
|
||||
|
||||
if (!ctx_gguf) {
|
||||
LOG_ERROR("failed to open '%s'", file_path.c_str());
|
||||
if (!model_loader) {
|
||||
LOG_ERROR("init lora model loader from file failed: '%s'", file_path.c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
FILE* fp = std::fopen(file_path.c_str(), "rb");
|
||||
|
||||
SDVersion version = VERSION_COUNT;
|
||||
|
||||
int n_kv = gguf_get_n_kv(ctx_gguf);
|
||||
int n_tensors = gguf_get_n_tensors(ctx_gguf);
|
||||
|
||||
for (int i = 0; i < n_kv; i++) {
|
||||
const char* name = gguf_get_key(ctx_gguf, i);
|
||||
const enum gguf_type type = gguf_get_kv_type(ctx_gguf, i);
|
||||
LOG_DEBUG("%s: - kv %3d: %42s %-8s", __func__, i, name, gguf_type_name(type));
|
||||
}
|
||||
|
||||
{
|
||||
int nidx = gguf_find_key(ctx_gguf, "sd.lora.name");
|
||||
int tidx = gguf_find_key(ctx_gguf, "sd.lora.type");
|
||||
if (tidx >= 0 && nidx >= 0) {
|
||||
LOG_INFO("LoRA Type: %s | %s", lora_type_to_str[gguf_get_val_i32(ctx_gguf, tidx) - 1], gguf_get_val_str(ctx_gguf, nidx));
|
||||
}
|
||||
}
|
||||
|
||||
struct ggml_init_params params;
|
||||
params.mem_size = static_cast<size_t>(n_tensors * ggml_tensor_overhead());
|
||||
params.mem_size = static_cast<size_t>(1024 * ggml_tensor_overhead());
|
||||
params.mem_buffer = NULL;
|
||||
params.no_alloc = true;
|
||||
|
||||
@ -3333,82 +3299,28 @@ struct LoraModel {
|
||||
return false;
|
||||
}
|
||||
|
||||
ggml_type wtype = GGML_TYPE_COUNT;
|
||||
{
|
||||
int idx = gguf_find_key(ctx_gguf, "sd.lora.dtype");
|
||||
if (idx >= 0) {
|
||||
wtype = (ggml_type)gguf_get_val_i32(ctx_gguf, idx);
|
||||
LOG_INFO("LoRA data type: %s", ggml_type_name(wtype));
|
||||
}
|
||||
}
|
||||
ggml_type wtype = model_loader->get_sd_wtype();
|
||||
|
||||
LOG_DEBUG("calculating buffer size");
|
||||
int memory_buffer_size = 0;
|
||||
|
||||
for (int i = 0; i < n_tensors; i++) {
|
||||
std::string name = gguf_get_tensor_name(ctx_gguf, i);
|
||||
struct ggml_tensor* dummy = ggml_get_tensor(ctx_meta, name.c_str());
|
||||
memory_buffer_size += (int)ggml_nbytes(dummy);
|
||||
}
|
||||
int64_t memory_buffer_size = model_loader->cal_mem_size();
|
||||
LOG_DEBUG("lora params backend buffer size = % 6.2f MB", memory_buffer_size / (1024.0 * 1024.0));
|
||||
|
||||
params_buffer_lora = ggml_backend_alloc_buffer(backend, memory_buffer_size);
|
||||
|
||||
LOG_DEBUG("loading alphas");
|
||||
{
|
||||
int kidx = gguf_find_key(ctx_gguf, "sd.lora.alphas_k");
|
||||
int vidx = gguf_find_key(ctx_gguf, "sd.lora.alphas_v");
|
||||
int n_alphas = gguf_get_arr_n(ctx_gguf, kidx);
|
||||
if (n_alphas * 2 != n_tensors) {
|
||||
LOG_ERROR("lora alphas expected: %i, got %i", n_tensors, n_alphas * 2);
|
||||
return false;
|
||||
}
|
||||
float* alphas_values = (float*)gguf_get_arr_data(ctx_gguf, vidx);
|
||||
for (int i = 0; i < n_alphas; i++) {
|
||||
std::string alpha_name = gguf_get_arr_str(ctx_gguf, kidx, i);
|
||||
lora_alphas[alpha_name] = alphas_values[i];
|
||||
}
|
||||
}
|
||||
|
||||
ggml_allocr* alloc = ggml_allocr_new_from_buffer(params_buffer_lora);
|
||||
|
||||
size_t data_offset = gguf_get_data_offset(ctx_gguf);
|
||||
std::vector<char> read_buf;
|
||||
for (int i = 0; i < n_tensors; i++) {
|
||||
std::string name = gguf_get_tensor_name(ctx_gguf, i);
|
||||
struct ggml_tensor* dummy = ggml_get_tensor(ctx_meta, name.c_str());
|
||||
size_t offset = data_offset + gguf_get_tensor_offset(ctx_gguf, i);
|
||||
auto on_new_tensor_cb = [&](const TensorStorage& tensor_storage, ggml_tensor** dst_tensor) -> bool {
|
||||
const std::string& name = tensor_storage.name;
|
||||
|
||||
#ifdef _WIN32
|
||||
int ret = _fseeki64(fp, (__int64)offset, SEEK_SET);
|
||||
#else
|
||||
int ret = std::fseek(fp, (long)offset, SEEK_SET);
|
||||
#endif
|
||||
if (ret == -1) {
|
||||
return false;
|
||||
}
|
||||
|
||||
struct ggml_tensor* real = ggml_dup_tensor(ctx, dummy);
|
||||
struct ggml_tensor* real = ggml_new_tensor(ctx, tensor_storage.type, tensor_storage.n_dims, tensor_storage.ne);
|
||||
ggml_allocr_alloc(alloc, real);
|
||||
|
||||
int num_bytes = (int)ggml_nbytes(dummy);
|
||||
|
||||
if (ggml_backend_is_cpu(backend)) {
|
||||
// for the CPU and Metal backend, we can read directly into the tensor
|
||||
sd_fread(real->data, 1, num_bytes, fp);
|
||||
} else {
|
||||
// read into a temporary buffer first, then copy to device memory
|
||||
read_buf.resize(num_bytes);
|
||||
sd_fread(read_buf.data(), 1, num_bytes, fp);
|
||||
ggml_backend_tensor_set(real, read_buf.data(), 0, num_bytes);
|
||||
}
|
||||
*dst_tensor = real;
|
||||
|
||||
lora_tensors[name] = real;
|
||||
}
|
||||
read_buf.clear();
|
||||
std::fclose(fp);
|
||||
gguf_free(ctx_gguf);
|
||||
ggml_free(ctx_meta);
|
||||
return true;
|
||||
};
|
||||
|
||||
model_loader->load_tensors(on_new_tensor_cb);
|
||||
|
||||
LOG_DEBUG("finished loaded lora");
|
||||
ggml_allocr_free(alloc);
|
||||
@ -3428,54 +3340,94 @@ struct LoraModel {
|
||||
};
|
||||
|
||||
struct ggml_context* ctx0 = ggml_init(params);
|
||||
struct ggml_cgraph* gf = ggml_new_graph_custom(ctx0, LORA_GRAPH_SIZE, false);
|
||||
|
||||
struct ggml_cgraph* gf = ggml_new_graph_custom(ctx0, LORA_GRAPH_SIZE, false);
|
||||
std::set<std::string> applied_lora_tensors;
|
||||
for (auto it : model_tensors) {
|
||||
std::string k_tensor = it.first;
|
||||
std::string k_tensor = it.first;
|
||||
struct ggml_tensor* weight = model_tensors[it.first];
|
||||
|
||||
size_t k_pos = k_tensor.find(".weight");
|
||||
if (k_pos == std::string::npos) {
|
||||
continue;
|
||||
}
|
||||
k_tensor = k_tensor.substr(0, k_pos);
|
||||
std::string lora_up_name = "lora." + k_tensor + ".lora_up.weight";
|
||||
std::string lora_down_name = "lora." + k_tensor + ".lora_down.weight";
|
||||
std::string lora_alpha_name = "lora." + k_tensor + ".alpha";
|
||||
if (
|
||||
lora_tensors.find(lora_up_name) != lora_tensors.end() &&
|
||||
lora_tensors.find(lora_down_name) != lora_tensors.end() &&
|
||||
lora_alphas.find(lora_alpha_name) != lora_alphas.end()) {
|
||||
struct ggml_tensor* loraA = lora_tensors[lora_up_name];
|
||||
struct ggml_tensor* loraB = lora_tensors[lora_down_name];
|
||||
struct ggml_tensor* weight = model_tensors[it.first];
|
||||
k_tensor = k_tensor.substr(0, k_pos);
|
||||
replace_all_chars(k_tensor, '.', '_');
|
||||
std::string lora_up_name = "lora." + k_tensor + ".lora_up.weight";
|
||||
std::string lora_down_name = "lora." + k_tensor + ".lora_down.weight";
|
||||
std::string alpha_name = "lora." + k_tensor + ".alpha";
|
||||
std::string scale_name = "lora." + k_tensor + ".scale";
|
||||
|
||||
float scale = strength;
|
||||
scale *= (lora_alphas[lora_alpha_name] / loraB->ne[loraB->n_dims - 1]);
|
||||
ggml_tensor* lora_scale = ggml_new_tensor_1d(ctx0, GGML_TYPE_F32, 1);
|
||||
ggml_tensor* lora_up = NULL;
|
||||
ggml_tensor* lora_down = NULL;
|
||||
|
||||
ggml_allocr_alloc(compute_alloc, lora_scale);
|
||||
if (!ggml_allocr_is_measure(compute_alloc)) {
|
||||
ggml_backend_tensor_set(lora_scale, &scale, 0, ggml_nbytes(lora_scale));
|
||||
}
|
||||
if (lora_tensors.find(lora_up_name) != lora_tensors.end()) {
|
||||
lora_up = lora_tensors[lora_up_name];
|
||||
}
|
||||
|
||||
// flat lora tensors to multiply it
|
||||
int64_t loraA_rows = loraA->ne[loraA->n_dims - 1];
|
||||
loraA = ggml_reshape_2d(ctx0, loraA, ggml_nelements(loraA) / loraA_rows, loraA_rows);
|
||||
int64_t loraB_rows = loraB->ne[loraB->n_dims - 1];
|
||||
loraB = ggml_reshape_2d(ctx0, loraB, ggml_nelements(loraB) / loraB_rows, loraB_rows);
|
||||
if (lora_tensors.find(lora_down_name) != lora_tensors.end()) {
|
||||
lora_down = lora_tensors[lora_down_name];
|
||||
}
|
||||
|
||||
// ggml_mul_mat requires tensor b transposed
|
||||
loraB = ggml_cont(ctx0, ggml_transpose(ctx0, loraB));
|
||||
struct ggml_tensor* loraBA = ggml_mul_mat(ctx0, loraA, loraB);
|
||||
loraBA = ggml_cont(ctx0, ggml_transpose(ctx0, loraBA));
|
||||
loraBA = ggml_reshape(ctx0, loraBA, weight);
|
||||
GGML_ASSERT(ggml_nelements(loraBA) == ggml_nelements(weight));
|
||||
loraBA = ggml_scale_inplace(ctx0, loraBA, lora_scale);
|
||||
ggml_tensor* final_weight;
|
||||
final_weight = ggml_add_inplace(ctx0, weight, loraBA); // apply directly
|
||||
ggml_build_forward_expand(gf, final_weight);
|
||||
if (lora_up == NULL || lora_down == NULL) {
|
||||
continue;
|
||||
}
|
||||
|
||||
applied_lora_tensors.insert(lora_up_name);
|
||||
applied_lora_tensors.insert(lora_down_name);
|
||||
applied_lora_tensors.insert(alpha_name);
|
||||
applied_lora_tensors.insert(scale_name);
|
||||
|
||||
// calc_cale
|
||||
int64_t dim = lora_down->ne[lora_down->n_dims - 1];
|
||||
float scale_value = 1.0f;
|
||||
if (lora_tensors.find(scale_name) != lora_tensors.end()) {
|
||||
scale_value = ggml_backend_tensor_get_f32(lora_tensors[scale_name]);
|
||||
} else if (lora_tensors.find(alpha_name) != lora_tensors.end()) {
|
||||
float alpha = ggml_backend_tensor_get_f32(lora_tensors[alpha_name]);
|
||||
scale_value = alpha / dim;
|
||||
}
|
||||
scale_value *= multiplier;
|
||||
|
||||
ggml_tensor* lora_scale = ggml_new_tensor_1d(ctx0, GGML_TYPE_F32, 1);
|
||||
|
||||
ggml_allocr_alloc(compute_alloc, lora_scale);
|
||||
if (!ggml_allocr_is_measure(compute_alloc)) {
|
||||
ggml_backend_tensor_set(lora_scale, &scale_value, 0, ggml_nbytes(lora_scale));
|
||||
}
|
||||
|
||||
// flat lora tensors to multiply it
|
||||
int64_t lora_up_rows = lora_up->ne[lora_up->n_dims - 1];
|
||||
lora_up = ggml_reshape_2d(ctx0, lora_up, ggml_nelements(lora_up) / lora_up_rows, lora_up_rows);
|
||||
int64_t lora_down_rows = lora_down->ne[lora_down->n_dims - 1];
|
||||
lora_down = ggml_reshape_2d(ctx0, lora_down, ggml_nelements(lora_down) / lora_down_rows, lora_down_rows);
|
||||
|
||||
// ggml_mul_mat requires tensor b transposed
|
||||
lora_down = ggml_cont(ctx0, ggml_transpose(ctx0, lora_down));
|
||||
struct ggml_tensor* updown = ggml_mul_mat(ctx0, lora_up, lora_down);
|
||||
updown = ggml_cont(ctx0, ggml_transpose(ctx0, updown));
|
||||
updown = ggml_reshape(ctx0, updown, weight);
|
||||
GGML_ASSERT(ggml_nelements(updown) == ggml_nelements(weight));
|
||||
updown = ggml_scale_inplace(ctx0, updown, lora_scale);
|
||||
ggml_tensor* final_weight;
|
||||
// if (weight->type != GGML_TYPE_F32 && weight->type != GGML_TYPE_F16) {
|
||||
// final_weight = ggml_new_tensor(ctx0, GGML_TYPE_F32, weight->n_dims, weight->ne);
|
||||
// final_weight = ggml_cpy_inplace(ctx0, weight, final_weight);
|
||||
// final_weight = ggml_add_inplace(ctx0, final_weight, updown);
|
||||
// final_weight = ggml_cpy_inplace(ctx0, final_weight, weight);
|
||||
// } else {
|
||||
// final_weight = ggml_add_inplace(ctx0, weight, updown);
|
||||
// }
|
||||
final_weight = ggml_add_inplace(ctx0, weight, updown); // apply directly
|
||||
ggml_build_forward_expand(gf, final_weight);
|
||||
}
|
||||
|
||||
for (auto& kv : lora_tensors) {
|
||||
if (applied_lora_tensors.find(kv.first) == applied_lora_tensors.end()) {
|
||||
LOG_WARN("unused lora tensor %s", kv.first.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
return gf;
|
||||
}
|
||||
|
||||
@ -3683,11 +3635,7 @@ public:
|
||||
} else if (rng_type == CUDA_RNG) {
|
||||
rng = std::make_shared<PhiloxRNG>();
|
||||
}
|
||||
if (lora_model_dir.size() > 0) {
|
||||
if (lora_model_dir[lora_model_dir.size() - 1] != '/' && lora_model_dir[lora_model_dir.size() - 1] != '\\') {
|
||||
this->lora_model_dir = lora_model_dir + "/";
|
||||
}
|
||||
}
|
||||
this->lora_model_dir = lora_model_dir;
|
||||
}
|
||||
|
||||
~StableDiffusionGGML() {
|
||||
@ -3696,7 +3644,10 @@ public:
|
||||
first_stage_model.destroy();
|
||||
}
|
||||
|
||||
bool load_from_file(const std::string& file_path, Schedule schedule) {
|
||||
bool load_from_file(const std::string& model_path,
|
||||
const std::string& vae_path,
|
||||
ggml_type wtype,
|
||||
Schedule schedule) {
|
||||
#ifdef SD_USE_CUBLAS
|
||||
LOG_DEBUG("Using CUDA backend");
|
||||
backend = ggml_backend_cuda_init();
|
||||
@ -3712,59 +3663,44 @@ public:
|
||||
LOG_INFO("Flash Attention enabled");
|
||||
#endif
|
||||
#endif
|
||||
LOG_INFO("loading model from '%s'", file_path.c_str());
|
||||
ggml_context* ctx_meta = NULL;
|
||||
gguf_context* ctx_gguf = gguf_init_from_file(file_path.c_str(), {true, &ctx_meta});
|
||||
if (!ctx_gguf) {
|
||||
LOG_ERROR("failed to open '%s'", file_path.c_str());
|
||||
LOG_INFO("loading model from '%s'", model_path.c_str());
|
||||
std::shared_ptr<ModelLoader> model_loader = std::shared_ptr<ModelLoader>(init_model_loader_from_file(model_path));
|
||||
|
||||
if (!model_loader) {
|
||||
LOG_ERROR("init model loader from file failed: '%s'", model_path.c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
FILE* fp = std::fopen(file_path.c_str(), "rb");
|
||||
|
||||
SDVersion version = VERSION_COUNT;
|
||||
|
||||
int n_kv = gguf_get_n_kv(ctx_gguf);
|
||||
int n_tensors = gguf_get_n_tensors(ctx_gguf);
|
||||
|
||||
for (int i = 0; i < n_kv; i++) {
|
||||
const char* name = gguf_get_key(ctx_gguf, i);
|
||||
const enum gguf_type type = gguf_get_kv_type(ctx_gguf, i);
|
||||
LOG_DEBUG("%s: - kv %3d: %42s %-8s", __func__, i, name, gguf_type_name(type));
|
||||
}
|
||||
|
||||
{
|
||||
int nidx = gguf_find_key(ctx_gguf, "sd.model.name");
|
||||
int vidx = gguf_find_key(ctx_gguf, "sd.model.version");
|
||||
if (vidx >= 0 && nidx >= 0) {
|
||||
version = (SDVersion)gguf_get_val_i8(ctx_gguf, vidx);
|
||||
cond_stage_model = FrozenCLIPEmbedderWithCustomWords(version);
|
||||
diffusion_model = UNetModel(version);
|
||||
LOG_INFO("Stable Diffusion %s | %s", model_version_to_str[version], gguf_get_val_str(ctx_gguf, nidx));
|
||||
if (vae_path.size() > 0) {
|
||||
LOG_INFO("loading vae from '%s'", vae_path.c_str());
|
||||
if (!model_loader->init_from_file(vae_path, "vae.")) {
|
||||
LOG_WARN("loading vae from '%s' failed", vae_path.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
int idx = gguf_find_key(ctx_gguf, "sd.model.dtype");
|
||||
if (idx >= 0) {
|
||||
model_data_type = (ggml_type)gguf_get_val_i32(ctx_gguf, idx);
|
||||
LOG_INFO("model data type: %s", ggml_type_name(model_data_type));
|
||||
}
|
||||
SDVersion version = model_loader->get_sd_version();
|
||||
if (version == VERSION_COUNT) {
|
||||
LOG_ERROR("get sd version from file failed: '%s'", model_path.c_str());
|
||||
return false;
|
||||
}
|
||||
cond_stage_model = FrozenCLIPEmbedderWithCustomWords(version);
|
||||
diffusion_model = UNetModel(version);
|
||||
LOG_INFO("Stable Diffusion %s ", model_version_to_str[version]);
|
||||
if (wtype == GGML_TYPE_COUNT) {
|
||||
model_data_type = model_loader->get_sd_wtype();
|
||||
} else {
|
||||
model_data_type = wtype;
|
||||
}
|
||||
LOG_INFO("Stable Diffusion weight type: %s", ggml_type_name(model_data_type));
|
||||
|
||||
LOG_DEBUG("loading vocab");
|
||||
|
||||
// load vocab
|
||||
{
|
||||
int tidx = gguf_find_key(ctx_gguf, "sd.vocab.tokens");
|
||||
if (tidx == -1) {
|
||||
LOG_ERROR("vocab not found");
|
||||
return false;
|
||||
}
|
||||
int n_vocab = gguf_get_arr_n(ctx_gguf, tidx);
|
||||
for (int i = 0; i < n_vocab; i++) {
|
||||
cond_stage_model.tokenizer.add_token(gguf_get_arr_str(ctx_gguf, tidx, i), i);
|
||||
}
|
||||
auto add_token = [&](const std::string& token, int32_t token_id) {
|
||||
cond_stage_model.tokenizer.add_token(token, token_id);
|
||||
};
|
||||
bool success = model_loader->load_vocab(add_token);
|
||||
if (!success) {
|
||||
LOG_ERROR("get vocab from file failed: '%s'", model_path.c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
// create the ggml context for network params
|
||||
@ -3793,34 +3729,33 @@ public:
|
||||
first_stage_model.map_by_name(tensors, "first_stage_model.");
|
||||
}
|
||||
|
||||
std::set<std::string> tensor_names_in_file;
|
||||
int64_t t0 = ggml_time_ms();
|
||||
LOG_DEBUG("loading weights");
|
||||
struct ggml_init_params params;
|
||||
params.mem_size = static_cast<size_t>(10 * 1024) * 1024; // 10M
|
||||
params.mem_buffer = NULL;
|
||||
params.no_alloc = false;
|
||||
struct ggml_context* ctx = ggml_init(params); // for alphas_cumprod and is_using_v_parameterization check
|
||||
if (!ctx) {
|
||||
LOG_ERROR("ggml_init() failed");
|
||||
return false;
|
||||
}
|
||||
ggml_tensor* alphas_cumprod_tensor = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, TIMESTEPS);
|
||||
calculate_alphas_cumprod((float*)alphas_cumprod_tensor->data);
|
||||
|
||||
// load weights
|
||||
float alphas_cumprod[TIMESTEPS];
|
||||
LOG_DEBUG("loading weights");
|
||||
std::set<std::string> tensor_names_in_file;
|
||||
int64_t t0 = ggml_time_ms();
|
||||
|
||||
size_t total_size = 0;
|
||||
std::vector<char> read_buf;
|
||||
size_t total_size = 0;
|
||||
size_t data_offset = gguf_get_data_offset(ctx_gguf);
|
||||
for (int i = 0; i < n_tensors; i++) {
|
||||
std::string name = gguf_get_tensor_name(ctx_gguf, i);
|
||||
struct ggml_tensor* dummy = ggml_get_tensor(ctx_meta, name.c_str());
|
||||
size_t offset = data_offset + gguf_get_tensor_offset(ctx_gguf, i);
|
||||
|
||||
auto on_new_tensor_cb = [&](const TensorStorage& tensor_storage, ggml_tensor** dst_tensor) -> bool {
|
||||
const std::string& name = tensor_storage.name;
|
||||
tensor_names_in_file.insert(name);
|
||||
|
||||
#ifdef _WIN32
|
||||
int ret = _fseeki64(fp, (__int64)offset, SEEK_SET);
|
||||
#else
|
||||
int ret = std::fseek(fp, (long)offset, SEEK_SET);
|
||||
#endif
|
||||
if (ret == -1) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (name == "alphas_cumprod") {
|
||||
sd_fread(alphas_cumprod, 1, ggml_nbytes(dummy), fp);
|
||||
continue;
|
||||
*dst_tensor = alphas_cumprod_tensor;
|
||||
return true;
|
||||
}
|
||||
|
||||
struct ggml_tensor* real;
|
||||
@ -3832,54 +3767,46 @@ public:
|
||||
} else {
|
||||
if (!vae_decode_only) {
|
||||
LOG_WARN("unknown tensor '%s' in model file", name.data());
|
||||
return false;
|
||||
}
|
||||
}
|
||||
continue;
|
||||
return true;
|
||||
}
|
||||
|
||||
if (
|
||||
real->ne[0] != dummy->ne[0] ||
|
||||
real->ne[1] != dummy->ne[1] ||
|
||||
real->ne[2] != dummy->ne[2] ||
|
||||
real->ne[3] != dummy->ne[3]) {
|
||||
real->ne[0] != tensor_storage.ne[0] ||
|
||||
real->ne[1] != tensor_storage.ne[1] ||
|
||||
real->ne[2] != tensor_storage.ne[2] ||
|
||||
real->ne[3] != tensor_storage.ne[3]) {
|
||||
LOG_ERROR(
|
||||
"tensor '%s' has wrong shape in model file: "
|
||||
"got [%d, %d, %d, %d], expected [%d, %d, %d, %d]",
|
||||
name.c_str(),
|
||||
(int)dummy->ne[0], (int)dummy->ne[1], (int)dummy->ne[2], (int)dummy->ne[3],
|
||||
(int)tensor_storage.ne[0], (int)tensor_storage.ne[1], (int)tensor_storage.ne[2], (int)tensor_storage.ne[3],
|
||||
(int)real->ne[0], (int)real->ne[1], (int)real->ne[2], (int)real->ne[3]);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (real->type != dummy->type) {
|
||||
LOG_ERROR("tensor '%s' has wrong type in model file: got %s, expect %s",
|
||||
name.c_str(), ggml_type_name(dummy->type), ggml_type_name(real->type));
|
||||
return false;
|
||||
}
|
||||
*dst_tensor = real;
|
||||
|
||||
int num_bytes = (int)ggml_nbytes(dummy);
|
||||
total_size += ggml_nbytes(real);
|
||||
return true;
|
||||
};
|
||||
|
||||
if (ggml_backend_is_cpu(backend)) {
|
||||
// for the CPU and Metal backend, we can read directly into the tensor
|
||||
sd_fread(real->data, 1, num_bytes, fp);
|
||||
} else {
|
||||
// read into a temporary buffer first, then copy to device memory
|
||||
read_buf.resize(num_bytes);
|
||||
sd_fread(read_buf.data(), 1, num_bytes, fp);
|
||||
ggml_backend_tensor_set(real, read_buf.data(), 0, num_bytes);
|
||||
}
|
||||
// print_ggml_tensor(alphas_cumprod_tensor);
|
||||
|
||||
total_size += ggml_nbytes(dummy);
|
||||
success = model_loader->load_tensors(on_new_tensor_cb);
|
||||
if (!success) {
|
||||
LOG_ERROR("load tensors from file failed");
|
||||
ggml_free(ctx);
|
||||
return false;
|
||||
}
|
||||
|
||||
gguf_free(ctx_gguf);
|
||||
ggml_free(ctx_meta);
|
||||
// print_ggml_tensor(alphas_cumprod_tensor);
|
||||
|
||||
std::fclose(fp);
|
||||
read_buf.clear();
|
||||
// calculate_alphas_cumprod((float*)alphas_cumprod_tensor->data);
|
||||
|
||||
bool some_tensor_not_init = false;
|
||||
|
||||
for (auto pair : tensors) {
|
||||
if (pair.first.find("cond_stage_model.transformer.text_model.encoder.layers.23") != std::string::npos) {
|
||||
continue;
|
||||
@ -3891,12 +3818,8 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
if (tensor_names_in_file.find("alphas_cumprod") == tensor_names_in_file.end()) {
|
||||
LOG_ERROR("tensor alphas_cumprod not in model file");
|
||||
some_tensor_not_init = true;
|
||||
}
|
||||
|
||||
if (some_tensor_not_init) {
|
||||
ggml_free(ctx);
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -3912,24 +3835,14 @@ public:
|
||||
diffusion_model.memory_buffer_size / 1024.0 / 1024.0,
|
||||
first_stage_model.memory_buffer_size / 1024.0 / 1024.0);
|
||||
int64_t t1 = ggml_time_ms();
|
||||
LOG_INFO("loading model from '%s' completed, taking %.2fs", file_path.c_str(), (t1 - t0) * 1.0f / 1000);
|
||||
LOG_INFO("loading model from '%s' completed, taking %.2fs", model_path.c_str(), (t1 - t0) * 1.0f / 1000);
|
||||
|
||||
// check is_using_v_parameterization_for_sd2
|
||||
bool is_using_v_parameterization = false;
|
||||
if (version == VERSION_2_x) {
|
||||
struct ggml_init_params params;
|
||||
params.mem_size = static_cast<size_t>(10 * 1024) * 1024; // 10M
|
||||
params.mem_buffer = NULL;
|
||||
params.no_alloc = false;
|
||||
struct ggml_context* ctx = ggml_init(params);
|
||||
if (!ctx) {
|
||||
LOG_ERROR("ggml_init() failed");
|
||||
return false;
|
||||
}
|
||||
if (is_using_v_parameterization_for_sd2(ctx)) {
|
||||
is_using_v_parameterization = true;
|
||||
}
|
||||
ggml_free(ctx);
|
||||
}
|
||||
|
||||
if (is_using_v_parameterization) {
|
||||
@ -3959,11 +3872,12 @@ public:
|
||||
}
|
||||
|
||||
for (int i = 0; i < TIMESTEPS; i++) {
|
||||
denoiser->schedule->alphas_cumprod[i] = alphas_cumprod[i];
|
||||
denoiser->schedule->alphas_cumprod[i] = ((float*)alphas_cumprod_tensor->data)[i];
|
||||
denoiser->schedule->sigmas[i] = std::sqrt((1 - denoiser->schedule->alphas_cumprod[i]) / denoiser->schedule->alphas_cumprod[i]);
|
||||
denoiser->schedule->log_sigmas[i] = std::log(denoiser->schedule->sigmas[i]);
|
||||
}
|
||||
LOG_DEBUG("finished loaded file");
|
||||
ggml_free(ctx);
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -4005,13 +3919,26 @@ public:
|
||||
void apply_lora(const std::string& lora_name, float multiplier) {
|
||||
int64_t t0 = ggml_time_ms();
|
||||
LoraModel lora;
|
||||
std::string file_path = lora_model_dir + lora_name + ".gguf";
|
||||
if (lora.load(backend, file_path)) {
|
||||
lora.strength = multiplier;
|
||||
lora.apply(tensors, n_threads);
|
||||
loras[lora_name] = lora;
|
||||
lora.release();
|
||||
std::string st_file_path = path_join(lora_model_dir, lora_name + ".safetensors");
|
||||
std::string ckpt_file_path = path_join(lora_model_dir, lora_name + ".ckpt");
|
||||
std::string file_path;
|
||||
if (file_exists(st_file_path)) {
|
||||
file_path = st_file_path;
|
||||
} else if (file_exists(ckpt_file_path)) {
|
||||
file_path = ckpt_file_path;
|
||||
} else {
|
||||
LOG_WARN("can not find %s or %s for lora %s", st_file_path.c_str(), ckpt_file_path.c_str(), lora_name.c_str());
|
||||
return;
|
||||
}
|
||||
if (!lora.load(backend, file_path)) {
|
||||
LOG_WARN("load lora tensors from %s failed", file_path.c_str());
|
||||
return;
|
||||
}
|
||||
|
||||
lora.multiplier = multiplier;
|
||||
lora.apply(tensors, n_threads);
|
||||
loras[lora_name] = lora;
|
||||
lora.release();
|
||||
|
||||
int64_t t1 = ggml_time_ms();
|
||||
|
||||
@ -4621,8 +4548,11 @@ StableDiffusion::StableDiffusion(int n_threads,
|
||||
rng_type);
|
||||
}
|
||||
|
||||
bool StableDiffusion::load_from_file(const std::string& file_path, Schedule s) {
|
||||
return sd->load_from_file(file_path, s);
|
||||
bool StableDiffusion::load_from_file(const std::string& model_path,
|
||||
const std::string& vae_path,
|
||||
ggml_type wtype,
|
||||
Schedule s) {
|
||||
return sd->load_from_file(model_path, vae_path, wtype, s);
|
||||
}
|
||||
|
||||
std::vector<uint8_t*> StableDiffusion::txt2img(std::string prompt,
|
||||
|
@ -5,13 +5,6 @@
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
enum SDLogLevel {
|
||||
DEBUG,
|
||||
INFO,
|
||||
WARN,
|
||||
ERROR
|
||||
};
|
||||
|
||||
enum RNGType {
|
||||
STD_DEFAULT_RNG,
|
||||
CUDA_RNG
|
||||
@ -48,7 +41,10 @@ public:
|
||||
bool free_params_immediately = false,
|
||||
std::string lora_model_dir = "",
|
||||
RNGType rng_type = STD_DEFAULT_RNG);
|
||||
bool load_from_file(const std::string& file_path, Schedule d = DEFAULT);
|
||||
bool load_from_file(const std::string& model_path,
|
||||
const std::string& vae_path,
|
||||
ggml_type wtype,
|
||||
Schedule d = DEFAULT);
|
||||
std::vector<uint8_t*> txt2img(
|
||||
std::string prompt,
|
||||
std::string negative_prompt,
|
||||
@ -73,7 +69,6 @@ public:
|
||||
int64_t seed);
|
||||
};
|
||||
|
||||
void set_sd_log_level(SDLogLevel level);
|
||||
std::string sd_get_system_info();
|
||||
|
||||
#endif // __STABLE_DIFFUSION_H__
|
3
thirdparty/CMakeLists.txt
vendored
Normal file
3
thirdparty/CMakeLists.txt
vendored
Normal file
@ -0,0 +1,3 @@
|
||||
set(Z_TARGET zip)
|
||||
add_library(${Z_TARGET} OBJECT zip.c zip.h miniz.h)
|
||||
target_include_directories(${Z_TARGET} PUBLIC .)
|
2
thirdparty/README.md
vendored
Normal file
2
thirdparty/README.md
vendored
Normal file
@ -0,0 +1,2 @@
|
||||
- json.hpp library from: https://github.com/nlohmann/json
|
||||
- ZIP Library from: https://github.com/kuba--/zip
|
24596
thirdparty/json.hpp
vendored
Normal file
24596
thirdparty/json.hpp
vendored
Normal file
File diff suppressed because it is too large
Load Diff
10130
thirdparty/miniz.h
vendored
Normal file
10130
thirdparty/miniz.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
7987
thirdparty/stb_image.h
vendored
Normal file
7987
thirdparty/stb_image.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1741
thirdparty/stb_image_write.h
vendored
Normal file
1741
thirdparty/stb_image_write.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1836
thirdparty/zip.c
vendored
Normal file
1836
thirdparty/zip.c
vendored
Normal file
File diff suppressed because it is too large
Load Diff
509
thirdparty/zip.h
vendored
Normal file
509
thirdparty/zip.h
vendored
Normal file
@ -0,0 +1,509 @@
|
||||
/*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#ifndef ZIP_H
|
||||
#define ZIP_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
|
||||
#ifndef ZIP_SHARED
|
||||
#define ZIP_EXPORT
|
||||
#else
|
||||
#ifdef _WIN32
|
||||
#ifdef ZIP_BUILD_SHARED
|
||||
#define ZIP_EXPORT __declspec(dllexport)
|
||||
#else
|
||||
#define ZIP_EXPORT __declspec(dllimport)
|
||||
#endif
|
||||
#else
|
||||
#define ZIP_EXPORT __attribute__((visibility("default")))
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#if !defined(_POSIX_C_SOURCE) && defined(_MSC_VER)
|
||||
// 64-bit Windows is the only mainstream platform
|
||||
// where sizeof(long) != sizeof(void*)
|
||||
#ifdef _WIN64
|
||||
typedef long long ssize_t; /* byte count or error */
|
||||
#else
|
||||
typedef long ssize_t; /* byte count or error */
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @mainpage
|
||||
*
|
||||
* Documentation for @ref zip.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup zip
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* Default zip compression level.
|
||||
*/
|
||||
#define ZIP_DEFAULT_COMPRESSION_LEVEL 6
|
||||
|
||||
/**
|
||||
* Error codes
|
||||
*/
|
||||
#define ZIP_ENOINIT -1 // not initialized
|
||||
#define ZIP_EINVENTNAME -2 // invalid entry name
|
||||
#define ZIP_ENOENT -3 // entry not found
|
||||
#define ZIP_EINVMODE -4 // invalid zip mode
|
||||
#define ZIP_EINVLVL -5 // invalid compression level
|
||||
#define ZIP_ENOSUP64 -6 // no zip 64 support
|
||||
#define ZIP_EMEMSET -7 // memset error
|
||||
#define ZIP_EWRTENT -8 // cannot write data to entry
|
||||
#define ZIP_ETDEFLINIT -9 // cannot initialize tdefl compressor
|
||||
#define ZIP_EINVIDX -10 // invalid index
|
||||
#define ZIP_ENOHDR -11 // header not found
|
||||
#define ZIP_ETDEFLBUF -12 // cannot flush tdefl buffer
|
||||
#define ZIP_ECRTHDR -13 // cannot create entry header
|
||||
#define ZIP_EWRTHDR -14 // cannot write entry header
|
||||
#define ZIP_EWRTDIR -15 // cannot write to central dir
|
||||
#define ZIP_EOPNFILE -16 // cannot open file
|
||||
#define ZIP_EINVENTTYPE -17 // invalid entry type
|
||||
#define ZIP_EMEMNOALLOC -18 // extracting data using no memory allocation
|
||||
#define ZIP_ENOFILE -19 // file not found
|
||||
#define ZIP_ENOPERM -20 // no permission
|
||||
#define ZIP_EOOMEM -21 // out of memory
|
||||
#define ZIP_EINVZIPNAME -22 // invalid zip archive name
|
||||
#define ZIP_EMKDIR -23 // make dir error
|
||||
#define ZIP_ESYMLINK -24 // symlink error
|
||||
#define ZIP_ECLSZIP -25 // close archive error
|
||||
#define ZIP_ECAPSIZE -26 // capacity size too small
|
||||
#define ZIP_EFSEEK -27 // fseek error
|
||||
#define ZIP_EFREAD -28 // fread error
|
||||
#define ZIP_EFWRITE -29 // fwrite error
|
||||
#define ZIP_ERINIT -30 // cannot initialize reader
|
||||
#define ZIP_EWINIT -31 // cannot initialize writer
|
||||
#define ZIP_EWRINIT -32 // cannot initialize writer from reader
|
||||
|
||||
/**
|
||||
* Looks up the error message string corresponding to an error number.
|
||||
* @param errnum error number
|
||||
* @return error message string corresponding to errnum or NULL if error is not
|
||||
* found.
|
||||
*/
|
||||
extern ZIP_EXPORT const char *zip_strerror(int errnum);
|
||||
|
||||
/**
|
||||
* @struct zip_t
|
||||
*
|
||||
* This data structure is used throughout the library to represent zip archive -
|
||||
* forward declaration.
|
||||
*/
|
||||
struct zip_t;
|
||||
|
||||
/**
|
||||
* Opens zip archive with compression level using the given mode.
|
||||
*
|
||||
* @param zipname zip archive file name.
|
||||
* @param level compression level (0-9 are the standard zlib-style levels).
|
||||
* @param mode file access mode.
|
||||
* - 'r': opens a file for reading/extracting (the file must exists).
|
||||
* - 'w': creates an empty file for writing.
|
||||
* - 'a': appends to an existing archive.
|
||||
*
|
||||
* @return the zip archive handler or NULL on error
|
||||
*/
|
||||
extern ZIP_EXPORT struct zip_t *zip_open(const char *zipname, int level,
|
||||
char mode);
|
||||
|
||||
/**
|
||||
* Opens zip archive with compression level using the given mode.
|
||||
* The function additionally returns @param errnum -
|
||||
*
|
||||
* @param zipname zip archive file name.
|
||||
* @param level compression level (0-9 are the standard zlib-style levels).
|
||||
* @param mode file access mode.
|
||||
* - 'r': opens a file for reading/extracting (the file must exists).
|
||||
* - 'w': creates an empty file for writing.
|
||||
* - 'a': appends to an existing archive.
|
||||
* @param errnum 0 on success, negative number (< 0) on error.
|
||||
*
|
||||
* @return the zip archive handler or NULL on error
|
||||
*/
|
||||
extern ZIP_EXPORT struct zip_t *
|
||||
zip_openwitherror(const char *zipname, int level, char mode, int *errnum);
|
||||
|
||||
/**
|
||||
* Closes the zip archive, releases resources - always finalize.
|
||||
*
|
||||
* @param zip zip archive handler.
|
||||
*/
|
||||
extern ZIP_EXPORT void zip_close(struct zip_t *zip);
|
||||
|
||||
/**
|
||||
* Determines if the archive has a zip64 end of central directory headers.
|
||||
*
|
||||
* @param zip zip archive handler.
|
||||
*
|
||||
* @return the return code - 1 (true), 0 (false), negative number (< 0) on
|
||||
* error.
|
||||
*/
|
||||
extern ZIP_EXPORT int zip_is64(struct zip_t *zip);
|
||||
|
||||
/**
|
||||
* Opens an entry by name in the zip archive.
|
||||
*
|
||||
* For zip archive opened in 'w' or 'a' mode the function will append
|
||||
* a new entry. In readonly mode the function tries to locate the entry
|
||||
* in global dictionary.
|
||||
*
|
||||
* @param zip zip archive handler.
|
||||
* @param entryname an entry name in local dictionary.
|
||||
*
|
||||
* @return the return code - 0 on success, negative number (< 0) on error.
|
||||
*/
|
||||
extern ZIP_EXPORT int zip_entry_open(struct zip_t *zip, const char *entryname);
|
||||
|
||||
/**
|
||||
* Opens an entry by name in the zip archive.
|
||||
*
|
||||
* For zip archive opened in 'w' or 'a' mode the function will append
|
||||
* a new entry. In readonly mode the function tries to locate the entry
|
||||
* in global dictionary (case sensitive).
|
||||
*
|
||||
* @param zip zip archive handler.
|
||||
* @param entryname an entry name in local dictionary (case sensitive).
|
||||
*
|
||||
* @return the return code - 0 on success, negative number (< 0) on error.
|
||||
*/
|
||||
extern ZIP_EXPORT int zip_entry_opencasesensitive(struct zip_t *zip,
|
||||
const char *entryname);
|
||||
|
||||
/**
|
||||
* Opens a new entry by index in the zip archive.
|
||||
*
|
||||
* This function is only valid if zip archive was opened in 'r' (readonly) mode.
|
||||
*
|
||||
* @param zip zip archive handler.
|
||||
* @param index index in local dictionary.
|
||||
*
|
||||
* @return the return code - 0 on success, negative number (< 0) on error.
|
||||
*/
|
||||
extern ZIP_EXPORT int zip_entry_openbyindex(struct zip_t *zip, size_t index);
|
||||
|
||||
/**
|
||||
* Closes a zip entry, flushes buffer and releases resources.
|
||||
*
|
||||
* @param zip zip archive handler.
|
||||
*
|
||||
* @return the return code - 0 on success, negative number (< 0) on error.
|
||||
*/
|
||||
extern ZIP_EXPORT int zip_entry_close(struct zip_t *zip);
|
||||
|
||||
/**
|
||||
* Returns a local name of the current zip entry.
|
||||
*
|
||||
* The main difference between user's entry name and local entry name
|
||||
* is optional relative path.
|
||||
* Following .ZIP File Format Specification - the path stored MUST not contain
|
||||
* a drive or device letter, or a leading slash.
|
||||
* All slashes MUST be forward slashes '/' as opposed to backwards slashes '\'
|
||||
* for compatibility with Amiga and UNIX file systems etc.
|
||||
*
|
||||
* @param zip: zip archive handler.
|
||||
*
|
||||
* @return the pointer to the current zip entry name, or NULL on error.
|
||||
*/
|
||||
extern ZIP_EXPORT const char *zip_entry_name(struct zip_t *zip);
|
||||
|
||||
/**
|
||||
* Returns an index of the current zip entry.
|
||||
*
|
||||
* @param zip zip archive handler.
|
||||
*
|
||||
* @return the index on success, negative number (< 0) on error.
|
||||
*/
|
||||
extern ZIP_EXPORT ssize_t zip_entry_index(struct zip_t *zip);
|
||||
|
||||
/**
|
||||
* Determines if the current zip entry is a directory entry.
|
||||
*
|
||||
* @param zip zip archive handler.
|
||||
*
|
||||
* @return the return code - 1 (true), 0 (false), negative number (< 0) on
|
||||
* error.
|
||||
*/
|
||||
extern ZIP_EXPORT int zip_entry_isdir(struct zip_t *zip);
|
||||
|
||||
/**
|
||||
* Returns the uncompressed size of the current zip entry.
|
||||
* Alias for zip_entry_uncomp_size (for backward compatibility).
|
||||
*
|
||||
* @param zip zip archive handler.
|
||||
*
|
||||
* @return the uncompressed size in bytes.
|
||||
*/
|
||||
extern ZIP_EXPORT unsigned long long zip_entry_size(struct zip_t *zip);
|
||||
|
||||
/**
|
||||
* Returns the uncompressed size of the current zip entry.
|
||||
*
|
||||
* @param zip zip archive handler.
|
||||
*
|
||||
* @return the uncompressed size in bytes.
|
||||
*/
|
||||
extern ZIP_EXPORT unsigned long long zip_entry_uncomp_size(struct zip_t *zip);
|
||||
|
||||
/**
|
||||
* Returns the compressed size of the current zip entry.
|
||||
*
|
||||
* @param zip zip archive handler.
|
||||
*
|
||||
* @return the compressed size in bytes.
|
||||
*/
|
||||
extern ZIP_EXPORT unsigned long long zip_entry_comp_size(struct zip_t *zip);
|
||||
|
||||
/**
|
||||
* Returns CRC-32 checksum of the current zip entry.
|
||||
*
|
||||
* @param zip zip archive handler.
|
||||
*
|
||||
* @return the CRC-32 checksum.
|
||||
*/
|
||||
extern ZIP_EXPORT unsigned int zip_entry_crc32(struct zip_t *zip);
|
||||
|
||||
/**
|
||||
* Compresses an input buffer for the current zip entry.
|
||||
*
|
||||
* @param zip zip archive handler.
|
||||
* @param buf input buffer.
|
||||
* @param bufsize input buffer size (in bytes).
|
||||
*
|
||||
* @return the return code - 0 on success, negative number (< 0) on error.
|
||||
*/
|
||||
extern ZIP_EXPORT int zip_entry_write(struct zip_t *zip, const void *buf,
|
||||
size_t bufsize);
|
||||
|
||||
/**
|
||||
* Compresses a file for the current zip entry.
|
||||
*
|
||||
* @param zip zip archive handler.
|
||||
* @param filename input file.
|
||||
*
|
||||
* @return the return code - 0 on success, negative number (< 0) on error.
|
||||
*/
|
||||
extern ZIP_EXPORT int zip_entry_fwrite(struct zip_t *zip, const char *filename);
|
||||
|
||||
/**
|
||||
* Extracts the current zip entry into output buffer.
|
||||
*
|
||||
* The function allocates sufficient memory for a output buffer.
|
||||
*
|
||||
* @param zip zip archive handler.
|
||||
* @param buf output buffer.
|
||||
* @param bufsize output buffer size (in bytes).
|
||||
*
|
||||
* @note remember to release memory allocated for a output buffer.
|
||||
* for large entries, please take a look at zip_entry_extract function.
|
||||
*
|
||||
* @return the return code - the number of bytes actually read on success.
|
||||
* Otherwise a negative number (< 0) on error.
|
||||
*/
|
||||
extern ZIP_EXPORT ssize_t zip_entry_read(struct zip_t *zip, void **buf,
|
||||
size_t *bufsize);
|
||||
|
||||
/**
|
||||
* Extracts the current zip entry into a memory buffer using no memory
|
||||
* allocation.
|
||||
*
|
||||
* @param zip zip archive handler.
|
||||
* @param buf preallocated output buffer.
|
||||
* @param bufsize output buffer size (in bytes).
|
||||
*
|
||||
* @note ensure supplied output buffer is large enough.
|
||||
* zip_entry_size function (returns uncompressed size for the current
|
||||
* entry) can be handy to estimate how big buffer is needed.
|
||||
* For large entries, please take a look at zip_entry_extract function.
|
||||
*
|
||||
* @return the return code - the number of bytes actually read on success.
|
||||
* Otherwise a negative number (< 0) on error (e.g. bufsize is not large
|
||||
* enough).
|
||||
*/
|
||||
extern ZIP_EXPORT ssize_t zip_entry_noallocread(struct zip_t *zip, void *buf,
|
||||
size_t bufsize);
|
||||
|
||||
/**
|
||||
* Extracts the current zip entry into output file.
|
||||
*
|
||||
* @param zip zip archive handler.
|
||||
* @param filename output file.
|
||||
*
|
||||
* @return the return code - 0 on success, negative number (< 0) on error.
|
||||
*/
|
||||
extern ZIP_EXPORT int zip_entry_fread(struct zip_t *zip, const char *filename);
|
||||
|
||||
/**
|
||||
* Extracts the current zip entry using a callback function (on_extract).
|
||||
*
|
||||
* @param zip zip archive handler.
|
||||
* @param on_extract callback function.
|
||||
* @param arg opaque pointer (optional argument, which you can pass to the
|
||||
* on_extract callback)
|
||||
*
|
||||
* @return the return code - 0 on success, negative number (< 0) on error.
|
||||
*/
|
||||
extern ZIP_EXPORT int
|
||||
zip_entry_extract(struct zip_t *zip,
|
||||
size_t (*on_extract)(void *arg, uint64_t offset,
|
||||
const void *data, size_t size),
|
||||
void *arg);
|
||||
|
||||
/**
|
||||
* Returns the number of all entries (files and directories) in the zip archive.
|
||||
*
|
||||
* @param zip zip archive handler.
|
||||
*
|
||||
* @return the return code - the number of entries on success, negative number
|
||||
* (< 0) on error.
|
||||
*/
|
||||
extern ZIP_EXPORT ssize_t zip_entries_total(struct zip_t *zip);
|
||||
|
||||
/**
|
||||
* Deletes zip archive entries.
|
||||
*
|
||||
* @param zip zip archive handler.
|
||||
* @param entries array of zip archive entries to be deleted.
|
||||
* @param len the number of entries to be deleted.
|
||||
* @return the number of deleted entries, or negative number (< 0) on error.
|
||||
*/
|
||||
extern ZIP_EXPORT ssize_t zip_entries_delete(struct zip_t *zip,
|
||||
char *const entries[], size_t len);
|
||||
|
||||
/**
|
||||
* Extracts a zip archive stream into directory.
|
||||
*
|
||||
* If on_extract is not NULL, the callback will be called after
|
||||
* successfully extracted each zip entry.
|
||||
* Returning a negative value from the callback will cause abort and return an
|
||||
* error. The last argument (void *arg) is optional, which you can use to pass
|
||||
* data to the on_extract callback.
|
||||
*
|
||||
* @param stream zip archive stream.
|
||||
* @param size stream size.
|
||||
* @param dir output directory.
|
||||
* @param on_extract on extract callback.
|
||||
* @param arg opaque pointer.
|
||||
*
|
||||
* @return the return code - 0 on success, negative number (< 0) on error.
|
||||
*/
|
||||
extern ZIP_EXPORT int
|
||||
zip_stream_extract(const char *stream, size_t size, const char *dir,
|
||||
int (*on_extract)(const char *filename, void *arg),
|
||||
void *arg);
|
||||
|
||||
/**
|
||||
* Opens zip archive stream into memory.
|
||||
*
|
||||
* @param stream zip archive stream.
|
||||
* @param size stream size.
|
||||
* @param level compression level (0-9 are the standard zlib-style levels).
|
||||
* @param mode file access mode.
|
||||
 * - 'r': opens a file for reading/extracting (the file must exist).
|
||||
* - 'w': creates an empty file for writing.
|
||||
* - 'a': appends to an existing archive.
|
||||
*
|
||||
* @return the zip archive handler or NULL on error
|
||||
*/
|
||||
extern ZIP_EXPORT struct zip_t *zip_stream_open(const char *stream, size_t size,
|
||||
int level, char mode);
|
||||
|
||||
/**
|
||||
* Opens zip archive stream into memory.
|
||||
 * The function additionally reports the failure reason via the errnum output argument.
|
||||
*
|
||||
* @param stream zip archive stream.
|
||||
 * @param size stream size.
|
||||
* @param level compression level (0-9 are the standard zlib-style levels).
|
||||
* @param mode file access mode.
|
||||
 * - 'r': opens a file for reading/extracting (the file must exist).
|
||||
* - 'w': creates an empty file for writing.
|
||||
* - 'a': appends to an existing archive.
|
||||
* @param errnum 0 on success, negative number (< 0) on error.
|
||||
*
|
||||
* @return the zip archive handler or NULL on error
|
||||
*/
|
||||
extern ZIP_EXPORT struct zip_t *zip_stream_openwitherror(const char *stream,
|
||||
size_t size, int level,
|
||||
char mode,
|
||||
int *errnum);
|
||||
|
||||
/**
|
||||
* Copy zip archive stream output buffer.
|
||||
*
|
||||
* @param zip zip archive handler.
|
||||
* @param buf output buffer. User should free buf.
|
||||
* @param bufsize output buffer size (in bytes).
|
||||
*
|
||||
* @return copy size
|
||||
*/
|
||||
extern ZIP_EXPORT ssize_t zip_stream_copy(struct zip_t *zip, void **buf,
|
||||
size_t *bufsize);
|
||||
|
||||
/**
|
||||
* Close zip archive releases resources.
|
||||
*
|
||||
* @param zip zip archive handler.
|
||||
*
|
||||
 * @return none (all resources held by the archive handler are released).
|
||||
*/
|
||||
extern ZIP_EXPORT void zip_stream_close(struct zip_t *zip);
|
||||
|
||||
/**
|
||||
* Creates a new archive and puts files into a single zip archive.
|
||||
*
|
||||
* @param zipname zip archive file.
|
||||
* @param filenames input files.
|
||||
 * @param len number of input files.
|
||||
*
|
||||
* @return the return code - 0 on success, negative number (< 0) on error.
|
||||
*/
|
||||
extern ZIP_EXPORT int zip_create(const char *zipname, const char *filenames[],
|
||||
size_t len);
|
||||
|
||||
/**
|
||||
* Extracts a zip archive file into directory.
|
||||
*
|
||||
* If on_extract_entry is not NULL, the callback will be called after
|
||||
* successfully extracted each zip entry.
|
||||
* Returning a negative value from the callback will cause abort and return an
|
||||
* error. The last argument (void *arg) is optional, which you can use to pass
|
||||
* data to the on_extract_entry callback.
|
||||
*
|
||||
* @param zipname zip archive file.
|
||||
* @param dir output directory.
|
||||
* @param on_extract_entry on extract callback.
|
||||
* @param arg opaque pointer.
|
||||
*
|
||||
* @return the return code - 0 on success, negative number (< 0) on error.
|
||||
*/
|
||||
extern ZIP_EXPORT int zip_extract(const char *zipname, const char *dir,
|
||||
int (*on_extract_entry)(const char *filename,
|
||||
void *arg),
|
||||
void *arg);
|
||||
/** @} */
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
186
util.cpp
Normal file
186
util.cpp
Normal file
@ -0,0 +1,186 @@
|
||||
#include "util.h"
|
||||
|
||||
#include <stdarg.h>
|
||||
#include <fstream>
|
||||
#include <thread>
|
||||
#include <unordered_set>
|
||||
#include <vector>
|
||||
|
||||
#if defined(__APPLE__) && defined(__MACH__)
|
||||
#include <sys/sysctl.h>
|
||||
#include <sys/types.h>
|
||||
#endif
|
||||
|
||||
#if !defined(_WIN32)
|
||||
#include <sys/ioctl.h>
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
// Returns true when `str` ends with `ending` (an empty `ending` always matches).
bool ends_with(const std::string& str, const std::string& ending) {
    const size_t n = str.length();
    const size_t m = ending.length();
    return n >= m && str.compare(n - m, m, ending) == 0;
}
|
||||
|
||||
// Returns true when `str` begins with `start` (an empty `start` always matches).
// Uses a bounded prefix compare instead of find(), which would scan the whole
// string on a mismatch (O(n*m) worst case for long inputs).
bool starts_with(const std::string& str, const std::string& start) {
    return str.compare(0, start.length(), start) == 0;
}
|
||||
|
||||
// Replaces every occurrence of `target` in `str` with `replacement`, in place.
void replace_all_chars(std::string& str, char target, char replacement) {
    for (char& c : str) {
        if (c == target) {
            c = replacement;
        }
    }
}
|
||||
|
||||
// printf-style formatting into a std::string.
// Two-pass vsnprintf: first call measures the required size, second call
// writes into a correctly sized buffer.
std::string format(const char* fmt, ...) {
    va_list ap;
    va_start(ap, fmt);
    va_list ap2;
    va_copy(ap2, ap);
    const int size = vsnprintf(NULL, 0, fmt, ap);
    va_end(ap);
    if (size < 0) {
        // Encoding/format error: the original code would have constructed a
        // string with a negative (wrapped-around) length here.
        va_end(ap2);
        return std::string();
    }
    std::vector<char> buf(size + 1);
    vsnprintf(buf.data(), buf.size(), fmt, ap2);
    va_end(ap2);
    return std::string(buf.data(), size);
}
|
||||
|
||||
#ifdef _WIN32 // code for windows
|
||||
#include <windows.h>
|
||||
|
||||
// True when `filename` names an existing regular file (directories excluded).
bool file_exists(const std::string& filename) {
    DWORD attributes = GetFileAttributesA(filename.c_str());
    // INVALID_FILE_ATTRIBUTES means the path could not be queried at all
    // (missing file, access denied, bad path, ...).
    return (attributes != INVALID_FILE_ATTRIBUTES && !(attributes & FILE_ATTRIBUTE_DIRECTORY));
}
|
||||
|
||||
// True when `path` names an existing directory.
bool is_directory(const std::string& path) {
    DWORD attributes = GetFileAttributesA(path.c_str());
    // Must both be queryable and carry the directory attribute bit.
    return (attributes != INVALID_FILE_ATTRIBUTES && (attributes & FILE_ATTRIBUTE_DIRECTORY));
}
|
||||
|
||||
#else // Unix
|
||||
#include <dirent.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
// True when `filename` names an existing regular file (directories excluded).
bool file_exists(const std::string& filename) {
    struct stat info;
    if (stat(filename.c_str(), &info) != 0) {
        return false;  // path does not exist or cannot be queried
    }
    return S_ISREG(info.st_mode);
}
|
||||
|
||||
// True when `path` names an existing directory.
bool is_directory(const std::string& path) {
    struct stat info;
    if (stat(path.c_str(), &info) != 0) {
        return false;  // path does not exist or cannot be queried
    }
    return S_ISDIR(info.st_mode);
}
|
||||
|
||||
#endif
|
||||
|
||||
// get_num_physical_cores is copy from
|
||||
// https://github.com/ggerganov/llama.cpp/blob/master/examples/common.cpp
|
||||
// LICENSE: https://github.com/ggerganov/llama.cpp/blob/master/LICENSE
|
||||
// Best-effort count of physical CPU cores.
// get_num_physical_cores is copy from
// https://github.com/ggerganov/llama.cpp/blob/master/examples/common.cpp
// LICENSE: https://github.com/ggerganov/llama.cpp/blob/master/LICENSE
int32_t get_num_physical_cores() {
#ifdef __linux__
    // Each distinct thread_siblings mask in sysfs corresponds to one physical
    // core, so the number of unique masks is the number of cores.
    std::unordered_set<std::string> core_masks;
    for (uint32_t cpu_index = 0; cpu_index < UINT32_MAX; ++cpu_index) {
        std::ifstream siblings_file("/sys/devices/system/cpu" + std::to_string(cpu_index) + "/topology/thread_siblings");
        if (!siblings_file.is_open()) {
            break;  // no more cpus
        }
        std::string mask;
        if (std::getline(siblings_file, mask)) {
            core_masks.insert(mask);
        }
    }
    if (!core_masks.empty()) {
        return static_cast<int32_t>(core_masks.size());
    }
#elif defined(__APPLE__) && defined(__MACH__)
    // Prefer the performance-core count (Apple Silicon), then the generic one.
    int32_t physical_cores;
    size_t len = sizeof(physical_cores);
    if (sysctlbyname("hw.perflevel0.physicalcpu", &physical_cores, &len, NULL, 0) == 0) {
        return physical_cores;
    }
    if (sysctlbyname("hw.physicalcpu", &physical_cores, &len, NULL, 0) == 0) {
        return physical_cores;
    }
#elif defined(_WIN32)
    // TODO: Implement
#endif
    // Fallback heuristic: assume SMT above 4 logical threads, halve the count.
    unsigned int logical = std::thread::hardware_concurrency();
    return logical > 0 ? (logical <= 4 ? logical : logical / 2) : 4;
}
|
||||
|
||||
// Returns the final path component: everything after the last path separator.
// Searches '/' and '\\' together so mixed-separator paths behave correctly —
// the previous two-step lookup returned "b\\c" for "a/b\\c" instead of "c".
std::string basename(const std::string& path) {
    size_t pos = path.find_last_of("/\\");
    if (pos != std::string::npos) {
        return path.substr(pos + 1);
    }
    return path;  // no separator: the whole path is the basename
}
|
||||
|
||||
// Joins two path components; inserts '/' unless `p1` already ends with a
// separator. An empty component yields the other component unchanged.
std::string path_join(const std::string& p1, const std::string& p2) {
    if (p1.empty()) {
        return p2;
    }
    if (p2.empty()) {
        return p1;
    }
    const char tail = p1[p1.length() - 1];
    return (tail == '/' || tail == '\\') ? p1 + p2 : p1 + "/" + p2;
}
|
||||
|
||||
// Current global log threshold; log_printf drops messages below this level.
static SDLogLevel log_level = SDLogLevel::INFO;

// Sets the global minimum severity for subsequent log_printf calls.
void set_sd_log_level(SDLogLevel level) {
    log_level = level;
}
|
||||
|
||||
// Core logging routine behind the LOG_* macros.
// Prints "[LEVEL] file:line - message\n" and flushes; messages below the
// global threshold are dropped. Only ERROR goes to stderr, all other levels
// (including WARN, matching the original behavior) go to stdout.
// Consolidates the four previously duplicated per-level branches into one
// tag/stream selection followed by a single formatting path.
void log_printf(SDLogLevel level, const char* file, int line, const char* format, ...) {
    if (level < log_level) {
        return;
    }

    const char* tag = "ERROR";
    FILE* stream    = stderr;
    if (level == SDLogLevel::DEBUG) {
        tag    = "DEBUG";
        stream = stdout;
    } else if (level == SDLogLevel::INFO) {
        tag    = "INFO";
        stream = stdout;
    } else if (level == SDLogLevel::WARN) {
        tag    = "WARN";
        stream = stdout;
    }

    va_list args;
    va_start(args, format);
    fprintf(stream, "[%s] %s:%-4d - ", tag, basename(file).c_str(), line);
    vfprintf(stream, format, args);
    fprintf(stream, "\n");
    fflush(stream);
    va_end(args);
}
|
37
util.h
Normal file
37
util.h
Normal file
@ -0,0 +1,37 @@
|
||||
#ifndef __UTIL_H__
#define __UTIL_H__

#include <cstdint>  // added: the header uses int32_t but was not self-contained
#include <string>

// Returns true if `str` ends with `ending` (also true for an empty `ending`).
bool ends_with(const std::string& str, const std::string& ending);
// Returns true if `str` begins with `start` (also true for an empty `start`).
bool starts_with(const std::string& str, const std::string& start);

// printf-style formatting into a std::string.
std::string format(const char* fmt, ...);

// Replaces every occurrence of `target` in `str` with `replacement`, in place.
void replace_all_chars(std::string& str, char target, char replacement);

// True if `filename` names an existing regular file (not a directory).
bool file_exists(const std::string& filename);
// True if `path` names an existing directory.
bool is_directory(const std::string& path);

// Returns the final path component (text after the last path separator).
std::string basename(const std::string& path);

// Joins two path components, inserting '/' unless p1 ends with a separator.
std::string path_join(const std::string& p1, const std::string& p2);

// Best-effort count of physical CPU cores (heuristic fallback otherwise).
int32_t get_num_physical_cores();

// Log severity levels, in increasing order of importance.
// NOTE(review): plain enum exposes DEBUG/INFO/WARN/ERROR unscoped; on Windows
// ERROR can collide with a <windows.h> macro — confirm before including both.
enum SDLogLevel {
    DEBUG,
    INFO,
    WARN,
    ERROR
};

// Sets the global minimum severity; messages below it are discarded.
void set_sd_log_level(SDLogLevel level);

// Core logging routine; prefer the LOG_* macros, which fill in file/line.
void log_printf(SDLogLevel level, const char* file, int line, const char* format, ...);

#define LOG_DEBUG(format, ...) log_printf(SDLogLevel::DEBUG, __FILE__, __LINE__, format, ##__VA_ARGS__)
#define LOG_INFO(format, ...) log_printf(SDLogLevel::INFO, __FILE__, __LINE__, format, ##__VA_ARGS__)
#define LOG_WARN(format, ...) log_printf(SDLogLevel::WARN, __FILE__, __LINE__, format, ##__VA_ARGS__)
#define LOG_ERROR(format, ...) log_printf(SDLogLevel::ERROR, __FILE__, __LINE__, format, ##__VA_ARGS__)
#endif  // __UTIL_H__
|
Loading…
Reference in New Issue
Block a user