feat: add convert api (#142)

Authored by leejet on 2024-01-14 11:43:24 +08:00, committed by GitHub
parent 2b6ec97fe2
commit 5c614e4bc2
5 changed files with 167 additions and 25 deletions

README.md

@ -126,7 +126,7 @@ cmake .. -DSD_METAL=ON
cmake --build . --config Release
```
### Using Flash Attention
##### Using Flash Attention
Enabling flash attention reduces memory usage by at least 400 MB. At the moment, it is not supported when CUBLAS is enabled because the kernel implementation is missing.
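For reference, a build with flash attention enabled would look roughly like the sketch below; the `SD_FLASH_ATTN` option name is assumed here, mirroring the `SD_METAL` option shown above.

```sh
# hedged sketch: assumes the flash-attention CMake option is named SD_FLASH_ATTN
cmake .. -DSD_FLASH_ATTN=ON
cmake --build . --config Release
```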
@ -142,7 +142,7 @@ usage: ./bin/sd [arguments]
arguments:
-h, --help show this help message and exit
-M, --mode [txt2img or img2img] generation mode (default: txt2img)
-M, --mode [MODE] run mode (txt2img or img2img or convert, default: txt2img)
-t, --threads N number of threads to use during computation (default: -1).
If threads <= 0, then threads will be set to the number of CPU physical cores
-m, --model [MODEL] path to model
@ -168,7 +168,8 @@ arguments:
-s SEED, --seed SEED RNG seed (default: 42, use random seed for < 0)
-b, --batch-count COUNT number of images to generate.
--schedule {discrete, karras} Denoiser sigma schedule (default: discrete)
--clip-skip N number of layers to skip of clip model (default: 0)
--clip-skip N ignore last layers of CLIP network; 1 ignores none, 2 ignores one layer (default: -1)
<= 0 represents unspecified, will be 1 for SD1.x, 2 for SD2.x
--vae-tiling process vae in tiles to reduce memory usage
-v, --verbose print extra info
```
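As a concrete example of the reworked `--clip-skip` option above, passing `2` explicitly ignores the last CLIP layer (the SD2.x default), using the same model and prompt as the examples below:

```sh
./bin/sd -m ../models/v1-5-pruned-emaonly.safetensors -p "a lovely cat" --clip-skip 2
```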
@ -183,6 +184,16 @@ You can specify the model weight type using the `--type` parameter. The weights
- `q5_0` or `q5_1` for 5-bit integer quantization
- `q4_0` or `q4_1` for 4-bit integer quantization
#### Convert to GGUF
You can also convert weights in the formats `ckpt/safetensors/diffusers` to gguf and perform quantization in advance, avoiding the need for quantization every time you load them.
For example:
```sh
./bin/sd -M convert -m ../models/v1-5-pruned-emaonly.safetensors -o ../models/v1-5-pruned-emaonly.q8_0.gguf -v --type q8_0
```
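The resulting GGUF file can then be passed directly as the model for generation, so the weights are already quantized when loaded; a minimal sketch reusing the path from the conversion above:

```sh
./bin/sd -m ../models/v1-5-pruned-emaonly.q8_0.gguf -p "a lovely cat"
```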
#### txt2img example
```sh
@ -240,7 +251,7 @@ Here's a simple example:
| ---- |---- |
| ![](./assets/without_lcm.png) |![](./assets/with_lcm.png) |
## Using TAESD for faster decoding
#### Using TAESD for faster decoding
You can use TAESD to accelerate the decoding of latent images by following these steps:
@ -258,7 +269,7 @@ curl -L -O https://huggingface.co/madebyollin/taesd/blob/main/diffusion_pytorch_
sd -m ../models/v1-5-pruned-emaonly.safetensors -p "a lovely cat" --taesd ../models/diffusion_pytorch_model.safetensors
```
## Using ESRGAN to upscale results
#### Using ESRGAN to upscale results
You can use ESRGAN to upscale the generated images. At the moment, only the [RealESRGAN_x4plus_anime_6B.pth](https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.2.4/RealESRGAN_x4plus_anime_6B.pth) model is supported. Support for more models of this architecture will be added soon.
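For illustration, upscaling is combined with a normal generation run. The sketch below assumes the ESRGAN weights are passed via an `--upscale-model` option (that flag does not appear in this excerpt), pointing at the model file downloaded from the link above:

```sh
./bin/sd -m ../models/v1-5-pruned-emaonly.safetensors -p "a lovely cat" --upscale-model ../models/RealESRGAN_x4plus_anime_6B.pth
```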

main.cpp

@ -42,11 +42,13 @@ const char* schedule_str[] = {
const char* modes_str[] = {
"txt2img",
"img2img",
"convert",
};
enum SDMode {
TXT2IMG,
IMG2IMG,
CONVERT,
MODE_COUNT
};
@ -125,7 +127,7 @@ void print_usage(int argc, const char* argv[]) {
printf("\n");
printf("arguments:\n");
printf(" -h, --help show this help message and exit\n");
printf(" -M, --mode [txt2img or img2img] generation mode (default: txt2img)\n");
printf(" -M, --mode [MODEL] run mode (txt2img or img2img or convert, default: txt2img)\n");
printf(" -t, --threads N number of threads to use during computation (default: -1).\n");
printf(" If threads <= 0, then threads will be set to the number of CPU physical cores\n");
printf(" -m, --model [MODEL] path to model\n");
@ -384,7 +386,7 @@ void parse_args(int argc, const char** argv, SDParams& params) {
params.n_threads = get_num_physical_cores();
}
if (params.prompt.length() == 0) {
if (params.mode != CONVERT && params.prompt.length() == 0) {
fprintf(stderr, "error: the following arguments are required: prompt\n");
print_usage(argc, argv);
exit(1);
@ -432,6 +434,12 @@ void parse_args(int argc, const char** argv, SDParams& params) {
srand((int)time(NULL));
params.seed = rand();
}
if (params.mode == CONVERT) {
if (params.output_path == "output.png") {
params.output_path = "output.gguf";
}
}
}
std::string get_image_params(SDParams params, int64_t seed) {
@ -479,6 +487,24 @@ int main(int argc, const char* argv[]) {
printf("%s", sd_get_system_info());
}
if (params.mode == CONVERT) {
bool success = convert(params.model_path.c_str(), params.vae_path.c_str(), params.output_path.c_str(), params.wtype);
if (!success) {
fprintf(stderr,
"convert '%s'/'%s' to '%s' failed\n",
params.model_path.c_str(),
params.vae_path.c_str(),
params.output_path.c_str());
return 1;
} else {
printf("convert '%s'/'%s' to '%s' success\n",
params.model_path.c_str(),
params.vae_path.c_str(),
params.output_path.c_str());
return 0;
}
}
bool vae_decode_only = true;
uint8_t* input_image_buffer = NULL;
if (params.mode == IMG2IMG) {

model.cpp

@ -15,6 +15,8 @@
#include "ggml/ggml-backend.h"
#include "ggml/ggml.h"
#include "stable-diffusion.h"
#ifdef SD_USE_METAL
#include "ggml-metal.h"
#endif
@ -609,7 +611,7 @@ bool is_safetensors_file(const std::string& file_path) {
}
size_t header_size_ = read_u64(header_size_buf);
if (header_size_ >= file_size_) {
if (header_size_ >= file_size_ || header_size_ <= 2) {
return false;
}
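Context for the tightened check: a safetensors file begins with an 8-byte little-endian length of its JSON header, and the smallest syntactically valid header (`{}`) is 2 bytes and describes no tensors, so lengths of 2 or less, or lengths that do not fit in the file, can be rejected up front. A self-contained sketch of the same sanity check, independent of the surrounding helpers:

```cpp
#include <cstdint>
#include <cstring>

// Hedged sketch of the safetensors header sanity check; assumes a little-endian
// host, matching how the 8-byte length prefix is interpreted above.
bool plausible_safetensors_header(const uint8_t length_prefix[8], uint64_t file_size) {
    uint64_t header_size = 0;
    std::memcpy(&header_size, length_prefix, sizeof(header_size));
    // The header must fit inside the file and must be large enough to hold a
    // JSON object that actually describes tensors ("{}" alone is only 2 bytes).
    return header_size < file_size && header_size > 2;
}
```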
@ -1181,6 +1183,9 @@ SDVersion ModelLoader::get_sd_version() {
if (tensor_storage.name.find("conditioner.embedders.1") != std::string::npos) {
return VERSION_XL;
}
if (tensor_storage.name.find("cond_stage_model.1") != std::string::npos) {
return VERSION_XL;
}
if (tensor_storage.name == "cond_stage_model.transformer.text_model.embeddings.token_embedding.weight" ||
tensor_storage.name == "cond_stage_model.model.token_embedding.weight" ||
tensor_storage.name == "text_model.embeddings.token_embedding.weight" ||
@ -1218,7 +1223,35 @@ std::string ModelLoader::load_merges() {
return merges_utf8_str;
}
void remove_duplicates(std::vector<TensorStorage>& vec) {
std::unordered_map<std::string, size_t> name_to_index_map;
for (size_t i = 0; i < vec.size(); ++i) {
const std::string& current_name = vec[i].name;
auto it = name_to_index_map.find(current_name);
if (it != name_to_index_map.end()) {
vec[it->second] = vec[i];
} else {
name_to_index_map[current_name] = i;
}
}
vec.resize(name_to_index_map.size());
}
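`remove_duplicates` keeps one entry per tensor name: the slot of the first occurrence is retained, but its contents are overwritten by the last duplicate seen. A tiny, self-contained illustration of that behaviour; the `Item` struct is hypothetical, standing in for `TensorStorage`:

```cpp
#include <cstdio>
#include <string>
#include <unordered_map>
#include <vector>

struct Item {               // hypothetical stand-in for TensorStorage
    std::string name;
    int payload;
};

// Same idea as remove_duplicates above: last duplicate wins, first slot is kept.
static void dedup(std::vector<Item>& vec) {
    std::unordered_map<std::string, size_t> name_to_index;
    for (size_t i = 0; i < vec.size(); ++i) {
        auto it = name_to_index.find(vec[i].name);
        if (it != name_to_index.end()) {
            vec[it->second] = vec[i];   // overwrite the earlier entry in place
        } else {
            name_to_index[vec[i].name] = i;
        }
    }
    vec.resize(name_to_index.size());
}

int main() {
    std::vector<Item> items = {{"alpha", 1}, {"beta", 2}, {"alpha", 3}};
    dedup(items);
    for (const auto& it : items) {
        std::printf("%s=%d\n", it.name.c_str(), it.payload);  // alpha=3, beta=2
    }
    return 0;
}
```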
bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, ggml_backend_t backend) {
std::vector<TensorStorage> processed_tensor_storages;
for (auto& tensor_storage : tensor_storages) {
// LOG_DEBUG("%s", name.c_str());
if (is_unused_tensor(tensor_storage.name)) {
continue;
}
preprocess_tensor(tensor_storage, processed_tensor_storages);
}
remove_duplicates(processed_tensor_storages);
bool success = true;
for (size_t file_index = 0; file_index < file_paths_.size(); file_index++) {
std::string file_path = file_paths_[file_index];
@ -1276,22 +1309,10 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, ggml_backend
return true;
};
std::vector<TensorStorage> processed_tensor_storages;
for (auto& tensor_storage : tensor_storages) {
for (auto& tensor_storage : processed_tensor_storages) {
if (tensor_storage.file_index != file_index) {
continue;
}
// LOG_DEBUG("%s", name.c_str());
if (is_unused_tensor(tensor_storage.name)) {
continue;
}
preprocess_tensor(tensor_storage, processed_tensor_storages);
}
for (auto& tensor_storage : processed_tensor_storages) {
// LOG_DEBUG("%s", tensor_storage.name.c_str());
ggml_tensor* dst_tensor = NULL;
@ -1437,7 +1458,61 @@ bool ModelLoader::load_tensors(std::map<std::string, struct ggml_tensor*>& tenso
return true;
}
int64_t ModelLoader::cal_mem_size(ggml_backend_t backend) {
bool ModelLoader::save_to_gguf_file(const std::string& file_path, ggml_type type) {
auto backend = ggml_backend_cpu_init();
size_t mem_size = 1 * 1024 * 1024; // for padding
mem_size += tensor_storages.size() * ggml_tensor_overhead();
mem_size += cal_mem_size(backend, type);
LOG_INFO("model tensors mem size: %.2fMB", mem_size / 1024.f / 1024.f);
ggml_context* ggml_ctx = ggml_init({mem_size, NULL, false});
gguf_context* gguf_ctx = gguf_init_empty();
auto on_new_tensor_cb = [&](const TensorStorage& tensor_storage, ggml_tensor** dst_tensor) -> bool {
const std::string& name = tensor_storage.name;
ggml_type tensor_type = tensor_storage.type;
if (type != GGML_TYPE_COUNT) {
if (ggml_is_quantized(type) && tensor_storage.ne[0] % 32 != 0) {
tensor_type = GGML_TYPE_F16;
} else {
tensor_type = type;
}
}
ggml_tensor* tensor = ggml_new_tensor(ggml_ctx, tensor_type, tensor_storage.n_dims, tensor_storage.ne);
if (tensor == NULL) {
LOG_ERROR("ggml_new_tensor failed");
return false;
}
ggml_set_name(tensor, name.c_str());
// LOG_DEBUG("%s %d %s %d[%d %d %d %d] %d[%d %d %d %d]", name.c_str(),
// ggml_nbytes(tensor), ggml_type_name(tensor_type),
// tensor_storage.n_dims,
// tensor_storage.ne[0], tensor_storage.ne[1], tensor_storage.ne[2], tensor_storage.ne[3],
// tensor->n_dims, tensor->ne[0], tensor->ne[1], tensor->ne[2], tensor->ne[3]);
*dst_tensor = tensor;
gguf_add_tensor(gguf_ctx, tensor);
return true;
};
bool success = load_tensors(on_new_tensor_cb, backend);
ggml_backend_free(backend);
LOG_INFO("load tensors done");
LOG_INFO("trying to save tensors to %s", file_path.c_str());
if (success) {
gguf_write_to_file(gguf_ctx, file_path.c_str(), false);
}
ggml_free(ggml_ctx);
gguf_free(gguf_ctx);
return success;
}
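The callback above decides the on-disk type per tensor: when a target type is requested, a block-quantized format is only used for tensors whose first dimension is a multiple of 32, and such tensors otherwise fall back to F16; `cal_mem_size` below applies the same rule when estimating the buffer size. A minimal restatement of that rule as a standalone helper (hypothetical, not part of the codebase):

```cpp
#include "ggml/ggml.h"

// Hypothetical helper mirroring the per-tensor type selection used above.
static ggml_type pick_output_type(ggml_type requested, ggml_type original, int64_t ne0) {
    if (requested == GGML_TYPE_COUNT) {
        return original;       // no target type requested: keep the stored type
    }
    if (ggml_is_quantized(requested) && ne0 % 32 != 0) {
        return GGML_TYPE_F16;  // row length not a multiple of the 32-element block
    }
    return requested;
}
```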
int64_t ModelLoader::cal_mem_size(ggml_backend_t backend, ggml_type type) {
size_t alignment = 128;
if (backend != NULL) {
alignment = ggml_backend_get_alignment(backend);
@ -1452,8 +1527,35 @@ int64_t ModelLoader::cal_mem_size(ggml_backend_t backend) {
}
for (auto& tensor_storage : processed_tensor_storages) {
ggml_type tensor_type = tensor_storage.type;
if (type != GGML_TYPE_COUNT) {
if (ggml_is_quantized(type) && tensor_storage.ne[0] % 32 != 0) {
tensor_type = GGML_TYPE_F16;
} else {
tensor_type = type;
}
}
tensor_storage.type = tensor_type;
mem_size += tensor_storage.nbytes() + alignment;
}
return mem_size;
}
bool convert(const char* input_path, const char* vae_path, const char* output_path, sd_type_t output_type) {
ModelLoader model_loader;
if (!model_loader.init_from_file(input_path)) {
LOG_ERROR("init model loader from file failed: '%s'", input_path);
return false;
}
if (vae_path != NULL && strlen(vae_path) > 0) {
if (!model_loader.init_from_file(vae_path, "vae.")) {
LOG_ERROR("init model loader from file failed: '%s'", vae_path);
return false;
}
}
bool success = model_loader.save_to_gguf_file(output_path, (ggml_type)output_type);
return success;
}
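Because `convert` also accepts an optional VAE path and loads those tensors under a `vae.` prefix, a separate VAE can be folded into the converted file. From the CLI this would look roughly like the sketch below; the `--vae` flag name and the output filename are assumptions, as neither appears in this excerpt:

```sh
./bin/sd -M convert -m ../models/v1-5-pruned-emaonly.safetensors --vae ../models/custom_vae.safetensors -o ../models/v1-5-with-vae.q8_0.gguf --type q8_0
```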

model.h

@ -4,9 +4,9 @@
#include <functional>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <vector>
#include <set>
#include "ggml/ggml-backend.h"
#include "ggml/ggml.h"
@ -121,7 +121,8 @@ public:
bool load_tensors(std::map<std::string, struct ggml_tensor*>& tensors,
ggml_backend_t backend,
std::set<std::string> ignore_tensors = {});
int64_t cal_mem_size(ggml_backend_t backend);
bool save_to_gguf_file(const std::string& file_path, ggml_type type);
int64_t cal_mem_size(ggml_backend_t backend, ggml_type type = GGML_TYPE_COUNT);
~ModelLoader() = default;
};
#endif // __MODEL_H__

stable-diffusion.h

@ -148,7 +148,9 @@ SD_API upscaler_ctx_t* new_upscaler_ctx(const char* esrgan_path,
enum sd_type_t wtype);
SD_API void free_upscaler_ctx(upscaler_ctx_t* upscaler_ctx);
SD_API sd_image_t upscale(upscaler_ctx_t*, sd_image_t input_image, uint32_t upscale_factor);
SD_API sd_image_t upscale(upscaler_ctx_t* upscaler_ctx, sd_image_t input_image, uint32_t upscale_factor);
SD_API bool convert(const char* input_path, const char* vae_path, const char* output_path, sd_type_t output_type);
#ifdef __cplusplus
}
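Finally, the new `convert` entry point can also be called directly through the public header. A minimal, hedged sketch follows; the `SD_TYPE_Q8_0` enumerator is assumed to exist in `sd_type_t` (mirroring the `q8_0` weight type mentioned in the README section above), and the file paths are placeholders:

```cpp
#include <cstdio>

#include "stable-diffusion.h"

int main() {
    // Paths are placeholders; SD_TYPE_Q8_0 is assumed to be one of the sd_type_t values.
    const char* input  = "models/v1-5-pruned-emaonly.safetensors";
    const char* vae    = "";  // empty string: no separate VAE to merge
    const char* output = "models/v1-5-pruned-emaonly.q8_0.gguf";

    if (!convert(input, vae, output, SD_TYPE_Q8_0)) {
        std::fprintf(stderr, "conversion failed\n");
        return 1;
    }
    std::printf("wrote %s\n", output);
    return 0;
}
```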