feat: add convert api (#142)
parent 2b6ec97fe2 · commit 5c614e4bc2
README.md (21 lines changed)

@@ -126,7 +126,7 @@ cmake .. -DSD_METAL=ON
 cmake --build . --config Release
 ```
 
-### Using Flash Attention
+##### Using Flash Attention
 
 Enabling flash attention reduces memory usage by at least 400 MB. At the moment, it is not supported when CUBLAS is enabled because the kernel implementation is missing.
@@ -142,7 +142,7 @@ usage: ./bin/sd [arguments]
 
 arguments:
   -h, --help                         show this help message and exit
-  -M, --mode [txt2img or img2img]    generation mode (default: txt2img)
+  -M, --mode [MODE]                  run mode (txt2img or img2img or convert, default: txt2img)
   -t, --threads N                    number of threads to use during computation (default: -1).
                                      If threads <= 0, then threads will be set to the number of CPU physical cores
   -m, --model [MODEL]                path to model
@@ -168,7 +168,8 @@ arguments:
   -s SEED, --seed SEED               RNG seed (default: 42, use random seed for < 0)
   -b, --batch-count COUNT            number of images to generate.
   --schedule {discrete, karras}      Denoiser sigma schedule (default: discrete)
-  --clip-skip N                      number of layers to skip of clip model (default: 0)
+  --clip-skip N                      ignore last layers of CLIP network; 1 ignores none, 2 ignores one layer (default: -1)
+                                     <= 0 represents unspecified, will be 1 for SD1.x, 2 for SD2.x
   --vae-tiling                       process vae in tiles to reduce memory usage
   -v, --verbose                      print extra info
 ```
@@ -183,6 +184,16 @@ You can specify the model weight type using the `--type` parameter. The weights
 - `q5_0` or `q5_1` for 5-bit integer quantization
 - `q4_0` or `q4_1` for 4-bit integer quantization
 
+#### Convert to GGUF
+
+You can also convert weights in the formats `ckpt/safetensors/diffusers` to gguf and perform quantization in advance, avoiding the need for quantization every time you load them.
+
+For example:
+
+```sh
+./bin/sd -M convert -m ../models/v1-5-pruned-emaonly.safetensors -o ../models/v1-5-pruned-emaonly.q8_0.gguf -v --type q8_0
+```
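Note: since this commit also exposes `convert` through the public header (see the stable-diffusion.h hunk at the end of this diff), the same conversion can be driven from code. A minimal sketch, not part of the commit; `SD_TYPE_Q8_0` is assumed to be the `sd_type_t` value corresponding to `--type q8_0`, and the paths are placeholders:

```cpp
// Sketch: calling the new convert API directly instead of going
// through the CLI. An empty vae_path means "no external VAE to merge"
// (convert() only loads the VAE when strlen(vae_path) > 0).
#include <cstdio>
#include "stable-diffusion.h"

int main() {
    bool ok = convert("../models/v1-5-pruned-emaonly.safetensors",
                      /*vae_path=*/"",
                      "../models/v1-5-pruned-emaonly.q8_0.gguf",
                      SD_TYPE_Q8_0);
    fprintf(stderr, "convert %s\n", ok ? "succeeded" : "failed");
    return ok ? 0 : 1;
}
```

The resulting `.gguf` file can then be passed back to `-m` like any other model file, skipping quantization on every load.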
+
 #### txt2img example
 
 ```sh
@@ -240,7 +251,7 @@ Here's a simple example:
 | ---- |---- |
 |  | |
 
-## Using TAESD to faster decoding
+#### Using TAESD for faster decoding
 
 You can use TAESD to accelerate the decoding of latent images by following these steps:
@@ -258,7 +269,7 @@ curl -L -O https://huggingface.co/madebyollin/taesd/blob/main/diffusion_pytorch_
 sd -m ../models/v1-5-pruned-emaonly.safetensors -p "a lovely cat" --taesd ../models/diffusion_pytorch_model.safetensors
 ```
 
-## Using ESRGAN to upscale results
+#### Using ESRGAN to upscale results
 
 You can use ESRGAN to upscale the generated images. At the moment, only the [RealESRGAN_x4plus_anime_6B.pth](https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.2.4/RealESRGAN_x4plus_anime_6B.pth) model is supported. Support for more models of this architecture will be added soon.
examples/cli/main.cpp

@@ -42,11 +42,13 @@ const char* schedule_str[] = {
 const char* modes_str[] = {
     "txt2img",
     "img2img",
+    "convert",
 };
 
 enum SDMode {
     TXT2IMG,
     IMG2IMG,
+    CONVERT,
     MODE_COUNT
 };
@@ -125,7 +127,7 @@ void print_usage(int argc, const char* argv[]) {
     printf("\n");
     printf("arguments:\n");
     printf("  -h, --help                         show this help message and exit\n");
-    printf("  -M, --mode [txt2img or img2img]    generation mode (default: txt2img)\n");
+    printf("  -M, --mode [MODE]                  run mode (txt2img or img2img or convert, default: txt2img)\n");
     printf("  -t, --threads N                    number of threads to use during computation (default: -1).\n");
     printf("                                     If threads <= 0, then threads will be set to the number of CPU physical cores\n");
     printf("  -m, --model [MODEL]                path to model\n");
@@ -384,7 +386,7 @@ void parse_args(int argc, const char** argv, SDParams& params) {
         params.n_threads = get_num_physical_cores();
     }
 
-    if (params.prompt.length() == 0) {
+    if (params.mode != CONVERT && params.prompt.length() == 0) {
         fprintf(stderr, "error: the following arguments are required: prompt\n");
         print_usage(argc, argv);
         exit(1);
@@ -432,6 +434,12 @@ void parse_args(int argc, const char** argv, SDParams& params) {
         srand((int)time(NULL));
         params.seed = rand();
     }
+
+    if (params.mode == CONVERT) {
+        if (params.output_path == "output.png") {
+            params.output_path = "output.gguf";
+        }
+    }
 }
 
 std::string get_image_params(SDParams params, int64_t seed) {
@@ -479,6 +487,24 @@ int main(int argc, const char* argv[]) {
         printf("%s", sd_get_system_info());
     }
 
+    if (params.mode == CONVERT) {
+        bool success = convert(params.model_path.c_str(), params.vae_path.c_str(), params.output_path.c_str(), params.wtype);
+        if (!success) {
+            fprintf(stderr,
+                    "convert '%s'/'%s' to '%s' failed\n",
+                    params.model_path.c_str(),
+                    params.vae_path.c_str(),
+                    params.output_path.c_str());
+            return 1;
+        } else {
+            printf("convert '%s'/'%s' to '%s' success\n",
+                   params.model_path.c_str(),
+                   params.vae_path.c_str(),
+                   params.output_path.c_str());
+            return 0;
+        }
+    }
+
     bool vae_decode_only        = true;
     uint8_t* input_image_buffer = NULL;
     if (params.mode == IMG2IMG) {
model.cpp (132 lines changed)

@@ -15,6 +15,8 @@
 #include "ggml/ggml-backend.h"
 #include "ggml/ggml.h"
 
+#include "stable-diffusion.h"
+
 #ifdef SD_USE_METAL
 #include "ggml-metal.h"
 #endif
@@ -609,7 +611,7 @@ bool is_safetensors_file(const std::string& file_path) {
     }
 
     size_t header_size_ = read_u64(header_size_buf);
-    if (header_size_ >= file_size_) {
+    if (header_size_ >= file_size_ || header_size_ <= 2) {
         return false;
     }
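Note on the tightened check above: a safetensors file starts with an 8-byte little-endian length for its JSON header, so a value of 2 or less (at most an empty `{}`) cannot describe any tensors, and a value at or beyond the file size is corrupt. A standalone sketch of the same validation, with a hypothetical helper name:

```cpp
#include <cstdint>

// Sketch: sanity-check the 8-byte safetensors header-length prefix.
// A useful JSON header is longer than "{}" (2 bytes) and must fit
// within the file, mirroring the condition in is_safetensors_file.
static bool plausible_safetensors_header_size(const uint8_t buf[8], uint64_t file_size) {
    uint64_t header_size = 0;
    for (int i = 7; i >= 0; --i) {
        header_size = (header_size << 8) | buf[i];  // little-endian decode
    }
    return header_size > 2 && header_size < file_size;
}
```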
@@ -1181,6 +1183,9 @@ SDVersion ModelLoader::get_sd_version() {
         if (tensor_storage.name.find("conditioner.embedders.1") != std::string::npos) {
             return VERSION_XL;
         }
+        if (tensor_storage.name.find("cond_stage_model.1") != std::string::npos) {
+            return VERSION_XL;
+        }
         if (tensor_storage.name == "cond_stage_model.transformer.text_model.embeddings.token_embedding.weight" ||
             tensor_storage.name == "cond_stage_model.model.token_embedding.weight" ||
             tensor_storage.name == "text_model.embeddings.token_embedding.weight" ||
@@ -1218,7 +1223,35 @@ std::string ModelLoader::load_merges() {
     return merges_utf8_str;
 }
 
+void remove_duplicates(std::vector<TensorStorage>& vec) {
+    std::unordered_map<std::string, size_t> name_to_index_map;
+
+    for (size_t i = 0; i < vec.size(); ++i) {
+        const std::string& current_name = vec[i].name;
+        auto it = name_to_index_map.find(current_name);
+
+        if (it != name_to_index_map.end()) {
+            vec[it->second] = vec[i];
+        } else {
+            name_to_index_map[current_name] = i;
+        }
+    }
+
+    vec.resize(name_to_index_map.size());
+}
+
 bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, ggml_backend_t backend) {
+    std::vector<TensorStorage> processed_tensor_storages;
+    for (auto& tensor_storage : tensor_storages) {
+        // LOG_DEBUG("%s", name.c_str());
+
+        if (is_unused_tensor(tensor_storage.name)) {
+            continue;
+        }
+
+        preprocess_tensor(tensor_storage, processed_tensor_storages);
+    }
+    remove_duplicates(processed_tensor_storages);
     bool success = true;
    for (size_t file_index = 0; file_index < file_paths_.size(); file_index++) {
         std::string file_path = file_paths_[file_index];
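A note on the `remove_duplicates` helper added above: it makes a later entry with the same name win by writing it over the first occurrence (for example, a tensor re-declared by a second file merged via `init_from_file`). The final `resize` keeps only the first `name_to_index_map.size()` elements, which is correct only when every first occurrence precedes every duplicate; a new name appearing after a duplicated one would be truncated away. A compacting variant that avoids relying on that ordering, as an illustrative sketch rather than part of the commit:

```cpp
#include <string>
#include <unordered_map>
#include <vector>

// Sketch: order-preserving dedup where the last duplicate wins,
// correct even when a new name appears after a duplicated one.
// T is any type with a std::string `name` member (e.g. TensorStorage).
template <typename T>
void remove_duplicates_compacting(std::vector<T>& vec) {
    std::unordered_map<std::string, size_t> name_to_index_map;
    size_t write = 0;
    for (size_t i = 0; i < vec.size(); ++i) {
        auto it = name_to_index_map.find(vec[i].name);
        if (it != name_to_index_map.end()) {
            vec[it->second] = std::move(vec[i]);  // later entry overrides earlier one
        } else {
            name_to_index_map[vec[i].name] = write;
            if (write != i) {
                vec[write] = std::move(vec[i]);  // compact unique entries forward
            }
            ++write;
        }
    }
    vec.resize(write);
}
```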
@@ -1276,22 +1309,10 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, ggml_backend
             return true;
         };
 
-        std::vector<TensorStorage> processed_tensor_storages;
-        for (auto& tensor_storage : tensor_storages) {
+        for (auto& tensor_storage : processed_tensor_storages) {
             if (tensor_storage.file_index != file_index) {
                 continue;
             }
-
-            // LOG_DEBUG("%s", name.c_str());
-
-            if (is_unused_tensor(tensor_storage.name)) {
-                continue;
-            }
-
-            preprocess_tensor(tensor_storage, processed_tensor_storages);
-        }
-
-        for (auto& tensor_storage : processed_tensor_storages) {
             // LOG_DEBUG("%s", tensor_storage.name.c_str());
 
             ggml_tensor* dst_tensor = NULL;
@@ -1437,7 +1458,61 @@ bool ModelLoader::load_tensors(std::map<std::string, struct ggml_tensor*>& tenso
     return true;
 }
 
-int64_t ModelLoader::cal_mem_size(ggml_backend_t backend) {
+bool ModelLoader::save_to_gguf_file(const std::string& file_path, ggml_type type) {
+    auto backend    = ggml_backend_cpu_init();
+    size_t mem_size = 1 * 1024 * 1024;  // for padding
+    mem_size += tensor_storages.size() * ggml_tensor_overhead();
+    mem_size += cal_mem_size(backend, type);
+    LOG_INFO("model tensors mem size: %.2fMB", mem_size / 1024.f / 1024.f);
+    ggml_context* ggml_ctx = ggml_init({mem_size, NULL, false});
+
+    gguf_context* gguf_ctx = gguf_init_empty();
+
+    auto on_new_tensor_cb = [&](const TensorStorage& tensor_storage, ggml_tensor** dst_tensor) -> bool {
+        const std::string& name = tensor_storage.name;
+
+        ggml_type tensor_type = tensor_storage.type;
+        if (type != GGML_TYPE_COUNT) {
+            if (ggml_is_quantized(type) && tensor_storage.ne[0] % 32 != 0) {
+                tensor_type = GGML_TYPE_F16;
+            } else {
+                tensor_type = type;
+            }
+        }
+
+        ggml_tensor* tensor = ggml_new_tensor(ggml_ctx, tensor_type, tensor_storage.n_dims, tensor_storage.ne);
+        if (tensor == NULL) {
+            LOG_ERROR("ggml_new_tensor failed");
+            return false;
+        }
+        ggml_set_name(tensor, name.c_str());
+
+        // LOG_DEBUG("%s %d %s %d[%d %d %d %d] %d[%d %d %d %d]", name.c_str(),
+        // ggml_nbytes(tensor), ggml_type_name(tensor_type),
+        // tensor_storage.n_dims,
+        // tensor_storage.ne[0], tensor_storage.ne[1], tensor_storage.ne[2], tensor_storage.ne[3],
+        // tensor->n_dims, tensor->ne[0], tensor->ne[1], tensor->ne[2], tensor->ne[3]);
+
+        *dst_tensor = tensor;
+
+        gguf_add_tensor(gguf_ctx, tensor);
+
+        return true;
+    };
+
+    bool success = load_tensors(on_new_tensor_cb, backend);
+    ggml_backend_free(backend);
+    LOG_INFO("load tensors done");
+    LOG_INFO("trying to save tensors to %s", file_path.c_str());
+    if (success) {
+        gguf_write_to_file(gguf_ctx, file_path.c_str(), false);
+    }
+    ggml_free(ggml_ctx);
+    gguf_free(gguf_ctx);
+    return success;
+}
+
+int64_t ModelLoader::cal_mem_size(ggml_backend_t backend, ggml_type type) {
     size_t alignment = 128;
     if (backend != NULL) {
         alignment = ggml_backend_get_alignment(backend);
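Two details worth calling out in `save_to_gguf_file` above (the same logic recurs in `cal_mem_size` below). First, ggml's classic quantized formats (`q4_0` through `q8_0`) pack values in blocks of 32 along a row, which is what the `% 32` check guards: tensors whose first dimension is not a multiple of 32 cannot be block-quantized and are stored as F16 instead. Second, `GGML_TYPE_COUNT` serves as a "keep the original type" sentinel, which is why `cal_mem_size` gains it as a defaulted parameter in model.h, leaving existing callers unchanged. A standalone sketch of the decision, with a hypothetical helper name:

```cpp
#include "ggml/ggml.h"

// Sketch of the fallback rule: block-quantized ggml types store 32
// values per block, so rows not divisible by 32 are kept as F16.
static ggml_type pick_output_type(ggml_type requested, ggml_type original, int64_t ne0) {
    if (requested == GGML_TYPE_COUNT) {
        return original;  // no --type given: keep the tensor's source type
    }
    if (ggml_is_quantized(requested) && ne0 % 32 != 0) {
        return GGML_TYPE_F16;  // row length incompatible with block quantization
    }
    return requested;
}
```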
@@ -1452,8 +1527,35 @@ int64_t ModelLoader::cal_mem_size(ggml_backend_t backend) {
     }
 
     for (auto& tensor_storage : processed_tensor_storages) {
+        ggml_type tensor_type = tensor_storage.type;
+        if (type != GGML_TYPE_COUNT) {
+            if (ggml_is_quantized(type) && tensor_storage.ne[0] % 32 != 0) {
+                tensor_type = GGML_TYPE_F16;
+            } else {
+                tensor_type = type;
+            }
+        }
+        tensor_storage.type = tensor_type;
         mem_size += tensor_storage.nbytes() + alignment;
     }
 
     return mem_size;
 }
 
+bool convert(const char* input_path, const char* vae_path, const char* output_path, sd_type_t output_type) {
+    ModelLoader model_loader;
+
+    if (!model_loader.init_from_file(input_path)) {
+        LOG_ERROR("init model loader from file failed: '%s'", input_path);
+        return false;
+    }
+
+    if (vae_path != NULL && strlen(vae_path) > 0) {
+        if (!model_loader.init_from_file(vae_path, "vae.")) {
+            LOG_ERROR("init model loader from file failed: '%s'", vae_path);
+            return false;
+        }
+    }
+    bool success = model_loader.save_to_gguf_file(output_path, (ggml_type)output_type);
+    return success;
+}
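When a VAE path is given, its tensors are loaded into the same `ModelLoader` under a `vae.` name prefix (the second argument to `init_from_file`), so the converted GGUF carries the merged VAE weights. A sketch of that use, not from the commit; the VAE filename is a placeholder and `SD_TYPE_Q8_0` is assumed as in the earlier sketch:

```cpp
// Sketch: convert a checkpoint and fold a standalone VAE into the
// output GGUF in one pass.
#include "stable-diffusion.h"

int main() {
    bool ok = convert("../models/v1-5-pruned-emaonly.safetensors",
                      "../models/vae-ft-mse-840000-ema-pruned.safetensors",  // placeholder VAE
                      "../models/v1-5-pruned-emaonly.q8_0.gguf",
                      SD_TYPE_Q8_0);
    return ok ? 0 : 1;
}
```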
model.h (5 lines changed)

@@ -4,9 +4,9 @@
 #include <functional>
 #include <map>
 #include <memory>
+#include <set>
 #include <string>
 #include <vector>
-#include <set>
 
 #include "ggml/ggml-backend.h"
 #include "ggml/ggml.h"
@@ -121,7 +121,8 @@ public:
     bool load_tensors(std::map<std::string, struct ggml_tensor*>& tensors,
                       ggml_backend_t backend,
                       std::set<std::string> ignore_tensors = {});
-    int64_t cal_mem_size(ggml_backend_t backend);
+    bool save_to_gguf_file(const std::string& file_path, ggml_type type);
+    int64_t cal_mem_size(ggml_backend_t backend, ggml_type type = GGML_TYPE_COUNT);
     ~ModelLoader() = default;
 };
 
 #endif  // __MODEL_H__
stable-diffusion.h

@@ -148,7 +148,9 @@ SD_API upscaler_ctx_t* new_upscaler_ctx(const char* esrgan_path,
                                         enum sd_type_t wtype);
 SD_API void free_upscaler_ctx(upscaler_ctx_t* upscaler_ctx);
 
-SD_API sd_image_t upscale(upscaler_ctx_t*, sd_image_t input_image, uint32_t upscale_factor);
+SD_API sd_image_t upscale(upscaler_ctx_t* upscaler_ctx, sd_image_t input_image, uint32_t upscale_factor);
+
+SD_API bool convert(const char* input_path, const char* vae_path, const char* output_path, sd_type_t output_type);
 
 #ifdef __cplusplus
 }