chore: change SD_CUBLAS/SD_USE_CUBLAS to SD_CUDA/SD_USE_CUDA

Author: leejet
Date:   2024-12-28 13:27:51 +08:00
Parent: 348a54e34a
Commit: dcf91f9e0f
6 changed files with 13 additions and 13 deletions

.github/workflows/build.yml

@@ -163,7 +163,7 @@ jobs:
- build: "avx512"
defines: "-DGGML_AVX512=ON -DSD_BUILD_SHARED_LIBS=ON"
- build: "cuda12"
defines: "-DSD_CUBLAS=ON -DSD_BUILD_SHARED_LIBS=ON"
defines: "-DSD_CUDA=ON -DSD_BUILD_SHARED_LIBS=ON"
# - build: "rocm5.5"
# defines: '-G Ninja -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DSD_HIPBLAS=ON -DCMAKE_BUILD_TYPE=Release -DAMDGPU_TARGETS="gfx1100;gfx1102;gfx1030" -DSD_BUILD_SHARED_LIBS=ON'
- build: 'vulkan'

CMakeLists.txt

@@ -24,7 +24,7 @@ endif()
# general
#option(SD_BUILD_TESTS "sd: build tests" ${SD_STANDALONE})
option(SD_BUILD_EXAMPLES "sd: build examples" ${SD_STANDALONE})
-option(SD_CUBLAS "sd: cuda backend" OFF)
+option(SD_CUDA "sd: cuda backend" OFF)
option(SD_HIPBLAS "sd: rocm backend" OFF)
option(SD_METAL "sd: metal backend" OFF)
option(SD_VULKAN "sd: vulkan backend" OFF)
@@ -34,10 +34,10 @@ option(SD_FAST_SOFTMAX "sd: x1.5 faster softmax, indeterministic (
option(SD_BUILD_SHARED_LIBS "sd: build shared libs" OFF)
#option(SD_BUILD_SERVER "sd: build server example" ON)
-if(SD_CUBLAS)
-    message("-- Use CUBLAS as backend stable-diffusion")
+if(SD_CUDA)
+    message("-- Use CUDA as backend stable-diffusion")
set(GGML_CUDA ON)
-    add_definitions(-DSD_USE_CUBLAS)
+    add_definitions(-DSD_USE_CUDA)
endif()
if(SD_METAL)
@@ -55,7 +55,7 @@ endif ()
if (SD_HIPBLAS)
message("-- Use HIPBLAS as backend stable-diffusion")
set(GGML_HIPBLAS ON)
-    add_definitions(-DSD_USE_CUBLAS)
+    add_definitions(-DSD_USE_CUDA)
if(SD_FAST_SOFTMAX)
set(GGML_CUDA_FAST_SOFTMAX ON)
endif()

README.md

@@ -113,12 +113,12 @@ cmake .. -DGGML_OPENBLAS=ON
cmake --build . --config Release
```
-##### Using CUBLAS
+##### Using CUDA
This provides BLAS acceleration using the CUDA cores of your NVIDIA GPU. Make sure the CUDA toolkit is installed; you can install it from your Linux distro's package manager (e.g. `apt install nvidia-cuda-toolkit`) or download it from here: [CUDA Toolkit](https://developer.nvidia.com/cuda-downloads). At least 4 GB of VRAM is recommended.
```
-cmake .. -DSD_CUBLAS=ON
+cmake .. -DSD_CUDA=ON
cmake --build . --config Release
```

ggml_extend.hpp

@@ -27,7 +27,7 @@
#include "model.h"
-#ifdef SD_USE_CUBLAS
+#ifdef SD_USE_CUDA
#include "ggml-cuda.h"
#endif
@@ -708,7 +708,7 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_nn_attention(struct ggml_context* ctx
struct ggml_tensor* k,
struct ggml_tensor* v,
bool mask = false) {
-#if defined(SD_USE_FLASH_ATTENTION) && !defined(SD_USE_CUBLAS) && !defined(SD_USE_METAL) && !defined(SD_USE_VULKAN) && !defined(SD_USE_SYCL)
+#if defined(SD_USE_FLASH_ATTENTION) && !defined(SD_USE_CUDA) && !defined(SD_USE_METAL) && !defined(SD_USE_VULKAN) && !defined(SD_USE_SYCL)
struct ggml_tensor* kqv = ggml_flash_attn(ctx, q, k, v, false); // [N * n_head, n_token, d_head]
#else
float d_head = (float)q->ne[0];
@@ -864,7 +864,7 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_nn_group_norm(struct ggml_context* ct
}
__STATIC_INLINE__ void ggml_backend_tensor_get_and_sync(ggml_backend_t backend, const struct ggml_tensor* tensor, void* data, size_t offset, size_t size) {
-#if defined(SD_USE_CUBLAS) || defined(SD_USE_SYCL)
+#if defined(SD_USE_CUDA) || defined(SD_USE_SYCL)
if (!ggml_backend_is_cpu(backend)) {
ggml_backend_tensor_get_async(backend, tensor, data, offset, size);
ggml_backend_synchronize(backend);
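The hunk above cuts off before the helper's fallback path. For orientation, here is a minimal sketch of how the whole function plausibly reads after this commit; only the `#if` line and the async branch appear in the diff, so the synchronous CPU fallback below is an assumption based on the standard ggml-backend API:

```cpp
#include "ggml-backend.h"

// Sketch of the get-and-sync helper whose guard this hunk renames.
// The fallback branch is assumed, not shown in the diff.
static void sd_tensor_get_and_sync(ggml_backend_t backend,
                                   const struct ggml_tensor* tensor,
                                   void* data, size_t offset, size_t size) {
#if defined(SD_USE_CUDA) || defined(SD_USE_SYCL)
    if (!ggml_backend_is_cpu(backend)) {
        // Queue an async device->host copy, then block until it completes.
        ggml_backend_tensor_get_async(backend, tensor, data, offset, size);
        ggml_backend_synchronize(backend);
        return;
    }
#endif
    // CPU backends (and non-CUDA/SYCL builds) copy synchronously.
    ggml_backend_tensor_get(tensor, data, offset, size);
}
```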

stable-diffusion.cpp

@@ -159,7 +159,7 @@ public:
bool vae_on_cpu,
bool diffusion_flash_attn) {
use_tiny_autoencoder = taesd_path.size() > 0;
-#ifdef SD_USE_CUBLAS
+#ifdef SD_USE_CUDA
LOG_DEBUG("Using CUDA backend");
backend = ggml_backend_cuda_init(0);
#endif
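Only the CUDA branch is visible in this hunk. As a rough sketch of the backend-selection pattern it sits in (the CPU fallback below is an assumption mirroring the usual ggml setup, not code shown in this diff):

```cpp
#include "ggml-backend.h"  // newer ggml may declare ggml_backend_cpu_init() in ggml-cpu.h
#ifdef SD_USE_CUDA
#include "ggml-cuda.h"     // declares ggml_backend_cuda_init()
#endif

// Pick a compute backend at startup; the compile-time macros renamed by
// this commit decide which device-specific init is even attempted.
static ggml_backend_t sd_init_backend() {
    ggml_backend_t backend = NULL;
#ifdef SD_USE_CUDA
    backend = ggml_backend_cuda_init(0);  // device index 0, as in the hunk above
#endif
    if (backend == NULL) {
        // Assumed fallback: run on the CPU when no GPU backend was built or initialized.
        backend = ggml_backend_cpu_init();
    }
    return backend;
}
```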

upscaler.cpp

@@ -15,7 +15,7 @@ struct UpscalerGGML {
}
bool load_from_file(const std::string& esrgan_path) {
-#ifdef SD_USE_CUBLAS
+#ifdef SD_USE_CUDA
LOG_DEBUG("Using CUDA backend");
backend = ggml_backend_cuda_init(0);
#endif