From dcf91f9e0f2cbf9da472ee2a556751ed4bab2d2a Mon Sep 17 00:00:00 2001
From: leejet
Date: Sat, 28 Dec 2024 13:27:51 +0800
Subject: [PATCH] chore: change SD_CUBLAS/SD_USE_CUBLAS to SD_CUDA/SD_USE_CUDA

---
 .github/workflows/build.yml |  2 +-
 CMakeLists.txt              | 10 +++++-----
 README.md                   |  4 ++--
 ggml_extend.hpp             |  6 +++---
 stable-diffusion.cpp        |  2 +-
 upscaler.cpp                |  2 +-
 6 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index c0eeb4f..8569ccf 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -163,7 +163,7 @@ jobs:
           - build: "avx512"
             defines: "-DGGML_AVX512=ON -DSD_BUILD_SHARED_LIBS=ON"
           - build: "cuda12"
-            defines: "-DSD_CUBLAS=ON -DSD_BUILD_SHARED_LIBS=ON"
+            defines: "-DSD_CUDA=ON -DSD_BUILD_SHARED_LIBS=ON"
           # - build: "rocm5.5"
           #   defines: '-G Ninja -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DSD_HIPBLAS=ON -DCMAKE_BUILD_TYPE=Release -DAMDGPU_TARGETS="gfx1100;gfx1102;gfx1030" -DSD_BUILD_SHARED_LIBS=ON'
           - build: 'vulkan'
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 455de26..6a60b8c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -24,7 +24,7 @@ endif()
 # general
 #option(SD_BUILD_TESTS "sd: build tests" ${SD_STANDALONE})
 option(SD_BUILD_EXAMPLES "sd: build examples" ${SD_STANDALONE})
-option(SD_CUBLAS "sd: cuda backend" OFF)
+option(SD_CUDA "sd: cuda backend" OFF)
 option(SD_HIPBLAS "sd: rocm backend" OFF)
 option(SD_METAL "sd: metal backend" OFF)
 option(SD_VULKAN "sd: vulkan backend" OFF)
@@ -34,10 +34,10 @@ option(SD_FAST_SOFTMAX "sd: x1.5 faster softmax, indeterministic (
 option(SD_BUILD_SHARED_LIBS "sd: build shared libs" OFF)
 #option(SD_BUILD_SERVER "sd: build server example" ON)
 
-if(SD_CUBLAS)
-    message("-- Use CUBLAS as backend stable-diffusion")
+if(SD_CUDA)
+    message("-- Use CUDA as backend stable-diffusion")
     set(GGML_CUDA ON)
-    add_definitions(-DSD_USE_CUBLAS)
+    add_definitions(-DSD_USE_CUDA)
 endif()
 
 if(SD_METAL)
@@ -55,7 +55,7 @@ endif ()
 if (SD_HIPBLAS)
     message("-- Use HIPBLAS as backend stable-diffusion")
     set(GGML_HIPBLAS ON)
-    add_definitions(-DSD_USE_CUBLAS)
+    add_definitions(-DSD_USE_CUDA)
     if(SD_FAST_SOFTMAX)
         set(GGML_CUDA_FAST_SOFTMAX ON)
     endif()
diff --git a/README.md b/README.md
index 5ea36b6..01b1fa4 100644
--- a/README.md
+++ b/README.md
@@ -113,12 +113,12 @@ cmake .. -DGGML_OPENBLAS=ON
 cmake --build . --config Release
 ```
 
-##### Using CUBLAS
+##### Using CUDA
 
 This provides BLAS acceleration using the CUDA cores of your Nvidia GPU. Make sure to have the CUDA toolkit installed. You can download it from your Linux distro's package manager (e.g. `apt install nvidia-cuda-toolkit`) or from here: [CUDA Toolkit](https://developer.nvidia.com/cuda-downloads). Recommended to have at least 4 GB of VRAM.
 
 ```
-cmake .. -DSD_CUBLAS=ON
+cmake .. -DSD_CUDA=ON
 cmake --build . --config Release
 ```
diff --git a/ggml_extend.hpp b/ggml_extend.hpp
index 4aea858..035f088 100644
--- a/ggml_extend.hpp
+++ b/ggml_extend.hpp
@@ -27,7 +27,7 @@
 
 #include "model.h"
 
-#ifdef SD_USE_CUBLAS
+#ifdef SD_USE_CUDA
 #include "ggml-cuda.h"
 #endif
 
@@ -708,7 +708,7 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_nn_attention(struct ggml_context* ctx
                                                         struct ggml_tensor* k,
                                                         struct ggml_tensor* v,
                                                         bool mask = false) {
-#if defined(SD_USE_FLASH_ATTENTION) && !defined(SD_USE_CUBLAS) && !defined(SD_USE_METAL) && !defined(SD_USE_VULKAN) && !defined(SD_USE_SYCL)
+#if defined(SD_USE_FLASH_ATTENTION) && !defined(SD_USE_CUDA) && !defined(SD_USE_METAL) && !defined(SD_USE_VULKAN) && !defined(SD_USE_SYCL)
     struct ggml_tensor* kqv = ggml_flash_attn(ctx, q, k, v, false); // [N * n_head, n_token, d_head]
 #else
     float d_head = (float)q->ne[0];
@@ -864,7 +864,7 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_nn_group_norm(struct ggml_context* ct
 }
 
 __STATIC_INLINE__ void ggml_backend_tensor_get_and_sync(ggml_backend_t backend, const struct ggml_tensor* tensor, void* data, size_t offset, size_t size) {
-#if defined(SD_USE_CUBLAS) || defined(SD_USE_SYCL)
+#if defined(SD_USE_CUDA) || defined(SD_USE_SYCL)
     if (!ggml_backend_is_cpu(backend)) {
         ggml_backend_tensor_get_async(backend, tensor, data, offset, size);
         ggml_backend_synchronize(backend);
diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp
index 35d49af..e2daf57 100644
--- a/stable-diffusion.cpp
+++ b/stable-diffusion.cpp
@@ -159,7 +159,7 @@ public:
                         bool vae_on_cpu,
                         bool diffusion_flash_attn) {
         use_tiny_autoencoder = taesd_path.size() > 0;
-#ifdef SD_USE_CUBLAS
+#ifdef SD_USE_CUDA
         LOG_DEBUG("Using CUDA backend");
         backend = ggml_backend_cuda_init(0);
 #endif
diff --git a/upscaler.cpp b/upscaler.cpp
index 86e5e9b..0c11b66 100644
--- a/upscaler.cpp
+++ b/upscaler.cpp
@@ -15,7 +15,7 @@ struct UpscalerGGML {
     }
 
     bool load_from_file(const std::string& esrgan_path) {
-#ifdef SD_USE_CUBLAS
+#ifdef SD_USE_CUDA
         LOG_DEBUG("Using CUDA backend");
         backend = ggml_backend_cuda_init(0);
 #endif
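
Downstream code that still guards its CUDA paths with the old `SD_USE_CUBLAS` macro loses those paths once it builds against this patch. A minimal bridging header is sketched below, assuming a hypothetical file name `sd_cuda_compat.h` and a fallback define that are illustration only, not part of the patch:

```cpp
// sd_cuda_compat.h -- hypothetical bridging header, not part of this patch.
// Maps the pre-rename macro onto the new one so translation units written
// against -DSD_USE_CUBLAS keep selecting the CUDA backend after the rename.
#pragma once

#if defined(SD_USE_CUBLAS) && !defined(SD_USE_CUDA)
#define SD_USE_CUDA
#endif

#ifdef SD_USE_CUDA
#include "ggml-cuda.h"  // same backend header ggml_extend.hpp includes above
#endif
```

A CMake-side bridge could likewise map a stale `-DSD_CUBLAS=ON` onto `SD_CUDA` before the backend checks run; the patch itself simply drops the old spelling rather than aliasing it.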