chore: change SD_CUBLAS/SD_USE_CUBLAS to SD_CUDA/SD_USE_CUDA
This commit is contained in:
parent 348a54e34a
commit dcf91f9e0f

.github/workflows/build.yml (vendored, 2 changes)
@@ -163,7 +163,7 @@ jobs:
           - build: "avx512"
             defines: "-DGGML_AVX512=ON -DSD_BUILD_SHARED_LIBS=ON"
           - build: "cuda12"
-            defines: "-DSD_CUBLAS=ON -DSD_BUILD_SHARED_LIBS=ON"
+            defines: "-DSD_CUDA=ON -DSD_BUILD_SHARED_LIBS=ON"
           # - build: "rocm5.5"
           #   defines: '-G Ninja -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DSD_HIPBLAS=ON -DCMAKE_BUILD_TYPE=Release -DAMDGPU_TARGETS="gfx1100;gfx1102;gfx1030" -DSD_BUILD_SHARED_LIBS=ON'
           - build: 'vulkan'

@@ -24,7 +24,7 @@ endif()
 # general
 #option(SD_BUILD_TESTS "sd: build tests" ${SD_STANDALONE})
 option(SD_BUILD_EXAMPLES "sd: build examples" ${SD_STANDALONE})
-option(SD_CUBLAS "sd: cuda backend" OFF)
+option(SD_CUDA "sd: cuda backend" OFF)
 option(SD_HIPBLAS "sd: rocm backend" OFF)
 option(SD_METAL "sd: metal backend" OFF)
 option(SD_VULKAN "sd: vulkan backend" OFF)

@@ -34,10 +34,10 @@ option(SD_FAST_SOFTMAX "sd: x1.5 faster softmax, indeterministic (
 option(SD_BUILD_SHARED_LIBS "sd: build shared libs" OFF)
 #option(SD_BUILD_SERVER "sd: build server example" ON)

-if(SD_CUBLAS)
-    message("-- Use CUBLAS as backend stable-diffusion")
+if(SD_CUDA)
+    message("-- Use CUDA as backend stable-diffusion")
     set(GGML_CUDA ON)
-    add_definitions(-DSD_USE_CUBLAS)
+    add_definitions(-DSD_USE_CUDA)
 endif()

 if(SD_METAL)

@@ -55,7 +55,7 @@ endif ()
 if (SD_HIPBLAS)
     message("-- Use HIPBLAS as backend stable-diffusion")
     set(GGML_HIPBLAS ON)
-    add_definitions(-DSD_USE_CUBLAS)
+    add_definitions(-DSD_USE_CUDA)
     if(SD_FAST_SOFTMAX)
         set(GGML_CUDA_FAST_SOFTMAX ON)
     endif()

@@ -113,12 +113,12 @@ cmake .. -DGGML_OPENBLAS=ON
 cmake --build . --config Release
 ```

-##### Using CUBLAS
+##### Using CUDA

 This provides BLAS acceleration using the CUDA cores of your Nvidia GPU. Make sure to have the CUDA toolkit installed. You can download it from your Linux distro's package manager (e.g. `apt install nvidia-cuda-toolkit`) or from here: [CUDA Toolkit](https://developer.nvidia.com/cuda-downloads). Recommended to have at least 4 GB of VRAM.

 ```
-cmake .. -DSD_CUBLAS=ON
+cmake .. -DSD_CUDA=ON
 cmake --build . --config Release
 ```

@@ -27,7 +27,7 @@

 #include "model.h"

-#ifdef SD_USE_CUBLAS
+#ifdef SD_USE_CUDA
 #include "ggml-cuda.h"
 #endif

@@ -708,7 +708,7 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_nn_attention(struct ggml_context* ctx
                                                         struct ggml_tensor* k,
                                                         struct ggml_tensor* v,
                                                         bool mask = false) {
-#if defined(SD_USE_FLASH_ATTENTION) && !defined(SD_USE_CUBLAS) && !defined(SD_USE_METAL) && !defined(SD_USE_VULKAN) && !defined(SD_USE_SYCL)
+#if defined(SD_USE_FLASH_ATTENTION) && !defined(SD_USE_CUDA) && !defined(SD_USE_METAL) && !defined(SD_USE_VULKAN) && !defined(SD_USE_SYCL)
     struct ggml_tensor* kqv = ggml_flash_attn(ctx, q, k, v, false); // [N * n_head, n_token, d_head]
 #else
     float d_head = (float)q->ne[0];

@@ -864,7 +864,7 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_nn_group_norm(struct ggml_context* ct
 }

 __STATIC_INLINE__ void ggml_backend_tensor_get_and_sync(ggml_backend_t backend, const struct ggml_tensor* tensor, void* data, size_t offset, size_t size) {
-#if defined(SD_USE_CUBLAS) || defined(SD_USE_SYCL)
+#if defined(SD_USE_CUDA) || defined(SD_USE_SYCL)
     if (!ggml_backend_is_cpu(backend)) {
         ggml_backend_tensor_get_async(backend, tensor, data, offset, size);
         ggml_backend_synchronize(backend);

@@ -159,7 +159,7 @@ public:
                      bool vae_on_cpu,
                      bool diffusion_flash_attn) {
         use_tiny_autoencoder = taesd_path.size() > 0;
-#ifdef SD_USE_CUBLAS
+#ifdef SD_USE_CUDA
         LOG_DEBUG("Using CUDA backend");
         backend = ggml_backend_cuda_init(0);
 #endif

@@ -15,7 +15,7 @@ struct UpscalerGGML {
     }

     bool load_from_file(const std::string& esrgan_path) {
-#ifdef SD_USE_CUBLAS
+#ifdef SD_USE_CUDA
         LOG_DEBUG("Using CUDA backend");
         backend = ggml_backend_cuda_init(0);
 #endif

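Note (not part of this commit): after this rename, build scripts that still pass `-DSD_CUBLAS=ON` will silently produce a CPU-only build, since the old option is no longer read, and external code that checks `#ifdef SD_USE_CUBLAS` needs the same update to `SD_USE_CUDA`. A minimal sketch of a compatibility shim one could add to CMakeLists.txt to keep accepting the old flag during a deprecation period (hypothetical, not present in the repository):

```
# Hypothetical compatibility shim, not part of this commit: map the removed
# SD_CUBLAS option onto the new SD_CUDA option so old invocations keep working.
option(SD_CUBLAS "sd: cuda backend (deprecated, use SD_CUDA)" OFF)
if(SD_CUBLAS AND NOT SD_CUDA)
    message(WARNING "SD_CUBLAS is deprecated, use -DSD_CUDA=ON instead")
    set(SD_CUDA ON)
endif()
```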