feat: add vulkan backend support (#291)

* Fix includes and init vulkan the same as llama.cpp

* Add Windows Vulkan CI

* Updated ggml submodule

* support epsilon as a parameter for ggml_group_norm

---------

Co-authored-by: Cloudwalk <cloudwalk@icculus.org>
Co-authored-by: Oleg Skutte <00.00.oleg.00.00@gmail.com>
Co-authored-by: leejet <leejet714@gmail.com>
This commit is contained in:
soham 2024-08-27 21:26:09 +05:30 committed by GitHub
parent 8847114abf
commit 2027b16fda
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 44 additions and 3 deletions

View File

@ -148,6 +148,9 @@ jobs:
windows-latest-cmake: windows-latest-cmake:
runs-on: windows-2019 runs-on: windows-2019
env:
VULKAN_VERSION: 1.3.261.1
strategy: strategy:
matrix: matrix:
include: include:
@ -163,6 +166,8 @@ jobs:
defines: "-DSD_CUBLAS=ON -DSD_BUILD_SHARED_LIBS=ON" defines: "-DSD_CUBLAS=ON -DSD_BUILD_SHARED_LIBS=ON"
- build: "rocm5.5" - build: "rocm5.5"
defines: '-G Ninja -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DSD_HIPBLAS=ON -DCMAKE_BUILD_TYPE=Release -DAMDGPU_TARGETS="gfx1100;gfx1102;gfx1030" -DSD_BUILD_SHARED_LIBS=ON' defines: '-G Ninja -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DSD_HIPBLAS=ON -DCMAKE_BUILD_TYPE=Release -DAMDGPU_TARGETS="gfx1100;gfx1102;gfx1030" -DSD_BUILD_SHARED_LIBS=ON'
- build: 'vulkan'
defines: "-DSD_VULKAN=ON -DSD_BUILD_SHARED_LIBS=ON"
steps: steps:
- name: Clone - name: Clone
id: checkout id: checkout
@ -192,6 +197,14 @@ jobs:
uses: urkle/action-get-ninja@v1 uses: urkle/action-get-ninja@v1
with: with:
version: 1.11.1 version: 1.11.1
- name: Install Vulkan SDK
id: get_vulkan
if: ${{ matrix.build == 'vulkan' }}
run: |
curl.exe -o $env:RUNNER_TEMP/VulkanSDK-Installer.exe -L "https://sdk.lunarg.com/sdk/download/${env:VULKAN_VERSION}/windows/VulkanSDK-${env:VULKAN_VERSION}-Installer.exe"
& "$env:RUNNER_TEMP\VulkanSDK-Installer.exe" --accept-licenses --default-answer --confirm-command install
Add-Content $env:GITHUB_ENV "VULKAN_SDK=C:\VulkanSDK\${env:VULKAN_VERSION}"
Add-Content $env:GITHUB_PATH "C:\VulkanSDK\${env:VULKAN_VERSION}\bin"
- name: Build - name: Build
id: cmake_build id: cmake_build

View File

@ -27,6 +27,7 @@ option(SD_BUILD_EXAMPLES "sd: build examples" ${SD_STANDALONE})
option(SD_CUBLAS "sd: cuda backend" OFF) option(SD_CUBLAS "sd: cuda backend" OFF)
option(SD_HIPBLAS "sd: rocm backend" OFF) option(SD_HIPBLAS "sd: rocm backend" OFF)
option(SD_METAL "sd: metal backend" OFF) option(SD_METAL "sd: metal backend" OFF)
option(SD_VULKAN "sd: vulkan backend" OFF)
option(SD_SYCL "sd: sycl backend" OFF) option(SD_SYCL "sd: sycl backend" OFF)
option(SD_FLASH_ATTN "sd: use flash attention for x4 less memory usage" OFF) option(SD_FLASH_ATTN "sd: use flash attention for x4 less memory usage" OFF)
option(SD_FAST_SOFTMAX "sd: x1.5 faster softmax, indeterministic (sometimes, same seed don't generate same image), cuda only" OFF) option(SD_FAST_SOFTMAX "sd: x1.5 faster softmax, indeterministic (sometimes, same seed don't generate same image), cuda only" OFF)
@ -45,6 +46,12 @@ if(SD_METAL)
add_definitions(-DSD_USE_METAL) add_definitions(-DSD_USE_METAL)
endif() endif()
# Vulkan backend: active only when the SD_VULKAN option is switched ON.
if(SD_VULKAN)
    message("Use Vulkan as backend stable-diffusion")
    # Compile-time switch tested by the `#ifdef SD_USE_VULKAN` guards in the C++ sources.
    add_definitions(-DSD_USE_VULKAN)
    # Presumably read by the bundled ggml build to enable its Vulkan backend -- confirm in ggml's CMakeLists.
    set(GGML_VULKAN ON)
endif()
if (SD_HIPBLAS) if (SD_HIPBLAS)
message("Use HIPBLAS as backend stable-diffusion") message("Use HIPBLAS as backend stable-diffusion")
set(GGML_HIPBLAS ON) set(GGML_HIPBLAS ON)

2
ggml

@ -1 +1 @@
Subproject commit a06c68343e9976fdfc80917a958b903a0d7c8cc6 Subproject commit 21f9e5c426b105841c2e346d8f1aafec398edf15

View File

@ -32,6 +32,10 @@
#include "ggml-metal.h" #include "ggml-metal.h"
#endif #endif
#ifdef SD_USE_VULKAN
#include "ggml-vulkan.h"
#endif
#ifdef SD_USE_SYCL #ifdef SD_USE_SYCL
#include "ggml-sycl.h" #include "ggml-sycl.h"
#endif #endif
@ -655,7 +659,7 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_nn_attention(struct ggml_context* ctx
struct ggml_tensor* k, struct ggml_tensor* k,
struct ggml_tensor* v, struct ggml_tensor* v,
bool mask = false) { bool mask = false) {
#if defined(SD_USE_FLASH_ATTENTION) && !defined(SD_USE_CUBLAS) && !defined(SD_USE_METAL) && !defined(SD_USE_SYCL) #if defined(SD_USE_FLASH_ATTENTION) && !defined(SD_USE_CUBLAS) && !defined(SD_USE_METAL) && !defined(SD_USE_VULKAN) && !defined(SD_USE_SYCL)
struct ggml_tensor* kqv = ggml_flash_attn(ctx, q, k, v, false); // [N * n_head, n_token, d_head] struct ggml_tensor* kqv = ggml_flash_attn(ctx, q, k, v, false); // [N * n_head, n_token, d_head]
#else #else
float d_head = (float)q->ne[0]; float d_head = (float)q->ne[0];

View File

@ -21,6 +21,10 @@
#include "ggml-metal.h" #include "ggml-metal.h"
#endif #endif
#ifdef SD_USE_VULKAN
#include "ggml-vulkan.h"
#endif
#define ST_HEADER_SIZE_LEN 8 #define ST_HEADER_SIZE_LEN 8
uint64_t read_u64(uint8_t* buffer) { uint64_t read_u64(uint8_t* buffer) {

View File

@ -160,6 +160,15 @@ public:
ggml_backend_metal_log_set_callback(ggml_log_callback_default, nullptr); ggml_backend_metal_log_set_callback(ggml_log_callback_default, nullptr);
backend = ggml_backend_metal_init(); backend = ggml_backend_metal_init();
#endif #endif
#ifdef SD_USE_VULKAN
LOG_DEBUG("Using Vulkan backend");
// Use the first Vulkan device that initializes successfully. Initializing
// every device and keeping only the last result would drop the earlier
// backend handles without ever freeing them.
const int vk_device_count = ggml_backend_vk_get_device_count();
for (int device = 0; device < vk_device_count; ++device) {
    backend = ggml_backend_vk_init(device);
    if (backend) {
        break;
    }
}
// Also reached when no Vulkan device is reported at all (loop body never ran).
if (!backend) {
    LOG_WARN("Failed to initialize Vulkan backend");
}
#endif
#ifdef SD_USE_SYCL #ifdef SD_USE_SYCL
LOG_DEBUG("Using SYCL backend"); LOG_DEBUG("Using SYCL backend");
backend = ggml_backend_sycl_init(0); backend = ggml_backend_sycl_init(0);
@ -170,7 +179,7 @@ public:
backend = ggml_backend_cpu_init(); backend = ggml_backend_cpu_init();
} }
#ifdef SD_USE_FLASH_ATTENTION #ifdef SD_USE_FLASH_ATTENTION
#if defined(SD_USE_CUBLAS) || defined(SD_USE_METAL) || defined(SD_USE_SYCL) #if defined(SD_USE_CUBLAS) || defined(SD_USE_METAL) || defined (SD_USE_SYCL) || defined(SD_USE_VULKAN)
LOG_WARN("Flash Attention not supported with GPU Backend"); LOG_WARN("Flash Attention not supported with GPU Backend");
#else #else
LOG_INFO("Flash Attention enabled"); LOG_INFO("Flash Attention enabled");

View File

@ -24,6 +24,10 @@ struct UpscalerGGML {
ggml_backend_metal_log_set_callback(ggml_log_callback_default, nullptr); ggml_backend_metal_log_set_callback(ggml_log_callback_default, nullptr);
backend = ggml_backend_metal_init(); backend = ggml_backend_metal_init();
#endif #endif
#ifdef SD_USE_VULKAN
LOG_DEBUG("Using Vulkan backend");
// The upscaler always requests Vulkan device index 0.
backend = ggml_backend_vk_init(0);
// Report a failed init instead of continuing silently, matching the
// warning emitted by the main model's Vulkan initialization.
if (!backend) {
    LOG_WARN("Failed to initialize Vulkan backend");
}
#endif
#ifdef SD_USE_SYCL #ifdef SD_USE_SYCL
LOG_DEBUG("Using SYCL backend"); LOG_DEBUG("Using SYCL backend");
backend = ggml_backend_sycl_init(0); backend = ggml_backend_sycl_init(0);