sync: update ggml to fix large image generation with SYCL backend (#380)
* turn off fast-math on host in SYCL backend
Signed-off-by: zhentaoyu <zhentao.yu@intel.com>
* update ggml for sync some sycl ops
Signed-off-by: zhentaoyu <zhentao.yu@intel.com>
* update sycl readme and ggml
Signed-off-by: zhentaoyu <zhentao.yu@intel.com>
---------
Signed-off-by: zhentaoyu <zhentao.yu@intel.com>
This commit is contained in:
parent
58d54738e2
commit
e410aeb534
@ -35,25 +35,25 @@ option(SD_BUILD_SHARED_LIBS "sd: build shared libs" OFF)
|
|||||||
#option(SD_BUILD_SERVER "sd: build server example" ON)
|
#option(SD_BUILD_SERVER "sd: build server example" ON)
|
||||||
|
|
||||||
if(SD_CUBLAS)
|
if(SD_CUBLAS)
|
||||||
message("Use CUBLAS as backend stable-diffusion")
|
message("-- Use CUBLAS as backend stable-diffusion")
|
||||||
set(GGML_CUDA ON)
|
set(GGML_CUDA ON)
|
||||||
add_definitions(-DSD_USE_CUBLAS)
|
add_definitions(-DSD_USE_CUBLAS)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if(SD_METAL)
|
if(SD_METAL)
|
||||||
message("Use Metal as backend stable-diffusion")
|
message("-- Use Metal as backend stable-diffusion")
|
||||||
set(GGML_METAL ON)
|
set(GGML_METAL ON)
|
||||||
add_definitions(-DSD_USE_METAL)
|
add_definitions(-DSD_USE_METAL)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if (SD_VULKAN)
|
if (SD_VULKAN)
|
||||||
message("Use Vulkan as backend stable-diffusion")
|
message("-- Use Vulkan as backend stable-diffusion")
|
||||||
set(GGML_VULKAN ON)
|
set(GGML_VULKAN ON)
|
||||||
add_definitions(-DSD_USE_VULKAN)
|
add_definitions(-DSD_USE_VULKAN)
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
if (SD_HIPBLAS)
|
if (SD_HIPBLAS)
|
||||||
message("Use HIPBLAS as backend stable-diffusion")
|
message("-- Use HIPBLAS as backend stable-diffusion")
|
||||||
set(GGML_HIPBLAS ON)
|
set(GGML_HIPBLAS ON)
|
||||||
add_definitions(-DSD_USE_CUBLAS)
|
add_definitions(-DSD_USE_CUBLAS)
|
||||||
if(SD_FAST_SOFTMAX)
|
if(SD_FAST_SOFTMAX)
|
||||||
@ -61,14 +61,8 @@ if (SD_HIPBLAS)
|
|||||||
endif()
|
endif()
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
if(SD_SYCL)
|
|
||||||
message("Use SYCL as backend stable-diffusion")
|
|
||||||
set(GGML_SYCL ON)
|
|
||||||
add_definitions(-DSD_USE_SYCL)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if(SD_FLASH_ATTN)
|
if(SD_FLASH_ATTN)
|
||||||
message("Use Flash Attention for memory optimization")
|
message("-- Use Flash Attention for memory optimization")
|
||||||
add_definitions(-DSD_USE_FLASH_ATTENTION)
|
add_definitions(-DSD_USE_FLASH_ATTENTION)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
@ -82,7 +76,7 @@ file(GLOB SD_LIB_SOURCES
|
|||||||
|
|
||||||
# we can get only one share lib
|
# we can get only one share lib
|
||||||
if(SD_BUILD_SHARED_LIBS)
|
if(SD_BUILD_SHARED_LIBS)
|
||||||
message("Build shared library")
|
message("-- Build shared library")
|
||||||
message(${SD_LIB_SOURCES})
|
message(${SD_LIB_SOURCES})
|
||||||
set(BUILD_SHARED_LIBS OFF)
|
set(BUILD_SHARED_LIBS OFF)
|
||||||
add_library(${SD_LIB} SHARED ${SD_LIB_SOURCES})
|
add_library(${SD_LIB} SHARED ${SD_LIB_SOURCES})
|
||||||
@ -90,11 +84,25 @@ if(SD_BUILD_SHARED_LIBS)
|
|||||||
target_compile_definitions(${SD_LIB} PRIVATE -DSD_BUILD_DLL)
|
target_compile_definitions(${SD_LIB} PRIVATE -DSD_BUILD_DLL)
|
||||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||||
else()
|
else()
|
||||||
message("Build static library")
|
message("-- Build static library")
|
||||||
set(BUILD_SHARED_LIBS OFF)
|
set(BUILD_SHARED_LIBS OFF)
|
||||||
add_library(${SD_LIB} STATIC ${SD_LIB_SOURCES})
|
add_library(${SD_LIB} STATIC ${SD_LIB_SOURCES})
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
if(SD_SYCL)
|
||||||
|
message("-- Use SYCL as backend stable-diffusion")
|
||||||
|
set(GGML_SYCL ON)
|
||||||
|
add_definitions(-DSD_USE_SYCL)
|
||||||
|
# disable fast-math on host, see:
|
||||||
|
# https://www.intel.com/content/www/us/en/docs/cpp-compiler/developer-guide-reference/2021-10/fp-model-fp.html
|
||||||
|
if (WIN32)
|
||||||
|
set(SYCL_COMPILE_OPTIONS /fp:precise)
|
||||||
|
else()
|
||||||
|
set(SYCL_COMPILE_OPTIONS -fp-model=precise)
|
||||||
|
endif()
|
||||||
|
message("-- Turn off fast-math for host in SYCL backend")
|
||||||
|
target_compile_options(${SD_LIB} PRIVATE ${SYCL_COMPILE_OPTIONS})
|
||||||
|
endif()
|
||||||
|
|
||||||
set(CMAKE_POLICY_DEFAULT_CMP0077 NEW)
|
set(CMAKE_POLICY_DEFAULT_CMP0077 NEW)
|
||||||
|
|
||||||
|
@ -172,14 +172,12 @@ Example of text2img by using SYCL backend:
|
|||||||
|
|
||||||
- download `stable-diffusion` model weight, refer to [download-weight](#download-weights).
|
- download `stable-diffusion` model weight, refer to [download-weight](#download-weights).
|
||||||
|
|
||||||
- run `./bin/sd -m ../models/sd3_medium_incl_clips_t5xxlfp16.safetensors --cfg-scale 5 --steps 30 --sampling-method euler -H 512 -W 512 --seed 42 -p "fantasy medieval village world inside a glass sphere , high detail, fantasy, realistic, light effect, hyper detail, volumetric lighting, cinematic, macro, depth of field, blur, red light and clouds from the back, highly detailed epic cinematic concept art cg render made in maya, blender and photoshop, octane render, excellent composition, dynamic dramatic cinematic lighting, aesthetic, very inspirational, world inside a glass sphere by james gurney by artgerm with james jean, joe fenton and tristan eaton by ross tran, fine details, 4k resolution"`
|
- run `./bin/sd -m ../models/sd3_medium_incl_clips_t5xxlfp16.safetensors --cfg-scale 5 --steps 30 --sampling-method euler -H 1024 -W 1024 --seed 42 -p "fantasy medieval village world inside a glass sphere , high detail, fantasy, realistic, light effect, hyper detail, volumetric lighting, cinematic, macro, depth of field, blur, red light and clouds from the back, highly detailed epic cinematic concept art cg render made in maya, blender and photoshop, octane render, excellent composition, dynamic dramatic cinematic lighting, aesthetic, very inspirational, world inside a glass sphere by james gurney by artgerm with james jean, joe fenton and tristan eaton by ross tran, fine details, 4k resolution"`
|
||||||
|
|
||||||
<p align="center">
|
<p align="center">
|
||||||
<img src="./assets/sycl_sd3_output.png" width="360x">
|
<img src="./assets/sycl_sd3_output.png" width="360x">
|
||||||
</p>
|
</p>
|
||||||
|
|
||||||
> [!NOTE]
|
|
||||||
> Try to set smaller image height and width (for example, `-H 512 -W 512`) if you meet `Provided range is out of integer limits. Pass '-fno-sycl-id-queries-fit-in-int' to disable range check.`
|
|
||||||
|
|
||||||
|
|
||||||
##### Using Flash Attention
|
##### Using Flash Attention
|
||||||
|
Binary file not shown.
Before Width: | Height: | Size: 547 KiB After Width: | Height: | Size: 1.7 MiB |
2
ggml
2
ggml
@ -1 +1 @@
|
|||||||
Subproject commit 21f9e5c426b105841c2e346d8f1aafec398edf15
|
Subproject commit 21d3a308fcb7f31cb9beceaeebad4fb622f3c337
|
@ -741,7 +741,7 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_nn_attention_ext(struct ggml_context*
|
|||||||
v = ggml_cont(ctx, ggml_permute(ctx, v, 0, 2, 1, 3)); // [N, n_head, L_k, d_head]
|
v = ggml_cont(ctx, ggml_permute(ctx, v, 0, 2, 1, 3)); // [N, n_head, L_k, d_head]
|
||||||
v = ggml_reshape_3d(ctx, v, d_head, L_k, n_head * N); // [N * n_head, L_k, d_head]
|
v = ggml_reshape_3d(ctx, v, d_head, L_k, n_head * N); // [N * n_head, L_k, d_head]
|
||||||
LOG_DEBUG("k->ne[1] == %d", k->ne[1]);
|
LOG_DEBUG("k->ne[1] == %d", k->ne[1]);
|
||||||
kqv = ggml_flash_attn_ext(ctx, q, k, v, mask, scale, 0);
|
kqv = ggml_flash_attn_ext(ctx, q, k, v, mask, scale, 0, 0);
|
||||||
} else {
|
} else {
|
||||||
v = ggml_cont(ctx, ggml_permute(ctx, v, 1, 2, 0, 3)); // [N, n_head, d_head, L_k]
|
v = ggml_cont(ctx, ggml_permute(ctx, v, 1, 2, 0, 3)); // [N, n_head, d_head, L_k]
|
||||||
v = ggml_reshape_3d(ctx, v, L_k, d_head, n_head * N); // [N * n_head, d_head, L_k]
|
v = ggml_reshape_3d(ctx, v, L_k, d_head, n_head * N); // [N * n_head, d_head, L_k]
|
||||||
|
Loading…
Reference in New Issue
Block a user