fix: set default n_threads to physical cores number

This commit is contained in:
leejet 2023-08-15 21:00:23 +08:00
parent 3265464090
commit fec86b8b76

View File

@ -4,6 +4,7 @@
#include <random>
#include <string>
#include <thread>
#include <unordered_set>
#include "stable-diffusion.h"
@ -11,6 +12,55 @@
#define STB_IMAGE_WRITE_STATIC
#include "stb_image_write.h"
#if defined(__APPLE__) && defined(__MACH__)
#include <sys/types.h>
#include <sys/sysctl.h>
#endif
#if !defined(_WIN32)
#include <sys/ioctl.h>
#include <unistd.h>
#endif
// get_num_physical_cores is copy from
// https://github.com/ggerganov/llama.cpp/blob/master/examples/common.cpp
// LICENSE: https://github.com/ggerganov/llama.cpp/blob/master/LICENSE
int32_t get_num_physical_cores() {
#ifdef __linux__
// enumerate the set of thread siblings, num entries is num cores
std::unordered_set<std::string> siblings;
for (uint32_t cpu=0; cpu < UINT32_MAX; ++cpu) {
std::ifstream thread_siblings("/sys/devices/system/cpu"
+ std::to_string(cpu) + "/topology/thread_siblings");
if (!thread_siblings.is_open()) {
break; // no more cpus
}
std::string line;
if (std::getline(thread_siblings, line)) {
siblings.insert(line);
}
}
if (siblings.size() > 0) {
return static_cast<int32_t>(siblings.size());
}
#elif defined(__APPLE__) && defined(__MACH__)
int32_t num_physical_cores;
size_t len = sizeof(num_physical_cores);
int result = sysctlbyname("hw.perflevel0.physicalcpu", &num_physical_cores, &len, NULL, 0);
if (result == 0) {
return num_physical_cores;
}
result = sysctlbyname("hw.physicalcpu", &num_physical_cores, &len, NULL, 0);
if (result == 0) {
return num_physical_cores;
}
#elif defined(_WIN32)
//TODO: Implement
#endif
unsigned int n_threads = std::thread::hardware_concurrency();
return n_threads > 0 ? (n_threads <= 4 ? n_threads : n_threads / 2) : 4;
}
struct Option {
int n_threads = -1;
std::string model_path;
@ -47,7 +97,7 @@ void print_usage(int argc, const char* argv[]) {
printf("arguments:\n");
printf(" -h, --help show this help message and exit\n");
printf(" -t, --threads N number of threads to use during computation (default: -1).\n");
printf(" If threads <= 0, then threads will be set to the number of CPU cores\n");
printf(" If threads <= 0, then threads will be set to the number of CPU physical cores\n");
printf(" -m, --model [MODEL] path to model\n");
printf(" -o, --output OUTPUT path to write result image to (default: .\\output.png)\n");
printf(" -p, --prompt [PROMPT] the prompt to render\n");
@ -145,7 +195,7 @@ void parse_args(int argc, const char* argv[], Option* opt) {
}
if (opt->n_threads <= 0) {
opt->n_threads = std::thread::hardware_concurrency();
opt->n_threads = get_num_physical_cores();
}
if (opt->prompt.length() == 0) {