fix: fix clip tokenizer (#383)

This commit is contained in:
stduhpf
2024-09-02 16:31:46 +02:00
committed by GitHub
parent e410aeb534
commit 14206fd488

View File

@@ -388,7 +388,7 @@ public:
std::string token_str = token.str();
std::u32string utf32_token;
for (int i = 0; i < token_str.length(); i++) {
-                char b = token_str[i];
+                unsigned char b = token_str[i];
utf32_token += byte_encoder[b];
}
auto bpe_strs = bpe(utf32_token);