fix: fix clip tokenizer (#383)

This commit is contained in:
stduhpf 2024-09-02 16:31:46 +02:00 committed by GitHub
parent e410aeb534
commit 14206fd488
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -388,7 +388,7 @@ public:
std::string token_str = token.str(); std::string token_str = token.str();
std::u32string utf32_token; std::u32string utf32_token;
for (int i = 0; i < token_str.length(); i++) { for (int i = 0; i < token_str.length(); i++) {
char b = token_str[i]; unsigned char b = token_str[i];
utf32_token += byte_encoder[b]; utf32_token += byte_encoder[b];
} }
auto bpe_strs = bpe(utf32_token); auto bpe_strs = bpe(utf32_token);