fix: fix clip tokenizer (#383)
This commit is contained in:
parent
e410aeb534
commit
14206fd488
2
clip.hpp
2
clip.hpp
@ -388,7 +388,7 @@ public:
|
|||||||
std::string token_str = token.str();
|
std::string token_str = token.str();
|
||||||
std::u32string utf32_token;
|
std::u32string utf32_token;
|
||||||
for (int i = 0; i < token_str.length(); i++) {
|
for (int i = 0; i < token_str.length(); i++) {
|
||||||
char b = token_str[i];
|
unsigned char b = token_str[i];
|
||||||
utf32_token += byte_encoder[b];
|
utf32_token += byte_encoder[b];
|
||||||
}
|
}
|
||||||
auto bpe_strs = bpe(utf32_token);
|
auto bpe_strs = bpe(utf32_token);
|
||||||
|
Loading…
Reference in New Issue
Block a user