encoding.utf8: fix len and ulen and optimize raw_index (#9682)

pull/9700/head
ChAoS_UnItY 2021-04-12 10:58:03 +08:00 committed by GitHub
parent 67d8639917
commit d7a64bbc8a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 12 additions and 0 deletions

View File

@ -15,6 +15,10 @@ Utility functions
// len returns the length of a string as the number of unicode chars // len returns the length of a string as the number of unicode chars
pub fn len(s string) int { pub fn len(s string) int {
if s.len == 0 {
return 0
}
mut count := 0 mut count := 0
mut index := 0 mut index := 0
@ -82,10 +86,16 @@ pub fn get_uchar(s string, index int) int {
// raw_index - get the raw character from the string by the given index value. // raw_index - get the raw character from the string by the given index value.
// example: '我是V Lang'.raw_index(1) => '是' // example: '我是V Lang'.raw_index(1) => '是'
// raw_index - get the raw character from the string by the given index value.
// example: utf8.raw_index('我是V Lang', 1) => '是'
pub fn raw_index(s string, index int) string { pub fn raw_index(s string, index int) string {
mut r := []rune{} mut r := []rune{}
for i := 0; i < s.len; i++ { for i := 0; i < s.len; i++ {
if r.len - 1 == index {
break
}
b := s[i] b := s[i]
ch_len := ((0xe5000000 >> ((b >> 3) & 0x1e)) & 3) ch_len := ((0xe5000000 >> ((b >> 3) & 0x1e)) & 3)

View File

@ -21,10 +21,12 @@ fn test_utf8_util() {
assert lower1 == (src_lower.ustring()) assert lower1 == (src_lower.ustring())
// test len function // test len function
assert utf8.len('') == 0
assert utf8.len('pippo') == 5 assert utf8.len('pippo') == 5
assert utf8.len(src) == 15 // 29 assert utf8.len(src) == 15 // 29
assert src.len == 24 // 49 assert src.len == 24 // 49
// test u_len function // test u_len function
assert utf8.u_len(''.ustring()) == 0
assert utf8.u_len(src1) == 15 // 29 assert utf8.u_len(src1) == 15 // 29
assert utf8.u_len('pippo'.ustring()) == 5 assert utf8.u_len('pippo'.ustring()) == 5