encoding.utf8: fix len and ulen and optimize raw_index (#9682)
parent
67d8639917
commit
d7a64bbc8a
|
@ -15,6 +15,10 @@ Utility functions
|
||||||
|
|
||||||
// len returns the length of a string as the number of unicode chars
|
// len returns the length of a string as the number of unicode chars
|
||||||
pub fn len(s string) int {
|
pub fn len(s string) int {
|
||||||
|
if s.len == 0 {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
mut count := 0
|
mut count := 0
|
||||||
mut index := 0
|
mut index := 0
|
||||||
|
|
||||||
|
@ -82,10 +86,16 @@ pub fn get_uchar(s string, index int) int {
|
||||||
// raw_index - get the raw character from the string by the given index value.
|
// raw_index - get the raw character from the string by the given index value.
|
||||||
// example: '我是V Lang'.raw_index(1) => '是'
|
// example: '我是V Lang'.raw_index(1) => '是'
|
||||||
|
|
||||||
|
// raw_index - get the raw character from the string by the given index value.
|
||||||
|
// example: utf8.raw_index('我是V Lang', 1) => '是'
|
||||||
pub fn raw_index(s string, index int) string {
|
pub fn raw_index(s string, index int) string {
|
||||||
mut r := []rune{}
|
mut r := []rune{}
|
||||||
|
|
||||||
for i := 0; i < s.len; i++ {
|
for i := 0; i < s.len; i++ {
|
||||||
|
if r.len - 1 == index {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
b := s[i]
|
b := s[i]
|
||||||
ch_len := ((0xe5000000 >> ((b >> 3) & 0x1e)) & 3)
|
ch_len := ((0xe5000000 >> ((b >> 3) & 0x1e)) & 3)
|
||||||
|
|
||||||
|
|
|
@ -21,10 +21,12 @@ fn test_utf8_util() {
|
||||||
assert lower1 == (src_lower.ustring())
|
assert lower1 == (src_lower.ustring())
|
||||||
|
|
||||||
// test len function
|
// test len function
|
||||||
|
assert utf8.len('') == 0
|
||||||
assert utf8.len('pippo') == 5
|
assert utf8.len('pippo') == 5
|
||||||
assert utf8.len(src) == 15 // 29
|
assert utf8.len(src) == 15 // 29
|
||||||
assert src.len == 24 // 49
|
assert src.len == 24 // 49
|
||||||
// test u_len function
|
// test u_len function
|
||||||
|
assert utf8.u_len(''.ustring()) == 0
|
||||||
assert utf8.u_len(src1) == 15 // 29
|
assert utf8.u_len(src1) == 15 // 29
|
||||||
assert utf8.u_len('pippo'.ustring()) == 5
|
assert utf8.u_len('pippo'.ustring()) == 5
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue