encoding.utf8: add support for indexing a utf8 str (#9670)

pull/9672/head
ChAoS_UnItY 2021-04-11 14:04:18 +08:00 committed by GitHub
parent a2a18ef92c
commit acb58d4923
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 38 additions and 0 deletions

View File

@ -87,6 +87,27 @@ pub fn get_uchar(s string, index int) int {
}
// raw_index - get the raw character from the string by the given index value.
// The index counts characters (UTF-8 sequences), not bytes.
// example: utf8.raw_index('我是V Lang', 1) => '是'
// An index that is negative, or not smaller than the number of characters
// in `s`, fails on the final array access, exactly like any other
// out-of-range array index.
pub fn raw_index(s string, index int) string {
	mut r := []rune{}
	// Stop scanning as soon as the requested character has been collected;
	// decoding the rest of the string would only be thrown away.
	for i := 0; i < s.len && r.len <= index; i++ {
		b := s[i]
		// 0xe5000000 is a packed lookup table with 2 bits per entry, selected
		// by the top bits of `b`. It yields the number of bytes that follow a
		// lead byte: 0 for ASCII (and for a stray continuation byte), 1, 2 or
		// 3 for the lead byte of a 2-, 3- or 4-byte sequence.
		ch_len := ((0xe5000000 >> ((b >> 3) & 0x1e)) & 3)
		if ch_len > 0 {
			// `i` is the offset of the lead byte, which is what get_uchar is given.
			r << rune(get_uchar(s, i))
			// Skip the trailing bytes; the loop's own i++ steps past the lead byte.
			i += ch_len
		} else {
			r << rune(b)
		}
	}
	return r[index].str()
}
/*
Conversion functions

View File

@ -50,3 +50,20 @@ fn test_utf8_util() {
// test utility functions
assert utf8.get_uchar(b,0)==0x002E
}
// test_raw_indexing checks that utf8.raw_index addresses a string by
// character position rather than by byte offset, for both multi-byte
// (non-ASCII) and single-byte (ASCII) characters.
fn test_raw_indexing() {
	// Two 3-byte characters followed by ASCII text, so character indices
	// and byte offsets differ from index 1 onwards.
	a := "我是V Lang!"
	// test non ascii characters
	assert utf8.raw_index(a, 0) == '我'
	assert utf8.raw_index(a, 1) == '是'
	// test ascii characters
	assert utf8.raw_index(a, 2) == 'V'
	assert utf8.raw_index(a, 3) == ' '
	assert utf8.raw_index(a, 4) == 'L'
	assert utf8.raw_index(a, 5) == 'a'
	assert utf8.raw_index(a, 6) == 'n'
	assert utf8.raw_index(a, 7) == 'g'
	assert utf8.raw_index(a, 8) == '!'
}