encoding.utf8: add support for indexing a utf8 str (#9670)
parent
a2a18ef92c
commit
acb58d4923
|
@ -87,6 +87,27 @@ pub fn get_uchar(s string, index int) int {
|
|||
}
|
||||
|
||||
|
||||
// raw_index - get the raw character from the string by the given index value.
// The index counts UTF-8 encoded characters, not bytes.
// example: utf8.raw_index('我是V Lang', 1) => '是'
// Panics when index is negative or not smaller than the number of characters in s.
pub fn raw_index(s string, index int) string {
	// pos is the number of characters that start before byte offset i
	mut pos := 0
	for i := 0; i < s.len; i++ {
		b := s[i]
		// The constant packs a 2-bit lookup table selected by the top bits of b.
		// The result is the number of extra bytes that follow b in its sequence:
		// 0 for bytes below 0xC0, 1 for 0xC0..0xDF, 2 for 0xE0..0xEF, 3 for 0xF0..0xFF.
		ch_len := ((0xe5000000 >> ((b >> 3) & 0x1e)) & 3)
		if pos == index {
			// Stop at the requested character instead of decoding the rest of s.
			if ch_len > 0 {
				return rune(get_uchar(s, i)).str()
			}
			return rune(b).str()
		}
		// Skip the extra bytes; the loop's own i++ steps over the lead byte.
		i += ch_len
		pos++
	}
	panic('utf8.raw_index: index ${index} out of range (character count: ${pos})')
}
|
||||
|
||||
/*
|
||||
|
||||
Conversion functions
|
||||
|
|
|
@ -50,3 +50,20 @@ fn test_utf8_util() {
|
|||
// test utility functions
|
||||
assert utf8.get_uchar(b,0)==0x002E
|
||||
}
|
||||
|
||||
// test_raw_indexing checks utf8.raw_index against every character position
// of a string that mixes multi-byte (non-ASCII) and single-byte (ASCII) characters.
fn test_raw_indexing() {
	a := "我是V Lang!"

	// expected[i] is the character at character index i of a:
	// two non-ASCII characters first, then the ASCII ones.
	expected := ['我', '是', 'V', ' ', 'L', 'a', 'n', 'g', '!']
	for idx, want in expected {
		assert utf8.raw_index(a, idx) == want
	}
}
|
||||
|
|
Loading…
Reference in New Issue